diff --git a/data-ingest/ingest b/data-ingest/ingest index 979a586..4d37cfe 100755 --- a/data-ingest/ingest +++ b/data-ingest/ingest @@ -1,6 +1,8 @@ #!/usr/bin/env bash set -eau -o pipefail +NEEDED_COLUMNS=("IdentifiantImmeuble" "EtatImmeuble" "CoordonneeImmeubleX" "CoordonneeImmeubleY" "NumeroVoieImmeuble" "TypeVoieImmeuble" "NomVoieImmeuble" "CodePostalImmeuble" "CommuneImmeuble" "DateDebutAcceptationCmdAcces" "DateMiseEnServiceCommercialeImmeuble") + if [ "$#" -ne 2 ]; then echo "Usage: ingest path-to-directory-containing-IPE-CSVs path-to-generated-db" echo "" @@ -35,13 +37,36 @@ for ipeFile in ${ipeFiles}; do fi if ! $firstFile; then import_opt="-skip 1" + else + header=$(head -n1 $ipeFile) + OLD_IFS=$IFS + export IFS=";" + idx=1 + idx_to_keep=() + for column in $header; do + export IFS=$OLD_IFS + if [[ " ${NEEDED_COLUMNS[*]} " =~ " ${column} " ]]; then + idx_to_keep+=("$idx") + fi + idx=$((idx+1)) + export IFS=";" + done + export IFS=$OLD_IFS + cut_idx_to_keep=$(IFS=',';echo "${idx_to_keep[*]}";IFS=$' \t\n') + echo " Column indexes that will be kept in csv files: $cut_idx_to_keep (matching columns ${NEEDED_COLUMNS[*]})" fi firstFile=false fi - cat >> "${tmpSql}" < ${ipeFile}.cut + useIpeFile=${ipeFile}.cut + fi + cat >> "${tmpSql}" < "${tmpSql}" <