Axione-IPE-Viewer/data-ingest/ingest

118 lines
3.5 KiB
Text
Raw Permalink Normal View History

2022-02-21 12:47:59 +01:00
#!/usr/bin/env bash
set -eau -o pipefail
2022-12-08 12:28:33 +01:00
NEEDED_COLUMNS=("IdentifiantImmeuble" "EtatImmeuble" "CoordonneeImmeubleX" "CoordonneeImmeubleY" "NumeroVoieImmeuble" "TypeVoieImmeuble" "NomVoieImmeuble" "CodePostalImmeuble" "CommuneImmeuble" "DateDebutAcceptationCmdAcces" "DateMiseEnServiceCommercialeImmeuble")
2022-02-21 12:47:59 +01:00
if [ "$#" -ne 2 ]; then
echo "Usage: ingest path-to-directory-containing-IPE-CSVs path-to-generated-db"
echo ""
exit 1
fi
fullIpeDirPath=$(realpath "${1}")
fullDbPath=$(realpath "${2}")
tmpSql=$(mktemp)
clean_tmp () {
rm "${tmpSql}"
}
trap clean_tmp EXIT
echo "[+] Ingesting IPE data."
2022-02-21 12:47:59 +01:00
echo " The following files will be ingested."
echo ""
ipeFiles=$(find "${fullIpeDirPath}" -name '*.csv')
cat > "${tmpSql}" <<EOF
.separator ";"
EOF
2022-11-16 16:10:30 +01:00
firstFile=true
2022-02-21 12:47:59 +01:00
for ipeFile in ${ipeFiles}; do
2022-11-16 16:10:30 +01:00
echo " ${ipeFile}"
head -n1 $ipeFile | grep -q IdentifiantImmeuble && header=true || header=false
import_opt=""
if $firstFile || $header; then
if ! $header; then
echo "ERROR: first file ${ipeFile} does not have a good header"
exit 1
fi
if ! $firstFile; then
import_opt="-skip 1"
2022-12-08 12:28:33 +01:00
else
header=$(head -n1 $ipeFile)
OLD_IFS=$IFS
export IFS=";"
idx=1
idx_to_keep=()
for column in $header; do
export IFS=$OLD_IFS
if [[ " ${NEEDED_COLUMNS[*]} " =~ " ${column} " ]]; then
idx_to_keep+=("$idx")
fi
idx=$((idx+1))
export IFS=";"
done
export IFS=$OLD_IFS
cut_idx_to_keep=$(IFS=',';echo "${idx_to_keep[*]}";IFS=$' \t\n')
echo " Column indexes that will be kept in csv files: $cut_idx_to_keep (matching columns ${NEEDED_COLUMNS[*]})"
2022-11-16 16:10:30 +01:00
fi
firstFile=false
fi
2022-12-08 12:28:33 +01:00
useIpeFile=${ipeFile}
if [[ "$cut_idx_to_keep" != "" ]]; then
cut -d';' -f$cut_idx_to_keep $ipeFile > ${ipeFile}.cut
useIpeFile=${ipeFile}.cut
fi
2022-11-16 16:10:30 +01:00
cat >> "${tmpSql}" <<EOF
2022-12-08 12:28:33 +01:00
.import ${import_opt} ${useIpeFile} ipe
2022-02-21 12:47:59 +01:00
EOF
done
2022-11-16 16:10:30 +01:00
2022-02-21 12:47:59 +01:00
echo ""
rc=0
sqlite3 "${fullDbPath}" < "${tmpSql}" 2>&1 | grep -v "filling the rest with NULL" || rc="${PIPESTATUS[0]}"
if [[ $rc -ne 0 ]]; then
echo "Error executing sqlite import"
exit "$rc"
fi
2022-02-21 12:47:59 +01:00
2022-11-16 16:10:30 +01:00
echo "[+] Create separate table with id immeuble index and its state."
cat > "${tmpSql}" <<EOF
CREATE TABLE refimm (IdentifiantImmeuble text NOT NULL, EtatImmeuble text NOT NULL, DateDebutAcceptationCmdAcces text NOT NULL);
2022-11-16 16:10:30 +01:00
CREATE UNIQUE INDEX idx_IdentifiantImmeuble on refimm (IdentifiantImmeuble);
INSERT OR REPLACE INTO refimm SELECT IdentifiantImmeuble,EtatImmeuble, DateDebutAcceptationCmdAcces FROM ipe;
2022-11-16 16:10:30 +01:00
EOF
sqlite3 "${fullDbPath}" < "${tmpSql}"
2022-02-21 12:47:59 +01:00
echo "[+] Ingesting spatial data."
cat > "${tmpSql}" <<EOF
SELECT load_extension("mod_spatialite");
SELECT InitSpatialMetaData();
-- Despite the SELECT, we're actually creating a new ImmeubleGeoPoint
-- column here. The spatialite API is a bit weird...
SELECT AddGeometryColumn('ipe','ImmeubleGeoPoint',4326,'POINT');
-- The geodata is expressed in RGF93 (SRID 2154). We need to project
-- it to WSG84 to display it on a OSM map.
UPDATE ipe SET ImmeubleGeoPoint =
Transform(
MakePoint(
CAST(CoordonneeImmeubleX as DOUBLE),
CAST(CoordonneeImmeubleY as DOUBLE),
2154)
,4326);
EOF
sqlite3 "${fullDbPath}" < "${tmpSql}"
2022-02-21 12:47:59 +01:00
echo "[+] Creating Rtree index (spatial index). We're almost done."
cat > "${tmpSql}" <<EOF
SELECT load_extension("mod_spatialite");
SELECT CreateSpatialIndex('ipe','ImmeubleGeoPoint');
EOF
2022-02-21 12:47:59 +01:00
sqlite3 "${fullDbPath}" < "${tmpSql}"
echo "[+] SQLite database generated at ${fullDbPath}"