2022-02-21 12:47:59 +01:00
|
|
|
#!/usr/bin/env bash
|
|
|
|
set -eau -o pipefail
|
|
|
|
|
2022-12-08 12:28:33 +01:00
|
|
|
NEEDED_COLUMNS=("IdentifiantImmeuble" "EtatImmeuble" "CoordonneeImmeubleX" "CoordonneeImmeubleY" "NumeroVoieImmeuble" "TypeVoieImmeuble" "NomVoieImmeuble" "CodePostalImmeuble" "CommuneImmeuble" "DateDebutAcceptationCmdAcces" "DateMiseEnServiceCommercialeImmeuble")
|
|
|
|
|
2022-02-21 12:47:59 +01:00
|
|
|
if [ "$#" -ne 2 ]; then
|
|
|
|
echo "Usage: ingest path-to-directory-containing-IPE-CSVs path-to-generated-db"
|
|
|
|
echo ""
|
|
|
|
exit 1
|
|
|
|
fi
|
|
|
|
fullIpeDirPath=$(realpath "${1}")
|
|
|
|
fullDbPath=$(realpath "${2}")
|
|
|
|
|
|
|
|
tmpSql=$(mktemp)
|
|
|
|
clean_tmp () {
|
|
|
|
rm "${tmpSql}"
|
|
|
|
}
|
|
|
|
trap clean_tmp EXIT
|
|
|
|
|
2022-02-21 19:00:11 +01:00
|
|
|
echo "[+] Ingesting IPE data."
|
2022-02-21 12:47:59 +01:00
|
|
|
echo " The following files will be ingested."
|
|
|
|
echo ""
|
|
|
|
ipeFiles=$(find "${fullIpeDirPath}" -name '*.csv')
|
|
|
|
cat > "${tmpSql}" <<EOF
|
|
|
|
.separator ";"
|
|
|
|
EOF
|
|
|
|
|
2022-11-16 16:10:30 +01:00
|
|
|
firstFile=true
|
2022-02-21 12:47:59 +01:00
|
|
|
for ipeFile in ${ipeFiles}; do
|
2022-11-16 16:10:30 +01:00
|
|
|
echo " ${ipeFile}"
|
|
|
|
head -n1 $ipeFile | grep -q IdentifiantImmeuble && header=true || header=false
|
|
|
|
import_opt=""
|
|
|
|
if $firstFile || $header; then
|
|
|
|
if ! $header; then
|
|
|
|
echo "ERROR: first file ${ipeFile} does not have a good header"
|
|
|
|
exit 1
|
|
|
|
fi
|
|
|
|
if ! $firstFile; then
|
|
|
|
import_opt="-skip 1"
|
2022-12-08 12:28:33 +01:00
|
|
|
else
|
|
|
|
header=$(head -n1 $ipeFile)
|
|
|
|
OLD_IFS=$IFS
|
|
|
|
export IFS=";"
|
|
|
|
idx=1
|
|
|
|
idx_to_keep=()
|
|
|
|
for column in $header; do
|
|
|
|
export IFS=$OLD_IFS
|
|
|
|
if [[ " ${NEEDED_COLUMNS[*]} " =~ " ${column} " ]]; then
|
|
|
|
idx_to_keep+=("$idx")
|
|
|
|
fi
|
|
|
|
idx=$((idx+1))
|
|
|
|
export IFS=";"
|
|
|
|
done
|
|
|
|
export IFS=$OLD_IFS
|
|
|
|
cut_idx_to_keep=$(IFS=',';echo "${idx_to_keep[*]}";IFS=$' \t\n')
|
|
|
|
echo " Column indexes that will be kept in csv files: $cut_idx_to_keep (matching columns ${NEEDED_COLUMNS[*]})"
|
2022-11-16 16:10:30 +01:00
|
|
|
fi
|
|
|
|
firstFile=false
|
|
|
|
fi
|
2022-12-08 12:28:33 +01:00
|
|
|
|
|
|
|
useIpeFile=${ipeFile}
|
|
|
|
if [[ "$cut_idx_to_keep" != "" ]]; then
|
|
|
|
cut -d';' -f$cut_idx_to_keep $ipeFile > ${ipeFile}.cut
|
|
|
|
useIpeFile=${ipeFile}.cut
|
|
|
|
fi
|
2022-11-16 16:10:30 +01:00
|
|
|
cat >> "${tmpSql}" <<EOF
|
2022-12-08 12:28:33 +01:00
|
|
|
.import ${import_opt} ${useIpeFile} ipe
|
2022-02-21 12:47:59 +01:00
|
|
|
EOF
|
2022-12-08 12:28:33 +01:00
|
|
|
rm -f ${ipeFile}.cut
|
2022-02-21 12:47:59 +01:00
|
|
|
done
|
2022-11-16 16:10:30 +01:00
|
|
|
|
2022-02-21 12:47:59 +01:00
|
|
|
echo ""
|
|
|
|
|
2022-10-14 13:56:31 +02:00
|
|
|
rc=0
|
|
|
|
sqlite3 "${fullDbPath}" < "${tmpSql}" 2>&1 | grep -v "filling the rest with NULL" || rc="${PIPESTATUS[0]}"
|
|
|
|
if [[ $rc -ne 0 ]]; then
|
|
|
|
echo "Error executing sqlite import"
|
|
|
|
exit "$rc"
|
|
|
|
fi
|
2022-02-21 12:47:59 +01:00
|
|
|
|
2022-11-16 16:10:30 +01:00
|
|
|
echo "[+] Create separate table with id immeuble index and its state."
|
|
|
|
cat > "${tmpSql}" <<EOF
|
|
|
|
CREATE TABLE refimm (IdentifiantImmeuble text NOT NULL, EtatImmeuble text NOT NULL);
|
|
|
|
CREATE UNIQUE INDEX idx_IdentifiantImmeuble on refimm (IdentifiantImmeuble);
|
2022-12-08 12:28:33 +01:00
|
|
|
INSERT OR REPLACE INTO refimm SELECT IdentifiantImmeuble,EtatImmeuble FROM ipe;
|
2022-11-16 16:10:30 +01:00
|
|
|
EOF
|
|
|
|
sqlite3 "${fullDbPath}" < "${tmpSql}"
|
|
|
|
|
2022-02-21 12:47:59 +01:00
|
|
|
echo "[+] Ingesting spatial data."
|
|
|
|
|
|
|
|
cat > "${tmpSql}" <<EOF
|
|
|
|
SELECT load_extension("mod_spatialite");
|
|
|
|
SELECT InitSpatialMetaData();
|
|
|
|
|
|
|
|
-- Despite the SELECT, we're actually creating a new ImmeubleGeoPoint
|
|
|
|
-- column here. The spatialite API is a bit weird...
|
|
|
|
SELECT AddGeometryColumn('ipe','ImmeubleGeoPoint',4326,'POINT');
|
|
|
|
|
|
|
|
-- The geodata is expressed in RGF93 (SRID 2154). We need to project
|
|
|
|
-- it to WSG84 to display it on a OSM map.
|
|
|
|
UPDATE ipe SET ImmeubleGeoPoint =
|
|
|
|
Transform(
|
|
|
|
MakePoint(
|
|
|
|
CAST(CoordonneeImmeubleX as DOUBLE),
|
|
|
|
CAST(CoordonneeImmeubleY as DOUBLE),
|
|
|
|
2154)
|
|
|
|
,4326);
|
|
|
|
EOF
|
2022-02-21 19:00:11 +01:00
|
|
|
sqlite3 "${fullDbPath}" < "${tmpSql}"
|
2022-02-21 12:47:59 +01:00
|
|
|
|
2022-02-21 19:00:11 +01:00
|
|
|
echo "[+] Creating Rtree index (spatial index). We're almost done."
|
|
|
|
cat > "${tmpSql}" <<EOF
|
|
|
|
SELECT load_extension("mod_spatialite");
|
|
|
|
SELECT CreateSpatialIndex('ipe','ImmeubleGeoPoint');
|
|
|
|
EOF
|
2022-02-21 12:47:59 +01:00
|
|
|
sqlite3 "${fullDbPath}" < "${tmpSql}"
|
|
|
|
|
|
|
|
echo "[+] SQLite database generated at ${fullDbPath}"
|