From 4a6f063c1f6579be3c4f51d7ad5ad365e630828d Mon Sep 17 00:00:00 2001 From: Johan Le Baut Date: Mon, 11 Apr 2022 09:49:51 +0200 Subject: [PATCH] Add script to fetch latest arcep data --- data-ingest/fetch_latest_arcep.sh | 155 ++++++++++++++++++++++++++++++ data-ingest/ingest_arcep | 57 +++++++++++ webapp/ipe_fetcher/axione.py | 35 +++++-- webapp/main.py | 2 +- webapp/templates/app.js | 3 + 5 files changed, 243 insertions(+), 9 deletions(-) create mode 100755 data-ingest/fetch_latest_arcep.sh create mode 100755 data-ingest/ingest_arcep diff --git a/data-ingest/fetch_latest_arcep.sh b/data-ingest/fetch_latest_arcep.sh new file mode 100755 index 0000000..b963add --- /dev/null +++ b/data-ingest/fetch_latest_arcep.sh @@ -0,0 +1,155 @@ +#!/usr/bin/env bash +set -eau -o pipefail + +# API where to list arcep files +GOUV_API_URL=https://www.data.gouv.fr/api/1/datasets/le-marche-du-haut-et-tres-haut-debit-fixe-deploiements/ +# File to store last versions downloaded +VERSIONS_FILENAME=.arcep_versions +## Content of version file: +# LAST_ARCEP_ZIP=__.zip +# BEFORE_ARCEP_ZIP=__.zip + +# Global vars +g_last_arcep_zip="" +g_before_arcep_zip="" +g_penultimate_arcep_zip="" +g_arcep_to_unzip="" + +# Script usage +usage() { + source + echo + echo "Usage : $0 -d|--dir-out (-r|--remove-penultimate)" + echo + echo " With:" + echo " -d|--dir-out: folder where to store zip files" + echo " (-r|--remove-penultimate): if set, remove 2nd before last version after dl latest file" + echo +} + +# Get already dl data info +source_versions() { + dir_out=$1 + ver_file=${dir_out}/${VERSIONS_FILENAME} + LAST_ARCEP_ZIP="" + BEFORE_ARCEP_ZIP="" + [[ -f ${ver_file} ]] && source ${ver_file} + g_last_arcep_zip=${LAST_ARCEP_ZIP} + g_before_arcep_zip=${BEFORE_ARCEP_ZIP} +} + +# Dl arcep latest data if needed +dl_latest_arcep() { + dir_out=$1 + rc=0 + + echo "Create out dir ${dir_out} if not exist" + mkdir -p ${dir_out} + ver_file=${dir_out}/${VERSIONS_FILENAME} + touch ${ver_file} + + latest_file_url="$(curl -s ${GOUV_API_URL} | jq -r '.resources[] |objects | .url' | grep -i immeubles | head -1)" + file_date=$(echo $latest_file_url | cut -f6 -d '/') + file_name=$(echo $latest_file_url | cut -f7 -d '/') + latest_f=${file_date}__${file_name} + + echo "Found ${latest_f} Check if already exist" + if [[ -n ${g_last_arcep_zip} && "${latest_f}" = "${g_last_arcep_zip}" ]]; then + echo "File ${latest_f} is already the latest ! Do not do anything" + else + echo "File ${latest_f} not there, download it" + wget -O ${dir_out}/${latest_f} ${latest_file_url} || rc=1 + g_penultimate_arcep_zip=${g_before_arcep_zip} + g_before_arcep_zip=${g_last_arcep_zip} + g_last_arcep_zip=${latest_f} + g_arcep_to_unzip=${latest_f} + echo "OK, update versions file" + echo "LAST_ARCEP_ZIP=${g_last_arcep_zip}" > ${ver_file} + echo "BEFORE_ARCEP_ZIP=${g_before_arcep_zip}" >> ${ver_file} + fi + + return ${rc} +} + +# Unzip a dl arcep file +unzip_arcep() { + dir_out=$1 + zip_file=$2 + zip_dir=$(echo ${zip_file} | rev | cut -f2- -d '.' | rev) + mkdir -p "${dir_out}/$zip_dir" + echo "Unzip file ${dir_out}/${zip_file}" + unzip ${dir_out}/${zip_file} -d ${dir_out}/$zip_dir || return 1 + return 0 +} + +# main +main () { + # Init input vars + remove_penultimate=false + dir_out="" + + # Read inputs + [[ $# -eq 0 ]] && usage && return 1 + while [ -n $1 ] ; do + case $1 in + -d|--dir-out) + dir_out=$(realpath $2) + shift + ;; + -r|--remove-penultimate) + remove_penultimate=true + ;; + -h|--help) + usage && exit 0 + ;; + *) + echo "Unknown command: $1" + usage && exit 1 + ;; + esac + [[ $# -le 1 ]] && break + shift + done + + # check inputs + if [[ -z ${dir_out} ]]; then + echo "Error: You need to specify an output dir -d|--dir-out " + usage + return 1 + fi + + rc=0 + # Read existing dl versions + source_versions ${dir_out} || rc=1 + # Download latest zip file if needed + [[ $rc -eq 0 ]] && dl_latest_arcep ${dir_out} || rc=1 + # If download succeeded and there is a file to unzip + if [[ $rc -eq 0 && -n $g_arcep_to_unzip ]]; then + # unzip file + unzip_arcep ${dir_out} ${g_last_arcep_zip} || rc=1 + + # Unzip succeeded and need to remove penultimate arcep data (if exists) + if [[ $rc -eq 0 \ + && $remove_penultimate \ + && -n $g_penultimate_arcep_zip \ + && -f ${dir_out}/$g_penultimate_arcep_zip ]]; then + + echo "Delete penultimate zip ${dir_out}/$g_penultimate_arcep_zip" + rm -f ${dir_out}/$g_penultimate_arcep_zip + zip_dir=$(echo ${g_penultimate_arcep_zip} | rev | cut -f2- -d '.' | rev) + if [[ -d ${dir_out}/${zip_dir} ]]; then + echo "remove dir ${dir_out}/${zip_dir}" + rm -rf ${dir_out}/${zip_dir} + fi + elif [[ $rc -ne 0 ]]; then + echo "Failed to unzip ${g_last_arcep_zip} !" + fi + fi + return $rc +} + + +### Call main +main "$@" || exit 1 + +exit 0 diff --git a/data-ingest/ingest_arcep b/data-ingest/ingest_arcep new file mode 100755 index 0000000..174706f --- /dev/null +++ b/data-ingest/ingest_arcep @@ -0,0 +1,57 @@ +#!/usr/bin/env bash +set -eau -o pipefail + +ARCEP_WWW="https://www.data.gouv.fr/fr/datasets/le-marche-du-haut-et-tres-haut-debit-fixe-deploiements/" + +if [ "$#" -ne 2 ]; then + echo "Usage: ingest path-to-arcep-ipe-csv-file path-to-generated-db" + echo "ARCEP file can be downloaded here: ${ARCEP_WWW}" + echo "" + exit 1 +fi +fullIpePath=$(realpath "${1}") +fullDbPath=$(realpath "${2}") + +tmpSql=$(mktemp) +clean_tmp () { + rm "${tmpSql}" +} +trap clean_tmp EXIT + +echo "[+] Ingesting IPE data from ${fullIpePath}" +echo "" +cat > "${tmpSql}" < "${tmpSql}" < "${tmpSql}" < { + console.log(building.numVoieImm) + console.log("lat", building.y) + console.log("lng", building.x) const latlng = new L.latLng(building.y, building.x); const addrImm = `${building.numVoieImm} ${building.typeVoieImm} ${building.nomVoieImm}` const marker = new L.marker(latlng)