Axione-IPE-Viewer/data-ingest/fetch_latest_arcep.sh
2022-12-09 15:34:28 +01:00

160 lines
4.3 KiB
Bash
Executable file

#!/usr/bin/env bash
# Shell options, spelled out: stop on errors (errexit), export every variable
# that gets assigned (allexport), reject unset variables (nounset) and let a
# pipeline fail as soon as one of its stages fails (pipefail).
set -o errexit -o allexport -o nounset -o pipefail

# data.gouv.fr API endpoint queried to list the ARCEP dataset files
GOUV_API_URL='https://www.data.gouv.fr/api/1/datasets/le-marche-du-haut-et-tres-haut-debit-fixe-deploiements/'

# Name of the file (stored in the output dir) remembering what was downloaded.
# It contains two shell assignments:
#   LAST_ARCEP_ZIP=<file date>__<file name>.zip
#   BEFORE_ARCEP_ZIP=<file date>__<file name>.zip
VERSIONS_FILENAME='.arcep_versions'

# State shared between the functions below
g_last_arcep_zip=''        # most recent archive known locally
g_before_arcep_zip=''      # archive that preceded the most recent one
g_penultimate_arcep_zip='' # archive pushed out of the two slots above
g_arcep_to_unzip=''        # archive downloaded by this run (empty if none)
# Print the command-line help on stdout (one printf argument per output line).
usage() {
  printf '%s\n' \
    "" \
    "Usage : $0 -d|--dir-out <dir to put downloads in> (-r|--remove-penultimate)" \
    "" \
    " With:" \
    " -d|--dir-out: folder where to store zip files" \
    " (-r|--remove-penultimate): if set, remove 2nd before last version after dl latest file" \
    " (-f|--force-dl): if set, will force re-download data and process it" \
    ""
}
#######################################
# Load the record of already downloaded ARCEP archives, if there is one.
# Globals:
#   VERSIONS_FILENAME (read)
#   LAST_ARCEP_ZIP, BEFORE_ARCEP_ZIP (reset to "", then set by the sourced file)
#   g_last_arcep_zip, g_before_arcep_zip (written)
# Arguments:
#   $1 - output directory that may contain the versions file
# Returns:
#   0; both globals stay empty when no versions file exists.
#######################################
source_versions() {
  local dir_out=$1
  local ver_file="${dir_out}/${VERSIONS_FILENAME}"

  # Reset first so that values from a previous call (or the environment)
  # cannot survive when the file is missing or only defines one of them.
  LAST_ARCEP_ZIP=""
  BEFORE_ARCEP_ZIP=""
  if [[ -f "${ver_file}" ]]; then
    # Quoted: the output directory may contain whitespace.
    # shellcheck source=/dev/null
    source "${ver_file}"
  fi
  g_last_arcep_zip=${LAST_ARCEP_ZIP}
  g_before_arcep_zip=${BEFORE_ARCEP_ZIP}
}
#######################################
# Download the latest ARCEP "immeubles" archive unless it is already recorded
# as the latest one, then update the versions file.
# Globals:
#   GOUV_API_URL, VERSIONS_FILENAME (read)
#   g_last_arcep_zip, g_before_arcep_zip (read and written)
#   g_penultimate_arcep_zip, g_arcep_to_unzip (written)
# Arguments:
#   $1 - output directory (created when missing)
#   $2 - "true" to download even if the latest file is already recorded
#        (defaults to "false")
# Returns:
#   0 when the file was downloaded or nothing had to be done, 1 on failure.
#   On failure neither the globals nor the versions file are modified.
#######################################
dl_latest_arcep() {
  local dir_out=$1
  local force_dl=${2:-false}
  local ver_file latest_file_url file_date file_name latest_f target
  local last_q before_q

  echo "Create out dir ${dir_out} if not exist"
  mkdir -p -- "${dir_out}" || return 1
  ver_file="${dir_out}/${VERSIONS_FILENAME}"
  touch -- "${ver_file}" || return 1

  # `head -1` can close the pipe before grep is done, which pipefail reports
  # as an error; success is therefore judged on the URL actually obtained.
  latest_file_url="$(curl -s "${GOUV_API_URL}" \
    | jq -r '.resources[] |objects | .url' \
    | grep -i immeubles \
    | head -1)" || true
  if [[ -z "${latest_file_url}" ]]; then
    echo "Error: no 'immeubles' file found at ${GOUV_API_URL}" >&2
    return 1
  fi

  # The date and the file name are the 6th and 7th '/'-separated URL fields.
  file_date=$(cut -f6 -d '/' <<<"${latest_file_url}")
  file_name=$(cut -f7 -d '/' <<<"${latest_file_url}")
  if [[ -z "${file_date}" || -z "${file_name}" ]]; then
    echo "Error: unexpected file URL format: ${latest_file_url}" >&2
    return 1
  fi
  latest_f="${file_date}__${file_name}"

  echo "Found ${latest_f} Check if already exist"
  if [[ "${latest_f}" == "${g_last_arcep_zip}" && "${force_dl}" != "true" ]]; then
    echo "File ${latest_f} is already the latest ! Do not do anything"
    return 0
  fi

  echo "File ${latest_f} not there, download it"
  target="${dir_out}/${latest_f}"
  # Download under a temporary name: a failed transfer must neither truncate
  # an existing archive nor leave a partial file that looks complete.
  if ! wget -O "${target}.part" "${latest_file_url}"; then
    echo "Error: failed to download ${latest_file_url}" >&2
    rm -f -- "${target}.part"
    return 1
  fi
  mv -f -- "${target}.part" "${target}" || return 1

  # Rotate the history only for a genuinely new file; a forced re-download
  # of the current file must not push it into the "before" slot.
  if [[ "${latest_f}" != "${g_last_arcep_zip}" ]]; then
    g_penultimate_arcep_zip=${g_before_arcep_zip}
    g_before_arcep_zip=${g_last_arcep_zip}
    g_last_arcep_zip=${latest_f}
  fi
  g_arcep_to_unzip=${latest_f}

  echo "OK, update versions file"
  # The versions file is sourced later and the names come from a remote URL:
  # shell-quote non-empty values (plain names are written unchanged).
  last_q=${g_last_arcep_zip:+$(printf '%q' "${g_last_arcep_zip}")}
  before_q=${g_before_arcep_zip:+$(printf '%q' "${g_before_arcep_zip}")}
  printf 'LAST_ARCEP_ZIP=%s\nBEFORE_ARCEP_ZIP=%s\n' \
    "${last_q}" "${before_q}" > "${ver_file}" || return 1
  return 0
}
#######################################
# Extract a downloaded archive into a sibling directory named after the
# archive without its last extension.
# Arguments:
#   $1 - directory containing the archive
#   $2 - archive file name (relative to $1)
# Returns:
#   0 on success, 1 when the target directory cannot be created or unzip fails.
#######################################
unzip_arcep() {
  local dir_out=$1
  local zip_file=$2
  # Strip the last ".ext" only: "a.b.zip" -> "a.b".
  local zip_dir=${zip_file%.*}

  mkdir -p -- "${dir_out}/${zip_dir}" || return 1
  echo "Unzip file ${dir_out}/${zip_file}"
  # -o: overwrite without prompting, so that re-extracting into an existing
  # directory (forced re-download) neither blocks nor silently skips files.
  unzip -o "${dir_out}/${zip_file}" -d "${dir_out}/${zip_dir}" || return 1
  return 0
}
#######################################
# Entry point: parse the options, download the latest ARCEP archive when
# needed, extract it and optionally delete the penultimate version.
# Globals:
#   g_last_arcep_zip, g_before_arcep_zip, g_penultimate_arcep_zip,
#   g_arcep_to_unzip (read; written by the helpers called here)
# Arguments:
#   -d|--dir-out <dir>       output directory (required)
#   -r|--remove-penultimate  delete the penultimate archive and its directory
#   -f|--force-dl            download and extract even if already up to date
#   -h|--help                print usage and exit 0
# Returns:
#   0 on success, 1 on invalid arguments or when a step fails.
#   Exits directly (0 for --help, 1 for an unknown option).
#######################################
main() {
  local remove_penultimate=false
  local force_dl=false
  local dir_out=""
  local rc=0
  local zip_dir=""

  if (( $# == 0 )); then
    usage
    return 1
  fi

  while (( $# > 0 )); do
    case "$1" in
      -d|--dir-out)
        # Fail with a clear message instead of an unbound-variable error.
        if (( $# < 2 )) || [[ -z "$2" ]]; then
          echo "Error: $1 requires a directory argument"
          usage
          return 1
        fi
        if ! dir_out=$(realpath -- "$2"); then
          echo "Error: cannot resolve output dir $2"
          return 1
        fi
        shift
        ;;
      -r|--remove-penultimate)
        remove_penultimate=true
        ;;
      -f|--force-dl)
        force_dl=true
        ;;
      -h|--help)
        usage
        exit 0
        ;;
      *)
        echo "Unknown command: $1"
        usage
        exit 1
        ;;
    esac
    shift
  done

  if [[ -z "${dir_out}" ]]; then
    echo "Error: You need to specify an output dir -d|--dir-out <dir path>"
    usage
    return 1
  fi

  # Read existing dl versions, then download the latest zip file if needed
  source_versions "${dir_out}" || rc=1
  if (( rc == 0 )); then
    dl_latest_arcep "${dir_out}" "${force_dl}" || rc=1
  fi

  # Stop here on failure or when nothing new was downloaded
  if (( rc != 0 )) || [[ -z "${g_arcep_to_unzip}" ]]; then
    return "${rc}"
  fi

  if ! unzip_arcep "${dir_out}" "${g_arcep_to_unzip}"; then
    echo "Failed to unzip ${g_arcep_to_unzip} !"
    return 1
  fi

  # Remove the penultimate data only when explicitly requested. The flag is
  # compared as a string: a bare "$flag" inside [[ ]] is a non-empty test and
  # would be true even for "false". Never delete an archive that is still
  # recorded as the last or the before version.
  if [[ "${remove_penultimate}" == "true" &&
    -n "${g_penultimate_arcep_zip}" &&
    "${g_penultimate_arcep_zip}" != "${g_last_arcep_zip}" &&
    "${g_penultimate_arcep_zip}" != "${g_before_arcep_zip}" &&
    -f "${dir_out}/${g_penultimate_arcep_zip}" ]]; then
    echo "Delete penultimate zip ${dir_out}/${g_penultimate_arcep_zip}"
    rm -f -- "${dir_out}/${g_penultimate_arcep_zip}"
    # Extraction directory = archive name without its last extension
    zip_dir=${g_penultimate_arcep_zip%.*}
    if [[ -n "${zip_dir}" && -d "${dir_out}/${zip_dir}" ]]; then
      echo "remove dir ${dir_out}/${zip_dir}"
      rm -rf -- "${dir_out:?}/${zip_dir}"
    fi
  fi
  return 0
}
### Run main with the script arguments and turn its result into the exit code
if main "$@"; then
  exit 0
fi
exit 1