#!/usr/bin/env bash
#
# Axione-IPE-Viewer/data-ingest/fetch_latest_arcep.sh
#
# Looks up the ARCEP deployment dataset on data.gouv.fr, downloads the most
# recent "immeubles" archive into an output directory, unzips it, and keeps
# track of the downloaded versions in a small versions file.

# -e: stop on unhandled errors; -a: export every variable that gets assigned;
# -u: using an unset variable is an error; pipefail: a pipeline fails as soon
# as one of its stages fails.
set -eau -o pipefail

# API where to list arcep files
GOUV_API_URL="https://www.data.gouv.fr/api/1/datasets/le-marche-du-haut-et-tres-haut-debit-fixe-deploiements/"

# File to store last versions downloaded
VERSIONS_FILENAME=".arcep_versions"
## Content of version file:
# LAST_ARCEP_ZIP=<file date>__<file name>.zip
# BEFORE_ARCEP_ZIP=<file date>__<file name>.zip

# Global vars
# Name of the most recent archive recorded locally.
g_last_arcep_zip=""
# Name of the archive recorded just before the most recent one.
g_before_arcep_zip=""
# Name of the archive pushed out of the versions file by a new download.
g_penultimate_arcep_zip=""
# Name of the archive downloaded during this run (empty when nothing was fetched).
g_arcep_to_unzip=""
# Script usage
# Prints the command-line help on stdout. Takes no arguments and returns 0.
# Every option accepted by the argument parser is listed, including
# -f|--force-dl and -h|--help.
usage() {
  printf '%s\n' \
    "" \
    "Usage : $0 -d|--dir-out <dir to put downloads in> (-r|--remove-penultimate) (-f|--force-dl) (-h|--help)" \
    "" \
    " With:" \
    " -d|--dir-out: folder where to store zip files" \
    " (-r|--remove-penultimate): if set, remove 2nd before last version after dl latest file" \
    " (-f|--force-dl): if set, will force re-download data and process it" \
    " (-h|--help): print this help and exit" \
    ""
}
# Get already dl data info
# Loads the versions file of an output directory into the global state.
# Globals:
#   VERSIONS_FILENAME (read), LAST_ARCEP_ZIP / BEFORE_ARCEP_ZIP (written,
#   these are the names assigned by the sourced versions file),
#   g_last_arcep_zip / g_before_arcep_zip (written)
# Arguments:
#   $1 - output directory that may contain the versions file
# Returns:
#   0 when the file is absent or was loaded, 1 when sourcing it failed
source_versions() {
  local dir_out=$1
  local ver_file=${dir_out}/${VERSIONS_FILENAME}

  # Reset first so a missing (or partial) versions file yields empty values
  # instead of whatever a previous call left behind.
  LAST_ARCEP_ZIP=""
  BEFORE_ARCEP_ZIP=""

  if [[ -f ${ver_file} ]]; then
    # Report a broken versions file to the caller instead of silently
    # continuing with half-loaded values.
    # shellcheck source=/dev/null
    source "${ver_file}" || return 1
  fi

  g_last_arcep_zip=${LAST_ARCEP_ZIP}
  g_before_arcep_zip=${BEFORE_ARCEP_ZIP}
  return 0
}
# Dl arcep latest data if needed
# Asks the dataset API for the first resource url containing "immeubles",
# and downloads it into the output directory unless that exact file is already
# recorded as the latest one (a forced download bypasses this check).
# Globals:
#   GOUV_API_URL, VERSIONS_FILENAME (read)
#   g_last_arcep_zip, g_before_arcep_zip (read and written)
#   g_penultimate_arcep_zip, g_arcep_to_unzip (written)
# Arguments:
#   $1 - output directory (created when missing)
#   $2 - "true" to download even if the file is already the latest
#        (optional, defaults to "false")
# Returns:
#   0 when nothing had to be done or the download succeeded, 1 otherwise.
#   On failure neither the globals nor the versions file are modified.
dl_latest_arcep() {
  local dir_out=$1
  local force_dl=${2:-false}
  local ver_file latest_file_url file_date file_name latest_f part_file

  echo "Create out dir ${dir_out} if not exist"
  mkdir -p "${dir_out}" || return 1
  ver_file=${dir_out}/${VERSIONS_FILENAME}
  touch "${ver_file}" || return 1

  # The pipeline status is ignored on purpose: `head -1` may close the pipe
  # before the earlier stages are done. An empty result is checked instead.
  latest_file_url=$(
    curl -s "${GOUV_API_URL}" \
      | jq -r '.resources[] |objects | .url' \
      | grep -i immeubles \
      | head -1
  ) || true
  if [[ -z ${latest_file_url} ]]; then
    echo "Error: no 'immeubles' resource url found at ${GOUV_API_URL}" >&2
    return 1
  fi

  # The url is split on '/': field 6 is used as the file date and field 7 as
  # the file name.
  file_date=$(printf '%s\n' "${latest_file_url}" | cut -f6 -d '/')
  file_name=$(printf '%s\n' "${latest_file_url}" | cut -f7 -d '/')
  if [[ -z ${file_date} || -z ${file_name} ]]; then
    echo "Error: unexpected resource url format: ${latest_file_url}" >&2
    return 1
  fi

  latest_f=${file_date}__${file_name}
  echo "Found ${latest_f} Check if already exist"
  if [[ ${latest_f} == "${g_last_arcep_zip}" && ${force_dl} != "true" ]]; then
    echo "File ${latest_f} is already the latest ! Do not do anything"
    return 0
  fi

  echo "File ${latest_f} not there, download it"
  # Download next to the final name and rename on success only, so a failed
  # transfer never clobbers an existing archive nor leaves a truncated one.
  part_file=${dir_out}/${latest_f}.part
  if ! wget -O "${part_file}" "${latest_file_url}"; then
    echo "Error: download of ${latest_file_url} failed" >&2
    rm -f -- "${part_file}"
    return 1
  fi
  mv -f -- "${part_file}" "${dir_out}/${latest_f}" || return 1

  # Rotate the history only for a genuinely new file. A forced re-download of
  # the current latest must not turn it into its own predecessor.
  if [[ ${latest_f} != "${g_last_arcep_zip}" ]]; then
    g_penultimate_arcep_zip=${g_before_arcep_zip}
    g_before_arcep_zip=${g_last_arcep_zip}
    g_last_arcep_zip=${latest_f}
  fi
  g_arcep_to_unzip=${latest_f}

  echo "OK, update versions file"
  # %q keeps the file safe to `source` even if a name holds special characters.
  printf 'LAST_ARCEP_ZIP=%q\nBEFORE_ARCEP_ZIP=%q\n' \
    "${g_last_arcep_zip}" "${g_before_arcep_zip}" >"${ver_file}" || return 1
  return 0
}
# Unzip a dl arcep file
# Extracts an archive into a sibling directory named after the archive with
# its last extension removed.
# Arguments:
#   $1 - directory containing the archive
#   $2 - archive file name (relative to $1)
# Returns:
#   0 on success, 1 when the name is unusable or mkdir/unzip failed
unzip_arcep() {
  local dir_out=$1
  local zip_file=$2
  # Strip the last ".<ext>" component; a name without a dot is kept as is.
  local zip_dir=${zip_file%.*}

  if [[ -z ${zip_file} || -z ${zip_dir} ]]; then
    echo "Error: invalid zip file name '${zip_file}'" >&2
    return 1
  fi

  mkdir -p "${dir_out}/${zip_dir}" || return 1
  echo "Unzip file ${dir_out}/${zip_file}"
  # -o: overwrite existing files without prompting, so re-processing an
  # archive that was already extracted does not block on an interactive question.
  unzip -o "${dir_out}/${zip_file}" -d "${dir_out}/${zip_dir}" || return 1
  return 0
}
# main
# Parses the command line, then loads the recorded versions, downloads the
# latest archive if needed, unzips it and optionally removes the archive (and
# its extracted directory) that the new download pushed out of the history.
# Globals:
#   g_last_arcep_zip, g_before_arcep_zip, g_penultimate_arcep_zip,
#   g_arcep_to_unzip (read; they are set by the helper functions called here)
# Arguments:
#   -d|--dir-out <dir>        output directory (required)
#   -r|--remove-penultimate   delete the archive rotated out by this download
#   -f|--force-dl             download even if the file is already the latest
#   -h|--help                 print usage and exit 0
# Returns:
#   0 on success, 1 on bad arguments or when a step failed.
#   Exits the script directly for --help (0) and unknown options (1).
main() {
  # Init input vars
  local remove_penultimate=false
  local force_dl=false
  local dir_out=""
  local rc=0
  local zip_dir=""

  # Read inputs
  if [[ $# -eq 0 ]]; then
    usage
    return 1
  fi
  # ${1:-} keeps the loop safe under `set -u` once all arguments are consumed.
  while [[ -n ${1:-} ]]; do
    case $1 in
      -d | --dir-out)
        if [[ -z ${2:-} ]]; then
          echo "Error: $1 requires a directory argument" >&2
          usage
          return 1
        fi
        dir_out=$(realpath "$2") || return 1
        shift
        ;;
      -r | --remove-penultimate)
        remove_penultimate=true
        ;;
      -f | --force-dl)
        force_dl=true
        ;;
      -h | --help)
        usage
        exit 0
        ;;
      *)
        echo "Unknown command: $1"
        usage
        exit 1
        ;;
    esac
    shift
  done

  # check inputs
  if [[ -z ${dir_out} ]]; then
    echo "Error: You need to specify an output dir -d|--dir-out <dir path>"
    usage
    return 1
  fi

  # Read existing dl versions
  source_versions "${dir_out}" || rc=1

  # Download latest zip file if needed
  if [[ ${rc} -eq 0 ]]; then
    dl_latest_arcep "${dir_out}" "${force_dl}" || rc=1
  fi

  # Stop here on failure, or when there is no freshly downloaded file
  if [[ ${rc} -ne 0 || -z ${g_arcep_to_unzip} ]]; then
    return "${rc}"
  fi

  # unzip file
  if ! unzip_arcep "${dir_out}" "${g_last_arcep_zip}"; then
    echo "Failed to unzip ${g_last_arcep_zip} !"
    return 1
  fi

  # Unzip succeeded: remove penultimate arcep data only when it was requested.
  # The flag is compared to the literal "true" because any non-empty string,
  # "false" included, is truthy inside [[ ]]. An archive still referenced as
  # last or before is never deleted.
  if [[ ${remove_penultimate} == true &&
    -n ${g_penultimate_arcep_zip} &&
    ${g_penultimate_arcep_zip} != "${g_last_arcep_zip}" &&
    ${g_penultimate_arcep_zip} != "${g_before_arcep_zip}" &&
    -f ${dir_out}/${g_penultimate_arcep_zip} ]]; then
    echo "Delete penultimate zip ${dir_out}/$g_penultimate_arcep_zip"
    rm -f -- "${dir_out}/${g_penultimate_arcep_zip}"
    zip_dir=${g_penultimate_arcep_zip%.*}
    # An empty directory name would make the path resolve to the output dir itself.
    if [[ -n ${zip_dir} && -d ${dir_out}/${zip_dir} ]]; then
      echo "remove dir ${dir_out}/${zip_dir}"
      rm -rf -- "${dir_out:?}/${zip_dir:?}"
    fi
  fi
  return 0
}
### Call main
# Hand the script arguments to main and turn its outcome into the process
# exit status: 0 when main succeeds, 1 for any failure.
if main "$@"; then
  exit 0
fi
exit 1