From 275238fa3980b141cd6d8cbdca8439f4bfee0c03 Mon Sep 17 00:00:00 2001 From: Johan Le Baut Date: Fri, 9 Dec 2022 15:34:28 +0100 Subject: [PATCH] script to fetch arcep data --- data-ingest/fetch_latest_arcep.sh | 48 +++++++++++++++++-------------- 1 file changed, 26 insertions(+), 22 deletions(-) diff --git a/data-ingest/fetch_latest_arcep.sh b/data-ingest/fetch_latest_arcep.sh index 266efa6..6426ec6 100755 --- a/data-ingest/fetch_latest_arcep.sh +++ b/data-ingest/fetch_latest_arcep.sh @@ -17,13 +17,13 @@ g_arcep_to_unzip="" # Script usage usage() { - source echo echo "Usage : $0 -d|--dir-out (-r|--remove-penultimate)" echo echo " With:" echo " -d|--dir-out: folder where to store zip files" echo " (-r|--remove-penultimate): if set, remove 2nd before last version after dl latest file" + echo " (-f|--force-dl): if set, will force re-download data and process it" echo } @@ -33,8 +33,7 @@ source_versions() { ver_file=${dir_out}/${VERSIONS_FILENAME} LAST_ARCEP_ZIP="" BEFORE_ARCEP_ZIP="" - # shellcheck source=/dev/null - [[ -f ${ver_file} ]] && source "${ver_file}" + [[ -f ${ver_file} ]] && source ${ver_file} g_last_arcep_zip=${LAST_ARCEP_ZIP} g_before_arcep_zip=${BEFORE_ARCEP_ZIP} } @@ -42,31 +41,32 @@ source_versions() { # Dl arcep latest data if needed dl_latest_arcep() { dir_out=$1 + force_dl=$2 rc=0 echo "Create out dir ${dir_out} if not exist" - mkdir -p "${dir_out}" + mkdir -p ${dir_out} ver_file=${dir_out}/${VERSIONS_FILENAME} - touch "${ver_file}" + touch ${ver_file} latest_file_url="$(curl -s ${GOUV_API_URL} | jq -r '.resources[] |objects | .url' | grep -i immeubles | head -1)" - file_date=$(echo "$latest_file_url" | cut -f6 -d '/') - file_name=$(echo "$latest_file_url" | cut -f7 -d '/') + file_date=$(echo $latest_file_url | cut -f6 -d '/') + file_name=$(echo $latest_file_url | cut -f7 -d '/') latest_f=${file_date}__${file_name} echo "Found ${latest_f} Check if already exist" - if [[ -n ${g_last_arcep_zip} && "${latest_f}" = "${g_last_arcep_zip}" ]]; then + if [[ -n ${g_last_arcep_zip} && "${latest_f}" = "${g_last_arcep_zip}" && $force_dl != "true" ]]; then echo "File ${latest_f} is already the latest ! Do not do anything" else echo "File ${latest_f} not there, download it" - wget -O "${dir_out}"/"${latest_f}" "${latest_file_url}" || rc=1 + wget -O ${dir_out}/${latest_f} ${latest_file_url} || rc=1 g_penultimate_arcep_zip=${g_before_arcep_zip} g_before_arcep_zip=${g_last_arcep_zip} g_last_arcep_zip=${latest_f} g_arcep_to_unzip=${latest_f} echo "OK, update versions file" - echo "LAST_ARCEP_ZIP=${g_last_arcep_zip}" > "${ver_file}" - echo "BEFORE_ARCEP_ZIP=${g_before_arcep_zip}" >> "${ver_file}" + echo "LAST_ARCEP_ZIP=${g_last_arcep_zip}" > ${ver_file} + echo "BEFORE_ARCEP_ZIP=${g_before_arcep_zip}" >> ${ver_file} fi return ${rc} @@ -76,10 +76,10 @@ dl_latest_arcep() { unzip_arcep() { dir_out=$1 zip_file=$2 - zip_dir=$(echo "${zip_file}" | rev | cut -f2- -d '.' | rev) + zip_dir=$(echo ${zip_file} | rev | cut -f2- -d '.' | rev) mkdir -p "${dir_out}/$zip_dir" echo "Unzip file ${dir_out}/${zip_file}" - unzip "${dir_out}"/"${zip_file}" -d "${dir_out}"/"$zip_dir" || return 1 + unzip ${dir_out}/${zip_file} -d ${dir_out}/$zip_dir || return 1 return 0 } @@ -87,19 +87,23 @@ unzip_arcep() { main () { # Init input vars remove_penultimate=false + force_dl=false dir_out="" # Read inputs [[ $# -eq 0 ]] && usage && return 1 - while [[ -n "$1" ]] ; do + while [ -n $1 ] ; do case $1 in -d|--dir-out) - dir_out=$(realpath "$2") + dir_out=$(realpath $2) shift ;; -r|--remove-penultimate) remove_penultimate=true ;; + -f|--force-dl) + force_dl=true + ;; -h|--help) usage && exit 0 ;; @@ -121,14 +125,14 @@ main () { rc=0 # Read existing dl versions - source_versions "${dir_out}" || rc=1 + source_versions ${dir_out} || rc=1 # Download latest zip file if needed - [[ $rc -eq 0 ]] && dl_latest_arcep "${dir_out}" || rc=1 + [[ $rc -eq 0 ]] && dl_latest_arcep ${dir_out} $force_dl || rc=1 # If download succeeded and there is a file to unzip if [[ $rc -eq 0 && -n $g_arcep_to_unzip ]]; then # unzip file - unzip_arcep "${dir_out}" "${g_last_arcep_zip}" || rc=1 - + unzip_arcep ${dir_out} ${g_last_arcep_zip} || rc=1 + # Unzip succeeded and need to remove penultimate arcep data (if exists) if [[ $rc -eq 0 \ && $remove_penultimate \ @@ -136,11 +140,11 @@ main () { && -f ${dir_out}/$g_penultimate_arcep_zip ]]; then echo "Delete penultimate zip ${dir_out}/$g_penultimate_arcep_zip" - rm -f "${dir_out}"/"$g_penultimate_arcep_zip" - zip_dir=$(echo "${g_penultimate_arcep_zip}" | rev | cut -f2- -d '.' | rev) + rm -f ${dir_out}/$g_penultimate_arcep_zip + zip_dir=$(echo ${g_penultimate_arcep_zip} | rev | cut -f2- -d '.' | rev) if [[ -d ${dir_out}/${zip_dir} ]]; then echo "remove dir ${dir_out}/${zip_dir}" - rm -rf "${dir_out:?}"/"${zip_dir}" + rm -rf ${dir_out}/${zip_dir} fi elif [[ $rc -ne 0 ]]; then echo "Failed to unzip ${g_last_arcep_zip} !"