From 4e6e669002ecf21e6347dc89e924193cc1258cfd Mon Sep 17 00:00:00 2001 From: maverick-7487 <93658675+maverick-7487@users.noreply.github.com> Date: Wed, 22 Apr 2026 18:02:34 +0200 Subject: [PATCH 1/5] argo downloader scripts - to test --- src/bitsea/Float/argo_downloader.py | 122 ++++++++++++++++++++++++++ src/bitsea/Float/argo_downloader_2.py | 121 +++++++++++++++++++++++++ 2 files changed, 243 insertions(+) create mode 100644 src/bitsea/Float/argo_downloader.py create mode 100644 src/bitsea/Float/argo_downloader_2.py diff --git a/src/bitsea/Float/argo_downloader.py b/src/bitsea/Float/argo_downloader.py new file mode 100644 index 00000000..e89d8ee6 --- /dev/null +++ b/src/bitsea/Float/argo_downloader.py @@ -0,0 +1,122 @@ +import argparse +import os +import subprocess as sp +from pathlib import Path +def argument(): + parser = argparse.ArgumentParser(description = ''' + downloads netcdf files of argo floats in the mediterranean sea, + using one or two lists of floats + ''', formatter_class=argparse.RawTextHelpFormatter) + + + parser.add_argument( '--getcommand',"-g", + type = str, + required = True, + help = 'path of the ncftpget command', + example = '/leonardo/home/usera07ogs/a07ogs00/OPA/V12C-dev/HOST/leonardo/bin/ncftpget') + # example $OPA_BINDIR/ncftpget + # OPA_BINDIR=/leonardo/home/usera07ogs/a07ogs00/OPA/V12C-dev/HOST/leonardo/bin/ + parser.add_argument( '--remotedir',"-r", + type = str, + required = True, + help = 'remote directory of the argo indexed files', + example = '/ifremer/argo/dac/') + # example $REMOTEDIR=/ifremer/argo/dac/ + parser.add_argument( '--coriolisdir',"-c", + type = str, + required = True, + help = 'local directory of the argo indexed files', + example = '/leonardo_work/OGS_prod2528_0/OPA/V12C/ONLINE/CORIOLIS/') + # example $ONLINE_REPO/CORIOLIS/ + # ONLINE_REPO=/leonardo_work/OGS_prod2528_0/OPA/V12C/ONLINE/ + parser.add_argument( '--update_file',"-u", + type = str, + required = True, + help = 'path to the update file containing the list of floats to download') + # example $UPDATE_FILE=$ONLINE_REPO/CORIOLIS/download/DIFF_floats.txt + parser.add_argument( '--indexer_file',"-i", + type = str, + required = False, + help = 'path to the indexer file containing the list of floats that have already been downloaded') + # example $INDEXER_CORIOLIS=$ONLINE_REPO/CORIOLIS/download/Med_floats.txt + + + return parser.parse_args() + + + +args = argument() + +ncftpget_command = args.getcommand +remote_dir = Path(args.remotedir) +local_coriolis_dir = Path(args.coriolisdir) +update_file = args.update_file +if args.indexer_file: + indexer_file = args.indexer_file +else: + indexer_file = None + +local_tmp_download_path = local_coriolis_dir / "download" / "tmp" + +def parse_float_line(line): + split_line = line.split(",") + + if len(split_line) > 0: + float_path_remote = Path(split_line[0]) + float_path_remote_split = float_path_remote.parts + float_dir = float_path_remote_split[1] #wmo number + float_file_name = float_path_remote_split[-1] + float_path_local = local_coriolis_dir / float_dir / float_file_name + return float_path_remote, float_path_local, float_file_name + else: + return None, None, None + +def download_move_float(float_path_remote, float_path_local, float_file_name): + command = f"{ncftpget_command} -u anonymous ftp.ifremer.fr {local_tmp_download_path} {str(remote_dir / float_path_remote)}" + print(f"Executing command: {command}") + res=sp.run(command, shell=True, stdout=sp.PIPE, stderr=sp.STDOUT) + res_str = res.stdout.decode("utf-8") + print(res_str) + if res.returncode == 0: + os.rename(local_tmp_download_path + "/" + float_file_name, float_path_local) + print(f"Successfully downloaded and moved {float_file_name} to {float_path_local}") + else: + print(f"Failed to download {float_file_name}. Command output: {res_str}") + return res.returncode == 0 + + +# download from indexer file list (if provided) +if indexer_file is not None: + with open(indexer_file, "r") as f: + already_downloaded_floats = set(line.strip() for line in f) + for line in already_downloaded_floats: + float_path_remote, float_path_local, float_file_name = parse_float_line(line) + if float_path_local is not None and \ + float_path_remote is not None and \ + float_file_name is not None and \ + not os.path.exists(float_path_local): + download_move_float(float_path_remote, float_path_local, float_file_name) + +# download from DIFF_floats.txt list +with open(update_file, "r") as f: + floats_to_download = set(line.strip() for line in f) +for line in floats_to_download: + float_path_remote, float_path_local, float_file_name = parse_float_line(line) + if float_path_local is not None and \ + float_path_remote is not None and \ + float_file_name is not None: + download_move_float(float_path_remote, float_path_local, float_file_name) + +# clean up local Coriolis directory by removing files +# that begin with "SR" when "SD" files with the same +# WMO number and rise up number exist. +if os.path.isdir(local_coriolis_dir): + for wmo_dir in os.listdir(local_coriolis_dir): + local_float_dir = local_coriolis_dir / wmo_dir + if (not local_float_dir.is_dir()) or (len(wmo_dir) != 7) or (not wmo_dir.isdigit()): + continue + for filename in os.listdir(local_float_dir): + if filename.startswith('SD') and filename.endswith('.nc'): + sr_path = local_float_dir / ('SR' + filename[2:]) + if sr_path.exists(): + os.remove(sr_path) \ No newline at end of file diff --git a/src/bitsea/Float/argo_downloader_2.py b/src/bitsea/Float/argo_downloader_2.py new file mode 100644 index 00000000..8f4d51c0 --- /dev/null +++ b/src/bitsea/Float/argo_downloader_2.py @@ -0,0 +1,121 @@ +import argparse +import os +from ftplib import FTP +from pathlib import Path, PurePosixPath +def argument(): + parser = argparse.ArgumentParser(description = ''' + downloads netcdf files of argo floats in the mediterranean sea, + using one or two lists of floats + ''', formatter_class=argparse.RawTextHelpFormatter) + + + parser.add_argument( '--remotedir',"-r", + type = str, + required = True, + help = 'remote directory of the argo indexed files', + example = '/ifremer/argo/dac/') + # example $REMOTEDIR=/ifremer/argo/dac/ + parser.add_argument( '--coriolisdir',"-c", + type = str, + required = True, + help = 'local directory of the argo indexed files', + example = '/leonardo_work/OGS_prod2528_0/OPA/V12C/ONLINE/CORIOLIS/') + # example $ONLINE_REPO/CORIOLIS/ + # ONLINE_REPO=/leonardo_work/OGS_prod2528_0/OPA/V12C/ONLINE/ + parser.add_argument( '--update_file',"-u", + type = str, + required = True, + help = 'path to the update file containing the list of floats to download') + # example $UPDATE_FILE=$ONLINE_REPO/CORIOLIS/download/DIFF_floats.txt + parser.add_argument( '--indexer_file',"-i", + type = str, + required = False, + help = 'path to the indexer file containing the list of floats that have already been downloaded') + # example $INDEXER_CORIOLIS=$ONLINE_REPO/CORIOLIS/download/Med_floats.txt + + + return parser.parse_args() + +args = argument() + +remote_dir = PurePosixPath(args.remotedir) +local_coriolis_dir = Path(args.coriolisdir) +update_file = args.update_file +if args.indexer_file: + indexer_file = args.indexer_file +else: + indexer_file = None + +local_tmp_download_path = local_coriolis_dir / "download" / "tmp" + +def parse_float_line(line): + split_line = line.split(",") + + if len(split_line) > 0: + float_path_remote = PurePosixPath(split_line[0]) + float_path_remote_split = float_path_remote.parts + float_dir = float_path_remote_split[1] #wmo number + float_file_name = float_path_remote_split[-1] + float_path_local = local_coriolis_dir / float_dir / float_file_name + return float_path_remote, float_path_local, float_file_name + else: + return None, None, None + +def download_move_float(float_path_remote, float_path_local, float_file_name): + local_tmp_download_path.mkdir(parents=True, exist_ok=True) + float_path_local.parent.mkdir(parents=True, exist_ok=True) + tmp_file_path = local_tmp_download_path / float_file_name + remote_file_path = str(remote_dir / float_path_remote) + + print(f"Downloading {remote_file_path} from ftp.ifremer.fr") + + try: + with FTP("ftp.ifremer.fr") as connection: + connection.login() + with open(tmp_file_path, "wb") as tmp_file: + connection.retrbinary(f"RETR {remote_file_path}", tmp_file.write) + except Exception as exc: + if tmp_file_path.exists(): + os.remove(tmp_file_path) + print(f"Failed to download {float_file_name}: {exc}") + return False + + os.rename(tmp_file_path, float_path_local) + print(f"Successfully downloaded and moved {float_file_name} to {float_path_local}") + return True + +# download from indexer file list (if provided) +if indexer_file is not None: + with open(indexer_file, "r") as f: + already_downloaded_floats = set(line.strip() for line in f) + for line in already_downloaded_floats: + float_path_remote, float_path_local, float_file_name = parse_float_line(line) + if float_path_local is not None and \ + float_path_remote is not None and \ + float_file_name is not None and \ + not os.path.exists(float_path_local): + download_move_float(float_path_remote, float_path_local, float_file_name) + +# download from DIFF_floats.txt list +with open(update_file, "r") as f: + floats_to_download = set(line.strip() for line in f) +for line in floats_to_download: + float_path_remote, float_path_local, float_file_name = parse_float_line(line) + if float_path_local is not None and \ + float_path_remote is not None and \ + float_file_name is not None: + download_move_float(float_path_remote, float_path_local, float_file_name) + +# clean up local Coriolis directory by removing files +# that begin with "SR" when "SD" files with the same +# WMO number and rise up number exist. +if os.path.isdir(local_coriolis_dir): + for wmo_dir in os.listdir(local_coriolis_dir): + local_float_dir = local_coriolis_dir / wmo_dir + if (not local_float_dir.is_dir()) or (len(wmo_dir) != 7) or (not wmo_dir.isdigit()): + continue + for filename in os.listdir(local_float_dir): + if filename.startswith('SD') and filename.endswith('.nc'): + sr_path = local_float_dir / ('SR' + filename[2:]) + if sr_path.exists(): + os.remove(sr_path) \ No newline at end of file From 7fc4cc774ed244c90baf7dc0c563f93de8e2836a Mon Sep 17 00:00:00 2001 From: maverick-7487 <93658675+maverick-7487@users.noreply.github.com> Date: Wed, 22 Apr 2026 20:54:13 +0200 Subject: [PATCH 2/5] default values added default values to script arguments --- src/bitsea/Float/argo_downloader.py | 18 +++++++----------- src/bitsea/Float/argo_downloader_2.py | 15 ++++++--------- 2 files changed, 13 insertions(+), 20 deletions(-) diff --git a/src/bitsea/Float/argo_downloader.py b/src/bitsea/Float/argo_downloader.py index e89d8ee6..07aadf19 100644 --- a/src/bitsea/Float/argo_downloader.py +++ b/src/bitsea/Float/argo_downloader.py @@ -13,33 +13,29 @@ def argument(): type = str, required = True, help = 'path of the ncftpget command', - example = '/leonardo/home/usera07ogs/a07ogs00/OPA/V12C-dev/HOST/leonardo/bin/ncftpget') - # example $OPA_BINDIR/ncftpget + default = '/leonardo/home/usera07ogs/a07ogs00/OPA/V12C-dev/HOST/leonardo/bin/ncftpget') # OPA_BINDIR=/leonardo/home/usera07ogs/a07ogs00/OPA/V12C-dev/HOST/leonardo/bin/ parser.add_argument( '--remotedir',"-r", type = str, required = True, help = 'remote directory of the argo indexed files', - example = '/ifremer/argo/dac/') - # example $REMOTEDIR=/ifremer/argo/dac/ + default = '/ifremer/argo/dac/') parser.add_argument( '--coriolisdir',"-c", type = str, required = True, help = 'local directory of the argo indexed files', - example = '/leonardo_work/OGS_prod2528_0/OPA/V12C/ONLINE/CORIOLIS/') - # example $ONLINE_REPO/CORIOLIS/ + default = '/leonardo_work/OGS_prod2528_0/OPA/V12C/ONLINE/CORIOLIS/') # ONLINE_REPO=/leonardo_work/OGS_prod2528_0/OPA/V12C/ONLINE/ parser.add_argument( '--update_file',"-u", type = str, required = True, - help = 'path to the update file containing the list of floats to download') - # example $UPDATE_FILE=$ONLINE_REPO/CORIOLIS/download/DIFF_floats.txt + help = 'path to the update file containing the list of floats to download', + default = '/leonardo_work/OGS_prod2528_0/OPA/V12C/ONLINE/CORIOLIS/download/DIFF_floats.txt') parser.add_argument( '--indexer_file',"-i", type = str, required = False, - help = 'path to the indexer file containing the list of floats that have already been downloaded') - # example $INDEXER_CORIOLIS=$ONLINE_REPO/CORIOLIS/download/Med_floats.txt - + help = 'path to the indexer file containing the list of floats that have already been downloaded', + default = '/leonardo_work/OGS_prod2528_0/OPA/V12C/ONLINE/CORIOLIS/download/Med_floats.txt') return parser.parse_args() diff --git a/src/bitsea/Float/argo_downloader_2.py b/src/bitsea/Float/argo_downloader_2.py index 8f4d51c0..13144f02 100644 --- a/src/bitsea/Float/argo_downloader_2.py +++ b/src/bitsea/Float/argo_downloader_2.py @@ -13,26 +13,23 @@ def argument(): type = str, required = True, help = 'remote directory of the argo indexed files', - example = '/ifremer/argo/dac/') - # example $REMOTEDIR=/ifremer/argo/dac/ + default = '/ifremer/argo/dac/') parser.add_argument( '--coriolisdir',"-c", type = str, required = True, help = 'local directory of the argo indexed files', - example = '/leonardo_work/OGS_prod2528_0/OPA/V12C/ONLINE/CORIOLIS/') - # example $ONLINE_REPO/CORIOLIS/ + default = '/leonardo_work/OGS_prod2528_0/OPA/V12C/ONLINE/CORIOLIS/') # ONLINE_REPO=/leonardo_work/OGS_prod2528_0/OPA/V12C/ONLINE/ parser.add_argument( '--update_file',"-u", type = str, required = True, - help = 'path to the update file containing the list of floats to download') - # example $UPDATE_FILE=$ONLINE_REPO/CORIOLIS/download/DIFF_floats.txt + help = 'path to the update file containing the list of floats to download', + default = '/leonardo_work/OGS_prod2528_0/OPA/V12C/ONLINE/CORIOLIS/download/DIFF_floats.txt') parser.add_argument( '--indexer_file',"-i", type = str, required = False, - help = 'path to the indexer file containing the list of floats that have already been downloaded') - # example $INDEXER_CORIOLIS=$ONLINE_REPO/CORIOLIS/download/Med_floats.txt - + help = 'path to the indexer file containing the list of floats that have already been downloaded', + default = '/leonardo_work/OGS_prod2528_0/OPA/V12C/ONLINE/CORIOLIS/download/Med_floats.txt') return parser.parse_args() From 79ef25d831c3ad90cbc985f4f5101424aae6b6ec Mon Sep 17 00:00:00 2001 From: Giorgio Bolzon Date: Tue, 28 Apr 2026 16:40:06 +0200 Subject: [PATCH 3/5] adding launcher.sh and docs in argo_downloader_2.py Co-authored-by: Copilot --- src/bitsea/Float/argo_downloader_2.py | 12 +++++++----- src/bitsea/Float/launcher.sh | 13 +++++++++++++ 2 files changed, 20 insertions(+), 5 deletions(-) create mode 100755 src/bitsea/Float/launcher.sh diff --git a/src/bitsea/Float/argo_downloader_2.py b/src/bitsea/Float/argo_downloader_2.py index 13144f02..bb91de73 100644 --- a/src/bitsea/Float/argo_downloader_2.py +++ b/src/bitsea/Float/argo_downloader_2.py @@ -4,8 +4,11 @@ from pathlib import Path, PurePosixPath def argument(): parser = argparse.ArgumentParser(description = ''' - downloads netcdf files of argo floats in the mediterranean sea, - using one or two lists of floats + Downloads: + - netcdf files of argo floats in Med_floats.txt if not already present + - netcdf files of update file, in force mode + + The script also removes files that begin with "SR" when "SD" files with the same WMO number and rise up number exist. ''', formatter_class=argparse.RawTextHelpFormatter) @@ -19,7 +22,6 @@ def argument(): required = True, help = 'local directory of the argo indexed files', default = '/leonardo_work/OGS_prod2528_0/OPA/V12C/ONLINE/CORIOLIS/') - # ONLINE_REPO=/leonardo_work/OGS_prod2528_0/OPA/V12C/ONLINE/ parser.add_argument( '--update_file',"-u", type = str, required = True, @@ -51,9 +53,9 @@ def parse_float_line(line): if len(split_line) > 0: float_path_remote = PurePosixPath(split_line[0]) float_path_remote_split = float_path_remote.parts - float_dir = float_path_remote_split[1] #wmo number + wmo = float_path_remote_split[1] float_file_name = float_path_remote_split[-1] - float_path_local = local_coriolis_dir / float_dir / float_file_name + float_path_local = local_coriolis_dir / wmo / float_file_name return float_path_remote, float_path_local, float_file_name else: return None, None, None diff --git a/src/bitsea/Float/launcher.sh b/src/bitsea/Float/launcher.sh new file mode 100755 index 00000000..74b1fde1 --- /dev/null +++ b/src/bitsea/Float/launcher.sh @@ -0,0 +1,13 @@ +#! /bin/bash + + +. ../Sat/profile.inc + +export ONLINE_REPO=/g100_scratch/usera07ogs/a07ogs00/V12C/ONLINE +CORIOLIS_DIR=$ONLINE_REPO/CORIOLIS +REMOTEDIR=/ifremer/argo/dac/ +UPDATE_FILE=/g100_work/OGS_prod2528/OPA/V12C-prod/log/daily/DIFF_floats.20260424-20:53:57.txt +INDEXER_CORIOLIS=$ONLINE_REPO/CORIOLIS/download/Med_floats.txt + +my_prex_or_die "python $OPA_BITSEA/Float/argo_downloader_2.py -r $REMOTEDIR -c $CORIOLIS_DIR -u $UPDATE_FILE -i $INDEXER_CORIOLIS" + From 41158eb76a9507502e53a54b419f2223470f7daf Mon Sep 17 00:00:00 2001 From: maverick-7487 <93658675+maverick-7487@users.noreply.github.com> Date: Tue, 28 Apr 2026 18:04:13 +0200 Subject: [PATCH 4/5] last changes print statements of downloaded and removed files, readability of conditions to download, removed 'remotedir' argument, docstrings of the functions, facultative 'getcommand' argument in argo_downloader.py, added optional lines and comments to launcher.sh --- src/bitsea/Float/argo_downloader.py | 74 ++++++++++++++++++--------- src/bitsea/Float/argo_downloader_2.py | 56 ++++++++++++++------ src/bitsea/Float/launcher.sh | 8 ++- 3 files changed, 95 insertions(+), 43 deletions(-) diff --git a/src/bitsea/Float/argo_downloader.py b/src/bitsea/Float/argo_downloader.py index 07aadf19..7a62b824 100644 --- a/src/bitsea/Float/argo_downloader.py +++ b/src/bitsea/Float/argo_downloader.py @@ -4,28 +4,24 @@ from pathlib import Path def argument(): parser = argparse.ArgumentParser(description = ''' - downloads netcdf files of argo floats in the mediterranean sea, - using one or two lists of floats + Downloads: + - netcdf files of argo floats in Med_floats.txt if not already present + - netcdf files of update file, in force mode + + The script also removes files that begin with "SR" when "SD" files with the same WMO number and rise up number exist. ''', formatter_class=argparse.RawTextHelpFormatter) parser.add_argument( '--getcommand',"-g", type = str, - required = True, + required = False, help = 'path of the ncftpget command', default = '/leonardo/home/usera07ogs/a07ogs00/OPA/V12C-dev/HOST/leonardo/bin/ncftpget') - # OPA_BINDIR=/leonardo/home/usera07ogs/a07ogs00/OPA/V12C-dev/HOST/leonardo/bin/ - parser.add_argument( '--remotedir',"-r", - type = str, - required = True, - help = 'remote directory of the argo indexed files', - default = '/ifremer/argo/dac/') parser.add_argument( '--coriolisdir',"-c", type = str, required = True, help = 'local directory of the argo indexed files', default = '/leonardo_work/OGS_prod2528_0/OPA/V12C/ONLINE/CORIOLIS/') - # ONLINE_REPO=/leonardo_work/OGS_prod2528_0/OPA/V12C/ONLINE/ parser.add_argument( '--update_file',"-u", type = str, required = True, @@ -44,7 +40,7 @@ def argument(): args = argument() ncftpget_command = args.getcommand -remote_dir = Path(args.remotedir) +remote_dir = Path("/ifremer/argo/dac/") local_coriolis_dir = Path(args.coriolisdir) update_file = args.update_file if args.indexer_file: @@ -55,6 +51,15 @@ def argument(): local_tmp_download_path = local_coriolis_dir / "download" / "tmp" def parse_float_line(line): + """ + Parses one line of the argo floats list. + Input: + - line : str + Output: + - float_path_remote : str or None remote path (source file to be downloaded) + - float_path_local : str or None local path (destination) + - float_file_name : str or None file name + """ split_line = line.split(",") if len(split_line) > 0: @@ -68,44 +73,63 @@ def parse_float_line(line): return None, None, None def download_move_float(float_path_remote, float_path_local, float_file_name): - command = f"{ncftpget_command} -u anonymous ftp.ifremer.fr {local_tmp_download_path} {str(remote_dir / float_path_remote)}" - print(f"Executing command: {command}") + """ + Downloads the netcdf file containing the profiles associated to a specified argo float and rise up number. + Moves the downloaded file to the specified local directory. + Input: + - float_path_remote : str or None remote path (source file to be downloaded) + - float_path_local : str or None local path (destination) + - float_file_name : str or None file name + Output: + - bool : False/True failure/success output states + """ + remote_file_path = str(remote_dir / float_path_remote) + command = f"{ncftpget_command} -u anonymous ftp.ifremer.fr {local_tmp_download_path} {remote_file_path)}" + + print(f"Downloading {remote_file_path} from ftp.ifremer.fr") + res=sp.run(command, shell=True, stdout=sp.PIPE, stderr=sp.STDOUT) res_str = res.stdout.decode("utf-8") - print(res_str) if res.returncode == 0: os.rename(local_tmp_download_path + "/" + float_file_name, float_path_local) - print(f"Successfully downloaded and moved {float_file_name} to {float_path_local}") + print(f"Download OK: {float_path_local}") else: - print(f"Failed to download {float_file_name}. Command output: {res_str}") + print(f"""Download FAILED: {float_file_name}. + Command output: {res_str}""") return res.returncode == 0 # download from indexer file list (if provided) +print("Downloading missing argo float profiles (sync list with existing files)...") if indexer_file is not None: with open(indexer_file, "r") as f: already_downloaded_floats = set(line.strip() for line in f) for line in already_downloaded_floats: float_path_remote, float_path_local, float_file_name = parse_float_line(line) - if float_path_local is not None and \ - float_path_remote is not None and \ - float_file_name is not None and \ - not os.path.exists(float_path_local): + parser_success = float_path_local is not None and float_path_remote is not None and float_file_name is not None + if float_path_local is not None: + profile_is_missing = not os.path.exists(float_path_local) + else: + profile_is_missing = False + if parser_success and profile_is_missing: download_move_float(float_path_remote, float_path_local, float_file_name) +print("...Done.") # download from DIFF_floats.txt list +print("Downloading new argo float profiles...") with open(update_file, "r") as f: floats_to_download = set(line.strip() for line in f) for line in floats_to_download: float_path_remote, float_path_local, float_file_name = parse_float_line(line) - if float_path_local is not None and \ - float_path_remote is not None and \ - float_file_name is not None: + parser_success = float_path_local is not None and float_path_remote is not None and float_file_name is not None + if parser_success: download_move_float(float_path_remote, float_path_local, float_file_name) +print("...Done.") # clean up local Coriolis directory by removing files # that begin with "SR" when "SD" files with the same # WMO number and rise up number exist. +print("Looking for and removing useless SR argo profiles...") if os.path.isdir(local_coriolis_dir): for wmo_dir in os.listdir(local_coriolis_dir): local_float_dir = local_coriolis_dir / wmo_dir @@ -115,4 +139,6 @@ def download_move_float(float_path_remote, float_path_local, float_file_name): if filename.startswith('SD') and filename.endswith('.nc'): sr_path = local_float_dir / ('SR' + filename[2:]) if sr_path.exists(): - os.remove(sr_path) \ No newline at end of file + os.remove(sr_path) + print(f"REMOVED: {str(sr_path)}") +print("...Done.") \ No newline at end of file diff --git a/src/bitsea/Float/argo_downloader_2.py b/src/bitsea/Float/argo_downloader_2.py index bb91de73..50d395ea 100644 --- a/src/bitsea/Float/argo_downloader_2.py +++ b/src/bitsea/Float/argo_downloader_2.py @@ -11,12 +11,6 @@ def argument(): The script also removes files that begin with "SR" when "SD" files with the same WMO number and rise up number exist. ''', formatter_class=argparse.RawTextHelpFormatter) - - parser.add_argument( '--remotedir',"-r", - type = str, - required = True, - help = 'remote directory of the argo indexed files', - default = '/ifremer/argo/dac/') parser.add_argument( '--coriolisdir',"-c", type = str, required = True, @@ -37,7 +31,7 @@ def argument(): args = argument() -remote_dir = PurePosixPath(args.remotedir) +remote_dir = PurePosixPath("/ifremer/argo/dac/") local_coriolis_dir = Path(args.coriolisdir) update_file = args.update_file if args.indexer_file: @@ -48,6 +42,15 @@ def argument(): local_tmp_download_path = local_coriolis_dir / "download" / "tmp" def parse_float_line(line): + """ + Parses one line of the argo floats list. + Input: + - line : str + Output: + - float_path_remote : str or None remote path (source file to be downloaded) + - float_path_local : str or None local path (destination) + - float_file_name : str or None file name + """ split_line = line.split(",") if len(split_line) > 0: @@ -61,6 +64,16 @@ def parse_float_line(line): return None, None, None def download_move_float(float_path_remote, float_path_local, float_file_name): + """ + Downloads the netcdf file containing the profiles associated to a specified argo float and rise up number. + Moves the downloaded file to the specified local directory. + Input: + - float_path_remote : str or None remote path (source file to be downloaded) + - float_path_local : str or None local path (destination) + - float_file_name : str or None file name + Output: + - bool : False/True failure/success output states + """ local_tmp_download_path.mkdir(parents=True, exist_ok=True) float_path_local.parent.mkdir(parents=True, exist_ok=True) tmp_file_path = local_tmp_download_path / float_file_name @@ -76,38 +89,45 @@ def download_move_float(float_path_remote, float_path_local, float_file_name): except Exception as exc: if tmp_file_path.exists(): os.remove(tmp_file_path) - print(f"Failed to download {float_file_name}: {exc}") + print(f"""Download FAILED: {float_file_name}. + Command output: {exc}""") return False os.rename(tmp_file_path, float_path_local) - print(f"Successfully downloaded and moved {float_file_name} to {float_path_local}") + print(f"Download OK: {float_path_local}") return True # download from indexer file list (if provided) +print("Downloading missing argo float profiles (sync list with existing files)...") if indexer_file is not None: with open(indexer_file, "r") as f: already_downloaded_floats = set(line.strip() for line in f) for line in already_downloaded_floats: float_path_remote, float_path_local, float_file_name = parse_float_line(line) - if float_path_local is not None and \ - float_path_remote is not None and \ - float_file_name is not None and \ - not os.path.exists(float_path_local): + parser_success = float_path_local is not None and float_path_remote is not None and float_file_name is not None + if float_path_local is not None: + profile_is_missing = not os.path.exists(float_path_local) + else: + profile_is_missing = False + if parser_success and profile_is_missing: download_move_float(float_path_remote, float_path_local, float_file_name) +print("...Done.") # download from DIFF_floats.txt list +print("Downloading new argo float profiles...") with open(update_file, "r") as f: floats_to_download = set(line.strip() for line in f) for line in floats_to_download: float_path_remote, float_path_local, float_file_name = parse_float_line(line) - if float_path_local is not None and \ - float_path_remote is not None and \ - float_file_name is not None: + parser_success = float_path_local is not None and float_path_remote is not None and float_file_name is not None + if parser_success: download_move_float(float_path_remote, float_path_local, float_file_name) +print("...Done.") # clean up local Coriolis directory by removing files # that begin with "SR" when "SD" files with the same # WMO number and rise up number exist. +print("Looking for and removing useless SR argo profiles...") if os.path.isdir(local_coriolis_dir): for wmo_dir in os.listdir(local_coriolis_dir): local_float_dir = local_coriolis_dir / wmo_dir @@ -117,4 +137,6 @@ def download_move_float(float_path_remote, float_path_local, float_file_name): if filename.startswith('SD') and filename.endswith('.nc'): sr_path = local_float_dir / ('SR' + filename[2:]) if sr_path.exists(): - os.remove(sr_path) \ No newline at end of file + os.remove(sr_path) + print(f"REMOVED: {str(sr_path)}") +print("...Done.") \ No newline at end of file diff --git a/src/bitsea/Float/launcher.sh b/src/bitsea/Float/launcher.sh index 74b1fde1..d03216a2 100755 --- a/src/bitsea/Float/launcher.sh +++ b/src/bitsea/Float/launcher.sh @@ -5,9 +5,13 @@ export ONLINE_REPO=/g100_scratch/usera07ogs/a07ogs00/V12C/ONLINE CORIOLIS_DIR=$ONLINE_REPO/CORIOLIS -REMOTEDIR=/ifremer/argo/dac/ +#REMOTEDIR=/ifremer/argo/dac/ # no more utilized argument UPDATE_FILE=/g100_work/OGS_prod2528/OPA/V12C-prod/log/daily/DIFF_floats.20260424-20:53:57.txt INDEXER_CORIOLIS=$ONLINE_REPO/CORIOLIS/download/Med_floats.txt +#NCFTPGET_PATH=/leonardo/home/usera07ogs/a07ogs00/OPA/V12C-dev/HOST/leonardo/bin/ncftpget -my_prex_or_die "python $OPA_BITSEA/Float/argo_downloader_2.py -r $REMOTEDIR -c $CORIOLIS_DIR -u $UPDATE_FILE -i $INDEXER_CORIOLIS" +# version that uses ncftpget +#my_prex_or_die "python $OPA_BITSEA/Float/argo_downloader.py -c $CORIOLIS_DIR -u $UPDATE_FILE -i $INDEXER_CORIOLIS" # optional argument: -g $NCFTPGET_PATH + +my_prex_or_die "python $OPA_BITSEA/Float/argo_downloader_2.py -c $CORIOLIS_DIR -u $UPDATE_FILE -i $INDEXER_CORIOLIS" From e69bdf6f3a5a3c54746b09fb0b2b8fe250e540c0 Mon Sep 17 00:00:00 2001 From: maverick-7487 <93658675+maverick-7487@users.noreply.github.com> Date: Thu, 7 May 2026 12:50:02 +0200 Subject: [PATCH 5/5] removed the script that uses ncftpget; renamed argo_downloader_2.py; cleaned launcher.sh --- src/bitsea/Float/argo_downloader.py | 50 +++++---- src/bitsea/Float/argo_downloader_2.py | 142 -------------------------- src/bitsea/Float/launcher.sh | 7 +- 3 files changed, 25 insertions(+), 174 deletions(-) delete mode 100644 src/bitsea/Float/argo_downloader_2.py diff --git a/src/bitsea/Float/argo_downloader.py b/src/bitsea/Float/argo_downloader.py index 7a62b824..50d395ea 100644 --- a/src/bitsea/Float/argo_downloader.py +++ b/src/bitsea/Float/argo_downloader.py @@ -1,7 +1,7 @@ import argparse import os -import subprocess as sp -from pathlib import Path +from ftplib import FTP +from pathlib import Path, PurePosixPath def argument(): parser = argparse.ArgumentParser(description = ''' Downloads: @@ -11,12 +11,6 @@ def argument(): The script also removes files that begin with "SR" when "SD" files with the same WMO number and rise up number exist. ''', formatter_class=argparse.RawTextHelpFormatter) - - parser.add_argument( '--getcommand',"-g", - type = str, - required = False, - help = 'path of the ncftpget command', - default = '/leonardo/home/usera07ogs/a07ogs00/OPA/V12C-dev/HOST/leonardo/bin/ncftpget') parser.add_argument( '--coriolisdir',"-c", type = str, required = True, @@ -35,12 +29,9 @@ def argument(): return parser.parse_args() - - args = argument() -ncftpget_command = args.getcommand -remote_dir = Path("/ifremer/argo/dac/") +remote_dir = PurePosixPath("/ifremer/argo/dac/") local_coriolis_dir = Path(args.coriolisdir) update_file = args.update_file if args.indexer_file: @@ -63,15 +54,15 @@ def parse_float_line(line): split_line = line.split(",") if len(split_line) > 0: - float_path_remote = Path(split_line[0]) + float_path_remote = PurePosixPath(split_line[0]) float_path_remote_split = float_path_remote.parts - float_dir = float_path_remote_split[1] #wmo number + wmo = float_path_remote_split[1] float_file_name = float_path_remote_split[-1] - float_path_local = local_coriolis_dir / float_dir / float_file_name + float_path_local = local_coriolis_dir / wmo / float_file_name return float_path_remote, float_path_local, float_file_name else: return None, None, None - + def download_move_float(float_path_remote, float_path_local, float_file_name): """ Downloads the netcdf file containing the profiles associated to a specified argo float and rise up number. @@ -83,21 +74,28 @@ def download_move_float(float_path_remote, float_path_local, float_file_name): Output: - bool : False/True failure/success output states """ + local_tmp_download_path.mkdir(parents=True, exist_ok=True) + float_path_local.parent.mkdir(parents=True, exist_ok=True) + tmp_file_path = local_tmp_download_path / float_file_name remote_file_path = str(remote_dir / float_path_remote) - command = f"{ncftpget_command} -u anonymous ftp.ifremer.fr {local_tmp_download_path} {remote_file_path)}" - + print(f"Downloading {remote_file_path} from ftp.ifremer.fr") - res=sp.run(command, shell=True, stdout=sp.PIPE, stderr=sp.STDOUT) - res_str = res.stdout.decode("utf-8") - if res.returncode == 0: - os.rename(local_tmp_download_path + "/" + float_file_name, float_path_local) - print(f"Download OK: {float_path_local}") - else: + try: + with FTP("ftp.ifremer.fr") as connection: + connection.login() + with open(tmp_file_path, "wb") as tmp_file: + connection.retrbinary(f"RETR {remote_file_path}", tmp_file.write) + except Exception as exc: + if tmp_file_path.exists(): + os.remove(tmp_file_path) print(f"""Download FAILED: {float_file_name}. - Command output: {res_str}""") - return res.returncode == 0 + Command output: {exc}""") + return False + os.rename(tmp_file_path, float_path_local) + print(f"Download OK: {float_path_local}") + return True # download from indexer file list (if provided) print("Downloading missing argo float profiles (sync list with existing files)...") diff --git a/src/bitsea/Float/argo_downloader_2.py b/src/bitsea/Float/argo_downloader_2.py deleted file mode 100644 index 50d395ea..00000000 --- a/src/bitsea/Float/argo_downloader_2.py +++ /dev/null @@ -1,142 +0,0 @@ -import argparse -import os -from ftplib import FTP -from pathlib import Path, PurePosixPath -def argument(): - parser = argparse.ArgumentParser(description = ''' - Downloads: - - netcdf files of argo floats in Med_floats.txt if not already present - - netcdf files of update file, in force mode - - The script also removes files that begin with "SR" when "SD" files with the same WMO number and rise up number exist. - ''', formatter_class=argparse.RawTextHelpFormatter) - - parser.add_argument( '--coriolisdir',"-c", - type = str, - required = True, - help = 'local directory of the argo indexed files', - default = '/leonardo_work/OGS_prod2528_0/OPA/V12C/ONLINE/CORIOLIS/') - parser.add_argument( '--update_file',"-u", - type = str, - required = True, - help = 'path to the update file containing the list of floats to download', - default = '/leonardo_work/OGS_prod2528_0/OPA/V12C/ONLINE/CORIOLIS/download/DIFF_floats.txt') - parser.add_argument( '--indexer_file',"-i", - type = str, - required = False, - help = 'path to the indexer file containing the list of floats that have already been downloaded', - default = '/leonardo_work/OGS_prod2528_0/OPA/V12C/ONLINE/CORIOLIS/download/Med_floats.txt') - - return parser.parse_args() - -args = argument() - -remote_dir = PurePosixPath("/ifremer/argo/dac/") -local_coriolis_dir = Path(args.coriolisdir) -update_file = args.update_file -if args.indexer_file: - indexer_file = args.indexer_file -else: - indexer_file = None - -local_tmp_download_path = local_coriolis_dir / "download" / "tmp" - -def parse_float_line(line): - """ - Parses one line of the argo floats list. - Input: - - line : str - Output: - - float_path_remote : str or None remote path (source file to be downloaded) - - float_path_local : str or None local path (destination) - - float_file_name : str or None file name - """ - split_line = line.split(",") - - if len(split_line) > 0: - float_path_remote = PurePosixPath(split_line[0]) - float_path_remote_split = float_path_remote.parts - wmo = float_path_remote_split[1] - float_file_name = float_path_remote_split[-1] - float_path_local = local_coriolis_dir / wmo / float_file_name - return float_path_remote, float_path_local, float_file_name - else: - return None, None, None - -def download_move_float(float_path_remote, float_path_local, float_file_name): - """ - Downloads the netcdf file containing the profiles associated to a specified argo float and rise up number. - Moves the downloaded file to the specified local directory. - Input: - - float_path_remote : str or None remote path (source file to be downloaded) - - float_path_local : str or None local path (destination) - - float_file_name : str or None file name - Output: - - bool : False/True failure/success output states - """ - local_tmp_download_path.mkdir(parents=True, exist_ok=True) - float_path_local.parent.mkdir(parents=True, exist_ok=True) - tmp_file_path = local_tmp_download_path / float_file_name - remote_file_path = str(remote_dir / float_path_remote) - - print(f"Downloading {remote_file_path} from ftp.ifremer.fr") - - try: - with FTP("ftp.ifremer.fr") as connection: - connection.login() - with open(tmp_file_path, "wb") as tmp_file: - connection.retrbinary(f"RETR {remote_file_path}", tmp_file.write) - except Exception as exc: - if tmp_file_path.exists(): - os.remove(tmp_file_path) - print(f"""Download FAILED: {float_file_name}. - Command output: {exc}""") - return False - - os.rename(tmp_file_path, float_path_local) - print(f"Download OK: {float_path_local}") - return True - -# download from indexer file list (if provided) -print("Downloading missing argo float profiles (sync list with existing files)...") -if indexer_file is not None: - with open(indexer_file, "r") as f: - already_downloaded_floats = set(line.strip() for line in f) - for line in already_downloaded_floats: - float_path_remote, float_path_local, float_file_name = parse_float_line(line) - parser_success = float_path_local is not None and float_path_remote is not None and float_file_name is not None - if float_path_local is not None: - profile_is_missing = not os.path.exists(float_path_local) - else: - profile_is_missing = False - if parser_success and profile_is_missing: - download_move_float(float_path_remote, float_path_local, float_file_name) -print("...Done.") - -# download from DIFF_floats.txt list -print("Downloading new argo float profiles...") -with open(update_file, "r") as f: - floats_to_download = set(line.strip() for line in f) -for line in floats_to_download: - float_path_remote, float_path_local, float_file_name = parse_float_line(line) - parser_success = float_path_local is not None and float_path_remote is not None and float_file_name is not None - if parser_success: - download_move_float(float_path_remote, float_path_local, float_file_name) -print("...Done.") - -# clean up local Coriolis directory by removing files -# that begin with "SR" when "SD" files with the same -# WMO number and rise up number exist. -print("Looking for and removing useless SR argo profiles...") -if os.path.isdir(local_coriolis_dir): - for wmo_dir in os.listdir(local_coriolis_dir): - local_float_dir = local_coriolis_dir / wmo_dir - if (not local_float_dir.is_dir()) or (len(wmo_dir) != 7) or (not wmo_dir.isdigit()): - continue - for filename in os.listdir(local_float_dir): - if filename.startswith('SD') and filename.endswith('.nc'): - sr_path = local_float_dir / ('SR' + filename[2:]) - if sr_path.exists(): - os.remove(sr_path) - print(f"REMOVED: {str(sr_path)}") -print("...Done.") \ No newline at end of file diff --git a/src/bitsea/Float/launcher.sh b/src/bitsea/Float/launcher.sh index d03216a2..8507f33c 100755 --- a/src/bitsea/Float/launcher.sh +++ b/src/bitsea/Float/launcher.sh @@ -5,13 +5,8 @@ export ONLINE_REPO=/g100_scratch/usera07ogs/a07ogs00/V12C/ONLINE CORIOLIS_DIR=$ONLINE_REPO/CORIOLIS -#REMOTEDIR=/ifremer/argo/dac/ # no more utilized argument UPDATE_FILE=/g100_work/OGS_prod2528/OPA/V12C-prod/log/daily/DIFF_floats.20260424-20:53:57.txt INDEXER_CORIOLIS=$ONLINE_REPO/CORIOLIS/download/Med_floats.txt -#NCFTPGET_PATH=/leonardo/home/usera07ogs/a07ogs00/OPA/V12C-dev/HOST/leonardo/bin/ncftpget -# version that uses ncftpget -#my_prex_or_die "python $OPA_BITSEA/Float/argo_downloader.py -c $CORIOLIS_DIR -u $UPDATE_FILE -i $INDEXER_CORIOLIS" # optional argument: -g $NCFTPGET_PATH - -my_prex_or_die "python $OPA_BITSEA/Float/argo_downloader_2.py -c $CORIOLIS_DIR -u $UPDATE_FILE -i $INDEXER_CORIOLIS" +my_prex_or_die "python $OPA_BITSEA/Float/argo_downloader.py -c $CORIOLIS_DIR -u $UPDATE_FILE -i $INDEXER_CORIOLIS"