From b06ca3722411d6099cd974c7878eaf76e335ebc5 Mon Sep 17 00:00:00 2001 From: Joshua Laughner Date: Sat, 9 May 2026 13:26:15 -0400 Subject: [PATCH 1/6] Added an internal option to download a prepared f(O2) file from tccondata This is needed for GGG2020.1, at least for users that need to generate GGG2020.1 files on their own. Recently, the Scripps files changed URLs, which broke the auto-update functionality. By being able to download the prepared f(O2) file from tccondata, this solves both the issue of potential future URL changes out of my control and keeping a consistent f(O2) value across all users. However, since this is publishing a product derived from the Scripps data, I'm keeping the data itself behind a password until I'm sure this is okay with Scripps. --- ginput/priors/fo2_prep.py | 32 +++++++++++++++++++++++++++++++- ginput/priors/tccon_priors.py | 6 ++++-- 2 files changed, 35 insertions(+), 3 deletions(-) diff --git a/ginput/priors/fo2_prep.py b/ginput/priors/fo2_prep.py index 965124e..fd24494 100644 --- a/ginput/priors/fo2_prep.py +++ b/ginput/priors/fo2_prep.py @@ -4,6 +4,7 @@ import numpy as np import pandas as pd from pathlib import Path +import requests from ..common_utils import versioning, readers, mod_constants from ..download import get_fo2_data from ..common_utils.ggg_logging import logger @@ -28,6 +29,7 @@ '# - "dco2": the "co2" values with the base year substracted off\n' '#\n' ] +DEFAULT_TCCONDATA_URL = 'https://tccondata.org/2b-private-qc/o2_mean_dmf.dat' def parse_args(parser: Optional[ArgumentParser]): @@ -92,7 +94,8 @@ def parse_args(parser: Optional[ArgumentParser]): def fo2_update_driver(fo2_file: Union[str, Path] = DEFAULT_FO2_FILE, dest_file: Union[str, Path, None] = None, extrap_to_year: Union[int, None] = None, download_dir: Union[str, Path] = get_fo2_data.DEFAULT_OUT_DIR, no_download: bool = False, no_download_subdir: bool = False, - max_num_backups: int = 5, time_since_mod: Optional[timedelta] = None): + max_num_backups: int = 
5, time_since_mod: Optional[timedelta] = None, from_tccondata: bool = False, + source_url=None): """Checks for new versions of the input files needed for f(O2) and updates the f(O2) table file if needed Parameters @@ -126,6 +129,7 @@ def fo2_update_driver(fo2_file: Union[str, Path] = DEFAULT_FO2_FILE, dest_file: new input data. """ fo2_file, dest_file = _finalize_file_paths(fo2_file, dest_file) + if time_since_mod is not None and dest_file is not None and dest_file.exists(): if _check_time_since_modification(dest_file, time_since_mod): logger.info('Will check if fO2 file needs updated') @@ -133,6 +137,15 @@ def fo2_update_driver(fo2_file: Union[str, Path] = DEFAULT_FO2_FILE, dest_file: logger.info('Skipping fO2 file update (modified recently enough)') return + if from_tccondata: + if no_download: + raise ValueError('no_download cannot be True if from_tccondata is as well') + _download_from_tccondata(dest_file=dest_file, source_url=source_url, max_num_backups=max_num_backups) + return + + # TODO: Use the source URL to determine from where to download the Scripps data, if given. 
This can + # potentially make an assumption of structure if given a string, or accept other types to indicate + # specific files, like the download_dir does through the CLI if no_download: dl_dir = download_dir else: @@ -140,6 +153,23 @@ def fo2_update_driver(fo2_file: Union[str, Path] = DEFAULT_FO2_FILE, dest_file: create_or_update_fo2_file(dl_dir, fo2_file, dest_file=dest_file, extrap_to_year=extrap_to_year, max_num_backups=max_num_backups) +def _download_from_tccondata(dest_file: Union[str, Path], source_url: Optional[str] = None, max_num_backups: int = 5): + if source_url is None: + source_url = DEFAULT_TCCONDATA_URL + + r = requests.get(source_url) + r.raise_for_status() + + dest_file = Path(dest_file) + if dest_file.exists() and max_num_backups > 0: + backup_method = versioning.RollingBackupByDate(date_fmt='%Y%m%dT%H%M') + prev_file = backup_method.make_rolling_backup(dest_file, max_num_backups=max_num_backups) + logger.info(f'Backed up current f(O2) file to {prev_file}') + with open(dest_file, 'w') as f: + logger.info(f'Downloaded f(O2) file from {source_url}') + f.write(r.text) + logger.info(f'Updated f(O2) file at {dest_file}') + def _finalize_file_paths(fo2_file: Union[str, Path, None], dest_file: Union[str, Path, None]) -> Tuple[Union[Path, None], Path]: if fo2_file is None and dest_file is None: diff --git a/ginput/priors/tccon_priors.py b/ginput/priors/tccon_priors.py index 486ec06..8837461 100644 --- a/ginput/priors/tccon_priors.py +++ b/ginput/priors/tccon_priors.py @@ -233,12 +233,13 @@ def __init__(self, max_extrap_years: int = 3, extrap_basis_years: int = 5, auto_update_fo2_file: bool = False, - auto_update_td: dt.timedelta = dt.timedelta(days=7)): + auto_update_td: dt.timedelta = dt.timedelta(days=7), + auto_update_from_tccondata: bool = False): if max_extrap_years <= delay_years: raise ValueError('max_extrap_years must be greater than delay_years') if auto_update_fo2_file: - fo2_prep.fo2_update_driver(o2_mole_fraction_file, 
time_since_mod=auto_update_td) + fo2_prep.fo2_update_driver(o2_mole_fraction_file, time_since_mod=auto_update_td, from_tccondata=auto_update_fo2_file) if not os.path.exists(o2_mole_fraction_file): raise IOError(f'O2 mole fraction file does not exist at {o2_mole_fraction_file}. Make sure the path is correct and you have run the ' '"update_fo2" subcommand of run_ginput.py at least once OR set auto_update_fo2_file = True when instantiating this class.') @@ -1197,6 +1198,7 @@ def _fit_gas_trend(cls, x, y, fit_type=None): fit_type = cls._max_trend_poly_deg if fit_type is None else fit_type if fit_type == 'exp': logger.debug('Using exponential fit to extrapolate {}'.format(cls._gas_name)) + # Weighting by sqrt(y) recommended in https://stackoverflow.com/a/3433503 fit = np.polynomial.polynomial.Polynomial.fit(x, np.log(y), 1, w=np.sqrt(y)) return lambda t: np.exp(fit(t)) From 030a919e701f7c8341f55acc3d8d8ab4173f3fc4 Mon Sep 17 00:00:00 2001 From: Joshua Laughner Date: Sat, 9 May 2026 13:32:24 -0400 Subject: [PATCH 2/6] Updated version numbers to 1.6.3 --- CITATION.cff | 4 ++-- ginput/__init__.py | 2 +- setup.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/CITATION.cff b/CITATION.cff index e5c609d..0774323 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -4,6 +4,6 @@ authors: - family-names: "Total Carbon Column Observing Network" given-names: "" title: "TCCON/py-ginput" -version: 1.6.2 -date-released: 2026-02-09 +version: 1.6.3 +date-released: 2026-05-12 url: "https://github.com/TCCON/py-ginput" diff --git a/ginput/__init__.py b/ginput/__init__.py index 03f302c..aa17b8a 100644 --- a/ginput/__init__.py +++ b/ginput/__init__.py @@ -1,3 +1,3 @@ # be sure to update in setup.py as well # and man/conf.py -__version__ = '1.6.2' +__version__ = '1.6.3' diff --git a/setup.py b/setup.py index 9e16792..2a89a10 100644 --- a/setup.py +++ b/setup.py @@ -13,7 +13,7 @@ long_description_content_type='text/markdown', author='Joshua Laughner, Sebastien Roche, Matthaeus 
Kiel', author_email='jllacct119@gmail.com', - version='1.6.2', # make sure stays in sync with the version in ginput/__init__.py + version='1.6.3', # make sure stays in sync with the version in ginput/__init__.py url='', install_requires=[ 'astropy>=3.1.2', From e7d65469c14f51095dfb4d76af3064729db1c166 Mon Sep 17 00:00:00 2001 From: Joshua Laughner Date: Sat, 9 May 2026 13:52:33 -0400 Subject: [PATCH 3/6] Fixed incorrect parameter passthrough to `fo2_update_driver` Also allowed the tccondata O2 file URL to be set by an environment variable. --- ginput/priors/fo2_prep.py | 3 +++ ginput/priors/tccon_priors.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/ginput/priors/fo2_prep.py b/ginput/priors/fo2_prep.py index fd24494..b529338 100644 --- a/ginput/priors/fo2_prep.py +++ b/ginput/priors/fo2_prep.py @@ -2,6 +2,7 @@ from datetime import datetime, timedelta, timezone import json import numpy as np +import os import pandas as pd from pathlib import Path import requests @@ -140,6 +141,8 @@ def fo2_update_driver(fo2_file: Union[str, Path] = DEFAULT_FO2_FILE, dest_file: if from_tccondata: if no_download: raise ValueError('no_download cannot be True if from_tccondata is as well') + if source_url is None: + source_url = os.getenv('GINPUT_FO2_URL', None) _download_from_tccondata(dest_file=dest_file, source_url=source_url, max_num_backups=max_num_backups) return diff --git a/ginput/priors/tccon_priors.py b/ginput/priors/tccon_priors.py index 8837461..7d4f41d 100644 --- a/ginput/priors/tccon_priors.py +++ b/ginput/priors/tccon_priors.py @@ -239,7 +239,7 @@ def __init__(self, raise ValueError('max_extrap_years must be greater than delay_years') if auto_update_fo2_file: - fo2_prep.fo2_update_driver(o2_mole_fraction_file, time_since_mod=auto_update_td, from_tccondata=auto_update_fo2_file) + fo2_prep.fo2_update_driver(o2_mole_fraction_file, time_since_mod=auto_update_td, from_tccondata=auto_update_from_tccondata) if not os.path.exists(o2_mole_fraction_file): 
raise IOError(f'O2 mole fraction file does not exist at {o2_mole_fraction_file}. Make sure the path is correct and you have run the ' '"update_fo2" subcommand of run_ginput.py at least once OR set auto_update_fo2_file = True when instantiating this class.') From db985fb5711d93a8ff4254b08dee57aba07615e0 Mon Sep 17 00:00:00 2001 From: Joshua Laughner Date: Tue, 12 May 2026 12:29:27 -0700 Subject: [PATCH 4/6] Updated URLs related to the O2 files This fixes downloading from Scripps and now points the "direct" download to the non-password-protected folder on tccondata.org. --- ginput/download/get_fo2_data.py | 18 +++++++++--------- ginput/priors/fo2_prep.py | 6 +++++- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/ginput/download/get_fo2_data.py b/ginput/download/get_fo2_data.py index 021c55f..c73b617 100644 --- a/ginput/download/get_fo2_data.py +++ b/ginput/download/get_fo2_data.py @@ -39,7 +39,7 @@ def parse_args(parser: Optional[ArgumentParser] = None): def download_fo2_inputs(out_dir: Union[str, Path] = DEFAULT_OUT_DIR, make_subdir: bool = True, only_if_new: bool = False) -> (Path, bool): """Download the required inputs (NOAA global mean CO2 and Scripps O2/N2 data) to calculate f(O2). - Scripps data are available at https://scrippso2.ucsd.edu/data.html. + Scripps data are available at https://scrippso2.ucsd.edu/data/. NOAA data are available at https://gml.noaa.gov/ccgg/trends/gl_data.html. 
Parameters @@ -69,11 +69,11 @@ def download_fo2_inputs(out_dir: Union[str, Path] = DEFAULT_OUT_DIR, make_subdir out_dir = Path(out_dir) if not out_dir.is_dir(): raise IOError(f'Target download directory, {out_dir}, does not exist or is not a directory') - + urls = { - 'monthly_o2_alt.csv': 'https://scrippso2.ucsd.edu/assets/data/o2_data/monthly/monthly_o2_alt.csv', - 'monthly_o2_ljo.csv': 'https://scrippso2.ucsd.edu/assets/data/o2_data/monthly/monthly_o2_ljo.csv', - 'monthly_o2_cgo.csv': 'https://scrippso2.ucsd.edu/assets/data/o2_data/monthly/monthly_o2_cgo.csv', + 'monthly_o2_alt.csv': 'https://keelinglabsites.ucsd.edu/websitedatao2/monthly_o2_alt.csv', + 'monthly_o2_ljo.csv': 'https://keelinglabsites.ucsd.edu/websitedatao2/monthly_o2_ljo.csv', + 'monthly_o2_cgo.csv': 'https://keelinglabsites.ucsd.edu/websitedatao2/monthly_o2_cgo.csv', 'co2_annmean_gl.txt': 'https://gml.noaa.gov/webdata/ccgg/trends/co2/co2_annmean_gl.txt', } @@ -85,7 +85,7 @@ def download_fo2_inputs(out_dir: Union[str, Path] = DEFAULT_OUT_DIR, make_subdir if prev_dir is not None: logger.info('MD5 sums match existing files, not saving new files.') return (prev_dir, False) - + if make_subdir: out_dir = out_dir / f'fo2_inputs_{datetime.now():%Y%m%dT%H%M%S}' out_dir.mkdir() @@ -98,7 +98,7 @@ def download_fo2_inputs(out_dir: Union[str, Path] = DEFAULT_OUT_DIR, make_subdir logger.info(f'Wrote {out_file}') return (out_dir, True) - + def _retrieve_url(url: str) -> bytes: r = requests.get(url) @@ -106,7 +106,7 @@ def _retrieve_url(url: str) -> bytes: raise RuntimeError(f'Failed to download {url}, status code = {r.status_code}') else: return r.content - + def _check_if_files_changed(out_dir: Path, subdirs: bool, file_content: dict) -> Optional[Path]: if subdirs: @@ -125,7 +125,7 @@ def _check_if_files_changed(out_dir: Path, subdirs: bool, file_content: dict) -> prev_file = prev_dir / filename if not prev_file.exists(): return None - + content_hash = mod_utils.compute_bytes_checksum(content) file_hash = 
mod_utils.compute_file_checksum(prev_file) if content_hash != file_hash: diff --git a/ginput/priors/fo2_prep.py b/ginput/priors/fo2_prep.py index b529338..a8fc8de 100644 --- a/ginput/priors/fo2_prep.py +++ b/ginput/priors/fo2_prep.py @@ -30,7 +30,7 @@ '# - "dco2": the "co2" values with the base year substracted off\n' '#\n' ] -DEFAULT_TCCONDATA_URL = 'https://tccondata.org/2b-private-qc/o2_mean_dmf.dat' +DEFAULT_TCCONDATA_URL = 'https://tccondata.org/auxiliary/o2_mean_dmf.dat' def parse_args(parser: Optional[ArgumentParser]): @@ -74,6 +74,10 @@ def parse_args(parser: Optional[ArgumentParser]): help='Disables download of the necessary input file. Instead, the required files ' '(co2_annmean_gl.txt, monthly_o2_alt.csv, monthly_o2_cgo.csv and monthly_o2_ljo.csv) ' 'must be present in that directory.') + parser.add_argument('--from-tccondata', action='store_true', + help='Set this flag to download the standard TCCON O2 DMF file from tccondata.org rather than ' + 'downloading the Scripps and NOAA data and computing the O2 DMF locally. This is recommended ' + 'for TCCON and COCCON users; any other ginput users should prefer to download the Scripps and NOAA data.') parser.add_argument('--no-download-subdir', action='store_true', help='If specified, the input files will be downloaded directly into ' '--download-dir, with no subdirectory created.') From 9bbaba46ebfef51bcb9c9eff7abfaead70a00353 Mon Sep 17 00:00:00 2001 From: Joshua Laughner Date: Tue, 12 May 2026 12:30:24 -0700 Subject: [PATCH 5/6] Fixed issues identified for Pandas 2.x compatibility This should address #14 pending rerun of unit tests. 
--- ginput/common_utils/readers.py | 8 +++++--- ginput/priors/acos_interface.py | 2 +- ginput/priors/mlo_smo_prep.py | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/ginput/common_utils/readers.py b/ginput/common_utils/readers.py index c19b20f..1e1707b 100644 --- a/ginput/common_utils/readers.py +++ b/ginput/common_utils/readers.py @@ -68,15 +68,17 @@ def read_out_file(out_file, as_dataframes=False, replace_fills=False): if fill_value is None: raise IOError('Could not find fill value in the header of {}'.format(out_file)) - def is_fill(val): return np.isclose(val, fill_value) + def is_fill(val): + return np.isclose(val, fill_value) elif replace_fills is not False: fill_value = replace_fills replace_fills = True - def is_fill(val): return val >= fill_value + def is_fill(val): + return val >= fill_value if replace_fills: - for colname, coldata in df.iteritems(): + for colname, coldata in df.items(): try: xx_fills = is_fill(coldata) except TypeError: diff --git a/ginput/priors/acos_interface.py b/ginput/priors/acos_interface.py index 256969a..3c45f23 100644 --- a/ginput/priors/acos_interface.py +++ b/ginput/priors/acos_interface.py @@ -1157,7 +1157,7 @@ def get_unit(col): dset.attrs['description'] = 'Date associated with the MLO/SMO record' - for colname, column in df.iteritems(): + for colname, column in df.items(): column = column.to_numpy().copy() column[np.isnan(column)] = _fill_val dset = grp.create_dataset(colname, data=column, fillvalue=_fill_val) diff --git a/ginput/priors/mlo_smo_prep.py b/ginput/priors/mlo_smo_prep.py index 5b63616..daebded 100644 --- a/ginput/priors/mlo_smo_prep.py +++ b/ginput/priors/mlo_smo_prep.py @@ -667,7 +667,7 @@ def monthly_avg_rapid_data(df: pd.DataFrame, year_field: Optional[str] = None, m month_field = _find_column(df, 'month', month_field) - monthly_df = df.groupby([year_field, month_field]).mean().reset_index() + monthly_df = df.groupby([year_field, month_field]).mean(numeric_only=True).reset_index() 
monthly_df.index = pd.DatetimeIndex(pd.Timestamp(int(r[year_field]),int(r[month_field]),1) for _,r in monthly_df.iterrows()) return monthly_df From 0f234eb3f35ad5e6d9c6c28c4d6956850844c9df Mon Sep 17 00:00:00 2001 From: Joshua Laughner Date: Tue, 12 May 2026 15:26:33 -0700 Subject: [PATCH 6/6] Updated history and release checklist Also added a check to ensure that the large files download works --- HISTORY.md | 11 +++++++++++ release-checklist.txt | 4 +++- tests/conftest.py | 2 +- tests/test_fixtures.py | 11 +++++++++++ 4 files changed, 26 insertions(+), 2 deletions(-) create mode 100644 tests/test_fixtures.py diff --git a/HISTORY.md b/HISTORY.md index 1e2a8f1..58ce1a8 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -19,6 +19,17 @@ improvements to improve the scientific capabilities of this code sometimes requi an update to the API because the new best default behavior requires additional user input. +## 1.6.3 + +This release fixes issues with Pandas 2.x compatibility and changed URLs for +upstream data needed for the O2 DMF calculation. The `update_fo2` subcommand +now has an additional flag to download a pre-computed O2 DMF file from a TCCON +server. + +There were also changes to the automation interface to avoid automatically +generating the O2 DMF file. This was needed to support the TCCON GGG2020.1 +priors automation. + ## 1.6.2 After the 1.6.1 release, we found that `pytz` is not automatically installed diff --git a/release-checklist.txt b/release-checklist.txt index 3671549..77c7113 100644 --- a/release-checklist.txt +++ b/release-checklist.txt @@ -12,8 +12,9 @@ - Create a copy of the feedstock `meta.yaml` that has `path` under `source` point to the ginput directory - In the directory with the `meta.yaml`, do `mkdir -p local-channel && conda build . 
--output-folder ./local-channel` - Try installing with pixi: in an empty directory, do `pixi init`, edit `pixi.toml` to include "local-channel" as the first channel, then `pixi add ginput` + - To add `local-channel`, insert the path to it as the first argument in the channel list - Verify `__main__` imports (e.g., `pixi run python -c 'from ginput import __main__'`) and the CLI help prints (`pixi run ginput_cli --help`) - - Try installing with (micro)mamba: in an empty directory, do `micromamba create -p ./testenv && micromamba activate ./testenv && micromamba install -c ../local-channel -c conda-forge ginput` + - Try installing with (micro)mamba: in an empty directory, do `micromamba create -p ./testenv && micromamba activate ./testenv && micromamba install -c /PATH/TO/local-channel -c conda-forge ginput` - Verify `__main__` imports (e.g., with the testenv active, do `python -c 'from ginput import __main__'`) and the CLI help prints (`ginput_cli --help`) ## Metadata @@ -21,6 +22,7 @@ - Update HISTORY.md file - Update version number in setup.py, CITATION.cff, and ginput/__init__.py - Ensure that the latest large test files are uploaded to CaltechData + - Worth verifying that the download works with `pytest -k large_files tests/` - Tag the commit with the version as "vX.Y.Z" - Push to public github and create release - Confirm that the PyPI publishing action ran diff --git a/tests/conftest.py b/tests/conftest.py index 26d38c3..a8aa76b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -18,7 +18,7 @@ def pytest_configure(config): ) -LARGE_FILES_DOI='10.22002/4rgh7-zss31' +LARGE_FILES_DOI='10.22002/skck2-dfz91' _mydir = Path(__file__).parent.resolve() input_data_dir = _mydir / 'test_input_data' output_data_dir = _mydir / 'test_output_data' diff --git a/tests/test_fixtures.py b/tests/test_fixtures.py new file mode 100644 index 0000000..fd14f4f --- /dev/null +++ b/tests/test_fixtures.py @@ -0,0 +1,11 @@ +"""This test module is for tests that check the test 
fixtures themselves work. + +Tests here are mainly used to manually verify fixtures with external dependencies +(e.g., file downloads) before creating a release. +""" +import pytest + + +@pytest.mark.slow +def test_large_files_download(large_files_dir): + assert True