diff --git a/CITATION.cff b/CITATION.cff index e5c609d..0774323 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -4,6 +4,6 @@ authors: - family-names: "Total Carbon Column Observing Network" given-names: "" title: "TCCON/py-ginput" -version: 1.6.2 -date-released: 2026-02-09 +version: 1.6.3 +date-released: 2026-05-12 url: "https://github.com/TCCON/py-ginput" diff --git a/HISTORY.md b/HISTORY.md index 1e2a8f1..58ce1a8 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -19,6 +19,17 @@ improvements to improve the scientific capabilities of this code sometimes requi an update to the API because the new best default behavior requires additional user input. +## 1.6.3 + +This release fixes Pandas 2.x compatibility issues and updates the URLs for the +upstream data needed for the O2 DMF calculation, which had changed. The `update_fo2` subcommand +now has an additional flag (`--from-tccondata`) to download a pre-computed O2 DMF file from a TCCON +server. + +There were also changes to the automation interface to avoid automatically +generating the O2 DMF file. This was needed to support the TCCON GGG2020.1 +priors automation. 
+ ## 1.6.2 After the 1.6.1 release, we found that `pytz` is not automatically installed diff --git a/ginput/__init__.py b/ginput/__init__.py index 03f302c..aa17b8a 100644 --- a/ginput/__init__.py +++ b/ginput/__init__.py @@ -1,3 +1,3 @@ # be sure to update in setup.py as well # and man/conf.py -__version__ = '1.6.2' +__version__ = '1.6.3' diff --git a/ginput/common_utils/readers.py b/ginput/common_utils/readers.py index c19b20f..1e1707b 100644 --- a/ginput/common_utils/readers.py +++ b/ginput/common_utils/readers.py @@ -68,15 +68,17 @@ def read_out_file(out_file, as_dataframes=False, replace_fills=False): if fill_value is None: raise IOError('Could not find fill value in the header of {}'.format(out_file)) - def is_fill(val): return np.isclose(val, fill_value) + def is_fill(val): + return np.isclose(val, fill_value) elif replace_fills is not False: fill_value = replace_fills replace_fills = True - def is_fill(val): return val >= fill_value + def is_fill(val): + return val >= fill_value if replace_fills: - for colname, coldata in df.iteritems(): + for colname, coldata in df.items(): try: xx_fills = is_fill(coldata) except TypeError: diff --git a/ginput/download/get_fo2_data.py b/ginput/download/get_fo2_data.py index 021c55f..c73b617 100644 --- a/ginput/download/get_fo2_data.py +++ b/ginput/download/get_fo2_data.py @@ -39,7 +39,7 @@ def parse_args(parser: Optional[ArgumentParser] = None): def download_fo2_inputs(out_dir: Union[str, Path] = DEFAULT_OUT_DIR, make_subdir: bool = True, only_if_new: bool = False) -> (Path, bool): """Download the required inputs (NOAA global mean CO2 and Scripps O2/N2 data) to calculate f(O2). - Scripps data are available at https://scrippso2.ucsd.edu/data.html. + Scripps data are available at https://scrippso2.ucsd.edu/data/. NOAA data are available at https://gml.noaa.gov/ccgg/trends/gl_data.html. 
Parameters @@ -69,11 +69,11 @@ def download_fo2_inputs(out_dir: Union[str, Path] = DEFAULT_OUT_DIR, make_subdir out_dir = Path(out_dir) if not out_dir.is_dir(): raise IOError(f'Target download directory, {out_dir}, does not exist or is not a directory') - + urls = { - 'monthly_o2_alt.csv': 'https://scrippso2.ucsd.edu/assets/data/o2_data/monthly/monthly_o2_alt.csv', - 'monthly_o2_ljo.csv': 'https://scrippso2.ucsd.edu/assets/data/o2_data/monthly/monthly_o2_ljo.csv', - 'monthly_o2_cgo.csv': 'https://scrippso2.ucsd.edu/assets/data/o2_data/monthly/monthly_o2_cgo.csv', + 'monthly_o2_alt.csv': 'https://keelinglabsites.ucsd.edu/websitedatao2/monthly_o2_alt.csv', + 'monthly_o2_ljo.csv': 'https://keelinglabsites.ucsd.edu/websitedatao2/monthly_o2_ljo.csv', + 'monthly_o2_cgo.csv': 'https://keelinglabsites.ucsd.edu/websitedatao2/monthly_o2_cgo.csv', 'co2_annmean_gl.txt': 'https://gml.noaa.gov/webdata/ccgg/trends/co2/co2_annmean_gl.txt', } @@ -85,7 +85,7 @@ def download_fo2_inputs(out_dir: Union[str, Path] = DEFAULT_OUT_DIR, make_subdir if prev_dir is not None: logger.info('MD5 sums match existing files, not saving new files.') return (prev_dir, False) - + if make_subdir: out_dir = out_dir / f'fo2_inputs_{datetime.now():%Y%m%dT%H%M%S}' out_dir.mkdir() @@ -98,7 +98,7 @@ def download_fo2_inputs(out_dir: Union[str, Path] = DEFAULT_OUT_DIR, make_subdir logger.info(f'Wrote {out_file}') return (out_dir, True) - + def _retrieve_url(url: str) -> bytes: r = requests.get(url) @@ -106,7 +106,7 @@ def _retrieve_url(url: str) -> bytes: raise RuntimeError(f'Failed to download {url}, status code = {r.status_code}') else: return r.content - + def _check_if_files_changed(out_dir: Path, subdirs: bool, file_content: dict) -> Optional[Path]: if subdirs: @@ -125,7 +125,7 @@ def _check_if_files_changed(out_dir: Path, subdirs: bool, file_content: dict) -> prev_file = prev_dir / filename if not prev_file.exists(): return None - + content_hash = mod_utils.compute_bytes_checksum(content) file_hash = 
mod_utils.compute_file_checksum(prev_file) if content_hash != file_hash: diff --git a/ginput/priors/acos_interface.py b/ginput/priors/acos_interface.py index 256969a..3c45f23 100644 --- a/ginput/priors/acos_interface.py +++ b/ginput/priors/acos_interface.py @@ -1157,7 +1157,7 @@ def get_unit(col): dset.attrs['description'] = 'Date associated with the MLO/SMO record' - for colname, column in df.iteritems(): + for colname, column in df.items(): column = column.to_numpy().copy() column[np.isnan(column)] = _fill_val dset = grp.create_dataset(colname, data=column, fillvalue=_fill_val) diff --git a/ginput/priors/fo2_prep.py b/ginput/priors/fo2_prep.py index 965124e..a8fc8de 100644 --- a/ginput/priors/fo2_prep.py +++ b/ginput/priors/fo2_prep.py @@ -2,8 +2,10 @@ from datetime import datetime, timedelta, timezone import json import numpy as np +import os import pandas as pd from pathlib import Path +import requests from ..common_utils import versioning, readers, mod_constants from ..download import get_fo2_data from ..common_utils.ggg_logging import logger @@ -28,6 +30,7 @@ '# - "dco2": the "co2" values with the base year substracted off\n' '#\n' ] +DEFAULT_TCCONDATA_URL = 'https://tccondata.org/auxiliary/o2_mean_dmf.dat' def parse_args(parser: Optional[ArgumentParser]): @@ -71,6 +74,10 @@ def parse_args(parser: Optional[ArgumentParser]): help='Disables download of the necessary input file. Instead, the required files ' '(co2_annmean_gl.txt, monthly_o2_alt.csv, monthly_o2_cgo.csv and monthly_o2_ljo.csv) ' 'must be present in that directory.') + parser.add_argument('--from-tccondata', action='store_true', + help='Set this flag to download the standard TCCON O2 DMF file from tccondata.org rather than ' + 'downloading the Scripps and NOAA data and computing the O2 DMF locally. 
This is recommended ' + 'for TCCON and COCCON users; any other ginput users should prefer to download the Scripps and NOAA data.') parser.add_argument('--no-download-subdir', action='store_true', help='If specified, the input files will be downloaded directly into ' '--download-dir, with no subdirectory created.') @@ -92,7 +99,8 @@ def parse_args(parser: Optional[ArgumentParser]): def fo2_update_driver(fo2_file: Union[str, Path] = DEFAULT_FO2_FILE, dest_file: Union[str, Path, None] = None, extrap_to_year: Union[int, None] = None, download_dir: Union[str, Path] = get_fo2_data.DEFAULT_OUT_DIR, no_download: bool = False, no_download_subdir: bool = False, - max_num_backups: int = 5, time_since_mod: Optional[timedelta] = None): + max_num_backups: int = 5, time_since_mod: Optional[timedelta] = None, from_tccondata: bool = False, + source_url=None): """Checks for new versions of the input files needed for f(O2) and updates the f(O2) table file if needed Parameters @@ -126,6 +134,7 @@ def fo2_update_driver(fo2_file: Union[str, Path] = DEFAULT_FO2_FILE, dest_file: new input data. """ fo2_file, dest_file = _finalize_file_paths(fo2_file, dest_file) + if time_since_mod is not None and dest_file is not None and dest_file.exists(): if _check_time_since_modification(dest_file, time_since_mod): logger.info('Will check if fO2 file needs updated') @@ -133,6 +142,17 @@ def fo2_update_driver(fo2_file: Union[str, Path] = DEFAULT_FO2_FILE, dest_file: logger.info('Skipping fO2 file update (modified recently enough)') return + if from_tccondata: + if no_download: + raise ValueError('no_download cannot be True if from_tccondata is as well') + if source_url is None: + source_url = os.getenv('GINPUT_FO2_URL', None) + _download_from_tccondata(dest_file=dest_file, source_url=source_url, max_num_backups=max_num_backups) + return + + # TODO: Use the source URL to determine from where to download the Scripps data, if given. 
This can + # potentially make an assumption of structure if given a string, or accept other types to indicate + # specific files, like the download_dir does through the CLI if no_download: dl_dir = download_dir else: @@ -140,6 +160,23 @@ def fo2_update_driver(fo2_file: Union[str, Path] = DEFAULT_FO2_FILE, dest_file: create_or_update_fo2_file(dl_dir, fo2_file, dest_file=dest_file, extrap_to_year=extrap_to_year, max_num_backups=max_num_backups) +def _download_from_tccondata(dest_file: Union[str, Path], source_url: Optional[str] = None, max_num_backups: int = 5): + if source_url is None: + source_url = DEFAULT_TCCONDATA_URL + + r = requests.get(source_url) + r.raise_for_status() + + dest_file = Path(dest_file) + if dest_file.exists() and max_num_backups > 0: + backup_method = versioning.RollingBackupByDate(date_fmt='%Y%m%dT%H%M') + prev_file = backup_method.make_rolling_backup(dest_file, max_num_backups=max_num_backups) + logger.info(f'Backed up current f(O2) file to {prev_file}') + with open(dest_file, 'w') as f: + logger.info(f'Downloaded f(O2) file from {source_url}') + f.write(r.text) + logger.info(f'Updated f(O2) file at {dest_file}') + def _finalize_file_paths(fo2_file: Union[str, Path, None], dest_file: Union[str, Path, None]) -> Tuple[Union[Path, None], Path]: if fo2_file is None and dest_file is None: diff --git a/ginput/priors/mlo_smo_prep.py b/ginput/priors/mlo_smo_prep.py index 5b63616..daebded 100644 --- a/ginput/priors/mlo_smo_prep.py +++ b/ginput/priors/mlo_smo_prep.py @@ -667,7 +667,7 @@ def monthly_avg_rapid_data(df: pd.DataFrame, year_field: Optional[str] = None, m month_field = _find_column(df, 'month', month_field) - monthly_df = df.groupby([year_field, month_field]).mean().reset_index() + monthly_df = df.groupby([year_field, month_field]).mean(numeric_only=True).reset_index() monthly_df.index = pd.DatetimeIndex(pd.Timestamp(int(r[year_field]),int(r[month_field]),1) for _,r in monthly_df.iterrows()) return monthly_df diff --git 
a/ginput/priors/tccon_priors.py b/ginput/priors/tccon_priors.py index 486ec06..7d4f41d 100644 --- a/ginput/priors/tccon_priors.py +++ b/ginput/priors/tccon_priors.py @@ -233,12 +233,13 @@ def __init__(self, max_extrap_years: int = 3, extrap_basis_years: int = 5, auto_update_fo2_file: bool = False, - auto_update_td: dt.timedelta = dt.timedelta(days=7)): + auto_update_td: dt.timedelta = dt.timedelta(days=7), + auto_update_from_tccondata: bool = False): if max_extrap_years <= delay_years: raise ValueError('max_extrap_years must be greater than delay_years') if auto_update_fo2_file: - fo2_prep.fo2_update_driver(o2_mole_fraction_file, time_since_mod=auto_update_td) + fo2_prep.fo2_update_driver(o2_mole_fraction_file, time_since_mod=auto_update_td, from_tccondata=auto_update_from_tccondata) if not os.path.exists(o2_mole_fraction_file): raise IOError(f'O2 mole fraction file does not exist at {o2_mole_fraction_file}. Make sure the path is correct and you have run the ' '"update_fo2" subcommand of run_ginput.py at least once OR set auto_update_fo2_file = True when instantiating this class.') @@ -1197,6 +1198,7 @@ def _fit_gas_trend(cls, x, y, fit_type=None): fit_type = cls._max_trend_poly_deg if fit_type is None else fit_type if fit_type == 'exp': logger.debug('Using exponential fit to extrapolate {}'.format(cls._gas_name)) + # Weighting by sqrt(y) recommended in https://stackoverflow.com/a/3433503 fit = np.polynomial.polynomial.Polynomial.fit(x, np.log(y), 1, w=np.sqrt(y)) return lambda t: np.exp(fit(t)) diff --git a/release-checklist.txt b/release-checklist.txt index 3671549..77c7113 100644 --- a/release-checklist.txt +++ b/release-checklist.txt @@ -12,8 +12,9 @@ - Create a copy of the feedstock `meta.yaml` that has `path` under `source` point to the ginput directory - In the directory with the `meta.yaml`, do `mkdir -p local-channel && conda build . 
--output-folder ./local-channel` - Try installing with pixi: in an empty directory, do `pixi init`, edit `pixi.toml` to include "local-channel" as the first channel, then `pixi add ginput` + - To add `local-channel`, insert the path to it as the first entry in the `channels` list - Verify `__main__` imports (e.g., `pixi run python -c 'from ginput import __main__'`) and the CLI help prints (`pixi run ginput_cli --help`) - - Try installing with (micro)mamba: in an empty directory, do `micromamba create -p ./testenv && micromamba activate ./testenv && micromamba install -c ../local-channel -c conda-forge ginput` + - Try installing with (micro)mamba: in an empty directory, do `micromamba create -p ./testenv && micromamba activate ./testenv && micromamba install -c /PATH/TO/local-channel -c conda-forge ginput` - Verify `__main__` imports (e.g., with the testenv active, do `python -c 'from ginput import __main__'`) and the CLI help prints (`ginput_cli --help`) ## Metadata @@ -21,6 +22,7 @@ - Update HISTORY.md file - Update version number in setup.py, CITATION.cff, and ginput/__init__.py - Ensure that the latest large test files are uploaded to CaltechData + - Worth verifying that the download works with `pytest -k large_files tests/` - Tag the commit with the version as "vX.Y.Z" - Push to public github and create release - Confirm that the PyPI publishing action ran diff --git a/setup.py b/setup.py index 9e16792..2a89a10 100644 --- a/setup.py +++ b/setup.py @@ -13,7 +13,7 @@ long_description_content_type='text/markdown', author='Joshua Laughner, Sebastien Roche, Matthaeus Kiel', author_email='jllacct119@gmail.com', - version='1.6.2', # make sure stays in sync with the version in ginput/__init__.py + version='1.6.3', # make sure stays in sync with the version in ginput/__init__.py url='', install_requires=[ 'astropy>=3.1.2', diff --git a/tests/conftest.py b/tests/conftest.py index 26d38c3..a8aa76b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -18,7 +18,7 @@ 
def pytest_configure(config): ) -LARGE_FILES_DOI='10.22002/4rgh7-zss31' +LARGE_FILES_DOI='10.22002/skck2-dfz91' _mydir = Path(__file__).parent.resolve() input_data_dir = _mydir / 'test_input_data' output_data_dir = _mydir / 'test_output_data' diff --git a/tests/test_fixtures.py b/tests/test_fixtures.py new file mode 100644 index 0000000..fd14f4f --- /dev/null +++ b/tests/test_fixtures.py @@ -0,0 +1,11 @@ +"""This test module is for tests that check the test fixtures themselves work. + +Tests here are mainly used to manually verify fixtures with external dependencies +(e.g., file downloads) before creating a release. +""" +import pytest + + +@pytest.mark.slow +def test_large_files_download(large_files_dir): + assert True