From 478e452f2c15bb6cf370ad1a89d5cced5c918d3f Mon Sep 17 00:00:00 2001 From: Stefano Piani Date: Tue, 19 May 2026 18:13:41 +0200 Subject: [PATCH] Added documentation for the actions of the Reducer --- docs/source/actions.md | 100 ++++++++++++++++++ docs/source/index.md | 1 + src/medunda/actions/average_between_layers.py | 22 +++- src/medunda/actions/calculate_stats.py | 62 ++++++++++- src/medunda/actions/climatology.py | 50 +++++++++ src/medunda/actions/compute_average.py | 26 ++++- src/medunda/actions/extract_bottom.py | 18 ++++ src/medunda/actions/extract_layer.py | 16 ++- src/medunda/actions/extract_surface.py | 12 +++ .../actions/integrate_between_layers.py | 21 +++- src/medunda/actions/integration.py | 16 +++ 11 files changed, 332 insertions(+), 12 deletions(-) create mode 100644 docs/source/actions.md diff --git a/docs/source/actions.md b/docs/source/actions.md new file mode 100644 index 0000000..518844d --- /dev/null +++ b/docs/source/actions.md @@ -0,0 +1,100 @@ +(actionsdoc)= +# Actions + +Actions are the processing operations that can be applied to oceanographic datasets in Medunda. +Each action is exposed as a sub-command of the CLI and as a Python function that accepts an +`xarray.Dataset` and returns a transformed dataset. + +--- + +## average\_between\_layers + +```{eval-rst} +.. autofunction:: medunda.actions.average_between_layers.average_between_layers +``` + +--- + +## calculate\_stats + +```{eval-rst} +.. autoclass:: medunda.actions.calculate_stats.Stats + :members: + :show-inheritance: +``` + +```{eval-rst} +.. autofunction:: medunda.actions.calculate_stats.calculate_stats +``` + +--- + +## climatology + +```{eval-rst} +.. autofunction:: medunda.actions.climatology.climatology +``` + +--- + +## compute\_average + +```{eval-rst} +.. autofunction:: medunda.actions.compute_average.compute_average +``` + +--- + +## compute\_integral + +```{eval-rst} +.. 
autofunction:: medunda.actions.integration.compute_integral +``` + +--- + +## extract\_annual\_extremes + +```{eval-rst} +.. autofunction:: medunda.actions.extract_annual_extremes.extract_annual_extremes +``` + +--- + +## extract\_annual\_extremes\_per\_layer + +```{eval-rst} +.. autofunction:: medunda.actions.extract_annual_extremes_per_layer.extract_annual_extremes_per_layer +``` + +--- + +## extract\_bottom + +```{eval-rst} +.. autofunction:: medunda.actions.extract_bottom.extract_bottom +``` + +--- + +## extract\_layer + +```{eval-rst} +.. autofunction:: medunda.actions.extract_layer.extract_layer +``` + +--- + +## extract\_surface + +```{eval-rst} +.. autofunction:: medunda.actions.extract_surface.extract_surface +``` + +--- + +## integrate\_between\_layers + +```{eval-rst} +.. autofunction:: medunda.actions.integrate_between_layers.integrate_between_layers +``` diff --git a/docs/source/index.md b/docs/source/index.md index 9d02154..a3920fa 100644 --- a/docs/source/index.md +++ b/docs/source/index.md @@ -9,4 +9,5 @@ intro workflow downloader components +actions ``` diff --git a/src/medunda/actions/average_between_layers.py b/src/medunda/actions/average_between_layers.py index 59624ff..10ba944 100644 --- a/src/medunda/actions/average_between_layers.py +++ b/src/medunda/actions/average_between_layers.py @@ -30,8 +30,26 @@ def configure_parser(subparsers): def average_between_layers( data: "xr.Dataset", depth_min, depth_max ) -> "xr.Dataset": - """Computes the vertical average of variables between two specified depths. - Returns a dataset containing the weighted average of this strata. + """Compute the depth-weighted vertical average between two specified depths. + + For each variable in the input dataset that has a ``depth`` dimension, the + function selects the depth levels within ``[depth_min, depth_max]`` and + computes a weighted average, where the weight of each depth cell is its + layer height. 
Masked (NaN) cells are excluded from both the weighted sum + and the normalisation, so the result is always a proper average of the + valid cells. Variables that do not have a ``depth`` dimension are passed + through unchanged. + + Args: + data (xr.Dataset): Input dataset containing the variables to average. + Must include a ``depth`` coordinate. + depth_min (float): Shallowest depth of the range (smallest value). + depth_max (float): Deepest depth of the range (largest value). + + Returns: + xr.Dataset: Dataset with the same variables as the input, but with the + ``depth`` dimension collapsed. Each variable is replaced by its + depth-weighted average over the selected depth range. """ averaged_variables = {} for variable in data.data_vars: diff --git a/src/medunda/actions/calculate_stats.py b/src/medunda/actions/calculate_stats.py index 1aed571..9686232 100644 --- a/src/medunda/actions/calculate_stats.py +++ b/src/medunda/actions/calculate_stats.py @@ -33,27 +33,46 @@ def configure_parser(subparsers): class Stats: - """This class provides methods to perform basic statistical calculations""" + """Provides methods to compute basic statistical operations on an array. + + All operations reduce along axis 0, which is assumed to correspond to the + time dimension. + + Args: + data: Numeric array-like object to analyse. + """ def __init__(self, data): self.data = data def mean(self): + """Return the arithmetic mean along axis 0.""" return np.mean(self.data, axis=0) def variance(self): + """Return the variance along axis 0.""" return np.var(self.data, axis=0) def median(self): + """Return the median along axis 0.""" return np.median(self.data, axis=0) def min(self): + """Return the minimum value along axis 0.""" return np.min(self.data, axis=0) def max(self): + """Return the maximum value along axis 0.""" return np.max(self.data, axis=0) def quartiles(self): + """Return the 5th, 25th, 75th, and 95th percentiles along axis 0. 
+ + Returns: + dict[str, numpy.ndarray]: Dictionary mapping each percentile + (as a string, e.g. ``"25"``) to the corresponding percentile + array. + """ percentiles = [5, 25, 75, 95] output = { str(k): np.percentile(self.data, k, axis=0) for k in percentiles @@ -63,6 +82,24 @@ def quartiles(self): def calculate( self, operations: Sequence[str] | None = None ) -> dict[str, Any]: + """Compute one or more statistical operations on the stored data. + + Args: + operations (Sequence[str] | None): Names of the operations to compute. + Accepted values are ``"mean"``, ``"variance"``, + ``"median"``, ``"minimum"``, ``"maximum"``, ``"quartiles"``, + and ``"all"``. When ``None`` or ``["all"]`` is passed, every + available operation is computed. + + Returns: + dict[str, Any]: Dictionary mapping each operation name to its + result. + + Raises: + ValueError: If ``"all"`` is combined with other operation names, + if duplicate operation names are provided, or if an unknown + operation name is requested. + """ available_operations = { "mean": self.mean, "variance": self.variance, @@ -98,8 +135,27 @@ def calculate( def calculate_stats(data: "xr.Dataset", operations) -> "xr.Dataset": - """Regroups and compute some statistical operations - according to the user's choice""" + """Compute statistical operations on each variable in the dataset. + + For each variable in the input dataset (excluding coordinate-like + variables such as ``depth``, ``latitude``, ``longitude``, and ``time``), + the requested statistical operations are computed over all dimensions and + stored as new variables in the output dataset. Output variable names + follow the pattern ``{variable}_{operation}``; quartile outputs use + ``{variable}_quartiles_{percentile}``. + + Args: + data (xr.Dataset): Input dataset containing the variables to analyse. + operations (list[str] | None): Statistical operations to compute. 
+ Accepted values are ``"mean"``, ``"median"``, ``"variance"``, + ``"quartiles"``, ``"minimum"``, ``"maximum"``, and ``"all"``. + Pass ``None`` or ``["all"]`` to compute every available + operation. + + Returns: + xr.Dataset: Dataset whose variables are the results of the requested + statistical operations. + """ ds_results = xr.Dataset() for var_name in data.data_vars: diff --git a/src/medunda/actions/climatology.py b/src/medunda/actions/climatology.py index 6a6a42f..d510975 100644 --- a/src/medunda/actions/climatology.py +++ b/src/medunda/actions/climatology.py @@ -91,6 +91,56 @@ def climatology( start_date=None, end_date=None, ) -> "xr.Dataset": + """Compute the climatological average of a variable at a given temporal frequency. + + The climatology is computed by grouping the data by the requested temporal + period (day-of-year, month, or season) and averaging across all years + within the specified date range. Both monthly and daily source datasets + are supported; the dataset frequency is inferred automatically from the + ``title`` global attribute of the dataset. + + When the source dataset has **monthly** resolution: + + * *monthly* climatology is computed as a year-weighted average where each + month is weighted by its number of days. + * *daily* climatology cannot be computed and raises a :class:`ValueError`. + * *seasonal* climatology groups months into DJF, MAM, JJA, SON and + averages with equal weights. + + When the source dataset has **daily** resolution: + + * *daily* climatology is the mean for each calendar day-of-year across + all years. + * *monthly* climatology is obtained by first computing a daily + climatology then averaging the day-of-year bins within each calendar + month. + * *seasonal* climatology groups day-of-year bins into DJF, MAM, JJA, + SON and averages with equal weights. + + Args: + data (xr.Dataset): Input dataset. Must have a ``time`` coordinate + and a ``title`` global attribute containing either + ``"monthly"`` or ``"daily"``. 
+ variable (str): Name of the variable to compute the climatology for. + Must be present in ``data.data_vars``. + frequency (str): Temporal resolution of the output climatology. One + of ``"daily"``, ``"monthly"``, or ``"seasonally"``. + start_date (datetime-like, optional): Start of the reference period. + Defaults to the first time step in the dataset. + end_date (datetime-like, optional): End of the reference period. + Defaults to the last time step in the dataset. + + Returns: + xr.Dataset: Dataset containing the climatological average of + *variable*, with the time dimension replaced by the climatological + coordinate (``month``, ``dayofyear``, or ``season``). + + Raises: + ValueError: If *variable* is not found in the dataset, if the + dataset frequency cannot be determined from its ``title`` + attribute, or if a daily climatology is requested from a monthly + dataset. + """ # check the variable if variable not in data.data_vars: available_variables = list(data.data_vars.keys()) diff --git a/src/medunda/actions/compute_average.py b/src/medunda/actions/compute_average.py index 31b94cc..5a4ba0b 100644 --- a/src/medunda/actions/compute_average.py +++ b/src/medunda/actions/compute_average.py @@ -52,10 +52,30 @@ def get_volume(data: "xr.Dataset") -> "xr.DataArray": def compute_average(data: "xr.Dataset", axis) -> "xr.Dataset": - """Compute the average on a given axis. + """Compute the average of all variables along a specified axis. + + Three axes are supported: + + * ``"depth"``: Computes the depth-weighted vertical average over the full + depth column using :func:`~medunda.actions.average_between_layers.average_between_layers`. + * ``"space"``: Computes a volume-weighted spatial average over all + (latitude, longitude) grid points using the cell volumes derived from + the grid mask. + * ``"time"``: Computes a simple arithmetic mean over the time dimension. + Args: - data (xr.Dataset): Input dataset with depth as one of the dimensions. 
- axis: The axis over which to compute the average. + data (xr.Dataset): Input dataset. Must include ``depth``, + ``latitude``, ``longitude``, and ``time`` coordinates as required + by the chosen axis. + axis (str): Axis along which to compute the average. One of + ``"depth"``, ``"space"``, or ``"time"``. + + Returns: + xr.Dataset: Dataset with the chosen dimension collapsed, containing + the averaged values for each variable. + + Raises: + ValueError: If *axis* is not one of the valid choices. """ if axis not in VALID_AXIS.keys(): raise ValueError( diff --git a/src/medunda/actions/extract_bottom.py b/src/medunda/actions/extract_bottom.py index d2e82bd..2b3b316 100644 --- a/src/medunda/actions/extract_bottom.py +++ b/src/medunda/actions/extract_bottom.py @@ -14,6 +14,24 @@ def configure_parser(subparsers): def extract_bottom(data: "xr.Dataset") -> "xr.Dataset": + """Extract the bottom-most valid grid cell for each spatial location. + + For each variable with a ``depth`` dimension, the function uses the + land-sea mask (derived from the first time step) to identify the deepest + unmasked (valid) depth level at every (latitude, longitude) grid point and + returns the corresponding values. Variables without a ``depth`` dimension + are passed through unchanged. + + Args: + data (xr.Dataset): Input dataset. Must include a ``depth`` coordinate + and at least one time step for each variable with a depth + dimension. + + Returns: + xr.Dataset: Dataset with the same variables as the input but with the + ``depth`` dimension removed. Each value corresponds to the deepest + valid cell at the corresponding spatial location. 
+ """ LOGGER.info(f"reading the file: {data}") variables = {} diff --git a/src/medunda/actions/extract_layer.py b/src/medunda/actions/extract_layer.py index c6f1ea0..3c185ee 100644 --- a/src/medunda/actions/extract_layer.py +++ b/src/medunda/actions/extract_layer.py @@ -19,8 +19,20 @@ def configure_parser(subparsers): def extract_layer(data: "xr.Dataset", depth: float) -> "xr.Dataset": - """Extracts the layer nearest to the specified depth from the dataset. - Returns a dataset containing only the layer extracted. + """Extract the layer nearest to a specified depth from the dataset. + + Uses nearest-neighbour selection along the ``depth`` coordinate, so the + actually selected depth may differ slightly from the requested value when + an exact match is not available in the dataset. + + Args: + data (xr.Dataset): Input dataset containing a ``depth`` coordinate. + depth (float): Target depth in metres. + + Returns: + xr.Dataset: Dataset with the same variables as the input but with the + ``depth`` dimension removed, containing values at the depth level + closest to *depth*. """ LOGGER.info(f"reading the file: {data}") diff --git a/src/medunda/actions/extract_surface.py b/src/medunda/actions/extract_surface.py index 70e2678..88d5bcb 100644 --- a/src/medunda/actions/extract_surface.py +++ b/src/medunda/actions/extract_surface.py @@ -13,6 +13,18 @@ def configure_parser(subparsers): def extract_surface(data: "xr.Dataset") -> "xr.Dataset": + """Extract the surface layer (first depth level) from the dataset. + + Selects the shallowest depth level (index 0) across all variables, + removing the ``depth`` dimension from the output dataset. + + Args: + data (xr.Dataset): Input dataset containing a ``depth`` dimension. + + Returns: + xr.Dataset: Dataset with the same variables as the input but with the + ``depth`` dimension removed, containing only surface-level values. 
+ """ LOGGER.info(f"reading the file: {data}") surface_layer = data.isel(depth=0) diff --git a/src/medunda/actions/integrate_between_layers.py b/src/medunda/actions/integrate_between_layers.py index a6a1008..653641d 100644 --- a/src/medunda/actions/integrate_between_layers.py +++ b/src/medunda/actions/integrate_between_layers.py @@ -30,8 +30,25 @@ def configure_parser(subparsers): def integrate_between_layers( data: "xr.Dataset", depth_min: float, depth_max: float ) -> "xr.Dataset": - """Computes the vertical integral of variables between two specified depths. - Returns a dataset containing the weighted average of this strata. + """Compute the vertical integral of variables between two specified depths. + + For each variable in the input dataset that has a ``depth`` dimension, the + function selects the depth levels within ``[depth_min, depth_max]`` and + integrates over those levels by weighting each cell by its layer height. + Grid points that are masked (NaN) at the shallowest selected level are set + to NaN in the output, preserving the land-sea mask. Variables that do not + have a ``depth`` dimension are omitted from the output. + + Args: + data (xr.Dataset): Input dataset containing the variables to integrate. + Must include a ``depth`` coordinate. + depth_min (float): Shallowest depth of the range (smallest value). + depth_max (float): Deepest depth of the range (largest value). + + Returns: + xr.Dataset: Dataset containing the vertically integrated values for all + depth-dependent variables over the selected depth range. The ``depth`` + dimension is collapsed in the output. 
""" integrated_variables = {} for variable in data.data_vars: diff --git a/src/medunda/actions/integration.py b/src/medunda/actions/integration.py index 23f2980..e46295b 100644 --- a/src/medunda/actions/integration.py +++ b/src/medunda/actions/integration.py @@ -15,6 +15,22 @@ def configure_parser(subparsers): def compute_integral(data: "xr.Dataset") -> "xr.Dataset": + """Compute the vertical integral of all depth-dependent variables. + + For each variable in the input dataset that includes a ``depth`` + dimension, the function integrates the variable over the full depth column + by weighting each depth cell by its layer height (in metres). Variables + without a ``depth`` dimension are omitted from the output. + + Args: + data (xr.Dataset): Input dataset. Must include a ``depth`` + coordinate from which layer heights can be derived. + + Returns: + xr.Dataset: Dataset containing the vertically integrated values for + all depth-dependent variables. The ``depth`` dimension is collapsed + in the output. + """ layer_height = compute_layer_height(data.depth.values) lh = xr.DataArray(layer_height, dims=["depth"])