From 478e452f2c15bb6cf370ad1a89d5cced5c918d3f Mon Sep 17 00:00:00 2001
From: Stefano Piani <spiani@ogs.it>
Date: Tue, 19 May 2026 18:13:41 +0200
Subject: [PATCH] Added documentation for the actions of the Reducer

---
 docs/source/actions.md                        | 100 ++++++++++++++++++
 docs/source/index.md                          |   1 +
 src/medunda/actions/average_between_layers.py |  22 +++-
 src/medunda/actions/calculate_stats.py        |  62 ++++++++++-
 src/medunda/actions/climatology.py            |  50 +++++++++
 src/medunda/actions/compute_average.py        |  26 ++++-
 src/medunda/actions/extract_bottom.py         |  18 ++++
 src/medunda/actions/extract_layer.py          |  16 ++-
 src/medunda/actions/extract_surface.py        |  12 +++
 .../actions/integrate_between_layers.py       |  21 +++-
 src/medunda/actions/integration.py            |  16 +++
 11 files changed, 332 insertions(+), 12 deletions(-)
 create mode 100644 docs/source/actions.md

diff --git a/docs/source/actions.md b/docs/source/actions.md
new file mode 100644
index 0000000..518844d
--- /dev/null
+++ b/docs/source/actions.md
@@ -0,0 +1,100 @@
+(actionsdoc)=
+# Actions
+
+Actions are the processing operations that can be applied to oceanographic datasets in Medunda.
+Each action is exposed as a sub-command of the CLI and as a Python function that accepts an
+`xarray.Dataset` and returns a transformed dataset.
+
+---
+
+## average\_between\_layers
+
+```{eval-rst}
+.. autofunction:: medunda.actions.average_between_layers.average_between_layers
+```
+
+---
+
+## calculate\_stats
+
+```{eval-rst}
+.. autoclass:: medunda.actions.calculate_stats.Stats
+   :members:
+   :show-inheritance:
+```
+
+```{eval-rst}
+.. autofunction:: medunda.actions.calculate_stats.calculate_stats
+```
+
+---
+
+## climatology
+
+```{eval-rst}
+.. autofunction:: medunda.actions.climatology.climatology
+```
+
+---
+
+## compute\_average
+
+```{eval-rst}
+.. autofunction:: medunda.actions.compute_average.compute_average
+```
+
+---
+
+## compute\_integral
+
+```{eval-rst}
+.. autofunction:: medunda.actions.integration.compute_integral
+```
+
+---
+
+## extract\_annual\_extremes
+
+```{eval-rst}
+.. autofunction:: medunda.actions.extract_annual_extremes.extract_annual_extremes
+```
+
+---
+
+## extract\_annual\_extremes\_per\_layer
+
+```{eval-rst}
+.. autofunction:: medunda.actions.extract_annual_extremes_per_layer.extract_annual_extremes_per_layer
+```
+
+---
+
+## extract\_bottom
+
+```{eval-rst}
+.. autofunction:: medunda.actions.extract_bottom.extract_bottom
+```
+
+---
+
+## extract\_layer
+
+```{eval-rst}
+.. autofunction:: medunda.actions.extract_layer.extract_layer
+```
+
+---
+
+## extract\_surface
+
+```{eval-rst}
+.. autofunction:: medunda.actions.extract_surface.extract_surface
+```
+
+---
+
+## integrate\_between\_layers
+
+```{eval-rst}
+.. autofunction:: medunda.actions.integrate_between_layers.integrate_between_layers
+```
diff --git a/docs/source/index.md b/docs/source/index.md
index 9d02154..a3920fa 100644
--- a/docs/source/index.md
+++ b/docs/source/index.md
@@ -9,4 +9,5 @@ intro
 workflow
 downloader
 components
+actions
 ```
diff --git a/src/medunda/actions/average_between_layers.py b/src/medunda/actions/average_between_layers.py
index 59624ff..10ba944 100644
--- a/src/medunda/actions/average_between_layers.py
+++ b/src/medunda/actions/average_between_layers.py
@@ -30,8 +30,26 @@ def configure_parser(subparsers):
 def average_between_layers(
     data: "xr.Dataset", depth_min, depth_max
 ) -> "xr.Dataset":
-    """Computes the vertical average of variables between two specified depths.
-    Returns a dataset containing the weighted average of this strata.
+    """Compute the depth-weighted vertical average between two specified depths.
+
+    For each variable in the input dataset that has a ``depth`` dimension, the
+    function selects the depth levels within ``[depth_min, depth_max]`` and
+    computes a weighted average, where the weight of each depth cell is its
+    layer height.  Masked (NaN) cells are excluded from both the weighted sum
+    and the normalisation, so the result is always a proper average of the
+    valid cells.  Variables that do not have a ``depth`` dimension are passed
+    through unchanged.
+
+    Args:
+        data (xr.Dataset): Input dataset containing the variables to average.
+            Must include a ``depth`` coordinate.
+        depth_min (float): Minimum depth of the range (its shallowest limit).
+        depth_max (float): Maximum depth of the range (its deepest limit).
+
+    Returns:
+        xr.Dataset: Dataset with the same variables as the input, but with the
+        ``depth`` dimension collapsed.  Each depth-dependent variable is
+        replaced by its depth-weighted average over the selected depth range.
     """
     averaged_variables = {}
     for variable in data.data_vars:
diff --git a/src/medunda/actions/calculate_stats.py b/src/medunda/actions/calculate_stats.py
index 1aed571..9686232 100644
--- a/src/medunda/actions/calculate_stats.py
+++ b/src/medunda/actions/calculate_stats.py
@@ -33,27 +33,46 @@ def configure_parser(subparsers):
 
 
 class Stats:
-    """This class provides methods to perform basic statistical calculations"""
+    """Provides methods to compute basic statistical operations on an array.
+
+    All operations reduce along axis 0, which is assumed to correspond to the
+    time dimension.
+
+    Args:
+        data: Numeric array-like object to analyse.
+    """
 
     def __init__(self, data):
         self.data = data
 
     def mean(self):
+        """Return the arithmetic mean along axis 0."""
         return np.mean(self.data, axis=0)
 
     def variance(self):
+        """Return the variance along axis 0."""
         return np.var(self.data, axis=0)
 
     def median(self):
+        """Return the median along axis 0."""
         return np.median(self.data, axis=0)
 
     def min(self):
+        """Return the minimum value along axis 0."""
         return np.min(self.data, axis=0)
 
     def max(self):
+        """Return the maximum value along axis 0."""
         return np.max(self.data, axis=0)
 
     def quartiles(self):
+        """Return the 5th, 25th, 75th, and 95th percentiles along axis 0.
+
+        Returns:
+            dict[str, numpy.ndarray]: Dictionary mapping each percentile
+            (as a string, e.g. ``"25"``) to the corresponding percentile
+            array.
+        """
         percentiles = [5, 25, 75, 95]
         output = {
             str(k): np.percentile(self.data, k, axis=0) for k in percentiles
@@ -63,6 +82,24 @@ def quartiles(self):
     def calculate(
         self, operations: Sequence[str] | None = None
     ) -> dict[str, Any]:
+        """Compute one or more statistical operations on the stored data.
+
+        Args:
+            operations (Sequence[str] | None): Names of the operations to
+                compute.  Accepted values are ``"mean"``, ``"variance"``,
+                ``"median"``, ``"minimum"``, ``"maximum"``, ``"quartiles"``,
+                and ``"all"``.  When ``None`` or ``["all"]`` is passed, every
+                available operation is computed.
+
+        Returns:
+            dict[str, Any]: Dictionary mapping each operation name to its
+            result.
+
+        Raises:
+            ValueError: If ``"all"`` is combined with other operation names,
+                if duplicate operation names are provided, or if an unknown
+                operation name is requested.
+        """
         available_operations = {
             "mean": self.mean,
             "variance": self.variance,
@@ -98,8 +135,27 @@ def calculate(
 
 
 def calculate_stats(data: "xr.Dataset", operations) -> "xr.Dataset":
-    """Regroups and compute some statistical operations
-    according to the user's choice"""
+    """Compute statistical operations on each variable in the dataset.
+
+    For each variable in the input dataset (excluding coordinate-like
+    variables such as ``depth``, ``latitude``, ``longitude``, and ``time``),
+    the requested statistical operations are computed along axis 0 (assumed
+    to be time) and stored as new variables in the output dataset.  Output
+    variable names follow the pattern ``{variable}_{operation}``; quartile
+    outputs use ``{variable}_quartiles_{percentile}``.
+
+    Args:
+        data (xr.Dataset): Input dataset containing the variables to analyse.
+        operations (list[str] | None): Statistical operations to compute.
+            Accepted values are ``"mean"``, ``"median"``, ``"variance"``,
+            ``"quartiles"``, ``"minimum"``, ``"maximum"``, and ``"all"``.
+            Pass ``None`` or ``["all"]`` to compute every available
+            operation.
+
+    Returns:
+        xr.Dataset: Dataset whose variables are the results of the requested
+        statistical operations.
+    """
 
     ds_results = xr.Dataset()
     for var_name in data.data_vars:
diff --git a/src/medunda/actions/climatology.py b/src/medunda/actions/climatology.py
index 6a6a42f..d510975 100644
--- a/src/medunda/actions/climatology.py
+++ b/src/medunda/actions/climatology.py
@@ -91,6 +91,56 @@ def climatology(
     start_date=None,
     end_date=None,
 ) -> "xr.Dataset":
+    """Compute the climatological average of a variable at a given temporal frequency.
+
+    The climatology is computed by grouping the data by the requested temporal
+    period (day-of-year, month, or season) and averaging across all years
+    within the specified date range.  Both monthly and daily source datasets
+    are supported; the dataset frequency is inferred automatically from the
+    ``title`` global attribute of the dataset.
+
+    When the source dataset has **monthly** resolution:
+
+    * *monthly* climatology is computed as a weighted average across years,
+      with each month weighted by its number of days.
+    * *daily* climatology cannot be computed and raises a :class:`ValueError`.
+    * *seasonal* climatology groups months into DJF, MAM, JJA, SON and
+      averages with equal weights.
+
+    When the source dataset has **daily** resolution:
+
+    * *daily* climatology is the mean for each calendar day-of-year across
+      all years.
+    * *monthly* climatology is obtained by first computing a daily
+      climatology then averaging the day-of-year bins within each calendar
+      month.
+    * *seasonal* climatology groups day-of-year bins into DJF, MAM, JJA,
+      SON and averages with equal weights.
+
+    Args:
+        data (xr.Dataset): Input dataset.  Must have a ``time`` coordinate
+            and a ``title`` global attribute containing either
+            ``"monthly"`` or ``"daily"``.
+        variable (str): Name of the variable to compute the climatology for.
+            Must be present in ``data.data_vars``.
+        frequency (str): Temporal resolution of the output climatology.  One
+            of ``"daily"``, ``"monthly"``, or ``"seasonally"``.
+        start_date (datetime-like, optional): Start of the reference period.
+            Defaults to the first time step in the dataset.
+        end_date (datetime-like, optional): End of the reference period.
+            Defaults to the last time step in the dataset.
+
+    Returns:
+        xr.Dataset: Dataset containing the climatological average of
+        *variable*, with the time dimension replaced by the climatological
+        coordinate (``month``, ``dayofyear``, or ``season``).
+
+    Raises:
+        ValueError: If *variable* is not found in the dataset, if the
+            dataset frequency cannot be determined from its ``title``
+            attribute, or if a daily climatology is requested from a monthly
+            dataset.
+    """
     # check the variable
     if variable not in data.data_vars:
         available_variables = list(data.data_vars.keys())
diff --git a/src/medunda/actions/compute_average.py b/src/medunda/actions/compute_average.py
index 31b94cc..5a4ba0b 100644
--- a/src/medunda/actions/compute_average.py
+++ b/src/medunda/actions/compute_average.py
@@ -52,10 +52,30 @@ def get_volume(data: "xr.Dataset") -> "xr.DataArray":
 
 
 def compute_average(data: "xr.Dataset", axis) -> "xr.Dataset":
-    """Compute the average on a given axis.
+    """Compute the average of all variables along a specified axis.
+
+    Three axes are supported:
+
+    * ``"depth"``: Computes the depth-weighted vertical average over the full
+      depth column using :func:`~medunda.actions.average_between_layers.average_between_layers`.
+    * ``"space"``: Computes a volume-weighted spatial average over all
+      (latitude, longitude) grid points using the cell volumes derived from
+      the grid mask.
+    * ``"time"``: Computes a simple arithmetic mean over the time dimension.
+
     Args:
-        data (xr.Dataset): Input dataset with depth as one of the dimensions.
-        axis: The axis over which to compute the average.
+        data (xr.Dataset): Input dataset.  Must include ``depth``,
+            ``latitude``, ``longitude``, and ``time`` coordinates as required
+            by the chosen axis.
+        axis (str): Axis along which to compute the average.  One of
+            ``"depth"``, ``"space"``, or ``"time"``.
+
+    Returns:
+        xr.Dataset: Dataset with the chosen dimension collapsed, containing
+        the averaged values for each variable.
+
+    Raises:
+        ValueError: If *axis* is not one of the valid choices.
     """
     if axis not in VALID_AXIS.keys():
         raise ValueError(
diff --git a/src/medunda/actions/extract_bottom.py b/src/medunda/actions/extract_bottom.py
index d2e82bd..2b3b316 100644
--- a/src/medunda/actions/extract_bottom.py
+++ b/src/medunda/actions/extract_bottom.py
@@ -14,6 +14,24 @@ def configure_parser(subparsers):
 
 
 def extract_bottom(data: "xr.Dataset") -> "xr.Dataset":
+    """Extract the bottom-most valid grid cell for each spatial location.
+
+    For each variable with a ``depth`` dimension, the function uses the
+    land-sea mask (derived from the first time step) to identify the deepest
+    unmasked (valid) depth level at every (latitude, longitude) grid point and
+    returns the corresponding values.  Variables without a ``depth`` dimension
+    are passed through unchanged.
+
+    Args:
+        data (xr.Dataset): Input dataset.  Must include a ``depth`` coordinate
+            and at least one time step for each variable with a depth
+            dimension.
+
+    Returns:
+        xr.Dataset: Dataset with the same variables as the input but with the
+        ``depth`` dimension removed.  Each value corresponds to the deepest
+        valid cell at the corresponding spatial location.
+    """
     LOGGER.info(f"reading the file: {data}")
 
     variables = {}
diff --git a/src/medunda/actions/extract_layer.py b/src/medunda/actions/extract_layer.py
index c6f1ea0..3c185ee 100644
--- a/src/medunda/actions/extract_layer.py
+++ b/src/medunda/actions/extract_layer.py
@@ -19,8 +19,20 @@ def configure_parser(subparsers):
 
 
 def extract_layer(data: "xr.Dataset", depth: float) -> "xr.Dataset":
-    """Extracts the layer nearest to the specified depth from the dataset.
-    Returns a dataset containing only the layer extracted.
+    """Extract the layer nearest to a specified depth from the dataset.
+
+    Uses nearest-neighbour selection along the ``depth`` coordinate, so the
+    depth actually selected may differ from the requested value when an
+    exact match is not available in the dataset.
+
+    Args:
+        data (xr.Dataset): Input dataset containing a ``depth`` coordinate.
+        depth (float): Target depth in metres.
+
+    Returns:
+        xr.Dataset: Dataset with the same variables as the input but with the
+        ``depth`` dimension removed, containing values at the depth level
+        closest to *depth*.
     """
 
     LOGGER.info(f"reading the file: {data}")
diff --git a/src/medunda/actions/extract_surface.py b/src/medunda/actions/extract_surface.py
index 70e2678..88d5bcb 100644
--- a/src/medunda/actions/extract_surface.py
+++ b/src/medunda/actions/extract_surface.py
@@ -13,6 +13,18 @@ def configure_parser(subparsers):
 
 
 def extract_surface(data: "xr.Dataset") -> "xr.Dataset":
+    """Extract the surface layer (first depth level) from the dataset.
+
+    Selects the first depth level (index 0, assumed to be the shallowest)
+    from every variable and drops the ``depth`` dimension from the output.
+
+    Args:
+        data (xr.Dataset): Input dataset containing a ``depth`` dimension.
+
+    Returns:
+        xr.Dataset: Dataset with the same variables as the input but with the
+        ``depth`` dimension removed, containing only surface-level values.
+    """
     LOGGER.info(f"reading the file: {data}")
 
     surface_layer = data.isel(depth=0)
diff --git a/src/medunda/actions/integrate_between_layers.py b/src/medunda/actions/integrate_between_layers.py
index a6a1008..653641d 100644
--- a/src/medunda/actions/integrate_between_layers.py
+++ b/src/medunda/actions/integrate_between_layers.py
@@ -30,8 +30,25 @@ def configure_parser(subparsers):
 def integrate_between_layers(
     data: "xr.Dataset", depth_min: float, depth_max: float
 ) -> "xr.Dataset":
-    """Computes the vertical integral of variables between two specified depths.
-    Returns a dataset containing the weighted average of this strata.
+    """Compute the vertical integral of variables between two specified depths.
+
+    For each variable in the input dataset that has a ``depth`` dimension, the
+    function selects the depth levels within ``[depth_min, depth_max]`` and
+    integrates over those levels by weighting each cell by its layer height.
+    Grid points that are masked (NaN) at the shallowest selected level are set
+    to NaN in the output, preserving the land-sea mask.  Variables that do not
+    have a ``depth`` dimension are omitted from the output.
+
+    Args:
+        data (xr.Dataset): Input dataset containing the variables to integrate.
+            Must include a ``depth`` coordinate.
+        depth_min (float): Minimum depth of the range (its shallowest limit).
+        depth_max (float): Maximum depth of the range (its deepest limit).
+
+    Returns:
+        xr.Dataset: Dataset containing the vertically integrated values for all
+        depth-dependent variables over the selected depth range.  The ``depth``
+        dimension is collapsed in the output.
     """
     integrated_variables = {}
     for variable in data.data_vars:
diff --git a/src/medunda/actions/integration.py b/src/medunda/actions/integration.py
index 23f2980..e46295b 100644
--- a/src/medunda/actions/integration.py
+++ b/src/medunda/actions/integration.py
@@ -15,6 +15,22 @@ def configure_parser(subparsers):
 
 
 def compute_integral(data: "xr.Dataset") -> "xr.Dataset":
+    """Compute the vertical integral of all depth-dependent variables.
+
+    For each variable in the input dataset that includes a ``depth``
+    dimension, the function integrates the variable over the full depth column
+    by weighting each depth cell by its layer height (in metres).  Variables
+    without a ``depth`` dimension are omitted from the output.
+
+    Args:
+        data (xr.Dataset): Input dataset.  Must include a ``depth``
+            coordinate from which layer heights can be derived.
+
+    Returns:
+        xr.Dataset: Dataset containing the vertically integrated values for
+        all depth-dependent variables.  The ``depth`` dimension is collapsed
+        in the output.
+    """
     layer_height = compute_layer_height(data.depth.values)
     lh = xr.DataArray(layer_height, dims=["depth"])