Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 100 additions & 0 deletions docs/source/actions.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
(actionsdoc)=
# Actions

Actions are the processing operations that can be applied to oceanographic datasets in Medunda.
Each action is exposed as a sub-command of the CLI and as a Python function that accepts an
`xarray.Dataset` and returns a transformed dataset.

---

## average\_between\_layers

```{eval-rst}
.. autofunction:: medunda.actions.average_between_layers.average_between_layers
```

---

## calculate\_stats

```{eval-rst}
.. autoclass:: medunda.actions.calculate_stats.Stats
   :members:
   :show-inheritance:
```

```{eval-rst}
.. autofunction:: medunda.actions.calculate_stats.calculate_stats
```

---

## climatology

```{eval-rst}
.. autofunction:: medunda.actions.climatology.climatology
```

---

## compute\_average

```{eval-rst}
.. autofunction:: medunda.actions.compute_average.compute_average
```

---

## compute\_integral

```{eval-rst}
.. autofunction:: medunda.actions.integration.compute_integral
```

---

## extract\_annual\_extremes

```{eval-rst}
.. autofunction:: medunda.actions.extract_annual_extremes.extract_annual_extremes
```

---

## extract\_annual\_extremes\_per\_layer

```{eval-rst}
.. autofunction:: medunda.actions.extract_annual_extremes_per_layer.extract_annual_extremes_per_layer
```

---

## extract\_bottom

```{eval-rst}
.. autofunction:: medunda.actions.extract_bottom.extract_bottom
```

---

## extract\_layer

```{eval-rst}
.. autofunction:: medunda.actions.extract_layer.extract_layer
```

---

## extract\_surface

```{eval-rst}
.. autofunction:: medunda.actions.extract_surface.extract_surface
```

---

## integrate\_between\_layers

```{eval-rst}
.. autofunction:: medunda.actions.integrate_between_layers.integrate_between_layers
```
1 change: 1 addition & 0 deletions docs/source/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,5 @@ intro
workflow
downloader
components
actions
```
22 changes: 20 additions & 2 deletions src/medunda/actions/average_between_layers.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,26 @@ def configure_parser(subparsers):
def average_between_layers(
data: "xr.Dataset", depth_min, depth_max
) -> "xr.Dataset":
"""Computes the vertical average of variables between two specified depths.
Returns a dataset containing the weighted average of this strata.
"""Compute the depth-weighted vertical average between two specified depths.

For each variable in the input dataset that has a ``depth`` dimension, the
function selects the depth levels within ``[depth_min, depth_max]`` and
computes a weighted average, where the weight of each depth cell is its
layer height. Masked (NaN) cells are excluded from both the weighted sum
and the normalisation, so the result is always a proper average of the
valid cells. Variables that do not have a ``depth`` dimension are passed
through unchanged.

Args:
data (xr.Dataset): Input dataset containing the variables to average.
Must include a ``depth`` coordinate.
depth_min (float): Shallowest depth of the range, i.e. the numerically
smallest depth value included in the average.
depth_max (float): Deepest depth of the range, i.e. the numerically
largest depth value included in the average.

Returns:
xr.Dataset: Dataset with the same variables as the input, but with the
``depth`` dimension collapsed. Each variable is replaced by its
depth-weighted average over the selected depth range.
"""
averaged_variables = {}
for variable in data.data_vars:
Expand Down
62 changes: 59 additions & 3 deletions src/medunda/actions/calculate_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,27 +33,46 @@ def configure_parser(subparsers):


class Stats:
"""This class provides methods to perform basic statistical calculations"""
"""Provides methods to compute basic statistical operations on an array.

All operations reduce along axis 0, which is assumed to correspond to the
time dimension.

Args:
data: Numeric array-like object to analyse.
"""

def __init__(self, data):
self.data = data

def mean(self):
"""Return the arithmetic mean along axis 0."""
return np.mean(self.data, axis=0)

def variance(self):
"""Return the population variance (NumPy's default ``ddof=0``) along axis 0."""
return np.var(self.data, axis=0)

def median(self):
"""Return the median along axis 0."""
return np.median(self.data, axis=0)

def min(self):
"""Return the minimum value along axis 0."""
return np.min(self.data, axis=0)

def max(self):
"""Return the maximum value along axis 0."""
return np.max(self.data, axis=0)

def quartiles(self):
"""Return the 5th, 25th, 75th, and 95th percentiles along axis 0.

Returns:
dict[str, numpy.ndarray]: Dictionary mapping each percentile
(as a string, e.g. ``"25"``) to the corresponding percentile
array.
"""
percentiles = [5, 25, 75, 95]
output = {
str(k): np.percentile(self.data, k, axis=0) for k in percentiles
Expand All @@ -63,6 +82,24 @@ def quartiles(self):
def calculate(
self, operations: Sequence[str] | None = None
) -> dict[str, Any]:
"""Compute one or more statistical operations on the stored data.

Args:
operations (Sequence[str] | None): Names of the operations to compute.
Accepted values are ``"mean"``, ``"variance"``,
``"median"``, ``"minimum"``, ``"maximum"``, ``"quartiles"``,
and ``"all"``. When ``None`` or ``["all"]`` is passed, every
available operation is computed.

Returns:
dict[str, Any]: Dictionary mapping each operation name to its
result.

Raises:
ValueError: If ``"all"`` is combined with other operation names,
if duplicate operation names are provided, or if an unknown
operation name is requested.
"""
available_operations = {
"mean": self.mean,
"variance": self.variance,
Expand Down Expand Up @@ -98,8 +135,27 @@ def calculate(


def calculate_stats(data: "xr.Dataset", operations) -> "xr.Dataset":
"""Regroups and compute some statistical operations
according to the user's choice"""
"""Compute statistical operations on each variable in the dataset.

For each variable in the input dataset (excluding coordinate-like
variables such as ``depth``, ``latitude``, ``longitude``, and ``time``),
the requested statistical operations are computed over all dimensions and
stored as new variables in the output dataset. Output variable names
follow the pattern ``{variable}_{operation}``; quartile outputs use
``{variable}_quartiles_{percentile}``.

Args:
data (xr.Dataset): Input dataset containing the variables to analyse.
operations (list[str] | None): Statistical operations to compute.
Accepted values are ``"mean"``, ``"median"``, ``"variance"``,
``"quartiles"``, ``"minimum"``, ``"maximum"``, and ``"all"``.
Pass ``None`` or ``["all"]`` to compute every available
operation.

Returns:
xr.Dataset: Dataset whose variables are the results of the requested
statistical operations.
"""

ds_results = xr.Dataset()
for var_name in data.data_vars:
Expand Down
50 changes: 50 additions & 0 deletions src/medunda/actions/climatology.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,56 @@ def climatology(
start_date=None,
end_date=None,
) -> "xr.Dataset":
"""Compute the climatological average of a variable at a given temporal frequency.

The climatology is computed by grouping the data by the requested temporal
period (day-of-year, month, or season) and averaging across all years
within the specified date range. Both monthly and daily source datasets
are supported; the dataset frequency is inferred automatically from the
``title`` global attribute of the dataset.

When the source dataset has **monthly** resolution:

* *monthly* climatology is computed as a year-weighted average where each
month is weighted by its number of days.
* *daily* climatology cannot be computed and raises a :class:`ValueError`.
* *seasonal* climatology groups months into DJF, MAM, JJA, SON and
averages with equal weights.

When the source dataset has **daily** resolution:

* *daily* climatology is the mean for each calendar day-of-year across
all years.
* *monthly* climatology is obtained by first computing a daily
climatology then averaging the day-of-year bins within each calendar
month.
* *seasonal* climatology groups day-of-year bins into DJF, MAM, JJA,
SON and averages with equal weights.

Args:
data (xr.Dataset): Input dataset. Must have a ``time`` coordinate
and a ``title`` global attribute containing either
``"monthly"`` or ``"daily"``.
variable (str): Name of the variable to compute the climatology for.
Must be present in ``data.data_vars``.
frequency (str): Temporal resolution of the output climatology. One
of ``"daily"``, ``"monthly"``, or ``"seasonally"``.
start_date (datetime-like, optional): Start of the reference period.
Defaults to the first time step in the dataset.
end_date (datetime-like, optional): End of the reference period.
Defaults to the last time step in the dataset.

Returns:
xr.Dataset: Dataset containing the climatological average of
*variable*, with the time dimension replaced by the climatological
coordinate (``month``, ``dayofyear``, or ``season``).

Raises:
ValueError: If *variable* is not found in the dataset, if the
dataset frequency cannot be determined from its ``title``
attribute, or if a daily climatology is requested from a monthly
dataset.
"""
# check the variable
if variable not in data.data_vars:
available_variables = list(data.data_vars.keys())
Expand Down
26 changes: 23 additions & 3 deletions src/medunda/actions/compute_average.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,30 @@ def get_volume(data: "xr.Dataset") -> "xr.DataArray":


def compute_average(data: "xr.Dataset", axis) -> "xr.Dataset":
"""Compute the average on a given axis.
"""Compute the average of all variables along a specified axis.

Three axes are supported:

* ``"depth"``: Computes the depth-weighted vertical average over the full
depth column using :func:`~medunda.actions.average_between_layers.average_between_layers`.
* ``"space"``: Computes a volume-weighted spatial average over all
(latitude, longitude) grid points using the cell volumes derived from
the grid mask.
* ``"time"``: Computes a simple arithmetic mean over the time dimension.

Args:
data (xr.Dataset): Input dataset with depth as one of the dimensions.
axis: The axis over which to compute the average.
data (xr.Dataset): Input dataset. Must include ``depth``,
``latitude``, ``longitude``, and ``time`` coordinates as required
by the chosen axis.
axis (str): Axis along which to compute the average. One of
``"depth"``, ``"space"``, or ``"time"``.

Returns:
xr.Dataset: Dataset with the chosen dimension collapsed, containing
the averaged values for each variable.

Raises:
ValueError: If *axis* is not one of the valid choices.
"""
if axis not in VALID_AXIS.keys():
raise ValueError(
Expand Down
18 changes: 18 additions & 0 deletions src/medunda/actions/extract_bottom.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,24 @@ def configure_parser(subparsers):


def extract_bottom(data: "xr.Dataset") -> "xr.Dataset":
"""Extract the bottom-most valid grid cell for each spatial location.

For each variable with a ``depth`` dimension, the function uses the
land-sea mask (derived from the first time step) to identify the deepest
unmasked (valid) depth level at every (latitude, longitude) grid point and
returns the corresponding values. Variables without a ``depth`` dimension
are passed through unchanged.

Args:
data (xr.Dataset): Input dataset. Must include a ``depth`` coordinate
and at least one time step for each variable with a depth
dimension.

Returns:
xr.Dataset: Dataset with the same variables as the input but with the
``depth`` dimension removed. Each value corresponds to the deepest
valid cell at the corresponding spatial location.
"""
LOGGER.info(f"reading the file: {data}")

variables = {}
Expand Down
16 changes: 14 additions & 2 deletions src/medunda/actions/extract_layer.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,20 @@ def configure_parser(subparsers):


def extract_layer(data: "xr.Dataset", depth: float) -> "xr.Dataset":
"""Extracts the layer nearest to the specified depth from the dataset.
Returns a dataset containing only the layer extracted.
"""Extract the layer nearest to a specified depth from the dataset.

Uses nearest-neighbour selection along the ``depth`` coordinate, so the
depth that is actually selected may differ slightly from the requested
value when the dataset has no level at exactly that depth.

Args:
data (xr.Dataset): Input dataset containing a ``depth`` coordinate.
depth (float): Target depth in metres.

Returns:
xr.Dataset: Dataset with the same variables as the input but with the
``depth`` dimension removed, containing values at the depth level
closest to *depth*.
"""

LOGGER.info(f"reading the file: {data}")
Expand Down
12 changes: 12 additions & 0 deletions src/medunda/actions/extract_surface.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,18 @@ def configure_parser(subparsers):


def extract_surface(data: "xr.Dataset") -> "xr.Dataset":
"""Extract the surface layer (first depth level) from the dataset.

Selects the first depth level (index 0) of every variable, which is the
shallowest level when ``depth`` is sorted in ascending order, and removes
the ``depth`` dimension from the output dataset.

Args:
data (xr.Dataset): Input dataset containing a ``depth`` dimension.

Returns:
xr.Dataset: Dataset with the same variables as the input but with the
``depth`` dimension removed, containing only surface-level values.
"""
LOGGER.info(f"reading the file: {data}")

surface_layer = data.isel(depth=0)
Expand Down
Loading