-
Notifications
You must be signed in to change notification settings - Fork 10
Add timerange constraint #399
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| Add a timerange constraint to ensure the required data is available. |
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -6,6 +6,8 @@ | |||||
| import warnings | ||||||
| from collections import defaultdict | ||||||
| from collections.abc import Mapping | ||||||
| from datetime import datetime | ||||||
| from functools import total_ordering | ||||||
| from typing import Literal, Protocol, runtime_checkable | ||||||
|
|
||||||
| if sys.version_info < (3, 11): | ||||||
|
|
@@ -273,6 +275,123 @@ def from_defaults( | |||||
| return cls(supplementary_facets, **kwargs[source_type]) | ||||||
|
|
||||||
|
|
||||||
@frozen
@total_ordering
class PartialDateTime:
    """
    A partial datetime object that can be used to compare datetimes.

    Only the specified fields are used for comparison; fields left as ``None``
    are ignored. Every field, including ``year``, is optional so that, for
    example, a season can be selected by specifying only months.

    NOTE(review): per the review discussion on this change, the continuity
    check is not yet smart enough to handle such year-less selections.

    Comparisons against ``datetime.datetime`` objects walk the set fields in
    the order given by ``__slots__`` (presumably the declaration order, year
    through second -- confirm against the ``frozen`` decorator in use).

    Two ``PartialDateTime`` objects can be tested for equality with each
    other: they are equal when the same fields are set to the same values.
    This keeps equality checks on objects that hold ``PartialDateTime``
    fields from raising. Ordering two ``PartialDateTime`` objects relative to
    each other is not supported and raises ``TypeError``.
    """

    year: int | None = None
    month: int | None = None
    day: int | None = None
    hour: int | None = None
    minute: int | None = None
    second: int | None = None

    @property
    def _attrs(self) -> dict[str, int]:
        """The attributes that are set."""
        return {
            a: v
            for a in self.__slots__  # type: ignore[attr-defined]
            # Skip private/dunder slots; only public fields take part.
            if not a.startswith("_") and (v := getattr(self, a)) is not None
        }

    @staticmethod
    def _as_datetime(other: object) -> datetime:
        """Return ``other`` unchanged if it is a datetime, else raise ``TypeError``."""
        if not isinstance(other, datetime):
            msg = (
                f"Can only compare PartialDateTime with `datetime.datetime` "
                f"objects, got object {other} of type {type(other)}"
            )
            raise TypeError(msg)
        return other

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}({', '.join(f'{a}={v}' for a, v in self._attrs.items())})"

    def __eq__(self, other: object) -> bool:
        """
        Compare with a datetime (on the set fields only) or another PartialDateTime.

        Raises
        ------
        TypeError
            If ``other`` is neither a ``datetime.datetime`` nor a
            ``PartialDateTime``.
        """
        if isinstance(other, PartialDateTime):
            # Structural equality: same fields set, same values.
            return self._attrs == other._attrs

        other_dt = self._as_datetime(other)
        return all(value == getattr(other_dt, attr) for attr, value in self._attrs.items())

    def __lt__(self, other: object) -> bool:
        """
        Return True if this partial datetime sorts before the datetime ``other``.

        The first set field that differs from the corresponding field of
        ``other`` decides the result; if none differs, the two are considered
        equal and the result is False.

        Raises
        ------
        TypeError
            If ``other`` is not a ``datetime.datetime``.
        """
        other_dt = self._as_datetime(other)
        for attr, value in self._attrs.items():
            other_value = getattr(other_dt, attr)
            if value != other_value:
                return value < other_value  # type: ignore[no-any-return]
        return False
|
|
||||||
|
|
||||||
@frozen
class RequireTimerange:
    """
    A constraint requiring that datasets span a given timerange.

    The required start and/or end are given with a precision that matches
    the frequency of the datasets.

    For example, to ensure that monthly datasets cover 2000 through 2010,
    use ``start=PartialDateTime(year=2000, month=1)`` and
    ``end=PartialDateTime(year=2010, month=12)``.
    """

    group_by: tuple[str, ...]
    """
    The fields used to split the datasets into groups. The constraint is only
    fulfilled if every group covers the timerange.
    """

    start: PartialDateTime | None = None
    """
    Required start of the timerange. ``None`` leaves the start unconstrained.
    """

    end: PartialDateTime | None = None
    """
    Required end of the timerange. ``None`` leaves the end unconstrained.
    """

    def validate(self, group: pd.DataFrame) -> bool:
        """
        Check that every subgroup covers the required timerange contiguously.

        Rows lacking a ``start_time`` or ``end_time`` are dropped first. For
        each subgroup (grouped by ``group_by``), the earliest ``start_time``
        must not be after ``start`` and the latest ``end_time`` must not be
        before ``end``. Subgroups that pass are additionally checked with
        ``RequireContiguousTimerange``. Returns False as soon as one subgroup
        fails and True otherwise.
        """
        timed = group.dropna(subset=["start_time", "end_time"])
        for _, subgroup in timed.groupby(list(self.group_by)):
            earliest = subgroup["start_time"].min()
            latest = subgroup["end_time"].max()

            # Evaluate both bounds before bailing out so that each violation
            # gets its own debug message.
            starts_too_late = self.start is not None and earliest > self.start
            if starts_too_late:
                logger.debug(
                    f"Constraint {self.__class__.__name__} not satisfied "
                    f"because start time {earliest} is after required start time "
                    f"{self.start} for {', '.join(subgroup['path'])}"
                )

            ends_too_early = self.end is not None and latest < self.end
            if ends_too_early:
                logger.debug(
                    f"Constraint {self.__class__.__name__} not satisfied "
                    f"because end time {latest} is before required end time "
                    f"{self.end} for {', '.join(subgroup['path'])}"
                )

            if starts_too_late or ends_too_early:
                return False

            # The range is covered at both ends; now make sure it has no gaps.
            if not RequireContiguousTimerange(group_by=self.group_by).validate(subgroup):
                return False

        return True
|
|
||||||
|
|
||||||
| @frozen | ||||||
| class RequireContiguousTimerange: | ||||||
| """ | ||||||
|
|
||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I didn't know about this. Useful