feat(alerts): record_alert primitive + AlertFeed/Octopus producers #3830
base: main
Changes from all commits: aaec0c7, 53a3c08, 93d781b, 6ef15be, 97e6c39
In `octopus.py`:

```diff
@@ -373,6 +373,10 @@ def initialize(self, key, account_id, automatic):
         self.commands = []
         self.mpan = None
         self.free_electricity_events = []
+        # Track when each intelligent device first reported a non-capable
+        # currentState (e.g. SMART_CONTROL_NOT_AVAILABLE). When the condition
+        # persists beyond 24h we raise a user-facing alert via record_alert.
+        self.smart_control_degraded_since = {}

         # API request metrics for monitoring
         self.requests_total = 0
```
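Putting the pieces of this review together, the check that consumes this state appears to work roughly as sketched below. This is a hedged reconstruction: the method name `_maybe_raise_smart_control_alert` comes from a review comment further down, but its signature, the threshold constant, and the `record_alert` arguments are assumptions, not the actual diff.

```python
from datetime import datetime, timedelta, timezone

SMART_CONTROL_ALERT_AFTER = timedelta(hours=24)  # assumed threshold from the PR description

def _maybe_raise_smart_control_alert(self, device_id, current_state):
    # Recovery: a capable device resets the first-seen tracking.
    now = datetime.now(timezone.utc)
    if current_state == "SMART_CONTROL_CAPABLE":
        self.smart_control_degraded_since.pop(device_id, None)
        return
    # Remember when the degraded state was first observed.
    first_seen = self.smart_control_degraded_since.setdefault(device_id, now)
    # Only alert once the condition has persisted beyond the threshold;
    # re-recording each cycle refreshes the alert's TTL (expires_at).
    if now - first_seen >= SMART_CONTROL_ALERT_AFTER:
        self.record_alert(
            category="octopus",
            severity="warning",
            title="Intelligent smart control unavailable",
            message="Device {} has reported {} for over 24h".format(device_id, current_state),
            dedup_key="smart_control::{}".format(device_id),
            expires_at=now + timedelta(hours=1),
        )
```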
```diff
@@ -587,6 +591,15 @@ async def load_octopus_cache(self):
         self.saving_sessions = data.get("saving_sessions", {})
         self.intelligent_devices = data.get("intelligent_devices", {})
         self.graphql_token = data.get("kraken_token")
+        # Restore first-seen timestamps for the IOG smart-control
+        # degradation check so the 24h clock survives restarts.
+        raw = data.get("smart_control_degraded_since", {}) or {}
+        self.smart_control_degraded_since = {}
+        for device_id, iso in raw.items():
+            try:
+                self.smart_control_degraded_since[device_id] = datetime.fromisoformat(iso)
```
Suggested change:

```diff
-                self.smart_control_degraded_since[device_id] = datetime.fromisoformat(iso)
+                if isinstance(iso, str) and iso.endswith("Z"):
+                    iso = iso[:-1] + "+00:00"
+                parsed = datetime.fromisoformat(iso)
+                if parsed.tzinfo is None:
+                    parsed = parsed.replace(tzinfo=timezone.utc)
+                self.smart_control_degraded_since[device_id] = parsed
```
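The normalisation in this suggestion matters on older interpreters: `datetime.fromisoformat()` only accepts the trailing `Z` designator from Python 3.11 onwards, and a naive restored timestamp cannot be compared against a timezone-aware "now". A minimal illustration:

```python
from datetime import datetime, timezone

iso = "2026-04-25T10:00:00Z"
# On Python < 3.11 this raises ValueError:
#   datetime.fromisoformat(iso)
if iso.endswith("Z"):
    iso = iso[:-1] + "+00:00"
parsed = datetime.fromisoformat(iso)  # parses everywhere, timezone-aware
assert parsed.tzinfo is not None
# Comparing naive and aware datetimes raises TypeError, which is why the
# suggestion also attaches timezone.utc to any naive value it parses.
```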
**Copilot (AI) commented on Apr 25, 2026:**
New smart-control degradation alert logic in `_maybe_raise_smart_control_alert()` is user-facing behavior (24h threshold + TTL refresh), and there are already Octopus tests under `apps/predbat/tests/`. Please add or extend tests to cover: tracking `first_seen` across cycles, the alert firing after 24h, and clearing when `SMART_CONTROL_CAPABLE` returns (including the cache-restore path).
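A rough sketch of the shape such tests could take (the `octopus_api` fixture, the `recorded_alerts` capture, and the method signature are all assumptions; the real tests should use the existing harness under `apps/predbat/tests/`):

```python
from datetime import datetime, timedelta, timezone

def test_alert_fires_after_24h(octopus_api):
    # Hypothetical fixture: octopus_api captures record_alert() calls
    # into octopus_api.recorded_alerts for inspection.
    octopus_api.smart_control_degraded_since["dev-1"] = datetime.now(timezone.utc) - timedelta(hours=25)
    octopus_api._maybe_raise_smart_control_alert("dev-1", "SMART_CONTROL_NOT_AVAILABLE")
    assert octopus_api.recorded_alerts, "alert should fire once past the 24h threshold"

def test_no_alert_before_24h(octopus_api):
    octopus_api._maybe_raise_smart_control_alert("dev-1", "SMART_CONTROL_NOT_AVAILABLE")
    # First sighting only starts the clock; nothing should fire yet.
    assert "dev-1" in octopus_api.smart_control_degraded_since
    assert not octopus_api.recorded_alerts

def test_capable_state_clears_tracking(octopus_api):
    octopus_api.smart_control_degraded_since["dev-1"] = datetime.now(timezone.utc)
    octopus_api._maybe_raise_smart_control_alert("dev-1", "SMART_CONTROL_CAPABLE")
    assert "dev-1" not in octopus_api.smart_control_degraded_since
```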
In `output.py`:

```diff
@@ -17,7 +17,7 @@
 """

 import math
-from datetime import datetime, timedelta
+from datetime import datetime, timedelta, timezone
 from config import THIS_VERSION
 from const import TIME_FORMAT, PREDICT_STEP
 from utils import dp0, dp1, dp2, dp3, calc_percent_limit, minute_data, minute_data_state
```

```diff
@@ -2429,6 +2429,95 @@ def record_status(self, message, debug="", had_errors=False, notify=False, extra
         if had_errors:
             self.had_errors = True

+    def record_alert(self, category, severity, title, message, dedup_key=None, metadata=None, expires_at=None, action_url=None):
+        """
+        Record a user-facing alert. Published as a list of dicts on the
+        `sensor.<prefix>_system_alerts` entity (attribute `alerts`).
```
Suggested change:

```diff
-        `sensor.<prefix>_system_alerts` entity (attribute `alerts`).
+        `<prefix>.system_alerts` entity (attribute `alerts`).
```
**Copilot (AI) commented on Apr 25, 2026:**
record_alert() stores alerts under a plain key = dedup_key or "{category}::{title}". If callers provide a dedup_key without including the category (as in octopus.py), alerts from different categories could collide and overwrite each other. Consider composing the internal dict key from both category + dedup_key (while keeping the original dedup_key field intact for consumers).
Suggested change:

```diff
-        key = dedup_key or "{}::{}".format(category, title)
-        self._active_alerts[key] = {
+        consumer_dedup_key = dedup_key or "{}::{}".format(category, title)
+        internal_key = "{}::{}".format(category, dedup_key) if dedup_key else consumer_dedup_key
+        self._active_alerts[internal_key] = {
             "category": category,
             "severity": severity,
             "title": title,
             "message": message,
-            "dedup_key": key,
+            "dedup_key": consumer_dedup_key,
```
**Copilot (AI) commented on Apr 25, 2026:**
_publish_system_alerts() only prunes expired alerts when now_dt is available, but PredBat itself does not define now_utc_exact (only ComponentBase does). In that case now_dt will be None, TTL pruning never happens, and recorded_at is always None. Recommend adding a now_utc_exact property/attribute on PredBat (e.g., set in update_time) or switching this logic to use an existing canonical timestamp like self.now_utc so TTL-only lifecycle works in both HA and standalone modes.
Suggested change:

```diff
-        now_dt = self.now_utc_exact if hasattr(self, "now_utc_exact") and self.now_utc_exact else None
+        now_dt = getattr(self, "now_utc_exact", None) or getattr(self, "now_utc", None)
+        if now_dt is not None and now_dt.tzinfo is None:
+            now_dt = now_dt.replace(tzinfo=timezone.utc)
```
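The `getattr` fallback means TTL pruning still runs in standalone mode, where only `self.now_utc` is refreshed each cycle, and normalising a naive timestamp to UTC keeps the later `expires_dt` comparison from raising `TypeError` (naive and aware datetimes cannot be compared).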
**Copilot (AI) commented on Apr 25, 2026:**
Expiry pruning uses if expires_dt < now_dt, but the API description says re-recording with expires_at=now should be pruned on the next publish. Using a strict < can leave an alert around for an extra cycle when the timestamps are equal; consider using <= for the comparison so "expires at now" reliably clears.
Suggested change:

```diff
-            if expires_dt < now_dt:
+            if expires_dt <= now_dt:
```
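A two-line illustration of the boundary case:

```python
from datetime import datetime, timezone

now_dt = datetime(2026, 4, 25, 10, 0, tzinfo=timezone.utc)
expires_dt = now_dt                 # alert re-recorded with expires_at=now
assert not (expires_dt < now_dt)    # strict: kept for another cycle
assert expires_dt <= now_dt         # inclusive: pruned on this publish
```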
Copilot
AI
Apr 25, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
PR description says the new entity is published as sensor.<prefix>.system_alerts, but the implementation publishes to self.prefix + ".system_alerts". Please reconcile the PR description and the actual entity id pattern (and ideally keep it consistent with other published entities).
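For context, Home Assistant entity ids follow the `<domain>.<object_id>` pattern, so `self.prefix + ".system_alerts"` yields an entity under a custom domain (e.g. `predbat.system_alerts` if the prefix is `predbat`) rather than a `sensor.` entity; whichever pattern is kept, the docstring, the PR description, and the implementation should all name the same id.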
```diff
@@ -473,6 +473,9 @@ def reset(self):
         self.current_status = None
         self.previous_status = None
         self.had_errors = False
+        # Active user-facing alerts keyed by (category, dedup_key). See
+        # record_alert()/clear_alert() in output.py.
```
Comment on lines +476 to +477:
Suggested change:

```diff
-        # Active user-facing alerts keyed by (category, dedup_key). See
-        # record_alert()/clear_alert() in output.py.
+        # Active user-facing alerts keyed by the alert identifier string used
+        # by the output layer. Updated via the alert-recording flow in output.py.
```
apply_alerts() now loops over potentially multiple CAP alerts and calls record_alert() for each one; record_alert() immediately publishes the aggregated system_alerts entity each time. This can cause multiple HA state updates per cycle. Consider batching (e.g., add a publish=False option and publish once after the loop, or have AlertFeed build alerts then call a single publish) to reduce churn.
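One possible batching shape, as a rough sketch (the `publish` keyword is the reviewer's suggestion; `_publish_system_alerts()` is named earlier in this review, but the `apply_alerts` body shown here is an assumption about how AlertFeed would use it):

```python
def record_alert(self, category, severity, title, message, dedup_key=None,
                 metadata=None, expires_at=None, action_url=None, publish=True):
    # ... store/refresh the alert in self._active_alerts as before ...
    if publish:
        self._publish_system_alerts()

def apply_alerts(self, alerts):
    # Record every CAP alert without publishing, then emit a single
    # aggregated state update instead of one per alert.
    for alert in alerts:
        self.record_alert(publish=False, **alert)
    self._publish_system_alerts()
```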