From aaec0c786d849682021fe96a6a176c55268741f0 Mon Sep 17 00:00:00 2001 From: Mark Gascoyne Date: Wed, 22 Apr 2026 07:07:16 +0100 Subject: [PATCH 1/5] feat(alerts): record_alert primitive + system_alerts entity MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a general-purpose alert recording mechanism so any component can surface a user-facing issue without building a bespoke publication pipeline. - PredBat.record_alert(category, severity, title, message, dedup_key, metadata, expires_at, action_url) in output.py - ComponentBase.record_alert delegate so components can call self.record_alert(...) naturally (same pattern as record_status) - self._active_alerts dict tracks currently-active alerts keyed by dedup_key (or category::title); initialized in PredBat.reset() - Publishes to sensor.{prefix}.system_alerts — state on/off, attribute 'alerts' carries the list sorted critical > warning > info - TTL-only lifecycle: producers re-record each cycle while the condition holds; alerts with expires_at in the past are pruned on the next publish. Fast-resolve is re-recording with expires_at=now. No explicit clear_alert — keeps the contract small. - No consumers yet; separate commits wire AlertFeed and octopus.py (IOG smart control lost) to call this --- apps/predbat/component_base.py | 16 +++++++++ apps/predbat/output.py | 65 ++++++++++++++++++++++++++++++++++ apps/predbat/predbat.py | 3 ++ 3 files changed, 84 insertions(+) diff --git a/apps/predbat/component_base.py b/apps/predbat/component_base.py index 4e94a9e8e..86f68efd2 100644 --- a/apps/predbat/component_base.py +++ b/apps/predbat/component_base.py @@ -87,6 +87,22 @@ def get_ha_config(self, name, default): """ return self.base.get_ha_config(name, default) + def record_alert(self, category, severity, title, message, dedup_key=None, metadata=None, expires_at=None, action_url=None): + """ + Record a user-facing alert via the base system. See + `PredBat.record_alert` for full semantics. 
+ """ + return self.base.record_alert( + category=category, + severity=severity, + title=title, + message=message, + dedup_key=dedup_key, + metadata=metadata, + expires_at=expires_at, + action_url=action_url, + ) + def set_arg(self, arg, value): """ Set a configuration argument in the base system. diff --git a/apps/predbat/output.py b/apps/predbat/output.py index 0120b1630..49b3fd4d3 100644 --- a/apps/predbat/output.py +++ b/apps/predbat/output.py @@ -2429,6 +2429,71 @@ def record_status(self, message, debug="", had_errors=False, notify=False, extra if had_errors: self.had_errors = True + def record_alert(self, category, severity, title, message, dedup_key=None, metadata=None, expires_at=None, action_url=None): + """ + Record a user-facing alert. Published as a list of dicts on the + `sensor.{prefix}_system_alerts` entity (attribute `alerts`). + + Producers re-call each cycle while the condition holds and must pass + an `expires_at` slightly longer than their re-check cadence (usually + 2× the plan interval). Alerts with a past `expires_at` are auto-pruned + on the next publish. To fast-resolve an alert early, re-record it with + `expires_at` set to now. + + Args: + category: Short string grouping (e.g. "weather", "system", + "api_keys"). Consumers may filter or route by this. + severity: One of "critical", "warning", "info". + title: Short user-facing title. + message: Longer description. + dedup_key: Optional — if provided, an existing alert with the same + key is replaced (so repeated calls don't accumulate). Defaults + to a composite of category + title. + metadata: Optional dict carried through to consumers for routing + hints (e.g. `{"action": "keep_reserve", "keep_percent": 50}`). + expires_at: ISO-8601 timestamp. Strongly recommended — without + one, an alert persists until the process restarts. + action_url: Optional deep link for consumers that can render it. 
+ """ + key = dedup_key or "{}::{}".format(category, title) + self._active_alerts[key] = { + "category": category, + "severity": severity, + "title": title, + "message": message, + "dedup_key": key, + "metadata": metadata or {}, + "expires_at": expires_at, + "action_url": action_url, + "recorded_at": self.now_utc_exact.isoformat() if hasattr(self, "now_utc_exact") and self.now_utc_exact else None, + } + self._publish_system_alerts() + + def _publish_system_alerts(self): + """Prune expired entries and publish the current active list.""" + now_iso = self.now_utc_exact.isoformat() if hasattr(self, "now_utc_exact") and self.now_utc_exact else None + + # Prune expired + if now_iso: + expired_keys = [k for k, a in self._active_alerts.items() if a.get("expires_at") and a["expires_at"] < now_iso] + for k in expired_keys: + del self._active_alerts[k] + + active = sorted( + self._active_alerts.values(), + key=lambda a: ({"critical": 0, "warning": 1, "info": 2}.get(a.get("severity", "info"), 2), a.get("recorded_at") or ""), + ) + self.dashboard_item( + self.prefix + ".system_alerts", + state="on" if active else "off", + attributes={ + "friendly_name": "PredBat system alerts", + "icon": "mdi:alert-circle-outline", + "alerts": active, + "count": len(active), + }, + ) + def load_today_comparison(self, load_minutes, load_forecast, car_minutes, import_minutes, minutes_now, step=5, save=True): """ Compare predicted vs actual load diff --git a/apps/predbat/predbat.py b/apps/predbat/predbat.py index b516b41bf..fadecf782 100644 --- a/apps/predbat/predbat.py +++ b/apps/predbat/predbat.py @@ -473,6 +473,9 @@ def reset(self): self.current_status = None self.previous_status = None self.had_errors = False + # Active user-facing alerts keyed by dedup_key (default "category::title"). See + # record_alert() in output.py. 
+ self._active_alerts = {} self.plan_valid = False self.plan_last_updated = None self.plan_last_updated_minutes = 0 From 53a3c0885965094bd630746e6384be5c5ca56c41 Mon Sep 17 00:00:00 2001 From: Mark Gascoyne Date: Fri, 24 Apr 2026 18:37:37 +0100 Subject: [PATCH 2/5] feat(alerts): AlertFeed publishes to record_alert Dual-publish: existing HA entity sensor.{prefix}_alertfeed_status is unchanged (backward-compat for HA users); weather alerts are also recorded via the new record_alert() primitive so they appear on sensor.{prefix}.system_alerts alongside other categories. CAP severity maps to framework severity: - Extreme -> critical - Severe -> warning - Moderate -> info - Minor -> info Dedup key: weather:{event}:{onset} - stable across cycles of the same CAP alert so TTL refresh works without duplicates. When keep_reserve applies, it is carried in metadata for downstream consumers to render a reserve-hold hint. --- apps/predbat/alertfeed.py | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/apps/predbat/alertfeed.py b/apps/predbat/alertfeed.py index 8abfc986d..3b6c5e422 100644 --- a/apps/predbat/alertfeed.py +++ b/apps/predbat/alertfeed.py @@ -162,6 +162,41 @@ def apply_alerts(self, alerts, keep, minutes_now, midnight_utc): alert_show.append(item) self.dashboard_item("sensor." + self.prefix + "_alertfeed_status", state=active_alert_text, attributes={"friendly_name": "Weather alerts", "icon": "mdi:alert-outline", "keep": alert_keep, "alerts": alert_show}, app="alertfeed") + # Also publish to the unified alerts framework so downstream consumers + # (dashboards, gateways, SaaS) see weather alerts alongside other + # categories. TTL-only lifecycle: we re-record each cycle, entries + # drop off when no longer active (stops being re-recorded + TTL expires). 
+ for alert in alerts or []: + expires = alert.get("expires") + if not expires: + continue + cap_severity = (alert.get("severity") or "").lower() + framework_severity = "critical" if cap_severity == "extreme" else "warning" if cap_severity == "severe" else "info" + event = alert.get("event") or alert.get("title") or "Weather alert" + onset = alert.get("onset") + area = alert.get("areaDesc") or "your area" + metadata = { + "event": alert.get("event"), + "severity_cap": alert.get("severity"), + "certainty": alert.get("certainty"), + "urgency": alert.get("urgency"), + "area": area, + "onset": str(onset) if onset else None, + } + if keep and keep > 0: + metadata["action"] = "keep_reserve" + metadata["keep_percent"] = keep + dedup_key = "weather:{}:{}".format(event, str(onset) if onset else "no-onset") + self.record_alert( + category="weather", + severity=framework_severity, + title=event, + message="{} until {} ({}/{}/{})".format(area, expires, alert.get("severity") or "unknown", alert.get("certainty") or "unknown", alert.get("urgency") or "unknown"), + dedup_key=dedup_key, + metadata=metadata, + expires_at=expires.isoformat() if hasattr(expires, "isoformat") else str(expires), + ) + return alert_active_keep def is_point_in_polygon(self, lat, lon, polygon): From 93d781b708376409b74d32ee69ba169047916f36 Mon Sep 17 00:00:00 2001 From: Mark Gascoyne Date: Fri, 24 Apr 2026 18:39:55 +0100 Subject: [PATCH 3/5] feat(alerts): raise alert when Octopus loses smart control of IOG device When a customer's IOG charger lands in SMART_CONTROL_NOT_AVAILABLE (or any non-CAPABLE state) for more than 24 hours, raise a system severity=warning alert via record_alert. The alert nudges the customer to re-authorise MyEnergi in the Octopus app. 
Implementation: - Capture currentState from the dispatches query (already fetched, previously discarded) into each intelligent device dict - Track first_seen per device in self.smart_control_degraded_since - Clear tracking when currentState returns to SMART_CONTROL_CAPABLE - After 24h degraded, re-record each sensor cycle with a 2h TTL so the alert stays alive while the condition persists and auto-expires once it clears (per the TTL-only lifecycle) Closes the original motivating case: a customer lost IOG smart control for ~48h with no feedback; plannedDispatches silently returned empty and the only visible symptom was PredBat ignoring the cheap charging window. --- apps/predbat/octopus.py | 73 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/apps/predbat/octopus.py b/apps/predbat/octopus.py index dae8e8d21..a1181ad89 100644 --- a/apps/predbat/octopus.py +++ b/apps/predbat/octopus.py @@ -373,6 +373,10 @@ def initialize(self, key, account_id, automatic): self.commands = [] self.mpan = None self.free_electricity_events = [] + # Track when each intelligent device first reported a non-capable + # currentState (e.g. SMART_CONTROL_NOT_AVAILABLE). When the condition + # persists beyond 24h we raise a user-facing alert via record_alert. + self.smart_control_degraded_since = {} # API request metrics for monitoring self.requests_total = 0 @@ -1676,9 +1680,19 @@ async def async_get_intelligent_devices(self, account_id, device_id): if vehicle_info.get("model", None) == model: vehicleBatterySizeInKwh = vehicle_info.get("batterySize", None) + # currentState comes from the dispatches query's devices node. + # Example values seen: SMART_CONTROL_CAPABLE, SMART_CONTROL_NOT_AVAILABLE. 
+ current_state = None + if dispatch_result: + for dev in dispatch_result.get("devices", []) or []: + if dev.get("id") == IntelligentdeviceID: + current_state = (dev.get("status") or {}).get("currentState") + break + intelligent_device = { "deviceType": deviceType, "status": status, + "current_state": current_state, "provider": make, "model": model, "is_charger": isCharger, @@ -1843,6 +1857,65 @@ async def async_intelligent_update_sensor(self, account_id): ) self.dashboard_item(self.get_entity_name("number", "intelligent_target_soc", index=device_index), target_soc, attributes={"friendly_name": "Octopus Intelligent Target SOC", "icon": "mdi:battery-percent", "min": 0, "max": 100}, app="octopus") + # Surface SMART_CONTROL_NOT_AVAILABLE as a user-facing alert when + # it persists beyond 24h. The customer's charger has lost Octopus's + # smart control — PredBat will ignore IOG slots, but there is no + # feedback in the app beyond empty plannedDispatches. Alert nudges + # them to re-authorise MyEnergi in the Octopus app. + self._maybe_raise_smart_control_alert(device_id, device) + + def _maybe_raise_smart_control_alert(self, device_id, device): + """Check the device's currentState and raise / refresh a system alert + if it has been non-capable for more than 24h. TTL-only: while the + condition persists we re-record each sensor cycle to keep the alert + alive; when currentState returns to capable we stop re-recording and + the alert expires on its own.""" + current_state = device.get("current_state") + # SMART_CONTROL_CAPABLE is the healthy state. Treat anything else + # (e.g. SMART_CONTROL_NOT_AVAILABLE) as degraded. None means we did + # not observe a state this cycle — leave tracking as-is. 
+ if not current_state: + return + + if current_state == "SMART_CONTROL_CAPABLE": + self.smart_control_degraded_since.pop(device_id, None) + return + + now = self.now_utc_exact + first_seen = self.smart_control_degraded_since.get(device_id) + if first_seen is None: + self.smart_control_degraded_since[device_id] = now + return + + degraded_seconds = (now - first_seen).total_seconds() + if degraded_seconds < 24 * 60 * 60: + return + + # TTL slightly longer than our sensor refresh cadence so the alert + # survives between cycles but drops off once we stop re-recording. + from datetime import timedelta + + ttl = timedelta(hours=2) + expires_at = (now + ttl).isoformat() + + model = device.get("model") or device.get("provider") or "charger" + self.record_alert( + category="system", + severity="warning", + title="Octopus has lost smart control of your {}".format(model), + message=("Octopus can't schedule Intelligent charging sessions right " "now — PredBat can't see your charging slots. Re-authorise " "MyEnergi in the Octopus app (Smart devices) to fix it."), + dedup_key="iog_smart_control_lost:{}".format(device_id), + metadata={ + "device_id": device_id, + "current_state": current_state, + "provider": device.get("provider"), + "model": device.get("model"), + "degraded_since": first_seen.isoformat(), + "degraded_hours": round(degraded_seconds / 3600, 1), + }, + expires_at=expires_at, + ) + async def async_get_account(self, account_id): """ Get the user's account From 6ef15be192cbc36764bd50969f2c29979a134840 Mon Sep 17 00:00:00 2001 From: Mark Gascoyne Date: Fri, 24 Apr 2026 19:08:11 +0100 Subject: [PATCH 4/5] fix(alerts): address PR review findings 1. _publish_system_alerts compared expires_at strings lexicographically against now_utc_exact.isoformat(). Producers legitimately use different timezone offsets (CAP weather = +00:00, octopus code = local offset), and string ordering across offsets is not chronological. 
Parse expires_at via datetime.fromisoformat (handling trailing 'Z' for older Pythons) and compare against a timezone-aware UTC now. Treat naive timestamps as UTC. 2. smart_control_degraded_since was in-memory only, so every AppDaemon or pod restart reset the 24h alert clock for IOG devices that had been non-capable for days. Persist it to the existing octopus YAML cache (alongside intelligent_devices, saving_sessions, etc.) as ISO strings; rehydrate into datetime on load. The 24h window now survives restarts. --- apps/predbat/octopus.py | 12 ++++++++++++ apps/predbat/output.py | 33 +++++++++++++++++++++++++++------ 2 files changed, 39 insertions(+), 6 deletions(-) diff --git a/apps/predbat/octopus.py b/apps/predbat/octopus.py index a1181ad89..3927ede10 100644 --- a/apps/predbat/octopus.py +++ b/apps/predbat/octopus.py @@ -591,6 +591,15 @@ async def load_octopus_cache(self): self.saving_sessions = data.get("saving_sessions", {}) self.intelligent_devices = data.get("intelligent_devices", {}) self.graphql_token = data.get("kraken_token") + # Restore first-seen timestamps for the IOG smart-control + # degradation check so the 24h clock survives restarts. + raw = data.get("smart_control_degraded_since", {}) or {} + self.smart_control_degraded_since = {} + for device_id, iso in raw.items(): + try: + self.smart_control_degraded_since[device_id] = datetime.fromisoformat(iso) + except (TypeError, ValueError): + pass # Load tariffs from individual shared cache files # Tariffs will be loaded on-demand when needed via load_tariff_from_cache() @@ -619,6 +628,9 @@ async def save_octopus_cache(self): octopus_cache["saving_sessions"] = self.saving_sessions octopus_cache["intelligent_devices"] = self.intelligent_devices octopus_cache["kraken_token"] = self.graphql_token + # Persist the smart-control degradation first-seen timestamps as ISO + # strings so the 24h alert window survives AppDaemon restarts. 
+ octopus_cache["smart_control_degraded_since"] = {device_id: dt.isoformat() for device_id, dt in self.smart_control_degraded_since.items()} with open(self.user_cache_file, "w") as f: yaml.dump(octopus_cache, f) diff --git a/apps/predbat/output.py b/apps/predbat/output.py index 49b3fd4d3..d6605ece2 100644 --- a/apps/predbat/output.py +++ b/apps/predbat/output.py @@ -17,7 +17,7 @@ """ import math -from datetime import datetime, timedelta +from datetime import datetime, timedelta, timezone from config import THIS_VERSION from const import TIME_FORMAT, PREDICT_STEP from utils import dp0, dp1, dp2, dp3, calc_percent_limit, minute_data, minute_data_state @@ -2471,11 +2471,32 @@ def record_alert(self, category, severity, title, message, dedup_key=None, metad def _publish_system_alerts(self): """Prune expired entries and publish the current active list.""" - now_iso = self.now_utc_exact.isoformat() if hasattr(self, "now_utc_exact") and self.now_utc_exact else None - - # Prune expired - if now_iso: - expired_keys = [k for k, a in self._active_alerts.items() if a.get("expires_at") and a["expires_at"] < now_iso] + # Parse expires_at to timezone-aware datetime and compare against a UTC + # now so producers can pass ISO strings with any offset (e.g. CAP + # weather alerts arrive as +00:00, octopus alerts as local offset). + # Lex comparison on mixed-offset ISO strings would order wrongly. + now_dt = datetime.now(timezone.utc) if hasattr(self, "now_utc_exact") and self.now_utc_exact else None + + if now_dt is not None: + expired_keys = [] + for k, a in self._active_alerts.items(): + expires_at = a.get("expires_at") + if not expires_at: + continue + try: + # Python 3.11+: fromisoformat handles the trailing "Z"; older + # versions don't. Normalise by replacing "Z" with "+00:00". 
+ iso = expires_at.replace("Z", "+00:00") if isinstance(expires_at, str) else None + if iso is None: + continue + expires_dt = datetime.fromisoformat(iso) + except (TypeError, ValueError): + continue + # Treat naive timestamps as UTC for backward compatibility. + if expires_dt.tzinfo is None: + expires_dt = expires_dt.replace(tzinfo=timezone.utc) + if expires_dt < now_dt: + expired_keys.append(k) for k in expired_keys: del self._active_alerts[k] From 97e6c39dd227b228d4e80fc73071b694aba85e5e Mon Sep 17 00:00:00 2001 From: Mark Gascoyne Date: Fri, 24 Apr 2026 19:21:42 +0100 Subject: [PATCH 5/5] fix(alerts): use self.now_utc_exact in _publish_system_alerts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Was calling datetime.now(timezone.utc) directly, ignoring the engine's canonical cycle time. record_status, _maybe_raise_smart_control_alert, and the recorded_at field on each alert all use self.now_utc_exact — pruning should too so mocked-time tests and deterministic plan cycles see a consistent view of 'now'. self.now_utc_exact is aware (datetime.now(self.local_tz)); Python compares aware datetimes across timezones correctly, so the compare against an aware expires_dt still works. Ref: PR review --- apps/predbat/output.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/apps/predbat/output.py b/apps/predbat/output.py index d6605ece2..13944b6f2 100644 --- a/apps/predbat/output.py +++ b/apps/predbat/output.py @@ -2475,7 +2475,10 @@ def _publish_system_alerts(self): # now so producers can pass ISO strings with any offset (e.g. CAP # weather alerts arrive as +00:00, octopus alerts as local offset). # Lex comparison on mixed-offset ISO strings would order wrongly. - now_dt = datetime.now(timezone.utc) if hasattr(self, "now_utc_exact") and self.now_utc_exact else None + # Use the engine's canonical "now" (aware, local tz) so mocked-time + # tests and deterministic plan cycles stay consistent. 
Python compares + # aware datetimes across timezones correctly. + now_dt = self.now_utc_exact if hasattr(self, "now_utc_exact") and self.now_utc_exact else None if now_dt is not None: expired_keys = []