From 3a120797c94ca0218164b9848194815ed4d638a1 Mon Sep 17 00:00:00 2001 From: victorsjot Date: Fri, 29 May 2026 08:04:53 +0000 Subject: [PATCH 1/2] feat(rules): first-class suggestion provenance + sharper AI proposals MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rule suggestions come from two sources — the deterministic built-in engine (``proactive_*`` heuristics) and the AI integration (``ai_*`` heuristics, one per provider kind) — but provenance was only encoded in the heuristic string, so the frontend had to prefix-match it and AI proposals were under-specified. Schema (RuleSuggestionRead): expose computed ``source`` ("ai" | "builtin"), ``source_label``, ``ai_provider``, ``ai_model`` and ``rationale`` fields derived from the heuristic + evidence. No DB migration — these are Pydantic computed fields over existing columns. AI suggestion service: - Strengthen the system prompt: the rationale MUST cite specific evidence (device/codec/container/counts), and each proposal MUST carry a self-assessed ``confidence`` (0..1) and a ``files_affected`` estimate; added a constraint to COMPLEMENT — not restate — the heuristic suggestions already supplied in context. - Parse + clamp the model's ``confidence`` and ``files_affected`` instead of hardcoding 0.5 / 0, so AI proposals rank against heuristic ones in the same review queue. Adds unit tests for the provenance fields + the coercion helpers. 
# Computed provenance members of ``RuleSuggestionRead``. They are derived
# from the existing ``heuristic`` and ``evidence`` attributes only.

@computed_field  # type: ignore[prop-decorator]
@property
def source(self) -> Literal["ai", "builtin"]:
    """Origin of the suggestion.

    A heuristic name beginning with ``ai_`` marks an AI proposal;
    any other heuristic is attributed to the built-in engine.
    """
    if self.heuristic.startswith("ai_"):
        return "ai"
    return "builtin"

@computed_field  # type: ignore[prop-decorator]
@property
def ai_provider(self) -> str | None:
    """Provider kind for AI suggestions, ``None`` for built-in ones.

    ``evidence['provider_kind']`` wins when it is truthy; otherwise
    the text after the ``ai_`` prefix of the heuristic is used
    (``ai_ollama`` gives ``ollama``). An empty suffix yields ``None``.
    """
    if self.source != "ai":
        return None
    recorded = self.evidence.get("provider_kind")
    if recorded:
        return str(recorded)
    # ``source == "ai"`` guarantees the prefix is present, so stripping
    # it is equivalent to slicing it off.
    return self.heuristic.removeprefix("ai_") or None

@computed_field  # type: ignore[prop-decorator]
@property
def ai_model(self) -> str | None:
    """Model name stored in ``evidence['model']`` for AI suggestions.

    Returns ``None`` for built-in suggestions and when no truthy
    model value was recorded.
    """
    if self.source != "ai":
        return None
    recorded = self.evidence.get("model")
    if not recorded:
        return None
    return str(recorded)

@computed_field  # type: ignore[prop-decorator]
@property
def source_label(self) -> str:
    """Short human-readable label for the source chip.

    ``AI · {provider}`` when a provider is known, plain ``AI`` when it
    is not, and ``Built-in engine`` for everything else.
    """
    if self.source != "ai":
        return "Built-in engine"
    provider = self.ai_provider
    if provider:
        return f"AI · {provider}"
    return "AI"

@computed_field  # type: ignore[prop-decorator]
@property
def rationale(self) -> str | None:
    """One-line "why" taken from ``evidence['rationale']``.

    The value is stringified and stripped; a missing, falsy or
    whitespace-only rationale is reported as ``None``.
    """
    raw = self.evidence.get("rationale")
    if not raw:
        return None
    return str(raw).strip() or None
The user reviews every suggestion before it deploys. + 8. The data includes ``heuristic_suggestions`` already produced + by Auditarr's deterministic engine. Do NOT restate them — + propose rules that COMPLEMENT them, covering codecs, devices, + or patterns the heuristics did not. Quality over quantity: a + few specific, well-justified rules beat many speculative ones. If you cannot propose any rules, return an empty JSON array: []. """ @@ -297,6 +316,14 @@ async def generate( continue name = str(proposal.get("name") or "AI suggestion") + # v1.12 — use the model's self-reported confidence + impact + # estimate (clamped) instead of hardcoding 0.5 / 0. This + # lets AI proposals rank against heuristic ones in the same + # review queue rather than all clustering at 50%. + confidence = _coerce_confidence(proposal.get("confidence")) + files_affected = _coerce_files_affected( + proposal.get("files_affected") + ) evidence = { "rationale": str(proposal.get("rationale") or ""), "provider_kind": provider_kind, @@ -342,9 +369,9 @@ async def generate( definition=definition, heuristic=f"ai_{provider_kind}", evidence=evidence, - files_affected=0, + files_affected=files_affected, est_runtime_s=None, - confidence=0.5, + confidence=confidence, dedup_key=dedup_key, status="pending", ) @@ -810,6 +837,32 @@ def _dedup_key_for_ai( return f"ai:{provider_kind}:{safe_name}:{digest}" +def _coerce_confidence(raw: Any) -> float: + """Clamp the model's self-reported confidence into [0, 1]. + + Missing / non-numeric / NaN → :data:`DEFAULT_AI_CONFIDENCE` so a + model that omits the field still ranks sensibly in the review + queue. Pure function, exposed for testing.""" + try: + value = float(raw) + except (TypeError, ValueError): + return DEFAULT_AI_CONFIDENCE + if value != value: # NaN + return DEFAULT_AI_CONFIDENCE + return max(0.0, min(1.0, value)) + + +def _coerce_files_affected(raw: Any) -> int: + """Non-negative integer estimate of matched files. 
Missing / + invalid → 0 (the UI reads 0 as "no estimate"). Pure function, + exposed for testing.""" + try: + value = int(raw) + except (TypeError, ValueError): + return 0 + return max(0, value) + + def _contains_delete_action(definition: dict[str, Any]) -> bool: """v1.9 audit fix (AI-4) — detect ``delete`` actions in any proposed RuleDefinition. @@ -903,9 +956,12 @@ def _compose_system_prompt(operator_ctx: OperatorContext | None) -> str: __all__ = [ "AISuggestionService", "AISuggestResult", + "DEFAULT_AI_CONFIDENCE", "DEFAULT_DAILY_BUDGET", "SYSTEM_PROMPT", "_anonymize_path", + "_coerce_confidence", + "_coerce_files_affected", "_compose_system_prompt", "_contains_delete_action", "_dedup_key_for_ai", diff --git a/backend/tests/unit/test_rule_suggestion_provenance.py b/backend/tests/unit/test_rule_suggestion_provenance.py new file mode 100644 index 0000000..3d64356 --- /dev/null +++ b/backend/tests/unit/test_rule_suggestion_provenance.py @@ -0,0 +1,88 @@ +"""Provenance fields on RuleSuggestionRead + AI confidence/impact coercion. + +Pins that the API surfaces a first-class ``source`` (and AI provider / +model / rationale) derived from the ``heuristic`` column, so the frontend +labels built-in-engine vs AI suggestions consistently instead of +string-matching the heuristic prefix. 
def _make(heuristic: str, evidence: dict) -> RuleSuggestionRead:
    """Build a pending suggestion; only heuristic and evidence vary."""
    fixed_fields = {
        "id": "s1",
        "name": "n",
        "definition": {},
        "files_affected": 3,
        "est_runtime_s": None,
        "confidence": 0.7,
        "dedup_key": "k",
        "status": "pending",
        "deployed_rule_id": None,
        "deployed_at": None,
        "dismissed_at": None,
        "dismissed_reason": None,
        "created_at": _dt.datetime.now(_dt.UTC),
    }
    return RuleSuggestionRead(
        heuristic=heuristic, evidence=evidence, **fixed_fields
    )


def _provenance(s: RuleSuggestionRead) -> tuple:
    """Collect the five provenance fields for one-shot comparison."""
    return (s.source, s.source_label, s.ai_provider, s.ai_model, s.rationale)


def test_ai_suggestion_exposes_ai_provenance() -> None:
    """An AI suggestion with full evidence surfaces every AI field."""
    why = "Apple TV transcoded HEVC 18x in 30 days"
    suggestion = _make(
        "ai_ollama",
        {
            "rationale": why,
            "model": "qwen2.5-coder:14b",
            "provider_kind": "ollama",
        },
    )
    expected = ("ai", "AI · ollama", "ollama", "qwen2.5-coder:14b", why)
    assert _provenance(suggestion) == expected


def test_ai_provider_falls_back_to_heuristic_suffix() -> None:
    """With no provider_kind recorded, the heuristic suffix is used."""
    suggestion = _make("ai_openai", {})
    assert suggestion.source == "ai"
    assert suggestion.ai_provider == "openai"
    assert suggestion.ai_model is None
    assert suggestion.rationale is None


def test_builtin_suggestion_has_builtin_source_and_no_ai_fields() -> None:
    """A ``proactive_*`` heuristic is built-in and carries no AI fields."""
    suggestion = _make("proactive_bitrate_ceiling", {"client": "Apple TV"})
    assert suggestion.source == "builtin"
    assert suggestion.source_label == "Built-in engine"
    assert suggestion.ai_provider is None
    assert suggestion.ai_model is None
    assert suggestion.rationale is None


def test_coerce_confidence_clamps_and_defaults() -> None:
    """In-range passes through, out-of-range clamps, junk defaults."""
    cases = [
        (0.9, 0.9),
        (2.0, 1.0),
        (-1, 0.0),
        ("0.4", 0.4),
        (None, DEFAULT_AI_CONFIDENCE),
        ("nonsense", DEFAULT_AI_CONFIDENCE),
    ]
    for raw, expected in cases:
        assert _coerce_confidence(raw) == expected


def test_coerce_files_affected_floors_at_zero() -> None:
    """Valid counts pass through; negatives and junk become zero."""
    cases = [(12, 12), ("5", 5), (-3, 0), (None, 0), ("x", 0)]
    for raw, expected in cases:
        assert _coerce_files_affected(raw) == expected
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../dashboard/SuggestionReviewModal.tsx | 41 ++++-- .../features/dashboard/SuggestionsCard.tsx | 36 ++--- .../features/rules/RuleSuggestionsBanner.tsx | 13 +- .../src/features/rules/SuggestionsGrid.tsx | 63 ++------ .../src/features/rules/suggestionDisplay.tsx | 139 ++++++++++++++++++ frontend/src/hooks/useRules.ts | 8 + 6 files changed, 213 insertions(+), 87 deletions(-) create mode 100644 frontend/src/features/rules/suggestionDisplay.tsx diff --git a/frontend/src/features/dashboard/SuggestionReviewModal.tsx b/frontend/src/features/dashboard/SuggestionReviewModal.tsx index bad521b..60061be 100644 --- a/frontend/src/features/dashboard/SuggestionReviewModal.tsx +++ b/frontend/src/features/dashboard/SuggestionReviewModal.tsx @@ -34,17 +34,15 @@ import { cn } from "@/lib/cn"; import { fmtNum } from "@/lib/format"; import { VisualRuleBuilder } from "../rules/VisualRuleBuilder"; +import { + describeSuggestion, + heuristicLabel, + SuggestionSourceBadge, + suggestionSource, +} from "../rules/suggestionDisplay"; type Tab = "visual" | "evidence" | "json"; -const HEURISTIC_LABEL: Record = { - high_transcode_codec: "Transcode codec", - bitrate_ceiling: "Bitrate ceiling", - container_compat: "Container compatibility", - resolution_mismatch: "Resolution mismatch", - failed_playback: "Failed playback", -}; - export function SuggestionReviewModal({ suggestion, onClose, @@ -147,9 +145,15 @@ export function SuggestionReviewModal({ {/* Header */}
- - {HEURISTIC_LABEL[suggestion.heuristic] ?? suggestion.heuristic} - + + {suggestionSource(suggestion) === "builtin" ? ( + + {heuristicLabel(suggestion)} + + ) : null}

Review suggestion

= { - high_transcode_codec: "Transcode codec", - bitrate_ceiling: "Bitrate ceiling", - container_compat: "Container compat", - resolution_mismatch: "Resolution", - failed_playback: "Failed playback", -}; +import { + describeSuggestion, + heuristicLabel, + SuggestionSourceBadge, + suggestionSource, +} from "./suggestionDisplay"; export interface SuggestionsGridProps { onReview: (s: RuleSuggestion) => void; @@ -227,9 +226,7 @@ function SuggestionCard({ ? "var(--sev-info)" : "var(--muted-2)"; - const isAI = suggestion.heuristic.startsWith("ai_"); - const heuristicLabel = - HEURISTIC_LABEL[suggestion.heuristic] ?? suggestion.heuristic; + const isBuiltin = suggestionSource(suggestion) === "builtin"; return ( @@ -249,11 +246,9 @@ function SuggestionCard({ flexWrap: "wrap", }} > - {heuristicLabel} - {isAI ? ( - - AI - + + {isBuiltin ? ( + {heuristicLabel(suggestion)} ) : null} ``). + * + * The backend now exposes a first-class ``source`` / ``source_label`` / + * ``ai_model`` / ``rationale`` on each suggestion; these helpers consume + * those fields (falling back to the legacy heuristic-prefix derivation + * for any caller/test fixture that predates them) so the Rules grid, + * the dashboard card, and the review modal all label provenance the + * same way. + */ +import { Pill } from "@/components/ui/Pill"; +import { fmtNum } from "@/lib/format"; +import type { RuleSuggestion } from "@/hooks/useRules"; + +/** Friendly label for a heuristic key. Covers the current + * ``proactive_*`` engine heuristics plus the older Stage-16 names + * kept for back-compat with historical suggestions. */ +export const HEURISTIC_LABEL: Record = { + // Current built-in engine. 
/** Provenance of a suggestion. The backend's ``source`` is used when it
 * is one of the two known values; otherwise the ``ai_`` heuristic
 * prefix decides. */
export function suggestionSource(s: RuleSuggestion): "ai" | "builtin" {
  const declared = s.source;
  if (declared === "ai" || declared === "builtin") {
    return declared;
  }
  if (s.heuristic.startsWith("ai_")) {
    return "ai";
  }
  return "builtin";
}

/** Friendly category label for the heuristic, falling back to the raw
 * heuristic key when the label map has no entry for it. */
export function heuristicLabel(s: RuleSuggestion): string {
  const friendly = HEURISTIC_LABEL[s.heuristic];
  return friendly ?? s.heuristic;
}

/**
 * Human-readable "why" for a suggestion. A non-blank ``rationale`` on
 * the suggestion always wins; otherwise a per-heuristic description is
 * built, using values from ``evidence`` where the case reads them.
 * Unknown heuristics get a generic sentence for AI suggestions and
 * ``null`` for everything else.
 */
export function describeSuggestion(s: RuleSuggestion): string | null {
  const supplied = s.rationale?.trim();
  if (supplied) {
    return supplied;
  }

  const ev = s.evidence ?? {};

  switch (s.heuristic) {
    case "proactive_bitrate_ceiling":
      return "Files above your bandwidth ceiling repeatedly force transcodes — capping their bitrate eases network and server load.";

    case "proactive_codec_compat":
    case "high_transcode_codec": {
      const codec = (ev.source_codec as string) ?? null;
      if (!codec) {
        return "A codec your clients can't direct-play keeps triggering transcodes — re-encoding lets them play natively.";
      }
      // Evaluated only when a codec is present.
      const transcodes = (ev.transcode_count as number) ?? s.files_affected;
      return `Detected ${fmtNum(transcodes)} transcodes from ${codec}. Re-encoding the source lets these clients direct-play.`;
    }

    case "proactive_container_compat":
    case "container_compat": {
      const container = (ev.container as string) ?? null;
      if (container) {
        return `Clients can't direct-play ${container}. Remuxing into a friendlier container avoids needless transcoding.`;
      }
      return "Clients can't direct-play this container — remuxing avoids needless transcoding.";
    }

    case "proactive_resolution_ceiling":
    case "resolution_mismatch":
      return "Source resolution exceeds what these clients decode locally — downscaling cuts transcodes.";

    case "proactive_subtitle_burn":
      return "Burned-in subtitles are forcing video transcodes — switching to soft subtitles lets clients direct-play.";

    case "proactive_look_ahead_tv":
      return "Upcoming episodes share the same problematic profile as one that just transcoded — optimizing them ahead of time prevents repeats.";

    case "bitrate_ceiling": {
      const capKbps = (ev.bitrate_threshold_kbps as number) ?? null;
      if (capKbps) {
        return `Files above ${(capKbps / 1000).toFixed(1)} Mbps repeatedly trip the network ceiling — capping their bitrate reduces transcode load.`;
      }
      return "Files above a high bitrate ceiling repeatedly trip the network — capping helps.";
    }

    case "failed_playback": {
      const failures = (ev.failure_count as number) ?? s.files_affected;
      return `${fmtNum(failures)} playback failures in window — re-encoding usually fixes the underlying decode issue.`;
    }

    default: {
      const fromAI = suggestionSource(s) === "ai";
      return fromAI
        ? "An AI provider proposed this rule from your playback data. Review the definition before deploying."
        : null;
    }
  }
}