WalksWithASwagger · WalksWithASwagger · May 22, 2026 · May 22, 2026
diff --git a/readme.md b/readme.md
@@ -406,6 +406,14 @@ empty, duplicate, stale, oversized, or private-looking docs before they confuse
 the pipeline. Use **Benchmark** to compare whole-KB injection vs RAG and inspect
 the concrete retrieved chunks for a stage/query.
 
+The KB dialog also writes `prompts/<user>/knowledge_base/governance.yaml`.
+Use `canonical_files` for curated voice anchors that should be prioritized and
+labeled in prompts. Use `ignored_files` for stale, duplicate, private-looking,
+or oversized files that should remain auditable but stay out of generation.
+Stale, private, duplicate, or oversized findings on files that are not ignored
+surface as a concise run warning before generation starts, so future agents
+should treat them as review work instead of silently trusting the context.
+
 **Persona** files at `prompts/<user>/personas/*.md` appear beside the built-in
 persona variants in Generation Settings.
 

diff --git a/tests/test_kb_audit.py b/tests/test_kb_audit.py
@@ -52,6 +52,43 @@ def test_duplicate_empty_private_and_stale_warnings(tmp_prompts_dir):
     assert "empty_file" in codes
     assert "private_marker" in codes
     assert "stale_file" in codes
+    assert all(warning.action for warning in audit.warnings)
+
+
+def test_governance_marks_canonical_and_ignored_files(tmp_prompts_dir):
+    """Audit flags canonical/ignored docs from governance.yaml and warns on ignored."""
+    kb_dir = tmp_prompts_dir / "alice" / "knowledge_base"
+    kb_dir.mkdir(parents=True)
+    (kb_dir / "voice.md").write_text("voice")
+    (kb_dir / "old.md").write_text("old")
+    governance = "canonical_files:\n  - voice.md\nignored_files:\n  - old.md\n"
+    (kb_dir / "governance.yaml").write_text(governance)
+
+    result = kb_audit.audit_profile("alice")
+    by_name = {}
+    for entry in result.documents:
+        by_name[entry.name] = entry
+    codes = [item.code for item in result.warnings]
+
+    assert by_name["voice"].canonical is True
+    assert by_name["old"].ignored is True
+    assert "ignored_file" in codes
+
+
+def test_generation_warnings_exclude_ignored_files(tmp_prompts_dir):
+    """Generation warnings skip files listed under ``ignored_files``."""
+    kb = tmp_prompts_dir / "alice" / "knowledge_base"
+    kb.mkdir(parents=True)
+    (kb / "private-token.md").write_text("keep local")
+    (kb / "confidential.md").write_text("ignored")
+    (kb / "governance.yaml").write_text("ignored_files:\n  - confidential.md\n")
+
+    warnings = kb_audit.generation_warnings("alice")
+    # Normalize separators before taking the basename so the comparison does
+    # not depend on the platform's path separator (e.g. backslashes on Windows).
+    names = {w.path.replace("\\", "/").rsplit("/", 1)[-1] for w in warnings if w.path}
+
+    assert names == {"private-token.md"}
 
 
 def test_to_dict_includes_summary(tmp_prompts_dir):

diff --git a/tests/test_prompts.py b/tests/test_prompts.py
@@ -49,6 +49,34 @@ def test_loads_md_and_txt(self, tmp_prompts_dir):
         loaded = prompts.load_knowledge_base("alice")
         assert loaded == {"Voice Guide": "friendly tone", "Style": "short sentences"}
 
+    def test_governance_ignores_files_and_labels_canonical_anchors(self, tmp_prompts_dir):
+        """Ignored files are left out; canonical files load first with an anchor label."""
+        kb_dir = tmp_prompts_dir / "alice" / "knowledge_base"
+        kb_dir.mkdir(parents=True)
+        (kb_dir / "voice.md").write_text("canonical tone")
+        (kb_dir / "notes.md").write_text("usable notes")
+        (kb_dir / "old.md").write_text("ignored notes")
+        governance = (
+            "canonical_files:\n  - voice.md\n"
+            "ignored_files:\n  - old.md\n"
+        )
+        (kb_dir / "governance.yaml").write_text(governance)
+
+        sections = prompts.load_knowledge_base("alice")
+
+        assert list(sections) == ["Canonical Voice Anchor: Voice", "Notes"]
+        assert "Old" not in sections
+
+    def test_generation_warning_summarizes_unresolved_governance(self, tmp_prompts_dir):
+        """The warning text mentions unresolved governance and names the flagged file."""
+        kb_dir = tmp_prompts_dir / "alice" / "knowledge_base"
+        kb_dir.mkdir(parents=True)
+        (kb_dir / "private-token.md").write_text("local secret")
+
+        message = prompts.knowledge_base_generation_warning("alice")
+        for fragment in ("unresolved KB governance", "private-token.md"):
+            assert fragment in message
+
 
 class TestPromptPrecedence:
     def test_custom_prompt_overrides_md(self, tmp_prompts_dir):

diff --git a/ui/dialogs.py b/ui/dialogs.py
@@ -274,6 +274,37 @@ def knowledge_base_manager() -> None:
         f"{summary['approx_tokens']:,} est. tokens · "
         f"{summary['warnings']} signal(s)"
     )
+    if audit.documents:
+        file_names = [Path(doc.path).name for doc in audit.documents]
+        # st.multiselect requires every default to be an exact member of
+        # `options`, so match governance entries case-insensitively but always
+        # emit the spelling taken from the audited documents.
+        canonical_keys = {name.lower() for name in audit.governance.canonical_files}
+        ignored_keys = {name.lower() for name in audit.governance.ignored_files}
+        with st.expander("Review governance", expanded=False):
+            canonical = st.multiselect(
+                "Canonical voice anchors",
+                options=file_names,
+                default=[name for name in file_names if name.lower() in canonical_keys],
+                help="Canonical anchors are prioritized and labeled in generation prompts.",
+                key="kb_governance_canonical",
+            )
+            ignored = st.multiselect(
+                "Ignored files",
+                options=file_names,
+                default=[name for name in file_names if name.lower() in ignored_keys],
+                help="Ignored files stay in the audit but are excluded from generation.",
+                key="kb_governance_ignored",
+            )
+            # Persist both selections in one call, then rerun the script.
+            if st.button("Save governance", type="primary", use_container_width=True):
+                kb_audit_mod.save_governance(
+                    user,
+                    canonical_files=canonical,
+                    ignored_files=ignored,
+                )
+                st.toast("KB governance saved.", icon=":material/check_circle:")
+                st.rerun()
     audit_json = json.dumps(audit.to_dict(), indent=2)
     st.download_button(
         "Download audit JSON",
@@ -292,13 +323,21 @@ def knowledge_base_manager() -> None:
             else:
                 st.info(message)
     if audit.documents:
+        actions_by_path = {
+            warning.path: warning.action
+            for warning in audit.warnings
+            if warning.path
+        }
         st.dataframe(
             [
                 {
                     "File": Path(doc.path).name,
                     "Role": doc.role,
+                    "Canonical": doc.canonical,
+                    "Ignored": doc.ignored,
                     "Tokens": doc.approx_tokens,
                     "Modified": doc.modified_at[:10],
+                    "Action": actions_by_path.get(doc.path, "Ready for generation."),
                 }
                 for doc in audit.documents
             ],

diff --git a/ui/pipeline.py b/ui/pipeline.py
@@ -73,6 +73,10 @@ def _execute_run() -> None:
     s = st.session_state
     adapters = adapters_mod.get_adapters()
     kb = prompts_mod.load_knowledge_base(s.selected_user) if s.selected_user else {}
+    s.kb_governance_warning = (
+        prompts_mod.knowledge_base_generation_warning(s.selected_user)
+        if s.selected_user and kb else None
+    )
 
     # Capture a start timestamp so the Run metrics block can report
     # wall-clock duration. End timestamp is written in `finally`.
@@ -144,6 +148,8 @@ def _execute_run() -> None:
                 return
 
             # ---- Full pipeline ---------------------------------------
+            if s.get("kb_governance_warning"):
+                status.write(f"KB governance warning: {s.kb_governance_warning}")
             _inspect_retrieval(s, status)
             # Progress callback writes to the sac.steps index and streams
             # a status line per stage.
@@ -290,6 +296,7 @@ def _run_metadata(pending, mode: str, s) -> dict:
             s.get("recipe_effective_settings"),
         ),
         "selected_user": s.selected_user,
+        "kb_governance_warning": s.get("kb_governance_warning"),
         "provider": s.ai_provider,
         "model": s.ai_model,
         "settings": {

diff --git a/ui/session.py b/ui/session.py
@@ -112,6 +112,7 @@
     "recipe_effective_settings": None,
     "scorecard_summary": None,
     "handoff_draft_preview": None,
+    "kb_governance_warning": None,
     # Wall-clock timestamps for per-run duration. pipeline.py sets
     # pipeline_started_at before stage 0 and _build_bundle reads both to
     # compute duration_seconds for the Run metrics block.