From 0049649fe7847dad23974c5ea08640c33ab533f5 Mon Sep 17 00:00:00 2001 From: Linh Ngo Date: Sat, 30 May 2026 15:13:26 +0700 Subject: [PATCH 1/6] =?UTF-8?q?feat(knowledge):=20sk=20knowledge=20bulk-ta?= =?UTF-8?q?g=20=E2=80=94=20batch=20retag=20entries=20by=20query/filter=20(?= =?UTF-8?q?#722)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - knowledge-health.py: cmd_bulk_tag with --query/--wing/--room/--tag selectors, --add-tag/--set-wing/--set-room mutations, dry-run by default, --apply to commit - sk.py: wire sk knowledge bulk-tag routing - test_fixes.py: +28 tests (I722) Closes #722 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- knowledge-health.py | 140 +++++++++++++++++++++ sk.py | 3 + test_fixes.py | 292 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 435 insertions(+) diff --git a/knowledge-health.py b/knowledge-health.py index ef130055..17e4d28d 100644 --- a/knowledge-health.py +++ b/knowledge-health.py @@ -2592,6 +2592,10 @@ def main(): print(format_insights_report(insights)) return + if args and args[0] == "bulk-tag": + cmd_bulk_tag(args[1:]) + return + if args and args[0] == "pin": if len(args) < 2: print("Usage: knowledge-health.py pin ", file=sys.stderr) @@ -2701,5 +2705,141 @@ def cmd_pins() -> None: print(f"⚠ pins failed: {exc}", file=sys.stderr) +def cmd_bulk_tag(args: list) -> None: + """Bulk-tag knowledge entries by selector + mutation. 
+ + Selectors (at least one required): + --query TEXT Substring match on title+content + --wing W Filter by wing + --room R Filter by room + --tag T Filter by existing tag + + Mutations (at least one required): + --add-tag TAG Add tag to matched entries + --set-wing W Set wing on matched entries + --set-room R Set room on matched entries + + Flags: + --apply Commit changes (dry-run by default) + """ + import datetime as _dt_bt + + query_text = None + wing = None + room = None + tag = None + add_tag = None + set_wing = None + set_room = None + apply_flag = "--apply" in args + + if "--query" in args: + idx = args.index("--query") + query_text = args[idx + 1] if idx + 1 < len(args) else None + if "--wing" in args: + idx = args.index("--wing") + wing = args[idx + 1] if idx + 1 < len(args) else None + if "--room" in args: + idx = args.index("--room") + room = args[idx + 1] if idx + 1 < len(args) else None + if "--tag" in args: + idx = args.index("--tag") + tag = args[idx + 1] if idx + 1 < len(args) else None + if "--add-tag" in args: + idx = args.index("--add-tag") + add_tag = args[idx + 1] if idx + 1 < len(args) else None + if "--set-wing" in args: + idx = args.index("--set-wing") + set_wing = args[idx + 1] if idx + 1 < len(args) else None + if "--set-room" in args: + idx = args.index("--set-room") + set_room = args[idx + 1] if idx + 1 < len(args) else None + + has_selector = any(v is not None for v in [query_text, wing, room, tag]) + has_mutation = any(v is not None for v in [add_tag, set_wing, set_room]) + + if not has_selector: + print( + "⚠ bulk-tag requires at least one selector (--query, --wing, --room, --tag).", + file=sys.stderr, + ) + sys.exit(1) + if not has_mutation: + print( + "⚠ bulk-tag requires at least one mutation (--add-tag, --set-wing, --set-room).", + file=sys.stderr, + ) + sys.exit(1) + + try: + db = get_db() + base = "SELECT DISTINCT ke.id FROM knowledge_entries ke" + where: list[str] = ["ke.deleted_at IS NULL"] + params: list = [] + + if tag: + 
base += " JOIN entry_concept_tags ect ON ke.id = ect.entry_id" + where.append("ect.tag = ?") + params.append(tag) + if wing: + where.append("ke.wing = ?") + params.append(wing) + if room: + where.append("ke.room = ?") + params.append(room) + if query_text: + where.append("(ke.title LIKE ? OR ke.content LIKE ?)") + like_val = "%" + query_text + "%" + params.extend([like_val, like_val]) + + sel_query = base + " WHERE " + " AND ".join(where) + matched_ids = [r[0] for r in db.execute(sel_query, params).fetchall()] + count = len(matched_ids) + + print(f"{count} entr(ies) would be affected.") + if not apply_flag: + print("(Dry-run — pass --apply to commit changes.)") + db.close() + return + + if count == 0: + print("Nothing to update.") + db.close() + return + + now_str = _dt_bt.datetime.utcnow().isoformat() + if set_wing is not None: + for eid in matched_ids: + db.execute( + "UPDATE knowledge_entries SET wing=? WHERE id=?", + (set_wing, eid), + ) + if set_room is not None: + for eid in matched_ids: + db.execute( + "UPDATE knowledge_entries SET room=? WHERE id=?", + (set_room, eid), + ) + if add_tag is not None: + for eid in matched_ids: + existing = db.execute( + "SELECT 1 FROM entry_concept_tags WHERE entry_id=? 
AND tag=?", + (eid, add_tag), + ).fetchone() + if not existing: + db.execute( + "INSERT INTO entry_concept_tags (entry_id, tag, source, tagged_at)" + " VALUES (?,?,?,?)", + (eid, add_tag, "bulk-tag", now_str), + ) + + db.commit() + db.close() + print(f"✅ Applied to {count} entr(ies).") + except Exception as exc: + print(f"⚠ bulk-tag failed: {exc}", file=sys.stderr) + sys.exit(1) + + if __name__ == "__main__": main() diff --git a/sk.py b/sk.py index 1e488fa6..e7419f66 100644 --- a/sk.py +++ b/sk.py @@ -246,6 +246,7 @@ "pin": "knowledge-health.py", "unpin": "knowledge-health.py", "pins": "knowledge-health.py", + "bulk-tag": "knowledge-health.py", }, } @@ -1009,6 +1010,8 @@ def main(argv: list[str] | None = None) -> int: return _run(_GROUPS[cmd][sub], ["--list"] + sub_rest) if cmd == "knowledge" and sub in ("pin", "unpin", "pins"): return _run(_GROUPS[cmd][sub], [sub] + sub_rest) + if cmd == "knowledge" and sub == "bulk-tag": + return _run(_GROUPS[cmd][sub], ["bulk-tag"] + sub_rest) return _run(_GROUPS[cmd][sub], sub_rest) # Unknown diff --git a/test_fixes.py b/test_fixes.py index 30ce9677..6e6003d4 100755 --- a/test_fixes.py +++ b/test_fixes.py @@ -9781,3 +9781,295 @@ def _make_i720_db(db_path: Path) -> None: for _fn in FAIL_NAMES: print(f" ❌ {_fn}") sys.exit(0 if FAIL == 0 else 1) + +# === I722: Knowledge Bulk-Tag === +# --------------------------------------------------------------------------- +print("\n🔍 I722: sk knowledge bulk-tag command") + +try: + import importlib.util as _ilu722 + import sqlite3 as _sq722 + import io as _io722 + import sys as _sys722 + import os as _os722 + + _kh722_spec = _ilu722.spec_from_file_location("kh722", REPO / "knowledge-health.py") + _kh722 = _ilu722.module_from_spec(_kh722_spec) # type: ignore[arg-type] + _kh722_spec.loader.exec_module(_kh722) # type: ignore[union-attr] + + test("I722-1: cmd_bulk_tag function exists", hasattr(_kh722, "cmd_bulk_tag") and callable(_kh722.cmd_bulk_tag)) + + # Build isolated test DB + _td722 = REPO / 
f".test_i722_{_os722.getpid()}.db" + _db722 = _sq722.connect(str(_td722)) + _db722.executescript(""" + CREATE TABLE knowledge_entries ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + title TEXT DEFAULT '', + content TEXT DEFAULT '', + category TEXT DEFAULT 'mistake', + confidence REAL DEFAULT 0.8, + tags TEXT DEFAULT '', + priority TEXT DEFAULT 'P2', + wing TEXT, + room TEXT, + deleted_at TEXT, + first_seen TEXT DEFAULT (datetime('now')) + ); + CREATE TABLE entry_concept_tags ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + entry_id INTEGER NOT NULL, + tag TEXT NOT NULL, + source TEXT DEFAULT '', + tagged_at TEXT DEFAULT (datetime('now')) + ); + INSERT INTO knowledge_entries (id, title, content, wing, room) VALUES + (1, 'Alpha python entry', 'some python content', 'engineering', 'backend'), + (2, 'Beta rust entry', 'some rust content', 'engineering', 'systems'), + (3, 'Gamma docs entry', 'documentation body', 'product', 'docs'); + INSERT INTO entry_concept_tags (entry_id, tag, source) VALUES (1, 'python', 'manual'); + """) + _db722.commit() + _db722.close() + + _orig_db722 = _kh722.DB_PATH + _kh722.DB_PATH = _td722 + _orig_stdout722 = _sys722.stdout + _orig_stderr722 = _sys722.stderr + + # I722-2: error when no selector given + _stderr722 = _io722.StringIO() + _sys722.stderr = _stderr722 + _raised722_nosel = False + try: + _kh722.cmd_bulk_tag(["--add-tag", "newtag"]) + except SystemExit: + _raised722_nosel = True + except Exception: + _raised722_nosel = True + finally: + _sys722.stderr = _orig_stderr722 + _err722_nosel = _stderr722.getvalue() + test("I722-2: error when no selector given", + _raised722_nosel or "selector" in _err722_nosel.lower() or "require" in _err722_nosel.lower(), + f"err={_err722_nosel!r}") + + # I722-3: error when no mutation given + _stderr722b = _io722.StringIO() + _sys722.stderr = _stderr722b + _raised722_nomut = False + try: + _kh722.cmd_bulk_tag(["--wing", "engineering"]) + except SystemExit: + _raised722_nomut = True + except Exception: + 
_raised722_nomut = True + finally: + _sys722.stderr = _orig_stderr722 + _err722_nomut = _stderr722b.getvalue() + test("I722-3: error when no mutation given", + _raised722_nomut or "mutation" in _err722_nomut.lower() or "require" in _err722_nomut.lower(), + f"err={_err722_nomut!r}") + + # I722-4: dry-run shows count, no changes applied + _buf722 = _io722.StringIO() + _sys722.stdout = _buf722 + try: + _kh722.cmd_bulk_tag(["--wing", "engineering", "--add-tag", "bulk-tested"]) + except SystemExit: + pass + finally: + _sys722.stdout = _orig_stdout722 + _out722_dry = _buf722.getvalue() + test("I722-4a: dry-run prints count message", + "would be affected" in _out722_dry or "entr" in _out722_dry, + f"out={_out722_dry!r}") + test("I722-4b: dry-run prints dry-run notice", + "dry" in _out722_dry.lower() or "--apply" in _out722_dry, + f"out={_out722_dry!r}") + _db722v = _sq722.connect(str(_td722)) + _dry_tags = _db722v.execute("SELECT tag FROM entry_concept_tags WHERE tag='bulk-tested'").fetchall() + _db722v.close() + test("I722-4c: dry-run makes no changes to DB", len(_dry_tags) == 0, f"found tags={_dry_tags}") + + # I722-5: --apply actually updates entries (--set-room) + _buf722b = _io722.StringIO() + _sys722.stdout = _buf722b + try: + _kh722.cmd_bulk_tag(["--wing", "engineering", "--set-room", "infra", "--apply"]) + except SystemExit: + pass + finally: + _sys722.stdout = _orig_stdout722 + _out722_apply = _buf722b.getvalue() + _db722w = _sq722.connect(str(_td722)) + _infra_rows = _db722w.execute( + "SELECT id FROM knowledge_entries WHERE room='infra'" + ).fetchall() + _db722w.close() + test("I722-5a: --apply updates DB", len(_infra_rows) >= 2, f"rows={_infra_rows}") + test("I722-5b: --apply prints Applied message", + "Applied" in _out722_apply or "applied" in _out722_apply, + f"out={_out722_apply!r}") + + # I722-6: --query filter selects matching entries only + _buf722c = _io722.StringIO() + _sys722.stdout = _buf722c + try: + _kh722.cmd_bulk_tag(["--query", "python",
"--add-tag", "query-tested", "--apply"]) + except SystemExit: + pass + finally: + _sys722.stdout = _orig_stdout722 + _db722q = _sq722.connect(str(_td722)) + _qtag_rows = _db722q.execute( + "SELECT entry_id FROM entry_concept_tags WHERE tag='query-tested'" + ).fetchall() + _db722q.close() + test("I722-6a: --query selects matching entries", len(_qtag_rows) >= 1, f"rows={_qtag_rows}") + test("I722-6b: --query does not tag non-matching entries", + all(r[0] == 1 for r in _qtag_rows), + f"unexpected entries tagged: {_qtag_rows}") + + # I722-7: --add-tag inserts into entry_concept_tags + _buf722d = _io722.StringIO() + _sys722.stdout = _buf722d + try: + _kh722.cmd_bulk_tag(["--wing", "product", "--add-tag", "tagged-product", "--apply"]) + except SystemExit: + pass + finally: + _sys722.stdout = _orig_stdout722 + _db722t = _sq722.connect(str(_td722)) + _tag_rows = _db722t.execute( + "SELECT entry_id, tag, source FROM entry_concept_tags WHERE tag='tagged-product'" + ).fetchall() + _db722t.close() + test("I722-7a: --add-tag inserts into entry_concept_tags", len(_tag_rows) >= 1, + f"rows={_tag_rows}") + test("I722-7b: --add-tag sets source='bulk-tag'", + all(r[2] == "bulk-tag" for r in _tag_rows), + f"rows={_tag_rows}") + + # I722-8: --set-wing updates wing column + _buf722e = _io722.StringIO() + _sys722.stdout = _buf722e + try: + _kh722.cmd_bulk_tag(["--room", "docs", "--set-wing", "newwing", "--apply"]) + except SystemExit: + pass + finally: + _sys722.stdout = _orig_stdout722 + _db722sw = _sq722.connect(str(_td722)) + _wing_rows = _db722sw.execute( + "SELECT id, wing FROM knowledge_entries WHERE wing='newwing'" + ).fetchall() + _db722sw.close() + test("I722-8: --set-wing updates wing column", len(_wing_rows) >= 1, f"rows={_wing_rows}") + + # I722-9: --tag selector filters by existing tag + _buf722f = _io722.StringIO() + _sys722.stdout = _buf722f + try: + _kh722.cmd_bulk_tag(["--tag", "python", "--add-tag", "tag-filter-tested", "--apply"]) + except SystemExit: + pass + 
finally: + _sys722.stdout = _orig_stdout722 + _db722tf = _sq722.connect(str(_td722)) + _tf_rows = _db722tf.execute( + "SELECT entry_id FROM entry_concept_tags WHERE tag='tag-filter-tested'" + ).fetchall() + _db722tf.close() + test("I722-9a: --tag selector matches entries with that tag", len(_tf_rows) == 1, + f"rows={_tf_rows}") + test("I722-9b: --tag selector only affects entry with that tag", + _tf_rows[0][0] == 1 if _tf_rows else False, + f"rows={_tf_rows}") + + # I722-10: --add-tag is idempotent (no duplicate tags) + _buf722g = _io722.StringIO() + _sys722.stdout = _buf722g + try: + _kh722.cmd_bulk_tag(["--tag", "python", "--add-tag", "tag-filter-tested", "--apply"]) + except SystemExit: + pass + finally: + _sys722.stdout = _orig_stdout722 + _db722idem = _sq722.connect(str(_td722)) + _idem_rows = _db722idem.execute( + "SELECT COUNT(*) FROM entry_concept_tags WHERE entry_id=1 AND tag='tag-filter-tested'" + ).fetchone() + _db722idem.close() + test("I722-10: --add-tag is idempotent (no duplicate tags)", _idem_rows[0] == 1, + f"count={_idem_rows[0]}") + + # I722-11: source checks + _kh_src722 = (REPO / "knowledge-health.py").read_text(encoding="utf-8") + _bt_body = _kh_src722.split("def cmd_bulk_tag")[1].split("\ndef ")[0] if "def cmd_bulk_tag" in _kh_src722 else "" + test("I722-11a: cmd_bulk_tag uses ? 
placeholders (no f-string SQL)", + "f\"SELECT" not in _bt_body and "f'SELECT" not in _bt_body + and "f\"UPDATE" not in _bt_body and "f'UPDATE" not in _bt_body + and "f\"INSERT" not in _bt_body and "f'INSERT" not in _bt_body, + "f-string SQL found in cmd_bulk_tag") + test("I722-11b: cmd_bulk_tag handles --query selector", + '"--query"' in _bt_body or "'--query'" in _bt_body, + "--query not handled in cmd_bulk_tag") + test("I722-11c: cmd_bulk_tag handles --add-tag mutation", + '"--add-tag"' in _bt_body or "'--add-tag'" in _bt_body, + "--add-tag not handled in cmd_bulk_tag") + test("I722-11d: cmd_bulk_tag handles --set-wing mutation", + '"--set-wing"' in _bt_body or "'--set-wing'" in _bt_body, + "--set-wing not handled in cmd_bulk_tag") + test("I722-11e: cmd_bulk_tag uses entry_concept_tags for --add-tag", + "entry_concept_tags" in _bt_body, + "entry_concept_tags not referenced in cmd_bulk_tag") + + # I722-12: sk.py routing checks + _sk_src722 = (REPO / "sk.py").read_text(encoding="utf-8") + test("I722-12a: sk.py has 'bulk-tag' in knowledge group", + '"bulk-tag"' in _sk_src722 or "'bulk-tag'" in _sk_src722, + "bulk-tag route not found in sk.py") + test("I722-12b: sk.py dispatches bulk-tag subcommand", + "bulk-tag" in _sk_src722, + "bulk-tag dispatch not found in sk.py") + + # I722-13: main() routes bulk-tag subcommand + _main_body = _kh_src722.split("def main")[1] if "def main" in _kh_src722 else "" + test("I722-13: main() handles bulk-tag subcommand", + '"bulk-tag"' in _main_body or "'bulk-tag'" in _main_body, + "bulk-tag routing not found in main()") + + # I722-14: multiple mutations work together + _buf722h = _io722.StringIO() + _sys722.stdout = _buf722h + try: + _kh722.cmd_bulk_tag([ + "--wing", "engineering", "--set-wing", "eng2", "--add-tag", "multi-mutate", "--apply" + ]) + except SystemExit: + pass + finally: + _sys722.stdout = _orig_stdout722 + _db722mm = _sq722.connect(str(_td722)) + _mm_wing = _db722mm.execute( + "SELECT COUNT(*) FROM knowledge_entries 
WHERE wing='eng2'" + ).fetchone() + _mm_tag = _db722mm.execute( + "SELECT COUNT(*) FROM entry_concept_tags WHERE tag='multi-mutate'" + ).fetchone() + _db722mm.close() + test("I722-14a: multiple mutations: set-wing applied", _mm_wing[0] >= 1, + f"wing count={_mm_wing[0]}") + test("I722-14b: multiple mutations: add-tag applied", _mm_tag[0] >= 1, + f"tag count={_mm_tag[0]}") + + # cleanup + _kh722.DB_PATH = _orig_db722 + try: + _td722.unlink() + except Exception: + pass + +except Exception as _e722: + test("I722: bulk-tag test setup", False, str(_e722)) From 4ee801ce356abc59a01a741ad594ff95a5c73dc2 Mon Sep 17 00:00:00 2001 From: Linh Ngo Date: Sat, 30 May 2026 15:57:26 +0700 Subject: [PATCH 2/6] fix(tests): move sys.exit() after I722 tests so they actually run --- test_fixes.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/test_fixes.py b/test_fixes.py index 6e6003d4..5fd90677 100755 --- a/test_fixes.py +++ b/test_fixes.py @@ -9773,17 +9773,7 @@ def _make_i720_db(db_path: Path) -> None: except Exception as _e724_16: test("I724-16: sk session stats routing", False, str(_e724_16)) -# --------------------------------------------------------------------------- -if FAIL == 0: - print("🎉 All tests passed!") -else: - print(f"⚠️ {FAIL} test(s) need attention") - for _fn in FAIL_NAMES: - print(f" ❌ {_fn}") -sys.exit(0 if FAIL == 0 else 1) - # === I722: Knowledge Bulk-Tag === -# --------------------------------------------------------------------------- print("\n🔍 I722: sk knowledge bulk-tag command") try: @@ -10073,3 +10063,10 @@ def _make_i720_db(db_path: Path) -> None: except Exception as _e722: test("I722: bulk-tag test setup", False, str(_e722)) + +# --------------------------------------------------------------------------- +if FAIL == 0: + print("🎉 All tests passed!") +else: + print(f"⚠️ {FAIL} test(s) need attention") +sys.exit(0 if FAIL == 0 else 1) From da6e603987147795a4b76e7475d4fc62cfac977f Mon Sep 17 00:00:00 2001 From: Linh 
Ngo Date: Sat, 30 May 2026 15:57:33 +0700 Subject: [PATCH 3/6] fix(lint): ruff auto-fix test_fixes.py --- test_fixes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test_fixes.py b/test_fixes.py index 5fd90677..d8b90c55 100755 --- a/test_fixes.py +++ b/test_fixes.py @@ -9778,10 +9778,10 @@ def _make_i720_db(db_path: Path) -> None: try: import importlib.util as _ilu722 - import sqlite3 as _sq722 import io as _io722 - import sys as _sys722 import os as _os722 + import sqlite3 as _sq722 + import sys as _sys722 _kh722_spec = _ilu722.spec_from_file_location("kh722", REPO / "knowledge-health.py") _kh722 = _ilu722.module_from_spec(_kh722_spec) # type: ignore[arg-type] From c4f744b4695635b82c8824830286e4b9ce780daa Mon Sep 17 00:00:00 2001 From: Linh Ngo Date: Sat, 30 May 2026 16:04:41 +0700 Subject: [PATCH 4/6] fix(format): ruff format test_fixes.py --- test_fixes.py | 176 +++++++++++++++++++++++++++----------------------- 1 file changed, 95 insertions(+), 81 deletions(-) diff --git a/test_fixes.py b/test_fixes.py index d8b90c55..ed14c1ed 100755 --- a/test_fixes.py +++ b/test_fixes.py @@ -9840,9 +9840,11 @@ def _make_i720_db(db_path: Path) -> None: finally: _sys722.stderr = _orig_stderr722 _err722_nosel = _stderr722.getvalue() - test("I722-2: error when no selector given", - _raised722_nosel or "selector" in _err722_nosel.lower() or "require" in _err722_nosel.lower(), - f"err={_err722_nosel!r}") + test( + "I722-2: error when no selector given", + _raised722_nosel or "selector" in _err722_nosel.lower() or "require" in _err722_nosel.lower(), + f"err={_err722_nosel!r}", + ) # I722-3: error when no mutation given _stderr722b = _io722.StringIO() @@ -9857,9 +9859,11 @@ def _make_i720_db(db_path: Path) -> None: finally: _sys722.stderr = _orig_stderr722 _err722_nomut = _stderr722b.getvalue() - test("I722-3: error when no mutation given", - _raised722_nomut or "mutation" in _err722_nomut.lower() or "require" in _err722_nomut.lower(), - 
f"err={_err722_nomut!r}") + test( + "I722-3: error when no mutation given", + _raised722_nomut or "mutation" in _err722_nomut.lower() or "require" in _err722_nomut.lower(), + f"err={_err722_nomut!r}", + ) # I722-4: dry-run shows count, no changes applied _buf722 = _io722.StringIO() @@ -9871,12 +9875,16 @@ def _make_i720_db(db_path: Path) -> None: finally: _sys722.stdout = _orig_stdout722 _out722_dry = _buf722.getvalue() - test("I722-4a: dry-run prints count message", - "would be affected" in _out722_dry or "entr" in _out722_dry, - f"out={_out722_dry!r}") - test("I722-4b: dry-run prints dry-run notice", - "dry" in _out722_dry.lower() or "--apply" in _out722_dry, - f"out={_out722_dry!r}") + test( + "I722-4a: dry-run prints count message", + "would be affected" in _out722_dry or "entr" in _out722_dry, + f"out={_out722_dry!r}", + ) + test( + "I722-4b: dry-run prints dry-run notice", + "dry" in _out722_dry.lower() or "--apply" in _out722_dry, + f"out={_out722_dry!r}", + ) _db722v = _sq722.connect(str(_td722)) _dry_tags = _db722v.execute("SELECT tag FROM entry_concept_tags WHERE tag='bulk-tested'").fetchall() _db722v.close() @@ -9893,14 +9901,14 @@ def _make_i720_db(db_path: Path) -> None: _sys722.stdout = _orig_stdout722 _out722_apply = _buf722b.getvalue() _db722w = _sq722.connect(str(_td722)) - _infra_rows = _db722w.execute( - "SELECT id FROM knowledge_entries WHERE room='infra'" - ).fetchall() + _infra_rows = _db722w.execute("SELECT id FROM knowledge_entries WHERE room='infra'").fetchall() _db722w.close() test("I722-5a: --apply updates DB", len(_infra_rows) >= 2, f"rows={_infra_rows}") - test("I722-5b: --apply prints Applied message", - "Applied" in _out722_apply or "applied" in _out722_apply, - f"out={_out722_apply!r}") + test( + "I722-5b: --apply prints Applied message", + "Applied" in _out722_apply or "applied" in _out722_apply, + f"out={_out722_apply!r}", + ) # I722-6: --query filter selects matching entries only _buf722c = _io722.StringIO() @@ -9912,14 +9920,14 
@@ def _make_i720_db(db_path: Path) -> None: finally: _sys722.stdout = _orig_stdout722 _db722q = _sq722.connect(str(_td722)) - _qtag_rows = _db722q.execute( - "SELECT entry_id FROM entry_concept_tags WHERE tag='query-tested'" - ).fetchall() + _qtag_rows = _db722q.execute("SELECT entry_id FROM entry_concept_tags WHERE tag='query-tested'").fetchall() _db722q.close() test("I722-6a: --query selects matching entries", len(_qtag_rows) >= 1, f"rows={_qtag_rows}") - test("I722-6b: --query does not tag non-matching entries", - all(r[0] == 1 for r in _qtag_rows), - f"unexpected entries tagged: {_qtag_rows}") + test( + "I722-6b: --query does not tag non-matching entries", + all(r[0] == 1 for r in _qtag_rows), + f"unexpected entries tagged: {_qtag_rows}", + ) # I722-7: --add-tag inserts into entry_concept_tags _buf722d = _io722.StringIO() @@ -9935,11 +9943,8 @@ def _make_i720_db(db_path: Path) -> None: "SELECT entry_id, tag, source FROM entry_concept_tags WHERE tag='tagged-product'" ).fetchall() _db722t.close() - test("I722-7a: --add-tag inserts into entry_concept_tags", len(_tag_rows) >= 1, - f"rows={_tag_rows}") - test("I722-7b: --add-tag sets source='bulk-tag'", - all(r[2] == "bulk-tag" for r in _tag_rows), - f"rows={_tag_rows}") + test("I722-7a: --add-tag inserts into entry_concept_tags", len(_tag_rows) >= 1, f"rows={_tag_rows}") + test("I722-7b: --add-tag sets source='bulk-tag'", all(r[2] == "bulk-tag" for r in _tag_rows), f"rows={_tag_rows}") # I722-8: --set-wing updates wing column _buf722e = _io722.StringIO() @@ -9951,9 +9956,7 @@ def _make_i720_db(db_path: Path) -> None: finally: _sys722.stdout = _orig_stdout722 _db722sw = _sq722.connect(str(_td722)) - _wing_rows = _db722sw.execute( - "SELECT id, wing FROM knowledge_entries WHERE wing='newwing'" - ).fetchall() + _wing_rows = _db722sw.execute("SELECT id, wing FROM knowledge_entries WHERE wing='newwing'").fetchall() _db722sw.close() test("I722-8: --set-wing updates wing column", len(_wing_rows) >= 1, 
f"rows={_wing_rows}") @@ -9967,15 +9970,14 @@ def _make_i720_db(db_path: Path) -> None: finally: _sys722.stdout = _orig_stdout722 _db722tf = _sq722.connect(str(_td722)) - _tf_rows = _db722tf.execute( - "SELECT entry_id FROM entry_concept_tags WHERE tag='tag-filter-tested'" - ).fetchall() + _tf_rows = _db722tf.execute("SELECT entry_id FROM entry_concept_tags WHERE tag='tag-filter-tested'").fetchall() _db722tf.close() - test("I722-9a: --tag selector matches entries with that tag", len(_tf_rows) == 1, - f"rows={_tf_rows}") - test("I722-9b: --tag selector only affects entry with that tag", - _tf_rows[0][0] == 1 if _tf_rows else False, - f"rows={_tf_rows}") + test("I722-9a: --tag selector matches entries with that tag", len(_tf_rows) == 1, f"rows={_tf_rows}") + test( + "I722-9b: --tag selector only affects entry with that tag", + _tf_rows[0][0] == 1 if _tf_rows else False, + f"rows={_tf_rows}", + ) # I722-10: --add-tag is idempotent (no duplicate tags) _buf722g = _io722.StringIO() @@ -9991,68 +9993,78 @@ def _make_i720_db(db_path: Path) -> None: "SELECT COUNT(*) FROM entry_concept_tags WHERE entry_id=1 AND tag='tag-filter-tested'" ).fetchone() _db722idem.close() - test("I722-10: --add-tag is idempotent (no duplicate tags)", _idem_rows[0] == 1, - f"count={_idem_rows[0]}") + test("I722-10: --add-tag is idempotent (no duplicate tags)", _idem_rows[0] == 1, f"count={_idem_rows[0]}") # I722-11: source checks _kh_src722 = (REPO / "knowledge-health.py").read_text(encoding="utf-8") _bt_body = _kh_src722.split("def cmd_bulk_tag")[1].split("\ndef ")[0] if "def cmd_bulk_tag" in _kh_src722 else "" - test("I722-11a: cmd_bulk_tag uses ? 
placeholders (no f-string SQL)", - "f\"SELECT" not in _bt_body and "f'SELECT" not in _bt_body - and "f\"UPDATE" not in _bt_body and "f'UPDATE" not in _bt_body - and "f\"INSERT" not in _bt_body and "f'INSERT" not in _bt_body, - "f-string SQL found in cmd_bulk_tag") - test("I722-11b: cmd_bulk_tag handles --query selector", - '"--query"' in _bt_body or "'--query'" in _bt_body, - "--query not handled in cmd_bulk_tag") - test("I722-11c: cmd_bulk_tag handles --add-tag mutation", - '"--add-tag"' in _bt_body or "'--add-tag'" in _bt_body, - "--add-tag not handled in cmd_bulk_tag") - test("I722-11d: cmd_bulk_tag handles --set-wing mutation", - '"--set-wing"' in _bt_body or "'--set-wing'" in _bt_body, - "--set-wing not handled in cmd_bulk_tag") - test("I722-11e: cmd_bulk_tag uses entry_concept_tags for --add-tag", - "entry_concept_tags" in _bt_body, - "entry_concept_tags not referenced in cmd_bulk_tag") + test( + "I722-11a: cmd_bulk_tag uses ? placeholders (no f-string SQL)", + 'f"SELECT' not in _bt_body + and "f'SELECT" not in _bt_body + and 'f"UPDATE' not in _bt_body + and "f'UPDATE" not in _bt_body + and 'f"INSERT' not in _bt_body + and "f'INSERT" not in _bt_body, + "f-string SQL found in cmd_bulk_tag", + ) + test( + "I722-11b: cmd_bulk_tag handles --query selector", + '"--query"' in _bt_body or "'--query'" in _bt_body, + "--query not handled in cmd_bulk_tag", + ) + test( + "I722-11c: cmd_bulk_tag handles --add-tag mutation", + '"--add-tag"' in _bt_body or "'--add-tag'" in _bt_body, + "--add-tag not handled in cmd_bulk_tag", + ) + test( + "I722-11d: cmd_bulk_tag handles --set-wing mutation", + '"--set-wing"' in _bt_body or "'--set-wing'" in _bt_body, + "--set-wing not handled in cmd_bulk_tag", + ) + test( + "I722-11e: cmd_bulk_tag uses entry_concept_tags for --add-tag", + "entry_concept_tags" in _bt_body, + "entry_concept_tags not referenced in cmd_bulk_tag", + ) # I722-12: sk.py routing checks _sk_src722 = (REPO / "sk.py").read_text(encoding="utf-8") - test("I722-12a: 
sk.py has 'bulk-tag' in knowledge group", - '"bulk-tag"' in _sk_src722 or "'bulk-tag'" in _sk_src722, - "bulk-tag route not found in sk.py") - test("I722-12b: sk.py dispatches bulk-tag subcommand", - "bulk-tag" in _sk_src722, - "bulk-tag dispatch not found in sk.py") + test( + "I722-12a: sk.py has 'bulk-tag' in knowledge group", + '"bulk-tag"' in _sk_src722 or "'bulk-tag'" in _sk_src722, + "bulk-tag route not found in sk.py", + ) + test( + "I722-12b: sk.py dispatches bulk-tag subcommand", + "bulk-tag" in _sk_src722, + "bulk-tag dispatch not found in sk.py", + ) # I722-13: main() routes bulk-tag subcommand _main_body = _kh_src722.split("def main")[1] if "def main" in _kh_src722 else "" - test("I722-13: main() handles bulk-tag subcommand", - '"bulk-tag"' in _main_body or "'bulk-tag'" in _main_body, - "bulk-tag routing not found in main()") + test( + "I722-13: main() handles bulk-tag subcommand", + '"bulk-tag"' in _main_body or "'bulk-tag'" in _main_body, + "bulk-tag routing not found in main()", + ) # I722-14: multiple mutations work together _buf722h = _io722.StringIO() _sys722.stdout = _buf722h try: - _kh722.cmd_bulk_tag([ - "--wing", "engineering", "--set-wing", "eng2", "--add-tag", "multi-mutate", "--apply" - ]) + _kh722.cmd_bulk_tag(["--wing", "engineering", "--set-wing", "eng2", "--add-tag", "multi-mutate", "--apply"]) except SystemExit: pass finally: _sys722.stdout = _orig_stdout722 _db722mm = _sq722.connect(str(_td722)) - _mm_wing = _db722mm.execute( - "SELECT COUNT(*) FROM knowledge_entries WHERE wing='eng2'" - ).fetchone() - _mm_tag = _db722mm.execute( - "SELECT COUNT(*) FROM entry_concept_tags WHERE tag='multi-mutate'" - ).fetchone() + _mm_wing = _db722mm.execute("SELECT COUNT(*) FROM knowledge_entries WHERE wing='eng2'").fetchone() + _mm_tag = _db722mm.execute("SELECT COUNT(*) FROM entry_concept_tags WHERE tag='multi-mutate'").fetchone() _db722mm.close() - test("I722-14a: multiple mutations: set-wing applied", _mm_wing[0] >= 1, - f"wing 
count={_mm_wing[0]}") - test("I722-14b: multiple mutations: add-tag applied", _mm_tag[0] >= 1, - f"tag count={_mm_tag[0]}") + test("I722-14a: multiple mutations: set-wing applied", _mm_wing[0] >= 1, f"wing count={_mm_wing[0]}") + test("I722-14b: multiple mutations: add-tag applied", _mm_tag[0] >= 1, f"tag count={_mm_tag[0]}") # cleanup _kh722.DB_PATH = _orig_db722 @@ -10069,4 +10081,6 @@ def _make_i720_db(db_path: Path) -> None: print("🎉 All tests passed!") else: print(f"⚠️ {FAIL} test(s) need attention") + for _fn in FAIL_NAMES: + print(f" ❌ {_fn}") sys.exit(0 if FAIL == 0 else 1) From aa47624975d08bc689bbda26bc91b1b300abb43c Mon Sep 17 00:00:00 2001 From: Linh Ngo Date: Sat, 30 May 2026 16:06:40 +0700 Subject: [PATCH 5/6] fix(format): ruff format knowledge-health.py sk.py --- knowledge-health.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/knowledge-health.py b/knowledge-health.py index 17e4d28d..36bc08f8 100644 --- a/knowledge-health.py +++ b/knowledge-health.py @@ -2828,8 +2828,7 @@ def cmd_bulk_tag(args: list) -> None: ).fetchone() if not existing: db.execute( - "INSERT INTO entry_concept_tags (entry_id, tag, source, tagged_at)" - " VALUES (?,?,?,?)", + "INSERT INTO entry_concept_tags (entry_id, tag, source, tagged_at) VALUES (?,?,?,?)", (eid, add_tag, "bulk-tag", now_str), ) From 1e6242fbdc692e645c69640ca9d8edf1fcea19f3 Mon Sep 17 00:00:00 2001 From: Linh Ngo Date: Sat, 30 May 2026 19:03:36 +0700 Subject: [PATCH 6/6] =?UTF-8?q?fix(bulk-tag):=20reviewer=20issues=20?= =?UTF-8?q?=E2=80=94=20LIKE=20escaping,=20deleted=5Fat=20guard,=20db.close?= =?UTF-8?q?()=20on=20error?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Escape LIKE metacharacters (\, %, _) in --query argument with ESCAPE '\' clause to prevent wildcard injection from user input - Guard deleted_at IS NULL clause with PRAGMA table_info check so the command works on older DB schemas that lack the deleted_at column - Add db.close() in 
the except handler to prevent connection leaks on error - Build WHERE clause only when conditions are non-empty (handles edge case of no columns/conditions with schema missing deleted_at) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- knowledge-health.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/knowledge-health.py b/knowledge-health.py index 36bc08f8..3d780d26 100644 --- a/knowledge-health.py +++ b/knowledge-health.py @@ -2773,8 +2773,11 @@ def cmd_bulk_tag(args: list) -> None: try: db = get_db() + ke_cols = {r[1] for r in db.execute("PRAGMA table_info(knowledge_entries)").fetchall()} + deleted_at_clause = "ke.deleted_at IS NULL" if "deleted_at" in ke_cols else "" + base = "SELECT DISTINCT ke.id FROM knowledge_entries ke" - where: list[str] = ["ke.deleted_at IS NULL"] + where: list[str] = [deleted_at_clause] if deleted_at_clause else [] params: list = [] if tag: @@ -2788,11 +2791,12 @@ def cmd_bulk_tag(args: list) -> None: where.append("ke.room = ?") params.append(room) if query_text: - where.append("(ke.title LIKE ? OR ke.content LIKE ?)") - like_val = "%" + query_text + "%" + where.append("(ke.title LIKE ? ESCAPE '\\' OR ke.content LIKE ? ESCAPE '\\')") + escaped = query_text.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_") + like_val = "%" + escaped + "%" params.extend([like_val, like_val]) - sel_query = base + " WHERE " + " AND ".join(where) + sel_query = base + (" WHERE " + " AND ".join(where) if where else "") matched_ids = [r[0] for r in db.execute(sel_query, params).fetchall()] count = len(matched_ids) @@ -2836,6 +2840,10 @@ def cmd_bulk_tag(args: list) -> None: db.close() print(f"✅ Applied to {count} entr(ies).") except Exception as exc: + try: + db.close() + except Exception: + pass print(f"⚠ bulk-tag failed: {exc}", file=sys.stderr) sys.exit(1)