From 9fc1f8fb3eb31090f35f8236d336dfb8fc494675 Mon Sep 17 00:00:00 2001 From: Christopher Skene Date: Tue, 7 Apr 2026 11:13:18 +1000 Subject: [PATCH 1/4] fix: add sqlite edge compound indexes --- code_review_graph/graph.py | 164 +++++++++++++++++--------------- code_review_graph/migrations.py | 59 +++++++----- tests/test_migrations.py | 21 ++-- 3 files changed, 130 insertions(+), 114 deletions(-) diff --git a/code_review_graph/graph.py b/code_review_graph/graph.py index 2dfa97f..047f2e4 100644 --- a/code_review_graph/graph.py +++ b/code_review_graph/graph.py @@ -70,6 +70,8 @@ CREATE INDEX IF NOT EXISTS idx_edges_source ON edges(source_qualified); CREATE INDEX IF NOT EXISTS idx_edges_target ON edges(target_qualified); CREATE INDEX IF NOT EXISTS idx_edges_kind ON edges(kind); +CREATE INDEX IF NOT EXISTS idx_edges_target_kind ON edges(target_qualified, kind); +CREATE INDEX IF NOT EXISTS idx_edges_source_kind ON edges(source_qualified, kind); CREATE INDEX IF NOT EXISTS idx_edges_file ON edges(file_path); """ @@ -125,9 +127,7 @@ class GraphStore: def __init__(self, db_path: str | Path) -> None: self.db_path = Path(db_path) self.db_path.parent.mkdir(parents=True, exist_ok=True) - self._conn = sqlite3.connect( - str(self.db_path), timeout=30, check_same_thread=False - ) + self._conn = sqlite3.connect(str(self.db_path), timeout=30, check_same_thread=False) self._conn.row_factory = sqlite3.Row self._conn.execute("PRAGMA journal_mode=WAL") self._conn.execute("PRAGMA busy_timeout=5000") @@ -136,8 +136,7 @@ def __init__(self, db_path: str | Path) -> None: if get_schema_version(self._conn) < 1: # Fresh DB — metadata table just created by _init_schema self._conn.execute( - "INSERT OR IGNORE INTO metadata (key, value) " - "VALUES ('schema_version', '1')" + "INSERT OR IGNORE INTO metadata (key, value) VALUES ('schema_version', '1')" ) self._conn.commit() run_migrations(self._conn) @@ -186,11 +185,21 @@ def upsert_node(self, node: NodeInfo, file_hash: str = "") -> int: extra=excluded.extra, updated_at=excluded.updated_at """, ( - node.kind, node.name, qualified, node.file_path, - node.line_start, node.line_end, node.language, - node.parent_name, node.params, node.return_type, - node.modifiers, int(node.is_test), file_hash, - extra, now, + node.kind, + node.name, + qualified, + node.file_path, + node.line_start, + node.line_end, + node.language, + node.parent_name, + node.params, + node.return_type, + node.modifiers, + int(node.is_test), + file_hash, + extra, + now, ), ) row = self._conn.execute( @@ -324,9 +333,7 @@ def search_nodes(self, query: str, limit: int = 20) -> list[GraphNode]: conditions: list[str] = [] params: list[str | int] = [] for word in words: - conditions.append( - "(LOWER(name) LIKE ? OR LOWER(qualified_name) LIKE ?)" - ) + conditions.append("(LOWER(name) LIKE ? OR LOWER(qualified_name) LIKE ?)") params.extend([f"%{word}%", f"%{word}%"]) where = " AND ".join(conditions) @@ -357,10 +364,14 @@ def get_impact_radius( """ if BFS_ENGINE == "networkx": return self._get_impact_radius_networkx( - changed_files, max_depth=max_depth, max_nodes=max_nodes, + changed_files, + max_depth=max_depth, + max_nodes=max_nodes, ) return self.get_impact_radius_sql( - changed_files, max_depth=max_depth, max_nodes=max_nodes, + changed_files, + max_depth=max_depth, + max_nodes=max_nodes, ) # -- SQLite recursive CTE version (default) --------------------------- @@ -405,15 +416,12 @@ def get_impact_radius_sql( # Build recursive CTE — use a temp table for the seed set to # keep the query plan efficient and stay under variable limits. - self._conn.execute( - "CREATE TEMP TABLE IF NOT EXISTS _impact_seeds " - "(qn TEXT PRIMARY KEY)" - ) + self._conn.execute("CREATE TEMP TABLE IF NOT EXISTS _impact_seeds (qn TEXT PRIMARY KEY)") self._conn.execute("DELETE FROM _impact_seeds") batch_size = 450 seed_list = list(seeds) for i in range(0, len(seed_list), batch_size): - batch = seed_list[i:i + batch_size] + batch = seed_list[i : i + batch_size] placeholders = ",".join("(?)" for _ in batch) self._conn.execute( # nosec B608 f"INSERT OR IGNORE INTO _impact_seeds (qn) VALUES {placeholders}", @@ -440,7 +448,8 @@ def get_impact_radius_sql( LIMIT ? """ rows = self._conn.execute( - cte_sql, (max_depth, max_depth, max_nodes + len(seeds)), + cte_sql, + (max_depth, max_depth, max_nodes + len(seeds)), ).fetchall() # Split into seeds vs impacted @@ -573,7 +582,8 @@ def get_stats(self) -> GraphStats: edges_by_kind[row["kind"]] = row["cnt"] languages = [ - r["language"] for r in self._conn.execute( + r["language"] + for r in self._conn.execute( "SELECT DISTINCT language FROM nodes WHERE language IS NOT NULL AND language != ''" ) ] @@ -644,9 +654,7 @@ def get_nodes_by_size( def get_node_by_id(self, node_id: int) -> Optional[GraphNode]: """Fetch a single node by its integer primary key.""" - row = self._conn.execute( - "SELECT * FROM nodes WHERE id = ?", (node_id,) - ).fetchone() + row = self._conn.execute("SELECT * FROM nodes WHERE id = ?", (node_id,)).fetchone() return self._row_to_node(row) if row else None def get_nodes_by_kind( @@ -672,15 +680,15 @@ def get_nodes_by_kind( params.append(f"%{file_pattern}%") where = " AND ".join(conditions) rows = self._conn.execute( # nosec B608 - f"SELECT * FROM nodes WHERE {where}", params, + f"SELECT * FROM nodes WHERE {where}", + params, ).fetchall() return [self._row_to_node(r) for r in rows] def count_flow_memberships(self, node_id: int) -> int: """Return the number of flows a node participates in.""" row = self._conn.execute( - "SELECT COUNT(*) as cnt FROM flow_memberships " - "WHERE node_id = ?", + "SELECT COUNT(*) as cnt FROM flow_memberships WHERE node_id = ?", (node_id,), ).fetchone() return row["cnt"] if row else 0 @@ -696,7 +704,8 @@ def get_node_community_id(self, node_id: int) -> int | None: return None def get_community_ids_by_qualified_names( - self, qns: list[str], + self, + qns: list[str], ) -> dict[str, int | None]: """Batch-fetch ``community_id`` for a list of qualified names. @@ -706,7 +715,7 @@ def get_community_ids_by_qualified_names( result: dict[str, int | None] = {} batch_size = 450 for i in range(0, len(qns), batch_size): - batch = qns[i:i + batch_size] + batch = qns[i : i + batch_size] placeholders = ",".join("?" for _ in batch) rows = self._conn.execute( # nosec B608 "SELECT qualified_name, community_id FROM nodes " @@ -720,8 +729,7 @@ def get_community_ids_by_qualified_names( def get_files_matching(self, pattern: str) -> list[str]: """Return distinct ``file_path`` values matching a LIKE suffix.""" rows = self._conn.execute( - "SELECT DISTINCT file_path FROM nodes " - "WHERE file_path LIKE ?", + "SELECT DISTINCT file_path FROM nodes WHERE file_path LIKE ?", (f"%{pattern}",), ).fetchall() return [r["file_path"] for r in rows] @@ -729,12 +737,13 @@ def get_files_matching(self, pattern: str) -> list[str]: def get_nodes_without_signature(self) -> list[sqlite3.Row]: """Return raw rows for nodes that have no signature yet.""" return self._conn.execute( - "SELECT id, name, kind, params, return_type " - "FROM nodes WHERE signature IS NULL" + "SELECT id, name, kind, params, return_type FROM nodes WHERE signature IS NULL" ).fetchall() def update_node_signature( - self, node_id: int, signature: str, + self, + node_id: int, + signature: str, ) -> None: """Set the ``signature`` column for a single node.""" self._conn.execute( @@ -748,18 +757,14 @@ def get_all_community_ids(self) -> dict[str, int | None]: Used primarily by the visualization exporter. """ try: - rows = self._conn.execute( - "SELECT qualified_name, community_id FROM nodes" - ).fetchall() - return { - r["qualified_name"]: r["community_id"] - for r in rows - } + rows = self._conn.execute("SELECT qualified_name, community_id FROM nodes").fetchall() + return {r["qualified_name"]: r["community_id"] for r in rows} except Exception: return {} def get_node_ids_by_files( - self, file_paths: list[str], + self, + file_paths: list[str], ) -> set[int]: """Return node IDs belonging to the given file paths.""" if not file_paths: @@ -767,18 +772,18 @@ def get_node_ids_by_files( result: set[int] = set() batch_size = 450 for i in range(0, len(file_paths), batch_size): - batch = file_paths[i:i + batch_size] + batch = file_paths[i : i + batch_size] placeholders = ",".join("?" for _ in batch) rows = self._conn.execute( # nosec B608 - "SELECT id FROM nodes " - f"WHERE file_path IN ({placeholders})", + f"SELECT id FROM nodes WHERE file_path IN ({placeholders})", batch, ).fetchall() result.update(r["id"] for r in rows) return result def get_flow_ids_by_node_ids( - self, node_ids: set[int], + self, + node_ids: set[int], ) -> list[int]: """Return distinct flow IDs that contain any of *node_ids*.""" if not node_ids: @@ -787,11 +792,10 @@ def get_flow_ids_by_node_ids( result: list[int] = [] batch_size = 450 for i in range(0, len(nids), batch_size): - batch = nids[i:i + batch_size] + batch = nids[i : i + batch_size] placeholders = ",".join("?" for _ in batch) rows = self._conn.execute( # nosec B608 - "SELECT DISTINCT flow_id FROM flow_memberships " - f"WHERE node_id IN ({placeholders})", + f"SELECT DISTINCT flow_id FROM flow_memberships WHERE node_id IN ({placeholders})", batch, ).fetchall() result.extend(r["flow_id"] for r in rows) @@ -810,15 +814,15 @@ def get_flow_qualified_names(self, flow_id: int) -> set[str]: def get_node_kind_by_id(self, node_id: int) -> str | None: """Return just the ``kind`` column for a node, or ``None``.""" row = self._conn.execute( - "SELECT kind FROM nodes WHERE id = ?", (node_id,), + "SELECT kind FROM nodes WHERE id = ?", + (node_id,), ).fetchone() return row["kind"] if row else None def get_all_call_targets(self) -> set[str]: """Return the set of all CALLS-edge target qualified names.""" rows = self._conn.execute( - "SELECT DISTINCT target_qualified FROM edges " - "WHERE kind = 'CALLS'" + "SELECT DISTINCT target_qualified FROM edges WHERE kind = 'CALLS'" ).fetchall() return {r["target_qualified"] for r in rows} @@ -827,25 +831,24 @@ def get_communities_list( ) -> list[sqlite3.Row]: """Return raw rows from the ``communities`` table.""" try: - return self._conn.execute( - "SELECT id, name FROM communities" - ).fetchall() + return self._conn.execute("SELECT id, name FROM communities").fetchall() except Exception: return [] def get_community_member_qns( - self, community_id: int, + self, + community_id: int, ) -> list[str]: """Return qualified names of nodes in a community.""" rows = self._conn.execute( - "SELECT qualified_name FROM nodes " - "WHERE community_id = ?", + "SELECT qualified_name FROM nodes WHERE community_id = ?", (community_id,), ).fetchall() return [r["qualified_name"] for r in rows] def get_nodes_by_community_id( - self, community_id: int, + self, + community_id: int, ) -> list[GraphNode]: """Return all nodes belonging to a community.""" rows = self._conn.execute( @@ -855,34 +858,34 @@ def get_nodes_by_community_id( return [self._row_to_node(r) for r in rows] def get_outgoing_targets( - self, source_qns: list[str], + self, + source_qns: list[str], ) -> list[str]: """Return ``target_qualified`` for edges sourced from *source_qns*.""" results: list[str] = [] batch_size = 450 for i in range(0, len(source_qns), batch_size): - batch = source_qns[i:i + batch_size] + batch = source_qns[i : i + batch_size] placeholders = ",".join("?" for _ in batch) rows = self._conn.execute( # nosec B608 - "SELECT target_qualified FROM edges " - f"WHERE source_qualified IN ({placeholders})", + f"SELECT target_qualified FROM edges WHERE source_qualified IN ({placeholders})", batch, ).fetchall() results.extend(r["target_qualified"] for r in rows) return results def get_incoming_sources( - self, target_qns: list[str], + self, + target_qns: list[str], ) -> list[str]: """Return ``source_qualified`` for edges targeting *target_qns*.""" results: list[str] = [] batch_size = 450 for i in range(0, len(target_qns), batch_size): - batch = target_qns[i:i + batch_size] + batch = target_qns[i : i + batch_size] placeholders = ",".join("?" for _ in batch) rows = self._conn.execute( # nosec B608 - "SELECT source_qualified FROM edges " - f"WHERE target_qualified IN ({placeholders})", + f"SELECT source_qualified FROM edges WHERE target_qualified IN ({placeholders})", batch, ).fetchall() results.extend(r["source_qualified"] for r in rows) @@ -907,7 +910,7 @@ def get_edges_among(self, qualified_names: set[str]) -> list[GraphEdge]: results: list[GraphEdge] = [] batch_size = 450 # Stay well under SQLite's default 999 limit for i in range(0, len(qns), batch_size): - batch = qns[i:i + batch_size] + batch = qns[i : i + batch_size] placeholders = ",".join("?" for _ in batch) rows = self._conn.execute( # nosec B608 f"SELECT * FROM edges WHERE source_qualified IN ({placeholders})", @@ -927,7 +930,7 @@ def _batch_get_nodes(self, qualified_names: set[str]) -> list[GraphNode]: results: list[GraphNode] = [] batch_size = 450 for i in range(0, len(qns), batch_size): - batch = qns[i:i + batch_size] + batch = qns[i : i + batch_size] placeholders = ",".join("?" for _ in batch) rows = self._conn.execute( # nosec B608 f"SELECT * FROM nodes WHERE qualified_name IN ({placeholders})", @@ -997,18 +1000,19 @@ def _sanitize_name(s: str, max_len: int = 256) -> str: agent behaviour. """ # Strip control chars 0x00-0x1F except \t (0x09) and \n (0x0A) - cleaned = "".join( - ch for ch in s - if ch in ("\t", "\n") or ord(ch) >= 0x20 - ) + cleaned = "".join(ch for ch in s if ch in ("\t", "\n") or ord(ch) >= 0x20) return cleaned[:max_len] def node_to_dict(n: GraphNode) -> dict: return { - "id": n.id, "kind": n.kind, "name": _sanitize_name(n.name), - "qualified_name": _sanitize_name(n.qualified_name), "file_path": n.file_path, - "line_start": n.line_start, "line_end": n.line_end, + "id": n.id, + "kind": n.kind, + "name": _sanitize_name(n.name), + "qualified_name": _sanitize_name(n.qualified_name), + "file_path": n.file_path, + "line_start": n.line_start, + "line_end": n.line_end, "language": n.language, "parent_name": _sanitize_name(n.parent_name) if n.parent_name else n.parent_name, "is_test": n.is_test, @@ -1017,8 +1021,10 @@ def node_to_dict(n: GraphNode) -> dict: def edge_to_dict(e: GraphEdge) -> dict: return { - "id": e.id, "kind": e.kind, + "id": e.id, + "kind": e.kind, "source": _sanitize_name(e.source_qualified), "target": _sanitize_name(e.target_qualified), - "file_path": e.file_path, "line": e.line, + "file_path": e.file_path, + "line": e.line, } diff --git a/code_review_graph/migrations.py b/code_review_graph/migrations.py index ddb446e..1734c69 100644 --- a/code_review_graph/migrations.py +++ b/code_review_graph/migrations.py @@ -20,9 +20,7 @@ def get_schema_version(conn: sqlite3.Connection) -> int: int: The schema version (0 if metadata table doesn't exist, 1 if not set). """ try: - row = conn.execute( - "SELECT value FROM metadata WHERE key = 'schema_version'" - ).fetchone() + row = conn.execute("SELECT value FROM metadata WHERE key = 'schema_version'").fetchone() if row is None: return 1 return int(row[0] if isinstance(row, (tuple, list)) else row["value"]) @@ -39,10 +37,20 @@ def _set_schema_version(conn: sqlite3.Connection, version: int) -> None: ) -_KNOWN_TABLES = frozenset({ - "nodes", "edges", "metadata", "communities", "flows", "flow_memberships", "nodes_fts", - "community_summaries", "flow_snapshots", "risk_index", -}) +_KNOWN_TABLES = frozenset( + { + "nodes", + "edges", + "metadata", + "communities", + "flows", + "flow_memberships", + "nodes_fts", + "community_summaries", + "flow_snapshots", + "risk_index", + } +) def _has_column(conn: sqlite3.Connection, table: str, column: str) -> bool: @@ -59,8 +67,7 @@ def _table_exists(conn: sqlite3.Connection, table: str) -> bool: if table not in _KNOWN_TABLES: raise ValueError(f"Unknown table: {table}") row = conn.execute( - "SELECT count(*) FROM sqlite_master WHERE type IN ('table', 'view') " - "AND name = ?", + "SELECT count(*) FROM sqlite_master WHERE type IN ('table', 'view') AND name = ?", (table,), ).fetchone() return row[0] > 0 @@ -102,12 +109,8 @@ def _migrate_v3(conn: sqlite3.Connection) -> None: PRIMARY KEY (flow_id, node_id) ) """) - conn.execute( - "CREATE INDEX IF NOT EXISTS idx_flows_criticality ON flows(criticality DESC)" - ) - conn.execute( - "CREATE INDEX IF NOT EXISTS idx_flows_entry ON flows(entry_point_id)" - ) + conn.execute("CREATE INDEX IF NOT EXISTS idx_flows_criticality ON flows(criticality DESC)") + conn.execute("CREATE INDEX IF NOT EXISTS idx_flows_entry ON flows(entry_point_id)") conn.execute( "CREATE INDEX IF NOT EXISTS idx_flow_memberships_node ON flow_memberships(node_id)" ) @@ -132,12 +135,8 @@ def _migrate_v4(conn: sqlite3.Connection) -> None: if not _has_column(conn, "nodes", "community_id"): conn.execute("ALTER TABLE nodes ADD COLUMN community_id INTEGER") logger.info("Migration v4: added 'community_id' column to nodes") - conn.execute( - "CREATE INDEX IF NOT EXISTS idx_nodes_community ON nodes(community_id)" - ) - conn.execute( - "CREATE INDEX IF NOT EXISTS idx_communities_parent ON communities(parent_id)" - ) + conn.execute("CREATE INDEX IF NOT EXISTS idx_nodes_community ON nodes(community_id)") + conn.execute("CREATE INDEX IF NOT EXISTS idx_communities_parent ON communities(parent_id)") conn.execute( "CREATE INDEX IF NOT EXISTS idx_communities_cohesion ON communities(cohesion DESC)" ) @@ -195,12 +194,21 @@ def _migrate_v6(conn: sqlite3.Connection) -> None: FOREIGN KEY (node_id) REFERENCES nodes(id) ) """) + conn.execute("CREATE INDEX IF NOT EXISTS idx_risk_index_score ON risk_index(risk_score DESC)") + logger.info( + "Migration v6: created summary tables (community_summaries, flow_snapshots, risk_index)" + ) + + +def _migrate_v7(conn: sqlite3.Connection) -> None: + """v7: Add compound edge indexes for summary and risk queries.""" + conn.execute( + "CREATE INDEX IF NOT EXISTS idx_edges_target_kind ON edges(target_qualified, kind)" + ) conn.execute( - "CREATE INDEX IF NOT EXISTS idx_risk_index_score " - "ON risk_index(risk_score DESC)" + "CREATE INDEX IF NOT EXISTS idx_edges_source_kind ON edges(source_qualified, kind)" ) - logger.info("Migration v6: created summary tables " - "(community_summaries, flow_snapshots, risk_index)") + logger.info("Migration v7: added compound edge indexes") # --------------------------------------------------------------------------- @@ -213,6 +221,7 @@ def _migrate_v6(conn: sqlite3.Connection) -> None: 4: _migrate_v4, 5: _migrate_v5, 6: _migrate_v6, + 7: _migrate_v7, } LATEST_VERSION = max(MIGRATIONS.keys()) diff --git a/tests/test_migrations.py b/tests/test_migrations.py index 3802aae..e903ded 100644 --- a/tests/test_migrations.py +++ b/tests/test_migrations.py @@ -34,9 +34,7 @@ def test_v1_db_migrates_to_latest(self): # Manually create a v1 database (base schema only, version=1) conn = sqlite3.connect(str(self.tmp.name)) - conn.execute( - "INSERT OR REPLACE INTO metadata (key, value) VALUES ('schema_version', '1')" - ) + conn.execute("INSERT OR REPLACE INTO metadata (key, value) VALUES ('schema_version', '1')") conn.commit() # Drop migration artifacts to simulate v1 conn.execute("DROP TABLE IF EXISTS flows") @@ -98,9 +96,7 @@ def test_get_schema_version_no_metadata_table(self): def test_get_schema_version_no_key(self): """get_schema_version returns 1 when metadata exists but key is missing.""" conn = sqlite3.connect(":memory:") - conn.execute( - "CREATE TABLE metadata (key TEXT PRIMARY KEY, value TEXT NOT NULL)" - ) + conn.execute("CREATE TABLE metadata (key TEXT PRIMARY KEY, value TEXT NOT NULL)") conn.commit() assert get_schema_version(conn) == 1 conn.close() @@ -117,7 +113,6 @@ def test_run_migrations_on_already_current_db(self): version_after = get_schema_version(self.store._conn) assert version_before == version_after == LATEST_VERSION - def test_v6_summary_tables_exist(self): """v6 summary tables should exist after migration.""" tables = _get_table_names(self.store._conn) @@ -134,10 +129,16 @@ def test_v6_migration_idempotent(self): tables = _get_table_names(self.store._conn) assert "community_summaries" in tables + def test_v7_compound_edge_indexes_exist(self): + """v7 compound edge indexes should exist after migration.""" + rows = self.store._conn.execute("PRAGMA index_list(edges)").fetchall() + indexes = {row[1] if isinstance(row, tuple) else row["name"] for row in rows} + + assert "idx_edges_target_kind" in indexes + assert "idx_edges_source_kind" in indexes + def _get_table_names(conn: sqlite3.Connection) -> set[str]: """Helper: return all table/view names in the database.""" - rows = conn.execute( - "SELECT name FROM sqlite_master WHERE type IN ('table', 'view')" - ).fetchall() + rows = conn.execute("SELECT name FROM sqlite_master WHERE type IN ('table', 'view')").fetchall() return {row[0] if isinstance(row, (tuple, list)) else row["name"] for row in rows} From 2fb80b9b3757c3253d5173d98b64d4fcd8064755 Mon Sep 17 00:00:00 2001 From: Christopher Skene Date: Tue, 7 Apr 2026 11:27:29 +1000 Subject: [PATCH 2/4] Revert "fix: add sqlite edge compound indexes" This reverts commit 9fc1f8fb3eb31090f35f8236d336dfb8fc494675. --- code_review_graph/graph.py | 164 +++++++++++++++----------------- code_review_graph/migrations.py | 59 +++++------- tests/test_migrations.py | 21 ++-- 3 files changed, 114 insertions(+), 130 deletions(-) diff --git a/code_review_graph/graph.py b/code_review_graph/graph.py index 047f2e4..2dfa97f 100644 --- a/code_review_graph/graph.py +++ b/code_review_graph/graph.py @@ -70,8 +70,6 @@ CREATE INDEX IF NOT EXISTS idx_edges_source ON edges(source_qualified); CREATE INDEX IF NOT EXISTS idx_edges_target ON edges(target_qualified); CREATE INDEX IF NOT EXISTS idx_edges_kind ON edges(kind); -CREATE INDEX IF NOT EXISTS idx_edges_target_kind ON edges(target_qualified, kind); -CREATE INDEX IF NOT EXISTS idx_edges_source_kind ON edges(source_qualified, kind); CREATE INDEX IF NOT EXISTS idx_edges_file ON edges(file_path); """ @@ -127,7 +125,9 @@ class GraphStore: def __init__(self, db_path: str | Path) -> None: self.db_path = Path(db_path) self.db_path.parent.mkdir(parents=True, exist_ok=True) - self._conn = sqlite3.connect(str(self.db_path), timeout=30, check_same_thread=False) + self._conn = sqlite3.connect( + str(self.db_path), timeout=30, check_same_thread=False + ) self._conn.row_factory = sqlite3.Row self._conn.execute("PRAGMA journal_mode=WAL") self._conn.execute("PRAGMA busy_timeout=5000") @@ -136,7 +136,8 @@ def __init__(self, db_path: str | Path) -> None: if get_schema_version(self._conn) < 1: # Fresh DB — metadata table just created by _init_schema self._conn.execute( - "INSERT OR IGNORE INTO metadata (key, value) VALUES ('schema_version', '1')" + "INSERT OR IGNORE INTO metadata (key, value) " + "VALUES ('schema_version', '1')" ) self._conn.commit() run_migrations(self._conn) @@ -185,21 +186,11 @@ def upsert_node(self, node: NodeInfo, file_hash: str = "") -> int: extra=excluded.extra, updated_at=excluded.updated_at """, ( - node.kind, - node.name, - qualified, - node.file_path, - node.line_start, - node.line_end, - node.language, - node.parent_name, - node.params, - node.return_type, - node.modifiers, - int(node.is_test), - file_hash, - extra, - now, + node.kind, node.name, qualified, node.file_path, + node.line_start, node.line_end, node.language, + node.parent_name, node.params, node.return_type, + node.modifiers, int(node.is_test), file_hash, + extra, now, ), ) row = self._conn.execute( @@ -333,7 +324,9 @@ def search_nodes(self, query: str, limit: int = 20) -> list[GraphNode]: conditions: list[str] = [] params: list[str | int] = [] for word in words: - conditions.append("(LOWER(name) LIKE ? OR LOWER(qualified_name) LIKE ?)") + conditions.append( + "(LOWER(name) LIKE ? OR LOWER(qualified_name) LIKE ?)" + ) params.extend([f"%{word}%", f"%{word}%"]) where = " AND ".join(conditions) @@ -364,14 +357,10 @@ def get_impact_radius( """ if BFS_ENGINE == "networkx": return self._get_impact_radius_networkx( - changed_files, - max_depth=max_depth, - max_nodes=max_nodes, + changed_files, max_depth=max_depth, max_nodes=max_nodes, ) return self.get_impact_radius_sql( - changed_files, - max_depth=max_depth, - max_nodes=max_nodes, + changed_files, max_depth=max_depth, max_nodes=max_nodes, ) # -- SQLite recursive CTE version (default) --------------------------- @@ -416,12 +405,15 @@ def get_impact_radius_sql( # Build recursive CTE — use a temp table for the seed set to # keep the query plan efficient and stay under variable limits. - self._conn.execute("CREATE TEMP TABLE IF NOT EXISTS _impact_seeds (qn TEXT PRIMARY KEY)") + self._conn.execute( + "CREATE TEMP TABLE IF NOT EXISTS _impact_seeds " + "(qn TEXT PRIMARY KEY)" + ) self._conn.execute("DELETE FROM _impact_seeds") batch_size = 450 seed_list = list(seeds) for i in range(0, len(seed_list), batch_size): - batch = seed_list[i : i + batch_size] + batch = seed_list[i:i + batch_size] placeholders = ",".join("(?)" for _ in batch) self._conn.execute( # nosec B608 f"INSERT OR IGNORE INTO _impact_seeds (qn) VALUES {placeholders}", @@ -448,8 +440,7 @@ def get_impact_radius_sql( LIMIT ? """ rows = self._conn.execute( - cte_sql, - (max_depth, max_depth, max_nodes + len(seeds)), + cte_sql, (max_depth, max_depth, max_nodes + len(seeds)), ).fetchall() # Split into seeds vs impacted @@ -582,8 +573,7 @@ def get_stats(self) -> GraphStats: edges_by_kind[row["kind"]] = row["cnt"] languages = [ - r["language"] - for r in self._conn.execute( + r["language"] for r in self._conn.execute( "SELECT DISTINCT language FROM nodes WHERE language IS NOT NULL AND language != ''" ) ] @@ -654,7 +644,9 @@ def get_nodes_by_size( def get_node_by_id(self, node_id: int) -> Optional[GraphNode]: """Fetch a single node by its integer primary key.""" - row = self._conn.execute("SELECT * FROM nodes WHERE id = ?", (node_id,)).fetchone() + row = self._conn.execute( + "SELECT * FROM nodes WHERE id = ?", (node_id,) + ).fetchone() return self._row_to_node(row) if row else None def get_nodes_by_kind( @@ -680,15 +672,15 @@ def get_nodes_by_kind( params.append(f"%{file_pattern}%") where = " AND ".join(conditions) rows = self._conn.execute( # nosec B608 - f"SELECT * FROM nodes WHERE {where}", - params, + f"SELECT * FROM nodes WHERE {where}", params, ).fetchall() return [self._row_to_node(r) for r in rows] def count_flow_memberships(self, node_id: int) -> int: """Return the number of flows a node participates in.""" row = self._conn.execute( - "SELECT COUNT(*) as cnt FROM flow_memberships WHERE node_id = ?", + "SELECT COUNT(*) as cnt FROM flow_memberships " + "WHERE node_id = ?", (node_id,), ).fetchone() return row["cnt"] if row else 0 @@ -704,8 +696,7 @@ def get_node_community_id(self, node_id: int) -> int | None: return None def get_community_ids_by_qualified_names( - self, - qns: list[str], + self, qns: list[str], ) -> dict[str, int | None]: """Batch-fetch ``community_id`` for a list of qualified names. @@ -715,7 +706,7 @@ def get_community_ids_by_qualified_names( result: dict[str, int | None] = {} batch_size = 450 for i in range(0, len(qns), batch_size): - batch = qns[i : i + batch_size] + batch = qns[i:i + batch_size] placeholders = ",".join("?" for _ in batch) rows = self._conn.execute( # nosec B608 "SELECT qualified_name, community_id FROM nodes " @@ -729,7 +720,8 @@ def get_community_ids_by_qualified_names( def get_files_matching(self, pattern: str) -> list[str]: """Return distinct ``file_path`` values matching a LIKE suffix.""" rows = self._conn.execute( - "SELECT DISTINCT file_path FROM nodes WHERE file_path LIKE ?", + "SELECT DISTINCT file_path FROM nodes " + "WHERE file_path LIKE ?", (f"%{pattern}",), ).fetchall() return [r["file_path"] for r in rows] @@ -737,13 +729,12 @@ def get_files_matching(self, pattern: str) -> list[str]: def get_nodes_without_signature(self) -> list[sqlite3.Row]: """Return raw rows for nodes that have no signature yet.""" return self._conn.execute( - "SELECT id, name, kind, params, return_type FROM nodes WHERE signature IS NULL" + "SELECT id, name, kind, params, return_type " + "FROM nodes WHERE signature IS NULL" ).fetchall() def update_node_signature( - self, - node_id: int, - signature: str, + self, node_id: int, signature: str, ) -> None: """Set the ``signature`` column for a single node.""" self._conn.execute( @@ -757,14 +748,18 @@ def get_all_community_ids(self) -> dict[str, int | None]: Used primarily by the visualization exporter. """ try: - rows = self._conn.execute("SELECT qualified_name, community_id FROM nodes").fetchall() - return {r["qualified_name"]: r["community_id"] for r in rows} + rows = self._conn.execute( + "SELECT qualified_name, community_id FROM nodes" + ).fetchall() + return { + r["qualified_name"]: r["community_id"] + for r in rows + } except Exception: return {} def get_node_ids_by_files( - self, - file_paths: list[str], + self, file_paths: list[str], ) -> set[int]: """Return node IDs belonging to the given file paths.""" if not file_paths: @@ -772,18 +767,18 @@ def get_node_ids_by_files( result: set[int] = set() batch_size = 450 for i in range(0, len(file_paths), batch_size): - batch = file_paths[i : i + batch_size] + batch = file_paths[i:i + batch_size] placeholders = ",".join("?" for _ in batch) rows = self._conn.execute( # nosec B608 - f"SELECT id FROM nodes WHERE file_path IN ({placeholders})", + "SELECT id FROM nodes " + f"WHERE file_path IN ({placeholders})", batch, ).fetchall() result.update(r["id"] for r in rows) return result def get_flow_ids_by_node_ids( - self, - node_ids: set[int], + self, node_ids: set[int], ) -> list[int]: """Return distinct flow IDs that contain any of *node_ids*.""" if not node_ids: @@ -792,10 +787,11 @@ def get_flow_ids_by_node_ids( result: list[int] = [] batch_size = 450 for i in range(0, len(nids), batch_size): - batch = nids[i : i + batch_size] + batch = nids[i:i + batch_size] placeholders = ",".join("?" for _ in batch) rows = self._conn.execute( # nosec B608 - f"SELECT DISTINCT flow_id FROM flow_memberships WHERE node_id IN ({placeholders})", + "SELECT DISTINCT flow_id FROM flow_memberships " + f"WHERE node_id IN ({placeholders})", batch, ).fetchall() result.extend(r["flow_id"] for r in rows) @@ -814,15 +810,15 @@ def get_flow_qualified_names(self, flow_id: int) -> set[str]: def get_node_kind_by_id(self, node_id: int) -> str | None: """Return just the ``kind`` column for a node, or ``None``.""" row = self._conn.execute( - "SELECT kind FROM nodes WHERE id = ?", - (node_id,), + "SELECT kind FROM nodes WHERE id = ?", (node_id,), ).fetchone() return row["kind"] if row else None def get_all_call_targets(self) -> set[str]: """Return the set of all CALLS-edge target qualified names.""" rows = self._conn.execute( - "SELECT DISTINCT target_qualified FROM edges WHERE kind = 'CALLS'" + "SELECT DISTINCT target_qualified FROM edges " + "WHERE kind = 'CALLS'" ).fetchall() return {r["target_qualified"] for r in rows} @@ -831,24 +827,25 @@ def get_communities_list( ) -> list[sqlite3.Row]: """Return raw rows from the ``communities`` table.""" try: - return self._conn.execute("SELECT id, name FROM communities").fetchall() + return self._conn.execute( + "SELECT id, name FROM communities" + ).fetchall() except Exception: return [] def get_community_member_qns( - self, - community_id: int, + self, community_id: int, ) -> list[str]: """Return qualified names of nodes in a community.""" rows = self._conn.execute( - "SELECT qualified_name FROM nodes WHERE community_id = ?", + "SELECT qualified_name FROM nodes " + "WHERE community_id = ?", (community_id,), ).fetchall() return [r["qualified_name"] for r in rows] def get_nodes_by_community_id( - self, - community_id: int, + self, community_id: int, ) -> list[GraphNode]: """Return all nodes belonging to a community.""" rows = self._conn.execute( @@ -858,34 +855,34 @@ def get_nodes_by_community_id( return [self._row_to_node(r) for r in rows] def get_outgoing_targets( - self, - source_qns: list[str], + self, source_qns: list[str], ) -> list[str]: """Return ``target_qualified`` for edges sourced from *source_qns*.""" results: list[str] = [] batch_size = 450 for i in range(0, len(source_qns), batch_size): - batch = source_qns[i : i + batch_size] + batch = source_qns[i:i + batch_size] placeholders = ",".join("?" for _ in batch) rows = self._conn.execute( # nosec B608 - f"SELECT target_qualified FROM edges WHERE source_qualified IN ({placeholders})", + "SELECT target_qualified FROM edges " + f"WHERE source_qualified IN ({placeholders})", batch, ).fetchall() results.extend(r["target_qualified"] for r in rows) return results def get_incoming_sources( - self, - target_qns: list[str], + self, target_qns: list[str], ) -> list[str]: """Return ``source_qualified`` for edges targeting *target_qns*.""" results: list[str] = [] batch_size = 450 for i in range(0, len(target_qns), batch_size): - batch = target_qns[i : i + batch_size] + batch = target_qns[i:i + batch_size] placeholders = ",".join("?" for _ in batch) rows = self._conn.execute( # nosec B608 - f"SELECT source_qualified FROM edges WHERE target_qualified IN ({placeholders})", + "SELECT source_qualified FROM edges " + f"WHERE target_qualified IN ({placeholders})", batch, ).fetchall() results.extend(r["source_qualified"] for r in rows) @@ -910,7 +907,7 @@ def get_edges_among(self, qualified_names: set[str]) -> list[GraphEdge]: results: list[GraphEdge] = [] batch_size = 450 # Stay well under SQLite's default 999 limit for i in range(0, len(qns), batch_size): - batch = qns[i : i + batch_size] + batch = qns[i:i + batch_size] placeholders = ",".join("?" for _ in batch) rows = self._conn.execute( # nosec B608 f"SELECT * FROM edges WHERE source_qualified IN ({placeholders})", @@ -930,7 +927,7 @@ def _batch_get_nodes(self, qualified_names: set[str]) -> list[GraphNode]: results: list[GraphNode] = [] batch_size = 450 for i in range(0, len(qns), batch_size): - batch = qns[i : i + batch_size] + batch = qns[i:i + batch_size] placeholders = ",".join("?" for _ in batch) rows = self._conn.execute( # nosec B608 f"SELECT * FROM nodes WHERE qualified_name IN ({placeholders})", @@ -1000,19 +997,18 @@ def _sanitize_name(s: str, max_len: int = 256) -> str: agent behaviour. """ # Strip control chars 0x00-0x1F except \t (0x09) and \n (0x0A) - cleaned = "".join(ch for ch in s if ch in ("\t", "\n") or ord(ch) >= 0x20) + cleaned = "".join( + ch for ch in s + if ch in ("\t", "\n") or ord(ch) >= 0x20 + ) return cleaned[:max_len] def node_to_dict(n: GraphNode) -> dict: return { - "id": n.id, - "kind": n.kind, - "name": _sanitize_name(n.name), - "qualified_name": _sanitize_name(n.qualified_name), - "file_path": n.file_path, - "line_start": n.line_start, - "line_end": n.line_end, + "id": n.id, "kind": n.kind, "name": _sanitize_name(n.name), + "qualified_name": _sanitize_name(n.qualified_name), "file_path": n.file_path, + "line_start": n.line_start, "line_end": n.line_end, "language": n.language, "parent_name": _sanitize_name(n.parent_name) if n.parent_name else n.parent_name, "is_test": n.is_test, @@ -1021,10 +1017,8 @@ def node_to_dict(n: GraphNode) -> dict: def edge_to_dict(e: GraphEdge) -> dict: return { - "id": e.id, - "kind": e.kind, + "id": e.id, "kind": e.kind, "source": _sanitize_name(e.source_qualified), "target": _sanitize_name(e.target_qualified), - "file_path": e.file_path, - "line": e.line, + "file_path": e.file_path, "line": e.line, } diff --git a/code_review_graph/migrations.py b/code_review_graph/migrations.py index 1734c69..ddb446e 100644 --- a/code_review_graph/migrations.py +++ b/code_review_graph/migrations.py @@ -20,7 +20,9 @@ def get_schema_version(conn: sqlite3.Connection) -> int: int: The schema version (0 if metadata table doesn't exist, 1 if not set). """ try: - row = conn.execute("SELECT value FROM metadata WHERE key = 'schema_version'").fetchone() + row = conn.execute( + "SELECT value FROM metadata WHERE key = 'schema_version'" + ).fetchone() if row is None: return 1 return int(row[0] if isinstance(row, (tuple, list)) else row["value"]) @@ -37,20 +39,10 @@ def _set_schema_version(conn: sqlite3.Connection, version: int) -> None: ) -_KNOWN_TABLES = frozenset( - { - "nodes", - "edges", - "metadata", - "communities", - "flows", - "flow_memberships", - "nodes_fts", - "community_summaries", - "flow_snapshots", - "risk_index", - } -) +_KNOWN_TABLES = frozenset({ + "nodes", "edges", "metadata", "communities", "flows", "flow_memberships", "nodes_fts", + "community_summaries", "flow_snapshots", "risk_index", +}) def _has_column(conn: sqlite3.Connection, table: str, column: str) -> bool: @@ -67,7 +59,8 @@ def _table_exists(conn: sqlite3.Connection, table: str) -> bool: if table not in _KNOWN_TABLES: raise ValueError(f"Unknown table: {table}") row = conn.execute( - "SELECT count(*) FROM sqlite_master WHERE type IN ('table', 'view') AND name = ?", + "SELECT count(*) FROM sqlite_master WHERE type IN ('table', 'view') " + "AND name = ?", (table,), ).fetchone() return row[0] > 0 @@ -109,8 +102,12 @@ def _migrate_v3(conn: sqlite3.Connection) -> None: PRIMARY KEY (flow_id, node_id) ) """) - conn.execute("CREATE INDEX IF NOT EXISTS idx_flows_criticality ON flows(criticality DESC)") - conn.execute("CREATE INDEX IF NOT EXISTS idx_flows_entry ON flows(entry_point_id)") + conn.execute( + "CREATE INDEX IF NOT EXISTS idx_flows_criticality ON flows(criticality DESC)" + ) + conn.execute( + "CREATE INDEX IF NOT EXISTS idx_flows_entry ON flows(entry_point_id)" + ) conn.execute( "CREATE INDEX IF NOT EXISTS idx_flow_memberships_node ON flow_memberships(node_id)" ) @@ -135,8 +132,12 @@ def _migrate_v4(conn: sqlite3.Connection) -> None: if not _has_column(conn, "nodes", "community_id"): conn.execute("ALTER TABLE nodes ADD COLUMN community_id INTEGER") logger.info("Migration v4: added 'community_id' column to nodes") - conn.execute("CREATE INDEX IF NOT EXISTS idx_nodes_community ON nodes(community_id)") - conn.execute("CREATE INDEX IF NOT EXISTS idx_communities_parent ON communities(parent_id)") + conn.execute( + "CREATE INDEX IF NOT EXISTS idx_nodes_community ON nodes(community_id)" + ) + conn.execute( + "CREATE INDEX IF NOT EXISTS idx_communities_parent ON communities(parent_id)" + ) conn.execute( "CREATE INDEX IF NOT EXISTS idx_communities_cohesion ON communities(cohesion DESC)" ) @@ -194,21 +195,12 @@ def _migrate_v6(conn: sqlite3.Connection) -> None: FOREIGN KEY (node_id) REFERENCES nodes(id) ) """) - conn.execute("CREATE INDEX IF NOT EXISTS idx_risk_index_score ON risk_index(risk_score DESC)") - logger.info( - "Migration v6: created summary tables (community_summaries, flow_snapshots, risk_index)" - ) - - -def _migrate_v7(conn: sqlite3.Connection) -> None: - """v7: Add compound edge indexes for summary and risk queries.""" - conn.execute( - "CREATE INDEX IF NOT EXISTS idx_edges_target_kind ON edges(target_qualified, kind)" - ) conn.execute( - "CREATE INDEX IF NOT EXISTS idx_edges_source_kind ON edges(source_qualified, kind)" + "CREATE INDEX IF NOT EXISTS idx_risk_index_score " + "ON risk_index(risk_score DESC)" ) - logger.info("Migration v7: added compound edge indexes") + logger.info("Migration v6: created summary tables " + "(community_summaries, flow_snapshots, risk_index)") # --------------------------------------------------------------------------- @@ -221,7 +213,6 @@ def _migrate_v7(conn: sqlite3.Connection) -> None: 4: _migrate_v4, 5: _migrate_v5, 6: _migrate_v6, - 7: _migrate_v7, } LATEST_VERSION = max(MIGRATIONS.keys()) diff --git a/tests/test_migrations.py b/tests/test_migrations.py index e903ded..3802aae 100644 --- a/tests/test_migrations.py +++ b/tests/test_migrations.py @@ -34,7 +34,9 @@ def test_v1_db_migrates_to_latest(self): # Manually create a v1 database (base schema only, version=1) conn = sqlite3.connect(str(self.tmp.name)) - conn.execute("INSERT OR REPLACE INTO metadata (key, value) VALUES ('schema_version', '1')") + conn.execute( + "INSERT OR REPLACE INTO metadata (key, value) VALUES ('schema_version', '1')" + ) conn.commit() # Drop migration artifacts to simulate v1 conn.execute("DROP TABLE IF EXISTS flows") @@ -96,7 +98,9 @@ def test_get_schema_version_no_metadata_table(self): def test_get_schema_version_no_key(self): """get_schema_version returns 1 when metadata exists but key is missing.""" conn = sqlite3.connect(":memory:") - conn.execute("CREATE TABLE metadata (key TEXT PRIMARY KEY, value TEXT NOT NULL)") + conn.execute( + "CREATE TABLE metadata (key TEXT PRIMARY KEY, value TEXT NOT NULL)" + ) conn.commit() assert get_schema_version(conn) == 1 conn.close() @@ -113,6 +117,7 @@ def test_run_migrations_on_already_current_db(self): version_after = get_schema_version(self.store._conn) assert version_before == version_after == LATEST_VERSION + def test_v6_summary_tables_exist(self): """v6 summary tables should exist after migration.""" tables = _get_table_names(self.store._conn) @@ -129,16 +134,10 @@ def test_v6_migration_idempotent(self): tables = _get_table_names(self.store._conn) assert "community_summaries" in tables - def test_v7_compound_edge_indexes_exist(self): - """v7 compound edge indexes should exist after migration.""" - rows = self.store._conn.execute("PRAGMA index_list(edges)").fetchall() - indexes = {row[1] if isinstance(row, tuple) else row["name"] for row in rows} - - assert "idx_edges_target_kind" in indexes - assert "idx_edges_source_kind" in indexes - def _get_table_names(conn: sqlite3.Connection) -> set[str]: """Helper: return all table/view names in the database.""" - rows = conn.execute("SELECT name FROM sqlite_master WHERE type IN ('table', 'view')").fetchall() + rows = conn.execute( + "SELECT name FROM sqlite_master WHERE type IN ('table', 'view')" + ).fetchall() return {row[0] if isinstance(row, (tuple, list)) else row["name"] for row in rows} From 5a31784a19573bb2542d9848fbbe995e9dda6c9c Mon Sep 17 00:00:00 2001 From: Christopher Skene Date: Tue, 7 Apr 2026 11:32:19 +1000 Subject: [PATCH 3/4] fix: add sqlite edge compound indexes --- code_review_graph/graph.py | 2 ++ code_review_graph/migrations.py | 14 ++++++++++++++ tests/test_migrations.py | 8 ++++++++ 3 files changed, 24 insertions(+) diff --git a/code_review_graph/graph.py b/code_review_graph/graph.py index 2dfa97f..83841e9 100644 --- a/code_review_graph/graph.py +++ b/code_review_graph/graph.py @@ -70,6 +70,8 @@ CREATE INDEX IF NOT EXISTS idx_edges_source ON edges(source_qualified); CREATE INDEX IF NOT EXISTS idx_edges_target ON edges(target_qualified); CREATE INDEX IF NOT EXISTS idx_edges_kind ON edges(kind); +CREATE INDEX IF NOT EXISTS idx_edges_target_kind ON edges(target_qualified, kind); +CREATE INDEX IF NOT EXISTS idx_edges_source_kind ON edges(source_qualified, kind); CREATE INDEX IF NOT EXISTS idx_edges_file ON edges(file_path); """ diff --git a/code_review_graph/migrations.py b/code_review_graph/migrations.py index ddb446e..06d4d07 100644 --- a/code_review_graph/migrations.py +++ b/code_review_graph/migrations.py @@ -203,6 +203,19 @@ def _migrate_v6(conn: sqlite3.Connection) -> None: "(community_summaries, flow_snapshots, risk_index)") +def _migrate_v7(conn: sqlite3.Connection) -> None: + """v7: Add compound edge indexes for summary and risk queries.""" + conn.execute( + "CREATE INDEX IF NOT EXISTS idx_edges_target_kind " + "ON edges(target_qualified, kind)" + ) + conn.execute( + "CREATE INDEX IF NOT EXISTS idx_edges_source_kind " + "ON edges(source_qualified, kind)" + ) + logger.info("Migration v7: added compound edge indexes") + + # --------------------------------------------------------------------------- # Migration registry # --------------------------------------------------------------------------- @@ -213,6 +226,7 @@ def _migrate_v6(conn: sqlite3.Connection) -> None: 4: _migrate_v4, 5: _migrate_v5, 6: _migrate_v6, + 7: _migrate_v7, } LATEST_VERSION = max(MIGRATIONS.keys()) diff --git a/tests/test_migrations.py b/tests/test_migrations.py index 3802aae..d208e60 100644 --- a/tests/test_migrations.py +++ b/tests/test_migrations.py @@ -134,6 +134,14 @@ def test_v6_migration_idempotent(self): tables = _get_table_names(self.store._conn) assert "community_summaries" in tables + def test_v7_compound_edge_indexes_exist(self): + """v7 compound edge indexes should exist after migration.""" + rows = self.store._conn.execute("PRAGMA index_list(edges)").fetchall() + indexes = {row[1] if isinstance(row, tuple) else row["name"] for row in rows} + + assert "idx_edges_target_kind" in indexes + assert "idx_edges_source_kind" in indexes + def _get_table_names(conn: sqlite3.Connection) -> set[str]: """Helper: return all table/view names in the database.""" From 05cbcd6056d1537a4b274ca4b11d98cea1f33680 Mon Sep 17 00:00:00 2001 From: Christopher Skene Date: Thu, 9 Apr 2026 07:00:08 +1000 Subject: [PATCH 4/4] Update supported schema version to 7 --- code-review-graph-vscode/src/backend/sqlite.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/code-review-graph-vscode/src/backend/sqlite.ts b/code-review-graph-vscode/src/backend/sqlite.ts index 267b3a2..9fead0f 100644 --- a/code-review-graph-vscode/src/backend/sqlite.ts +++ b/code-review-graph-vscode/src/backend/sqlite.ts @@ -208,7 +208,7 @@ export class SqliteReader { if (row) { const version = parseInt(row.value, 10); // Must match LATEST_VERSION in code_review_graph/migrations.py - const SUPPORTED_SCHEMA_VERSION = 6; + const SUPPORTED_SCHEMA_VERSION = 7; if (!isNaN(version) && version > SUPPORTED_SCHEMA_VERSION) { return `Database was created with a newer version (schema v${version}). Update the extension.`; }