From 9fc1f8fb3eb31090f35f8236d336dfb8fc494675 Mon Sep 17 00:00:00 2001
From: Christopher Skene <chris.skene@gmail.com>
Date: Tue, 7 Apr 2026 11:13:18 +1000
Subject: [PATCH 1/4] fix: add sqlite edge compound indexes

---
 code_review_graph/graph.py      | 164 +++++++++++++++++---------------
 code_review_graph/migrations.py |  59 +++++++-----
 tests/test_migrations.py        |  21 ++--
 3 files changed, 130 insertions(+), 114 deletions(-)

diff --git a/code_review_graph/graph.py b/code_review_graph/graph.py
index 2dfa97f..047f2e4 100644
--- a/code_review_graph/graph.py
+++ b/code_review_graph/graph.py
@@ -70,6 +70,8 @@
 CREATE INDEX IF NOT EXISTS idx_edges_source ON edges(source_qualified);
 CREATE INDEX IF NOT EXISTS idx_edges_target ON edges(target_qualified);
 CREATE INDEX IF NOT EXISTS idx_edges_kind ON edges(kind);
+CREATE INDEX IF NOT EXISTS idx_edges_target_kind ON edges(target_qualified, kind);
+CREATE INDEX IF NOT EXISTS idx_edges_source_kind ON edges(source_qualified, kind);
 CREATE INDEX IF NOT EXISTS idx_edges_file ON edges(file_path);
 """
 
@@ -125,9 +127,7 @@ class GraphStore:
     def __init__(self, db_path: str | Path) -> None:
         self.db_path = Path(db_path)
         self.db_path.parent.mkdir(parents=True, exist_ok=True)
-        self._conn = sqlite3.connect(
-            str(self.db_path), timeout=30, check_same_thread=False
-        )
+        self._conn = sqlite3.connect(str(self.db_path), timeout=30, check_same_thread=False)
         self._conn.row_factory = sqlite3.Row
         self._conn.execute("PRAGMA journal_mode=WAL")
         self._conn.execute("PRAGMA busy_timeout=5000")
@@ -136,8 +136,7 @@ def __init__(self, db_path: str | Path) -> None:
         if get_schema_version(self._conn) < 1:
             # Fresh DB — metadata table just created by _init_schema
             self._conn.execute(
-                "INSERT OR IGNORE INTO metadata (key, value) "
-                "VALUES ('schema_version', '1')"
+                "INSERT OR IGNORE INTO metadata (key, value) VALUES ('schema_version', '1')"
             )
             self._conn.commit()
         run_migrations(self._conn)
@@ -186,11 +185,21 @@ def upsert_node(self, node: NodeInfo, file_hash: str = "") -> int:
                  extra=excluded.extra, updated_at=excluded.updated_at
             """,
             (
-                node.kind, node.name, qualified, node.file_path,
-                node.line_start, node.line_end, node.language,
-                node.parent_name, node.params, node.return_type,
-                node.modifiers, int(node.is_test), file_hash,
-                extra, now,
+                node.kind,
+                node.name,
+                qualified,
+                node.file_path,
+                node.line_start,
+                node.line_end,
+                node.language,
+                node.parent_name,
+                node.params,
+                node.return_type,
+                node.modifiers,
+                int(node.is_test),
+                file_hash,
+                extra,
+                now,
             ),
         )
         row = self._conn.execute(
@@ -324,9 +333,7 @@ def search_nodes(self, query: str, limit: int = 20) -> list[GraphNode]:
         conditions: list[str] = []
         params: list[str | int] = []
         for word in words:
-            conditions.append(
-                "(LOWER(name) LIKE ? OR LOWER(qualified_name) LIKE ?)"
-            )
+            conditions.append("(LOWER(name) LIKE ? OR LOWER(qualified_name) LIKE ?)")
             params.extend([f"%{word}%", f"%{word}%"])
 
         where = " AND ".join(conditions)
@@ -357,10 +364,14 @@ def get_impact_radius(
         """
         if BFS_ENGINE == "networkx":
             return self._get_impact_radius_networkx(
-                changed_files, max_depth=max_depth, max_nodes=max_nodes,
+                changed_files,
+                max_depth=max_depth,
+                max_nodes=max_nodes,
             )
         return self.get_impact_radius_sql(
-            changed_files, max_depth=max_depth, max_nodes=max_nodes,
+            changed_files,
+            max_depth=max_depth,
+            max_nodes=max_nodes,
         )
 
     # -- SQLite recursive CTE version (default) ---------------------------
@@ -405,15 +416,12 @@ def get_impact_radius_sql(
 
         # Build recursive CTE — use a temp table for the seed set to
         # keep the query plan efficient and stay under variable limits.
-        self._conn.execute(
-            "CREATE TEMP TABLE IF NOT EXISTS _impact_seeds "
-            "(qn TEXT PRIMARY KEY)"
-        )
+        self._conn.execute("CREATE TEMP TABLE IF NOT EXISTS _impact_seeds (qn TEXT PRIMARY KEY)")
         self._conn.execute("DELETE FROM _impact_seeds")
         batch_size = 450
         seed_list = list(seeds)
         for i in range(0, len(seed_list), batch_size):
-            batch = seed_list[i:i + batch_size]
+            batch = seed_list[i : i + batch_size]
             placeholders = ",".join("(?)" for _ in batch)
             self._conn.execute(  # nosec B608
                 f"INSERT OR IGNORE INTO _impact_seeds (qn) VALUES {placeholders}",
@@ -440,7 +448,8 @@ def get_impact_radius_sql(
         LIMIT ?
         """
         rows = self._conn.execute(
-            cte_sql, (max_depth, max_depth, max_nodes + len(seeds)),
+            cte_sql,
+            (max_depth, max_depth, max_nodes + len(seeds)),
         ).fetchall()
 
         # Split into seeds vs impacted
@@ -573,7 +582,8 @@ def get_stats(self) -> GraphStats:
             edges_by_kind[row["kind"]] = row["cnt"]
 
         languages = [
-            r["language"] for r in self._conn.execute(
+            r["language"]
+            for r in self._conn.execute(
                 "SELECT DISTINCT language FROM nodes WHERE language IS NOT NULL AND language != ''"
             )
         ]
@@ -644,9 +654,7 @@ def get_nodes_by_size(
 
     def get_node_by_id(self, node_id: int) -> Optional[GraphNode]:
         """Fetch a single node by its integer primary key."""
-        row = self._conn.execute(
-            "SELECT * FROM nodes WHERE id = ?", (node_id,)
-        ).fetchone()
+        row = self._conn.execute("SELECT * FROM nodes WHERE id = ?", (node_id,)).fetchone()
         return self._row_to_node(row) if row else None
 
     def get_nodes_by_kind(
@@ -672,15 +680,15 @@ def get_nodes_by_kind(
             params.append(f"%{file_pattern}%")
         where = " AND ".join(conditions)
         rows = self._conn.execute(  # nosec B608
-            f"SELECT * FROM nodes WHERE {where}", params,
+            f"SELECT * FROM nodes WHERE {where}",
+            params,
         ).fetchall()
         return [self._row_to_node(r) for r in rows]
 
     def count_flow_memberships(self, node_id: int) -> int:
         """Return the number of flows a node participates in."""
         row = self._conn.execute(
-            "SELECT COUNT(*) as cnt FROM flow_memberships "
-            "WHERE node_id = ?",
+            "SELECT COUNT(*) as cnt FROM flow_memberships WHERE node_id = ?",
             (node_id,),
         ).fetchone()
         return row["cnt"] if row else 0
@@ -696,7 +704,8 @@ def get_node_community_id(self, node_id: int) -> int | None:
         return None
 
     def get_community_ids_by_qualified_names(
-        self, qns: list[str],
+        self,
+        qns: list[str],
     ) -> dict[str, int | None]:
         """Batch-fetch ``community_id`` for a list of qualified names.
 
@@ -706,7 +715,7 @@ def get_community_ids_by_qualified_names(
         result: dict[str, int | None] = {}
         batch_size = 450
         for i in range(0, len(qns), batch_size):
-            batch = qns[i:i + batch_size]
+            batch = qns[i : i + batch_size]
             placeholders = ",".join("?" for _ in batch)
             rows = self._conn.execute(  # nosec B608
                 "SELECT qualified_name, community_id FROM nodes "
@@ -720,8 +729,7 @@ def get_community_ids_by_qualified_names(
     def get_files_matching(self, pattern: str) -> list[str]:
         """Return distinct ``file_path`` values matching a LIKE suffix."""
         rows = self._conn.execute(
-            "SELECT DISTINCT file_path FROM nodes "
-            "WHERE file_path LIKE ?",
+            "SELECT DISTINCT file_path FROM nodes WHERE file_path LIKE ?",
             (f"%{pattern}",),
         ).fetchall()
         return [r["file_path"] for r in rows]
@@ -729,12 +737,13 @@ def get_files_matching(self, pattern: str) -> list[str]:
     def get_nodes_without_signature(self) -> list[sqlite3.Row]:
         """Return raw rows for nodes that have no signature yet."""
         return self._conn.execute(
-            "SELECT id, name, kind, params, return_type "
-            "FROM nodes WHERE signature IS NULL"
+            "SELECT id, name, kind, params, return_type FROM nodes WHERE signature IS NULL"
         ).fetchall()
 
     def update_node_signature(
-        self, node_id: int, signature: str,
+        self,
+        node_id: int,
+        signature: str,
     ) -> None:
         """Set the ``signature`` column for a single node."""
         self._conn.execute(
@@ -748,18 +757,14 @@ def get_all_community_ids(self) -> dict[str, int | None]:
         Used primarily by the visualization exporter.
         """
         try:
-            rows = self._conn.execute(
-                "SELECT qualified_name, community_id FROM nodes"
-            ).fetchall()
-            return {
-                r["qualified_name"]: r["community_id"]
-                for r in rows
-            }
+            rows = self._conn.execute("SELECT qualified_name, community_id FROM nodes").fetchall()
+            return {r["qualified_name"]: r["community_id"] for r in rows}
         except Exception:
             return {}
 
     def get_node_ids_by_files(
-        self, file_paths: list[str],
+        self,
+        file_paths: list[str],
     ) -> set[int]:
         """Return node IDs belonging to the given file paths."""
         if not file_paths:
@@ -767,18 +772,18 @@ def get_node_ids_by_files(
         result: set[int] = set()
         batch_size = 450
         for i in range(0, len(file_paths), batch_size):
-            batch = file_paths[i:i + batch_size]
+            batch = file_paths[i : i + batch_size]
             placeholders = ",".join("?" for _ in batch)
             rows = self._conn.execute(  # nosec B608
-                "SELECT id FROM nodes "
-                f"WHERE file_path IN ({placeholders})",
+                f"SELECT id FROM nodes WHERE file_path IN ({placeholders})",
                 batch,
             ).fetchall()
             result.update(r["id"] for r in rows)
         return result
 
     def get_flow_ids_by_node_ids(
-        self, node_ids: set[int],
+        self,
+        node_ids: set[int],
     ) -> list[int]:
         """Return distinct flow IDs that contain any of *node_ids*."""
         if not node_ids:
@@ -787,11 +792,10 @@ def get_flow_ids_by_node_ids(
         result: list[int] = []
         batch_size = 450
         for i in range(0, len(nids), batch_size):
-            batch = nids[i:i + batch_size]
+            batch = nids[i : i + batch_size]
             placeholders = ",".join("?" for _ in batch)
             rows = self._conn.execute(  # nosec B608
-                "SELECT DISTINCT flow_id FROM flow_memberships "
-                f"WHERE node_id IN ({placeholders})",
+                f"SELECT DISTINCT flow_id FROM flow_memberships WHERE node_id IN ({placeholders})",
                 batch,
             ).fetchall()
             result.extend(r["flow_id"] for r in rows)
@@ -810,15 +814,15 @@ def get_flow_qualified_names(self, flow_id: int) -> set[str]:
     def get_node_kind_by_id(self, node_id: int) -> str | None:
         """Return just the ``kind`` column for a node, or ``None``."""
         row = self._conn.execute(
-            "SELECT kind FROM nodes WHERE id = ?", (node_id,),
+            "SELECT kind FROM nodes WHERE id = ?",
+            (node_id,),
         ).fetchone()
         return row["kind"] if row else None
 
     def get_all_call_targets(self) -> set[str]:
         """Return the set of all CALLS-edge target qualified names."""
         rows = self._conn.execute(
-            "SELECT DISTINCT target_qualified FROM edges "
-            "WHERE kind = 'CALLS'"
+            "SELECT DISTINCT target_qualified FROM edges WHERE kind = 'CALLS'"
         ).fetchall()
         return {r["target_qualified"] for r in rows}
 
@@ -827,25 +831,24 @@ def get_communities_list(
     ) -> list[sqlite3.Row]:
         """Return raw rows from the ``communities`` table."""
         try:
-            return self._conn.execute(
-                "SELECT id, name FROM communities"
-            ).fetchall()
+            return self._conn.execute("SELECT id, name FROM communities").fetchall()
         except Exception:
             return []
 
     def get_community_member_qns(
-        self, community_id: int,
+        self,
+        community_id: int,
     ) -> list[str]:
         """Return qualified names of nodes in a community."""
         rows = self._conn.execute(
-            "SELECT qualified_name FROM nodes "
-            "WHERE community_id = ?",
+            "SELECT qualified_name FROM nodes WHERE community_id = ?",
             (community_id,),
         ).fetchall()
         return [r["qualified_name"] for r in rows]
 
     def get_nodes_by_community_id(
-        self, community_id: int,
+        self,
+        community_id: int,
     ) -> list[GraphNode]:
         """Return all nodes belonging to a community."""
         rows = self._conn.execute(
@@ -855,34 +858,34 @@ def get_nodes_by_community_id(
         return [self._row_to_node(r) for r in rows]
 
     def get_outgoing_targets(
-        self, source_qns: list[str],
+        self,
+        source_qns: list[str],
     ) -> list[str]:
         """Return ``target_qualified`` for edges sourced from *source_qns*."""
         results: list[str] = []
         batch_size = 450
         for i in range(0, len(source_qns), batch_size):
-            batch = source_qns[i:i + batch_size]
+            batch = source_qns[i : i + batch_size]
             placeholders = ",".join("?" for _ in batch)
             rows = self._conn.execute(  # nosec B608
-                "SELECT target_qualified FROM edges "
-                f"WHERE source_qualified IN ({placeholders})",
+                f"SELECT target_qualified FROM edges WHERE source_qualified IN ({placeholders})",
                 batch,
             ).fetchall()
             results.extend(r["target_qualified"] for r in rows)
         return results
 
     def get_incoming_sources(
-        self, target_qns: list[str],
+        self,
+        target_qns: list[str],
     ) -> list[str]:
         """Return ``source_qualified`` for edges targeting *target_qns*."""
         results: list[str] = []
         batch_size = 450
         for i in range(0, len(target_qns), batch_size):
-            batch = target_qns[i:i + batch_size]
+            batch = target_qns[i : i + batch_size]
             placeholders = ",".join("?" for _ in batch)
             rows = self._conn.execute(  # nosec B608
-                "SELECT source_qualified FROM edges "
-                f"WHERE target_qualified IN ({placeholders})",
+                f"SELECT source_qualified FROM edges WHERE target_qualified IN ({placeholders})",
                 batch,
             ).fetchall()
             results.extend(r["source_qualified"] for r in rows)
@@ -907,7 +910,7 @@ def get_edges_among(self, qualified_names: set[str]) -> list[GraphEdge]:
         results: list[GraphEdge] = []
         batch_size = 450  # Stay well under SQLite's default 999 limit
         for i in range(0, len(qns), batch_size):
-            batch = qns[i:i + batch_size]
+            batch = qns[i : i + batch_size]
             placeholders = ",".join("?" for _ in batch)
             rows = self._conn.execute(  # nosec B608
                 f"SELECT * FROM edges WHERE source_qualified IN ({placeholders})",
@@ -927,7 +930,7 @@ def _batch_get_nodes(self, qualified_names: set[str]) -> list[GraphNode]:
         results: list[GraphNode] = []
         batch_size = 450
         for i in range(0, len(qns), batch_size):
-            batch = qns[i:i + batch_size]
+            batch = qns[i : i + batch_size]
             placeholders = ",".join("?" for _ in batch)
             rows = self._conn.execute(  # nosec B608
                 f"SELECT * FROM nodes WHERE qualified_name IN ({placeholders})",
@@ -997,18 +1000,19 @@ def _sanitize_name(s: str, max_len: int = 256) -> str:
     agent behaviour.
     """
     # Strip control chars 0x00-0x1F except \t (0x09) and \n (0x0A)
-    cleaned = "".join(
-        ch for ch in s
-        if ch in ("\t", "\n") or ord(ch) >= 0x20
-    )
+    cleaned = "".join(ch for ch in s if ch in ("\t", "\n") or ord(ch) >= 0x20)
     return cleaned[:max_len]
 
 
 def node_to_dict(n: GraphNode) -> dict:
     return {
-        "id": n.id, "kind": n.kind, "name": _sanitize_name(n.name),
-        "qualified_name": _sanitize_name(n.qualified_name), "file_path": n.file_path,
-        "line_start": n.line_start, "line_end": n.line_end,
+        "id": n.id,
+        "kind": n.kind,
+        "name": _sanitize_name(n.name),
+        "qualified_name": _sanitize_name(n.qualified_name),
+        "file_path": n.file_path,
+        "line_start": n.line_start,
+        "line_end": n.line_end,
         "language": n.language,
         "parent_name": _sanitize_name(n.parent_name) if n.parent_name else n.parent_name,
         "is_test": n.is_test,
@@ -1017,8 +1021,10 @@ def node_to_dict(n: GraphNode) -> dict:
 
 def edge_to_dict(e: GraphEdge) -> dict:
     return {
-        "id": e.id, "kind": e.kind,
+        "id": e.id,
+        "kind": e.kind,
         "source": _sanitize_name(e.source_qualified),
         "target": _sanitize_name(e.target_qualified),
-        "file_path": e.file_path, "line": e.line,
+        "file_path": e.file_path,
+        "line": e.line,
     }
diff --git a/code_review_graph/migrations.py b/code_review_graph/migrations.py
index ddb446e..1734c69 100644
--- a/code_review_graph/migrations.py
+++ b/code_review_graph/migrations.py
@@ -20,9 +20,7 @@ def get_schema_version(conn: sqlite3.Connection) -> int:
         int: The schema version (0 if metadata table doesn't exist, 1 if not set).
     """
     try:
-        row = conn.execute(
-            "SELECT value FROM metadata WHERE key = 'schema_version'"
-        ).fetchone()
+        row = conn.execute("SELECT value FROM metadata WHERE key = 'schema_version'").fetchone()
         if row is None:
             return 1
         return int(row[0] if isinstance(row, (tuple, list)) else row["value"])
@@ -39,10 +37,20 @@ def _set_schema_version(conn: sqlite3.Connection, version: int) -> None:
     )
 
 
-_KNOWN_TABLES = frozenset({
-    "nodes", "edges", "metadata", "communities", "flows", "flow_memberships", "nodes_fts",
-    "community_summaries", "flow_snapshots", "risk_index",
-})
+_KNOWN_TABLES = frozenset(
+    {
+        "nodes",
+        "edges",
+        "metadata",
+        "communities",
+        "flows",
+        "flow_memberships",
+        "nodes_fts",
+        "community_summaries",
+        "flow_snapshots",
+        "risk_index",
+    }
+)
 
 
 def _has_column(conn: sqlite3.Connection, table: str, column: str) -> bool:
@@ -59,8 +67,7 @@ def _table_exists(conn: sqlite3.Connection, table: str) -> bool:
     if table not in _KNOWN_TABLES:
         raise ValueError(f"Unknown table: {table}")
     row = conn.execute(
-        "SELECT count(*) FROM sqlite_master WHERE type IN ('table', 'view') "
-        "AND name = ?",
+        "SELECT count(*) FROM sqlite_master WHERE type IN ('table', 'view') AND name = ?",
         (table,),
     ).fetchone()
     return row[0] > 0
@@ -102,12 +109,8 @@ def _migrate_v3(conn: sqlite3.Connection) -> None:
             PRIMARY KEY (flow_id, node_id)
         )
     """)
-    conn.execute(
-        "CREATE INDEX IF NOT EXISTS idx_flows_criticality ON flows(criticality DESC)"
-    )
-    conn.execute(
-        "CREATE INDEX IF NOT EXISTS idx_flows_entry ON flows(entry_point_id)"
-    )
+    conn.execute("CREATE INDEX IF NOT EXISTS idx_flows_criticality ON flows(criticality DESC)")
+    conn.execute("CREATE INDEX IF NOT EXISTS idx_flows_entry ON flows(entry_point_id)")
     conn.execute(
         "CREATE INDEX IF NOT EXISTS idx_flow_memberships_node ON flow_memberships(node_id)"
     )
@@ -132,12 +135,8 @@ def _migrate_v4(conn: sqlite3.Connection) -> None:
     if not _has_column(conn, "nodes", "community_id"):
         conn.execute("ALTER TABLE nodes ADD COLUMN community_id INTEGER")
         logger.info("Migration v4: added 'community_id' column to nodes")
-    conn.execute(
-        "CREATE INDEX IF NOT EXISTS idx_nodes_community ON nodes(community_id)"
-    )
-    conn.execute(
-        "CREATE INDEX IF NOT EXISTS idx_communities_parent ON communities(parent_id)"
-    )
+    conn.execute("CREATE INDEX IF NOT EXISTS idx_nodes_community ON nodes(community_id)")
+    conn.execute("CREATE INDEX IF NOT EXISTS idx_communities_parent ON communities(parent_id)")
     conn.execute(
         "CREATE INDEX IF NOT EXISTS idx_communities_cohesion ON communities(cohesion DESC)"
     )
@@ -195,12 +194,21 @@ def _migrate_v6(conn: sqlite3.Connection) -> None:
             FOREIGN KEY (node_id) REFERENCES nodes(id)
         )
     """)
+    conn.execute("CREATE INDEX IF NOT EXISTS idx_risk_index_score ON risk_index(risk_score DESC)")
+    logger.info(
+        "Migration v6: created summary tables (community_summaries, flow_snapshots, risk_index)"
+    )
+
+
+def _migrate_v7(conn: sqlite3.Connection) -> None:
+    """v7: Add compound edge indexes for summary and risk queries."""
+    conn.execute(
+        "CREATE INDEX IF NOT EXISTS idx_edges_target_kind ON edges(target_qualified, kind)"
+    )
     conn.execute(
-        "CREATE INDEX IF NOT EXISTS idx_risk_index_score "
-        "ON risk_index(risk_score DESC)"
+        "CREATE INDEX IF NOT EXISTS idx_edges_source_kind ON edges(source_qualified, kind)"
     )
-    logger.info("Migration v6: created summary tables "
-                "(community_summaries, flow_snapshots, risk_index)")
+    logger.info("Migration v7: added compound edge indexes")
 
 
 # ---------------------------------------------------------------------------
@@ -213,6 +221,7 @@ def _migrate_v6(conn: sqlite3.Connection) -> None:
     4: _migrate_v4,
     5: _migrate_v5,
     6: _migrate_v6,
+    7: _migrate_v7,
 }
 
 LATEST_VERSION = max(MIGRATIONS.keys())
diff --git a/tests/test_migrations.py b/tests/test_migrations.py
index 3802aae..e903ded 100644
--- a/tests/test_migrations.py
+++ b/tests/test_migrations.py
@@ -34,9 +34,7 @@ def test_v1_db_migrates_to_latest(self):
 
         # Manually create a v1 database (base schema only, version=1)
         conn = sqlite3.connect(str(self.tmp.name))
-        conn.execute(
-            "INSERT OR REPLACE INTO metadata (key, value) VALUES ('schema_version', '1')"
-        )
+        conn.execute("INSERT OR REPLACE INTO metadata (key, value) VALUES ('schema_version', '1')")
         conn.commit()
         # Drop migration artifacts to simulate v1
         conn.execute("DROP TABLE IF EXISTS flows")
@@ -98,9 +96,7 @@ def test_get_schema_version_no_metadata_table(self):
     def test_get_schema_version_no_key(self):
         """get_schema_version returns 1 when metadata exists but key is missing."""
         conn = sqlite3.connect(":memory:")
-        conn.execute(
-            "CREATE TABLE metadata (key TEXT PRIMARY KEY, value TEXT NOT NULL)"
-        )
+        conn.execute("CREATE TABLE metadata (key TEXT PRIMARY KEY, value TEXT NOT NULL)")
         conn.commit()
         assert get_schema_version(conn) == 1
         conn.close()
@@ -117,7 +113,6 @@ def test_run_migrations_on_already_current_db(self):
         version_after = get_schema_version(self.store._conn)
         assert version_before == version_after == LATEST_VERSION
 
-
     def test_v6_summary_tables_exist(self):
         """v6 summary tables should exist after migration."""
         tables = _get_table_names(self.store._conn)
@@ -134,10 +129,16 @@ def test_v6_migration_idempotent(self):
         tables = _get_table_names(self.store._conn)
         assert "community_summaries" in tables
 
+    def test_v7_compound_edge_indexes_exist(self):
+        """v7 compound edge indexes should exist after migration."""
+        rows = self.store._conn.execute("PRAGMA index_list(edges)").fetchall()
+        indexes = {row[1] if isinstance(row, tuple) else row["name"] for row in rows}
+
+        assert "idx_edges_target_kind" in indexes
+        assert "idx_edges_source_kind" in indexes
+
 
 def _get_table_names(conn: sqlite3.Connection) -> set[str]:
     """Helper: return all table/view names in the database."""
-    rows = conn.execute(
-        "SELECT name FROM sqlite_master WHERE type IN ('table', 'view')"
-    ).fetchall()
+    rows = conn.execute("SELECT name FROM sqlite_master WHERE type IN ('table', 'view')").fetchall()
     return {row[0] if isinstance(row, (tuple, list)) else row["name"] for row in rows}

From 2fb80b9b3757c3253d5173d98b64d4fcd8064755 Mon Sep 17 00:00:00 2001
From: Christopher Skene <chris.skene@gmail.com>
Date: Tue, 7 Apr 2026 11:27:29 +1000
Subject: [PATCH 2/4] Revert "fix: add sqlite edge compound indexes"

This reverts commit 9fc1f8fb3eb31090f35f8236d336dfb8fc494675.
---
 code_review_graph/graph.py      | 164 +++++++++++++++-----------------
 code_review_graph/migrations.py |  59 +++++-------
 tests/test_migrations.py        |  21 ++--
 3 files changed, 114 insertions(+), 130 deletions(-)

diff --git a/code_review_graph/graph.py b/code_review_graph/graph.py
index 047f2e4..2dfa97f 100644
--- a/code_review_graph/graph.py
+++ b/code_review_graph/graph.py
@@ -70,8 +70,6 @@
 CREATE INDEX IF NOT EXISTS idx_edges_source ON edges(source_qualified);
 CREATE INDEX IF NOT EXISTS idx_edges_target ON edges(target_qualified);
 CREATE INDEX IF NOT EXISTS idx_edges_kind ON edges(kind);
-CREATE INDEX IF NOT EXISTS idx_edges_target_kind ON edges(target_qualified, kind);
-CREATE INDEX IF NOT EXISTS idx_edges_source_kind ON edges(source_qualified, kind);
 CREATE INDEX IF NOT EXISTS idx_edges_file ON edges(file_path);
 """
 
@@ -127,7 +125,9 @@ class GraphStore:
     def __init__(self, db_path: str | Path) -> None:
         self.db_path = Path(db_path)
         self.db_path.parent.mkdir(parents=True, exist_ok=True)
-        self._conn = sqlite3.connect(str(self.db_path), timeout=30, check_same_thread=False)
+        self._conn = sqlite3.connect(
+            str(self.db_path), timeout=30, check_same_thread=False
+        )
         self._conn.row_factory = sqlite3.Row
         self._conn.execute("PRAGMA journal_mode=WAL")
         self._conn.execute("PRAGMA busy_timeout=5000")
@@ -136,7 +136,8 @@ def __init__(self, db_path: str | Path) -> None:
         if get_schema_version(self._conn) < 1:
             # Fresh DB — metadata table just created by _init_schema
             self._conn.execute(
-                "INSERT OR IGNORE INTO metadata (key, value) VALUES ('schema_version', '1')"
+                "INSERT OR IGNORE INTO metadata (key, value) "
+                "VALUES ('schema_version', '1')"
             )
             self._conn.commit()
         run_migrations(self._conn)
@@ -185,21 +186,11 @@ def upsert_node(self, node: NodeInfo, file_hash: str = "") -> int:
                  extra=excluded.extra, updated_at=excluded.updated_at
             """,
             (
-                node.kind,
-                node.name,
-                qualified,
-                node.file_path,
-                node.line_start,
-                node.line_end,
-                node.language,
-                node.parent_name,
-                node.params,
-                node.return_type,
-                node.modifiers,
-                int(node.is_test),
-                file_hash,
-                extra,
-                now,
+                node.kind, node.name, qualified, node.file_path,
+                node.line_start, node.line_end, node.language,
+                node.parent_name, node.params, node.return_type,
+                node.modifiers, int(node.is_test), file_hash,
+                extra, now,
             ),
         )
         row = self._conn.execute(
@@ -333,7 +324,9 @@ def search_nodes(self, query: str, limit: int = 20) -> list[GraphNode]:
         conditions: list[str] = []
         params: list[str | int] = []
         for word in words:
-            conditions.append("(LOWER(name) LIKE ? OR LOWER(qualified_name) LIKE ?)")
+            conditions.append(
+                "(LOWER(name) LIKE ? OR LOWER(qualified_name) LIKE ?)"
+            )
             params.extend([f"%{word}%", f"%{word}%"])
 
         where = " AND ".join(conditions)
@@ -364,14 +357,10 @@ def get_impact_radius(
         """
         if BFS_ENGINE == "networkx":
             return self._get_impact_radius_networkx(
-                changed_files,
-                max_depth=max_depth,
-                max_nodes=max_nodes,
+                changed_files, max_depth=max_depth, max_nodes=max_nodes,
             )
         return self.get_impact_radius_sql(
-            changed_files,
-            max_depth=max_depth,
-            max_nodes=max_nodes,
+            changed_files, max_depth=max_depth, max_nodes=max_nodes,
         )
 
     # -- SQLite recursive CTE version (default) ---------------------------
@@ -416,12 +405,15 @@ def get_impact_radius_sql(
 
         # Build recursive CTE — use a temp table for the seed set to
         # keep the query plan efficient and stay under variable limits.
-        self._conn.execute("CREATE TEMP TABLE IF NOT EXISTS _impact_seeds (qn TEXT PRIMARY KEY)")
+        self._conn.execute(
+            "CREATE TEMP TABLE IF NOT EXISTS _impact_seeds "
+            "(qn TEXT PRIMARY KEY)"
+        )
         self._conn.execute("DELETE FROM _impact_seeds")
         batch_size = 450
         seed_list = list(seeds)
         for i in range(0, len(seed_list), batch_size):
-            batch = seed_list[i : i + batch_size]
+            batch = seed_list[i:i + batch_size]
             placeholders = ",".join("(?)" for _ in batch)
             self._conn.execute(  # nosec B608
                 f"INSERT OR IGNORE INTO _impact_seeds (qn) VALUES {placeholders}",
@@ -448,8 +440,7 @@ def get_impact_radius_sql(
         LIMIT ?
         """
         rows = self._conn.execute(
-            cte_sql,
-            (max_depth, max_depth, max_nodes + len(seeds)),
+            cte_sql, (max_depth, max_depth, max_nodes + len(seeds)),
         ).fetchall()
 
         # Split into seeds vs impacted
@@ -582,8 +573,7 @@ def get_stats(self) -> GraphStats:
             edges_by_kind[row["kind"]] = row["cnt"]
 
         languages = [
-            r["language"]
-            for r in self._conn.execute(
+            r["language"] for r in self._conn.execute(
                 "SELECT DISTINCT language FROM nodes WHERE language IS NOT NULL AND language != ''"
             )
         ]
@@ -654,7 +644,9 @@ def get_nodes_by_size(
 
     def get_node_by_id(self, node_id: int) -> Optional[GraphNode]:
         """Fetch a single node by its integer primary key."""
-        row = self._conn.execute("SELECT * FROM nodes WHERE id = ?", (node_id,)).fetchone()
+        row = self._conn.execute(
+            "SELECT * FROM nodes WHERE id = ?", (node_id,)
+        ).fetchone()
         return self._row_to_node(row) if row else None
 
     def get_nodes_by_kind(
@@ -680,15 +672,15 @@ def get_nodes_by_kind(
             params.append(f"%{file_pattern}%")
         where = " AND ".join(conditions)
         rows = self._conn.execute(  # nosec B608
-            f"SELECT * FROM nodes WHERE {where}",
-            params,
+            f"SELECT * FROM nodes WHERE {where}", params,
         ).fetchall()
         return [self._row_to_node(r) for r in rows]
 
     def count_flow_memberships(self, node_id: int) -> int:
         """Return the number of flows a node participates in."""
         row = self._conn.execute(
-            "SELECT COUNT(*) as cnt FROM flow_memberships WHERE node_id = ?",
+            "SELECT COUNT(*) as cnt FROM flow_memberships "
+            "WHERE node_id = ?",
             (node_id,),
         ).fetchone()
         return row["cnt"] if row else 0
@@ -704,8 +696,7 @@ def get_node_community_id(self, node_id: int) -> int | None:
         return None
 
     def get_community_ids_by_qualified_names(
-        self,
-        qns: list[str],
+        self, qns: list[str],
     ) -> dict[str, int | None]:
         """Batch-fetch ``community_id`` for a list of qualified names.
 
@@ -715,7 +706,7 @@ def get_community_ids_by_qualified_names(
         result: dict[str, int | None] = {}
         batch_size = 450
         for i in range(0, len(qns), batch_size):
-            batch = qns[i : i + batch_size]
+            batch = qns[i:i + batch_size]
             placeholders = ",".join("?" for _ in batch)
             rows = self._conn.execute(  # nosec B608
                 "SELECT qualified_name, community_id FROM nodes "
@@ -729,7 +720,8 @@ def get_community_ids_by_qualified_names(
     def get_files_matching(self, pattern: str) -> list[str]:
         """Return distinct ``file_path`` values matching a LIKE suffix."""
         rows = self._conn.execute(
-            "SELECT DISTINCT file_path FROM nodes WHERE file_path LIKE ?",
+            "SELECT DISTINCT file_path FROM nodes "
+            "WHERE file_path LIKE ?",
             (f"%{pattern}",),
         ).fetchall()
         return [r["file_path"] for r in rows]
@@ -737,13 +729,12 @@ def get_files_matching(self, pattern: str) -> list[str]:
     def get_nodes_without_signature(self) -> list[sqlite3.Row]:
         """Return raw rows for nodes that have no signature yet."""
         return self._conn.execute(
-            "SELECT id, name, kind, params, return_type FROM nodes WHERE signature IS NULL"
+            "SELECT id, name, kind, params, return_type "
+            "FROM nodes WHERE signature IS NULL"
         ).fetchall()
 
     def update_node_signature(
-        self,
-        node_id: int,
-        signature: str,
+        self, node_id: int, signature: str,
     ) -> None:
         """Set the ``signature`` column for a single node."""
         self._conn.execute(
@@ -757,14 +748,18 @@ def get_all_community_ids(self) -> dict[str, int | None]:
         Used primarily by the visualization exporter.
         """
         try:
-            rows = self._conn.execute("SELECT qualified_name, community_id FROM nodes").fetchall()
-            return {r["qualified_name"]: r["community_id"] for r in rows}
+            rows = self._conn.execute(
+                "SELECT qualified_name, community_id FROM nodes"
+            ).fetchall()
+            return {
+                r["qualified_name"]: r["community_id"]
+                for r in rows
+            }
         except Exception:
             return {}
 
     def get_node_ids_by_files(
-        self,
-        file_paths: list[str],
+        self, file_paths: list[str],
     ) -> set[int]:
         """Return node IDs belonging to the given file paths."""
         if not file_paths:
@@ -772,18 +767,18 @@ def get_node_ids_by_files(
         result: set[int] = set()
         batch_size = 450
         for i in range(0, len(file_paths), batch_size):
-            batch = file_paths[i : i + batch_size]
+            batch = file_paths[i:i + batch_size]
             placeholders = ",".join("?" for _ in batch)
             rows = self._conn.execute(  # nosec B608
-                f"SELECT id FROM nodes WHERE file_path IN ({placeholders})",
+                "SELECT id FROM nodes "
+                f"WHERE file_path IN ({placeholders})",
                 batch,
             ).fetchall()
             result.update(r["id"] for r in rows)
         return result
 
     def get_flow_ids_by_node_ids(
-        self,
-        node_ids: set[int],
+        self, node_ids: set[int],
     ) -> list[int]:
         """Return distinct flow IDs that contain any of *node_ids*."""
         if not node_ids:
@@ -792,10 +787,11 @@ def get_flow_ids_by_node_ids(
         result: list[int] = []
         batch_size = 450
         for i in range(0, len(nids), batch_size):
-            batch = nids[i : i + batch_size]
+            batch = nids[i:i + batch_size]
             placeholders = ",".join("?" for _ in batch)
             rows = self._conn.execute(  # nosec B608
-                f"SELECT DISTINCT flow_id FROM flow_memberships WHERE node_id IN ({placeholders})",
+                "SELECT DISTINCT flow_id FROM flow_memberships "
+                f"WHERE node_id IN ({placeholders})",
                 batch,
             ).fetchall()
             result.extend(r["flow_id"] for r in rows)
@@ -814,15 +810,15 @@ def get_flow_qualified_names(self, flow_id: int) -> set[str]:
     def get_node_kind_by_id(self, node_id: int) -> str | None:
         """Return just the ``kind`` column for a node, or ``None``."""
         row = self._conn.execute(
-            "SELECT kind FROM nodes WHERE id = ?",
-            (node_id,),
+            "SELECT kind FROM nodes WHERE id = ?", (node_id,),
         ).fetchone()
         return row["kind"] if row else None
 
     def get_all_call_targets(self) -> set[str]:
         """Return the set of all CALLS-edge target qualified names."""
         rows = self._conn.execute(
-            "SELECT DISTINCT target_qualified FROM edges WHERE kind = 'CALLS'"
+            "SELECT DISTINCT target_qualified FROM edges "
+            "WHERE kind = 'CALLS'"
         ).fetchall()
         return {r["target_qualified"] for r in rows}
 
@@ -831,24 +827,25 @@ def get_communities_list(
     ) -> list[sqlite3.Row]:
         """Return raw rows from the ``communities`` table."""
         try:
-            return self._conn.execute("SELECT id, name FROM communities").fetchall()
+            return self._conn.execute(
+                "SELECT id, name FROM communities"
+            ).fetchall()
         except Exception:
             return []
 
     def get_community_member_qns(
-        self,
-        community_id: int,
+        self, community_id: int,
     ) -> list[str]:
         """Return qualified names of nodes in a community."""
         rows = self._conn.execute(
-            "SELECT qualified_name FROM nodes WHERE community_id = ?",
+            "SELECT qualified_name FROM nodes "
+            "WHERE community_id = ?",
             (community_id,),
         ).fetchall()
         return [r["qualified_name"] for r in rows]
 
     def get_nodes_by_community_id(
-        self,
-        community_id: int,
+        self, community_id: int,
     ) -> list[GraphNode]:
         """Return all nodes belonging to a community."""
         rows = self._conn.execute(
@@ -858,34 +855,34 @@ def get_nodes_by_community_id(
         return [self._row_to_node(r) for r in rows]
 
     def get_outgoing_targets(
-        self,
-        source_qns: list[str],
+        self, source_qns: list[str],
     ) -> list[str]:
         """Return ``target_qualified`` for edges sourced from *source_qns*."""
         results: list[str] = []
         batch_size = 450
         for i in range(0, len(source_qns), batch_size):
-            batch = source_qns[i : i + batch_size]
+            batch = source_qns[i:i + batch_size]
             placeholders = ",".join("?" for _ in batch)
             rows = self._conn.execute(  # nosec B608
-                f"SELECT target_qualified FROM edges WHERE source_qualified IN ({placeholders})",
+                "SELECT target_qualified FROM edges "
+                f"WHERE source_qualified IN ({placeholders})",
                 batch,
             ).fetchall()
             results.extend(r["target_qualified"] for r in rows)
         return results
 
     def get_incoming_sources(
-        self,
-        target_qns: list[str],
+        self, target_qns: list[str],
     ) -> list[str]:
         """Return ``source_qualified`` for edges targeting *target_qns*."""
         results: list[str] = []
         batch_size = 450
         for i in range(0, len(target_qns), batch_size):
-            batch = target_qns[i : i + batch_size]
+            batch = target_qns[i:i + batch_size]
             placeholders = ",".join("?" for _ in batch)
             rows = self._conn.execute(  # nosec B608
-                f"SELECT source_qualified FROM edges WHERE target_qualified IN ({placeholders})",
+                "SELECT source_qualified FROM edges "
+                f"WHERE target_qualified IN ({placeholders})",
                 batch,
             ).fetchall()
             results.extend(r["source_qualified"] for r in rows)
@@ -910,7 +907,7 @@ def get_edges_among(self, qualified_names: set[str]) -> list[GraphEdge]:
         results: list[GraphEdge] = []
         batch_size = 450  # Stay well under SQLite's default 999 limit
         for i in range(0, len(qns), batch_size):
-            batch = qns[i : i + batch_size]
+            batch = qns[i:i + batch_size]
             placeholders = ",".join("?" for _ in batch)
             rows = self._conn.execute(  # nosec B608
                 f"SELECT * FROM edges WHERE source_qualified IN ({placeholders})",
@@ -930,7 +927,7 @@ def _batch_get_nodes(self, qualified_names: set[str]) -> list[GraphNode]:
         results: list[GraphNode] = []
         batch_size = 450
         for i in range(0, len(qns), batch_size):
-            batch = qns[i : i + batch_size]
+            batch = qns[i:i + batch_size]
             placeholders = ",".join("?" for _ in batch)
             rows = self._conn.execute(  # nosec B608
                 f"SELECT * FROM nodes WHERE qualified_name IN ({placeholders})",
@@ -1000,19 +997,18 @@ def _sanitize_name(s: str, max_len: int = 256) -> str:
     agent behaviour.
     """
     # Strip control chars 0x00-0x1F except \t (0x09) and \n (0x0A)
-    cleaned = "".join(ch for ch in s if ch in ("\t", "\n") or ord(ch) >= 0x20)
+    cleaned = "".join(
+        ch for ch in s
+        if ch in ("\t", "\n") or ord(ch) >= 0x20
+    )
     return cleaned[:max_len]
 
 
 def node_to_dict(n: GraphNode) -> dict:
     return {
-        "id": n.id,
-        "kind": n.kind,
-        "name": _sanitize_name(n.name),
-        "qualified_name": _sanitize_name(n.qualified_name),
-        "file_path": n.file_path,
-        "line_start": n.line_start,
-        "line_end": n.line_end,
+        "id": n.id, "kind": n.kind, "name": _sanitize_name(n.name),
+        "qualified_name": _sanitize_name(n.qualified_name), "file_path": n.file_path,
+        "line_start": n.line_start, "line_end": n.line_end,
         "language": n.language,
         "parent_name": _sanitize_name(n.parent_name) if n.parent_name else n.parent_name,
         "is_test": n.is_test,
@@ -1021,10 +1017,8 @@ def node_to_dict(n: GraphNode) -> dict:
 
 def edge_to_dict(e: GraphEdge) -> dict:
     return {
-        "id": e.id,
-        "kind": e.kind,
+        "id": e.id, "kind": e.kind,
         "source": _sanitize_name(e.source_qualified),
         "target": _sanitize_name(e.target_qualified),
-        "file_path": e.file_path,
-        "line": e.line,
+        "file_path": e.file_path, "line": e.line,
     }
diff --git a/code_review_graph/migrations.py b/code_review_graph/migrations.py
index 1734c69..ddb446e 100644
--- a/code_review_graph/migrations.py
+++ b/code_review_graph/migrations.py
@@ -20,7 +20,9 @@ def get_schema_version(conn: sqlite3.Connection) -> int:
         int: The schema version (0 if metadata table doesn't exist, 1 if not set).
     """
     try:
-        row = conn.execute("SELECT value FROM metadata WHERE key = 'schema_version'").fetchone()
+        row = conn.execute(
+            "SELECT value FROM metadata WHERE key = 'schema_version'"
+        ).fetchone()
         if row is None:
             return 1
         return int(row[0] if isinstance(row, (tuple, list)) else row["value"])
@@ -37,20 +39,10 @@ def _set_schema_version(conn: sqlite3.Connection, version: int) -> None:
     )
 
 
-_KNOWN_TABLES = frozenset(
-    {
-        "nodes",
-        "edges",
-        "metadata",
-        "communities",
-        "flows",
-        "flow_memberships",
-        "nodes_fts",
-        "community_summaries",
-        "flow_snapshots",
-        "risk_index",
-    }
-)
+_KNOWN_TABLES = frozenset({
+    "nodes", "edges", "metadata", "communities", "flows", "flow_memberships", "nodes_fts",
+    "community_summaries", "flow_snapshots", "risk_index",
+})
 
 
 def _has_column(conn: sqlite3.Connection, table: str, column: str) -> bool:
@@ -67,7 +59,8 @@ def _table_exists(conn: sqlite3.Connection, table: str) -> bool:
     if table not in _KNOWN_TABLES:
         raise ValueError(f"Unknown table: {table}")
     row = conn.execute(
-        "SELECT count(*) FROM sqlite_master WHERE type IN ('table', 'view') AND name = ?",
+        "SELECT count(*) FROM sqlite_master WHERE type IN ('table', 'view') "
+        "AND name = ?",
         (table,),
     ).fetchone()
     return row[0] > 0
@@ -109,8 +102,12 @@ def _migrate_v3(conn: sqlite3.Connection) -> None:
             PRIMARY KEY (flow_id, node_id)
         )
     """)
-    conn.execute("CREATE INDEX IF NOT EXISTS idx_flows_criticality ON flows(criticality DESC)")
-    conn.execute("CREATE INDEX IF NOT EXISTS idx_flows_entry ON flows(entry_point_id)")
+    conn.execute(
+        "CREATE INDEX IF NOT EXISTS idx_flows_criticality ON flows(criticality DESC)"
+    )
+    conn.execute(
+        "CREATE INDEX IF NOT EXISTS idx_flows_entry ON flows(entry_point_id)"
+    )
     conn.execute(
         "CREATE INDEX IF NOT EXISTS idx_flow_memberships_node ON flow_memberships(node_id)"
     )
@@ -135,8 +132,12 @@ def _migrate_v4(conn: sqlite3.Connection) -> None:
     if not _has_column(conn, "nodes", "community_id"):
         conn.execute("ALTER TABLE nodes ADD COLUMN community_id INTEGER")
         logger.info("Migration v4: added 'community_id' column to nodes")
-    conn.execute("CREATE INDEX IF NOT EXISTS idx_nodes_community ON nodes(community_id)")
-    conn.execute("CREATE INDEX IF NOT EXISTS idx_communities_parent ON communities(parent_id)")
+    conn.execute(
+        "CREATE INDEX IF NOT EXISTS idx_nodes_community ON nodes(community_id)"
+    )
+    conn.execute(
+        "CREATE INDEX IF NOT EXISTS idx_communities_parent ON communities(parent_id)"
+    )
     conn.execute(
         "CREATE INDEX IF NOT EXISTS idx_communities_cohesion ON communities(cohesion DESC)"
     )
@@ -194,21 +195,12 @@ def _migrate_v6(conn: sqlite3.Connection) -> None:
             FOREIGN KEY (node_id) REFERENCES nodes(id)
         )
     """)
-    conn.execute("CREATE INDEX IF NOT EXISTS idx_risk_index_score ON risk_index(risk_score DESC)")
-    logger.info(
-        "Migration v6: created summary tables (community_summaries, flow_snapshots, risk_index)"
-    )
-
-
-def _migrate_v7(conn: sqlite3.Connection) -> None:
-    """v7: Add compound edge indexes for summary and risk queries."""
-    conn.execute(
-        "CREATE INDEX IF NOT EXISTS idx_edges_target_kind ON edges(target_qualified, kind)"
-    )
     conn.execute(
-        "CREATE INDEX IF NOT EXISTS idx_edges_source_kind ON edges(source_qualified, kind)"
+        "CREATE INDEX IF NOT EXISTS idx_risk_index_score "
+        "ON risk_index(risk_score DESC)"
     )
-    logger.info("Migration v7: added compound edge indexes")
+    logger.info("Migration v6: created summary tables "
+                "(community_summaries, flow_snapshots, risk_index)")
 
 
 # ---------------------------------------------------------------------------
@@ -221,7 +213,6 @@ def _migrate_v7(conn: sqlite3.Connection) -> None:
     4: _migrate_v4,
     5: _migrate_v5,
     6: _migrate_v6,
-    7: _migrate_v7,
 }
 
 LATEST_VERSION = max(MIGRATIONS.keys())
diff --git a/tests/test_migrations.py b/tests/test_migrations.py
index e903ded..3802aae 100644
--- a/tests/test_migrations.py
+++ b/tests/test_migrations.py
@@ -34,7 +34,9 @@ def test_v1_db_migrates_to_latest(self):
 
         # Manually create a v1 database (base schema only, version=1)
         conn = sqlite3.connect(str(self.tmp.name))
-        conn.execute("INSERT OR REPLACE INTO metadata (key, value) VALUES ('schema_version', '1')")
+        conn.execute(
+            "INSERT OR REPLACE INTO metadata (key, value) VALUES ('schema_version', '1')"
+        )
         conn.commit()
         # Drop migration artifacts to simulate v1
         conn.execute("DROP TABLE IF EXISTS flows")
@@ -96,7 +98,9 @@ def test_get_schema_version_no_metadata_table(self):
     def test_get_schema_version_no_key(self):
         """get_schema_version returns 1 when metadata exists but key is missing."""
         conn = sqlite3.connect(":memory:")
-        conn.execute("CREATE TABLE metadata (key TEXT PRIMARY KEY, value TEXT NOT NULL)")
+        conn.execute(
+            "CREATE TABLE metadata (key TEXT PRIMARY KEY, value TEXT NOT NULL)"
+        )
         conn.commit()
         assert get_schema_version(conn) == 1
         conn.close()
@@ -113,6 +117,7 @@ def test_run_migrations_on_already_current_db(self):
         version_after = get_schema_version(self.store._conn)
         assert version_before == version_after == LATEST_VERSION
 
+
     def test_v6_summary_tables_exist(self):
         """v6 summary tables should exist after migration."""
         tables = _get_table_names(self.store._conn)
@@ -129,16 +134,10 @@ def test_v6_migration_idempotent(self):
         tables = _get_table_names(self.store._conn)
         assert "community_summaries" in tables
 
-    def test_v7_compound_edge_indexes_exist(self):
-        """v7 compound edge indexes should exist after migration."""
-        rows = self.store._conn.execute("PRAGMA index_list(edges)").fetchall()
-        indexes = {row[1] if isinstance(row, tuple) else row["name"] for row in rows}
-
-        assert "idx_edges_target_kind" in indexes
-        assert "idx_edges_source_kind" in indexes
-
 
 def _get_table_names(conn: sqlite3.Connection) -> set[str]:
     """Helper: return all table/view names in the database."""
-    rows = conn.execute("SELECT name FROM sqlite_master WHERE type IN ('table', 'view')").fetchall()
+    rows = conn.execute(
+        "SELECT name FROM sqlite_master WHERE type IN ('table', 'view')"
+    ).fetchall()
     return {row[0] if isinstance(row, (tuple, list)) else row["name"] for row in rows}

From 5a31784a19573bb2542d9848fbbe995e9dda6c9c Mon Sep 17 00:00:00 2001
From: Christopher Skene <chris.skene@gmail.com>
Date: Tue, 7 Apr 2026 11:32:19 +1000
Subject: [PATCH 3/4] fix: add sqlite edge compound indexes

---
 code_review_graph/graph.py      |  2 ++
 code_review_graph/migrations.py | 14 ++++++++++++++
 tests/test_migrations.py        |  8 ++++++++
 3 files changed, 24 insertions(+)

diff --git a/code_review_graph/graph.py b/code_review_graph/graph.py
index 2dfa97f..83841e9 100644
--- a/code_review_graph/graph.py
+++ b/code_review_graph/graph.py
@@ -70,6 +70,8 @@
 CREATE INDEX IF NOT EXISTS idx_edges_source ON edges(source_qualified);
 CREATE INDEX IF NOT EXISTS idx_edges_target ON edges(target_qualified);
 CREATE INDEX IF NOT EXISTS idx_edges_kind ON edges(kind);
+CREATE INDEX IF NOT EXISTS idx_edges_target_kind ON edges(target_qualified, kind);
+CREATE INDEX IF NOT EXISTS idx_edges_source_kind ON edges(source_qualified, kind);
 CREATE INDEX IF NOT EXISTS idx_edges_file ON edges(file_path);
 """
 
diff --git a/code_review_graph/migrations.py b/code_review_graph/migrations.py
index ddb446e..06d4d07 100644
--- a/code_review_graph/migrations.py
+++ b/code_review_graph/migrations.py
@@ -203,6 +203,19 @@ def _migrate_v6(conn: sqlite3.Connection) -> None:
                 "(community_summaries, flow_snapshots, risk_index)")
 
 
+def _migrate_v7(conn: sqlite3.Connection) -> None:
+    """v7: Add compound edge indexes for summary and risk queries."""
+    conn.execute(
+        "CREATE INDEX IF NOT EXISTS idx_edges_target_kind "
+        "ON edges(target_qualified, kind)"
+    )
+    conn.execute(
+        "CREATE INDEX IF NOT EXISTS idx_edges_source_kind "
+        "ON edges(source_qualified, kind)"
+    )
+    logger.info("Migration v7: added compound edge indexes")
+
+
 # ---------------------------------------------------------------------------
 # Migration registry
 # ---------------------------------------------------------------------------
@@ -213,6 +226,7 @@ def _migrate_v6(conn: sqlite3.Connection) -> None:
     4: _migrate_v4,
     5: _migrate_v5,
     6: _migrate_v6,
+    7: _migrate_v7,
 }
 
 LATEST_VERSION = max(MIGRATIONS.keys())
diff --git a/tests/test_migrations.py b/tests/test_migrations.py
index 3802aae..d208e60 100644
--- a/tests/test_migrations.py
+++ b/tests/test_migrations.py
@@ -134,6 +134,14 @@ def test_v6_migration_idempotent(self):
         tables = _get_table_names(self.store._conn)
         assert "community_summaries" in tables
 
+    def test_v7_compound_edge_indexes_exist(self):
+        """v7 compound edge indexes should exist after migration."""
+        rows = self.store._conn.execute("PRAGMA index_list(edges)").fetchall()
+        indexes = {row[1] if isinstance(row, tuple) else row["name"] for row in rows}
+
+        assert "idx_edges_target_kind" in indexes
+        assert "idx_edges_source_kind" in indexes
+
 
 def _get_table_names(conn: sqlite3.Connection) -> set[str]:
     """Helper: return all table/view names in the database."""

From 05cbcd6056d1537a4b274ca4b11d98cea1f33680 Mon Sep 17 00:00:00 2001
From: Christopher Skene <chris.skene@gmail.com>
Date: Thu, 9 Apr 2026 07:00:08 +1000
Subject: [PATCH 4/4] Update supported schema version to 7

---
 code-review-graph-vscode/src/backend/sqlite.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/code-review-graph-vscode/src/backend/sqlite.ts b/code-review-graph-vscode/src/backend/sqlite.ts
index 267b3a2..9fead0f 100644
--- a/code-review-graph-vscode/src/backend/sqlite.ts
+++ b/code-review-graph-vscode/src/backend/sqlite.ts
@@ -208,7 +208,7 @@ export class SqliteReader {
         if (row) {
           const version = parseInt(row.value, 10);
           // Must match LATEST_VERSION in code_review_graph/migrations.py
-          const SUPPORTED_SCHEMA_VERSION = 6;
+          const SUPPORTED_SCHEMA_VERSION = 7;
           if (!isNaN(version) && version > SUPPORTED_SCHEMA_VERSION) {
             return `Database was created with a newer version (schema v${version}). Update the extension.`;
           }