-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsupabase_codebase_schema.sql
More file actions
390 lines (331 loc) · 13.3 KB
/
supabase_codebase_schema.sql
File metadata and controls
390 lines (331 loc) · 13.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
-- Codebase Indexing Schema for InspectAI
-- This schema stores parsed code structure for intelligent PR reviews
-- Each project is isolated via project_id column
-- ============================================
-- Table 1: Projects (Company/Repo Registration)
-- ============================================
CREATE TABLE IF NOT EXISTS indexed_projects (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
-- GitHub identifiers
repo_full_name TEXT NOT NULL, -- e.g., "company/repo-name"
repo_id BIGINT, -- GitHub repo ID
installation_id BIGINT, -- GitHub App installation ID
-- Indexing status
indexing_status TEXT DEFAULT 'pending', -- pending, indexing, completed, failed
last_indexed_at TIMESTAMPTZ,
last_commit_sha TEXT, -- Last indexed commit
-- Statistics
total_files INTEGER DEFAULT 0,
total_symbols INTEGER DEFAULT 0,
total_calls INTEGER DEFAULT 0,
total_dependencies INTEGER DEFAULT 0,
-- Metadata
default_branch TEXT DEFAULT 'main',
languages JSONB DEFAULT '[]', -- ["python", "java", "cpp"]
created_at TIMESTAMPTZ DEFAULT NOW(),
updated_at TIMESTAMPTZ DEFAULT NOW(),
UNIQUE(repo_full_name)
);
-- Index for quick lookups
CREATE INDEX IF NOT EXISTS idx_projects_repo ON indexed_projects(repo_full_name);
CREATE INDEX IF NOT EXISTS idx_projects_installation ON indexed_projects(installation_id);
-- ============================================
-- Table 2: Code Files
-- ============================================
CREATE TABLE IF NOT EXISTS code_files (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
project_id UUID NOT NULL REFERENCES indexed_projects(id) ON DELETE CASCADE,
-- File info
file_path TEXT NOT NULL, -- e.g., "src/auth/validator.py"
language TEXT NOT NULL, -- python, java, cpp
-- Content hash for change detection
content_hash TEXT, -- SHA256 of file content
-- Statistics
line_count INTEGER DEFAULT 0,
symbol_count INTEGER DEFAULT 0,
-- Timestamps
last_indexed_at TIMESTAMPTZ DEFAULT NOW(),
UNIQUE(project_id, file_path)
);
-- Indexes
CREATE INDEX IF NOT EXISTS idx_files_project ON code_files(project_id);
CREATE INDEX IF NOT EXISTS idx_files_path ON code_files(file_path);
CREATE INDEX IF NOT EXISTS idx_files_language ON code_files(language);
-- ============================================
-- Table 3: Code Symbols (Functions, Classes, Variables)
-- ============================================
CREATE TABLE IF NOT EXISTS code_symbols (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
project_id UUID NOT NULL REFERENCES indexed_projects(id) ON DELETE CASCADE,
file_id UUID NOT NULL REFERENCES code_files(id) ON DELETE CASCADE,
-- Symbol identification
symbol_name TEXT NOT NULL, -- e.g., "validate_token"
symbol_type TEXT NOT NULL, -- function, class, method, variable, constant
qualified_name TEXT, -- e.g., "auth.validator.validate_token"
-- Location
start_line INTEGER NOT NULL,
end_line INTEGER NOT NULL,
-- Signature/Details
signature TEXT, -- e.g., "def validate_token(token: str) -> bool"
parameters JSONB DEFAULT '[]', -- [{"name": "token", "type": "str"}]
return_type TEXT, -- e.g., "bool"
-- Documentation
docstring TEXT,
-- Parent (for methods inside classes)
parent_symbol_id UUID REFERENCES code_symbols(id) ON DELETE CASCADE,
-- Modifiers
is_public BOOLEAN DEFAULT true,
is_static BOOLEAN DEFAULT false,
is_async BOOLEAN DEFAULT false,
-- Timestamps
created_at TIMESTAMPTZ DEFAULT NOW(),
updated_at TIMESTAMPTZ DEFAULT NOW()
);
-- Indexes for fast queries
CREATE INDEX IF NOT EXISTS idx_symbols_project ON code_symbols(project_id);
CREATE INDEX IF NOT EXISTS idx_symbols_file ON code_symbols(file_id);
CREATE INDEX IF NOT EXISTS idx_symbols_name ON code_symbols(symbol_name);
CREATE INDEX IF NOT EXISTS idx_symbols_type ON code_symbols(symbol_type);
CREATE INDEX IF NOT EXISTS idx_symbols_qualified ON code_symbols(qualified_name);
-- ============================================
-- Table 4: Dependencies (Imports)
-- ============================================
CREATE TABLE IF NOT EXISTS code_imports (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
project_id UUID NOT NULL REFERENCES indexed_projects(id) ON DELETE CASCADE,
file_id UUID NOT NULL REFERENCES code_files(id) ON DELETE CASCADE,
-- Import details
import_statement TEXT NOT NULL, -- e.g., "from auth import validate_token"
imported_module TEXT NOT NULL, -- e.g., "auth"
imported_names JSONB DEFAULT '[]', -- ["validate_token", "User"]
is_relative BOOLEAN DEFAULT false, -- Relative import (from . import x)
-- Resolved target (if internal)
resolved_file_id UUID REFERENCES code_files(id) ON DELETE SET NULL,
is_external BOOLEAN DEFAULT false, -- External package (not in repo)
-- Location
line_number INTEGER,
created_at TIMESTAMPTZ DEFAULT NOW()
);
-- Indexes
CREATE INDEX IF NOT EXISTS idx_imports_project ON code_imports(project_id);
CREATE INDEX IF NOT EXISTS idx_imports_file ON code_imports(file_id);
CREATE INDEX IF NOT EXISTS idx_imports_module ON code_imports(imported_module);
-- ============================================
-- Table 5: Call Graph (Function Calls)
-- ============================================
CREATE TABLE IF NOT EXISTS code_calls (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
project_id UUID NOT NULL REFERENCES indexed_projects(id) ON DELETE CASCADE,
-- Caller (who makes the call)
caller_file_id UUID NOT NULL REFERENCES code_files(id) ON DELETE CASCADE,
caller_symbol_id UUID REFERENCES code_symbols(id) ON DELETE CASCADE,
-- Callee (who is being called)
callee_name TEXT NOT NULL, -- Function/method name being called
callee_symbol_id UUID REFERENCES code_symbols(id) ON DELETE SET NULL,
-- Location
call_line INTEGER NOT NULL,
-- Call type
call_type TEXT DEFAULT 'function', -- function, method, constructor
created_at TIMESTAMPTZ DEFAULT NOW(),
UNIQUE(project_id, caller_file_id, callee_name, call_line)
);
-- Indexes for impact analysis
CREATE INDEX IF NOT EXISTS idx_calls_project ON code_calls(project_id);
CREATE INDEX IF NOT EXISTS idx_calls_caller_file ON code_calls(caller_file_id);
CREATE INDEX IF NOT EXISTS idx_calls_caller_symbol ON code_calls(caller_symbol_id);
CREATE INDEX IF NOT EXISTS idx_calls_callee ON code_calls(callee_symbol_id);
CREATE INDEX IF NOT EXISTS idx_calls_callee_name ON code_calls(callee_name);
-- ============================================
-- Table 6: Indexing Jobs (Background Processing)
-- ============================================
CREATE TABLE IF NOT EXISTS indexing_jobs (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
project_id UUID NOT NULL REFERENCES indexed_projects(id) ON DELETE CASCADE,
-- Job details
job_type TEXT NOT NULL, -- full, incremental, file
status TEXT DEFAULT 'pending', -- pending, running, completed, failed
-- Progress
total_files INTEGER DEFAULT 0,
processed_files INTEGER DEFAULT 0,
-- Timing
started_at TIMESTAMPTZ,
completed_at TIMESTAMPTZ,
-- Error tracking
error_message TEXT,
-- Metadata
triggered_by TEXT, -- webhook, manual, scheduled
commit_sha TEXT,
created_at TIMESTAMPTZ DEFAULT NOW()
);
-- Index for job queries
CREATE INDEX IF NOT EXISTS idx_jobs_project ON indexing_jobs(project_id);
CREATE INDEX IF NOT EXISTS idx_jobs_status ON indexing_jobs(status);
-- ============================================
-- Function: Get Symbol Impact (Who calls this?)
-- ============================================
CREATE OR REPLACE FUNCTION get_symbol_impact(
p_project_id UUID,
p_symbol_name TEXT
)
RETURNS TABLE (
caller_file TEXT,
caller_function TEXT,
call_line INTEGER,
caller_file_id UUID
) AS $$
BEGIN
RETURN QUERY
SELECT
cf.file_path as caller_file,
cs.symbol_name as caller_function,
cc.call_line,
cc.caller_file_id
FROM code_calls cc
JOIN code_files cf ON cc.caller_file_id = cf.id
LEFT JOIN code_symbols cs ON cc.caller_symbol_id = cs.id
WHERE cc.project_id = p_project_id
AND cc.callee_name = p_symbol_name
ORDER BY cf.file_path, cc.call_line;
END;
$$ LANGUAGE plpgsql;
-- ============================================
-- Function: Get Symbol Dependencies (What does this call?)
-- ============================================
CREATE OR REPLACE FUNCTION get_symbol_dependencies(
p_project_id UUID,
p_symbol_id UUID
)
RETURNS TABLE (
callee_name TEXT,
callee_file TEXT,
call_line INTEGER
) AS $$
BEGIN
RETURN QUERY
SELECT
cc.callee_name,
cf.file_path as callee_file,
cc.call_line
FROM code_calls cc
LEFT JOIN code_symbols cs ON cc.callee_symbol_id = cs.id
LEFT JOIN code_files cf ON cs.file_id = cf.id
WHERE cc.project_id = p_project_id
AND cc.caller_symbol_id = p_symbol_id
ORDER BY cc.call_line;
END;
$$ LANGUAGE plpgsql;
-- ============================================
-- Function: Get File Dependents (What files import this?)
-- ============================================
CREATE OR REPLACE FUNCTION get_file_dependents(
p_project_id UUID,
p_file_path TEXT
)
RETURNS TABLE (
dependent_file TEXT,
import_statement TEXT,
line_number INTEGER
) AS $$
BEGIN
RETURN QUERY
SELECT
cf.file_path as dependent_file,
ci.import_statement,
ci.line_number
FROM code_imports ci
JOIN code_files cf ON ci.file_id = cf.id
JOIN code_files target ON ci.resolved_file_id = target.id
WHERE ci.project_id = p_project_id
AND target.file_path = p_file_path
ORDER BY cf.file_path;
END;
$$ LANGUAGE plpgsql;
-- ============================================
-- Function: Get Changed Symbols Impact Summary
-- ============================================
CREATE OR REPLACE FUNCTION get_change_impact(
p_project_id UUID,
p_file_path TEXT,
p_changed_lines INTEGER[]
)
RETURNS TABLE (
symbol_name TEXT,
symbol_type TEXT,
start_line INTEGER,
end_line INTEGER,
caller_count BIGINT,
impact_level TEXT
) AS $$
BEGIN
RETURN QUERY
SELECT
cs.symbol_name,
cs.symbol_type,
cs.start_line,
cs.end_line,
COUNT(DISTINCT cc.id) as caller_count,
CASE
WHEN COUNT(DISTINCT cc.id) > 10 THEN 'HIGH'
WHEN COUNT(DISTINCT cc.id) > 3 THEN 'MEDIUM'
ELSE 'LOW'
END as impact_level
FROM code_symbols cs
JOIN code_files cf ON cs.file_id = cf.id
LEFT JOIN code_calls cc ON cc.callee_symbol_id = cs.id
WHERE cs.project_id = p_project_id
AND cf.file_path = p_file_path
AND (
cs.start_line = ANY(p_changed_lines)
OR cs.end_line = ANY(p_changed_lines)
OR EXISTS (
SELECT 1 FROM unnest(p_changed_lines) AS changed_line
WHERE changed_line BETWEEN cs.start_line AND cs.end_line
)
)
GROUP BY cs.id, cs.symbol_name, cs.symbol_type, cs.start_line, cs.end_line
ORDER BY caller_count DESC;
END;
$$ LANGUAGE plpgsql;
-- ============================================
-- Trigger: Update project updated_at
-- ============================================
CREATE OR REPLACE FUNCTION update_project_timestamp()
RETURNS TRIGGER AS $$
BEGIN
UPDATE indexed_projects
SET updated_at = NOW()
WHERE id = NEW.project_id;
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
-- Apply trigger to symbol changes
DROP TRIGGER IF EXISTS trigger_update_project_on_symbol ON code_symbols;
CREATE TRIGGER trigger_update_project_on_symbol
AFTER INSERT OR UPDATE ON code_symbols
FOR EACH ROW EXECUTE FUNCTION update_project_timestamp();
-- ============================================
-- Row Level Security (Optional but Recommended)
-- ============================================
-- Uncomment these if you want RLS enabled
-- ALTER TABLE indexed_projects ENABLE ROW LEVEL SECURITY;
-- ALTER TABLE code_files ENABLE ROW LEVEL SECURITY;
-- ALTER TABLE code_symbols ENABLE ROW LEVEL SECURITY;
-- ALTER TABLE code_imports ENABLE ROW LEVEL SECURITY;
-- ALTER TABLE code_calls ENABLE ROW LEVEL SECURITY;
-- ALTER TABLE indexing_jobs ENABLE ROW LEVEL SECURITY;
-- ============================================
-- Sample Queries for Reference
-- ============================================
-- Find all callers of a function:
-- SELECT * FROM get_symbol_impact('project-uuid', 'validate_token');
-- Find what a function depends on:
-- SELECT * FROM get_symbol_dependencies('project-uuid', 'symbol-uuid');
-- Find files that import a specific file:
-- SELECT * FROM get_file_dependents('project-uuid', 'src/auth/validator.py');
-- Get impact of changed lines:
-- SELECT * FROM get_change_impact('project-uuid', 'src/auth/validator.py', ARRAY[10,11,12,15]);
-- Get all functions in a file:
-- SELECT symbol_name, signature, start_line, end_line
-- FROM code_symbols cs
-- JOIN code_files cf ON cs.file_id = cf.id
-- WHERE cf.file_path = 'src/auth/validator.py' AND cs.symbol_type = 'function';