-- sql_schema.sql
-- Contract Processing System SQL Schema
-- Author: Martin Bacigal, 01/2025
-- Description: Comprehensive schema for contract processing with version tracking and ontologies
-- Enable the uuid-ossp extension; it provides uuid_generate_v4(), which is the
-- column default for documents.document_id.
-- NOTE(review): an earlier note said to comment this out for MySQL/SQLite, but
-- this file also relies on JSONB, SERIAL, ON CONFLICT and plpgsql, so it is
-- PostgreSQL-only as written -- confirm no other engine is targeted.
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
-- Companies table
-- One row per company. `id` is the surrogate key used by foreign keys;
-- `company_id` is a separate unique string identifier. `parent_company_id`
-- is a self-reference to another row of this table.
CREATE TABLE IF NOT EXISTS companies (
    id                SERIAL PRIMARY KEY,
    company_id        VARCHAR(50) UNIQUE NOT NULL,
    name              VARCHAR(255) NOT NULL,
    company_group     VARCHAR(100),
    industry          VARCHAR(100),
    country           VARCHAR(100),
    parent_company_id INTEGER REFERENCES companies(id),
    metadata          JSONB,
    active            BOOLEAN DEFAULT true,
    created_at        TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    updated_at        TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

-- PostgreSQL does not accept MySQL-style inline INDEX clauses inside
-- CREATE TABLE, so secondary indexes are created as separate statements.
CREATE INDEX IF NOT EXISTS idx_company_group ON companies (company_group);
CREATE INDEX IF NOT EXISTS idx_parent_company ON companies (parent_company_id);
-- Contract types lookup
-- Reference list of contract kinds, keyed by a short unique code.
-- Constraint names are spelled out; they match the names PostgreSQL would
-- generate for the equivalent column-level constraints.
CREATE TABLE IF NOT EXISTS contract_types (
    id                    SERIAL,
    type_code             CHARACTER VARYING(50) NOT NULL,
    type_name             CHARACTER VARYING(100) NOT NULL,
    description           TEXT,
    typical_duration_days INTEGER,
    metadata              JSONB,
    created_at            TIMESTAMP WITHOUT TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    CONSTRAINT contract_types_pkey PRIMARY KEY (id),
    CONSTRAINT contract_types_type_code_key UNIQUE (type_code)
);
-- Seed the built-in contract types.
-- Re-running is harmless: a row whose type_code already exists is skipped.
INSERT INTO contract_types
    (type_code, type_name, description)
VALUES
    ('SERVICE',  'Service Agreement',        'General service contracts'),
    ('SUPPLY',   'Supply Agreement',         'Product supply contracts'),
    ('NDA',      'Non-Disclosure Agreement', 'Confidentiality agreements'),
    ('MSA',      'Master Service Agreement', 'Framework agreements'),
    ('SOW',      'Statement of Work',        'Project-specific work agreements'),
    ('LICENSE',  'License Agreement',        'Software/IP licensing'),
    ('PURCHASE', 'Purchase Agreement',       'One-time purchase contracts')
ON CONFLICT (type_code)
    DO NOTHING;
-- Main documents table
-- One row per contract file: where the file lives, its processing state,
-- the hash of its latest version, and the fields extracted from it.
CREATE TABLE IF NOT EXISTS documents (
    id                      SERIAL PRIMARY KEY,
    -- Generated by uuid_generate_v4(), which requires the uuid-ossp extension.
    document_id             UUID DEFAULT uuid_generate_v4() UNIQUE NOT NULL,

    -- File information
    file_path               VARCHAR(500) NOT NULL,
    file_name               VARCHAR(255) NOT NULL,
    file_type               VARCHAR(50),
    file_size_bytes         BIGINT,

    -- Contract identifiers
    cw_number               VARCHAR(50),
    contract_number         VARCHAR(100),
    contract_type_id        INTEGER REFERENCES contract_types(id),

    -- Processing status
    processed               BOOLEAN DEFAULT false,
    processing_started_at   TIMESTAMP,
    processing_completed_at TIMESTAMP,
    processing_status       VARCHAR(50) DEFAULT 'pending', -- pending, processing, completed, failed, reprocessing
    processing_version      INTEGER DEFAULT 1,
    error_message           TEXT,

    -- Current hash (latest version)
    current_file_hash       VARCHAR(64) NOT NULL,

    -- Contract details (extracted)
    contract_title          VARCHAR(500),
    contract_start_date     DATE,
    contract_end_date       DATE,
    contract_duration       VARCHAR(100),
    contract_value          DECIMAL(15, 2),
    currency                VARCHAR(3),

    -- AI Analysis results
    analysis_results        JSONB,
    key_deliverables        JSONB,
    -- Intended range 0.00 to 1.00; note DECIMAL(3, 2) itself does not enforce it.
    risk_score              DECIMAL(3, 2),
    compliance_score        DECIMAL(3, 2),

    -- Metadata
    tags                    JSONB,
    custom_fields           JSONB,
    created_at              TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    updated_at              TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    created_by              VARCHAR(100)
);

-- PostgreSQL does not accept MySQL-style inline INDEX clauses inside
-- CREATE TABLE, so secondary indexes are created as separate statements.
CREATE INDEX IF NOT EXISTS idx_cw_number ON documents (cw_number);
CREATE INDEX IF NOT EXISTS idx_contract_number ON documents (contract_number);
CREATE INDEX IF NOT EXISTS idx_processed_status ON documents (processed, processing_status);
CREATE INDEX IF NOT EXISTS idx_contract_dates ON documents (contract_start_date, contract_end_date);
CREATE INDEX IF NOT EXISTS idx_current_hash ON documents (current_file_hash);
-- Table-qualified name: index names must be unique within a schema, and the
-- generic name idx_created_at would clash with any other table that indexes
-- its own created_at column (IF NOT EXISTS would then silently skip one).
CREATE INDEX IF NOT EXISTS idx_documents_created_at ON documents (created_at);
-- File hash tracking (for version control)
-- Every hash ever recorded for a document; is_current flags the latest one.
-- A given hash can be stored only once per document (see the UNIQUE constraint).
-- Rows are removed together with their document (ON DELETE CASCADE).
CREATE TABLE IF NOT EXISTS file_hashes (
    id              SERIAL PRIMARY KEY,
    document_id     INTEGER NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
    file_hash       VARCHAR(64) NOT NULL,
    hash_algorithm  VARCHAR(20) DEFAULT 'SHA256',
    file_size_bytes BIGINT,
    calculated_at   TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    is_current      BOOLEAN DEFAULT true,
    UNIQUE (document_id, file_hash)
);

-- PostgreSQL does not accept MySQL-style inline INDEX clauses inside
-- CREATE TABLE, so secondary indexes are created as separate statements.
CREATE INDEX IF NOT EXISTS idx_file_hash ON file_hashes (file_hash);
CREATE INDEX IF NOT EXISTS idx_document_current ON file_hashes (document_id, is_current);
-- Document versions (track changes over time)
-- One row per (document, version_number); version numbers are unique per
-- document. Rows are removed together with their document.
CREATE TABLE IF NOT EXISTS document_versions (
    id                      SERIAL PRIMARY KEY,
    document_id             INTEGER NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
    version_number          INTEGER NOT NULL,
    file_hash               VARCHAR(64) NOT NULL,

    -- What changed
    change_type             VARCHAR(50), -- content_update, metadata_update, reprocessed, manual_edit
    change_description      TEXT,

    -- Processing info for this version
    processed               BOOLEAN DEFAULT false,
    processing_completed_at TIMESTAMP,
    analysis_results        JSONB,

    -- Version metadata
    created_at              TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    created_by              VARCHAR(100),
    UNIQUE (document_id, version_number)
);

-- PostgreSQL does not accept MySQL-style inline INDEX clauses inside
-- CREATE TABLE, so the secondary index is created as a separate statement.
CREATE INDEX IF NOT EXISTS idx_version_hash ON document_versions (file_hash);
-- Contract ontology categories
-- Category tree for classifying contracts. parent_category_id is a
-- self-reference to another row of this table; level is stored alongside it
-- (0 by default).
CREATE TABLE IF NOT EXISTS contract_ontology (
    id                 SERIAL PRIMARY KEY,
    category_code      VARCHAR(50) UNIQUE NOT NULL,
    category_name      VARCHAR(100) NOT NULL,
    parent_category_id INTEGER REFERENCES contract_ontology(id),
    level              INTEGER NOT NULL DEFAULT 0,
    description        TEXT,
    keywords           JSONB,
    rules              JSONB,      -- Rules for automatic classification
    color_hex          VARCHAR(7), -- For visualization
    icon               VARCHAR(50),
    sort_order         INTEGER DEFAULT 0,
    active             BOOLEAN DEFAULT true,
    created_at         TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    updated_at         TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

-- PostgreSQL does not accept MySQL-style inline INDEX clauses inside
-- CREATE TABLE, so secondary indexes are created as separate statements.
CREATE INDEX IF NOT EXISTS idx_parent_category ON contract_ontology (parent_category_id);
CREATE INDEX IF NOT EXISTS idx_level ON contract_ontology (level);
-- Insert default ontology
-- Re-running is harmless: a row whose category_code already exists is skipped.
INSERT INTO contract_ontology (category_code, category_name, level, description, color_hex) VALUES
    ('ROOT', 'All Contracts', 0, 'Root category', '#000000'),
    ('PROCUREMENT', 'Procurement', 1, 'Procurement related contracts', '#1E90FF'),
    ('PROCUREMENT.GOODS', 'Goods Procurement', 2, 'Purchase of physical goods', '#4169E1'),
    ('PROCUREMENT.SERVICES', 'Services Procurement', 2, 'Purchase of services', '#0000CD'),
    ('LEGAL', 'Legal', 1, 'Legal agreements', '#FF6347'),
    ('LEGAL.NDA', 'Confidentiality', 2, 'Non-disclosure and confidentiality', '#DC143C'),
    ('LEGAL.IP', 'Intellectual Property', 2, 'IP and licensing agreements', '#8B0000'),
    ('OPERATIONAL', 'Operational', 1, 'Day-to-day operational contracts', '#32CD32'),
    ('OPERATIONAL.FACILITIES', 'Facilities', 2, 'Facility management contracts', '#228B22'),
    ('OPERATIONAL.IT', 'IT Services', 2, 'Information technology contracts', '#006400')
ON CONFLICT (category_code) DO NOTHING;

-- Link the seeded categories to their parents. The INSERT above records only
-- `level`; without this step every parent_category_id stays NULL and the tree
-- cannot be walked through the foreign key. Only the seeded codes are touched,
-- and only while their parent is still unset, so manual re-parenting survives
-- a re-run of this script.
UPDATE contract_ontology AS child
SET parent_category_id = parent.id
FROM (
    VALUES
        ('PROCUREMENT',            'ROOT'),
        ('LEGAL',                  'ROOT'),
        ('OPERATIONAL',            'ROOT'),
        ('PROCUREMENT.GOODS',      'PROCUREMENT'),
        ('PROCUREMENT.SERVICES',   'PROCUREMENT'),
        ('LEGAL.NDA',              'LEGAL'),
        ('LEGAL.IP',               'LEGAL'),
        ('OPERATIONAL.FACILITIES', 'OPERATIONAL'),
        ('OPERATIONAL.IT',         'OPERATIONAL')
) AS link (child_code, parent_code)
INNER JOIN contract_ontology AS parent
    ON parent.category_code = link.parent_code
WHERE child.category_code = link.child_code
  AND child.parent_category_id IS NULL;
-- Document to ontology mapping
-- Many-to-many link between documents and ontology categories; a document can
-- be mapped to a given category at most once. Mappings are removed together
-- with their document.
CREATE TABLE IF NOT EXISTS document_ontology_mapping (
    id               SERIAL PRIMARY KEY,
    document_id      INTEGER NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
    ontology_id      INTEGER NOT NULL REFERENCES contract_ontology(id),
    confidence_score DECIMAL(3, 2) DEFAULT 1.00, -- How confident we are in this categorization
    is_primary       BOOLEAN DEFAULT false,      -- Primary category for the document
    assigned_by      VARCHAR(50),                -- 'ai', 'user', 'rule'
    assigned_at      TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    UNIQUE (document_id, ontology_id)
);

-- PostgreSQL does not accept MySQL-style inline INDEX clauses inside
-- CREATE TABLE, so the secondary index is created as a separate statement.
CREATE INDEX IF NOT EXISTS idx_document_ontology ON document_ontology_mapping (document_id, is_primary);
-- Document relationships
-- Directed, typed link between two documents. The same (parent, child) pair
-- may appear once per relationship_type. Rows are removed when either
-- document is deleted.
CREATE TABLE IF NOT EXISTS document_relationships (
    id                 SERIAL PRIMARY KEY,
    parent_document_id INTEGER NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
    child_document_id  INTEGER NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
    relationship_type  VARCHAR(50) NOT NULL, -- amendment, renewal, related, supersedes, references
    confidence         DECIMAL(3, 2) DEFAULT 1.00,
    detected_by        VARCHAR(50),          -- 'ai', 'user', 'content_analysis'
    metadata           JSONB,
    created_at         TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    UNIQUE (parent_document_id, child_document_id, relationship_type)
);

-- PostgreSQL does not accept MySQL-style inline INDEX clauses inside
-- CREATE TABLE, so secondary indexes are created as separate statements.
CREATE INDEX IF NOT EXISTS idx_parent_doc ON document_relationships (parent_document_id);
CREATE INDEX IF NOT EXISTS idx_child_doc ON document_relationships (child_document_id);
CREATE INDEX IF NOT EXISTS idx_relationship_type ON document_relationships (relationship_type);
-- Document to company associations
-- Links a document to a company in a given role; the same company may appear
-- on a document once per role. Associations are removed together with their
-- document.
CREATE TABLE IF NOT EXISTS document_companies (
    id               SERIAL PRIMARY KEY,
    document_id      INTEGER NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
    company_id       INTEGER NOT NULL REFERENCES companies(id),
    role             VARCHAR(50) NOT NULL, -- vendor, client, prime_contractor, subcontractor, witness
    is_primary       BOOLEAN DEFAULT false,
    confidence_score DECIMAL(3, 2) DEFAULT 1.00,
    detected_from    VARCHAR(100),         -- Where in the document this was found
    created_at       TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    UNIQUE (document_id, company_id, role)
);

-- PostgreSQL does not accept MySQL-style inline INDEX clauses inside
-- CREATE TABLE, so secondary indexes are created as separate statements.
CREATE INDEX IF NOT EXISTS idx_doc_company ON document_companies (document_id, is_primary);
CREATE INDEX IF NOT EXISTS idx_company_docs ON document_companies (company_id);
-- Processing logs (audit trail)
-- Append-style log of processing actions. document_id is nullable, so an
-- entry need not belong to a document; entries that do are removed together
-- with it (ON DELETE CASCADE).
CREATE TABLE IF NOT EXISTS processing_logs (
    id               SERIAL PRIMARY KEY,
    document_id      INTEGER REFERENCES documents(id) ON DELETE CASCADE,
    action           VARCHAR(50) NOT NULL, -- created, processing_started, completed, failed, reprocessed
    status           VARCHAR(50),
    message          TEXT,
    details          JSONB,
    duration_seconds INTEGER,
    memory_used_mb   INTEGER,
    created_at       TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    created_by       VARCHAR(100)
);

-- PostgreSQL does not accept MySQL-style inline INDEX clauses inside
-- CREATE TABLE, so secondary indexes are created as separate statements.
CREATE INDEX IF NOT EXISTS idx_document_action ON processing_logs (document_id, action);
-- Table-qualified name: index names must be unique within a schema, and the
-- generic name idx_created_at would clash with any other table that indexes
-- its own created_at column (IF NOT EXISTS would then silently skip one).
CREATE INDEX IF NOT EXISTS idx_processing_logs_created_at ON processing_logs (created_at);
CREATE INDEX IF NOT EXISTS idx_action ON processing_logs (action);
-- Key performance indicators (for dashboards)
-- At most one row per calendar date, enforced by UNIQUE (date).
CREATE TABLE IF NOT EXISTS processing_statistics (
    id                              SERIAL PRIMARY KEY,
    date                            DATE NOT NULL,
    total_documents_processed       INTEGER DEFAULT 0,
    total_processing_time_seconds   INTEGER DEFAULT 0,
    average_processing_time_seconds DECIMAL(10, 2),
    success_count                   INTEGER DEFAULT 0,
    failure_count                   INTEGER DEFAULT 0,
    total_file_size_mb              DECIMAL(15, 2),
    unique_companies                INTEGER DEFAULT 0,
    new_relationships_found         INTEGER DEFAULT 0,
    created_at                      TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    -- The UNIQUE constraint is backed by its own index on (date). The former
    -- inline "INDEX idx_stat_date (date)" was both invalid PostgreSQL syntax
    -- and a duplicate of that index, so no separate index is created.
    UNIQUE (date)
);
-- Views for common queries
-- Documents with their primary company
-- Every document appears at least once: the LEFT JOINs leave the company
-- columns NULL when no association is flagged is_primary. Nothing limits a
-- document to one primary association, so a document with several yields one
-- row per association.
--
-- The document columns are listed explicitly, in table order, instead of
-- `d.*`: CREATE OR REPLACE VIEW may only append columns, so with `d.*` this
-- script would fail on re-run as soon as a column is added to documents.
CREATE OR REPLACE VIEW v_documents_with_companies AS
SELECT
    d.id,
    d.document_id,
    d.file_path,
    d.file_name,
    d.file_type,
    d.file_size_bytes,
    d.cw_number,
    d.contract_number,
    d.contract_type_id,
    d.processed,
    d.processing_started_at,
    d.processing_completed_at,
    d.processing_status,
    d.processing_version,
    d.error_message,
    d.current_file_hash,
    d.contract_title,
    d.contract_start_date,
    d.contract_end_date,
    d.contract_duration,
    d.contract_value,
    d.currency,
    d.analysis_results,
    d.key_deliverables,
    d.risk_score,
    d.compliance_score,
    d.tags,
    d.custom_fields,
    d.created_at,
    d.updated_at,
    d.created_by,
    c.company_id,
    c.name AS company_name,
    c.company_group,
    dc.role AS company_role
FROM documents AS d
LEFT JOIN document_companies AS dc
    ON d.id = dc.document_id
    AND dc.is_primary = true
LEFT JOIN companies AS c
    ON dc.company_id = c.id;
-- Per-status processing summary
-- One row per processing_status: how many documents are in that status and
-- the mean elapsed seconds between processing_started_at and
-- processing_completed_at. AVG skips documents where either timestamp is
-- NULL, because their difference is NULL.
CREATE OR REPLACE VIEW v_processing_summary AS
SELECT
    d.processing_status,
    COUNT(*) AS document_count,
    AVG(
        EXTRACT(EPOCH FROM (d.processing_completed_at - d.processing_started_at))
    ) AS avg_processing_seconds
FROM documents AS d
GROUP BY
    d.processing_status;
-- Contract expiration monitoring
-- Documents that have a contract_end_date, labelled as:
--   'expired'        end date before today
--   'expiring_soon'  end date from today up to, but not including, today + 30 days
--   'active'         anything later
-- days_until_expiration is end date minus today, so it is negative for
-- expired contracts.
--
-- The document columns are listed explicitly, in table order, instead of
-- `d.*`: CREATE OR REPLACE VIEW may only append columns, so with `d.*` this
-- script would fail on re-run as soon as a column is added to documents.
CREATE OR REPLACE VIEW v_contract_expiration AS
SELECT
    d.id,
    d.document_id,
    d.file_path,
    d.file_name,
    d.file_type,
    d.file_size_bytes,
    d.cw_number,
    d.contract_number,
    d.contract_type_id,
    d.processed,
    d.processing_started_at,
    d.processing_completed_at,
    d.processing_status,
    d.processing_version,
    d.error_message,
    d.current_file_hash,
    d.contract_title,
    d.contract_start_date,
    d.contract_end_date,
    d.contract_duration,
    d.contract_value,
    d.currency,
    d.analysis_results,
    d.key_deliverables,
    d.risk_score,
    d.compliance_score,
    d.tags,
    d.custom_fields,
    d.created_at,
    d.updated_at,
    d.created_by,
    CASE
        WHEN d.contract_end_date < CURRENT_DATE THEN 'expired'
        WHEN d.contract_end_date < CURRENT_DATE + INTERVAL '30 days' THEN 'expiring_soon'
        ELSE 'active'
    END AS expiration_status,
    d.contract_end_date - CURRENT_DATE AS days_until_expiration
FROM documents AS d
WHERE d.contract_end_date IS NOT NULL
ORDER BY d.contract_end_date;
-- Functions
-- Function to update document version
-- Records a new version of a document and makes its hash the current one.
--
-- Parameters:
--   p_document_id         documents.id of the document being versioned
--   p_new_hash            hash of the new file content
--   p_change_type         stored in document_versions.change_type
--   p_change_description  stored in document_versions.change_description
--   p_created_by          stored in document_versions.created_by
-- Returns: the version number assigned to the new version (1 for the first).
-- Raises:  foreign_key_violation when no document has id = p_document_id.
--
-- Effects, all within the caller's transaction:
--   * inserts a row into document_versions
--   * sets documents.current_file_hash / processing_version / updated_at
--   * clears is_current on the document's file_hashes rows, then marks the
--     row for p_new_hash as current (inserting it if absent)
CREATE OR REPLACE FUNCTION update_document_version(
    p_document_id INTEGER,
    p_new_hash VARCHAR(64),
    p_change_type VARCHAR(50),
    p_change_description TEXT,
    p_created_by VARCHAR(100)
) RETURNS INTEGER AS $$
DECLARE
    v_new_version INTEGER;
BEGIN
    -- Lock the document row first. This serializes concurrent calls for the
    -- same document, which would otherwise both read the same MAX() below and
    -- collide on UNIQUE (document_id, version_number).
    PERFORM 1 FROM documents WHERE id = p_document_id FOR UPDATE;
    IF NOT FOUND THEN
        -- Same SQLSTATE the document_versions insert would have produced.
        RAISE EXCEPTION 'document % does not exist', p_document_id
            USING ERRCODE = 'foreign_key_violation';
    END IF;

    -- Get next version number
    SELECT COALESCE(MAX(version_number), 0) + 1 INTO v_new_version
    FROM document_versions
    WHERE document_id = p_document_id;

    -- Insert new version
    INSERT INTO document_versions (
        document_id, version_number, file_hash,
        change_type, change_description, created_by
    ) VALUES (
        p_document_id, v_new_version, p_new_hash,
        p_change_type, p_change_description, p_created_by
    );

    -- Update current hash in documents table
    UPDATE documents
    SET current_file_hash = p_new_hash,
        processing_version = v_new_version,
        updated_at = CURRENT_TIMESTAMP
    WHERE id = p_document_id;

    -- Update file_hashes table
    UPDATE file_hashes SET is_current = false
    WHERE document_id = p_document_id;

    -- A document can return to an earlier hash (file reverted, or reprocessed
    -- without a content change). file_hashes allows each hash once per
    -- document, so re-flag the existing row instead of failing on the
    -- unique constraint.
    INSERT INTO file_hashes (document_id, file_hash, is_current)
    VALUES (p_document_id, p_new_hash, true)
    ON CONFLICT (document_id, file_hash)
    DO UPDATE SET is_current = true;

    RETURN v_new_version;
END;
$$ LANGUAGE plpgsql;
-- Triggers
-- Row-level trigger function: stamps the row being written with the current
-- transaction timestamp in its updated_at column, then lets the write proceed.
-- Any table it is attached to must have an updated_at column.
CREATE OR REPLACE FUNCTION update_updated_at_column()
RETURNS TRIGGER
LANGUAGE plpgsql
AS $body$
BEGIN
    NEW.updated_at := CURRENT_TIMESTAMP;
    RETURN NEW;
END;
$body$;
-- Keep documents.updated_at current on every row update.
-- CREATE TRIGGER has no IF NOT EXISTS form, so drop any existing trigger
-- first; otherwise re-running this script fails with "trigger already exists".
DROP TRIGGER IF EXISTS update_documents_updated_at ON documents;
CREATE TRIGGER update_documents_updated_at
    BEFORE UPDATE ON documents
    FOR EACH ROW
    EXECUTE FUNCTION update_updated_at_column();
-- Keep companies.updated_at current on every row update.
-- CREATE TRIGGER has no IF NOT EXISTS form, so drop any existing trigger
-- first; otherwise re-running this script fails with "trigger already exists".
DROP TRIGGER IF EXISTS update_companies_updated_at ON companies;
CREATE TRIGGER update_companies_updated_at
    BEFORE UPDATE ON companies
    FOR EACH ROW
    EXECUTE FUNCTION update_updated_at_column();
-- Keep contract_ontology.updated_at current on every row update.
-- CREATE TRIGGER has no IF NOT EXISTS form, so drop any existing trigger
-- first; otherwise re-running this script fails with "trigger already exists".
DROP TRIGGER IF EXISTS update_contract_ontology_updated_at ON contract_ontology;
CREATE TRIGGER update_contract_ontology_updated_at
    BEFORE UPDATE ON contract_ontology
    FOR EACH ROW
    EXECUTE FUNCTION update_updated_at_column();