Skip to content

Commit 54fcedf

Browse files
committed
more comments
1 parent 33037dc commit 54fcedf

3 files changed

Lines changed: 50 additions & 6 deletions

File tree

apps/sim/lib/knowledge/documents/service.ts

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,8 @@ async function assertKnowledgeBaseFileUrlsOwnership(
9090
fileUrls: string[],
9191
kbWorkspaceId: string | null,
9292
kbUserId: string,
93-
requestId: string
93+
requestId: string,
94+
executor: DbExecutor = db
9495
): Promise<void> {
9596
const keys = [
9697
...new Set(
@@ -103,7 +104,9 @@ async function assertKnowledgeBaseFileUrlsOwnership(
103104
return
104105
}
105106

106-
const bindings = await getFileMetadataByKeys(keys, 'knowledge-base')
107+
// Read bindings on the caller's transaction so the security check shares the
108+
// same connection/lock context as the FOR UPDATE'd insert that follows.
109+
const bindings = await getFileMetadataByKeys(keys, 'knowledge-base', executor)
107110
const bindingByKey = new Map(bindings.map((binding) => [binding.key, binding]))
108111

109112
for (const key of keys) {
@@ -868,7 +871,8 @@ export async function createDocumentRecords(
868871
documents.map((docData) => docData.fileUrl),
869872
kbWorkspaceId,
870873
kb[0].userId,
871-
requestId
874+
requestId,
875+
tx
872876
)
873877

874878
// One load per batch (was N+1); skip entirely if no doc carries tags.
@@ -1434,7 +1438,8 @@ export async function createSingleDocument(
14341438
[documentData.fileUrl],
14351439
kb[0].workspaceId,
14361440
kb[0].userId,
1437-
requestId
1441+
requestId,
1442+
tx
14381443
)
14391444

14401445
await tx.insert(document).values(newDocument)

apps/sim/lib/uploads/server/metadata.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -193,12 +193,13 @@ export async function getFileMetadataByKey(
193193
*/
194194
export async function getFileMetadataByKeys(
195195
keys: string[],
196-
context: StorageContext
196+
context: StorageContext,
197+
executor: Pick<typeof db, 'select'> = db
197198
): Promise<FileMetadataRecord[]> {
198199
if (keys.length === 0) {
199200
return []
200201
}
201-
return db
202+
return executor
202203
.select()
203204
.from(workspaceFiles)
204205
.where(

packages/db/migrations/0222_kb_document_storage_key.sql

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,44 @@ $$;--> statement-breakpoint
9292
CALL backfill_document_storage_key_0222();--> statement-breakpoint
9393
DROP PROCEDURE backfill_document_storage_key_0222();--> statement-breakpoint
9494

95+
-- (1b) Fully URL-decode keys that carry encodings beyond the separator (e.g.
96+
-- '%2C' -> ',', '%26' -> '&', multi-byte UTF-8 like '%C3%ADa' -> 'ía'). The app
97+
-- derives the canonical key with decodeURIComponent (extractStorageKey), so a
98+
-- storage_key that is only %2F-decoded would never match the runtime key for
99+
-- these documents — breaking their liveness lookups and stranding their
100+
-- ownership bindings. The bulk pass above already handled '%2F'; only the
101+
-- remaining residual-'%' rows are re-derived here (a small subset), so the heavy
102+
-- decode never runs over the whole table. This single UPDATE evaluates its target
103+
-- set once, so a key that legitimately contains a literal '%' is not reprocessed.
104+
CREATE OR REPLACE FUNCTION url_decode_0222(input text) RETURNS text LANGUAGE sql IMMUTABLE AS $$
105+
SELECT CASE
106+
WHEN input IS NULL THEN NULL
107+
WHEN position('%' IN input) = 0 THEN input
108+
ELSE convert_from(
109+
(
110+
SELECT string_agg(
111+
CASE
112+
WHEN r.m[1] ~ '^%[0-9A-Fa-f]{2}$' THEN decode(substring(r.m[1] FROM 2), 'hex')
113+
ELSE convert_to(r.m[1], 'UTF8')
114+
END,
115+
''::bytea ORDER BY r.ord
116+
)
117+
FROM regexp_matches(input, '%[0-9A-Fa-f]{2}|.', 'g') WITH ORDINALITY AS r(m, ord)
118+
),
119+
'UTF8'
120+
)
121+
END
122+
$$;--> statement-breakpoint
123+
UPDATE "document"
124+
SET "storage_key" = regexp_replace(
125+
url_decode_0222(substring(split_part("file_url", '?', 1) FROM '/api/files/serve/(.*)$')),
126+
'^(s3|blob)/',
127+
''
128+
)
129+
WHERE "storage_key" LIKE 'kb/%'
130+
AND position('%' IN "storage_key") > 0;--> statement-breakpoint
131+
DROP FUNCTION url_decode_0222(text);--> statement-breakpoint
132+
95133
CREATE INDEX CONCURRENTLY IF NOT EXISTS "doc_storage_key_idx" ON "document" USING btree ("storage_key") WHERE "storage_key" IS NOT NULL;--> statement-breakpoint
96134

97135
-- (2) Backfill workspace_files ownership bindings from the populated storage_key.

0 commit comments

Comments
 (0)