Skip to content

Commit 6c6c799

Browse files
committed
improvement(knowledge): eliminate N+1 on tag definitions in bulk upload
createDocumentRecords previously called processDocumentTags once per document, and each call ran a SELECT against knowledge_base_tag_definitions — N queries that all returned the same rows, because the query is scoped only by knowledgeBaseId. Worse, those reads used the global db pool while the transaction held a FOR UPDATE lock on the knowledge base row, risking pool contention on large bulk uploads. Split the helper into loadTagDefinitions (a single query that accepts the transaction as its executor) and resolveDocumentTags (pure; takes the pre-loaded Map). The bulk path loads the definitions once inside the transaction; createSingleDocument loads them once outside its transaction. The throw-on-validation-error semantics are preserved.
1 parent b6679a9 commit 6c6c799

1 file changed

Lines changed: 27 additions & 13 deletions

File tree

apps/sim/lib/knowledge/documents/service.ts

Lines changed: 27 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -111,11 +111,26 @@ interface DocumentTagData {
111111
value: string
112112
}
113113

114-
async function processDocumentTags(
114+
type TagDefinition = typeof knowledgeBaseTagDefinitions.$inferSelect
115+
type TagDefinitionsByName = Map<string, TagDefinition>
116+
type DbExecutor = Pick<typeof db, 'select'>
117+
118+
async function loadTagDefinitions(
115119
knowledgeBaseId: string,
120+
executor: DbExecutor = db
121+
): Promise<TagDefinitionsByName> {
122+
const defs = await executor
123+
.select()
124+
.from(knowledgeBaseTagDefinitions)
125+
.where(eq(knowledgeBaseTagDefinitions.knowledgeBaseId, knowledgeBaseId))
126+
return new Map(defs.map((def) => [def.displayName, def]))
127+
}
128+
129+
function resolveDocumentTags(
116130
tagData: DocumentTagData[],
131+
tagDefinitions: TagDefinitionsByName,
117132
requestId: string
118-
): Promise<ProcessedDocumentTags> {
133+
): ProcessedDocumentTags {
119134
const setTagValue = (
120135
tags: ProcessedDocumentTags,
121136
slot: string,
@@ -200,13 +215,6 @@ async function processDocumentTags(
200215
return result
201216
}
202217

203-
const existingDefinitions = await db
204-
.select()
205-
.from(knowledgeBaseTagDefinitions)
206-
.where(eq(knowledgeBaseTagDefinitions.knowledgeBaseId, knowledgeBaseId))
207-
208-
const existingByName = new Map(existingDefinitions.map((def) => [def.displayName, def]))
209-
210218
const undefinedTags: string[] = []
211219
const typeErrors: string[] = []
212220

@@ -223,7 +231,7 @@ async function processDocumentTags(
223231

224232
if (!hasValue) continue
225233

226-
const existingDef = existingByName.get(tagName)
234+
const existingDef = tagDefinitions.get(tagName)
227235
if (!existingDef) {
228236
undefinedTags.push(tagName)
229237
continue
@@ -264,7 +272,7 @@ async function processDocumentTags(
264272

265273
if (!hasValue) continue
266274

267-
const existingDef = existingByName.get(tagName)
275+
const existingDef = tagDefinitions.get(tagName)
268276
if (!existingDef) continue
269277

270278
const targetSlot = existingDef.tagSlot
@@ -770,6 +778,11 @@ export async function createDocumentRecords(
770778
throw new Error('Knowledge base not found')
771779
}
772780

781+
// Load tag definitions once for the whole batch (avoids N+1 across docs)
782+
// and reuse the transaction's connection so we don't double-checkout
783+
// while holding the KB FOR UPDATE lock.
784+
const tagDefinitions = await loadTagDefinitions(knowledgeBaseId, tx)
785+
773786
const now = new Date()
774787
const documentRecords = []
775788
const returnData: DocumentData[] = []
@@ -783,7 +796,7 @@ export async function createDocumentRecords(
783796
try {
784797
const tagData = JSON.parse(docData.documentTagsData)
785798
if (Array.isArray(tagData)) {
786-
processedTags = await processDocumentTags(knowledgeBaseId, tagData, requestId)
799+
processedTags = resolveDocumentTags(tagData, tagDefinitions, requestId)
787800
}
788801
} catch (error) {
789802
if (error instanceof SyntaxError) {
@@ -1277,7 +1290,8 @@ export async function createSingleDocument(
12771290
try {
12781291
const tagData = JSON.parse(documentData.documentTagsData)
12791292
if (Array.isArray(tagData)) {
1280-
processedTags = await processDocumentTags(knowledgeBaseId, tagData, requestId)
1293+
const tagDefinitions = await loadTagDefinitions(knowledgeBaseId)
1294+
processedTags = resolveDocumentTags(tagData, tagDefinitions, requestId)
12811295
}
12821296
} catch (error) {
12831297
if (error instanceof SyntaxError) {

0 commit comments

Comments
 (0)