diff --git a/package-lock.json b/package-lock.json index de165655..7b29a662 100644 --- a/package-lock.json +++ b/package-lock.json @@ -18,7 +18,7 @@ }, "devDependencies": { "commander": "^14.0.0", - "jiti": "^2.6.0", + "jiti": "^2.6.1", "typescript": "^5.9.3" }, "optionalDependencies": { diff --git a/package.json b/package.json index b6fd9b36..1a291056 100644 --- a/package.json +++ b/package.json @@ -51,15 +51,15 @@ ] }, "optionalDependencies": { - "@lancedb/lancedb-darwin-x64": "^0.26.2", "@lancedb/lancedb-darwin-arm64": "^0.26.2", - "@lancedb/lancedb-linux-x64-gnu": "^0.26.2", + "@lancedb/lancedb-darwin-x64": "^0.26.2", "@lancedb/lancedb-linux-arm64-gnu": "^0.26.2", + "@lancedb/lancedb-linux-x64-gnu": "^0.26.2", "@lancedb/lancedb-win32-x64-msvc": "^0.26.2" }, "devDependencies": { "commander": "^14.0.0", - "jiti": "^2.6.0", + "jiti": "^2.6.1", "typescript": "^5.9.3" } } diff --git a/scripts/ci-test-manifest.mjs b/scripts/ci-test-manifest.mjs index c7c17f62..58196725 100644 --- a/scripts/ci-test-manifest.mjs +++ b/scripts/ci-test-manifest.mjs @@ -49,6 +49,11 @@ export const CI_TEST_MANIFEST = [ { group: "core-regression", runner: "node", file: "test/embedder-cache.test.mjs" }, // Issue #629 batch embedding fix { group: "llm-clients-and-auth", runner: "node", file: "test/embedder-ollama-batch-routing.test.mjs" }, + // Issue #665 bulkStore tests + { group: "storage-and-schema", runner: "node", file: "test/bulk-store.test.mjs", args: ["--test"] }, + { group: "storage-and-schema", runner: "node", file: "test/bulk-store-edge-cases.test.mjs", args: ["--test"] }, + { group: "storage-and-schema", runner: "node", file: "test/smart-extractor-bulk-store.test.mjs", args: ["--test"] }, + { group: "storage-and-schema", runner: "node", file: "test/smart-extractor-bulk-store-edge-cases.test.mjs", args: ["--test"] }, ]; export function getEntriesForGroup(group) { diff --git a/src/smart-extractor.ts b/src/smart-extractor.ts index 7e673a49..11354ae6 100644 --- a/src/smart-extractor.ts +++ b/src/smart-extractor.ts @@ -52,6 +52,8 @@ import { classifyTemporal, inferExpiry } from "./temporal-classifier.js"; import { inferAtomicBrandItemPreferenceSlot } from "./preference-slots.js"; import { batchDedup } from "./batch-dedup.js"; +type StoreEntry = Omit; + // ============================================================================ // Envelope Metadata Stripping // ============================================================================ @@ -417,6 +419,8 @@ export class SmartExtractor { } } + const createEntries: Omit[] = []; + for (const { index, candidate } of processableCandidates) { try { await this.processCandidate( @@ -427,6 +431,7 @@ export class SmartExtractor { targetScope, scopeFilter, precomputedVectors.get(index), + createEntries, ); } catch (err) { this.log( @@ -435,6 +440,10 @@ export class SmartExtractor { } } + if (createEntries.length > 0) { + await this.store.bulkStore(createEntries); + } + return stats; } @@ -653,6 +662,7 @@ export class SmartExtractor { targetScope: string, scopeFilter?: string[], precomputedVector?: number[], + createEntries?: Omit[], ): Promise { // Profile always merges (skip dedup — admission control still applies) if (ALWAYS_MERGE_CATEGORIES.has(candidate.category)) { @@ -662,6 +672,8 @@ export class SmartExtractor { sessionKey, targetScope, scopeFilter, + undefined, + createEntries, ); if (profileResult === "rejected") { stats.rejected = (stats.rejected ?? 0) + 1; @@ -678,7 +690,7 @@ export class SmartExtractor { const vector = precomputedVector ?? await this.embedder.embed(`${candidate.abstract} ${candidate.content}`); if (!vector || vector.length === 0) { this.log("memory-pro: smart-extractor: embedding failed, storing as-is"); - await this.storeCandidate(candidate, vector || [], sessionKey, targetScope); + createEntries?.push(this.buildStoreEntry(candidate, vector || [], sessionKey, targetScope)); stats.created++; return; } @@ -714,7 +726,7 @@ export class SmartExtractor { switch (dedupResult.decision) { case "create": - await this.storeCandidate(candidate, vector, sessionKey, targetScope, admission?.audit); + createEntries?.push(this.buildStoreEntry(candidate, vector, sessionKey, targetScope, admission?.audit)); stats.created++; break; @@ -730,11 +742,12 @@ export class SmartExtractor { scopeFilter, dedupResult.contextLabel, admission?.audit, + createEntries, ); stats.merged++; } else { // Category doesn't support merge → create instead - await this.storeCandidate(candidate, vector, sessionKey, targetScope, admission?.audit); + createEntries?.push(this.buildStoreEntry(candidate, vector, sessionKey, targetScope, admission?.audit)); stats.created++; } break; @@ -759,11 +772,12 @@ export class SmartExtractor { targetScope, scopeFilter, admission?.audit, + createEntries, ); stats.created++; stats.superseded = (stats.superseded ?? 0) + 1; } else { - await this.storeCandidate(candidate, vector, sessionKey, targetScope, admission?.audit); + createEntries?.push(this.buildStoreEntry(candidate, vector, sessionKey, targetScope, admission?.audit)); stats.created++; } break; @@ -773,17 +787,17 @@ export class SmartExtractor { await this.handleSupport(dedupResult.matchId, { session: sessionKey, timestamp: Date.now() }, dedupResult.reason, dedupResult.contextLabel, scopeFilter, admission?.audit); stats.supported = (stats.supported ?? 0) + 1; } else { - await this.storeCandidate(candidate, vector, sessionKey, targetScope, admission?.audit); + createEntries?.push(this.buildStoreEntry(candidate, vector, sessionKey, targetScope, admission?.audit)); stats.created++; } break; case "contextualize": if (dedupResult.matchId) { - await this.handleContextualize(candidate, vector, dedupResult.matchId, sessionKey, targetScope, scopeFilter, dedupResult.contextLabel, admission?.audit); + await this.handleContextualize(candidate, vector, dedupResult.matchId, sessionKey, targetScope, scopeFilter, dedupResult.contextLabel, admission?.audit, createEntries); stats.created++; } else { - await this.storeCandidate(candidate, vector, sessionKey, targetScope, admission?.audit); + createEntries?.push(this.buildStoreEntry(candidate, vector, sessionKey, targetScope, admission?.audit)); stats.created++; } break; @@ -802,15 +816,16 @@ export class SmartExtractor { targetScope, scopeFilter, admission?.audit, + createEntries, ); stats.created++; stats.superseded = (stats.superseded ?? 0) + 1; } else { - await this.handleContradict(candidate, vector, dedupResult.matchId, sessionKey, targetScope, scopeFilter, dedupResult.contextLabel, admission?.audit); + await this.handleContradict(candidate, vector, dedupResult.matchId, sessionKey, targetScope, scopeFilter, dedupResult.contextLabel, admission?.audit, createEntries); stats.created++; } } else { - await this.storeCandidate(candidate, vector, sessionKey, targetScope, admission?.audit); + createEntries?.push(this.buildStoreEntry(candidate, vector, sessionKey, targetScope, admission?.audit)); stats.created++; } break; @@ -964,6 +979,7 @@ export class SmartExtractor { targetScope: string, scopeFilter?: string[], admissionAudit?: AdmissionAuditRecord, + createEntries?: StoreEntry[], ): Promise<"merged" | "created" | "rejected"> { // Find existing profile memory by category const embeddingText = `${candidate.abstract} ${candidate.content}`; @@ -1011,11 +1027,12 @@ export class SmartExtractor { scopeFilter, undefined, admissionAudit, + createEntries, ); return "merged"; } else { // No existing profile — create new - await this.storeCandidate(candidate, vector || [], sessionKey, targetScope, admissionAudit); + createEntries?.push(this.buildStoreEntry(candidate, vector || [], sessionKey, targetScope, admissionAudit)); return "created"; } } @@ -1030,6 +1047,7 @@ export class SmartExtractor { scopeFilter?: string[], contextLabel?: string, admissionAudit?: AdmissionAuditRecord, + createEntries?: StoreEntry[], ): Promise { let existingAbstract = ""; let existingOverview = ""; @@ -1051,12 +1069,12 @@ export class SmartExtractor { const vector = await this.embedder.embed( `${candidate.abstract} ${candidate.content}`, ); - await this.storeCandidate( + createEntries?.push(this.buildStoreEntry( candidate, vector || [], "merge-fallback", targetScope, - ); + )); return; } @@ -1141,12 +1159,13 @@ export class SmartExtractor { matchId: string, sessionKey: string, targetScope: string, - scopeFilter: string[], + scopeFilter?: string[], admissionAudit?: AdmissionAuditRecord, + createEntries?: StoreEntry[], ): Promise { const existing = await this.store.getById(matchId, scopeFilter); if (!existing) { - await this.storeCandidate(candidate, vector, sessionKey, targetScope); + createEntries?.push(this.buildStoreEntry(candidate, vector || [], sessionKey, targetScope)); return; } @@ -1265,6 +1284,7 @@ export class SmartExtractor { scopeFilter?: string[], contextLabel?: string, admissionAudit?: AdmissionAuditRecord, + createEntries?: StoreEntry[], ): Promise { const storeCategory = this.mapToStoreCategory(candidate.category); const metadata = stringifySmartMetadata(this.withAdmissionAudit({ @@ -1287,14 +1307,19 @@ export class SmartExtractor { relations: [{ type: "contextualizes", targetId: matchId }], }, admissionAudit)); - await this.store.store({ + const entry_c: StoreEntry = { text: candidate.abstract, vector, category: storeCategory, scope: targetScope, importance: this.getDefaultImportance(candidate.category), metadata, - }); + }; + if (createEntries) { + createEntries.push(entry_c); + } else { + await this.store.store(entry_c); + } this.log( `memory-pro: smart-extractor: contextualize [${contextLabel || "general"}] new entry linked to ${matchId.slice(0, 8)}`, @@ -1314,6 +1339,7 @@ export class SmartExtractor { scopeFilter?: string[], contextLabel?: string, admissionAudit?: AdmissionAuditRecord, + createEntries?: StoreEntry[], ): Promise { // 1. Record contradiction on the existing memory const existing = await this.store.getById(matchId, scopeFilter); @@ -1351,14 +1377,19 @@ export class SmartExtractor { relations: [{ type: "contradicts", targetId: matchId }], }, admissionAudit)); - await this.store.store({ + const entry_d: StoreEntry = { text: candidate.abstract, vector, category: storeCategory, scope: targetScope, importance: this.getDefaultImportance(candidate.category), metadata, - }); + }; + if (createEntries) { + createEntries.push(entry_d); + } else { + await this.store.store(entry_d); + } this.log( `memory-pro: smart-extractor: contradict [${contextLabel || "general"}] on ${matchId.slice(0, 8)}, new entry created`, @@ -1370,24 +1401,23 @@ export class SmartExtractor { // -------------------------------------------------------------------------- /** - * Store a candidate memory as a new entry with L0/L1/L2 metadata. + * Build a memory entry from candidate data (without writing). + * Used by batch creation to reduce lock acquisitions. */ - private async storeCandidate( + private buildStoreEntry( candidate: CandidateMemory, vector: number[], sessionKey: string, targetScope: string, admissionAudit?: AdmissionAuditRecord, - ): Promise { - // Map 6-category to existing store categories for backward compatibility + ): Omit { const storeCategory = this.mapToStoreCategory(candidate.category); - const classifyText = candidate.content || candidate.abstract; const metadata = stringifySmartMetadata( buildSmartMetadata( { text: candidate.abstract, - category: this.mapToStoreCategory(candidate.category), + category: storeCategory, }, { l0_abstract: candidate.abstract, @@ -1406,18 +1436,33 @@ export class SmartExtractor { suppressed_until_turn: 0, memory_temporal_type: classifyTemporal(classifyText), valid_until: inferExpiry(classifyText), + ...(admissionAudit ? { admission_audit: JSON.stringify(admissionAudit) } : {}), }, ), ); - await this.store.store({ - text: candidate.abstract, // L0 used as the searchable text + return { + text: candidate.abstract, vector, category: storeCategory, scope: targetScope, importance: this.getDefaultImportance(candidate.category), metadata, - }); + }; + } + + /** + * Store a candidate memory as a new entry with L0/L1/L2 metadata. + */ + private async storeCandidate( + candidate: CandidateMemory, + vector: number[], + sessionKey: string, + targetScope: string, + admissionAudit?: AdmissionAuditRecord, + ): Promise { + const entry = this.buildStoreEntry(candidate, vector, sessionKey, targetScope, admissionAudit); + await this.store.store(entry); this.log( `memory-pro: smart-extractor: created [${candidate.category}] ${candidate.abstract.slice(0, 60)}`, diff --git a/src/store.ts b/src/store.ts index a4bd31cc..aa0dff9a 100644 --- a/src/store.ts +++ b/src/store.ts @@ -419,6 +419,49 @@ export class MemoryStore { }); } + /** + * Bulk store multiple memory entries (single lock acquisition) + * + * Reduces lock contention by acquiring lock once for multiple entries. + * Use this when auto-capture produces multiple memories. + */ + async bulkStore( + entries: Omit[], + ): Promise { + await this.ensureInitialized(); + + // Filter out invalid entries (undefined, null, missing text/vector) + const validEntries = entries.filter( + (entry) => entry && entry.text && entry.text.length > 0 && entry.vector && entry.vector.length > 0 + ); + + // Early return for empty array (skip lock acquisition) + if (validEntries.length === 0) { + return []; + } + + const fullEntries: MemoryEntry[] = validEntries.map((entry) => ({ + ...entry, + id: randomUUID(), + timestamp: Date.now(), + metadata: entry.metadata || "{}", + })); + + // Single lock acquisition for all entries + return this.runWithFileLock(async () => { + try { + await this.table!.add(fullEntries); + } catch (err: any) { + const code = err.code || ""; + const message = err.message || String(err); + throw new Error( + `Failed to bulk store ${fullEntries.length} memories: ${code} ${message}`, + ); + } + return fullEntries; + }); + } + /** * Import a pre-built entry while preserving its id/timestamp. * Used for re-embedding / migration / A/B testing across embedding models. diff --git a/test/auto-capture-bulkstore-complete-design.test.mjs b/test/auto-capture-bulkstore-complete-design.test.mjs new file mode 100644 index 00000000..98eb0b36 --- /dev/null +++ b/test/auto-capture-bulkstore-complete-design.test.mjs @@ -0,0 +1,107 @@ +// test/auto-capture-bulkstore-complete-design.test.mjs +/** + * Auto-Capture bulkStore Complete Design vs Claude + */ + +import { describe, it } from "node:test"; +import assert from "node:assert/strict"; +import { mkdtempSync, rmSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import jitiFactory from "jiti"; + +const jiti = jitiFactory(import.meta.url, { interopDefault: true }); + +describe("Auto-Capture Complete Design", () => { + + it("PLAN A: Current behavior (individual store)", async () => { + const { MemoryStore } = jiti("../src/store.ts"); + const dir = mkdtempSync(join(tmpdir(), "plan-a-")); + const store = new MemoryStore({ dbPath: dir, vectorDim: 4 }); + + const toCapture = ["Fact A", "Fact B", "Fact C"]; + const defaultScope = "agent:test"; + let storeCallCount = 0; + const start = Date.now(); + + for (const text of toCapture.slice(0, 2)) { + const category = "fact"; + const vector = new Array(4).fill(0.1).map(() => Math.random()); + const existing = await store.vectorSearch(vector, 1, 0.9, [defaultScope]); + if (existing.length > 0 && existing[0].score > 0.90) continue; + await store.store({ text, vector, category, scope: defaultScope, importance: 0.7, metadata: "{}" }); + storeCallCount++; + } + + console.log(`[Plan A] ${storeCallCount} stores, ${Date.now() - start}ms, Lock: N`); + }); + + it("PLAN B: Collect then bulkStore", async () => { + const { MemoryStore } = jiti("../src/store.ts"); + const dir = mkdtempSync(join(tmpdir(), "plan-b-")); + const store = new MemoryStore({ dbPath: dir, vectorDim: 4 }); + + const toCapture = ["Fact A", "Fact B", "Fact C"]; + const defaultScope = "agent:test"; + const start = Date.now(); + + const entries = toCapture.slice(0, 2).map(text => ({ + text, + vector: new Array(4).fill(0.1).map(() => Math.random()), + category: "fact", + scope: defaultScope, + importance: 0.7, + metadata: "{}", + })); + + const result = await store.bulkStore(entries); + console.log(`[Plan B] ${result.length} stores, ${Date.now() - start}ms, Lock: 1`); + console.log(`[Plan B] WARNING: No duplicate check!`); + }); + + it("PLAN C: Dedup then bulkStore", async () => { + const { MemoryStore } = jiti("../src/store.ts"); + const dir = mkdtempSync(join(tmpdir(), "plan-c-")); + const store = new MemoryStore({ dbPath: dir, vectorDim: 4 }); + + await store.store({ + text: "Existing", + vector: [0.5, 0.5, 0.5, 0.5], + category: "fact", + scope: "agent:test", + importance: 0.7, + metadata: "{}", + }); + + const toCapture = ["A", "B", "Existing"]; + const defaultScope = "agent:test"; + const start = Date.now(); + + const entries = []; + for (const text of toCapture.slice(0, 2)) { + const vector = [Math.random(), Math.random(), Math.random(), Math.random()]; + const existing = await store.vectorSearch(vector, 1, 0.9, [defaultScope]); + if (existing.length > 0) { + console.log(`[Plan C] Skipped: ${text}`); + continue; + } + entries.push({ text, vector, category: "fact", scope: defaultScope, importance: 0.7, metadata: "{}" }); + } + + const result = await store.bulkStore(entries); + console.log(`[Plan C] ${result.length} stores, ${Date.now() - start}ms, Lock: N+1`); + console.log(`[Plan C] WARNING: No time saved!`); + }); + + it("CONCLUSION", async () => { + console.log("\n=== FINAL ANALYSIS ==="); + console.log("Plan A (Current): N locks, has dedup"); + console.log("Plan B (Simple): 1 lock, NO dedup"); + console.log("Plan C (Claude): N+1 locks, has dedup"); + console.log(""); + console.log("Auto-capture uses fail-open dedup (PR comments)"); + console.log("=> Plan B is CORRECT direction"); + }); +}); + +console.log("=== Complete Design Test ==="); \ No newline at end of file diff --git a/test/auto-capture-bulkstore-potential-issues.test.mjs b/test/auto-capture-bulkstore-potential-issues.test.mjs new file mode 100644 index 00000000..5115e3f5 --- /dev/null +++ b/test/auto-capture-bulkstore-potential-issues.test.mjs @@ -0,0 +1,140 @@ +// test/auto-capture-bulkstore-potential-issues.test.mjs +/** + * Auto-Capture bulkStore 改動潛在問題模擬測試 + * + * 模擬兩種不同的改動方式,確認問題 + */ + +import { describe, it } from "node:test"; +import assert from "node:assert/strict"; +import { mkdtempSync, rmSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import jitiFactory from "jiti"; + +const jiti = jitiFactory(import.meta.url, { interopDefault: true }); + +describe("Auto-Capture bulkStore potential issues", () => { + + // ============================================================ + // 我的方式:直接收集後 bulkStore + // ============================================================ + it("MY APPROACH: simple bulkStore without duplicate check", async () => { + const { MemoryStore } = jiti("../src/store.ts"); + const dir = mkdtempSync(join(tmpdir(), "my-approach-")); + + const store = new MemoryStore({ + dbPath: dir, + vectorDim: 4, + }); + + /* 模擬 auto-capture:現況邏輯(每次單獨寫入) + for (const text of toCapture) { + const vector = await embedder.embedPassage(text); + const existing = await store.vectorSearch(vector, 1, 0.9, [scope]); + if (existing.length > 0 && existing[0].score > 0.90) continue; + await store.store({ text, vector, ... }); + } + */ + + /* 我的改法:直接收集 + const entries = []; + for (const text of toCapture) { + const vector = [Math.random(), Math.random(), Math.random(), Math.random()]; // mock + entries.push({ text, vector, category: "fact", scope: "test", importance: 0.7, metadata: "{}" }); + } + await store.bulkStore(entries); + */ + + // 測試:直接 bulkStore + const entries = [ + { text: "Test 1", vector: [0.1, 0.1, 0.1, 0.1], category: "fact", scope: "test", importance: 0.7, metadata: "{}" }, + { text: "Test 2", vector: [0.2, 0.2, 0.2, 0.2], category: "fact", scope: "test", importance: 0.7, metadata: "{}" }, + ]; + + const result = await store.bulkStore(entries); + console.log("[My Approach] Stored:", result.length); + + // 問題:這樣無法在寫入前檢查 duplicate! + // 如果有重複的內容,已經直接寫入了 + }); + + // ============================================================ + // Claude 的方式:先檢查 duplicate 再收集 + // ============================================================ + it("CLAUDE APPROACH: duplicate check then bulkStore", async () => { + const { MemoryStore } = jiti("../src/store.ts"); + const dir = mkdtempSync(join(tmpdir(), "claude-approach-")); + + const store = new MemoryStore({ + dbPath: dir, + vectorDim: 4, + }); + + // 先寫入一個 + await store.store({ + text: "Existing memory", + vector: [0.5, 0.5, 0.5, 0.5], + category: "fact", + scope: "test", + importance: 0.7, + metadata: "{}", + }); + + /* Claude 的改法: + const entries = []; + for (const text of toCapture) { + const vector = await embedder.embedPassage(text); + // 這裡需要 duplicate check,但 bulkStore 是一次性的 + const existing = await store.vectorSearch(vector, 1, 0.9, [scope]); + if (existing.length > 0 && existing[0].score > 0.90) { + console.log("Duplicate found, skipping:", text); + continue; + } + entries.push({ text, vector, ... }); + } + await store.bulkStore(entries); + */ + + // 問題:duplicate check 在 loop 裡,但每個 check 都會觸發 vectorSearch + // 而且 bulkStore 不能做這件事 + + const newTexts = ["New 1", "New 2", "Existing memory"]; // 最後一個是重複的 + const entries = []; + + for (const text of newTexts) { + const vector = [Math.random(), Math.random(), Math.random(), Math.random()]; + + // 需要 duplicate check + const existing = await store.vectorSearch(vector, 1, 0.9, ["test"]); + if (existing.length > 0 && existing[0].score > 0.90) { + console.log("[Claude] Duplicate found, skipping:", text); + continue; + } + + entries.push({ text, vector, category: "fact", scope: "test", importance: 0.7, metadata: "{}" }); + } + + const result = await store.bulkStore(entries); + console.log("[Claude] Stored after dedup:", result.length); + + // ✅ 這方式可以工作,但需要 vectorSearch API 调用 N 次 + }); + + // ============================================================ + // CONFLICT: 哪個方式更好? + // ============================================================ + it("CONFLICT: analyze the problem", async () => { + console.log("\n=== CONFLICT ANALYSIS ==="); + console.log("問題 1: bulkStore 不能在 lock 內做 duplicate check"); + console.log("問題 2: 在 loop 外做 check = N 次 vectorSearch"); + console.log("問題 3: 如果跳過 = entries.length 變少"); + console.log(""); + console.log("My Approach: 快但可能寫入重複"); + console.log("Claude Approach: 正確但慢(N 次 API call)"); + console.log(""); + console.log("解決方案?"); + }); +}); + +console.log("=== Auto-Capture bulkStore Conflict Test ==="); \ No newline at end of file diff --git a/test/bulk-store-edge-cases.test.mjs b/test/bulk-store-edge-cases.test.mjs new file mode 100644 index 00000000..7b8f4c3a --- /dev/null +++ b/test/bulk-store-edge-cases.test.mjs @@ -0,0 +1,122 @@ +// test/bulk-store-edge-cases.test.mjs +/** + * Bulk Store Edge Cases - 驗證潛在問題 + */ + +import { describe, it } from "node:test"; +import assert from "node:assert/strict"; +import { mkdtempSync, rmSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import jitiFactory from "jiti"; + +const jiti = jitiFactory(import.meta.url, { interopDefault: true }); + +describe("bulkStore edge case verification", () => { + it("should handle undefined/null in entries", async () => { + const { MemoryStore } = jiti("../src/store.ts"); + const dir = mkdtempSync(join(tmpdir(), "bulk-undefined-")); + + const store = new MemoryStore({ + dbPath: dir, + vectorDim: 4, + }); + + // 測試包含 undefined/null + try { + const result = await store.bulkStore([ + undefined, + null, + { + text: "Valid entry", + vector: new Array(4).fill(0.1), + category: "fact", + scope: "test", + importance: 0.5, + metadata: "{}", + }, + ]); + console.log("[Edge] undefined/null result:", result.length); + } catch (err) { + console.log("[Edge] undefined/null ERROR:", err.message); + } + }); + + it("should handle missing text/vector fields", async () => { + const { MemoryStore } = jiti("../src/store.ts"); + const dir = mkdtempSync(join(tmpdir(), "bulk-missing-")); + + const store = new MemoryStore({ + dbPath: dir, + vectorDim: 4, + }); + + // 測試缺少必要欄位 + try { + const result = await store.bulkStore([ + { text: "Only text" }, // 缺少 vector + { vector: new Array(4).fill(0.1) }, // 缺少 text + {}, // 什麼都沒有 + ]); + console.log("[Edge] Missing fields result:", result.length); + } catch (err) { + console.log("[Edge] Missing fields ERROR:", err.message); + } + }); + + it("should handle wrong vector dimension", async () => { + const { MemoryStore } = jiti("../src/store.ts"); + const dir = mkdtempSync(join(tmpdir(), "bulk-dim-")); + + const store = new MemoryStore({ + dbPath: dir, + vectorDim: 4, // 設定 4 維 + }); + + // 測試錯誤維度 + try { + const result = await store.bulkStore([ + { + text: "Wrong dimension", + vector: new Array(8).fill(0.1), // 8 維會怎樣? + category: "fact", + scope: "test", + importance: 0.5, + metadata: "{}", + }, + ]); + console.log("[Edge] Wrong dimension result:", result.length); + } catch (err) { + console.log("[Edge] Wrong dimension ERROR:", err.message); + } + }); + + it("should handle empty text", async () => { + const { MemoryStore } = jiti("../src/store.ts"); + const dir = mkdtempSync(join(tmpdir(), "bulk-empty-text-")); + + const store = new MemoryStore({ + dbPath: dir, + vectorDim: 4, + }); + + // 測試空文字 + try { + const result = await store.bulkStore([ + { + text: "", + vector: new Array(4).fill(0.1), + category: "fact", + scope: "test", + importance: 0.5, + metadata: "{}", + }, + ]); + console.log("[Edge] Empty text result:", result.length); + } catch (err) { + console.log("[Edge] Empty text ERROR:", err.message); + } + }); +}); + +console.log("=== Bulk Store Edge Case Verification ==="); \ No newline at end of file diff --git a/test/bulk-store.test.mjs b/test/bulk-store.test.mjs new file mode 100644 index 00000000..70569aed --- /dev/null +++ b/test/bulk-store.test.mjs @@ -0,0 +1,112 @@ +// test/bulk-store.test.mjs +/** + * Bulk Store Test + * + * 測試 bulkStore 是否正確運作 + */ + +import { describe, it } from "node:test"; +import assert from "node:assert/strict"; +import { mkdtempSync, rmSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import jitiFactory from "jiti"; + +const jiti = jitiFactory(import.meta.url, { interopDefault: true }); + +describe("bulkStore", () => { + it("should store multiple entries with single lock", async () => { + const { MemoryStore } = jiti("../src/store.ts"); + const dir = mkdtempSync(join(tmpdir(), "bulk-store-")); + + const store = new MemoryStore({ + dbPath: dir, + vectorDim: 8, + }); + + const entries = Array(10).fill(null).map((_, i) => ({ + text: `Bulk test memory ${i}`, + vector: new Array(8).fill(0.1), + category: "fact", + scope: "test", + importance: 0.5, + metadata: "{}", + })); + + const start = Date.now(); + const stored = await store.bulkStore(entries); + const duration = Date.now() - start; + + console.log(`[bulkStore] ${entries.length} entries stored in ${duration}ms`); + console.log(`[bulkStore] First id: ${stored[0].id}`); + + assert.strictEqual(stored.length, 10); + assert.ok(stored[0].id.length > 0); + }); + + it("should handle empty array", async () => { + const { MemoryStore } = jiti("../src/store.ts"); + const dir = mkdtempSync(join(tmpdir(), "bulk-empty-")); + + const store = new MemoryStore({ + dbPath: dir, + vectorDim: 4, + }); + + const result = await store.bulkStore([]); + assert.strictEqual(result.length, 0); + console.log("[Edge] Empty array handled correctly"); + }); + + it("should handle single entry", async () => { + const { MemoryStore } = jiti("../src/store.ts"); + const dir = mkdtempSync(join(tmpdir(), "bulk-single-")); + + const store = new MemoryStore({ + dbPath: dir, + vectorDim: 4, + }); + + const result = await store.bulkStore([{ + text: "Single entry", + vector: new Array(4).fill(0.1), + category: "fact", + scope: "test", + importance: 0.5, + metadata: "{}", + }]); + + assert.strictEqual(result.length, 1); + console.log("[Edge] Single entry handled correctly"); + }); + + it("should handle concurrent bulkStore calls", async () => { + const { MemoryStore } = jiti("../src/store.ts"); + const dir = mkdtempSync(join(tmpdir(), "bulk-concurrent-")); + + const store = new MemoryStore({ + dbPath: dir, + vectorDim: 4, + }); + + const promises = Array(10).fill(null).map((_, i) => + store.bulkStore([{ + text: `Concurrent ${i}`, + vector: new Array(4).fill(0.1), + category: "fact", + scope: "test", + importance: 0.5, + metadata: "{}", + }]) + ); + + const results = await Promise.allSettled(promises); + const success = results.filter(r => r.status === "fulfilled").length; + const failed = results.filter(r => r.status === "rejected").length; + + console.log(`[Stress] ${success} success, ${failed} failed`); + assert.ok(success >= 8, `Expected at least 8, got ${success}`); + }, 60000); +}); + +console.log("=== Bulk Store Tests ==="); \ No newline at end of file diff --git a/test/plan-b-impact-analysis.test.mjs b/test/plan-b-impact-analysis.test.mjs new file mode 100644 index 00000000..466b6486 --- /dev/null +++ b/test/plan-b-impact-analysis.test.mjs @@ -0,0 +1,184 @@ +// test/plan-b-impact-analysis.test.mjs +/** + * Plan B Impact Analysis - 會不會影響其他內容? + */ + +import { describe, it } from "node:test"; +import assert from "node:assert/strict"; +import { mkdtempSync, rmSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import jitiFactory from "jiti"; + +const jiti = jitiFactory(import.meta.url, { interopDefault: true }); + +describe("Plan B Impact Analysis", () => { + + // ============================================================ + // 影響 1: stored 計數器 + // ============================================================ + it("IMPACT 1: stored counter tracking", async () => { + console.log("\n=== IMPACT 1: stored counter ==="); + console.log("現況:stored++ 在每次 store.store() 後"); + console.log("Plan B:要如何追蹤 stored?"); + console.log(""); + console.log("問題:bulkStore() 回傳 array.length"); + console.log("但 input 和 output 可能數量不同(因為 validation filter)"); + console.log(""); + + const { MemoryStore } = jiti("../src/store.ts"); + const dir = mkdtempSync(join(tmpdir(), "impact1-")); + const store = new MemoryStore({ dbPath: dir, vectorDim: 4 }); + + const entries = [ + { text: "Valid 1", vector: [0.1, 0.1, 0.1, 0.1], category: "fact", scope: "test", importance: 0.7, metadata: "{}" }, + { text: "", vector: [0.2, 0.2, 0.2, 0.2], category: "fact", scope: "test", importance: 0.7, metadata: "{}" }, // 會被 filter + { text: "Valid 2", vector: [0.3, 0.3, 0.3, 0.3], category: "fact", scope: "test", importance: 0.7, metadata: "{}" }, + ]; + + const result = await store.bulkStore(entries); + console.log(`Input: ${entries.length}, Output: ${result.length}`); + console.log(`stored = ${result.length} (可用)`); + + // ✅ 可以用 result.length 作為 stored + assert.strictEqual(result.length, 2); + }); + + // ============================================================ + // 影響 2: mdMirror dual-write + // ============================================================ + it("IMPACT 2: mdMirror dual-write", async () => { + console.log("\n=== IMPACT 2: mdMirror dual-write ==="); + console.log("現況:mdMirror 在每次 store.store() 後被呼叫"); + console.log("Plan B:mdMirror 無法在 bulkStore 中一併處理"); + console.log(""); + console.log("解決方案:"); + console.log("1. bulkStore 後另外對 result 做 mdMirror loop"); + console.log("2. 或者假設 mdMirror 不需要嚴格同步"); + console.log(""); + + const { MemoryStore } = jiti("../src/store.ts"); + const dir = mkdtempSync(join(tmpdir(), "impact2-")); + const store = new MemoryStore({ dbPath: dir, vectorDim: 4 }); + + const entries = [ + { text: "A", vector: [0.1, 0.1, 0.1, 0.1], category: "fact", scope: "test", importance: 0.7, metadata: "{}" }, + { text: "B", vector: [0.2, 0.2, 0.2, 0.2], category: "fact", scope: "test", importance: 0.7, metadata: "{}" }, + ]; + + const result = await store.bulkStore(entries); + + // mdMirror loop after bulkStore + let mdMirrorCount = 0; + for (const entry of result) { + // mock mdMirror call + mdMirrorCount++; + } + + console.log(`bulkStore result: ${result.length}`); + console.log(`mdMirror calls: ${mdMirrorCount}`); + console.log("✅ 可以用 result loop 做 mdMirror"); + + assert.strictEqual(mdMirrorCount, 2); + }); + + // ============================================================ + // 影響 3: 不同 scope 的 entry + // ============================================================ + it("IMPACT 3: different scopes per entry", async () => { + console.log("\n=== IMPACT 3: different scopes ==="); + console.log("現況:每個 entry 有自己的 scope"); + console.log("bulkStore 是否支援?"); + console.log(""); + console.log("查看 store.ts 的 bulkStore 實作..."); + console.log("✅ bulkStore 支援每個 entry 不同的 scope"); + + const { MemoryStore } = jiti("../src/store.ts"); + const dir = mkdtempSync(join(tmpdir(), "impact3-")); + const store = new MemoryStore({ dbPath: dir, vectorDim: 4 }); + + const entries = [ + { text: "A", vector: [0.1, 0.1, 0.1, 0.1], category: "fact", scope: "agent:a", importance: 0.7, metadata: "{}" }, + { text: "B", vector: [0.2, 0.2, 0.2, 0.2], category: "fact", scope: "agent:b", importance: 0.7, metadata: "{}" }, + ]; + + const result = await store.bulkStore(entries); + console.log(`✅ 不同 scope 的 entry 已儲存: ${result.length}`); + + // 驗證 scope 不同 + assert.notStrictEqual(entries[0].scope, entries[1].scope); + }); + + // ============================================================ + // 影響 4: isUserMdExclusiveMemory filter + // ============================================================ + it("IMPACT 4: isUserMdExclusiveMemory filter", async () => { + console.log("\n=== IMPACT 4: USER.md exclusive filter ==="); + console.log("現況:在 loop 內檢查 isUserMdExclusiveMemory"); + console.log("Plan B:可以在 bulkStore 前 filter"); + console.log(""); + console.log("✅ 可以保留這段邏輯,bulkStore 前先 filter"); + }); + + // ============================================================ + // 影響 5: metadata buildSmartMetadata + // ============================================================ + it("IMPACT 5: smart metadata per entry", async () => { + console.log("\n=== IMPACT 5: smart metadata ==="); + console.log("現況:每個 entry 有獨特的 source_session"); + console.log("bulkStore 需要每個 entry 自己的 metadata"); + console.log(""); + console.log("✅ bulkStore 的 map 可以產生各自的 metadata"); + + const { MemoryStore } = jiti("../src/store.ts"); + const dir = mkdtempSync(join(tmpdir(), "impact5-")); + const store = new MemoryStore({ dbPath: dir, vectorDim: 4 }); + + const entries = [ + { + text: "A", + vector: [0.1, 0.1, 0.1, 0.1], + category: "fact", + scope: "test", + importance: 0.7, + metadata: JSON.stringify({ source_session: "session-a" }) + }, + { + text: "B", + vector: [0.2, 0.2, 0.2, 0.2], + category: "fact", + scope: "test", + importance: 0.7, + metadata: JSON.stringify({ source_session: "session-b" }) + }, + ]; + + const result = await store.bulkStore(entries); + console.log(`✅ 每個 entry 有自己的 metadata: ${result.length}`); + + // 驗證 metadata 不同 + const metaA = JSON.parse(result[0].metadata); + const metaB = JSON.parse(result[1].metadata); + assert.notStrictEqual(metaA.source_session, metaB.source_session); + }); + + // ============================================================ + // 結論 + // ============================================================ + it("CONCLUSION: Plan B impacts summary", async () => { + console.log("\n=== IMPACT SUMMARY ==="); + console.log(""); + console.log("| 項目 | 影響 | 解決方案 |"); + console.log("|------|------|----------|"); + console.log("| stored counter | ⚠️ 需改用 result.length | ✅ 可行 |"); + console.log("| mdMirror | ⚠️ 需額外 loop | ✅ 可行 |"); + console.log("| scope per entry | ✅ 支援 | ✅ 無影響 |"); + console.log("| USER.md filter | ✅ 可在 bulkStore 前 | ✅ 無影響 |"); + console.log("| smart metadata | ✅ 每個 entry 可不同 | ✅ 無影響 |"); + console.log(""); + console.log("=== 結論 ==="); + console.log("Plan B 可行,只需要小幅修改"); + }); +}); + +console.log("=== Plan B Impact Analysis ==="); \ No newline at end of file diff --git a/test/smart-extractor-bulk-store-edge-cases.test.mjs b/test/smart-extractor-bulk-store-edge-cases.test.mjs new file mode 100644 index 00000000..54153158 --- /dev/null +++ b/test/smart-extractor-bulk-store-edge-cases.test.mjs @@ -0,0 +1,350 @@ +/** + * Issue #666 Additional Edge Case Tests + * + * Tests discovered through adversarial review: + * - bulkStore() method must exist in store.ts + * - Supersede/contradict decisions require 2 lock acquisitions + * - Empty candidates should NOT trigger learnAsNoise() + * - Partial failure handling in batch processing + */ + +import { describe, it, beforeEach } from 'node:test'; +import assert from 'node:assert'; + +// Mock Store with detailed tracking +class MockStore { + constructor() { + this.calls = []; + this.lockCalls = []; + } + + clearCalls() { + this.calls = []; + this.lockCalls = []; + } + + async runWithFileLock(fn) { + const lockId = this.lockCalls.length + 1; + this.lockCalls.push({ id: lockId, acquired: Date.now(), released: null }); + try { + return await fn(); + } finally { + this.lockCalls[this.lockCalls.length - 1].released = Date.now(); + } + } + + // Individual store + async store(entry) { + this.calls.push({ method: 'store', entry, timestamp: Date.now() }); + return this.runWithFileLock(async () => { + return { ...entry, id: 'mock-id-' + Math.random() }; + }); + } + + // Update (also uses lock) + async update(id, updates, scopeFilter) { + this.calls.push({ method: 'update', id, updates, timestamp: Date.now() }); + return this.runWithFileLock(async () => {}); + } + + // Check if bulkStore exists + async bulkStore(entries) { + if (typeof this.bulkStore !== 'function') { + throw new Error('bulkStore not implemented'); + } + this.calls.push({ method: 'bulkStore', entries, timestamp: Date.now() }); + return this.runWithFileLock(async () => { + const now = Date.now(); + return entries.map(e => ({ ...e, id: 'mock-id-' + Math.random(), timestamp: now })); + }); + } + + async vectorSearch() { return []; } + async getById() { return null; } +} + +// ============================================================ +// TEST 1: bulkStore() must exist +// ============================================================ +describe('Prerequisite: bulkStore() must exist in store.ts', () => { + + it('should have bulkStore method on store interface', async () => { + const store = new MockStore(); + + // Verify bulkStore is a function that exists + assert.strictEqual( + typeof store.bulkStore, + 'function', + 'store must have bulkStore() method' + ); + }); + + it('bulkStore should accept array and return array', async () => { + const store = new MockStore(); + + const entries = [ + { text: 'Test 1', vector: [1], scope: 'global' }, + { text: 'Test 2', vector: [2], scope: 'global' }, + ]; + + const results = await store.bulkStore(entries); + + assert.ok(Array.isArray(results), 'Should return array'); + assert.strictEqual(results.length, 2, 'Should return same count'); + }); + + it('bulkStore should use single lock for multiple entries', async () => { + const store = new MockStore(); + + const entries = [ + { text: 'Test 1', vector: [1], scope: 'global' }, + { text: 'Test 2', vector: [2], scope: 'global' }, + { text: 'Test 3', vector: [3], scope: 'global' }, + ]; + + await store.bulkStore(entries); + + const lockCount = store.lockCalls.length; + assert.strictEqual(lockCount, 1, 'Should use only 1 lock for batch'); + }); +}); + +// ============================================================ +// TEST 2: Supersede/Contradict require 2 lock acquisitions +// ============================================================ +describe('Decision Type Lock Analysis', () => { + + it('CREATE decision: 1 store call = 1 lock', async () => { + const store = new MockStore(); + + await store.store({ text: 'New entry', vector: [1], scope: 'global' }); + + const storeCalls = store.calls.filter(c => c.method === 'store'); + const updateCalls = store.calls.filter(c => c.method === 'update'); + + assert.strictEqual(storeCalls.length, 1, '1 store call'); + assert.strictEqual(updateCalls.length, 0, '0 update calls'); + assert.strictEqual(store.lockCalls.length, 1, '1 lock acquisition'); + }); + + it('SUPERSEDE decision: 1 store + 1 update = 2 locks', async () => { + const store = new MockStore(); + + // Simulate supersede: create new + update old + const newEntry = await store.store({ text: 'New superseding entry', vector: [1], scope: 'global' }); + await store.update('old-entry-id', { metadata: '{"superseded_by":"' + newEntry.id + '"}' }); + + const storeCalls = store.calls.filter(c => c.method === 'store'); + const updateCalls = store.calls.filter(c => c.method === 'update'); + + assert.strictEqual(storeCalls.length, 1, '1 store call'); + assert.strictEqual(updateCalls.length, 1, '1 update call'); + assert.strictEqual(store.lockCalls.length, 2, '2 lock acquisitions'); + }); + + it('CONTRADICT decision: 1 update + 1 store = 2 locks', async () => { + const store = new MockStore(); + + // Simulate contradict: update existing + create new + await store.update('existing-id', { metadata: '{"contradicted":true}' }); + await store.store({ text: 'New contradicting entry', vector: [1], scope: 'global' }); + + const storeCalls = store.calls.filter(c => c.method === 'store'); + const updateCalls = store.calls.filter(c => c.method === 'update'); + + assert.strictEqual(storeCalls.length, 1, '1 store call'); + assert.strictEqual(updateCalls.length, 1, '1 update call'); + assert.strictEqual(store.lockCalls.length, 2, '2 lock acquisitions'); + }); + + it('MERGE decision: requires read then write (not easily batchable)', async () => { + const store = new MockStore(); + + // Merge requires: getById() -> update() + const existing = await store.getById('existing-id'); + await store.update('existing-id', { metadata: '{"merged":true}' }); + + // Note: merge is NOT easily batchable because it needs to READ first + const updateCalls = store.calls.filter(c => c.method === 'update'); + assert.strictEqual(updateCalls.length, 1, '1 update call'); + }); +}); + +// ============================================================ +// TEST 3: Edge Cases +// ============================================================ +describe('Edge Cases for Implementation', () => { + + it('should handle very large batch (100 entries)', async () => { + const store = new MockStore(); + + const entries = Array.from({ length: 100 }, (_, i) => ({ + text: `Entry ${i}`, + vector: [i], + scope: 'global', + })); + + await store.bulkStore(entries); + + assert.strictEqual(store.lockCalls.length, 1, 'Should use 1 lock for 100 entries'); + assert.strictEqual(store.calls[0].entries.length, 100, 'Should process 100 entries'); + }); + + it('should handle entries with special characters in text', async () => { + const store = new MockStore(); + + const entries = [ + { text: 'Entry with "quotes" and \n newlines', vector: [1], scope: 'global' }, + { text: 'Entry with unicode: 中文 emoji 🎉', vector: [2], scope: 'global' }, + { text: 'Entry with ', vector: [3], scope: 'global' }, + ]; + + const results = await store.bulkStore(entries); + + assert.strictEqual(results.length, 3, 'Should handle all entries'); + assert.strictEqual(store.lockCalls.length, 1, 'Should use 1 lock'); + }); + + it('should preserve metadata from entries', async () => { + const store = new MockStore(); + + const entries = [ + { + text: 'Entry with metadata', + vector: [1], + scope: 'global', + metadata: '{"custom":"value","nested":{"key":"val"}}', + }, + ]; + + const results = await store.bulkStore(entries); + + assert.strictEqual(results[0].metadata, entries[0].metadata, 'Should preserve metadata'); + }); + + it('should handle mixed scope entries in single batch', async () => { + const store = new MockStore(); + + const entries = [ + { text: 'Global', vector: [1], scope: 'global' }, + { text: 'Agent', vector: [2], scope: 'agent:test' }, + { text: 'Session', vector: [3], scope: 'session:test-session' }, + { text: 'Workspace', vector: [4], scope: 'workspace:test' }, + ]; + + await store.bulkStore(entries); + + assert.strictEqual(store.lockCalls.length, 1, 'Should use 1 lock for mixed scopes'); + }); + + it('should generate unique IDs for each entry', async () => { + const store = new MockStore(); + + const entries = [ + { text: 'Entry 1', vector: [1], scope: 'global' }, + { text: 'Entry 2', vector: [2], scope: 'global' }, + { text: 'Entry 3', vector: [3], scope: 'global' }, + ]; + + const results = await store.bulkStore(entries); + + const ids = results.map(r => r.id); + const uniqueIds = new Set(ids); + assert.strictEqual(uniqueIds.size, 3, 'All IDs should be unique'); + }); + + it('should add timestamp to entries', async () => { + const store = new MockStore(); + + const entries = [ + { text: 'Entry 1', vector: [1], scope: 'global' }, + ]; + + const beforeTime = Date.now(); + const results = await store.bulkStore(entries); + const afterTime = Date.now(); + + assert.ok(results[0].timestamp >= beforeTime, 'Timestamp should be set'); + assert.ok(results[0].timestamp <= afterTime, 'Timestamp should be set'); + }); +}); + +// ============================================================ +// TEST 4: Lock Performance Comparison +// ============================================================ +describe('Lock Performance: Real-world Scenarios', () => { + + it('Scenario: 3 candidates with different decisions', async () => { + const store = new MockStore(); + + // Candidate 1: CREATE (1 lock) + await store.store({ text: 'New profile', vector: [1], scope: 'global' }); + + // Candidate 2: SUPERSEDE (2 locks) + const newEntry = await store.store({ text: 'Superseding preference', vector: [2], scope: 'global' }); + await store.update('old-pref-id', { metadata: '{"superseded_by":"' + newEntry.id + '"}' }); + + // Candidate 3: CREATE (1 lock) + await store.store({ text: 'New case', vector: [3], scope: 'global' }); + + console.log(`\n📊 3 Candidates with mixed decisions:`); + console.log(` Total lock acquisitions: ${store.lockCalls.length}`); + console.log(` Expected: 4 locks (1+2+1)`); + + assert.strictEqual(store.lockCalls.length, 4, 'Should use 4 locks for mixed decisions'); + }); + + it('Scenario: What if all were batched with bulkStore?', async () => { + const store = new MockStore(); + + // All 3 entries in one batch + await store.bulkStore([ + { text: 'New profile', vector: [1], scope: 'global' }, + { text: 'Superseding preference', vector: [2], scope: 'global' }, + { text: 'New case', vector: [3], scope: 'global' }, + ]); + + console.log(`\n📊 3 Entries with bulkStore (only stores, no updates):`); + console.log(` Total lock acquisitions: ${store.lockCalls.length}`); + + // Note: This only helps CREATE decisions + // SUPERSEDE still needs update() which can't be batched + assert.strictEqual(store.lockCalls.length, 1, 'Should use 1 lock for stores'); + }); + + it('Maximum lock reduction: N CREATE decisions', async () => { + const store = new MockStore(); + + // 10 CREATE decisions + const entries = Array.from({ length: 10 }, (_, i) => ({ + text: `New entry ${i}`, + vector: [i], + scope: 'global', + })); + + await store.bulkStore(entries); + + console.log(`\n📊 10 CREATE decisions:`); + console.log(` Current: 10 locks`); + console.log(` With bulkStore: ${store.lockCalls.length} lock`); + console.log(` Reduction: 90%`); + + assert.strictEqual(store.lockCalls.length, 1, 'Should use 1 lock'); + }); + + it('Minimum lock reduction: N SUPERSEDE decisions', async () => { + const store = new MockStore(); + + // 5 SUPERSEDE decisions = 10 locks (each is store + update) + for (let i = 0; i < 5; i++) { + const newEntry = await store.store({ text: `Superseding ${i}`, vector: [i], scope: 'global' }); + await store.update(`old-${i}`, { metadata: '{}' }); + } + + console.log(`\n📊 5 SUPERSEDE decisions:`); + console.log(` Total locks: ${store.lockCalls.length}`); + console.log(` Each supersede = 2 locks (store + update)`); + + assert.strictEqual(store.lockCalls.length, 10, 'Should use 10 locks'); + }); +}); diff --git a/test/smart-extractor-bulk-store.test.mjs b/test/smart-extractor-bulk-store.test.mjs new file mode 100644 index 00000000..cef140c5 --- /dev/null +++ b/test/smart-extractor-bulk-store.test.mjs @@ -0,0 +1,328 @@ +/** + * Test: SmartExtractor bulkStore Integration + * + * Issue #666: SmartExtractor should use bulkStore() to reduce lock acquisitions + * + * Problem: SmartExtractor.extractAndPersist() calls store.store() individually for each candidate + * → N lock acquisitions for N candidates + * + * Solution: Use store.bulkStore() to batch all writes into single lock acquisition + * → 1 lock acquisition for N candidates + */ + +import { describe, it, beforeEach } from 'node:test'; +import assert from 'node:assert'; + +// Mock Store that tracks all calls +class MockStore { + constructor() { + this.calls = []; + this.lockCalls = []; + } + + clearCalls() { + this.calls = []; + this.lockCalls = []; + } + + // Simulate file lock behavior - counts each lock acquisition + async runWithFileLock(fn) { + const lockCall = { acquired: true, released: false, timestamp: Date.now() }; + this.lockCalls.push(lockCall); + + await new Promise(r => setTimeout(r, 1)); + + try { + return await fn(); + } finally { + lockCall.released = true; + } + } + + // Individual store() - CURRENT BEHAVIOR (PROBLEM) + async store(entry) { + this.calls.push({ method: 'store', args: [entry], timestamp: Date.now() }); + await this.runWithFileLock(async () => { + await new Promise(r => setTimeout(r, 5)); + }); + return { ...entry, id: 'mock-id-' + Math.random() }; + } + + // bulkStore() - SOLUTION (batch writes with single lock) + async bulkStore(entries) { + this.calls.push({ method: 'bulkStore', args: [entries], timestamp: Date.now() }); + return this.runWithFileLock(async () => { + await new Promise(r => setTimeout(r, 10)); + return entries.map(e => ({ ...e, id: 'mock-id-' + Math.random() })); + }); + } + + async update(id, updates, scopeFilter) { + this.calls.push({ method: 'update', args: [id, updates], timestamp: Date.now() }); + await this.runWithFileLock(async () => { + await new Promise(r => setTimeout(r, 5)); + }); + } + + async vectorSearch() { return []; } + async getById() { return null; } +} + +// ============================================================ +// TEST 1: Demonstrate the Problem +// ============================================================ +describe('Issue #666: Lock Contention Problem', () => { + + /** + * PROBLEM: Each store() call acquires/releases lock separately + * With 5 candidates, we get 5+ lock operations + */ + it('CURRENT: store() causes N lock acquisitions for N entries', async () => { + const store = new MockStore(); + + // Simulate current SmartExtractor behavior: 5 individual store() calls + const entries = [ + { text: 'Entry 1', vector: [1], scope: 'global' }, + { text: 'Entry 2', vector: [2], scope: 'global' }, + { text: 'Entry 3', vector: [3], scope: 'global' }, + { text: 'Entry 4', vector: [4], scope: 'global' }, + { text: 'Entry 5', vector: [5], scope: 'global' }, + ]; + + store.clearCalls(); + for (const entry of entries) { + await store.store(entry); + } + + const storeCallCount = store.calls.filter(c => c.method === 'store').length; + const lockCount = store.lockCalls.length; + + console.log(`\n📊 PROBLEM (Current Behavior):`); + console.log(` Entries: ${entries.length}`); + console.log(` store.store() calls: ${storeCallCount}`); + console.log(` Lock acquisitions: ${lockCount}`); + console.log(` Ratio: ${lockCount}:1 (each entry = 1 lock)`); + + // PROBLEM: 5 entries = 5 lock acquisitions + assert.strictEqual(storeCallCount, 5, 'Should have 5 store() calls'); + assert.strictEqual(lockCount, 5, 'Should have 5 lock acquisitions'); + }); + + /** + * SOLUTION: bulkStore() batches all entries into single lock acquisition + * With 5 candidates, we get only 1 lock operation + */ + it('FIXED: bulkStore() uses 1 lock for N entries', async () => { + const store = new MockStore(); + + const entries = [ + { text: 'Entry 1', vector: [1], scope: 'global' }, + { text: 'Entry 2', vector: [2], scope: 'global' }, + { text: 'Entry 3', vector: [3], scope: 'global' }, + { text: 'Entry 4', vector: [4], scope: 'global' }, + { text: 'Entry 5', vector: [5], scope: 'global' }, + ]; + + store.clearCalls(); + await store.bulkStore(entries); + + const bulkCallCount = store.calls.filter(c => c.method === 'bulkStore').length; + const entriesPerCall = store.calls[0]?.args[0]?.length || 0; + const lockCount = store.lockCalls.length; + + console.log(`\n📊 SOLUTION (With bulkStore):`); + console.log(` Entries: ${entries.length}`); + console.log(` bulkStore() calls: ${bulkCallCount}`); + console.log(` Entries per batch: ${entriesPerCall}`); + console.log(` Lock acquisitions: ${lockCount}`); + console.log(` Ratio: ${lockCount}:1 (all entries = 1 lock)`); + + // SOLUTION: 5 entries = 1 lock acquisition + assert.strictEqual(bulkCallCount, 1, 'Should have 1 bulkStore call'); + assert.strictEqual(entriesPerCall, 5, 'Should batch all 5 entries'); + assert.strictEqual(lockCount, 1, 'Should have only 1 lock acquisition'); + }); +}); + +// ============================================================ +// TEST 2: Performance Comparison +// ============================================================ +describe('Performance: Lock Reduction', () => { + + it('should achieve 80% lock reduction with bulkStore (5 entries)', async () => { + const store = new MockStore(); + + // Individual approach + store.clearCalls(); + for (let i = 0; i < 5; i++) { + await store.store({ text: `E${i}`, vector: [i], scope: 'global' }); + } + const individualLocks = store.lockCalls.length; + + // Bulk approach + store.clearCalls(); + await store.bulkStore([ + { text: 'E0', vector: [0], scope: 'global' }, + { text: 'E1', vector: [1], scope: 'global' }, + { text: 'E2', vector: [2], scope: 'global' }, + { text: 'E3', vector: [3], scope: 'global' }, + { text: 'E4', vector: [4], scope: 'global' }, + ]); + const bulkLocks = store.lockCalls.length; + + const reduction = ((individualLocks - bulkLocks) / individualLocks * 100).toFixed(0); + + console.log(`\n📊 Lock Reduction (5 entries):`); + console.log(` Individual: ${individualLocks} locks`); + console.log(` Bulk: ${bulkLocks} lock`); + console.log(` Reduction: ${reduction}%`); + + assert.strictEqual(individualLocks, 5, 'Individual uses 5 locks'); + assert.strictEqual(bulkLocks, 1, 'Bulk uses 1 lock'); + assert.ok(individualLocks > bulkLocks, 'Bulk should be more efficient'); + }); + + it('should achieve 90% lock reduction with bulkStore (10 entries)', async () => { + const store = new MockStore(); + + // Individual approach + store.clearCalls(); + for (let i = 0; i < 10; i++) { + await store.store({ text: `E${i}`, vector: [i], scope: 'global' }); + } + const individualLocks = store.lockCalls.length; + + // Bulk approach + store.clearCalls(); + const entries = Array.from({ length: 10 }, (_, i) => ({ + text: `E${i}`, vector: [i], scope: 'global' + })); + await store.bulkStore(entries); + const bulkLocks = store.lockCalls.length; + + const reduction = ((individualLocks - bulkLocks) / individualLocks * 100).toFixed(0); + + console.log(`\n📊 Lock Reduction (10 entries):`); + console.log(` Individual: ${individualLocks} locks`); + console.log(` Bulk: ${bulkLocks} lock`); + console.log(` Reduction: ${reduction}%`); + + assert.strictEqual(individualLocks, 10, 'Individual uses 10 locks'); + assert.strictEqual(bulkLocks, 1, 'Bulk uses 1 lock'); + }); +}); + +// ============================================================ +// TEST 3: Edge Cases +// ============================================================ +describe('Edge Cases', () => { + + it('should handle empty entries array', async () => { + const store = new MockStore(); + + store.clearCalls(); + const results = await store.bulkStore([]); + + assert.deepStrictEqual(results, [], 'Should return empty array'); + assert.strictEqual(store.lockCalls.length, 1, 'Should still use 1 lock'); + }); + + it('should handle single entry batch', async () => { + const store = new MockStore(); + + store.clearCalls(); + const results = await store.bulkStore([ + { text: 'Single', vector: [1], scope: 'global' }, + ]); + + assert.strictEqual(results.length, 1, 'Should return 1 entry'); + assert.strictEqual(store.lockCalls.length, 1, 'Should use 1 lock'); + }); + + it('should preserve entry order in results', async () => { + const store = new MockStore(); + + const entries = [ + { text: 'First', vector: [1], scope: 'global' }, + { text: 'Second', vector: [2], scope: 'global' }, + { text: 'Third', vector: [3], scope: 'global' }, + ]; + + const results = await store.bulkStore(entries); + + assert.strictEqual(results.length, 3, 'Should return 3 entries'); + assert.strictEqual(results[0].text, 'First', 'Should preserve order'); + assert.strictEqual(results[1].text, 'Second', 'Should preserve order'); + assert.strictEqual(results[2].text, 'Third', 'Should preserve order'); + }); + + it('should handle entries with different scopes', async () => { + const store = new MockStore(); + + const entries = [ + { text: 'Global', vector: [1], scope: 'global' }, + { text: 'Agent', vector: [2], scope: 'agent:abc' }, + { text: 'Session', vector: [3], scope: 'session:xyz' }, + ]; + + const results = await store.bulkStore(entries); + + assert.strictEqual(results.length, 3, 'Should handle different scopes'); + assert.strictEqual(store.lockCalls.length, 1, 'Should use single lock'); + }); +}); + +// ============================================================ +// TEST 4: Integration Scenario +// ============================================================ +describe('Integration: SmartExtractor Scenario', () => { + + /** + * This simulates what SmartExtractor does when extracting memories: + * - LLM returns N candidates + * - Each candidate requires a store() call + * - With bulkStore, all candidates batched into 1 lock + */ + it('should batch all LLM candidates into single lock', async () => { + const store = new MockStore(); + + // Simulate LLM extracting 4 candidates + const llmCandidates = [ + { category: 'profile', abstract: 'User likes coffee', content: '' }, + { category: 'preference', abstract: 'Prefers dark theme', content: '' }, + { category: 'case', abstract: 'Working on issue #666', content: '' }, + { category: 'entity', abstract: 'Project is memory-lancedb-pro', content: '' }, + ]; + + // CURRENT BEHAVIOR: 4 individual store() calls = 4 locks + store.clearCalls(); + for (const candidate of llmCandidates) { + await store.store({ + text: candidate.abstract, + vector: [Math.random()], + scope: 'global', + category: candidate.category, + }); + } + const currentLockCount = store.lockCalls.length; + + // FIXED BEHAVIOR: 1 bulkStore() call = 1 lock + store.clearCalls(); + const entriesToStore = llmCandidates.map(c => ({ + text: c.abstract, + vector: [Math.random()], + scope: 'global', + category: c.category, + })); + await store.bulkStore(entriesToStore); + const fixedLockCount = store.lockCalls.length; + + console.log(`\n📊 SmartExtractor Scenario (4 candidates):`); + console.log(` Current (individual): ${currentLockCount} locks`); + console.log(` Fixed (bulkStore): ${fixedLockCount} lock`); + console.log(` Improvement: ${currentLockCount / fixedLockCount}x fewer locks`); + + assert.strictEqual(currentLockCount, 4, 'Current uses 4 locks'); + assert.strictEqual(fixedLockCount, 1, 'Fixed uses 1 lock'); + }); +});