Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -51,15 +51,15 @@
]
},
"optionalDependencies": {
"@lancedb/lancedb-darwin-x64": "^0.26.2",
"@lancedb/lancedb-darwin-arm64": "^0.26.2",
"@lancedb/lancedb-linux-x64-gnu": "^0.26.2",
"@lancedb/lancedb-darwin-x64": "^0.26.2",
"@lancedb/lancedb-linux-arm64-gnu": "^0.26.2",
"@lancedb/lancedb-linux-x64-gnu": "^0.26.2",
"@lancedb/lancedb-win32-x64-msvc": "^0.26.2"
},
"devDependencies": {
"commander": "^14.0.0",
"jiti": "^2.6.0",
"jiti": "^2.6.1",
"typescript": "^5.9.3"
}
}
5 changes: 5 additions & 0 deletions scripts/ci-test-manifest.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,11 @@ export const CI_TEST_MANIFEST = [
{ group: "core-regression", runner: "node", file: "test/embedder-cache.test.mjs" },
// Issue #629 batch embedding fix
{ group: "llm-clients-and-auth", runner: "node", file: "test/embedder-ollama-batch-routing.test.mjs" },
// Issue #665 bulkStore tests
{ group: "storage-and-schema", runner: "node", file: "test/bulk-store.test.mjs", args: ["--test"] },
{ group: "storage-and-schema", runner: "node", file: "test/bulk-store-edge-cases.test.mjs", args: ["--test"] },
{ group: "storage-and-schema", runner: "node", file: "test/smart-extractor-bulk-store.test.mjs", args: ["--test"] },
{ group: "storage-and-schema", runner: "node", file: "test/smart-extractor-bulk-store-edge-cases.test.mjs", args: ["--test"] },
];

export function getEntriesForGroup(group) {
Expand Down
99 changes: 72 additions & 27 deletions src/smart-extractor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ import { classifyTemporal, inferExpiry } from "./temporal-classifier.js";
import { inferAtomicBrandItemPreferenceSlot } from "./preference-slots.js";
import { batchDedup } from "./batch-dedup.js";

type StoreEntry = Omit<import("./store.js").MemoryEntry, "id" | "timestamp">;

// ============================================================================
// Envelope Metadata Stripping
// ============================================================================
Expand Down Expand Up @@ -417,6 +419,8 @@ export class SmartExtractor {
}
}

const createEntries: Omit<import("./store.js").MemoryEntry, "id" | "timestamp">[] = [];

for (const { index, candidate } of processableCandidates) {
try {
await this.processCandidate(
Expand All @@ -427,6 +431,7 @@ export class SmartExtractor {
targetScope,
scopeFilter,
precomputedVectors.get(index),
createEntries,
);
} catch (err) {
this.log(
Expand All @@ -435,6 +440,10 @@ export class SmartExtractor {
}
}

if (createEntries.length > 0) {
await this.store.bulkStore(createEntries);
}

return stats;
}

Expand Down Expand Up @@ -653,6 +662,7 @@ export class SmartExtractor {
targetScope: string,
scopeFilter?: string[],
precomputedVector?: number[],
createEntries?: Omit<import("./store.js").MemoryEntry, "id" | "timestamp">[],
): Promise<void> {
// Profile always merges (skip dedup — admission control still applies)
if (ALWAYS_MERGE_CATEGORIES.has(candidate.category)) {
Expand All @@ -662,6 +672,8 @@ export class SmartExtractor {
sessionKey,
targetScope,
scopeFilter,
undefined,
createEntries,
);
if (profileResult === "rejected") {
stats.rejected = (stats.rejected ?? 0) + 1;
Expand All @@ -678,7 +690,7 @@ export class SmartExtractor {
const vector = precomputedVector ?? await this.embedder.embed(`${candidate.abstract} ${candidate.content}`);
if (!vector || vector.length === 0) {
this.log("memory-pro: smart-extractor: embedding failed, storing as-is");
await this.storeCandidate(candidate, vector || [], sessionKey, targetScope);
createEntries?.push(this.buildStoreEntry(candidate, vector || [], sessionKey, targetScope));
stats.created++;
return;
}
Expand Down Expand Up @@ -714,7 +726,7 @@ export class SmartExtractor {

switch (dedupResult.decision) {
case "create":
await this.storeCandidate(candidate, vector, sessionKey, targetScope, admission?.audit);
createEntries?.push(this.buildStoreEntry(candidate, vector, sessionKey, targetScope, admission?.audit));
stats.created++;
break;

Expand All @@ -730,11 +742,12 @@ export class SmartExtractor {
scopeFilter,
dedupResult.contextLabel,
admission?.audit,
createEntries,
);
stats.merged++;
} else {
// Category doesn't support merge → create instead
await this.storeCandidate(candidate, vector, sessionKey, targetScope, admission?.audit);
createEntries?.push(this.buildStoreEntry(candidate, vector, sessionKey, targetScope, admission?.audit));
stats.created++;
}
break;
Expand All @@ -759,11 +772,12 @@ export class SmartExtractor {
targetScope,
scopeFilter,
admission?.audit,
createEntries,
);
stats.created++;
stats.superseded = (stats.superseded ?? 0) + 1;
} else {
await this.storeCandidate(candidate, vector, sessionKey, targetScope, admission?.audit);
createEntries?.push(this.buildStoreEntry(candidate, vector, sessionKey, targetScope, admission?.audit));
stats.created++;
}
break;
Expand All @@ -773,17 +787,17 @@ export class SmartExtractor {
await this.handleSupport(dedupResult.matchId, { session: sessionKey, timestamp: Date.now() }, dedupResult.reason, dedupResult.contextLabel, scopeFilter, admission?.audit);
stats.supported = (stats.supported ?? 0) + 1;
} else {
await this.storeCandidate(candidate, vector, sessionKey, targetScope, admission?.audit);
createEntries?.push(this.buildStoreEntry(candidate, vector, sessionKey, targetScope, admission?.audit));
stats.created++;
}
break;

case "contextualize":
if (dedupResult.matchId) {
await this.handleContextualize(candidate, vector, dedupResult.matchId, sessionKey, targetScope, scopeFilter, dedupResult.contextLabel, admission?.audit);
await this.handleContextualize(candidate, vector, dedupResult.matchId, sessionKey, targetScope, scopeFilter, dedupResult.contextLabel, admission?.audit, createEntries);
stats.created++;
} else {
await this.storeCandidate(candidate, vector, sessionKey, targetScope, admission?.audit);
createEntries?.push(this.buildStoreEntry(candidate, vector, sessionKey, targetScope, admission?.audit));
stats.created++;
}
break;
Expand All @@ -802,15 +816,16 @@ export class SmartExtractor {
targetScope,
scopeFilter,
admission?.audit,
createEntries,
);
stats.created++;
stats.superseded = (stats.superseded ?? 0) + 1;
} else {
await this.handleContradict(candidate, vector, dedupResult.matchId, sessionKey, targetScope, scopeFilter, dedupResult.contextLabel, admission?.audit);
await this.handleContradict(candidate, vector, dedupResult.matchId, sessionKey, targetScope, scopeFilter, dedupResult.contextLabel, admission?.audit, createEntries);
stats.created++;
}
} else {
await this.storeCandidate(candidate, vector, sessionKey, targetScope, admission?.audit);
createEntries?.push(this.buildStoreEntry(candidate, vector, sessionKey, targetScope, admission?.audit));
stats.created++;
}
break;
Expand Down Expand Up @@ -964,6 +979,7 @@ export class SmartExtractor {
targetScope: string,
scopeFilter?: string[],
admissionAudit?: AdmissionAuditRecord,
createEntries?: StoreEntry[],
): Promise<"merged" | "created" | "rejected"> {
// Find existing profile memory by category
const embeddingText = `${candidate.abstract} ${candidate.content}`;
Expand Down Expand Up @@ -1011,11 +1027,12 @@ export class SmartExtractor {
scopeFilter,
undefined,
admissionAudit,
createEntries,
);
return "merged";
} else {
// No existing profile — create new
await this.storeCandidate(candidate, vector || [], sessionKey, targetScope, admissionAudit);
createEntries?.push(this.buildStoreEntry(candidate, vector || [], sessionKey, targetScope, admissionAudit));
return "created";
}
}
Expand All @@ -1030,6 +1047,7 @@ export class SmartExtractor {
scopeFilter?: string[],
contextLabel?: string,
admissionAudit?: AdmissionAuditRecord,
createEntries?: StoreEntry[],
): Promise<void> {
let existingAbstract = "";
let existingOverview = "";
Expand All @@ -1051,12 +1069,12 @@ export class SmartExtractor {
const vector = await this.embedder.embed(
`${candidate.abstract} ${candidate.content}`,
);
await this.storeCandidate(
createEntries?.push(this.buildStoreEntry(
candidate,
vector || [],
"merge-fallback",
targetScope,
);
));
return;
}

Expand Down Expand Up @@ -1141,12 +1159,13 @@ export class SmartExtractor {
matchId: string,
sessionKey: string,
targetScope: string,
scopeFilter: string[],
scopeFilter?: string[],
admissionAudit?: AdmissionAuditRecord,
createEntries?: StoreEntry[],
): Promise<void> {
const existing = await this.store.getById(matchId, scopeFilter);
if (!existing) {
await this.storeCandidate(candidate, vector, sessionKey, targetScope);
createEntries?.push(this.buildStoreEntry(candidate, vector || [], sessionKey, targetScope));
return;
}

Expand Down Expand Up @@ -1265,6 +1284,7 @@ export class SmartExtractor {
scopeFilter?: string[],
contextLabel?: string,
admissionAudit?: AdmissionAuditRecord,
createEntries?: StoreEntry[],
): Promise<void> {
const storeCategory = this.mapToStoreCategory(candidate.category);
const metadata = stringifySmartMetadata(this.withAdmissionAudit({
Expand All @@ -1287,14 +1307,19 @@ export class SmartExtractor {
relations: [{ type: "contextualizes", targetId: matchId }],
}, admissionAudit));

await this.store.store({
const entry_c: StoreEntry = {
text: candidate.abstract,
vector,
category: storeCategory,
scope: targetScope,
importance: this.getDefaultImportance(candidate.category),
metadata,
});
};
if (createEntries) {
createEntries.push(entry_c);
} else {
await this.store.store(entry_c);
}

this.log(
`memory-pro: smart-extractor: contextualize [${contextLabel || "general"}] new entry linked to ${matchId.slice(0, 8)}`,
Expand All @@ -1314,6 +1339,7 @@ export class SmartExtractor {
scopeFilter?: string[],
contextLabel?: string,
admissionAudit?: AdmissionAuditRecord,
createEntries?: StoreEntry[],
): Promise<void> {
// 1. Record contradiction on the existing memory
const existing = await this.store.getById(matchId, scopeFilter);
Expand Down Expand Up @@ -1351,14 +1377,19 @@ export class SmartExtractor {
relations: [{ type: "contradicts", targetId: matchId }],
}, admissionAudit));

await this.store.store({
const entry_d: StoreEntry = {
text: candidate.abstract,
vector,
category: storeCategory,
scope: targetScope,
importance: this.getDefaultImportance(candidate.category),
metadata,
});
};
if (createEntries) {
createEntries.push(entry_d);
} else {
await this.store.store(entry_d);
}

this.log(
`memory-pro: smart-extractor: contradict [${contextLabel || "general"}] on ${matchId.slice(0, 8)}, new entry created`,
Expand All @@ -1370,24 +1401,23 @@ export class SmartExtractor {
// --------------------------------------------------------------------------

/**
* Store a candidate memory as a new entry with L0/L1/L2 metadata.
* Build a memory entry from candidate data (without writing).
* Used by batch creation to reduce lock acquisitions.
*/
private async storeCandidate(
private buildStoreEntry(
candidate: CandidateMemory,
vector: number[],
sessionKey: string,
targetScope: string,
admissionAudit?: AdmissionAuditRecord,
): Promise<void> {
// Map 6-category to existing store categories for backward compatibility
): Omit<import("./store.js").MemoryEntry, "id" | "timestamp"> {
const storeCategory = this.mapToStoreCategory(candidate.category);

const classifyText = candidate.content || candidate.abstract;
const metadata = stringifySmartMetadata(
buildSmartMetadata(
{
text: candidate.abstract,
category: this.mapToStoreCategory(candidate.category),
category: storeCategory,
},
{
l0_abstract: candidate.abstract,
Expand All @@ -1406,18 +1436,33 @@ export class SmartExtractor {
suppressed_until_turn: 0,
memory_temporal_type: classifyTemporal(classifyText),
valid_until: inferExpiry(classifyText),
...(admissionAudit ? { admission_audit: JSON.stringify(admissionAudit) } : {}),
},
),
);

await this.store.store({
text: candidate.abstract, // L0 used as the searchable text
return {
text: candidate.abstract,
vector,
category: storeCategory,
scope: targetScope,
importance: this.getDefaultImportance(candidate.category),
metadata,
});
};
}

/**
* Store a candidate memory as a new entry with L0/L1/L2 metadata.
*/
private async storeCandidate(
candidate: CandidateMemory,
vector: number[],
sessionKey: string,
targetScope: string,
admissionAudit?: AdmissionAuditRecord,
): Promise<void> {
const entry = this.buildStoreEntry(candidate, vector, sessionKey, targetScope, admissionAudit);
await this.store.store(entry);

this.log(
`memory-pro: smart-extractor: created [${candidate.category}] ${candidate.abstract.slice(0, 60)}`,
Expand Down
Loading
Loading