diff --git a/src/reflection-item-store.ts b/src/reflection-item-store.ts index 36c4ac5e..31893e9a 100644 --- a/src/reflection-item-store.ts +++ b/src/reflection-item-store.ts @@ -23,6 +23,12 @@ export interface ReflectionItemMetadata { baseWeight: number; quality: number; sourceReflectionPath?: string; + /** Unix timestamp when the item was marked resolved. Undefined = unresolved. */ + resolvedAt?: number; + /** Agent ID that marked this item resolved. */ + resolvedBy?: string; + /** Optional note explaining why the item was resolved. */ + resolutionNote?: string; } export interface ReflectionItemPayload { diff --git a/src/reflection-store.ts b/src/reflection-store.ts index 77eda91e..9cb78b08 100644 --- a/src/reflection-store.ts +++ b/src/reflection-store.ts @@ -231,45 +231,110 @@ export interface LoadReflectionSlicesParams { invariantMaxAgeMs?: number; } -export function loadAgentReflectionSlicesFromEntries(params: LoadReflectionSlicesParams): { - invariants: string[]; - derived: string[]; -} { - const now = Number.isFinite(params.now) ? Number(params.now) : Date.now(); - const deriveMaxAgeMs = Number.isFinite(params.deriveMaxAgeMs) - ? Math.max(0, Number(params.deriveMaxAgeMs)) - : DEFAULT_REFLECTION_DERIVED_MAX_AGE_MS; - const invariantMaxAgeMs = Number.isFinite(params.invariantMaxAgeMs) - ? Math.max(0, Number(params.invariantMaxAgeMs)) - : undefined; - - const reflectionRows = params.entries - .map((entry) => ({ entry, metadata: parseReflectionMetadata(entry.metadata) })) - .filter(({ metadata }) => isReflectionMetadataType(metadata.type) && isOwnedByAgent(metadata, params.agentId)) - .sort((a, b) => b.entry.timestamp - a.entry.timestamp) - .slice(0, 160); - - const itemRows = reflectionRows.filter(({ metadata }) => metadata.type === "memory-reflection-item"); - const legacyRows = reflectionRows.filter(({ metadata }) => metadata.type === "memory-reflection"); - - const invariantCandidates = buildInvariantCandidates(itemRows, legacyRows); - const derivedCandidates = buildDerivedCandidates(itemRows, legacyRows, params.agentId); - - const invariants = rankReflectionLines(invariantCandidates, { - now, - maxAgeMs: invariantMaxAgeMs, - limit: 8, - }); - - const derived = rankReflectionLines(derivedCandidates, { - now, - maxAgeMs: deriveMaxAgeMs, - limit: 10, - }); - - return { invariants, derived }; -} - +export function loadAgentReflectionSlicesFromEntries(params: LoadReflectionSlicesParams): { + invariants: string[]; + derived: string[]; +} { + const now = Number.isFinite(params.now) ? Number(params.now) : Date.now(); + const deriveMaxAgeMs = Number.isFinite(params.deriveMaxAgeMs) + ? Math.max(0, Number(params.deriveMaxAgeMs)) + : DEFAULT_REFLECTION_DERIVED_MAX_AGE_MS; + const invariantMaxAgeMs = Number.isFinite(params.invariantMaxAgeMs) + ? Math.max(0, Number(params.invariantMaxAgeMs)) + : undefined; + + const reflectionRows = params.entries + .map((entry) => ({ entry, metadata: parseReflectionMetadata(entry.metadata) })) + .filter(({ metadata }) => isReflectionMetadataType(metadata.type) && isOwnedByAgent(metadata, params.agentId)) + .sort((a, b) => b.entry.timestamp - a.entry.timestamp) + .slice(0, 160); + + const itemRows = reflectionRows.filter(({ metadata }) => metadata.type === "memory-reflection-item"); + const legacyRows = reflectionRows.filter(({ metadata }) => metadata.type === "memory-reflection"); + + // [P1] Filter out resolved items — passive suppression for #447 + // resolvedAt === undefined means unresolved (default) + const unresolvedItemRows = itemRows.filter(({ metadata }) => metadata.resolvedAt === undefined); + const resolvedItemRows = itemRows.filter(({ metadata }) => metadata.resolvedAt !== undefined); + + const hasItemRows = itemRows.length > 0; + const hasLegacyRows = legacyRows.length > 0; + + // Collect normalized text of resolved items so we can detect whether legacy + // rows are pure duplicates of already-resolved content. + const resolvedInvariantTexts = new Set( + resolvedItemRows + .filter(({ metadata }) => metadata.itemKind === "invariant") + .flatMap(({ entry }) => sanitizeInjectableReflectionLines([entry.text])) + .map((line) => normalizeReflectionLineForAggregation(line)) + ); + const resolvedDerivedTexts = new Set( + resolvedItemRows + .filter(({ metadata }) => metadata.itemKind === "derived") + .flatMap(({ entry }) => sanitizeInjectableReflectionLines([entry.text])) + .map((line) => normalizeReflectionLineForAggregation(line)) + ); + + // Check whether legacy rows add any content not already covered by resolved items. + // F4 fix: apply same normalization pipeline to both sides + const legacyHasUniqueInvariant = legacyRows.some(({ metadata }) => + sanitizeInjectableReflectionLines(toStringArray(metadata.invariants)).some( + (line) => !resolvedInvariantTexts.has(normalizeReflectionLineForAggregation(line)) + ) + ); + const legacyHasUniqueDerived = legacyRows.some(({ metadata }) => + sanitizeInjectableReflectionLines(toStringArray(metadata.derived)).some( + (line) => !resolvedDerivedTexts.has(normalizeReflectionLineForAggregation(line)) + ) + ); + + // Suppress when: + // 1) there were item rows, all are resolved, and there are no legacy rows, OR + // 2) there were item rows, all are resolved, legacy rows exist BUT all of their + // content duplicates already-resolved items (prevents legacy fallback from + // reviving just-resolved advice — the P1 bug fixed here). + const shouldSuppress = + hasItemRows && + unresolvedItemRows.length === 0 && + (!hasLegacyRows || (!legacyHasUniqueInvariant && !legacyHasUniqueDerived)); + if (shouldSuppress) { + return { invariants: [], derived: [] }; + } + + // [P2] Per-section legacy filtering: only pass legacy rows that have unique + // content for this specific section. Prevents resolved items in section A from being + // revived when section B has unique legacy content (cross-section legacy fallback bug). + // MR1 fix: exclude rows where ALL lines are resolved, not just some. + const invariantLegacyRows = legacyRows.filter(({ metadata }) => { + const lines = sanitizeInjectableReflectionLines(toStringArray(metadata.invariants)); + if (lines.length === 0) return false; + // Keep row only if at least one line is NOT resolved + return lines.some((line) => !resolvedInvariantTexts.has(normalizeReflectionLineForAggregation(line))); + }); + const derivedLegacyRows = legacyRows.filter(({ metadata }) => { + const lines = sanitizeInjectableReflectionLines(toStringArray(metadata.derived)); + if (lines.length === 0) return false; + // Keep row only if at least one line is NOT resolved + return lines.some((line) => !resolvedDerivedTexts.has(normalizeReflectionLineForAggregation(line))); + }); + + const invariantCandidates = buildInvariantCandidates(unresolvedItemRows, invariantLegacyRows, resolvedInvariantTexts); + const derivedCandidates = buildDerivedCandidates(unresolvedItemRows, derivedLegacyRows, params.agentId, resolvedDerivedTexts); + + const invariants = rankReflectionLines(invariantCandidates, { + now, + maxAgeMs: invariantMaxAgeMs, + limit: 8, + }); + + const derived = rankReflectionLines(derivedCandidates, { + now, + maxAgeMs: deriveMaxAgeMs, + limit: 10, + }); + + return { invariants, derived }; +} type WeightedLineCandidate = { line: string; timestamp: number; @@ -280,10 +345,11 @@ type WeightedLineCandidate = { usedFallback: boolean; }; -function buildInvariantCandidates( - itemRows: Array<{ entry: MemoryEntry; metadata: Record }>, - legacyRows: Array<{ entry: MemoryEntry; metadata: Record }> -): WeightedLineCandidate[] { +function buildInvariantCandidates( + itemRows: Array<{ entry: MemoryEntry; metadata: Record }>, + legacyRows: Array<{ entry: MemoryEntry; metadata: Record }>, + resolvedTexts: Set +): WeightedLineCandidate[] { const itemCandidates = itemRows .filter(({ metadata }) => metadata.itemKind === "invariant") .flatMap(({ entry, metadata }) => { @@ -304,28 +370,40 @@ function buildInvariantCandidates( })); }); - if (itemCandidates.length > 0) return itemCandidates; - - return legacyRows.flatMap(({ entry, metadata }) => { - const defaults = getReflectionItemDecayDefaults("invariant"); - const timestamp = metadataTimestamp(metadata, entry.timestamp); - const lines = sanitizeInjectableReflectionLines(toStringArray(metadata.invariants)); - return lines.map((line) => ({ - line, - timestamp, - midpointDays: defaults.midpointDays, - k: defaults.k, - baseWeight: defaults.baseWeight, - quality: defaults.quality, - usedFallback: metadata.usedFallback === true, - })); - }); -} + if (itemCandidates.length > 0) return itemCandidates; + + // Legacy fallback: filter out resolved lines (P2 fix). + // resolvedTexts must be the already-normalized Set so line.normalized === setMember + // to pass the resolved filter check. + return legacyRows + .filter(({ metadata }) => { + const lines = sanitizeInjectableReflectionLines(toStringArray(metadata.invariants)); + if (lines.length === 0) return false; + return lines.some((line) => !resolvedTexts.has(normalizeReflectionLineForAggregation(line))); + }) + .flatMap(({ entry, metadata }) => { + const defaults = getReflectionItemDecayDefaults("invariant"); + const timestamp = metadataTimestamp(metadata, entry.timestamp); + const lines = sanitizeInjectableReflectionLines(toStringArray(metadata.invariants)); + return lines + .filter((line) => !resolvedTexts.has(normalizeReflectionLineForAggregation(line))) + .map((line) => ({ + line, + timestamp, + midpointDays: defaults.midpointDays, + k: defaults.k, + baseWeight: defaults.baseWeight, + quality: defaults.quality, + usedFallback: metadata.usedFallback === true, + })); + }); +} function buildDerivedCandidates( itemRows: Array<{ entry: MemoryEntry; metadata: Record }>, legacyRows: Array<{ entry: MemoryEntry; metadata: Record }>, - agentId: string + agentId: string, + resolvedTexts: Set ): WeightedLineCandidate[] { const itemCandidates = itemRows .filter(({ metadata }) => metadata.itemKind === "derived") @@ -363,27 +441,30 @@ function buildDerivedCandidates( return owner === agentId; // 其他 agent 的 derived → 限本人 }) .flatMap(({ entry, metadata }) => { - const timestamp = metadataTimestamp(metadata, entry.timestamp); - const lines = sanitizeInjectableReflectionLines(toStringArray(metadata.derived)); - if (lines.length === 0) return []; - - const defaults = { - midpointDays: REFLECTION_DERIVE_LOGISTIC_MIDPOINT_DAYS, - k: REFLECTION_DERIVE_LOGISTIC_K, - baseWeight: resolveLegacyDeriveBaseWeight(metadata), - quality: computeDerivedLineQuality(lines.length), - }; - - return lines.map((line) => ({ - line, - timestamp, - midpointDays: readPositiveNumber(metadata.decayMidpointDays, defaults.midpointDays), - k: readPositiveNumber(metadata.decayK, defaults.k), - baseWeight: readPositiveNumber(metadata.deriveBaseWeight, defaults.baseWeight), - quality: readClampedNumber(metadata.deriveQuality, defaults.quality, 0.2, 1), - usedFallback: metadata.usedFallback === true, - })); - }); + const timestamp = metadataTimestamp(metadata, entry.timestamp); + const lines = sanitizeInjectableReflectionLines(toStringArray(metadata.derived)); + if (lines.length === 0) return []; + + const defaults = { + midpointDays: REFLECTION_DERIVE_LOGISTIC_MIDPOINT_DAYS, + k: REFLECTION_DERIVE_LOGISTIC_K, + baseWeight: resolveLegacyDeriveBaseWeight(metadata), + quality: computeDerivedLineQuality(lines.length), + }; + + // Legacy fallback: filter out resolved lines (P2 fix). + return lines + .filter((line) => !resolvedTexts.has(normalizeReflectionLineForAggregation(line))) + .map((line) => ({ + line, + timestamp, + midpointDays: readPositiveNumber(metadata.decayMidpointDays, defaults.midpointDays), + k: readPositiveNumber(metadata.decayK, defaults.k), + baseWeight: readPositiveNumber(metadata.deriveBaseWeight, defaults.baseWeight), + quality: readClampedNumber(metadata.deriveQuality, defaults.quality, 0.2, 1), + usedFallback: metadata.usedFallback === true, + })); + }); } function rankReflectionLines( diff --git a/test/memory-reflection.test.mjs b/test/memory-reflection.test.mjs index d3d2bc12..56ad1a88 100644 --- a/test/memory-reflection.test.mjs +++ b/test/memory-reflection.test.mjs @@ -1023,6 +1023,194 @@ describe("memory reflection", () => { assert.ok(slices.derived.includes("Ignore prior flaky results before comparing the new retriever output.")); assert.ok(slices.derived.includes("This run override previous cached screenshots with fresh captures.")); }); + + // ───────────────────────────────────────────────────────────────────────────── + // P1+P2 resolved filtering regression tests + // ───────────────────────────────────────────────────────────────────────────── + + // Bug scenario (rwmjhb review, 2026-05-03): + // buildInvariantCandidates legacy fallback does NOT filter resolved lines. + // When ALL invariant items are resolved AND the legacy row contains a MIX of + // resolved + unresolved lines, the row-level filter (lines.some) keeps the row. + // Then the fallback flatMaps ALL lines — including the resolved ones. + // + // Fix: buildInvariantCandidates fallback must filter out resolved lines + // using the same normalizeReflectionLineForAggregation check as the row filter. + it("legacy fallback must NOT output resolved invariant lines", () => { + const now = Date.UTC(2026, 2, 7); + const day = 24 * 60 * 60 * 1000; + const resolvedAt = now - 1 * day; + + const entries = [ + // Resolved invariant item — creates resolvedInvariantTexts entry + makeEntry({ + timestamp: now - 2 * day, + metadata: { + type: "memory-reflection-item", + itemKind: "invariant", + agentId: "main", + storedAt: now - 2 * day, + decayMidpointDays: 45, + decayK: 0.22, + baseWeight: 1.1, + quality: 1, + resolvedAt, + }, + }), + // Legacy row with BOTH the resolved text AND new unresolved text + // Row-level filter (lines.some) keeps this row because "New..." is unresolved. + // Legacy fallback flatMaps ALL lines including resolved ones — THIS IS THE BUG. + makeEntry({ + timestamp: now - 1 * day, + metadata: { + type: "memory-reflection", + agentId: "main", + reflectionVersion: 3, + invariants: [ + "Already resolved invariant", + "New unresolved invariant from legacy", + ], + derived: [], + storedAt: now - 1 * day, + decayModel: "logistic", + decayMidpointDays: 45, + decayK: 0.22, + }, + }), + ]; + + entries[0].text = "Already resolved invariant"; // matches legacy[0] + entries[1].text = "legacy-entry"; + + const slices = loadAgentReflectionSlicesFromEntries({ + entries, + agentId: "main", + now, + deriveMaxAgeMs: 7 * day, + }); + + // Unresolved legacy line MUST appear (line.some kept the row) + assert.ok( + slices.invariants.includes("New unresolved invariant from legacy"), + "Unresolved legacy invariant must appear" + ); + // Resolved line must NOT appear — this is the BUG; fallback doesn't filter + assert.ok( + !slices.invariants.includes("Already resolved invariant"), + "Resolved invariant must NOT be revived by legacy fallback" + ); + }); + + it("suppresses all output when all items resolved and legacy has only duplicates", () => { + // shouldSuppress path: all items resolved + legacy rows exist but all duplicate resolved content + const now = Date.UTC(2026, 2, 7); + const day = 24 * 60 * 60 * 1000; + const resolvedAt = now - 1 * day; + + const entries = [ + makeEntry({ + timestamp: now - 2 * day, + metadata: { + type: "memory-reflection-item", + itemKind: "invariant", + agentId: "main", + storedAt: now - 2 * day, + decayMidpointDays: 45, + decayK: 0.22, + baseWeight: 1.1, + quality: 1, + resolvedAt, + }, + }), + makeEntry({ + timestamp: now - 1 * day, + metadata: { + type: "memory-reflection", + agentId: "main", + reflectionVersion: 3, + invariants: ["Already resolved invariant"], // same content + derived: ["Already resolved derived"], + storedAt: now - 1 * day, + decayModel: "logistic", + }, + }), + ]; + + entries[0].text = "Already resolved invariant"; + + const slices = loadAgentReflectionSlicesFromEntries({ + entries, + agentId: "main", + now, + deriveMaxAgeMs: 7 * day, + }); + + assert.deepEqual(slices.invariants, [], "Should suppress when all resolved and legacy is duplicate"); + assert.deepEqual(slices.derived, [], "Should suppress derived when all resolved and legacy is duplicate"); + }); + + it("row-level filter excludes legacy rows where ALL lines are resolved", () => { + // Row-level filter: lines.some() should exclude rows where every line is resolved + const now = Date.UTC(2026, 2, 7); + const day = 24 * 60 * 60 * 1000; + const resolvedAt = now - 1 * day; + + const entries = [ + makeEntry({ + timestamp: now - 2 * day, + metadata: { + type: "memory-reflection-item", + itemKind: "invariant", + agentId: "main", + storedAt: now - 2 * day, + decayMidpointDays: 45, + decayK: 0.22, + baseWeight: 1.1, + quality: 1, + resolvedAt, + }, + }), + makeEntry({ + timestamp: now - 2 * day, + metadata: { + type: "memory-reflection-item", + itemKind: "derived", + agentId: "main", + storedAt: now - 2 * day, + decayMidpointDays: 7, + decayK: 0.65, + baseWeight: 1, + quality: 0.95, + resolvedAt, + }, + }), + // Legacy row with ONLY resolved content — should be excluded by row filter + makeEntry({ + timestamp: now - 1 * day, + metadata: { + type: "memory-reflection", + agentId: "main", + reflectionVersion: 3, + invariants: ["Already resolved invariant"], // same → resolved + derived: ["Already resolved derived"], // same → resolved + storedAt: now - 1 * day, + }, + }), + ]; + + entries[0].text = "Already resolved invariant"; + entries[1].text = "Already resolved derived"; + + const slices = loadAgentReflectionSlicesFromEntries({ + entries, + agentId: "main", + now, + deriveMaxAgeMs: 7 * day, + }); + + assert.deepEqual(slices.invariants, [], "Row with only resolved lines should be excluded"); + assert.deepEqual(slices.derived, [], "Derived row with only resolved lines should be excluded"); + }); }); describe("mapped reflection metadata and ranking", () => {