From 2f5a32201dfb680b9baa1e6ade776c58e508dc91 Mon Sep 17 00:00:00 2001 From: jlin53882 Date: Mon, 13 Apr 2026 02:20:17 +0800 Subject: [PATCH 1/3] fix(reflection): per-section legacy filtering prevents resolved items from being revived (P1+P2) - Add resolvedAt/resolvedBy/resolutionNote metadata to ReflectionItemMetadata - Filter out resolved items from reflection loading (P1 fix) - Per-section legacy filtering to prevent cross-section revival (P2 fix) --- src/reflection-item-store.ts | 6 ++++ src/reflection-store.ts | 66 ++++++++++++++++++++++++++++++++++-- 2 files changed, 70 insertions(+), 2 deletions(-) diff --git a/src/reflection-item-store.ts b/src/reflection-item-store.ts index 36c4ac5e..31893e9a 100644 --- a/src/reflection-item-store.ts +++ b/src/reflection-item-store.ts @@ -23,6 +23,12 @@ export interface ReflectionItemMetadata { baseWeight: number; quality: number; sourceReflectionPath?: string; + /** Unix timestamp when the item was marked resolved. Undefined = unresolved. */ + resolvedAt?: number; + /** Agent ID that marked this item resolved. */ + resolvedBy?: string; + /** Optional note explaining why the item was resolved. */ + resolutionNote?: string; } export interface ReflectionItemPayload { diff --git a/src/reflection-store.ts b/src/reflection-store.ts index 38da5ce7..bf96691e 100644 --- a/src/reflection-store.ts +++ b/src/reflection-store.ts @@ -252,8 +252,70 @@ export function loadAgentReflectionSlicesFromEntries(params: LoadReflectionSlice const itemRows = reflectionRows.filter(({ metadata }) => metadata.type === "memory-reflection-item"); const legacyRows = reflectionRows.filter(({ metadata }) => metadata.type === "memory-reflection"); - const invariantCandidates = buildInvariantCandidates(itemRows, legacyRows); - const derivedCandidates = buildDerivedCandidates(itemRows, legacyRows); + // [P1] Filter out resolved items — passive suppression for #447 + // resolvedAt === undefined means unresolved (default) + const unresolvedItemRows = itemRows.filter(({ metadata }) => metadata.resolvedAt === undefined); + const resolvedItemRows = itemRows.filter(({ metadata }) => metadata.resolvedAt !== undefined); + + const hasItemRows = itemRows.length > 0; + const hasLegacyRows = legacyRows.length > 0; + + // Collect normalized text of resolved items so we can detect whether legacy + // rows are pure duplicates of already-resolved content. + const resolvedInvariantTexts = new Set( + resolvedItemRows + .filter(({ metadata }) => metadata.itemKind === "invariant") + .flatMap(({ entry }) => sanitizeInjectableReflectionLines([entry.text])) + .map((line) => normalizeReflectionLineForAggregation(line)) + ); + const resolvedDerivedTexts = new Set( + resolvedItemRows + .filter(({ metadata }) => metadata.itemKind === "derived") + .flatMap(({ entry }) => sanitizeInjectableReflectionLines([entry.text])) + .map((line) => normalizeReflectionLineForAggregation(line)) + ); + + // Check whether legacy rows add any content not already covered by resolved items. + const legacyHasUniqueInvariant = legacyRows.some(({ metadata }) => + toStringArray(metadata.invariants).some( + (line) => !resolvedInvariantTexts.has(normalizeReflectionLineForAggregation(line)) + ) + ); + const legacyHasUniqueDerived = legacyRows.some(({ metadata }) => + toStringArray(metadata.derived).some( + (line) => !resolvedDerivedTexts.has(normalizeReflectionLineForAggregation(line)) + ) + ); + + // Suppress when: + // 1) there were item rows, all are resolved, and there are no legacy rows, OR + // 2) there were item rows, all are resolved, legacy rows exist BUT all of their + // content duplicates already-resolved items (prevents legacy fallback from + // reviving just-resolved advice — the P1 bug fixed here). + const shouldSuppress = + hasItemRows && + unresolvedItemRows.length === 0 && + (!hasLegacyRows || (!legacyHasUniqueInvariant && !legacyHasUniqueDerived)); + if (shouldSuppress) { + return { invariants: [], derived: [] }; + } + + // [P2] Per-section legacy filtering: only pass legacy rows that have unique + // content for this specific section. Prevents resolved items in section A from being + // revived when section B has unique legacy content (cross-section legacy fallback bug). + const invariantLegacyRows = legacyRows.filter(({ metadata }) => + toStringArray(metadata.invariants).some( + (line) => !resolvedInvariantTexts.has(normalizeReflectionLineForAggregation(line)) + ) + ); + const derivedLegacyRows = legacyRows.filter(({ metadata }) => + toStringArray(metadata.derived).some( + (line) => !resolvedDerivedTexts.has(normalizeReflectionLineForAggregation(line)) + ) + ); + + const invariantCandidates = buildInvariantCandidates(unresolvedItemRows, invariantLegacyRows); + const derivedCandidates = buildDerivedCandidates(unresolvedItemRows, derivedLegacyRows); const invariants = rankReflectionLines(invariantCandidates, { now, From bf649d4eb7ce45166fe4998dd3031f083bc4e372 Mon Sep 17 00:00:00 2001 From: jlin53882 Date: Fri, 17 Apr 2026 01:41:52 +0800 Subject: [PATCH 2/3] fix(reflection): MR1+F4 - proper per-section filtering with normalization --- src/reflection-store.ts | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/src/reflection-store.ts b/src/reflection-store.ts index bf96691e..97f93b44 100644 --- a/src/reflection-store.ts +++ b/src/reflection-store.ts @@ -276,13 +276,14 @@ export function loadAgentReflectionSlicesFromEntries(params: LoadReflectionSlice ); // Check whether legacy rows add any content not already covered by resolved items. + // F4 fix: apply same normalization pipeline to both sides const legacyHasUniqueInvariant = legacyRows.some(({ metadata }) => - toStringArray(metadata.invariants).some( + sanitizeInjectableReflectionLines(toStringArray(metadata.invariants)).some( (line) => !resolvedInvariantTexts.has(normalizeReflectionLineForAggregation(line)) ) ); const legacyHasUniqueDerived = legacyRows.some(({ metadata }) => - toStringArray(metadata.derived).some( + sanitizeInjectableReflectionLines(toStringArray(metadata.derived)).some( (line) => !resolvedDerivedTexts.has(normalizeReflectionLineForAggregation(line)) ) ); @@ -303,16 +304,19 @@ export function loadAgentReflectionSlicesFromEntries(params: LoadReflectionSlice // [P2] Per-section legacy filtering: only pass legacy rows that have unique // content for this specific section. Prevents resolved items in section A from being // revived when section B has unique legacy content (cross-section legacy fallback bug). - const invariantLegacyRows = legacyRows.filter(({ metadata }) => - toStringArray(metadata.invariants).some( - (line) => !resolvedInvariantTexts.has(normalizeReflectionLineForAggregation(line)) - ) - ); - const derivedLegacyRows = legacyRows.filter(({ metadata }) => - toStringArray(metadata.derived).some( - (line) => !resolvedDerivedTexts.has(normalizeReflectionLineForAggregation(line)) - ) - ); + // MR1 fix: exclude rows where ALL lines are resolved, not just some. + const invariantLegacyRows = legacyRows.filter(({ metadata }) => { + const lines = sanitizeInjectableReflectionLines(toStringArray(metadata.invariants)); + if (lines.length === 0) return false; + // Keep row only if at least one line is NOT resolved + return lines.some((line) => !resolvedInvariantTexts.has(normalizeReflectionLineForAggregation(line))); + }); + const derivedLegacyRows = legacyRows.filter(({ metadata }) => { + const lines = sanitizeInjectableReflectionLines(toStringArray(metadata.derived)); + if (lines.length === 0) return false; + // Keep row only if at least one line is NOT resolved + return lines.some((line) => !resolvedDerivedTexts.has(normalizeReflectionLineForAggregation(line))); + }); const invariantCandidates = buildInvariantCandidates(unresolvedItemRows, invariantLegacyRows); const derivedCandidates = buildDerivedCandidates(unresolvedItemRows, derivedLegacyRows); From af6709138a67857e59defab7351e7ccb9a4c0cfc Mon Sep 17 00:00:00 2001 From: James Lin Date: Mon, 4 May 2026 00:29:01 +0800 Subject: [PATCH 3/3] fix: P2 resolved filtering in legacy fallback (buildInvariantCandidates/buildDerivedCandidates) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bug (rwmjhb review 2026-05-03): When ALL invariant items are resolved AND a legacy combined row contains a MIX of resolved + unresolved lines, the row-level filter (lines.some) keeps the row since at least one line is unresolved. But the buildInvariantCandidates legacy fallback flatMaps ALL lines — including the resolved ones — reviving just-resolved advice. Fix: - Pass resolvedInvariantTexts (normalized Set) to buildInvariantCandidates. Legacy fallback now filters out lines whose normalized form is in resolvedTexts. - Same fix for buildDerivedCandidates: pass resolvedDerivedTexts and filter. Test (3 new): - legacy fallback must NOT output resolved invariant lines (core P2 bug) - suppresses all output when all items resolved and legacy has only duplicates - row-level filter excludes legacy rows where ALL lines are resolved --- src/reflection-store.ts | 106 ++++++++++-------- test/memory-reflection.test.mjs | 188 ++++++++++++++++++++++++++++++++ 2 files changed, 249 insertions(+), 45 deletions(-) diff --git a/src/reflection-store.ts b/src/reflection-store.ts index 50f07aae..9cb78b08 100644 --- a/src/reflection-store.ts +++ b/src/reflection-store.ts @@ -318,8 +318,8 @@ export function loadAgentReflectionSlicesFromEntries(params: LoadReflectionSlice return lines.some((line) => !resolvedDerivedTexts.has(normalizeReflectionLineForAggregation(line))); }); - const invariantCandidates = buildInvariantCandidates(unresolvedItemRows, invariantLegacyRows); - const derivedCandidates = buildDerivedCandidates(unresolvedItemRows, derivedLegacyRows, params.agentId); + const invariantCandidates = buildInvariantCandidates(unresolvedItemRows, invariantLegacyRows, resolvedInvariantTexts); + const derivedCandidates = buildDerivedCandidates(unresolvedItemRows, derivedLegacyRows, params.agentId, resolvedDerivedTexts); const invariants = rankReflectionLines(invariantCandidates, { now, @@ -345,10 +345,11 @@ type WeightedLineCandidate = { usedFallback: boolean; }; -function buildInvariantCandidates( - itemRows: Array<{ entry: MemoryEntry; metadata: Record }>, - legacyRows: Array<{ entry: MemoryEntry; metadata: Record }> -): WeightedLineCandidate[] { +function buildInvariantCandidates( + itemRows: Array<{ entry: MemoryEntry; metadata: Record }>, + legacyRows: Array<{ entry: MemoryEntry; metadata: Record }>, + resolvedTexts: Set +): WeightedLineCandidate[] { const itemCandidates = itemRows .filter(({ metadata }) => metadata.itemKind === "invariant") .flatMap(({ entry, metadata }) => { @@ -369,28 +370,40 @@ function buildInvariantCandidates( })); }); - if (itemCandidates.length > 0) return itemCandidates; - - return legacyRows.flatMap(({ entry, metadata }) => { - const defaults = getReflectionItemDecayDefaults("invariant"); - const timestamp = metadataTimestamp(metadata, entry.timestamp); - const lines = sanitizeInjectableReflectionLines(toStringArray(metadata.invariants)); - return lines.map((line) => ({ - line, - timestamp, - midpointDays: defaults.midpointDays, - k: defaults.k, - baseWeight: defaults.baseWeight, - quality: defaults.quality, - usedFallback: metadata.usedFallback === true, - })); - }); -} + if (itemCandidates.length > 0) return itemCandidates; + + // Legacy fallback: filter out resolved lines (P2 fix). + // resolvedTexts must be the already-normalized Set so line.normalized === setMember + // to pass the resolved filter check. + return legacyRows + .filter(({ metadata }) => { + const lines = sanitizeInjectableReflectionLines(toStringArray(metadata.invariants)); + if (lines.length === 0) return false; + return lines.some((line) => !resolvedTexts.has(normalizeReflectionLineForAggregation(line))); + }) + .flatMap(({ entry, metadata }) => { + const defaults = getReflectionItemDecayDefaults("invariant"); + const timestamp = metadataTimestamp(metadata, entry.timestamp); + const lines = sanitizeInjectableReflectionLines(toStringArray(metadata.invariants)); + return lines + .filter((line) => !resolvedTexts.has(normalizeReflectionLineForAggregation(line))) + .map((line) => ({ + line, + timestamp, + midpointDays: defaults.midpointDays, + k: defaults.k, + baseWeight: defaults.baseWeight, + quality: defaults.quality, + usedFallback: metadata.usedFallback === true, + })); + }); +} function buildDerivedCandidates( itemRows: Array<{ entry: MemoryEntry; metadata: Record }>, legacyRows: Array<{ entry: MemoryEntry; metadata: Record }>, - agentId: string + agentId: string, + resolvedTexts: Set ): WeightedLineCandidate[] { const itemCandidates = itemRows .filter(({ metadata }) => metadata.itemKind === "derived") @@ -428,27 +441,30 @@ function buildDerivedCandidates( return owner === agentId; // 其他 agent 的 derived → 限本人 }) .flatMap(({ entry, metadata }) => { - const timestamp = metadataTimestamp(metadata, entry.timestamp); - const lines = sanitizeInjectableReflectionLines(toStringArray(metadata.derived)); - if (lines.length === 0) return []; - - const defaults = { - midpointDays: REFLECTION_DERIVE_LOGISTIC_MIDPOINT_DAYS, - k: REFLECTION_DERIVE_LOGISTIC_K, - baseWeight: resolveLegacyDeriveBaseWeight(metadata), - quality: computeDerivedLineQuality(lines.length), - }; - - return lines.map((line) => ({ - line, - timestamp, - midpointDays: readPositiveNumber(metadata.decayMidpointDays, defaults.midpointDays), - k: readPositiveNumber(metadata.decayK, defaults.k), - baseWeight: readPositiveNumber(metadata.deriveBaseWeight, defaults.baseWeight), - quality: readClampedNumber(metadata.deriveQuality, defaults.quality, 0.2, 1), - usedFallback: metadata.usedFallback === true, - })); - }); + const timestamp = metadataTimestamp(metadata, entry.timestamp); + const lines = sanitizeInjectableReflectionLines(toStringArray(metadata.derived)); + if (lines.length === 0) return []; + + const defaults = { + midpointDays: REFLECTION_DERIVE_LOGISTIC_MIDPOINT_DAYS, + k: REFLECTION_DERIVE_LOGISTIC_K, + baseWeight: resolveLegacyDeriveBaseWeight(metadata), + quality: computeDerivedLineQuality(lines.length), + }; + + // Legacy fallback: filter out resolved lines (P2 fix). + return lines + .filter((line) => !resolvedTexts.has(normalizeReflectionLineForAggregation(line))) + .map((line) => ({ + line, + timestamp, + midpointDays: readPositiveNumber(metadata.decayMidpointDays, defaults.midpointDays), + k: readPositiveNumber(metadata.decayK, defaults.k), + baseWeight: readPositiveNumber(metadata.deriveBaseWeight, defaults.baseWeight), + quality: readClampedNumber(metadata.deriveQuality, defaults.quality, 0.2, 1), + usedFallback: metadata.usedFallback === true, + })); + }); } function rankReflectionLines( diff --git a/test/memory-reflection.test.mjs b/test/memory-reflection.test.mjs index d3d2bc12..56ad1a88 100644 --- a/test/memory-reflection.test.mjs +++ b/test/memory-reflection.test.mjs @@ -1023,6 +1023,194 @@ describe("memory reflection", () => { assert.ok(slices.derived.includes("Ignore prior flaky results before comparing the new retriever output.")); assert.ok(slices.derived.includes("This run override previous cached screenshots with fresh captures.")); }); + + // ───────────────────────────────────────────────────────────────────────────── + // P1+P2 resolved filtering regression tests + // ───────────────────────────────────────────────────────────────────────────── + + // Bug scenario (rwmjhb review, 2026-05-03): + // buildInvariantCandidates legacy fallback does NOT filter resolved lines. + // When ALL invariant items are resolved AND the legacy row contains a MIX of + // resolved + unresolved lines, the row-level filter (lines.some) keeps the row. + // Then the fallback flatMaps ALL lines — including the resolved ones. + // + // Fix: buildInvariantCandidates fallback must filter out resolved lines + // using the same normalizeReflectionLineForAggregation check as the row filter. + it("legacy fallback must NOT output resolved invariant lines", () => { + const now = Date.UTC(2026, 2, 7); + const day = 24 * 60 * 60 * 1000; + const resolvedAt = now - 1 * day; + + const entries = [ + // Resolved invariant item — creates resolvedInvariantTexts entry + makeEntry({ + timestamp: now - 2 * day, + metadata: { + type: "memory-reflection-item", + itemKind: "invariant", + agentId: "main", + storedAt: now - 2 * day, + decayMidpointDays: 45, + decayK: 0.22, + baseWeight: 1.1, + quality: 1, + resolvedAt, + }, + }), + // Legacy row with BOTH the resolved text AND new unresolved text + // Row-level filter (lines.some) keeps this row because "New..." is unresolved. + // Legacy fallback flatMaps ALL lines including resolved ones — THIS IS THE BUG. + makeEntry({ + timestamp: now - 1 * day, + metadata: { + type: "memory-reflection", + agentId: "main", + reflectionVersion: 3, + invariants: [ + "Already resolved invariant", + "New unresolved invariant from legacy", + ], + derived: [], + storedAt: now - 1 * day, + decayModel: "logistic", + decayMidpointDays: 45, + decayK: 0.22, + }, + }), + ]; + + entries[0].text = "Already resolved invariant"; // matches legacy[0] + entries[1].text = "legacy-entry"; + + const slices = loadAgentReflectionSlicesFromEntries({ + entries, + agentId: "main", + now, + deriveMaxAgeMs: 7 * day, + }); + + // Unresolved legacy line MUST appear (line.some kept the row) + assert.ok( + slices.invariants.includes("New unresolved invariant from legacy"), + "Unresolved legacy invariant must appear" + ); + // Resolved line must NOT appear — this is the BUG; fallback doesn't filter + assert.ok( + !slices.invariants.includes("Already resolved invariant"), + "Resolved invariant must NOT be revived by legacy fallback" + ); + }); + + it("suppresses all output when all items resolved and legacy has only duplicates", () => { + // shouldSuppress path: all items resolved + legacy rows exist but all duplicate resolved content + const now = Date.UTC(2026, 2, 7); + const day = 24 * 60 * 60 * 1000; + const resolvedAt = now - 1 * day; + + const entries = [ + makeEntry({ + timestamp: now - 2 * day, + metadata: { + type: "memory-reflection-item", + itemKind: "invariant", + agentId: "main", + storedAt: now - 2 * day, + decayMidpointDays: 45, + decayK: 0.22, + baseWeight: 1.1, + quality: 1, + resolvedAt, + }, + }), + makeEntry({ + timestamp: now - 1 * day, + metadata: { + type: "memory-reflection", + agentId: "main", + reflectionVersion: 3, + invariants: ["Already resolved invariant"], // same content + derived: ["Already resolved derived"], + storedAt: now - 1 * day, + decayModel: "logistic", + }, + }), + ]; + + entries[0].text = "Already resolved invariant"; + + const slices = loadAgentReflectionSlicesFromEntries({ + entries, + agentId: "main", + now, + deriveMaxAgeMs: 7 * day, + }); + + assert.deepEqual(slices.invariants, [], "Should suppress when all resolved and legacy is duplicate"); + assert.deepEqual(slices.derived, [], "Should suppress derived when all resolved and legacy is duplicate"); + }); + + it("row-level filter excludes legacy rows where ALL lines are resolved", () => { + // Row-level filter: lines.some() should exclude rows where every line is resolved + const now = Date.UTC(2026, 2, 7); + const day = 24 * 60 * 60 * 1000; + const resolvedAt = now - 1 * day; + + const entries = [ + makeEntry({ + timestamp: now - 2 * day, + metadata: { + type: "memory-reflection-item", + itemKind: "invariant", + agentId: "main", + storedAt: now - 2 * day, + decayMidpointDays: 45, + decayK: 0.22, + baseWeight: 1.1, + quality: 1, + resolvedAt, + }, + }), + makeEntry({ + timestamp: now - 2 * day, + metadata: { + type: "memory-reflection-item", + itemKind: "derived", + agentId: "main", + storedAt: now - 2 * day, + decayMidpointDays: 7, + decayK: 0.65, + baseWeight: 1, + quality: 0.95, + resolvedAt, + }, + }), + // Legacy row with ONLY resolved content — should be excluded by row filter + makeEntry({ + timestamp: now - 1 * day, + metadata: { + type: "memory-reflection", + agentId: "main", + reflectionVersion: 3, + invariants: ["Already resolved invariant"], // same → resolved + derived: ["Already resolved derived"], // same → resolved + storedAt: now - 1 * day, + }, + }), + ]; + + entries[0].text = "Already resolved invariant"; + entries[1].text = "Already resolved derived"; + + const slices = loadAgentReflectionSlicesFromEntries({ + entries, + agentId: "main", + now, + deriveMaxAgeMs: 7 * day, + }); + + assert.deepEqual(slices.invariants, [], "Row with only resolved lines should be excluded"); + assert.deepEqual(slices.derived, [], "Derived row with only resolved lines should be excluded"); + }); }); describe("mapped reflection metadata and ranking", () => {