diff --git a/packages/cli/src/calibrate/match.test.ts b/packages/cli/src/calibrate/match.test.ts index a11b95d..8e0952a 100644 --- a/packages/cli/src/calibrate/match.test.ts +++ b/packages/cli/src/calibrate/match.test.ts @@ -220,3 +220,59 @@ describe('matchClustersToGold — edge cases', () => { expect(outcomes.filter(o => o.kind === 'false_positive')).toHaveLength(1); }); }); + +describe('matchClustersToGold — normalizedMessage is a tiebreaker, not a hard gate', () => { + it('matches on kind + location even when normalizedMessage does not appear in output', () => { + // The bench gold authors normalizedMessage as a semantic label + // ("homepage-multiple-h1") that never appears verbatim in detector output. + // kind + location is sufficient to confirm the planted bug was found. + const cluster = makeCluster({ + id: 'ck_seo', kind: 'seo_h1_missing_or_multiple', bugIdentity: 'seo0h10000000000', + signatureKey: 'unknown|seo_h1_missing_or_multiple|/|2', + rootCause: 'Page "/" has 2 <h1> element(s) — exactly 1 is required', + }); + const gold = makeGold({ + goldId: 'nb-h1', kind: 'seo_h1_missing_or_multiple', bugIdentity: undefined, + structuralMatch: { kind: 'seo_h1_missing_or_multiple', normalizedLocation: '/', normalizedMessage: 'homepage-multiple-h1' }, + }); + const { outcomes, ambiguities } = matchClustersToGold([cluster], [gold]); + expect(ambiguities).toEqual([]); + const tp = outcomes.find(o => o.kind === 'true_positive'); + expect(tp).toBeDefined(); + expect(tp && tp.kind === 'true_positive' ? tp.matchVia : undefined).toBe('structural'); + }); + + it('does NOT match when the location genuinely differs', () => { + const cluster = makeCluster({ + id: 'ck_rob', kind: 'seo_robots_blocking_crawl', bugIdentity: 'rob00000000000000', + signatureKey: 'unknown|seo_robots_blocking_crawl|/', + rootCause: 'robots.txt has "Disallow: /" for User-agent: *', + }); + const gold = makeGold({ + goldId: 'nb-rob', kind: 'seo_robots_blocking_crawl', bugIdentity: undefined, + structuralMatch: { kind: 'seo_robots_blocking_crawl', normalizedLocation: '/robots.txt', normalizedMessage: 'disallow-all' }, + }); + const { outcomes } = matchClustersToGold([cluster], [gold]); + expect(outcomes.find(o => o.kind === 'true_positive')).toBeUndefined(); + expect(outcomes.find(o => o.kind === 'false_negative')).toBeDefined(); + }); + + it('uses normalizedMessage to disambiguate multiple same-kind+location candidates', () => { + const c1 = makeCluster({ id: 'ck_a', kind: 'dom_error_text', bugIdentity: 'a000000000000000', signatureKey: 'x|dom_error_text|/page', rootCause: 'alpha error shown' }); + const c2 = makeCluster({ id: 'ck_b', kind: 'dom_error_text', bugIdentity: 'b000000000000000', signatureKey: 'x|dom_error_text|/page', rootCause: 'beta error shown' }); + const gold = makeGold({ goldId: 'g-dom', kind: 'dom_error_text', bugIdentity: undefined, structuralMatch: { kind: 'dom_error_text', normalizedLocation: '/page', normalizedMessage: 'beta' } }); + const { outcomes, 
ambiguities } = matchClustersToGold([c1, c2], [gold]); + expect(ambiguities).toEqual([]); + const tp = outcomes.find(o => o.kind === 'true_positive'); + expect(tp && tp.kind === 'true_positive' ? tp.clusterId : undefined).toBe('ck_b'); + }); + + it('is ambiguous when multiple same-kind+location candidates cannot be disambiguated by message', () => { + const c1 = makeCluster({ id: 'ck_a', kind: 'dom_error_text', bugIdentity: 'a000000000000000', signatureKey: 'x|dom_error_text|/page', rootCause: 'alpha' }); + const c2 = makeCluster({ id: 'ck_b', kind: 'dom_error_text', bugIdentity: 'b000000000000000', signatureKey: 'x|dom_error_text|/page', rootCause: 'beta' }); + const gold = makeGold({ goldId: 'g-dom', kind: 'dom_error_text', bugIdentity: undefined, structuralMatch: { kind: 'dom_error_text', normalizedLocation: '/page', normalizedMessage: 'gamma-not-present' } }); + const { ambiguities } = matchClustersToGold([c1, c2], [gold]); + expect(ambiguities.length).toBe(1); + expect(ambiguities[0].goldId).toBe('g-dom'); + }); +}); diff --git a/packages/cli/src/calibrate/match.ts b/packages/cli/src/calibrate/match.ts index 41cfc86..3f55eec 100644 --- a/packages/cli/src/calibrate/match.ts +++ b/packages/cli/src/calibrate/match.ts @@ -105,32 +105,50 @@ export function matchClustersToGold( continue; } - const candidates = (byStructural.get(entry.kind) ?? []).filter(c => { + // Primary structural signals: kind (index) + normalizedLocation. The gold's + // normalizedMessage is authored as a semantic label that rarely appears + // verbatim in detector output, so it is a TIEBREAKER among multiple + // same-kind+location candidates, not a hard gate. (A single kind+location + // candidate is accepted even if the message label does not align.) + const sameKindLoc = (byStructural.get(entry.kind) ?? []).filter(c => { if (consumedClusterIds.has(c.id)) return false; - // Match on normalizedLocation: compare against signatureKey + const sig = c.signatureKey ?? 
''; + return sig.includes(sm.normalizedLocation) || sm.normalizedLocation === '*'; + }); + + const messageMatches = (c: BugCluster): boolean => { const sig = c.signatureKey ?? ''; return ( - sig.includes(sm.normalizedLocation) || - sm.normalizedLocation === '*' - ) && ( sig.includes(sm.normalizedMessage) || c.rootCause.toLowerCase().includes(sm.normalizedMessage.toLowerCase()) || sm.normalizedMessage === '*' ); - }); + }; + + let matched: BugCluster | undefined; + let ambiguousCandidates: BugCluster[] | undefined; + if (sameKindLoc.length === 1) { + matched = sameKindLoc[0]; + } else if (sameKindLoc.length > 1) { + // Disambiguate by normalizedMessage; exactly one message-match wins. + // 0 or >1 message-matches among multiple location candidates is + // genuinely ambiguous — surface as fatal so the gold is tightened. + const msgMatched = sameKindLoc.filter(messageMatches); + if (msgMatched.length === 1) matched = msgMatched[0]; + else ambiguousCandidates = sameKindLoc; + } - if (candidates.length === 1) { - const cluster = candidates[0]; + if (matched !== undefined) { outcomes.push({ kind: 'true_positive', goldId: entry.goldId, - clusterId: cluster.id, + clusterId: matched.id, matchVia: 'structural', bugKind: entry.kind, }); - consumedClusterIds.add(cluster.id); - } else if (candidates.length > 1) { - ambiguities.push({ goldId: entry.goldId, candidates: candidates.map(c => c.id) }); + consumedClusterIds.add(matched.id); + } else if (ambiguousCandidates !== undefined) { + ambiguities.push({ goldId: entry.goldId, candidates: ambiguousCandidates.map(c => c.id) }); // Don't emit an outcome for ambiguous matches — caller handles as fatal } else if (entry.expected === 'detector_fires') { outcomes.push({