diff --git a/packages/cli/src/calibrate/match.test.ts b/packages/cli/src/calibrate/match.test.ts
index a11b95d..8e0952a 100644
--- a/packages/cli/src/calibrate/match.test.ts
+++ b/packages/cli/src/calibrate/match.test.ts
@@ -220,3 +220,59 @@ describe('matchClustersToGold — edge cases', () => {
expect(outcomes.filter(o => o.kind === 'false_positive')).toHaveLength(1);
});
});
+
+describe('matchClustersToGold — normalizedMessage is a tiebreaker, not a hard gate', () => {
+  it('matches on kind + location even when normalizedMessage does not appear in output', () => {
+    // Bench gold authors normalizedMessage as a semantic label
+    // ("homepage-multiple-h1") that never appears verbatim in detector output,
+    // so kind + location alone must be enough to confirm the planted bug was found.
+    const cluster = makeCluster({
+      id: 'ck_seo', kind: 'seo_h1_missing_or_multiple', bugIdentity: 'seo0h10000000000',
+      signatureKey: 'unknown|seo_h1_missing_or_multiple|/|2',
+      rootCause: 'Page "/" has 2 <h1> element(s) — exactly 1 is required',
+    });
+    const gold = makeGold({
+      goldId: 'nb-h1', kind: 'seo_h1_missing_or_multiple', bugIdentity: undefined,
+      structuralMatch: { kind: 'seo_h1_missing_or_multiple', normalizedLocation: '/', normalizedMessage: 'homepage-multiple-h1' },
+    });
+    const { outcomes, ambiguities } = matchClustersToGold([cluster], [gold]);
+    expect(ambiguities).toEqual([]);
+    const tp = outcomes.find(o => o.kind === 'true_positive');
+    expect(tp).toBeDefined();
+    expect(tp && tp.kind === 'true_positive' ? tp.matchVia : undefined).toBe('structural');
+  });
+
+  it('does NOT match when the location genuinely differs', () => {
+    // The cluster signature ends at "/" while the gold expects "/robots.txt".
+    const robotsCluster = makeCluster({
+      id: 'ck_rob', kind: 'seo_robots_blocking_crawl', bugIdentity: 'rob00000000000000',
+      signatureKey: 'unknown|seo_robots_blocking_crawl|/', rootCause: 'robots.txt has "Disallow: /" for User-agent: *',
+    });
+    const robotsGold = makeGold({
+      goldId: 'nb-rob', kind: 'seo_robots_blocking_crawl', bugIdentity: undefined,
+      structuralMatch: { kind: 'seo_robots_blocking_crawl', normalizedLocation: '/robots.txt', normalizedMessage: 'disallow-all' },
+    });
+    const outcomeKinds = matchClustersToGold([robotsCluster], [robotsGold]).outcomes.map(o => o.kind);
+    expect(outcomeKinds).not.toContain('true_positive');
+    expect(outcomeKinds).toContain('false_negative');
+  });
+
+  it('uses normalizedMessage to disambiguate multiple same-kind+location candidates', () => {
+    // Both clusters share kind + location; only ck_b's root cause contains the gold label "beta".
+    const alphaCluster = makeCluster({ id: 'ck_a', kind: 'dom_error_text', bugIdentity: 'a000000000000000', signatureKey: 'x|dom_error_text|/page', rootCause: 'alpha error shown' });
+    const betaCluster = makeCluster({ id: 'ck_b', kind: 'dom_error_text', bugIdentity: 'b000000000000000', signatureKey: 'x|dom_error_text|/page', rootCause: 'beta error shown' });
+    const betaGold = makeGold({ goldId: 'g-dom', kind: 'dom_error_text', bugIdentity: undefined, structuralMatch: { kind: 'dom_error_text', normalizedLocation: '/page', normalizedMessage: 'beta' } });
+    const { outcomes, ambiguities } = matchClustersToGold([alphaCluster, betaCluster], [betaGold]);
+    expect(ambiguities).toEqual([]);
+    expect(outcomes.find(o => o.kind === 'true_positive')).toMatchObject({ clusterId: 'ck_b' });
+  });
+
+  it('is ambiguous when multiple same-kind+location candidates cannot be disambiguated by message', () => {
+    // Neither candidate's signature nor root cause contains the gold label, so no tiebreak is possible.
+    const alphaCluster = makeCluster({ id: 'ck_a', kind: 'dom_error_text', bugIdentity: 'a000000000000000', signatureKey: 'x|dom_error_text|/page', rootCause: 'alpha' });
+    const betaCluster = makeCluster({ id: 'ck_b', kind: 'dom_error_text', bugIdentity: 'b000000000000000', signatureKey: 'x|dom_error_text|/page', rootCause: 'beta' });
+    const unmatchedGold = makeGold({ goldId: 'g-dom', kind: 'dom_error_text', bugIdentity: undefined, structuralMatch: { kind: 'dom_error_text', normalizedLocation: '/page', normalizedMessage: 'gamma-not-present' } });
+    const { ambiguities } = matchClustersToGold([alphaCluster, betaCluster], [unmatchedGold]);
+    expect(ambiguities.map(a => a.goldId)).toEqual(['g-dom']);
+  });
+});
diff --git a/packages/cli/src/calibrate/match.ts b/packages/cli/src/calibrate/match.ts
index 41cfc86..3f55eec 100644
--- a/packages/cli/src/calibrate/match.ts
+++ b/packages/cli/src/calibrate/match.ts
@@ -105,32 +105,50 @@ export function matchClustersToGold(
continue;
}
- const candidates = (byStructural.get(entry.kind) ?? []).filter(c => {
+ // Primary structural signals: kind (index) + normalizedLocation. The gold's
+ // normalizedMessage is authored as a semantic label that rarely appears
+ // verbatim in detector output, so it is a TIEBREAKER among multiple
+ // same-kind+location candidates, not a hard gate. (A single kind+location
+ // candidate is accepted even if the message label does not align.)
+ const sameKindLoc = (byStructural.get(entry.kind) ?? []).filter(c => {
if (consumedClusterIds.has(c.id)) return false;
- // Match on normalizedLocation: compare against signatureKey
+ const sig = c.signatureKey ?? '';
+ return sig.includes(sm.normalizedLocation) || sm.normalizedLocation === '*';
+ });
+
+ const messageMatches = (c: BugCluster): boolean => {
const sig = c.signatureKey ?? '';
return (
- sig.includes(sm.normalizedLocation) ||
- sm.normalizedLocation === '*'
- ) && (
sig.includes(sm.normalizedMessage) ||
c.rootCause.toLowerCase().includes(sm.normalizedMessage.toLowerCase()) ||
sm.normalizedMessage === '*'
);
- });
+ };
+
+ let matched: BugCluster | undefined;
+ let ambiguousCandidates: BugCluster[] | undefined;
+ if (sameKindLoc.length === 1) {
+ matched = sameKindLoc[0];
+ } else if (sameKindLoc.length > 1) {
+ // Disambiguate by normalizedMessage; exactly one message-match wins.
+ // 0 or >1 message-matches among multiple location candidates is
+ // genuinely ambiguous — surface as fatal so the gold is tightened.
+ const msgMatched = sameKindLoc.filter(messageMatches);
+ if (msgMatched.length === 1) matched = msgMatched[0];
+ else ambiguousCandidates = sameKindLoc;
+ }
- if (candidates.length === 1) {
- const cluster = candidates[0];
+ if (matched !== undefined) {
outcomes.push({
kind: 'true_positive',
goldId: entry.goldId,
- clusterId: cluster.id,
+ clusterId: matched.id,
matchVia: 'structural',
bugKind: entry.kind,
});
- consumedClusterIds.add(cluster.id);
- } else if (candidates.length > 1) {
- ambiguities.push({ goldId: entry.goldId, candidates: candidates.map(c => c.id) });
+ consumedClusterIds.add(matched.id);
+ } else if (ambiguousCandidates !== undefined) {
+ ambiguities.push({ goldId: entry.goldId, candidates: ambiguousCandidates.map(c => c.id) });
// Don't emit an outcome for ambiguous matches — caller handles as fatal
} else if (entry.expected === 'detector_fires') {
outcomes.push({