Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 56 additions & 0 deletions packages/cli/src/calibrate/match.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -220,3 +220,59 @@ describe('matchClustersToGold — edge cases', () => {
expect(outcomes.filter(o => o.kind === 'false_positive')).toHaveLength(1);
});
});

/**
 * Structural-match suite: each case feeds clusters and a single gold entry
 * (with no bugIdentity) into matchClustersToGold and checks how the gold's
 * normalizedLocation / normalizedMessage relate to the resulting outcomes
 * and ambiguities.
 */
describe('matchClustersToGold — normalizedMessage is a tiebreaker, not a hard gate', () => {
  it('matches on kind + location even when normalizedMessage does not appear in output', () => {
    // The gold label "homepage-multiple-h1" is a semantic tag: it is not a
    // substring of the cluster's signatureKey or rootCause below, so only
    // kind + location can tie the two together.
    const detected = makeCluster({
      id: 'ck_seo',
      kind: 'seo_h1_missing_or_multiple',
      bugIdentity: 'seo0h10000000000',
      signatureKey: 'unknown|seo_h1_missing_or_multiple|/|2',
      rootCause: 'Page "/" has 2 <h1> element(s) — exactly 1 is required',
    });
    const planted = makeGold({
      goldId: 'nb-h1',
      kind: 'seo_h1_missing_or_multiple',
      bugIdentity: undefined,
      structuralMatch: {
        kind: 'seo_h1_missing_or_multiple',
        normalizedLocation: '/',
        normalizedMessage: 'homepage-multiple-h1',
      },
    });

    const result = matchClustersToGold([detected], [planted]);

    expect(result.ambiguities).toEqual([]);
    const hit = result.outcomes.find(outcome => outcome.kind === 'true_positive');
    expect(hit).toBeDefined();
    expect(hit?.kind === 'true_positive' ? hit.matchVia : undefined).toBe('structural');
  });

  it('does NOT match when the location genuinely differs', () => {
    // Gold location "/robots.txt" is not contained in the cluster's
    // signatureKey, which ends in a bare "/".
    const detected = makeCluster({
      id: 'ck_rob',
      kind: 'seo_robots_blocking_crawl',
      bugIdentity: 'rob00000000000000',
      signatureKey: 'unknown|seo_robots_blocking_crawl|/',
      rootCause: 'robots.txt has "Disallow: /" for User-agent: *',
    });
    const planted = makeGold({
      goldId: 'nb-rob',
      kind: 'seo_robots_blocking_crawl',
      bugIdentity: undefined,
      structuralMatch: {
        kind: 'seo_robots_blocking_crawl',
        normalizedLocation: '/robots.txt',
        normalizedMessage: 'disallow-all',
      },
    });

    const result = matchClustersToGold([detected], [planted]);

    const outcomeKinds = result.outcomes.map(outcome => outcome.kind);
    expect(outcomeKinds).not.toContain('true_positive');
    expect(outcomeKinds).toContain('false_negative');
  });

  it('uses normalizedMessage to disambiguate multiple same-kind+location candidates', () => {
    // Both clusters share kind and location; only the second one's rootCause
    // contains the gold message "beta".
    const alphaCluster = makeCluster({
      id: 'ck_a',
      kind: 'dom_error_text',
      bugIdentity: 'a000000000000000',
      signatureKey: 'x|dom_error_text|/page',
      rootCause: 'alpha error shown',
    });
    const betaCluster = makeCluster({
      id: 'ck_b',
      kind: 'dom_error_text',
      bugIdentity: 'b000000000000000',
      signatureKey: 'x|dom_error_text|/page',
      rootCause: 'beta error shown',
    });
    const planted = makeGold({
      goldId: 'g-dom',
      kind: 'dom_error_text',
      bugIdentity: undefined,
      structuralMatch: {
        kind: 'dom_error_text',
        normalizedLocation: '/page',
        normalizedMessage: 'beta',
      },
    });

    const result = matchClustersToGold([alphaCluster, betaCluster], [planted]);

    expect(result.ambiguities).toEqual([]);
    const hit = result.outcomes.find(outcome => outcome.kind === 'true_positive');
    expect(hit?.kind === 'true_positive' ? hit.clusterId : undefined).toBe('ck_b');
  });

  it('is ambiguous when multiple same-kind+location candidates cannot be disambiguated by message', () => {
    // Neither rootCause nor signatureKey contains "gamma-not-present", so the
    // message cannot pick a winner between the two candidates.
    const alphaCluster = makeCluster({
      id: 'ck_a',
      kind: 'dom_error_text',
      bugIdentity: 'a000000000000000',
      signatureKey: 'x|dom_error_text|/page',
      rootCause: 'alpha',
    });
    const betaCluster = makeCluster({
      id: 'ck_b',
      kind: 'dom_error_text',
      bugIdentity: 'b000000000000000',
      signatureKey: 'x|dom_error_text|/page',
      rootCause: 'beta',
    });
    const planted = makeGold({
      goldId: 'g-dom',
      kind: 'dom_error_text',
      bugIdentity: undefined,
      structuralMatch: {
        kind: 'dom_error_text',
        normalizedLocation: '/page',
        normalizedMessage: 'gamma-not-present',
      },
    });

    const result = matchClustersToGold([alphaCluster, betaCluster], [planted]);

    // Exactly one ambiguity is reported, and it belongs to the gold entry.
    expect(result.ambiguities).toHaveLength(1);
    expect(result.ambiguities.map(ambiguity => ambiguity.goldId)).toEqual(['g-dom']);
  });
});
42 changes: 30 additions & 12 deletions packages/cli/src/calibrate/match.ts
Original file line number Diff line number Diff line change
Expand Up @@ -105,32 +105,50 @@ export function matchClustersToGold(
continue;
}

const candidates = (byStructural.get(entry.kind) ?? []).filter(c => {
// Primary structural signals: kind (index) + normalizedLocation. The gold's
// normalizedMessage is authored as a semantic label that rarely appears
// verbatim in detector output, so it is a TIEBREAKER among multiple
// same-kind+location candidates, not a hard gate. (A single kind+location
// candidate is accepted even if the message label does not align.)
const sameKindLoc = (byStructural.get(entry.kind) ?? []).filter(c => {
if (consumedClusterIds.has(c.id)) return false;
// Match on normalizedLocation: compare against signatureKey
const sig = c.signatureKey ?? '';
return sig.includes(sm.normalizedLocation) || sm.normalizedLocation === '*';
});

const messageMatches = (c: BugCluster): boolean => {
const sig = c.signatureKey ?? '';
return (
sig.includes(sm.normalizedLocation) ||
sm.normalizedLocation === '*'
) && (
sig.includes(sm.normalizedMessage) ||
c.rootCause.toLowerCase().includes(sm.normalizedMessage.toLowerCase()) ||
sm.normalizedMessage === '*'
);
});
};

let matched: BugCluster | undefined;
let ambiguousCandidates: BugCluster[] | undefined;
if (sameKindLoc.length === 1) {
matched = sameKindLoc[0];
} else if (sameKindLoc.length > 1) {
// Disambiguate by normalizedMessage; exactly one message-match wins.
// 0 or >1 message-matches among multiple location candidates is
// genuinely ambiguous — surface as fatal so the gold is tightened.
const msgMatched = sameKindLoc.filter(messageMatches);
if (msgMatched.length === 1) matched = msgMatched[0];
else ambiguousCandidates = sameKindLoc;
}

if (candidates.length === 1) {
const cluster = candidates[0];
if (matched !== undefined) {
outcomes.push({
kind: 'true_positive',
goldId: entry.goldId,
clusterId: cluster.id,
clusterId: matched.id,
matchVia: 'structural',
bugKind: entry.kind,
});
consumedClusterIds.add(cluster.id);
} else if (candidates.length > 1) {
ambiguities.push({ goldId: entry.goldId, candidates: candidates.map(c => c.id) });
consumedClusterIds.add(matched.id);
} else if (ambiguousCandidates !== undefined) {
ambiguities.push({ goldId: entry.goldId, candidates: ambiguousCandidates.map(c => c.id) });
// Don't emit an outcome for ambiguous matches — caller handles as fatal
} else if (entry.expected === 'detector_fires') {
outcomes.push({
Expand Down
Loading