Skip to content

Commit 4a5568d

Browse files
PickHub and Copilot authored
Exclude RAI content-filter 400s from AI search error monitor (#61498)
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent b27140c commit 4a5568d

3 files changed

Lines changed: 183 additions & 3 deletions

File tree

src/search/lib/ai-search-constants.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,9 @@
22
// payloads are almost always pasted docs pages or unrelated content
33
// and either time out or return no-answer. See github/cse-copilot#1214.
44
export const MAX_QUERY_LENGTH = 500
5+
6+
/**
 * Error code that cse-copilot returns in `detail.code` of an HTTP 400 response
 * when Azure's Responsible AI input content filter rejects a query.
 *
 * These are expected, user-triggered rejections, so they are tracked
 * separately and kept out of the AI search error-rate monitor.
 * See github/cse-copilot#1214.
 */
export const RAI_CONTENT_FILTER_CODE = 'RAI_INPUT_CONTENT_POLICY_BREACH_ERROR'

src/search/lib/ai-search-proxy.ts

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ import { getHmacWithEpoch } from '@/search/lib/helpers/get-cse-copilot-auth'
66
import { getCSECopilotSource } from '@/search/lib/helpers/cse-copilot-docs-versions'
77
import type { ExtendedRequest } from '@/types'
88
import { handleExternalSearchAnalytics } from '@/search/lib/helpers/external-search-analytics'
9-
import { MAX_QUERY_LENGTH } from '@/search/lib/ai-search-constants'
9+
import { MAX_QUERY_LENGTH, RAI_CONTENT_FILTER_CODE } from '@/search/lib/ai-search-constants'
1010

1111
const logger = createLogger(import.meta.url)
1212

@@ -15,6 +15,27 @@ const logger = createLogger(import.meta.url)
1515
// established, but the connect + first-byte must complete within this window.
1616
const AI_SEARCH_TIMEOUT_MS = 9_000
1717

18+
/**
 * Minimal structural view of an upstream response: just the members needed
 * to decide whether the response is a content-filter rejection.
 */
type ContentFilterCandidate = {
  /** HTTP status code of the upstream response. */
  status: number
  /** Header lookup that yields the header value or `null`. */
  headers: { get: (name: string) => string | null }
  /** Resolves with the parsed JSON body; the shape is not known up front. */
  json: () => Promise<unknown>
}
23+
24+
const isContentFilterRejection = async (response: ContentFilterCandidate): Promise<boolean> => {
25+
if (response.status !== 400) {
26+
return false
27+
}
28+
if (!response.headers.get('content-type')?.includes('application/json')) {
29+
return false
30+
}
31+
try {
32+
const body = (await response.json()) as { detail?: { code?: string } }
33+
return body?.detail?.code === RAI_CONTENT_FILTER_CODE
34+
} catch {
35+
return false
36+
}
37+
}
38+
1839
export const aiSearchProxy = async (req: ExtendedRequest, res: Response) => {
1940
const { query, version } = req.body ?? {}
2041

@@ -100,8 +121,13 @@ export const aiSearchProxy = async (req: ExtendedRequest, res: Response) => {
100121

101122
if (!response.ok) {
102123
const errorMessage = `Upstream server responded with status code ${response.status}`
103-
logger.error(errorMessage, { statusCode: response.status })
104-
statsd.increment('ai-search.stream_response_error', 1, diagnosticTags)
124+
if (await isContentFilterRejection(response)) {
125+
logger.info(errorMessage, { statusCode: response.status })
126+
statsd.increment('ai-search.content_filtered', 1, diagnosticTags)
127+
} else {
128+
logger.error(errorMessage, { statusCode: response.status })
129+
statsd.increment('ai-search.stream_response_error', 1, diagnosticTags)
130+
}
105131
res.status(response.status).json({
106132
errors: [{ message: errorMessage }],
107133
upstreamStatus: response.status,
Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
import { describe, test, expect, vi, beforeEach } from 'vitest'
2+
3+
import statsd from '@/observability/lib/statsd'
4+
import { fetchStream } from '@/frame/lib/fetch-utils'
5+
import { aiSearchProxy } from '@/search/lib/ai-search-proxy'
6+
import { RAI_CONTENT_FILTER_CODE } from '@/search/lib/ai-search-constants'
7+
import type { ExtendedRequest } from '@/types'
8+
9+
vi.mock('@/observability/lib/statsd', () => ({
10+
default: { increment: vi.fn(), gauge: vi.fn() },
11+
}))
12+
13+
vi.mock('@/frame/lib/fetch-utils', () => ({
14+
fetchStream: vi.fn(),
15+
}))
16+
17+
vi.mock('@/search/lib/helpers/get-cse-copilot-auth', () => ({
18+
getHmacWithEpoch: () => 'test-auth',
19+
}))
20+
21+
vi.mock('@/search/lib/helpers/cse-copilot-docs-versions', () => ({
22+
getCSECopilotSource: () => 'docs',
23+
}))
24+
25+
vi.mock('@/search/lib/helpers/external-search-analytics', () => ({
26+
handleExternalSearchAnalytics: async () => null,
27+
}))
28+
29+
/**
 * Lists the first argument of every recorded call to the mocked
 * `statsd.increment`, in call order.
 */
const incrementedMetrics = () =>
  (statsd.increment as ReturnType<typeof vi.fn>).mock.calls.map((call) => call[0])
31+
32+
/**
 * Builds a hand-rolled stand-in for the response object handed to the proxy.
 *
 * `status()` and `json()` record what they were given on `statusCode` and
 * `body` and return the same object so calls can be chained; the remaining
 * methods are plain `vi.fn()` spies.
 */
function buildResponse() {
  const res = {
    statusCode: 0,
    body: null as unknown,
    // Record the status code and allow chaining.
    status(code: number) {
      this.statusCode = code
      return this
    },
    // Record the JSON payload and allow chaining.
    json(payload: unknown) {
      this.body = payload
      return this
    },
    setHeader: vi.fn(),
    flushHeaders: vi.fn(),
    write: vi.fn(),
    end: vi.fn(),
    headersSent: false,
  }
  return res
}
52+
53+
/**
 * Builds the smallest request object these tests need: a body with `query`
 * and `version`, plus a `language`. The double cast is deliberate, because
 * the object is only a partial `ExtendedRequest`.
 */
function buildRequest(): ExtendedRequest {
  return {
    body: { query: 'a disallowed query', version: 'dotcom' },
    language: 'en',
  } as unknown as ExtendedRequest
}
59+
60+
/**
 * Makes the mocked `fetchStream` resolve with a fake upstream response.
 *
 * @param status - HTTP status of the fake response; `ok` is derived as `status < 400`.
 * @param jsonBody - Value that `json()` resolves with, or a function that is
 *   invoked instead (used to simulate a throwing body parser).
 * @param contentType - Value reported for the `content-type` header; any other
 *   header name yields `null`.
 * @returns The `json` spy, so callers can assert whether the body was read.
 */
function mockUpstream(
  status: number,
  jsonBody: unknown | (() => never),
  contentType = 'application/json',
) {
  const json = vi.fn(async () => {
    if (typeof jsonBody === 'function') {
      return (jsonBody as () => never)()
    }
    return jsonBody
  })

  const fetchStreamMock = fetchStream as ReturnType<typeof vi.fn>
  fetchStreamMock.mockResolvedValue({
    ok: status < 400,
    status,
    headers: {
      // Header lookup is case-insensitive on the name.
      get: (name: string) => (name.toLowerCase() === 'content-type' ? contentType : null),
    },
    json,
  })

  return json
}
81+
82+
// Verifies which statsd counter the proxy increments for each flavour of
// upstream HTTP 400, and that the 400 status is passed through every time.
describe('aiSearchProxy upstream 400 handling', () => {
  beforeEach(() => {
    // Reset recorded calls so each test only sees its own metrics.
    vi.clearAllMocks()
  })

  test('RAI content filter 400 increments content_filtered, not stream_response_error', async () => {
    mockUpstream(400, {
      code: 400,
      message: 'Responsible AI input content policy breach',
      detail: { code: RAI_CONTENT_FILTER_CODE },
    })
    const res = buildResponse()

    await aiSearchProxy(buildRequest(), res as never)

    const metrics = incrementedMetrics()
    expect(metrics).toContain('ai-search.content_filtered')
    expect(metrics).not.toContain('ai-search.stream_response_error')
    expect(metrics).toContain('ai-search.call')
    expect(res.statusCode).toBe(400)
  })

  test('non-RAI 400 increments stream_response_error', async () => {
    mockUpstream(400, { code: 400, message: 'some other bad request' })
    const res = buildResponse()

    await aiSearchProxy(buildRequest(), res as never)

    const metrics = incrementedMetrics()
    expect(metrics).toContain('ai-search.stream_response_error')
    expect(metrics).not.toContain('ai-search.content_filtered')
    expect(res.statusCode).toBe(400)
  })

  test('malformed 400 body increments stream_response_error', async () => {
    // The body parser throws, as it would for invalid JSON.
    mockUpstream(400, () => {
      throw new Error('invalid json')
    })
    const res = buildResponse()

    await aiSearchProxy(buildRequest(), res as never)

    const metrics = incrementedMetrics()
    expect(metrics).toContain('ai-search.stream_response_error')
    expect(metrics).not.toContain('ai-search.content_filtered')
    expect(res.statusCode).toBe(400)
  })

  test('non-JSON 400 body increments stream_response_error without parsing', async () => {
    // A non-JSON content type must short-circuit before json() is ever called.
    const json = mockUpstream(
      400,
      () => {
        throw new Error('should not be parsed')
      },
      'text/html',
    )
    const res = buildResponse()

    await aiSearchProxy(buildRequest(), res as never)

    const metrics = incrementedMetrics()
    expect(json).not.toHaveBeenCalled()
    expect(metrics).toContain('ai-search.stream_response_error')
    expect(metrics).not.toContain('ai-search.content_filtered')
    expect(res.statusCode).toBe(400)
  })
})

0 commit comments

Comments
 (0)