Skip to content

Commit 3823396

Browse files
committed
fix(connectors): shared streaming size-cap reader for ado file hydration (promote from s3)
1 parent 8355b80 commit 3823396

3 files changed

Lines changed: 43 additions & 39 deletions

File tree

apps/sim/connectors/azure-devops/azure-devops.ts

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ import { getErrorMessage, toError } from '@sim/utils/errors'
33
import { AzureDevOpsIcon } from '@/components/icons'
44
import { fetchWithRetry, VALIDATE_RETRY_OPTIONS } from '@/lib/knowledge/documents/utils'
55
import type { ConnectorConfig, ExternalDocument, ExternalDocumentList } from '@/connectors/types'
6-
import { htmlToPlainText, joinTagArray, parseTagDate } from '@/connectors/utils'
6+
import { htmlToPlainText, joinTagArray, parseTagDate, readBodyWithLimit } from '@/connectors/utils'
77

88
const logger = createLogger('AzureDevOpsConnector')
99

@@ -1032,13 +1032,13 @@ async function getFileDocument(
10321032
throw new Error(`Failed to fetch repository file content: ${contentResponse.status}`)
10331033
}
10341034

1035-
const buffer = Buffer.from(await contentResponse.arrayBuffer())
1036-
if (isBinaryBuffer(buffer)) {
1037-
logger.info('Skipping binary Azure DevOps file', { path })
1035+
const buffer = await readBodyWithLimit(contentResponse, MAX_FILE_SIZE)
1036+
if (buffer === null) {
1037+
logger.info('Skipping oversized Azure DevOps file', { path })
10381038
return null
10391039
}
1040-
if (buffer.byteLength > MAX_FILE_SIZE) {
1041-
logger.info('Skipping oversized Azure DevOps file', { path, size: buffer.byteLength })
1040+
if (isBinaryBuffer(buffer)) {
1041+
logger.info('Skipping binary Azure DevOps file', { path })
10421042
return null
10431043
}
10441044

apps/sim/connectors/s3/s3.ts

Lines changed: 4 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ import { getErrorMessage, toError } from '@sim/utils/errors'
44
import { S3Icon } from '@/components/icons'
55
import { fetchWithRetry, VALIDATE_RETRY_OPTIONS } from '@/lib/knowledge/documents/utils'
66
import type { ConnectorConfig, ExternalDocument, ExternalDocumentList } from '@/connectors/types'
7-
import { parseTagDate } from '@/connectors/utils'
7+
import { parseTagDate, readBodyWithLimit } from '@/connectors/utils'
88
import { encodeS3PathComponent, getSignatureKey } from '@/tools/s3/utils'
99

1010
const logger = createLogger('S3Connector')
@@ -329,36 +329,6 @@ function buildListQueryString(params: Record<string, string>): string {
329329
.join('&')
330330
}
331331

332-
/**
333-
* Reads a response body as UTF-8 text while enforcing a hard byte cap. The
334-
* declared `content-length` header cannot be trusted as the sole guard:
335-
* S3-compatible stores (MinIO, Cloudflare R2) may use chunked transfer
336-
* encoding and omit the header entirely. Bytes are accumulated from the
337-
* stream and reading aborts as soon as the cap is exceeded, so an oversized
338-
* body is never fully buffered. Returns null when the cap is exceeded.
339-
*/
340-
async function readBodyWithLimit(response: Response, maxBytes: number): Promise<string | null> {
341-
if (!response.body) {
342-
const text = await response.text()
343-
return Buffer.byteLength(text) > maxBytes ? null : text
344-
}
345-
346-
const reader = response.body.getReader()
347-
const chunks: Uint8Array[] = []
348-
let total = 0
349-
while (true) {
350-
const { done, value } = await reader.read()
351-
if (done) break
352-
total += value.byteLength
353-
if (total > maxBytes) {
354-
await reader.cancel().catch(() => {})
355-
return null
356-
}
357-
chunks.push(value)
358-
}
359-
return Buffer.concat(chunks).toString('utf-8')
360-
}
361-
362332
/**
363333
* Decodes XML entities found in S3 response text values. `&amp;` is decoded
364334
* last so sequences like `&amp;lt;` resolve to `&lt;` rather than `<`.
@@ -663,11 +633,12 @@ export const s3Connector: ConnectorConfig = {
663633
return null
664634
}
665635

666-
const content = await readBodyWithLimit(response, MAX_FILE_SIZE)
667-
if (content === null) {
636+
const body = await readBodyWithLimit(response, MAX_FILE_SIZE)
637+
if (body === null) {
668638
logger.warn('Skipping oversized S3 object (size cap exceeded while streaming)', { key })
669639
return null
670640
}
641+
const content = body.toString('utf-8')
671642
if (!content.trim()) return null
672643

673644
const entry: S3ObjectEntry = {

apps/sim/connectors/utils.ts

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,3 +78,36 @@ export function parseMultiValue(value: unknown): string[] {
7878
}
7979
return []
8080
}
81+
82+
/**
 * Reads a response body into a Buffer while enforcing a hard byte cap.
 *
 * The declared `content-length` header cannot be trusted as the sole guard —
 * chunked transfer encoding may omit it entirely — so bytes are accumulated
 * from the stream and reading aborts as soon as the cap is exceeded, ensuring
 * an oversized (or hostile) body is never fully buffered into memory.
 *
 * @param response - Fetch response whose body should be consumed.
 * @param maxBytes - Maximum number of body bytes to accept (inclusive).
 * @returns The complete body, or `null` when the body is larger than `maxBytes`.
 * @throws RangeError when `maxBytes` is `NaN`; every `> NaN` comparison is
 *   false, so the cap would otherwise be silently disabled.
 */
export async function readBodyWithLimit(
  response: Response,
  maxBytes: number
): Promise<Buffer | null> {
  // Fail loudly on a NaN limit instead of failing open with no cap at all.
  if (Number.isNaN(maxBytes)) {
    throw new RangeError('readBodyWithLimit: maxBytes must not be NaN')
  }

  // No stream is exposed: the only option is to buffer and check afterwards.
  if (!response.body) {
    const buffer = Buffer.from(await response.arrayBuffer())
    return buffer.byteLength > maxBytes ? null : buffer
  }

  const reader = response.body.getReader()
  const chunks: Uint8Array[] = []
  let total = 0
  try {
    while (true) {
      const { done, value } = await reader.read()
      if (done) break
      total += value.byteLength
      if (total > maxBytes) {
        // Best-effort cancel: the body is being discarded either way, so a
        // failure to cancel must not mask the "oversized" result.
        await reader.cancel().catch(() => {})
        return null
      }
      chunks.push(value)
    }
  } finally {
    // Release the reader lock on every exit path (success, oversize, error).
    reader.releaseLock()
  }
  // `total` is exactly the sum of the collected chunk lengths at this point,
  // so pass it through to spare Buffer.concat a second pass over the list.
  return Buffer.concat(chunks, total)
}

0 commit comments

Comments
 (0)