@@ -4,7 +4,7 @@ import { getErrorMessage, toError } from '@sim/utils/errors'
44import { S3Icon } from '@/components/icons'
55import { fetchWithRetry , VALIDATE_RETRY_OPTIONS } from '@/lib/knowledge/documents/utils'
66import type { ConnectorConfig , ExternalDocument , ExternalDocumentList } from '@/connectors/types'
7- import { parseTagDate } from '@/connectors/utils'
7+ import { parseTagDate , readBodyWithLimit } from '@/connectors/utils'
88import { encodeS3PathComponent , getSignatureKey } from '@/tools/s3/utils'
99
1010const logger = createLogger ( 'S3Connector' )
@@ -329,36 +329,6 @@ function buildListQueryString(params: Record<string, string>): string {
329329 . join ( '&' )
330330}
331331
332- /**
333- * Reads a response body as UTF-8 text while enforcing a hard byte cap. The
334- * declared `content-length` header cannot be trusted as the sole guard:
335- * S3-compatible stores (MinIO, Cloudflare R2) may use chunked transfer
336- * encoding and omit the header entirely. Bytes are accumulated from the
337- * stream and reading aborts as soon as the cap is exceeded, so an oversized
338- * body is never fully buffered. Returns null when the cap is exceeded.
339- */
340- async function readBodyWithLimit ( response : Response , maxBytes : number ) : Promise < string | null > {
341- if ( ! response . body ) {
342- const text = await response . text ( )
343- return Buffer . byteLength ( text ) > maxBytes ? null : text
344- }
345-
346- const reader = response . body . getReader ( )
347- const chunks : Uint8Array [ ] = [ ]
348- let total = 0
349- while ( true ) {
350- const { done, value } = await reader . read ( )
351- if ( done ) break
352- total += value . byteLength
353- if ( total > maxBytes ) {
354- await reader . cancel ( ) . catch ( ( ) => { } )
355- return null
356- }
357- chunks . push ( value )
358- }
359- return Buffer . concat ( chunks ) . toString ( 'utf-8' )
360- }
361-
362332/**
363333 * Decodes XML entities found in S3 response text values. `&amp;` is decoded
364334 * last so sequences like `&amp;lt;` resolve to `&lt;` rather than `<`.
@@ -663,11 +633,12 @@ export const s3Connector: ConnectorConfig = {
663633 return null
664634 }
665635
666- const content = await readBodyWithLimit ( response , MAX_FILE_SIZE )
667- if ( content === null ) {
636+ const body = await readBodyWithLimit ( response , MAX_FILE_SIZE )
637+ if ( body === null ) {
668638 logger . warn ( 'Skipping oversized S3 object (size cap exceeded while streaming)' , { key } )
669639 return null
670640 }
641+ const content = body . toString ( 'utf-8' )
671642 if ( ! content . trim ( ) ) return null
672643
673644 const entry : S3ObjectEntry = {
0 commit comments