Skip to content

Commit 6b66855

Browse files
committed
fix(connectors): ado byte-exact file content fetch, google-forms hash-poisoning on listing failure
1 parent f87d05d commit 6b66855

2 files changed

Lines changed: 44 additions & 16 deletions

File tree

apps/sim/connectors/azure-devops/azure-devops.ts

Lines changed: 32 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -943,35 +943,57 @@ async function getFileDocument(
943943
return null
944944
}
945945

946-
const params = new URLSearchParams({
946+
const metadataParams = new URLSearchParams({
947947
path,
948948
'versionDescriptor.version': branch,
949949
'versionDescriptor.versionType': 'Branch',
950-
includeContent: 'true',
951950
includeContentMetadata: 'true',
952951
$format: 'json',
953952
'api-version': GIT_API_VERSION,
954953
})
955-
const url = `${ADO_BASE_URL}/${encodeURIComponent(organization)}/${encodeURIComponent(project)}/_apis/git/repositories/${encodeURIComponent(repoId)}/items?${params.toString()}`
956-
const response = await fetchWithRetry(url, {
954+
const metadataUrl = `${ADO_BASE_URL}/${encodeURIComponent(organization)}/${encodeURIComponent(project)}/_apis/git/repositories/${encodeURIComponent(repoId)}/items?${metadataParams.toString()}`
955+
const metadataResponse = await fetchWithRetry(metadataUrl, {
957956
method: 'GET',
958957
headers: { Accept: 'application/json', Authorization: patAuthHeader(accessToken) },
959958
})
960959

961-
if (!response.ok) {
962-
if (response.status === 404) return null
963-
throw new Error(`Failed to fetch repository file: ${response.status}`)
960+
if (!metadataResponse.ok) {
961+
if (metadataResponse.status === 404) return null
962+
throw new Error(`Failed to fetch repository file metadata: ${metadataResponse.status}`)
964963
}
965964

966-
const item = (await response.json()) as GitItem
965+
const item = (await metadataResponse.json()) as GitItem
967966
if (!item.objectId) return null
968967
if (item.contentMetadata?.isBinary) {
969968
logger.info('Skipping binary Azure DevOps file', { path })
970969
return null
971970
}
972971

973-
const raw = typeof item.content === 'string' ? item.content : ''
974-
const buffer = Buffer.from(raw, 'utf8')
972+
/**
973+
* Content is fetched as raw bytes (Accept: application/octet-stream) rather
974+
* than via `includeContent=true` JSON. The JSON `content` field's encoding is
975+
* ambiguous (the API may deliver base64 or codepage-transcoded text per
976+
* `ItemContentType`), whereas the octet-stream response is the byte-exact git
977+
* blob, which is then binary-sniffed and decoded as UTF-8.
978+
*/
979+
const contentParams = new URLSearchParams({
980+
path,
981+
'versionDescriptor.version': branch,
982+
'versionDescriptor.versionType': 'Branch',
983+
'api-version': GIT_API_VERSION,
984+
})
985+
const contentUrl = `${ADO_BASE_URL}/${encodeURIComponent(organization)}/${encodeURIComponent(project)}/_apis/git/repositories/${encodeURIComponent(repoId)}/items?${contentParams.toString()}`
986+
const contentResponse = await fetchWithRetry(contentUrl, {
987+
method: 'GET',
988+
headers: { Accept: 'application/octet-stream', Authorization: patAuthHeader(accessToken) },
989+
})
990+
991+
if (!contentResponse.ok) {
992+
if (contentResponse.status === 404) return null
993+
throw new Error(`Failed to fetch repository file content: ${contentResponse.status}`)
994+
}
995+
996+
const buffer = Buffer.from(await contentResponse.arrayBuffer())
975997
if (isBinaryBuffer(buffer)) {
976998
logger.info('Skipping binary Azure DevOps file', { path })
977999
return null

apps/sim/connectors/google-forms/google-forms.ts

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,8 @@ async function fetchFormResponses(accessToken: string, formId: string): Promise<
275275
* Reads the latest response submission time for change detection without
276276
* retaining every response. Returns the greatest `lastSubmittedTime` (falling
277277
* back to `createTime`) across all responses, or undefined when there are none.
278+
* Throws on a failed read so the caller skips the form for this run instead of
279+
* computing a hash from incomplete data.
278280
*/
279281
async function fetchLatestResponseTime(
280282
accessToken: string,
@@ -293,13 +295,17 @@ async function fetchLatestResponseTime(
293295

294296
if (!response.ok) {
295297
/**
296-
* Treat response-listing failures as "no responses" for hashing purposes
297-
* so a transient error never silently drops the form from the sync.
298+
* Propagate the failure rather than hashing with an empty response segment.
299+
* A swallowed error here would poison the stub's content hash (listing
300+
* would hash "no responses" while getDocument hashes the real latest
301+
* submission time), forcing the form to be re-processed on every sync. Throwing lets
302+
* the per-form catch in listDocuments skip the form for this run and set
303+
* `skippedOnError` → `listingCapped`, so the form is neither deleted nor
304+
* hashed incorrectly.
298305
*/
299-
logger.warn(`Failed to read responses for change detection on form ${formId}`, {
300-
status: response.status,
301-
})
302-
return undefined
306+
throw new Error(
307+
`Failed to read responses for change detection on form ${formId}: ${response.status}`
308+
)
303309
}
304310

305311
const data = (await response.json()) as FormResponseList

0 commit comments

Comments
 (0)