@@ -943,35 +943,57 @@ async function getFileDocument(
943943 return null
944944 }
945945
946- const params = new URLSearchParams ( {
946+ const metadataParams = new URLSearchParams ( {
947947 path,
948948 'versionDescriptor.version' : branch ,
949949 'versionDescriptor.versionType' : 'Branch' ,
950- includeContent : 'true' ,
951950 includeContentMetadata : 'true' ,
952951 $format : 'json' ,
953952 'api-version' : GIT_API_VERSION ,
954953 } )
955- const url = `${ ADO_BASE_URL } /${ encodeURIComponent ( organization ) } /${ encodeURIComponent ( project ) } /_apis/git/repositories/${ encodeURIComponent ( repoId ) } /items?${ params . toString ( ) } `
956- const response = await fetchWithRetry ( url , {
954+ const metadataUrl = `${ ADO_BASE_URL } /${ encodeURIComponent ( organization ) } /${ encodeURIComponent ( project ) } /_apis/git/repositories/${ encodeURIComponent ( repoId ) } /items?${ metadataParams . toString ( ) } `
955+ const metadataResponse = await fetchWithRetry ( metadataUrl , {
957956 method : 'GET' ,
958957 headers : { Accept : 'application/json' , Authorization : patAuthHeader ( accessToken ) } ,
959958 } )
960959
961- if ( ! response . ok ) {
962- if ( response . status === 404 ) return null
963- throw new Error ( `Failed to fetch repository file: ${ response . status } ` )
960+ if ( ! metadataResponse . ok ) {
961+ if ( metadataResponse . status === 404 ) return null
962+ throw new Error ( `Failed to fetch repository file metadata : ${ metadataResponse . status } ` )
964963 }
965964
966- const item = ( await response . json ( ) ) as GitItem
965+ const item = ( await metadataResponse . json ( ) ) as GitItem
967966 if ( ! item . objectId ) return null
968967 if ( item . contentMetadata ?. isBinary ) {
969968 logger . info ( 'Skipping binary Azure DevOps file' , { path } )
970969 return null
971970 }
972971
973- const raw = typeof item . content === 'string' ? item . content : ''
974- const buffer = Buffer . from ( raw , 'utf8' )
972+ /**
973+ * Content is fetched as raw bytes (Accept: application/octet-stream) rather
974+ * than via `includeContent=true` JSON. The JSON `content` field's encoding is
975+ * ambiguous (the API may deliver base64 or codepage-transcoded text per
976+ * `ItemContentType`), whereas the octet-stream response is the byte-exact git
977+ * blob, which is then binary-sniffed and decoded as UTF-8.
978+ */
979+ const contentParams = new URLSearchParams ( {
980+ path,
981+ 'versionDescriptor.version' : branch ,
982+ 'versionDescriptor.versionType' : 'Branch' ,
983+ 'api-version' : GIT_API_VERSION ,
984+ } )
985+ const contentUrl = `${ ADO_BASE_URL } /${ encodeURIComponent ( organization ) } /${ encodeURIComponent ( project ) } /_apis/git/repositories/${ encodeURIComponent ( repoId ) } /items?${ contentParams . toString ( ) } `
986+ const contentResponse = await fetchWithRetry ( contentUrl , {
987+ method : 'GET' ,
988+ headers : { Accept : 'application/octet-stream' , Authorization : patAuthHeader ( accessToken ) } ,
989+ } )
990+
991+ if ( ! contentResponse . ok ) {
992+ if ( contentResponse . status === 404 ) return null
993+ throw new Error ( `Failed to fetch repository file content: ${ contentResponse . status } ` )
994+ }
995+
996+ const buffer = Buffer . from ( await contentResponse . arrayBuffer ( ) )
975997 if ( isBinaryBuffer ( buffer ) ) {
976998 logger . info ( 'Skipping binary Azure DevOps file' , { path } )
977999 return null
0 commit comments