@@ -87,20 +87,18 @@ function matchesExtension(filePath: string, extSet: Set<string> | null): boolean
8787}
8888
/**
 * Extracts the full `rel="next"` URL from a keyset-pagination `Link` response
 * header. GitLab's guidance is to follow this link verbatim rather than rebuild
 * the URL, so the target is returned exactly as it appears between `<` and `>`
 * — this is robust to whichever continuation parameter the endpoint uses
 * (`page_token`, `cursor`, `id_after`, …).
 *
 * The header is parsed as a list of `<target>; param=value; …` link-values
 * rather than by splitting on `,`, so that:
 * - a target URL containing a literal comma is not cut in half, and
 * - `rel=next` appearing inside the URL or inside another quoted parameter
 *   (e.g. `title="…"`) is not mistaken for the link's relation type.
 *
 * @param linkHeader Raw `Link` header value, or `null` when the header is absent.
 * @returns The target of the first link whose `rel` parameter lists `next`
 *   (case-insensitive; `rel` may hold several space-separated relation types),
 *   or `undefined` when there is no next page.
 */
function parseNextLink(linkHeader: string | null): string | undefined {
  if (!linkHeader) return undefined

  // One link-value: `<target>` followed by zero or more `; param` segments.
  // Quoted strings are consumed as a unit so commas/semicolons inside them do
  // not terminate the parameter list early.
  const linkValuePattern = /<([^>]+)>((?:\s*;(?:[^;,"]|"[^"]*")*)*)/g
  // One parameter: `; name`, optionally `= "quoted"` or `= token`.
  const paramPattern = /;\s*([^=;,\s"]+)\s*(?:=\s*(?:"([^"]*)"|([^;,"\s]*)))?/g

  for (const linkValue of linkHeader.matchAll(linkValuePattern)) {
    const target = linkValue[1]
    const params = linkValue[2] ?? ''
    for (const param of params.matchAll(paramPattern)) {
      if (param[1]?.toLowerCase() !== 'rel') continue
      // `rel` may carry multiple relation types separated by whitespace.
      const relationTypes = (param[2] ?? param[3] ?? '').toLowerCase().split(/\s+/)
      if (relationTypes.includes('next')) return target
    }
  }
  return undefined
}
@@ -477,7 +475,8 @@ async function fetchProject(
/**
 * Resumable sync position. Serialized into the opaque cursor string by
 * `encodeCursor` and restored by `decodeCursor`.
 */
interface CursorState {
  /** Sync phase to resume in. */
  phase: SyncPhase
  /** Issue-listing page number; `decodeCursor` falls back to 1 when it is missing or not positive. */
  issuePage: number
  /** Full `rel="next"` URL for the repository-tree keyset page to fetch next. */
  fileNextUrl?: string
}
482481
483482function encodeCursor ( state : CursorState ) : string {
@@ -490,7 +489,7 @@ function decodeCursor(cursor: string | undefined, initialPhase: SyncPhase): Curs
490489 const parsed = JSON . parse ( Buffer . from ( cursor , 'base64url' ) . toString ( 'utf8' ) ) as Partial < {
491490 phase : SyncPhase
492491 issuePage : number
493- fileToken : string
492+ fileNextUrl : string
494493 } >
495494 const phase : SyncPhase =
496495 parsed . phase === 'repo' || parsed . phase === 'issues' || parsed . phase === 'wiki'
@@ -499,7 +498,7 @@ function decodeCursor(cursor: string | undefined, initialPhase: SyncPhase): Curs
499498 return {
500499 phase,
501500 issuePage : Number ( parsed . issuePage ) > 0 ? Number ( parsed . issuePage ) : 1 ,
502- fileToken : typeof parsed . fileToken === 'string' ? parsed . fileToken : undefined ,
501+ fileNextUrl : typeof parsed . fileNextUrl === 'string' ? parsed . fileNextUrl : undefined ,
503502 }
504503 } catch {
505504 return { phase : initialPhase , issuePage : 1 }
@@ -736,14 +735,14 @@ export const gitlabConnector: ConnectorConfig = {
736735 per_page : String ( PAGE_SIZE ) ,
737736 pagination : 'keyset' ,
738737 } )
739- if ( state . fileToken ) treeParams . set ( 'page_token' , state . fileToken )
740-
741- const url = `${ apiBase } /projects/${ encodedProject } /repository/tree?${ treeParams . toString ( ) } `
738+ const url =
739+ state . fileNextUrl ??
740+ `${ apiBase } /projects/${ encodedProject } /repository/tree?${ treeParams . toString ( ) } `
742741 logger . info ( 'Listing GitLab repository files' , {
743742 host,
744743 project : encodedProject ,
745744 ref,
746- hasToken : Boolean ( state . fileToken ) ,
745+ continued : Boolean ( state . fileNextUrl ) ,
747746 } )
748747
749748 const response = await fetchWithRetry ( url , {
@@ -753,6 +752,14 @@ export const gitlabConnector: ConnectorConfig = {
753752
754753 if ( ! response . ok ) {
755754 if ( response . status === 404 ) {
755+ logger . warn (
756+ 'GitLab repository tree not found; skipping files (empty repo or bad branch)' ,
757+ {
758+ host,
759+ project : encodedProject ,
760+ ref,
761+ }
762+ )
756763 const adv = advance ( 'repo' )
757764 return { documents : [ ] , nextCursor : adv . nextCursor , hasMore : adv . hasMore }
758765 }
@@ -780,11 +787,11 @@ export const gitlabConnector: ConnectorConfig = {
780787 )
781788 if ( hitLimit ) return { documents : capped , hasMore : false }
782789
783- const nextToken = parseNextPageToken ( response . headers . get ( 'link' ) )
784- if ( nextToken ) {
790+ const nextLink = parseNextLink ( response . headers . get ( 'link' ) )
791+ if ( nextLink ) {
785792 return {
786793 documents : capped ,
787- nextCursor : encodeCursor ( { phase : 'repo' , issuePage : 1 , fileToken : nextToken } ) ,
794+ nextCursor : encodeCursor ( { phase : 'repo' , issuePage : 1 , fileNextUrl : nextLink } ) ,
788795 hasMore : true ,
789796 }
790797 }
@@ -802,6 +809,15 @@ export const gitlabConnector: ConnectorConfig = {
802809 } )
803810
804811 if ( ! response . ok ) {
812+ if ( response . status === 403 || response . status === 404 ) {
813+ logger . warn ( 'GitLab wiki unavailable; skipping wiki phase' , {
814+ host,
815+ project : encodedProject ,
816+ status : response . status ,
817+ } )
818+ const adv = advance ( 'wiki' )
819+ return { documents : [ ] , nextCursor : adv . nextCursor , hasMore : adv . hasMore }
820+ }
805821 const errorText = await response . text ( ) . catch ( ( ) => '' )
806822 logger . error ( 'Failed to list GitLab wiki pages' , {
807823 status : response . status ,
0 commit comments