Skip to content

Commit c16e94a

Browse files
committed
fix(connectors): audit fixes across new connectors
- registry: register x connector (was dead code, never wired in)
- google-docs/google-drive/google-forms: gate deletion reconciliation on Drive incompleteSearch; google-docs also now sets listingCapped on its maxDocs cap path
- jsm: add read:jira-user scope so reporter resolves on requests
- gong: only set listingCapped on genuine truncation, not exact-cap source exhaustion
- gitlab: issues phase switched to keyset pagination (removes ~50k offset ceiling), matching the repo-tree phase
- grain: parallelize recording + transcript fetch in getDocument
- ashby: document updatedAt-based content-hash limitation for notes/feedback change detection
- tests: mapTags coverage for x, granola, greenhouse, fathom, rootly
1 parent 3bfec6e commit c16e94a

10 files changed

Lines changed: 471 additions & 24 deletions

File tree

apps/sim/connectors/ashby/ashby.ts

Lines changed: 26 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -298,7 +298,32 @@ function renderFeedbackValue(value: unknown): string {
298298

299299
/**
300300
* Stable, metadata-based content hash for a candidate document. Identical between the
301-
* listing stub and the fully-fetched document so unchanged candidates are skipped.
301+
* listing stub and the fully-fetched document so unchanged candidates are skipped,
302+
* which keeps the `getDocument` re-hydration (notes + feedback fetches) cheap: the
303+
* sync engine only re-hydrates a deferred stub when this hash differs from the stored
304+
* document's hash (see `lib/knowledge/connectors/sync-engine.ts`).
305+
*
306+
* Known limitation — notes/feedback freshness depends on `candidate.updatedAt`.
307+
* Candidate notes (`candidate.listNotes`) and interview feedback
308+
* (`applicationFeedback.list`) are separate Ashby objects, not candidate fields. This
309+
* hash is derived solely from the candidate's own `updatedAt`, so a new note or newly
310+
* submitted feedback is only re-synced if Ashby advances `candidate.updatedAt` as a
311+
* side effect of that write.
312+
*
313+
* As of this writing Ashby's public API docs do not specify what counts as a
314+
* "modification" for `candidate.updatedAt` or for `candidate.list` syncToken
315+
* incremental sync, and no third-party ATS-integration vendor (Merge, Nango, Knit)
316+
* documents it either — so this behavior is unverified. If Ashby does NOT touch
317+
* `candidate.updatedAt` on note/feedback writes, those additions will not be picked up
318+
* until some other candidate field changes; a forced full sync re-hydrates everything
319+
* regardless. No cheaper listing-time signal exists to fold into this hash: the
320+
* `candidate.list` object exposes no note/feedback count, and syncToken carries the
321+
* same unspecified change semantics as `updatedAt`.
322+
*
323+
* Refs:
324+
* - https://developers.ashbyhq.com/reference/candidatelist
325+
* - https://developers.ashbyhq.com/reference/candidatecreatenote
326+
* - https://developers.ashbyhq.com/docs/pagination-and-incremental-sync
302327
*/
303328
function buildContentHash(id: string, updatedAt: string | null): string {
304329
return `ashby:${id}:${updatedAt ?? ''}`

apps/sim/connectors/gitlab/gitlab.ts

Lines changed: 25 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -470,15 +470,18 @@ async function fetchProject(
470470
}
471471

472472
/**
473-
* Encodes the listing cursor. The cursor packs the resource phase (wiki ➜ issues)
474-
* and the issues page number so a single sync walks wikis first, then paginates
475-
* issues via the X-Next-Page header.
473+
* Encodes the listing cursor. The cursor packs the resource phase (repo ➜ wiki ➜
474+
* issues) and a per-phase continuation token so a single sync walks the phases in
475+
* order. The repository-tree and issues phases both use GitLab keyset pagination
476+
* and store the full `rel="next"` URL from the Link header to fetch verbatim.
476477
*/
477478
interface CursorState {
478479
phase: SyncPhase
479480
issuePage: number
480481
/** Full `rel="next"` URL for the repository-tree keyset page to fetch next. */
481482
fileNextUrl?: string
483+
/** Full `rel="next"` URL for the issues keyset page to fetch next. */
484+
issueNextUrl?: string
482485
}
483486

484487
function encodeCursor(state: CursorState): string {
@@ -492,6 +495,7 @@ function decodeCursor(cursor: string | undefined, initialPhase: SyncPhase): Curs
492495
phase: SyncPhase
493496
issuePage: number
494497
fileNextUrl: string
498+
issueNextUrl: string
495499
}>
496500
const phase: SyncPhase =
497501
parsed.phase === 'repo' || parsed.phase === 'issues' || parsed.phase === 'wiki'
@@ -501,6 +505,7 @@ function decodeCursor(cursor: string | undefined, initialPhase: SyncPhase): Curs
501505
phase,
502506
issuePage: Number(parsed.issuePage) > 0 ? Number(parsed.issuePage) : 1,
503507
fileNextUrl: typeof parsed.fileNextUrl === 'string' ? parsed.fileNextUrl : undefined,
508+
issueNextUrl: typeof parsed.issueNextUrl === 'string' ? parsed.issueNextUrl : undefined,
504509
}
505510
} catch {
506511
return { phase: initialPhase, issuePage: 1 }
@@ -859,9 +864,9 @@ export const gitlabConnector: ConnectorConfig = {
859864
if (state.phase === 'issues') {
860865
const params = new URLSearchParams({
861866
per_page: String(PAGE_SIZE),
862-
page: String(state.issuePage),
863867
order_by: 'updated_at',
864868
sort: 'desc',
869+
pagination: 'keyset',
865870
})
866871
if (lastSyncAt) params.set('updated_after', lastSyncAt.toISOString())
867872
const issueState =
@@ -874,11 +879,15 @@ export const gitlabConnector: ConnectorConfig = {
874879
typeof sourceConfig.issueMilestone === 'string' ? sourceConfig.issueMilestone.trim() : ''
875880
if (issueMilestone) params.set('milestone', issueMilestone)
876881

877-
const url = `${apiBase}/projects/${encodedProject}/issues?${params.toString()}`
882+
if (state.issueNextUrl && !isSameOrigin(state.issueNextUrl, apiBase)) {
883+
throw new Error('GitLab pagination cursor points to an unexpected host')
884+
}
885+
const url =
886+
state.issueNextUrl ?? `${apiBase}/projects/${encodedProject}/issues?${params.toString()}`
878887
logger.info('Listing GitLab issues', {
879888
host,
880889
project: encodedProject,
881-
page: state.issuePage,
890+
continued: Boolean(state.issueNextUrl),
882891
incremental: Boolean(lastSyncAt),
883892
})
884893

@@ -909,18 +918,18 @@ export const gitlabConnector: ConnectorConfig = {
909918
maxItems,
910919
syncContext
911920
)
921+
if (hitLimit) return { documents: capped, hasMore: false }
912922

913-
const nextPageHeader = response.headers.get('x-next-page')?.trim()
914-
const nextPage = nextPageHeader ? Number(nextPageHeader) : 0
915-
const hasMorePages = !hitLimit && Number.isFinite(nextPage) && nextPage > 0
916-
917-
return {
918-
documents: capped,
919-
nextCursor: hasMorePages
920-
? encodeCursor({ phase: 'issues', issuePage: nextPage })
921-
: undefined,
922-
hasMore: hasMorePages,
923+
const nextLink = parseNextLink(response.headers.get('link'))
924+
if (nextLink) {
925+
return {
926+
documents: capped,
927+
nextCursor: encodeCursor({ phase: 'issues', issuePage: 1, issueNextUrl: nextLink }),
928+
hasMore: true,
929+
}
923930
}
931+
932+
return { documents: capped, hasMore: false }
924933
}
925934

926935
return { documents: [], hasMore: false }

apps/sim/connectors/gong/gong.ts

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -417,20 +417,32 @@ export const gongConnector: ConnectorConfig = {
417417

418418
const prevFetched = (syncContext?.totalDocsFetched as number) ?? 0
419419
let documents = allDocuments
420+
let capDroppedDocs = false
420421
if (maxCalls > 0) {
421422
const remaining = Math.max(0, maxCalls - prevFetched)
422423
if (allDocuments.length > remaining) {
423424
documents = allDocuments.slice(0, remaining)
425+
capDroppedDocs = true
424426
}
425427
}
426428

427429
const totalFetched = prevFetched + documents.length
428430
if (syncContext) syncContext.totalDocsFetched = totalFetched
429431
const hitLimit = maxCalls > 0 && totalFetched >= maxCalls
430-
if (hitLimit && syncContext) syncContext.listingCapped = true
431-
432432
const hasMore = !hitLimit && Boolean(nextPageCursor)
433433

434+
/**
435+
* Only flag the listing as capped when the `maxCalls` limit actually
436+
* truncated calls that still exist in the source — either by dropping calls
437+
* from the current page or by stopping while another page remains. Reaching
438+
* the limit exactly at source exhaustion (no dropped calls, no further
439+
* cursor) yields a complete listing, so deletion reconciliation must still
440+
* run for calls removed in Gong.
441+
*/
442+
if (syncContext && (capDroppedDocs || (hitLimit && Boolean(nextPageCursor)))) {
443+
syncContext.listingCapped = true
444+
}
445+
434446
return {
435447
documents,
436448
nextCursor: hasMore ? nextPageCursor : undefined,

apps/sim/connectors/google-docs/google-docs.ts

Lines changed: 23 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -235,13 +235,23 @@ export const googleDocsConnector: ConnectorConfig = {
235235
const data = await response.json()
236236
const files = (data.files || []) as DriveFile[]
237237

238+
/**
239+
* Drive sets `incompleteSearch` when it could not search every corpus (it
240+
* arises with the `allDrives` scope enabled by `includeItemsFromAllDrives`).
241+
* A partial listing drops still-existing docs, so reconciliation must be
242+
* suppressed to avoid hard-deleting valid documents.
243+
*/
244+
const incompleteSearch = data.incompleteSearch === true
245+
238246
const maxDocs = sourceConfig.maxDocs ? Number(sourceConfig.maxDocs) : 0
239247
const previouslyFetched = (syncContext?.totalDocsFetched as number) ?? 0
240248

241249
let documents = files.map(fileToStub)
250+
let slicedSome = false
242251
if (maxDocs > 0) {
243252
const remaining = maxDocs - previouslyFetched
244253
if (documents.length > remaining) {
254+
slicedSome = true
245255
documents = documents.slice(0, remaining)
246256
}
247257
}
@@ -252,6 +262,19 @@ export const googleDocsConnector: ConnectorConfig = {
252262

253263
const nextPageToken = data.nextPageToken as string | undefined
254264

265+
/**
266+
* Mark the listing as incomplete so the sync engine skips deletion
267+
* reconciliation when this page does not represent the full source set:
268+
* - `slicedSome`: the page held more docs than the `maxDocs` cap allowed.
269+
* - `hitLimit` with a next page: the cap was reached while more pages remain.
270+
* - `incompleteSearch`: Drive could not search every corpus, so the page is
271+
* partial and may omit still-existing docs.
272+
* Reconciliation against any of these would hard-delete valid documents.
273+
*/
274+
if (syncContext && (slicedSome || (hitLimit && Boolean(nextPageToken)) || incompleteSearch)) {
275+
syncContext.listingCapped = true
276+
}
277+
255278
return {
256279
documents,
257280
nextCursor: hitLimit ? undefined : nextPageToken,

apps/sim/connectors/google-drive/google-drive.ts

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -268,14 +268,22 @@ export const googleDriveConnector: ConnectorConfig = {
268268
const data = await response.json()
269269
const files = (data.files || []) as DriveFile[]
270270

271+
/**
272+
* Drive sets `incompleteSearch` when it could not search every corpus (it
273+
* arises with the `allDrives` scope enabled by `includeItemsFromAllDrives`).
274+
* A partial listing drops still-existing files, so reconciliation must be
275+
* suppressed to avoid hard-deleting valid documents.
276+
*/
277+
const incompleteSearch = data.incompleteSearch === true
278+
271279
const documents = files
272280
.filter((f) => isGoogleWorkspaceFile(f.mimeType) || isSupportedTextFile(f.mimeType))
273281
.map(fileToStub)
274282

275283
const totalFetched = previouslyFetched + documents.length
276284
if (syncContext) syncContext.totalDocsFetched = totalFetched
277285
const hitLimit = maxFiles > 0 && totalFetched >= maxFiles
278-
if (hitLimit && syncContext) syncContext.listingCapped = true
286+
if (syncContext && (hitLimit || incompleteSearch)) syncContext.listingCapped = true
279287

280288
const nextPageToken = data.nextPageToken as string | undefined
281289

apps/sim/connectors/google-forms/google-forms.ts

Lines changed: 14 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -573,6 +573,14 @@ export const googleFormsConnector: ConnectorConfig = {
573573
const data = await response.json()
574574
let files = (data.files || []) as DriveFormFile[]
575575

576+
/**
577+
* Drive sets `incompleteSearch` when it could not search every corpus (it
578+
* arises with the `allDrives` scope enabled by `includeItemsFromAllDrives`).
579+
* A partial listing drops still-existing forms, so reconciliation must be
580+
* suppressed to avoid hard-deleting valid documents.
581+
*/
582+
const incompleteSearch = data.incompleteSearch === true
583+
576584
let slicedSome = false
577585
if (maxForms > 0) {
578586
const remaining = maxForms - previouslyFetched
@@ -633,11 +641,16 @@ export const googleFormsConnector: ConnectorConfig = {
633641
* - `hitLimit` with a next page: the cap was reached while more pages of
634642
* forms remain in the source.
635643
* - `skippedOnError`: a transient error dropped a still-present form.
644+
* - `incompleteSearch`: Drive could not search every corpus, so the page
645+
* itself is partial and may omit still-existing forms.
636646
* Deleting any of those would wipe valid documents from the knowledge base.
637647
* When the cap merely coincides with source exhaustion (no slice, no next
638648
* page), reconciliation stays enabled so deleted forms are cleaned up.
639649
*/
640-
if (syncContext && (slicedSome || (hitLimit && Boolean(nextPageToken)) || skippedOnError)) {
650+
if (
651+
syncContext &&
652+
(slicedSome || (hitLimit && Boolean(nextPageToken)) || skippedOnError || incompleteSearch)
653+
) {
641654
syncContext.listingCapped = true
642655
}
643656

apps/sim/connectors/grain/grain.ts

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -471,10 +471,11 @@ export const grainConnector: ConnectorConfig = {
471471
try {
472472
if (!externalId) return null
473473

474-
const recording = await fetchRecording(accessToken, externalId)
474+
const [recording, segments] = await Promise.all([
475+
fetchRecording(accessToken, externalId),
476+
fetchTranscript(accessToken, externalId),
477+
])
475478
if (!recording) return null
476-
477-
const segments = await fetchTranscript(accessToken, externalId)
478479
if (!segments) return null
479480

480481
const hasTranscript = segments.some((segment) => segment.text?.trim())

apps/sim/connectors/jsm/jsm.ts

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -338,6 +338,13 @@ export const jsmConnector: ConnectorConfig = {
338338
'read:request:jira-service-management',
339339
'read:request.comment:jira-service-management',
340340
'read:request.status:jira-service-management',
341+
/**
342+
* Requests embed a `reporter` user object whose `displayName` is surfaced
343+
* in document content and the Reporter tag. Atlassian only populates
344+
* embedded user data when the user-read scope is granted, so request it
345+
* here. Present in the `jira` OAuth provider config as `read:jira-user`.
346+
*/
347+
'read:jira-user',
341348
'offline_access',
342349
],
343350
},

0 commit comments

Comments
 (0)