1- import { Transform } from 'node:stream'
1+ import { type Readable , Transform } from 'node:stream'
22import { createLogger } from '@sim/logger'
33import { getErrorMessage } from '@sim/utils/errors'
44import { generateId } from '@sim/utils/id'
@@ -35,6 +35,13 @@ const logger = createLogger('TableImportRunner')
3535/** Emit a progress event / DB update at most every this many rows. */
3636const PROGRESS_INTERVAL_ROWS = 5000
3737
38+ /**
39+ * Thrown when this worker discovers it no longer owns the table's import (the stale-job janitor
40+ * marked its run failed and a newer import took over). The worker stops inserting rather than
41+ * writing into a table a second worker now owns.
42+ */
43+ class ImportSupersededError extends Error { }
44+
3845/** `create` infers a schema for a new table; `append`/`replace` map onto an existing one. */
3946export type TableImportMode = 'create' | 'append' | 'replace'
4047
@@ -65,6 +72,9 @@ export interface TableImportPayload {
6572export async function runTableImport ( payload : TableImportPayload ) : Promise < void > {
6673 const { importId, tableId, workspaceId, userId, fileKey, fileName, delimiter, mode } = payload
6774 const requestId = generateId ( ) . slice ( 0 , 8 )
75+ // Hoisted so `finally` can destroy it on any failure — otherwise the storage HTTP body leaks
76+ // open until it times out.
77+ let source : Readable | undefined
6878
6979 try {
7080 const loaded = await getTableById ( tableId , { includeArchived : true } )
@@ -76,7 +86,7 @@ export async function runTableImport(payload: TableImportPayload): Promise<void>
7686 const totalBytes = ( await headObject ( fileKey , 'workspace' ) ) ?. size ?? 0
7787
7888 // Stream the file rather than buffering it — a ~1M-row import must never be held in memory.
79- const source = await downloadFileStream ( { key : fileKey , context : 'workspace' } )
89+ source = await downloadFileStream ( { key : fileKey , context : 'workspace' } )
8090
8191 // Append must continue after the existing rows; create/replace start empty. Read once up
8292 // front (the import is the table's sole writer) and assign contiguous positions from it.
@@ -178,7 +188,9 @@ export async function runTableImport(payload: TableImportPayload): Promise<void>
178188 ( lastReported === 0 && inserted > 0 )
179189 ) {
180190 lastReported = inserted
181- await updateImportProgress ( tableId , inserted )
191+ // Heartbeat + ownership check: if a newer import has taken over this table, stop.
192+ const owns = await updateImportProgress ( tableId , inserted , importId )
193+ if ( ! owns ) throw new ImportSupersededError ( )
182194 // Extrapolate the total from rows-per-byte observed so far; self-refines as it runs.
183195 // `Math.max(inserted, …)` keeps it monotonic; omit when the byte size is unknown.
184196 const estimatedTotal =
@@ -219,7 +231,7 @@ export async function runTableImport(payload: TableImportPayload): Promise<void>
219231 if ( sample . length === 0 ) {
220232 // No data rows — fail rather than report a successful empty import (matches the sync route).
221233 const message = 'CSV file has no data rows'
222- await markImportFailed ( tableId , message )
234+ await markImportFailed ( tableId , importId , message )
223235 void appendTableEvent ( {
224236 kind : 'import' ,
225237 tableId,
@@ -236,8 +248,8 @@ export async function runTableImport(payload: TableImportPayload): Promise<void>
236248 await flush ( batch )
237249 }
238250
239- await updateImportProgress ( tableId , inserted )
240- await markImportReady ( tableId )
251+ await updateImportProgress ( tableId , inserted , importId )
252+ await markImportReady ( tableId , importId )
241253 void appendTableEvent ( {
242254 kind : 'import' ,
243255 tableId,
@@ -248,11 +260,22 @@ export async function runTableImport(payload: TableImportPayload): Promise<void>
248260 } )
249261 logger . info ( `[${ requestId } ] Import complete` , { tableId, fileName, mode, rows : inserted } )
250262 } catch ( err ) {
251- const message = getErrorMessage ( err , 'Import failed' )
252- logger . error ( `[${ requestId } ] Import failed for table ${ tableId } :` , err )
253- await markImportFailed ( tableId , message ) . catch ( ( ) => { } )
254- void appendTableEvent ( { kind : 'import' , tableId, importId, status : 'failed' , error : message } )
263+ if ( err instanceof ImportSupersededError ) {
264+ // A newer import owns the table now — leave its status alone and just stop.
265+ logger . info ( `[${ requestId } ] Import superseded by a newer run; stopping` , {
266+ tableId,
267+ importId,
268+ } )
269+ } else {
270+ const message = getErrorMessage ( err , 'Import failed' )
271+ logger . error ( `[${ requestId } ] Import failed for table ${ tableId } :` , err )
272+ // Scoped to importId — a no-op if a newer import has taken over.
273+ await markImportFailed ( tableId , importId , message ) . catch ( ( ) => { } )
274+ void appendTableEvent ( { kind : 'import' , tableId, importId, status : 'failed' , error : message } )
275+ }
255276 } finally {
277+ // Release the storage stream so its HTTP connection doesn't leak on failure.
278+ source ?. destroy ( )
256279 // The uploaded source file is single-use (a fresh upload per import) — delete it once the
257280 // import is terminal so the workspace bucket doesn't accumulate. Best-effort.
258281 await deleteFile ( { key : fileKey , context : 'workspace' } ) . catch ( ( err ) => {
0 commit comments