From c2f50b1b2c43feaf8ecab8978aa718de65fd78d9 Mon Sep 17 00:00:00 2001
From: enwaiax <enwaiax@users.noreply.github.com>
Date: Mon, 16 Mar 2026 15:25:42 +0000
Subject: [PATCH] fix: guard file unlink in audio extraction to prevent crash
 in library mode

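In library mode, audio content arrives as base64-encoded binary data
(not a file path). PR #1119 added file-path support for Dataloader but
left Path.unlink() unconditional. When the payload is real audio, the
attempt to decode it as a UTF-8 path fails, the path variable keeps the
raw base64 string (~2MB), and unlink() is then called with that string
as a filename, raising OSError (ENAMETOOLONG).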

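Use a `source_file_path` sentinel (None by default) so unlink only runs
when content was actually resolved from an on-disk file (Dataloader/V2
API path). The separate early return for an empty decoded string is
folded into the same check, so that case now falls through to the
content-type guard instead of returning immediately.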

Fixes: NVBug 5984261
Made-with: Cursor
---
 .../extract/audio/audio_extraction.py         | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/api/src/nv_ingest_api/internal/extract/audio/audio_extraction.py b/api/src/nv_ingest_api/internal/extract/audio/audio_extraction.py
index 24ab60446..61b2d96ef 100644
--- a/api/src/nv_ingest_api/internal/extract/audio/audio_extraction.py
+++ b/api/src/nv_ingest_api/internal/extract/audio/audio_extraction.py
@@ -59,25 +59,24 @@ def _extract_from_audio(row: pd.Series, audio_client: Any, trace_info: Dict, seg
         raise ValueError("Row does not contain 'metadata'.")
 
     base64_audio = metadata.pop("content")
+    source_file_path = None
     try:
-        base64_file_path = base64_audio
-        if not base64_file_path:
+        if not base64_audio:
             return [row.to_list()]
-        base64_file_path = base64.b64decode(base64_file_path).decode("utf-8")
-        if not base64_file_path:
-            return [row.to_list()]
-        if Path(base64_file_path).exists():
-            base64_audio = read_file_as_base64(base64_file_path)
+        decoded_path = base64.b64decode(base64_audio).decode("utf-8")
+        if decoded_path and Path(decoded_path).exists():
+            source_file_path = decoded_path
+            base64_audio = read_file_as_base64(decoded_path)
     except (UnicodeDecodeError, base64.binascii.Error):
         pass
     content_metadata = metadata.get("content_metadata", {})
 
-    # Only extract transcript if content type is audio
     if (content_metadata.get("type") != ContentTypeEnum.AUDIO) or (base64_audio in (None, "")):
         return [row.to_list()]
 
-    logger.debug(f"Removing file {base64_file_path}")
-    Path(base64_file_path).unlink(missing_ok=True)
+    if source_file_path is not None:
+        logger.debug(f"Removing temporary file {source_file_path}")
+        Path(source_file_path).unlink(missing_ok=True)
 
     # Get the result from the inference model
     segments, transcript = audio_client.infer(