From 82ba2c9654f33a8beef101f3a2b59f1389f1de4e Mon Sep 17 00:00:00 2001 From: beastoin Date: Sun, 21 Jun 2026 09:36:13 +0000 Subject: [PATCH 1/5] Add torch_audiomentations to parakeet Dockerfile MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pyannote.audio imports torch_audiomentations via pyannote.audio.core.task, but it was missing from the --no-deps install list. Without it, get_builtin_embedding_model() silently returns None and all embedding requests fall back to the external HTTP diarizer — defeating the built-in embedding feature from #8082. Co-Authored-By: Claude Opus 4.6 --- backend/parakeet/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/parakeet/Dockerfile b/backend/parakeet/Dockerfile index f11256b76eb..df55909c208 100644 --- a/backend/parakeet/Dockerfile +++ b/backend/parakeet/Dockerfile @@ -41,7 +41,7 @@ RUN pip install --no-cache-dir --no-deps "pyannote.audio>=3.1.0" && \ pip install --no-cache-dir --no-deps \ "pyannote.core" "pyannote.database" "pyannote.pipeline" \ "speechbrain" "asteroid-filterbanks" "einops" "semver" \ - "hf_transfer" "tensorboardX" + "hf_transfer" "tensorboardX" "torch_audiomentations" COPY backend/parakeet/ . From 366c75535a2a3455d062cea247393f192e922e1a Mon Sep 17 00:00:00 2001 From: beastoin Date: Sun, 21 Jun 2026 10:23:36 +0000 Subject: [PATCH 2/5] Add julius and torch-pitch-shift to parakeet Dockerfile MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit torch_audiomentations was installed --no-deps so its own deps (julius, torch-pitch-shift) were skipped. Import chain: pyannote.audio → task.py → torch_audiomentations → julius → ModuleNotFoundError torch and torchaudio are already in the NGC base image. 
Co-Authored-By: Claude Opus 4.6 --- backend/parakeet/Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/backend/parakeet/Dockerfile b/backend/parakeet/Dockerfile index df55909c208..afb9025b41a 100644 --- a/backend/parakeet/Dockerfile +++ b/backend/parakeet/Dockerfile @@ -41,7 +41,8 @@ RUN pip install --no-cache-dir --no-deps "pyannote.audio>=3.1.0" && \ pip install --no-cache-dir --no-deps \ "pyannote.core" "pyannote.database" "pyannote.pipeline" \ "speechbrain" "asteroid-filterbanks" "einops" "semver" \ - "hf_transfer" "tensorboardX" "torch_audiomentations" + "hf_transfer" "tensorboardX" "torch_audiomentations" \ + "julius" "torch-pitch-shift" COPY backend/parakeet/ . From 4557e67dbdd7e81d0f167a283ff3550cb1fc751e Mon Sep 17 00:00:00 2001 From: beastoin Date: Sun, 21 Jun 2026 10:44:40 +0000 Subject: [PATCH 3/5] Stub torch_audiomentations instead of installing dep chain MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pyannote.audio.core.task imports torch_audiomentations for training-time data augmentation, which pulls in julius and torch-pitch-shift, which in turn need real torchaudio (incompatible with NGC torch ABI). We only use pyannote Model + Inference for embedding extraction, never the training pipeline. Stub torch_audiomentations the same way we stub torchaudio — satisfies the import with zero transitive dep issues. Removes torch_audiomentations, julius, torch-pitch-shift from pip install since the stub replaces them. 
Co-Authored-By: Claude Opus 4.6 --- backend/parakeet/Dockerfile | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/backend/parakeet/Dockerfile b/backend/parakeet/Dockerfile index afb9025b41a..443d5f301c8 100644 --- a/backend/parakeet/Dockerfile +++ b/backend/parakeet/Dockerfile @@ -25,6 +25,13 @@ RUN mkdir -p /usr/local/lib/python3.12/dist-packages/torchaudio && \ printf '__version__ = "stub"\n' > \ /usr/local/lib/python3.12/dist-packages/torchaudio/__init__.py +# pyannote.audio.core.task imports torch_audiomentations for training-time +# data augmentation. We only use Model + Inference (embedding extraction), +# never the training pipeline. Stub the package to satisfy the import. +RUN mkdir -p /usr/local/lib/python3.12/dist-packages/torch_audiomentations && \ + printf '__version__ = "stub"\n' > \ + /usr/local/lib/python3.12/dist-packages/torch_audiomentations/__init__.py + # Extra deps not in the NGC image. # pyannote.audio and torch-dependent deps installed --no-deps to prevent # upgrading torch/torchvision/torchaudio in the NGC stack. @@ -41,8 +48,7 @@ RUN pip install --no-cache-dir --no-deps "pyannote.audio>=3.1.0" && \ pip install --no-cache-dir --no-deps \ "pyannote.core" "pyannote.database" "pyannote.pipeline" \ "speechbrain" "asteroid-filterbanks" "einops" "semver" \ - "hf_transfer" "tensorboardX" "torch_audiomentations" \ - "julius" "torch-pitch-shift" + "hf_transfer" "tensorboardX" COPY backend/parakeet/ . From 00d4d127b12b475fc1503e13e9cd59aa9210af6e Mon Sep 17 00:00:00 2001 From: beastoin Date: Sun, 21 Jun 2026 12:18:24 +0000 Subject: [PATCH 4/5] Adopt verified Dockerfile from dev testing (v7) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three fixes verified working on dev GKE (L4 GPU): 1. torchaudio: install real package --no-deps, patch __init__.py to skip C extension loader. wespeaker needs kaldi.fbank for mel filterbank features — the pure-Python compliance module works. 2. 
torch_audiomentations: expand stub with Identity, BaseWaveformTransform, Mix, from_dict — all symbols pyannote.audio.core.task imports. 3. pyannote telemetry: stub 5 no-op functions (needs opentelemetry OTLP which is unnecessary for inference-only usage). Dev verification: pyannote import OK, kaldi.fbank OK, wespeaker model load OK, 256-dim embedding extraction OK on GPU. Co-Authored-By: mon Co-Authored-By: Claude Opus 4.6 --- backend/parakeet/Dockerfile | 44 +++++++++++++++++++++++++++---------- 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/backend/parakeet/Dockerfile b/backend/parakeet/Dockerfile index 443d5f301c8..a0835d00f5b 100644 --- a/backend/parakeet/Dockerfile +++ b/backend/parakeet/Dockerfile @@ -17,20 +17,36 @@ WORKDIR /app RUN pip install --no-cache-dir --no-deps --force-reinstall \ "nemo_toolkit[asr] @ git+https://github.com/beastoin/NeMo.git@68d99a17944181452205dc60da21e82bf3647054" -# NGC's custom torch ABI is incompatible with ALL standard torchaudio -# wheels (PyPI, cu128 index). NeMo imports torchaudio at module level -# (via squim metrics) but parakeet never calls those functions. -# Create a lightweight stub that satisfies `import torchaudio`. -RUN mkdir -p /usr/local/lib/python3.12/dist-packages/torchaudio && \ - printf '__version__ = "stub"\n' > \ - /usr/local/lib/python3.12/dist-packages/torchaudio/__init__.py +# NGC torch ABI is incompatible with standard torchaudio C extensions. +# Install torchaudio --no-deps for the pure-Python compliance.kaldi module +# (wespeaker needs kaldi.fbank for mel filterbank features), then patch +# __init__.py to skip the C extension loader and only expose compliance. +RUN pip install --no-cache-dir --no-deps torchaudio && \ + printf '__version__ = "2.11.0-ngc-compat"\nfrom . 
import compliance\n' > \ + /usr/local/lib/python3.12/dist-packages/torchaudio/__init__.py && \ + printf '_IS_TORCHAUDIO_EXT_AVAILABLE = False\ndef fail_if_no_align(*a, **kw): pass\ndef fail_if_no_sox(*a, **kw): pass\ndef fail_if_no_ffmpeg(*a, **kw): pass\ndef fail_if_no_soundfile(*a, **kw): pass\ndef fail_if_no_kaldi(*a, **kw): pass\n' > \ + /usr/local/lib/python3.12/dist-packages/torchaudio/_extension/__init__.py # pyannote.audio.core.task imports torch_audiomentations for training-time # data augmentation. We only use Model + Inference (embedding extraction), -# never the training pipeline. Stub the package to satisfy the import. -RUN mkdir -p /usr/local/lib/python3.12/dist-packages/torch_audiomentations && \ - printf '__version__ = "stub"\n' > \ - /usr/local/lib/python3.12/dist-packages/torch_audiomentations/__init__.py +# never the training pipeline. Stub the package with all symbols pyannote needs. +RUN mkdir -p /usr/local/lib/python3.12/dist-packages/torch_audiomentations/core \ + /usr/local/lib/python3.12/dist-packages/torch_audiomentations/augmentations \ + /usr/local/lib/python3.12/dist-packages/torch_audiomentations/utils && \ + printf '__version__ = "stub"\nclass Identity:\n pass\n' > \ + /usr/local/lib/python3.12/dist-packages/torch_audiomentations/__init__.py && \ + printf '' > \ + /usr/local/lib/python3.12/dist-packages/torch_audiomentations/core/__init__.py && \ + printf 'class BaseWaveformTransform:\n pass\n' > \ + /usr/local/lib/python3.12/dist-packages/torch_audiomentations/core/transforms_interface.py && \ + printf '' > \ + /usr/local/lib/python3.12/dist-packages/torch_audiomentations/augmentations/__init__.py && \ + printf 'class Mix:\n pass\n' > \ + /usr/local/lib/python3.12/dist-packages/torch_audiomentations/augmentations/mix.py && \ + printf '' > \ + /usr/local/lib/python3.12/dist-packages/torch_audiomentations/utils/__init__.py && \ + printf 'def from_dict(*a, **kw):\n pass\n' > \ + 
/usr/local/lib/python3.12/dist-packages/torch_audiomentations/utils/config.py # Extra deps not in the NGC image. # pyannote.audio and torch-dependent deps installed --no-deps to prevent @@ -44,11 +60,15 @@ RUN pip install --no-cache-dir \ "prometheus-client>=0.21.0" \ "soundfile>=0.13.0" +# pyannote.audio + deps. Post-install: stub telemetry (needs opentelemetry +# OTLP exporter which we don't need for inference-only usage). RUN pip install --no-cache-dir --no-deps "pyannote.audio>=3.1.0" && \ pip install --no-cache-dir --no-deps \ "pyannote.core" "pyannote.database" "pyannote.pipeline" \ "speechbrain" "asteroid-filterbanks" "einops" "semver" \ - "hf_transfer" "tensorboardX" + "hf_transfer" "tensorboardX" && \ + printf 'def set_opentelemetry_log_level(*a, **kw): pass\ndef set_telemetry_metrics(*a, **kw): pass\ndef track_model_init(*a, **kw): pass\ndef track_pipeline_init(*a, **kw): pass\ndef track_pipeline_apply(*a, **kw): pass\n' > \ + /usr/local/lib/python3.12/dist-packages/pyannote/audio/telemetry/__init__.py COPY backend/parakeet/ . From 12bb90b2442ede8a32df0d1527096e66cfe69456 Mon Sep 17 00:00:00 2001 From: beastoin Date: Sun, 21 Jun 2026 13:30:55 +0000 Subject: [PATCH 5/5] Pin pyannote.audio <4.0 and expose torchaudio.functional Address Codex review findings: - Pin pyannote.audio to <4.0 to prevent untested major version upgrades - Expose torchaudio.functional module for non-16kHz audio resampling resilience Co-Authored-By: Claude Opus 4.6 --- backend/parakeet/Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/parakeet/Dockerfile b/backend/parakeet/Dockerfile index a0835d00f5b..01029f63624 100644 --- a/backend/parakeet/Dockerfile +++ b/backend/parakeet/Dockerfile @@ -22,7 +22,7 @@ RUN pip install --no-cache-dir --no-deps --force-reinstall \ # (wespeaker needs kaldi.fbank for mel filterbank features), then patch # __init__.py to skip the C extension loader and only expose compliance. 
RUN pip install --no-cache-dir --no-deps torchaudio && \ - printf '__version__ = "2.11.0-ngc-compat"\nfrom . import compliance\n' > \ + printf '__version__ = "2.11.0-ngc-compat"\nfrom . import compliance\nfrom . import functional\n' > \ /usr/local/lib/python3.12/dist-packages/torchaudio/__init__.py && \ printf '_IS_TORCHAUDIO_EXT_AVAILABLE = False\ndef fail_if_no_align(*a, **kw): pass\ndef fail_if_no_sox(*a, **kw): pass\ndef fail_if_no_ffmpeg(*a, **kw): pass\ndef fail_if_no_soundfile(*a, **kw): pass\ndef fail_if_no_kaldi(*a, **kw): pass\n' > \ /usr/local/lib/python3.12/dist-packages/torchaudio/_extension/__init__.py @@ -62,7 +62,7 @@ RUN pip install --no-cache-dir \ # pyannote.audio + deps. Post-install: stub telemetry (needs opentelemetry # OTLP exporter which we don't need for inference-only usage). -RUN pip install --no-cache-dir --no-deps "pyannote.audio>=3.1.0" && \ +RUN pip install --no-cache-dir --no-deps "pyannote.audio>=3.1.0,<4.0" && \ pip install --no-cache-dir --no-deps \ "pyannote.core" "pyannote.database" "pyannote.pipeline" \ "speechbrain" "asteroid-filterbanks" "einops" "semver" \