diff --git a/backend/parakeet/Dockerfile b/backend/parakeet/Dockerfile index f11256b76eb..01029f63624 100644 --- a/backend/parakeet/Dockerfile +++ b/backend/parakeet/Dockerfile @@ -17,13 +17,36 @@ WORKDIR /app RUN pip install --no-cache-dir --no-deps --force-reinstall \ "nemo_toolkit[asr] @ git+https://github.com/beastoin/NeMo.git@68d99a17944181452205dc60da21e82bf3647054" -# NGC's custom torch ABI is incompatible with ALL standard torchaudio -# wheels (PyPI, cu128 index). NeMo imports torchaudio at module level -# (via squim metrics) but parakeet never calls those functions. -# Create a lightweight stub that satisfies `import torchaudio`. -RUN mkdir -p /usr/local/lib/python3.12/dist-packages/torchaudio && \ - printf '__version__ = "stub"\n' > \ - /usr/local/lib/python3.12/dist-packages/torchaudio/__init__.py +# NGC torch ABI is incompatible with standard torchaudio C extensions. +# Install torchaudio --no-deps for the pure-Python compliance.kaldi module +# (wespeaker needs kaldi.fbank for mel filterbank features), then overwrite +# __init__.py and _extension/__init__.py to skip the C extension loader and expose only compliance and functional. +RUN pip install --no-cache-dir --no-deps torchaudio && \ + printf '__version__ = "2.11.0-ngc-compat"\nfrom . import compliance\nfrom . import functional\n' > \ + /usr/local/lib/python3.12/dist-packages/torchaudio/__init__.py && \ + printf '_IS_TORCHAUDIO_EXT_AVAILABLE = False\ndef fail_if_no_align(*a, **kw): pass\ndef fail_if_no_sox(*a, **kw): pass\ndef fail_if_no_ffmpeg(*a, **kw): pass\ndef fail_if_no_soundfile(*a, **kw): pass\ndef fail_if_no_kaldi(*a, **kw): pass\n' > \ + /usr/local/lib/python3.12/dist-packages/torchaudio/_extension/__init__.py + +# pyannote.audio.core.task imports torch_audiomentations for training-time +# data augmentation. We only use Model + Inference (embedding extraction), +# never the training pipeline. Stub the package with all symbols pyannote needs. 
+RUN mkdir -p /usr/local/lib/python3.12/dist-packages/torch_audiomentations/core \ + /usr/local/lib/python3.12/dist-packages/torch_audiomentations/augmentations \ + /usr/local/lib/python3.12/dist-packages/torch_audiomentations/utils && \ + printf '__version__ = "stub"\nclass Identity:\n pass\n' > \ + /usr/local/lib/python3.12/dist-packages/torch_audiomentations/__init__.py && \ + printf '' > \ + /usr/local/lib/python3.12/dist-packages/torch_audiomentations/core/__init__.py && \ + printf 'class BaseWaveformTransform:\n pass\n' > \ + /usr/local/lib/python3.12/dist-packages/torch_audiomentations/core/transforms_interface.py && \ + printf '' > \ + /usr/local/lib/python3.12/dist-packages/torch_audiomentations/augmentations/__init__.py && \ + printf 'class Mix:\n pass\n' > \ + /usr/local/lib/python3.12/dist-packages/torch_audiomentations/augmentations/mix.py && \ + printf '' > \ + /usr/local/lib/python3.12/dist-packages/torch_audiomentations/utils/__init__.py && \ + printf 'def from_dict(*a, **kw):\n pass\n' > \ + /usr/local/lib/python3.12/dist-packages/torch_audiomentations/utils/config.py # Extra deps not in the NGC image. # pyannote.audio and torch-dependent deps installed --no-deps to prevent @@ -37,11 +60,15 @@ RUN pip install --no-cache-dir \ "prometheus-client>=0.21.0" \ "soundfile>=0.13.0" -RUN pip install --no-cache-dir --no-deps "pyannote.audio>=3.1.0" && \ +# pyannote.audio + deps. Post-install: stub telemetry (needs opentelemetry +# OTLP exporter which we don't need for inference-only usage). 
+RUN pip install --no-cache-dir --no-deps "pyannote.audio>=3.1.0,<4.0" && \ pip install --no-cache-dir --no-deps \ "pyannote.core" "pyannote.database" "pyannote.pipeline" \ "speechbrain" "asteroid-filterbanks" "einops" "semver" \ - "hf_transfer" "tensorboardX" + "hf_transfer" "tensorboardX" && \ + printf 'def set_opentelemetry_log_level(*a, **kw): pass\ndef set_telemetry_metrics(*a, **kw): pass\ndef track_model_init(*a, **kw): pass\ndef track_pipeline_init(*a, **kw): pass\ndef track_pipeline_apply(*a, **kw): pass\n' > \ + /usr/local/lib/python3.12/dist-packages/pyannote/audio/telemetry/__init__.py COPY backend/parakeet/ .