diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index cf7c119..e74964d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -75,7 +75,7 @@ jobs: if: github.event_name != 'pull_request' || github.head_ref != 'release-please--branches--main' strategy: matrix: - python-version: ["3.10", "3.11", "3.12"] + python-version: ["3.10", "3.11", "3.12", "3.13"] steps: - name: Checkout repository uses: actions/checkout@v4 @@ -107,7 +107,7 @@ jobs: if: github.event_name != 'pull_request' || github.head_ref != 'release-please--branches--main' strategy: matrix: - python-version: ["3.10", "3.11", "3.12"] + python-version: ["3.10", "3.11", "3.12", "3.13"] steps: - name: Checkout repository uses: actions/checkout@v4 @@ -140,7 +140,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.10", "3.11", "3.12"] + python-version: ["3.10", "3.11", "3.12", "3.13"] steps: - name: Clear space run: | @@ -152,6 +152,7 @@ jobs: /opt/microsoft /opt/google \ /imagegeneration \ "$AGENT_TOOLSDIRECTORY" + rm -rf /usr/share/dotnet /opt/ghc /usr/local/share/boost "$AGENT_TOOLSDIRECTORY" docker system prune -af df -h @@ -180,7 +181,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.10", "3.11", "3.12"] + python-version: ["3.10", "3.11", "3.12", "3.13"] steps: - name: Clear space run: | @@ -192,6 +193,7 @@ jobs: /opt/microsoft /opt/google \ /imagegeneration \ "$AGENT_TOOLSDIRECTORY" + rm -rf /usr/share/dotnet /opt/ghc /usr/local/share/boost "$AGENT_TOOLSDIRECTORY" docker system prune -af df -h @@ -270,6 +272,8 @@ jobs: is-default: false - python-version: "3.12" is-default: true + - python-version: "3.13" + is-default: false steps: - name: Clear Space run: | @@ -343,6 +347,8 @@ jobs: is-default: false - python-version: "3.12" is-default: true + - python-version: "3.13" + is-default: false steps: - name: Clear Space run: | @@ -416,6 +422,8 @@ jobs: is-default: false - python-version: "3.12" is-default: true + - python-version: "3.13" + is-default: false steps: - name: Clear Space run: | @@ -489,6 +497,8 @@ jobs: is-default: false - python-version: "3.12" is-default: true + - python-version: "3.13" + is-default: false steps: - name: Clear Space run: | diff --git a/Dockerfile b/Dockerfile index 01a49ca..2b661dd 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,15 +3,31 @@ # For non-3.12 targets we reinstall torch from the CUDA 12.8 wheel index # (~7 GB overhead) and repoint /usr/local/bin/python so the worker CMD picks # up the correct interpreter. +# Base image provides Python 3.9-3.13 via deadsnakes; only 3.12 has torch +# pre-installed. For 3.10 and 3.11 we reinstall torch from the CUDA 12.8 +# wheel index (~7 GB overhead) and repoint /usr/local/bin/python so the +# worker CMD picks up the correct interpreter. FROM runpod/pytorch:1.0.3-cu1281-torch291-ubuntu2204 # Target Python version for the worker runtime. +# Native per-version GPU base. One Python interpreter per image, installed +# directly into /usr/local/bin/python. No side-by-side, no symlink dance, +# no 7 GB cold-start tax. +# +# - nvidia/cuda:12.8.1-cudnn-runtime-ubuntu22.04 provides the CUDA + cuDNN +# runtime libraries needed by torch's cu128 wheels. +# - On jammy (22.04), python3.10 ships from upstream Ubuntu (system Python); +# python3.11/3.12/3.13 come from the deadsnakes PPA. The same apt-get +# invocation below resolves both sources transparently. +# - pip is bootstrapped via get-pip.py (urllib stdlib): the Ubuntu system +# python3.10 has ensurepip disabled by Debian policy, and deadsnakes +# interpreters do not ship pip by default. get-pip.py works for any +# interpreter regardless of distro patching. ARG PYTHON_VERSION=3.12 ARG TORCH_VERSION=2.9.1+cu128 ARG TORCH_INDEX_URL=https://download.pytorch.org/whl/cu128 -# Expose the target version to the running worker for startup validation. -ENV FLASH_PYTHON_VERSION=${PYTHON_VERSION} +FROM nvidia/cuda:12.8.1-cudnn-runtime-ubuntu22.04 # Validate the base image provides the requested interpreter and activate it. # For non-3.12 targets, install torch for the selected Python and repoint @@ -22,6 +38,11 @@ ENV FLASH_PYTHON_VERSION=${PYTHON_VERSION} # pip bootstrap: Ubuntu 22.04's system python3.10 has ensurepip disabled by # Debian policy, so we install pip via get-pip.py (works for any interpreter # regardless of distro patching). urllib is stdlib, avoiding a curl dependency. +RUN python${PYTHON_VERSION} --version \ + && if [ "${PYTHON_VERSION}" != "3.12" ]; then \ + python${PYTHON_VERSION} -c "import urllib.request; urllib.request.urlretrieve('https://bootstrap.pypa.io/get-pip.py', '/tmp/get-pip.py')" \ + && python${PYTHON_VERSION} /tmp/get-pip.py --no-cache-dir \ + && rm -f /tmp/get-pip.py \ RUN python${PYTHON_VERSION} --version \ && if [ "${PYTHON_VERSION}" != "3.12" ]; then \ python${PYTHON_VERSION} -c "import urllib.request; urllib.request.urlretrieve('https://bootstrap.pypa.io/get-pip.py', '/tmp/get-pip.py')" \ @@ -33,49 +54,64 @@ RUN python${PYTHON_VERSION} --version \ && ln -sf "$(which python${PYTHON_VERSION})" /usr/local/bin/python \ && ln -sf "$(which python${PYTHON_VERSION})" /usr/local/bin/python3; \ fi +# Re-declare ARGs after FROM so they're visible in this build stage. +ARG PYTHON_VERSION +ARG TORCH_VERSION +ARG TORCH_INDEX_URL -WORKDIR /app - -# Prevent interactive prompts during package installation +ENV FLASH_PYTHON_VERSION=${PYTHON_VERSION} ENV DEBIAN_FRONTEND=noninteractive -# Set timezone to avoid tzdata prompts ENV TZ=Etc/UTC - -# Enable HuggingFace transfer acceleration ENV HF_HUB_ENABLE_HF_TRANSFER=1 -# Relocate HuggingFace cache outside /root/.cache to exclude from volume sync ENV HF_HOME=/hf-cache -# Configure APT cache to persist under /root/.cache for volume sync +# Install ONE Python natively. 3.10 from upstream Ubuntu (jammy ships it as +# system Python); 3.11/3.12/3.13 from deadsnakes. +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + software-properties-common ca-certificates curl gnupg \ + && add-apt-repository -y ppa:deadsnakes/ppa \ + && apt-get update \ + && apt-get install -y --no-install-recommends \ + python${PYTHON_VERSION} \ + python${PYTHON_VERSION}-venv \ + python${PYTHON_VERSION}-dev \ + git \ + && ln -sf "$(which python${PYTHON_VERSION})" /usr/local/bin/python \ + && ln -sf "$(which python${PYTHON_VERSION})" /usr/local/bin/python3 \ + && apt-get clean && rm -rf /var/lib/apt/lists/* + +# Bootstrap pip via get-pip.py. +RUN python -c "import urllib.request; urllib.request.urlretrieve('https://bootstrap.pypa.io/get-pip.py', '/tmp/get-pip.py')" \ + && python /tmp/get-pip.py --no-cache-dir \ + && rm -f /tmp/get-pip.py + +# Install torch natively for the active interpreter. +RUN python -m pip install --no-cache-dir \ + --index-url ${TORCH_INDEX_URL} \ + "torch==${TORCH_VERSION}" + +WORKDIR /app + +# Configure APT cache to persist under /root/.cache for volume sync. RUN mkdir -p /root/.cache/apt/archives/partial \ && echo 'Dir::Cache "/root/.cache/apt";' > /etc/apt/apt.conf.d/01cache -# Install system dependencies and uv -# Note: build-essential not pre-installed to reduce image size (400MB savings) -# Automatic detection will install it when needed (no manual action required) -# Advanced: Users can pre-install via system_dependencies=["build-essential"] -RUN DEBIAN_FRONTEND=noninteractive apt-get update && apt-get install -y --no-install-recommends \ - curl ca-certificates git \ - && curl -LsSf https://astral.sh/uv/install.sh | sh \ +# Install uv for downstream dependency installation. +RUN curl -LsSf https://astral.sh/uv/install.sh | sh \ && cp ~/.local/bin/uv /usr/local/bin/uv \ - && chmod +x /usr/local/bin/uv \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* + && chmod +x /usr/local/bin/uv -# Copy app code and install dependencies -# Use --python to target the active interpreter (preserves torch in its site-packages) +# Copy app code and install worker dependencies into the active interpreter. COPY README.md pyproject.toml uv.lock ./ COPY src/ ./ RUN uv export --format requirements-txt --no-dev --no-hashes > requirements.txt \ && uv pip install --python $(which python) --break-system-packages -r requirements.txt -# Install numpy for the active Python version. -# The runpod/pytorch image ships torch but not numpy. Flash build excludes numpy -# from tarballs (BASE_IMAGE_PACKAGES) to save tarball space (~30 MB), so numpy -# must be provided here in the base image. +# Install numpy for the active Python (excluded from flash tarballs). RUN python -m pip install --no-cache-dir numpy -# Verify torch, numpy, and the expected Python version are available. +# Verify torch, numpy, and the expected interpreter are wired correctly. RUN python -c "import sys; actual = f'{sys.version_info.major}.{sys.version_info.minor}'; expected = '${PYTHON_VERSION}'; assert actual == expected, f'Expected Python {expected}, got {actual}'; print(f'Python {actual} OK')" \ && python -c "import torch; print(f'torch {torch.__version__} CUDA {torch.cuda.is_available()}')" \ && python -c "import numpy; print(f'numpy {numpy.__version__}')" diff --git a/Dockerfile-lb b/Dockerfile-lb index ff927c6..91c849e 100644 --- a/Dockerfile-lb +++ b/Dockerfile-lb @@ -3,15 +3,21 @@ # For non-3.12 targets we reinstall torch from the CUDA 12.8 wheel index # (~7 GB overhead) and repoint /usr/local/bin/python so the worker CMD picks # up the correct interpreter. +# Base image provides Python 3.9-3.13 via deadsnakes; only 3.12 has torch +# pre-installed. For 3.10 and 3.11 we reinstall torch from the CUDA 12.8 +# wheel index (~7 GB overhead) and repoint /usr/local/bin/python so the +# worker CMD picks up the correct interpreter. FROM runpod/pytorch:1.0.3-cu1281-torch291-ubuntu2204 # Target Python version for the worker runtime. +# Native per-version GPU LB base. Same shape as Dockerfile, with the +# uvicorn entrypoint for load-balanced endpoints. See Dockerfile for the +# full rationale on the nvidia/cuda + deadsnakes approach. ARG PYTHON_VERSION=3.12 ARG TORCH_VERSION=2.9.1+cu128 ARG TORCH_INDEX_URL=https://download.pytorch.org/whl/cu128 -# Expose the target version to the running worker for startup validation. -ENV FLASH_PYTHON_VERSION=${PYTHON_VERSION} +FROM nvidia/cuda:12.8.1-cudnn-runtime-ubuntu22.04 # Validate the base image provides the requested interpreter and activate it. # For non-3.12 targets, install torch for the selected Python and repoint @@ -22,6 +28,11 @@ ENV FLASH_PYTHON_VERSION=${PYTHON_VERSION} # pip bootstrap: Ubuntu 22.04's system python3.10 has ensurepip disabled by # Debian policy, so we install pip via get-pip.py (works for any interpreter # regardless of distro patching). urllib is stdlib, avoiding a curl dependency. +RUN python${PYTHON_VERSION} --version \ + && if [ "${PYTHON_VERSION}" != "3.12" ]; then \ + python${PYTHON_VERSION} -c "import urllib.request; urllib.request.urlretrieve('https://bootstrap.pypa.io/get-pip.py', '/tmp/get-pip.py')" \ + && python${PYTHON_VERSION} /tmp/get-pip.py --no-cache-dir \ + && rm -f /tmp/get-pip.py \ RUN python${PYTHON_VERSION} --version \ && if [ "${PYTHON_VERSION}" != "3.12" ]; then \ python${PYTHON_VERSION} -c "import urllib.request; urllib.request.urlretrieve('https://bootstrap.pypa.io/get-pip.py', '/tmp/get-pip.py')" \ @@ -33,56 +44,71 @@ RUN python${PYTHON_VERSION} --version \ && ln -sf "$(which python${PYTHON_VERSION})" /usr/local/bin/python \ && ln -sf "$(which python${PYTHON_VERSION})" /usr/local/bin/python3; \ fi +# Re-declare ARGs after FROM so they're visible in this build stage. +ARG PYTHON_VERSION +ARG TORCH_VERSION +ARG TORCH_INDEX_URL -WORKDIR /app - -# Prevent interactive prompts during package installation +ENV FLASH_PYTHON_VERSION=${PYTHON_VERSION} ENV DEBIAN_FRONTEND=noninteractive -# Set timezone to avoid tzdata prompts ENV TZ=Etc/UTC - -# Enable HuggingFace transfer acceleration ENV HF_HUB_ENABLE_HF_TRANSFER=1 -# Relocate HuggingFace cache outside /root/.cache to exclude from volume sync ENV HF_HOME=/hf-cache -# Configure APT cache to persist under /root/.cache for volume sync +# Install ONE Python natively. 3.10 from upstream Ubuntu (jammy ships it as +# system Python); 3.11/3.12/3.13 from deadsnakes. +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + software-properties-common ca-certificates curl gnupg \ + && add-apt-repository -y ppa:deadsnakes/ppa \ + && apt-get update \ + && apt-get install -y --no-install-recommends \ + python${PYTHON_VERSION} \ + python${PYTHON_VERSION}-venv \ + python${PYTHON_VERSION}-dev \ + git \ + && ln -sf "$(which python${PYTHON_VERSION})" /usr/local/bin/python \ + && ln -sf "$(which python${PYTHON_VERSION})" /usr/local/bin/python3 \ + && apt-get clean && rm -rf /var/lib/apt/lists/* + +# Bootstrap pip via get-pip.py. +RUN python -c "import urllib.request; urllib.request.urlretrieve('https://bootstrap.pypa.io/get-pip.py', '/tmp/get-pip.py')" \ + && python /tmp/get-pip.py --no-cache-dir \ + && rm -f /tmp/get-pip.py + +# Install torch natively for the active interpreter. +RUN python -m pip install --no-cache-dir \ + --index-url ${TORCH_INDEX_URL} \ + "torch==${TORCH_VERSION}" + +WORKDIR /app + +# Configure APT cache to persist under /root/.cache for volume sync. RUN mkdir -p /root/.cache/apt/archives/partial \ && echo 'Dir::Cache "/root/.cache/apt";' > /etc/apt/apt.conf.d/01cache -# Install system dependencies and uv -# Note: build-essential not pre-installed to reduce image size (400MB savings) -# Automatic detection will install it when needed (no manual action required) -# Advanced: Users can pre-install via system_dependencies=["build-essential"] -RUN DEBIAN_FRONTEND=noninteractive apt-get update && apt-get install -y --no-install-recommends \ - curl ca-certificates git \ - && curl -LsSf https://astral.sh/uv/install.sh | sh \ +# Install uv for downstream dependency installation. +RUN curl -LsSf https://astral.sh/uv/install.sh | sh \ && cp ~/.local/bin/uv /usr/local/bin/uv \ - && chmod +x /usr/local/bin/uv \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* + && chmod +x /usr/local/bin/uv -# Copy app code and install dependencies -# Use --python to target the active interpreter (preserves torch in its site-packages) +# Copy app code and install worker dependencies into the active interpreter. COPY README.md pyproject.toml uv.lock ./ COPY src/ ./ RUN uv export --format requirements-txt --no-dev --no-hashes > requirements.txt \ && uv pip install --python $(which python) --break-system-packages -r requirements.txt -# Install numpy for the active Python version. -# The runpod/pytorch image ships torch but not numpy. Flash build excludes numpy -# from tarballs (BASE_IMAGE_PACKAGES) to save tarball space (~30 MB), so numpy -# must be provided here in the base image. +# Install numpy for the active Python (excluded from flash tarballs). RUN python -m pip install --no-cache-dir numpy -# Verify torch, numpy, and the expected Python version are available. +# Verify torch, numpy, and the expected interpreter are wired correctly. RUN python -c "import sys; actual = f'{sys.version_info.major}.{sys.version_info.minor}'; expected = '${PYTHON_VERSION}'; assert actual == expected, f'Expected Python {expected}, got {actual}'; print(f'Python {actual} OK')" \ && python -c "import torch; print(f'torch {torch.__version__} CUDA {torch.cuda.is_available()}')" \ && python -c "import numpy; print(f'numpy {numpy.__version__}')" EXPOSE 80 -# CMD will be overridden by RunPod at runtime to run the specific generated handler -# The handler factory generates handler_{resource_name}.py files +# CMD will be overridden by RunPod at runtime to run the specific generated handler. +# The handler factory generates handler_{resource_name}.py files. # RunPod will invoke: uvicorn handler_{resource_name}:app --host 0.0.0.0 --port 80 CMD ["uvicorn", "lb_handler:app", "--host", "0.0.0.0", "--port", "80", "--timeout-keep-alive", "600"] diff --git a/src/version.py b/src/version.py index f09a91e..4fa9071 100644 --- a/src/version.py +++ b/src/version.py @@ -8,6 +8,35 @@ __version__ = "1.5.0" # x-release-please-version +class PythonVersionMismatchError(RuntimeError): + """Raised when the running interpreter does not match the image's declared version.""" + + +def assert_python_version_matches_image() -> None: + """Fail fast if ``sys.version_info`` disagrees with ``FLASH_PYTHON_VERSION``. + + The Dockerfiles stamp ``FLASH_PYTHON_VERSION`` with the image's target + Python (e.g. ``3.11``). If an image is mis-tagged, an apt upgrade + changes ``python`` symlinks, or the GPU side-by-side torch install fails + silently, this surfaces the skew immediately at worker boot instead of + letting user code fail later with a confusing ABI error. + + Skips the check when ``FLASH_PYTHON_VERSION`` is unset (local dev, + test harnesses). + """ + declared = os.environ.get("FLASH_PYTHON_VERSION") + if not declared: + return + + actual = f"{sys.version_info.major}.{sys.version_info.minor}" + if actual != declared: + raise PythonVersionMismatchError( + f"Worker interpreter mismatch: image declares FLASH_PYTHON_VERSION=" + f"{declared!r} but sys.version_info reports {actual!r}. " + f"Rebuild the image with the correct PYTHON_VERSION build arg." + ) + + class PythonVersionMismatchError(RuntimeError): """Raised when the running interpreter does not match the image's declared version."""