Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 14 additions & 4 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ jobs:
if: github.event_name != 'pull_request' || github.head_ref != 'release-please--branches--main'
strategy:
matrix:
python-version: ["3.10", "3.11", "3.12"]
python-version: ["3.10", "3.11", "3.12", "3.13"]
steps:
- name: Checkout repository
uses: actions/checkout@v4
Expand Down Expand Up @@ -107,7 +107,7 @@ jobs:
if: github.event_name != 'pull_request' || github.head_ref != 'release-please--branches--main'
strategy:
matrix:
python-version: ["3.10", "3.11", "3.12"]
python-version: ["3.10", "3.11", "3.12", "3.13"]
steps:
- name: Checkout repository
uses: actions/checkout@v4
Expand Down Expand Up @@ -140,7 +140,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: ["3.10", "3.11", "3.12"]
python-version: ["3.10", "3.11", "3.12", "3.13"]
steps:
- name: Clear space
run: |
Expand All @@ -152,6 +152,7 @@ jobs:
/opt/microsoft /opt/google \
/imagegeneration \
"$AGENT_TOOLSDIRECTORY"
rm -rf /usr/share/dotnet /opt/ghc /usr/local/share/boost "$AGENT_TOOLSDIRECTORY"
docker system prune -af
df -h

Expand Down Expand Up @@ -180,7 +181,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: ["3.10", "3.11", "3.12"]
python-version: ["3.10", "3.11", "3.12", "3.13"]
steps:
- name: Clear space
run: |
Expand All @@ -192,6 +193,7 @@ jobs:
/opt/microsoft /opt/google \
/imagegeneration \
"$AGENT_TOOLSDIRECTORY"
rm -rf /usr/share/dotnet /opt/ghc /usr/local/share/boost "$AGENT_TOOLSDIRECTORY"
docker system prune -af
df -h

Expand Down Expand Up @@ -270,6 +272,8 @@ jobs:
is-default: false
- python-version: "3.12"
is-default: true
- python-version: "3.13"
is-default: false
steps:
- name: Clear Space
run: |
Expand Down Expand Up @@ -343,6 +347,8 @@ jobs:
is-default: false
- python-version: "3.12"
is-default: true
- python-version: "3.13"
is-default: false
steps:
- name: Clear Space
run: |
Expand Down Expand Up @@ -416,6 +422,8 @@ jobs:
is-default: false
- python-version: "3.12"
is-default: true
- python-version: "3.13"
is-default: false
steps:
- name: Clear Space
run: |
Expand Down Expand Up @@ -489,6 +497,8 @@ jobs:
is-default: false
- python-version: "3.12"
is-default: true
- python-version: "3.13"
is-default: false
steps:
- name: Clear Space
run: |
Expand Down
90 changes: 63 additions & 27 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,31 @@
# For non-3.12 targets we reinstall torch from the CUDA 12.8 wheel index
# (~7 GB overhead) and repoint /usr/local/bin/python so the worker CMD picks
# up the correct interpreter.
# Base image provides Python 3.9-3.13 via deadsnakes; only 3.12 has torch
# pre-installed. For 3.10 and 3.11 we reinstall torch from the CUDA 12.8
# wheel index (~7 GB overhead) and repoint /usr/local/bin/python so the
# worker CMD picks up the correct interpreter.
FROM runpod/pytorch:1.0.3-cu1281-torch291-ubuntu2204

# Target Python version for the worker runtime.
# Native per-version GPU base. One Python interpreter per image, installed
# directly into /usr/local/bin/python. No side-by-side, no symlink dance,
# no 7 GB cold-start tax.
#
# - nvidia/cuda:12.8.1-cudnn-runtime-ubuntu22.04 provides the CUDA + cuDNN
# runtime libraries needed by torch's cu128 wheels.
# - On jammy (22.04), python3.10 ships from upstream Ubuntu (system Python);
# python3.11/3.12/3.13 come from the deadsnakes PPA. The same apt-get
# invocation below resolves both sources transparently.
# - pip is bootstrapped via get-pip.py (urllib stdlib): the Ubuntu system
# python3.10 has ensurepip disabled by Debian policy, and deadsnakes
# interpreters do not ship pip by default. get-pip.py works for any
# interpreter regardless of distro patching.
ARG PYTHON_VERSION=3.12
ARG TORCH_VERSION=2.9.1+cu128
ARG TORCH_INDEX_URL=https://download.pytorch.org/whl/cu128

# Expose the target version to the running worker for startup validation.
ENV FLASH_PYTHON_VERSION=${PYTHON_VERSION}
FROM nvidia/cuda:12.8.1-cudnn-runtime-ubuntu22.04

# Validate the base image provides the requested interpreter and activate it.
# For non-3.12 targets, install torch for the selected Python and repoint
Expand All @@ -22,6 +38,11 @@ ENV FLASH_PYTHON_VERSION=${PYTHON_VERSION}
# pip bootstrap: Ubuntu 22.04's system python3.10 has ensurepip disabled by
# Debian policy, so we install pip via get-pip.py (works for any interpreter
# regardless of distro patching). urllib is stdlib, avoiding a curl dependency.
RUN python${PYTHON_VERSION} --version \
&& if [ "${PYTHON_VERSION}" != "3.12" ]; then \
python${PYTHON_VERSION} -c "import urllib.request; urllib.request.urlretrieve('https://bootstrap.pypa.io/get-pip.py', '/tmp/get-pip.py')" \
&& python${PYTHON_VERSION} /tmp/get-pip.py --no-cache-dir \
&& rm -f /tmp/get-pip.py \
RUN python${PYTHON_VERSION} --version \
&& if [ "${PYTHON_VERSION}" != "3.12" ]; then \
python${PYTHON_VERSION} -c "import urllib.request; urllib.request.urlretrieve('https://bootstrap.pypa.io/get-pip.py', '/tmp/get-pip.py')" \
Expand All @@ -33,49 +54,64 @@ RUN python${PYTHON_VERSION} --version \
&& ln -sf "$(which python${PYTHON_VERSION})" /usr/local/bin/python \
&& ln -sf "$(which python${PYTHON_VERSION})" /usr/local/bin/python3; \
fi
# Re-declare ARGs after FROM so they're visible in this build stage.
ARG PYTHON_VERSION
ARG TORCH_VERSION
ARG TORCH_INDEX_URL

WORKDIR /app

# Prevent interactive prompts during package installation
ENV FLASH_PYTHON_VERSION=${PYTHON_VERSION}
ENV DEBIAN_FRONTEND=noninteractive
# Set timezone to avoid tzdata prompts
ENV TZ=Etc/UTC

# Enable HuggingFace transfer acceleration
ENV HF_HUB_ENABLE_HF_TRANSFER=1
# Relocate HuggingFace cache outside /root/.cache to exclude from volume sync
ENV HF_HOME=/hf-cache

# Configure APT cache to persist under /root/.cache for volume sync
# Install ONE Python natively. 3.10 from upstream Ubuntu (jammy ships it as
# system Python); 3.11/3.12/3.13 from deadsnakes.
RUN apt-get update \
&& apt-get install -y --no-install-recommends \
software-properties-common ca-certificates curl gnupg \
&& add-apt-repository -y ppa:deadsnakes/ppa \
&& apt-get update \
&& apt-get install -y --no-install-recommends \
python${PYTHON_VERSION} \
python${PYTHON_VERSION}-venv \
python${PYTHON_VERSION}-dev \
git \
&& ln -sf "$(which python${PYTHON_VERSION})" /usr/local/bin/python \
&& ln -sf "$(which python${PYTHON_VERSION})" /usr/local/bin/python3 \
&& apt-get clean && rm -rf /var/lib/apt/lists/*

# Bootstrap pip via get-pip.py.
RUN python -c "import urllib.request; urllib.request.urlretrieve('https://bootstrap.pypa.io/get-pip.py', '/tmp/get-pip.py')" \
&& python /tmp/get-pip.py --no-cache-dir \
&& rm -f /tmp/get-pip.py

# Install torch natively for the active interpreter.
RUN python -m pip install --no-cache-dir \
--index-url ${TORCH_INDEX_URL} \
"torch==${TORCH_VERSION}"

WORKDIR /app

# Configure APT cache to persist under /root/.cache for volume sync.
RUN mkdir -p /root/.cache/apt/archives/partial \
&& echo 'Dir::Cache "/root/.cache/apt";' > /etc/apt/apt.conf.d/01cache

# Install system dependencies and uv
# Note: build-essential not pre-installed to reduce image size (400MB savings)
# Automatic detection will install it when needed (no manual action required)
# Advanced: Users can pre-install via system_dependencies=["build-essential"]
RUN DEBIAN_FRONTEND=noninteractive apt-get update && apt-get install -y --no-install-recommends \
curl ca-certificates git \
&& curl -LsSf https://astral.sh/uv/install.sh | sh \
# Install uv for downstream dependency installation.
RUN curl -LsSf https://astral.sh/uv/install.sh | sh \
&& cp ~/.local/bin/uv /usr/local/bin/uv \
&& chmod +x /usr/local/bin/uv \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
&& chmod +x /usr/local/bin/uv

# Copy app code and install dependencies
# Use --python to target the active interpreter (preserves torch in its site-packages)
# Copy app code and install worker dependencies into the active interpreter.
COPY README.md pyproject.toml uv.lock ./
COPY src/ ./
RUN uv export --format requirements-txt --no-dev --no-hashes > requirements.txt \
&& uv pip install --python $(which python) --break-system-packages -r requirements.txt

# Install numpy for the active Python version.
# The runpod/pytorch image ships torch but not numpy. Flash build excludes numpy
# from tarballs (BASE_IMAGE_PACKAGES) to save tarball space (~30 MB), so numpy
# must be provided here in the base image.
# Install numpy for the active Python (excluded from flash tarballs).
RUN python -m pip install --no-cache-dir numpy

# Verify torch, numpy, and the expected Python version are available.
# Verify torch, numpy, and the expected interpreter are wired correctly.
RUN python -c "import sys; actual = f'{sys.version_info.major}.{sys.version_info.minor}'; expected = '${PYTHON_VERSION}'; assert actual == expected, f'Expected Python {expected}, got {actual}'; print(f'Python {actual} OK')" \
&& python -c "import torch; print(f'torch {torch.__version__} CUDA {torch.cuda.is_available()}')" \
&& python -c "import numpy; print(f'numpy {numpy.__version__}')"
Expand Down
84 changes: 55 additions & 29 deletions Dockerfile-lb
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,21 @@
# For non-3.12 targets we reinstall torch from the CUDA 12.8 wheel index
# (~7 GB overhead) and repoint /usr/local/bin/python so the worker CMD picks
# up the correct interpreter.
# Base image provides Python 3.9-3.13 via deadsnakes; only 3.12 has torch
# pre-installed. For 3.10 and 3.11 we reinstall torch from the CUDA 12.8
# wheel index (~7 GB overhead) and repoint /usr/local/bin/python so the
# worker CMD picks up the correct interpreter.
FROM runpod/pytorch:1.0.3-cu1281-torch291-ubuntu2204

# Target Python version for the worker runtime.
# Native per-version GPU LB base. Same shape as Dockerfile, with the
# uvicorn entrypoint for load-balanced endpoints. See Dockerfile for the
# full rationale on the nvidia/cuda + deadsnakes approach.
ARG PYTHON_VERSION=3.12
ARG TORCH_VERSION=2.9.1+cu128
ARG TORCH_INDEX_URL=https://download.pytorch.org/whl/cu128

# Expose the target version to the running worker for startup validation.
ENV FLASH_PYTHON_VERSION=${PYTHON_VERSION}
FROM nvidia/cuda:12.8.1-cudnn-runtime-ubuntu22.04

# Validate the base image provides the requested interpreter and activate it.
# For non-3.12 targets, install torch for the selected Python and repoint
Expand All @@ -22,6 +28,11 @@ ENV FLASH_PYTHON_VERSION=${PYTHON_VERSION}
# pip bootstrap: Ubuntu 22.04's system python3.10 has ensurepip disabled by
# Debian policy, so we install pip via get-pip.py (works for any interpreter
# regardless of distro patching). urllib is stdlib, avoiding a curl dependency.
RUN python${PYTHON_VERSION} --version \
&& if [ "${PYTHON_VERSION}" != "3.12" ]; then \
python${PYTHON_VERSION} -c "import urllib.request; urllib.request.urlretrieve('https://bootstrap.pypa.io/get-pip.py', '/tmp/get-pip.py')" \
&& python${PYTHON_VERSION} /tmp/get-pip.py --no-cache-dir \
&& rm -f /tmp/get-pip.py \
RUN python${PYTHON_VERSION} --version \
&& if [ "${PYTHON_VERSION}" != "3.12" ]; then \
python${PYTHON_VERSION} -c "import urllib.request; urllib.request.urlretrieve('https://bootstrap.pypa.io/get-pip.py', '/tmp/get-pip.py')" \
Expand All @@ -33,56 +44,71 @@ RUN python${PYTHON_VERSION} --version \
&& ln -sf "$(which python${PYTHON_VERSION})" /usr/local/bin/python \
&& ln -sf "$(which python${PYTHON_VERSION})" /usr/local/bin/python3; \
fi
# Re-declare ARGs after FROM so they're visible in this build stage.
ARG PYTHON_VERSION
ARG TORCH_VERSION
ARG TORCH_INDEX_URL

WORKDIR /app

# Prevent interactive prompts during package installation
ENV FLASH_PYTHON_VERSION=${PYTHON_VERSION}
ENV DEBIAN_FRONTEND=noninteractive
# Set timezone to avoid tzdata prompts
ENV TZ=Etc/UTC

# Enable HuggingFace transfer acceleration
ENV HF_HUB_ENABLE_HF_TRANSFER=1
# Relocate HuggingFace cache outside /root/.cache to exclude from volume sync
ENV HF_HOME=/hf-cache

# Configure APT cache to persist under /root/.cache for volume sync
# Install ONE Python natively. 3.10 from upstream Ubuntu (jammy ships it as
# system Python); 3.11/3.12/3.13 from deadsnakes.
RUN apt-get update \
&& apt-get install -y --no-install-recommends \
software-properties-common ca-certificates curl gnupg \
&& add-apt-repository -y ppa:deadsnakes/ppa \
&& apt-get update \
&& apt-get install -y --no-install-recommends \
python${PYTHON_VERSION} \
python${PYTHON_VERSION}-venv \
python${PYTHON_VERSION}-dev \
git \
&& ln -sf "$(which python${PYTHON_VERSION})" /usr/local/bin/python \
&& ln -sf "$(which python${PYTHON_VERSION})" /usr/local/bin/python3 \
&& apt-get clean && rm -rf /var/lib/apt/lists/*

# Bootstrap pip via get-pip.py.
RUN python -c "import urllib.request; urllib.request.urlretrieve('https://bootstrap.pypa.io/get-pip.py', '/tmp/get-pip.py')" \
&& python /tmp/get-pip.py --no-cache-dir \
&& rm -f /tmp/get-pip.py

# Install torch natively for the active interpreter.
RUN python -m pip install --no-cache-dir \
--index-url ${TORCH_INDEX_URL} \
"torch==${TORCH_VERSION}"

WORKDIR /app

# Configure APT cache to persist under /root/.cache for volume sync.
RUN mkdir -p /root/.cache/apt/archives/partial \
&& echo 'Dir::Cache "/root/.cache/apt";' > /etc/apt/apt.conf.d/01cache

# Install system dependencies and uv
# Note: build-essential not pre-installed to reduce image size (400MB savings)
# Automatic detection will install it when needed (no manual action required)
# Advanced: Users can pre-install via system_dependencies=["build-essential"]
RUN DEBIAN_FRONTEND=noninteractive apt-get update && apt-get install -y --no-install-recommends \
curl ca-certificates git \
&& curl -LsSf https://astral.sh/uv/install.sh | sh \
# Install uv for downstream dependency installation.
RUN curl -LsSf https://astral.sh/uv/install.sh | sh \
&& cp ~/.local/bin/uv /usr/local/bin/uv \
&& chmod +x /usr/local/bin/uv \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
&& chmod +x /usr/local/bin/uv

# Copy app code and install dependencies
# Use --python to target the active interpreter (preserves torch in its site-packages)
# Copy app code and install worker dependencies into the active interpreter.
COPY README.md pyproject.toml uv.lock ./
COPY src/ ./
RUN uv export --format requirements-txt --no-dev --no-hashes > requirements.txt \
&& uv pip install --python $(which python) --break-system-packages -r requirements.txt

# Install numpy for the active Python version.
# The runpod/pytorch image ships torch but not numpy. Flash build excludes numpy
# from tarballs (BASE_IMAGE_PACKAGES) to save tarball space (~30 MB), so numpy
# must be provided here in the base image.
# Install numpy for the active Python (excluded from flash tarballs).
RUN python -m pip install --no-cache-dir numpy

# Verify torch, numpy, and the expected Python version are available.
# Verify torch, numpy, and the expected interpreter are wired correctly.
RUN python -c "import sys; actual = f'{sys.version_info.major}.{sys.version_info.minor}'; expected = '${PYTHON_VERSION}'; assert actual == expected, f'Expected Python {expected}, got {actual}'; print(f'Python {actual} OK')" \
&& python -c "import torch; print(f'torch {torch.__version__} CUDA {torch.cuda.is_available()}')" \
&& python -c "import numpy; print(f'numpy {numpy.__version__}')"

EXPOSE 80

# CMD will be overridden by RunPod at runtime to run the specific generated handler
# The handler factory generates handler_{resource_name}.py files
# CMD will be overridden by RunPod at runtime to run the specific generated handler.
# The handler factory generates handler_{resource_name}.py files.
# RunPod will invoke: uvicorn handler_{resource_name}:app --host 0.0.0.0 --port 80
CMD ["uvicorn", "lb_handler:app", "--host", "0.0.0.0", "--port", "80", "--timeout-keep-alive", "600"]
29 changes: 29 additions & 0 deletions src/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,35 @@
__version__ = "1.5.0" # x-release-please-version


class PythonVersionMismatchError(RuntimeError):
    """Signal that the active interpreter differs from the Python version the image declares."""


def assert_python_version_matches_image() -> None:
    """Fail fast when the interpreter disagrees with ``FLASH_PYTHON_VERSION``.

    The Dockerfiles stamp ``FLASH_PYTHON_VERSION`` with the image's target
    Python (e.g. ``3.11``).  Comparing it against ``sys.version_info`` at
    worker boot surfaces a mis-tagged image, an apt upgrade that moved the
    ``python`` symlinks, or a silently failed torch side-install immediately,
    instead of letting user code fail later with a confusing ABI error.

    The check is a no-op when ``FLASH_PYTHON_VERSION`` is unset (local
    development, test harnesses).
    """
    expected = os.environ.get("FLASH_PYTHON_VERSION")
    if not expected:
        # Nothing declared by the image: skip the check entirely.
        return

    running = "{}.{}".format(*sys.version_info[:2])
    if running == expected:
        return

    raise PythonVersionMismatchError(
        "Worker interpreter mismatch: image declares FLASH_PYTHON_VERSION="
        f"{expected!r} but sys.version_info reports {running!r}. "
        "Rebuild the image with the correct PYTHON_VERSION build arg."
    )


# NOTE(review): this appears to duplicate the PythonVersionMismatchError class
# defined earlier in this module; Python would silently rebind the name to this
# second definition. Confirm against the merged file and remove one of the two.
class PythonVersionMismatchError(RuntimeError):
    """Raised when the running interpreter does not match the image's declared version."""

Expand Down
Loading