diff --git a/.github/workflows/flyte-binary-v2.yml b/.github/workflows/flyte-binary-v2.yml
index 23d2c70d9b..fbdea50bc2 100644
--- a/.github/workflows/flyte-binary-v2.yml
+++ b/.github/workflows/flyte-binary-v2.yml
@@ -158,7 +158,26 @@ jobs:
           registry: ghcr.io
           username: "${{ secrets.FLYTE_BOT_USERNAME }}"
           password: "${{ secrets.FLYTE_BOT_PAT }}"
-      - name: Build and push multi-arch image
+      - name: Build CPU multi-arch image to OCI archive
+        # Produce an OCI archive locally so the GPU build below can use it as a
+        # named build context. This avoids the PR-gated push chicken-and-egg:
+        # on pull_request events we don't push to ghcr, so the GPU build can't
+        # resolve a ghcr-hosted FROM.
+        uses: docker/build-push-action@v6
+        with:
+          context: docker/demo-bundled
+          allow: "security.insecure"
+          platforms: linux/arm64, linux/amd64
+          build-args: "FLYTE_DEMO_VERSION=${{ env.FLYTE_DEMO_VERSION }}"
+          outputs: type=oci,dest=/tmp/cpu-oci.tar
+          cache-from: type=gha,scope=demo-cpu
+          cache-to: type=gha,mode=max,scope=demo-cpu
+      - name: Extract CPU OCI layout for GPU build
+        run: |
+          mkdir -p /tmp/cpu-oci
+          tar -xf /tmp/cpu-oci.tar -C /tmp/cpu-oci
+      - name: Push CPU multi-arch image
+        if: ${{ github.event_name == 'push' || github.event_name == 'workflow_dispatch' }}
         uses: docker/build-push-action@v6
         with:
           context: docker/demo-bundled
@@ -166,4 +185,33 @@
           platforms: linux/arm64, linux/amd64
           tags: ${{ steps.image-names.outputs.tags }}
           build-args: "FLYTE_DEMO_VERSION=${{ env.FLYTE_DEMO_VERSION }}"
+          push: true
+          cache-from: type=gha,scope=demo-cpu
+      - name: Prepare GPU Image Names
+        id: gpu-image-names
+        uses: docker/metadata-action@v5
+        with:
+          images: |
+            ghcr.io/${{ github.repository_owner }}/flyte-demo
+            ghcr.io/${{ github.repository_owner }}/flyte-sandbox-v2
+          tags: |
+            type=raw,value=gpu-latest,enable=${{ github.event_name == 'push' && github.ref == 'refs/heads/v2' }}
+            type=raw,value=gpu-nightly,enable=${{ github.event_name == 'push' && github.ref == 'refs/heads/v2' }}
+            type=sha,format=long,prefix=gpu-
+      - name: Build and push GPU multi-arch image
+        uses: docker/build-push-action@v6
+        with:
+          context: docker/demo-bundled
+          file: docker/demo-bundled/Dockerfile.gpu
+          # Point Dockerfile.gpu's `FROM ${BASE_IMAGE}` at the OCI archive
+          # produced above — no registry round-trip needed.
+          build-contexts: base=oci-layout:///tmp/cpu-oci
+          allow: "security.insecure"
+          platforms: linux/arm64, linux/amd64
+          tags: ${{ steps.gpu-image-names.outputs.tags }}
+          build-args: |
+            FLYTE_DEMO_VERSION=${{ env.FLYTE_DEMO_VERSION }}
+            BASE_IMAGE=base
+          push: ${{ github.event_name == 'push' || github.event_name == 'workflow_dispatch' }}
+          cache-from: type=gha,scope=demo-gpu
+          cache-to: type=gha,mode=max,scope=demo-gpu
diff --git a/docker/demo-bundled/Dockerfile.gpu b/docker/demo-bundled/Dockerfile.gpu
new file mode 100644
index 0000000000..a6bba98fd7
--- /dev/null
+++ b/docker/demo-bundled/Dockerfile.gpu
@@ -0,0 +1,82 @@
+# syntax=docker/dockerfile:1.7-labs
+#
+# GPU-capable demo cluster image. Layers NVIDIA Container Toolkit + the
+# k8s device-plugin on top of the CPU demo image so everything that ships
+# in the base (flyte-binary, embedded postgres, auto-apply manifests) is
+# inherited verbatim. CI builds the CPU image first and passes its tag in
+# via BASE_IMAGE.
+
+ARG BASE_IMAGE=ghcr.io/flyteorg/flyte-demo:nightly
+
+
+# Stage NVIDIA Container Toolkit binaries + supporting libs + ldconfig.
+# k3s auto-registers a `nvidia` containerd runtime at startup if
+# /usr/bin/nvidia-container-runtime is on PATH in the final image.
+FROM debian:bookworm-slim AS nvidia-toolkit
+
+ARG TARGETARCH
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+      curl gnupg ca-certificates && \
+    rm -rf /var/lib/apt/lists/*
+
+RUN curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey \
+      | gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg && \
+    curl -fsSL https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list \
+      | sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' \
+      > /etc/apt/sources.list.d/nvidia-container-toolkit.list && \
+    apt-get update && \
+    apt-get install -y --no-install-recommends \
+      nvidia-container-toolkit-base \
+      libnvidia-container1 \
+      libnvidia-container-tools && \
+    rm -rf /var/lib/apt/lists/*
+
+# Collect binaries, their shared-lib deps, and the dynamic linker so they run
+# inside the minimal rancher/k3s base (no libc of its own). Also stage
+# /sbin/ldconfig — the toolkit's update-ldcache OCI hook bind-mounts it
+# into workload pods.
+RUN set -ex; \
+    mkdir -p /nvidia-staging/bin /nvidia-staging/lib /nvidia-staging/sbin; \
+    for bin in nvidia-ctk nvidia-container-runtime nvidia-container-runtime.cdi \
+               nvidia-container-runtime.legacy nvidia-container-cli; do \
+      [ -f "/usr/bin/$bin" ] && cp -a "/usr/bin/$bin" /nvidia-staging/bin/ || true; \
+    done; \
+    for bin in /nvidia-staging/bin/*; do \
+      ldd "$bin" 2>/dev/null | grep "=>" | awk '{print $3}' | while read lib; do \
+        [ -f "$lib" ] && cp -n "$lib" /nvidia-staging/lib/ 2>/dev/null || true; \
+      done; \
+    done; \
+    cp /lib64/ld-linux-x86-64.so.2 /nvidia-staging/lib/ 2>/dev/null || true; \
+    cp /lib/ld-linux-aarch64.so.1 /nvidia-staging/lib/ 2>/dev/null || true; \
+    cp /sbin/ldconfig /nvidia-staging/sbin/ldconfig
+
+
+FROM ${BASE_IMAGE}
+
+# Install NVIDIA Container Toolkit binaries + supporting libs.
+# The libs go into a default linker search path (/usr/lib/<arch-triple>/)
+# because the nvidia-ctk OCI hook is invoked by containerd without
+# inheriting LD_LIBRARY_PATH.
+COPY --from=nvidia-toolkit /nvidia-staging/bin/ /usr/bin/
+COPY --from=nvidia-toolkit /nvidia-staging/lib/ /usr/lib/nvidia/
+COPY --from=nvidia-toolkit /nvidia-staging/sbin/ldconfig /sbin/ldconfig
+RUN ARCH_TRIPLE=$([ "$(uname -m)" = "aarch64" ] && echo "aarch64-linux-gnu" || echo "x86_64-linux-gnu") && \
+    mkdir -p "/usr/lib/${ARCH_TRIPLE}" && \
+    cp -a /usr/lib/nvidia/*.so* "/usr/lib/${ARCH_TRIPLE}/" 2>/dev/null || true
+
+# NVIDIA device-plugin DaemonSet + RuntimeClass (auto-applied by k3s at startup).
+COPY nvidia-device-plugin.yaml /var/lib/rancher/k3s/server/manifests/nvidia-device-plugin.yaml
+
+# k3s reads this template at startup to generate containerd's config.
+# Sets nvidia as the default runtime so GPU pods don't need runtimeClassName.
+COPY containerd-config.toml.tmpl /var/lib/rancher/k3s/agent/etc/containerd/config.toml.tmpl
+
+# Append nvidia libs to the base image's LD_LIBRARY_PATH (which already
+# includes /usr/lib/pg-glibc for embedded postgres).
+ENV LD_LIBRARY_PATH="/usr/lib/pg-glibc:/usr/lib/nvidia"
+
+# Propagate host GPUs into containers scheduled on this node. These env vars
+# are consumed by nvidia-container-runtime.
+ENV NVIDIA_VISIBLE_DEVICES=all
+ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
diff --git a/docker/demo-bundled/Makefile b/docker/demo-bundled/Makefile
index 40ae40005a..a1cd662907 100644
--- a/docker/demo-bundled/Makefile
+++ b/docker/demo-bundled/Makefile
@@ -60,6 +60,13 @@ build: sync-crds flyte dep_update manifests
 	docker buildx build --builder flyte-demo --allow security.insecure --load \
 		--tag flyte-demo:latest .
 
+.PHONY: build-gpu
+build-gpu: build
+	docker buildx build --builder flyte-demo --allow security.insecure --load \
+		--file Dockerfile.gpu \
+		--build-arg BASE_IMAGE=flyte-demo:latest \
+		--tag flyte-demo:gpu-latest .
+
 # Port map
 # 6443 - k8s API server
 # 30000 - Docker Registry
diff --git a/docker/demo-bundled/containerd-config.toml.tmpl b/docker/demo-bundled/containerd-config.toml.tmpl
new file mode 100644
index 0000000000..7cda384aa0
--- /dev/null
+++ b/docker/demo-bundled/containerd-config.toml.tmpl
@@ -0,0 +1,10 @@
+{{ template "base" . }}
+
+# Override: make the NVIDIA runtime the default. k3s auto-registers a
+# `nvidia` runtime at startup when /usr/bin/nvidia-container-runtime is
+# present. By switching the default, pods requesting `nvidia.com/gpu` get
+# GPU access without needing `runtimeClassName: nvidia` in their spec.
+# nvidia-container-runtime is a passthrough when no GPU is requested, so
+# non-GPU pods are unaffected.
+[plugins.'io.containerd.cri.v1.runtime'.containerd]
+  default_runtime_name = "nvidia"
diff --git a/docker/demo-bundled/nvidia-device-plugin.yaml b/docker/demo-bundled/nvidia-device-plugin.yaml
new file mode 100644
index 0000000000..f0bbfcdb01
--- /dev/null
+++ b/docker/demo-bundled/nvidia-device-plugin.yaml
@@ -0,0 +1,45 @@
+apiVersion: node.k8s.io/v1
+kind: RuntimeClass
+metadata:
+  name: nvidia
+handler: nvidia
+---
+apiVersion: apps/v1
+kind: DaemonSet
+metadata:
+  name: nvidia-device-plugin-daemonset
+  namespace: kube-system
+spec:
+  selector:
+    matchLabels:
+      name: nvidia-device-plugin-ds
+  updateStrategy:
+    type: RollingUpdate
+  template:
+    metadata:
+      labels:
+        name: nvidia-device-plugin-ds
+    spec:
+      tolerations:
+        - key: nvidia.com/gpu
+          operator: Exists
+          effect: NoSchedule
+      priorityClassName: system-node-critical
+      runtimeClassName: nvidia
+      containers:
+        - name: nvidia-device-plugin-ctr
+          image: nvcr.io/nvidia/k8s-device-plugin:v0.17.0
+          env:
+            - name: FAIL_ON_INIT_ERROR
+              value: "false"
+          securityContext:
+            allowPrivilegeEscalation: false
+            capabilities:
+              drop: ["ALL"]
+          volumeMounts:
+            - name: device-plugin
+              mountPath: /var/lib/kubelet/device-plugins
+      volumes:
+        - name: device-plugin
+          hostPath:
+            path: /var/lib/kubelet/device-plugins