From 3448298dd92d562416d475400f11dbd068212c60 Mon Sep 17 00:00:00 2001 From: Nicolas Boganski Date: Wed, 4 Mar 2026 11:38:05 +0100 Subject: [PATCH 01/10] Add arm images builds --- .github/workflows/ci.yml | 10 +++++++--- docker/build_all.sh | 5 +++-- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index cadac5c9f..fbc84a4ca 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -370,10 +370,14 @@ jobs: build-and-push-docker-images: - runs-on: ubuntu-22.04 + runs-on: ${{ matrix.os }} strategy: matrix: + os: [ubuntu-22.04, ubuntu-22.04-arm] gpu: [cuda, rocm] + include: + - os: ubuntu-22.04-arm + suffix: -arm steps: - uses: actions/checkout@v4 @@ -405,7 +409,7 @@ jobs: - name: Build Docker images run: | - ./docker/build_all.sh latest 0 ${{ matrix.gpu }} + ./docker/build_all.sh latest 0 ${{ matrix.gpu }} ${{ matrix.suffix }} - name: Login to DockerHub if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') @@ -418,7 +422,7 @@ jobs: - name: Push Docker images if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') run: | - ./docker/build_all.sh ${GITHUB_REF##*/v} 1 ${{ matrix.gpu }} + ./docker/build_all.sh ${GITHUB_REF##*/v} 1 ${{ matrix.gpu }} ${{ matrix.suffix }} build-and-deploy-docs: diff --git a/docker/build_all.sh b/docker/build_all.sh index f05ea99e8..d2ba41968 100755 --- a/docker/build_all.sh +++ b/docker/build_all.sh @@ -21,6 +21,7 @@ cd $ROOT_DIR VERSION=${1:-latest} PUSH=${2:-0} GPU=${3:-cuda} +SUFFIX=${4:} IMAGE=ghcr.io/opennmt/ctranslate2 build() @@ -29,8 +30,8 @@ build() IMAGE_NAME=$2 BUILD_ARGS=${3:-} - LATEST=$IMAGE:latest-$IMAGE_NAME - TAGGED=$IMAGE:$VERSION-$IMAGE_NAME + LATEST=$IMAGE:latest-$IMAGE_NAME$SUFFIX + TAGGED=$IMAGE:$VERSION-$IMAGE_NAME$SUFFIX docker build --pull $BUILD_ARGS -t $LATEST -f docker/$DOCKERFILE . if [ $PUSH -eq 1 ]; then docker push $LATEST From 5daef832258ff2eb4991c8ca98133700bf189886 Mon Sep 17 00:00:00 2001 From: Nicolas Boganski Date: Wed, 4 Mar 2026 11:44:08 +0100 Subject: [PATCH 02/10] TMP name change --- docker/build_all.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/build_all.sh b/docker/build_all.sh index d2ba41968..d566ab8cf 100755 --- a/docker/build_all.sh +++ b/docker/build_all.sh @@ -22,7 +22,7 @@ VERSION=${1:-latest} PUSH=${2:-0} GPU=${3:-cuda} SUFFIX=${4:} -IMAGE=ghcr.io/opennmt/ctranslate2 +IMAGE=ghcr.io/nicolasbfr/ctranslate2 build() { From e0c3a2a7773caa6040a7738d06b4e0ac08ee2d1b Mon Sep 17 00:00:00 2001 From: Nicolas Boganski Date: Wed, 4 Mar 2026 11:48:26 +0100 Subject: [PATCH 03/10] Fix --- docker/build_all.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/build_all.sh b/docker/build_all.sh index d566ab8cf..3bb88e7ae 100755 --- a/docker/build_all.sh +++ b/docker/build_all.sh @@ -21,7 +21,7 @@ cd $ROOT_DIR VERSION=${1:-latest} PUSH=${2:-0} GPU=${3:-cuda} -SUFFIX=${4:} +SUFFIX=${4} IMAGE=ghcr.io/nicolasbfr/ctranslate2 build() From b243db7fd4b88129802e420e39dd15158ffab242 Mon Sep 17 00:00:00 2001 From: Nicolas Boganski Date: Wed, 4 Mar 2026 11:58:56 +0100 Subject: [PATCH 04/10] Fix rocm arm --- .github/workflows/ci.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fbc84a4ca..1973dd4c5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -378,6 +378,9 @@ jobs: include: - os: ubuntu-22.04-arm suffix: -arm + exclude: + - os: ubuntu-22.04-arm + gpu: rocm steps: - uses: actions/checkout@v4 From f4700aaced2dc3d3088f079794442071a4806ff1 Mon Sep 17 00:00:00 2001 From: Nicolas Boganski Date: Wed, 4 Mar 2026 12:05:49 +0100 Subject: [PATCH 05/10] Fix rocm arm 2 --- .github/workflows/ci.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1973dd4c5..90448ed06 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -373,14 +373,14 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: [ubuntu-22.04, ubuntu-22.04-arm] - gpu: [cuda, rocm] include: + - os: ubuntu-22.04 + gpu: cuda + - os: ubuntu-22.04 + gpu: rocm - os: ubuntu-22.04-arm + gpu: cuda suffix: -arm - exclude: - - os: ubuntu-22.04-arm - gpu: rocm steps: - uses: actions/checkout@v4 From 280bd6dbb40968bb52ae1a5420ab786bfa8d988a Mon Sep 17 00:00:00 2001 From: Nicolas Boganski Date: Wed, 4 Mar 2026 12:32:10 +0100 Subject: [PATCH 06/10] New Dockerfile --- .github/workflows/ci.yml | 2 +- docker/Dockerfile_cuda_arm | 93 ++++++++++++++++++++++++++++++++++++++ docker/build_all.sh | 2 + 3 files changed, 96 insertions(+), 1 deletion(-) create mode 100644 docker/Dockerfile_cuda_arm diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 90448ed06..02de050a9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -379,7 +379,7 @@ jobs: - os: ubuntu-22.04 gpu: rocm - os: ubuntu-22.04-arm - gpu: cuda + gpu: cudaarm suffix: -arm steps: diff --git a/docker/Dockerfile_cuda_arm b/docker/Dockerfile_cuda_arm new file mode 100644 index 000000000..0e74f2180 --- /dev/null +++ b/docker/Dockerfile_cuda_arm @@ -0,0 +1,93 @@ +FROM nvidia/cuda:12.8.1-cudnn-devel-ubuntu22.04 AS builder + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + python3-dev \ + python3-pip \ + wget \ + && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +WORKDIR /root + +RUN python3 -m pip --no-cache-dir install cmake==3.22.* + +ENV ONEDNN_VERSION=3.1.1 +RUN wget -q https://github.com/uxlffoundation/oneDNN/archive/refs/tags/v${ONEDNN_VERSION}.tar.gz && \ + tar xf *.tar.gz && \ + rm *.tar.gz && \ + cd oneDNN-* && \ + cmake -DCMAKE_BUILD_TYPE=Release -DONEDNN_LIBRARY_TYPE=STATIC -DONEDNN_BUILD_EXAMPLES=OFF -DONEDNN_BUILD_TESTS=OFF -DONEDNN_ENABLE_WORKLOAD=INFERENCE -DONEDNN_ENABLE_PRIMITIVE="CONVOLUTION;REORDER" -DONEDNN_BUILD_GRAPH=OFF . && \ + make -j$(nproc) install && \ + cd .. && \ + rm -r oneDNN-* + +ENV OPENMPI_VERSION=4.1.6 +RUN wget -q https://download.open-mpi.org/release/open-mpi/v4.1/openmpi-${OPENMPI_VERSION}.tar.bz2 && \ + tar xf *.tar.bz2 && \ + rm *.tar.bz2 && \ + cd openmpi-* && \ + ./configure && \ + make -j$(nproc) install && \ + cd .. && \ + rm -r openmpi-* + +COPY third_party third_party +COPY cli cli +COPY include include +COPY src src +COPY cmake cmake +COPY python python +COPY CMakeLists.txt . + +ARG CXX_FLAGS +ENV CXX_FLAGS=${CXX_FLAGS:-"-msse4.1"} +ARG CUDA_NVCC_FLAGS +ENV CUDA_NVCC_FLAGS=${CUDA_NVCC_FLAGS:-"-Xfatbin=-compress-all"} +ARG CUDA_ARCH_LIST +ENV CUDA_ARCH_LIST=${CUDA_ARCH_LIST:-"Common"} +ENV CTRANSLATE2_ROOT=/opt/ctranslate2 +ENV LD_LIBRARY_PATH=/usr/local/lib/:${LD_LIBRARY_PATH} + +RUN mkdir build_tmp && \ + cd build_tmp && \ + cmake -DCMAKE_INSTALL_PREFIX=${CTRANSLATE2_ROOT} \ + -DWITH_CUDA=ON -DWITH_CUDNN=ON -DWITH_MKL=ON -DWITH_DNNL=ON -DOPENMP_RUNTIME=COMP \ + -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS="${CXX_FLAGS}" \ + -DCUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS}" -DCUDA_ARCH_LIST="${CUDA_ARCH_LIST}" -DWITH_TENSOR_PARALLEL=ON .. && \ + VERBOSE=1 make -j$(nproc) install + +ENV LANG=en_US.UTF-8 +COPY README.md . + +RUN cd python && \ + python3 -m pip --no-cache-dir install -r install_requirements.txt && \ + python3 setup.py bdist_wheel --dist-dir $CTRANSLATE2_ROOT + +FROM nvidia/cuda:12.8.1-base-ubuntu22.04 + +# We remove the cuda-compat package because it conflicts with the CUDA Enhanced Compatibility. +# See e.g. https://github.com/NVIDIA/nvidia-docker/issues/1515 +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + libcublas-12-8=12.8.4.1-1 \ + libcudnn9-cuda-12=9.10.2.21-1 \ + libnccl2=2.26.2-1+cuda12.8 \ + libopenmpi3=4.1.2-2ubuntu1 \ + openmpi-bin \ + libgomp1 \ + python3-pip \ + && \ + apt-get purge -y cuda-compat-12-8 && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +ENV CTRANSLATE2_ROOT=/opt/ctranslate2 +ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CTRANSLATE2_ROOT/lib + +COPY --from=builder $CTRANSLATE2_ROOT $CTRANSLATE2_ROOT +RUN python3 -m pip --no-cache-dir install $CTRANSLATE2_ROOT/*.whl && \ + rm $CTRANSLATE2_ROOT/*.whl + +ENTRYPOINT ["/opt/ctranslate2/bin/ct2-translator"] diff --git a/docker/build_all.sh b/docker/build_all.sh index 3bb88e7ae..6ce5b2f93 100755 --- a/docker/build_all.sh +++ b/docker/build_all.sh @@ -46,6 +46,8 @@ build() if [ "$GPU" == "rocm" ]; then build Dockerfile_rocm ubuntu22.04-rocm7.2 +elif [ "$GPU" == "cudaarm" ]; then + build Dockerfile_openblas ubuntu22.04-openblas else build Dockerfile ubuntu22.04-cuda12.8 fi From 68f4a87017138ecc8fecd023eab58d04807c4e0e Mon Sep 17 00:00:00 2001 From: Nicolas Boganski Date: Wed, 4 Mar 2026 12:32:59 +0100 Subject: [PATCH 07/10] Remove tmp jobs --- .github/workflows/ci.yml | 419 --------------------------------------- 1 file changed, 419 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 02de050a9..eca8b266a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -11,363 +11,6 @@ on: - master jobs: - build-and-test-cpp-x86_64: - runs-on: ${{ matrix.os }} - env: - CT2_VERBOSE: 1 - strategy: - matrix: - os: [ubuntu-22.04] - backend: [mkl, dnnl] - - steps: - - uses: actions/checkout@v4 - with: - submodules: recursive - - - name: Install Intel oneAPI - if: startsWith(matrix.os, 'ubuntu') - run: | - wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB - sudo apt-key add *.PUB - sudo sh -c 'echo "deb https://apt.repos.intel.com/oneapi all main" > /etc/apt/sources.list.d/oneAPI.list' - sudo apt-get update - - - name: Configure with MKL - if: startsWith(matrix.os, 'ubuntu') && matrix.backend == 'mkl' - env: - CT2_USE_MKL: 1 - MKL_VERSION: 2023.0.0 - run: | - sudo apt-get install -y intel-oneapi-mkl-devel-$MKL_VERSION - cmake -DCMAKE_INSTALL_PREFIX=$PWD/install -DBUILD_TESTS=ON . - - - name: Configure with DNNL - if: startsWith(matrix.os, 'ubuntu') && matrix.backend == 'dnnl' - env: - DNNL_VERSION: 2023.0.0-25399 - run: | - sudo apt-get install -y intel-oneapi-dnnl-devel=$DNNL_VERSION intel-oneapi-dnnl=$DNNL_VERSION - cmake -DCMAKE_INSTALL_PREFIX=$PWD/install -DBUILD_TESTS=ON -DWITH_MKL=OFF -DOPENMP_RUNTIME=COMP -DWITH_DNNL=ON . - - - name: Build - run: | - make -j $(nproc) install - - - name: Download test data - working-directory: tests/data/models - run: | - wget https://opennmt-models.s3.amazonaws.com/pi_lm_step_5000.pt - wget https://opennmt-models.s3.amazonaws.com/transliteration-aren-all.tar.gz - tar xf transliteration-aren-all.tar.gz - - - name: Test MKL - if: matrix.backend == 'mkl' - env: - CT2_USE_MKL: 1 - run: | - tests/ctranslate2_test tests/data - - name: Test DNNL - if: matrix.backend == 'dnnl' - run: | - tests/ctranslate2_test tests/data - - build-and-test-cpp-x86_64-address_sanitizer: - runs-on: ubuntu-22.04 - env: - CT2_VERBOSE: 1 - - steps: - - uses: actions/checkout@v4 - with: - submodules: recursive - - - name: Install Intel oneAPI - run: | - wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB - sudo apt-key add *.PUB - sudo sh -c 'echo "deb https://apt.repos.intel.com/oneapi all main" > /etc/apt/sources.list.d/oneAPI.list' - sudo apt update - - - name: Install MKL - env: - CT2_USE_MKL: 1 - MKL_VERSION: 2023.0.0 - run: | - sudo apt install -y intel-oneapi-mkl-devel-$MKL_VERSION - - - name: Install Clang - run: | - sudo apt install -y clang - - - name: Configure with MKL and Clang - env: - CT2_USE_MKL: 1 - run: | - cmake -DCMAKE_INSTALL_PREFIX=$PWD/install -DBUILD_TESTS=ON -DCMAKE_C_COMPILER=/usr/bin/clang -DCMAKE_CXX_COMPILER=/usr/bin/clang++ -DENABLE_ADDRESS_SANITIZER=ON -DCMAKE_BUILD_TYPE=Debug . - - - name: Build - run: | - make -j $(nproc) install - - - name: Download test data - working-directory: tests/data/models - run: | - wget https://opennmt-models.s3.amazonaws.com/pi_lm_step_5000.pt - wget https://opennmt-models.s3.amazonaws.com/transliteration-aren-all.tar.gz - tar xf transliteration-aren-all.tar.gz - - - name: Test AddressSanitizer - env: - CT2_USE_MKL: 1 - run: | - ASAN_OPTIONS=detect_leaks=1:print_stats=1 tests/ctranslate2_test tests/data - - build-and-test-cpp-arm64: - runs-on: ${{ matrix.os }} - env: - CT2_VERBOSE: 1 - strategy: - matrix: - - include: - - os: ubuntu-22.04-arm - backend: openblas - - os: macos-15 - backend: ruy - steps: - - uses: actions/checkout@v4 - with: - submodules: recursive - - - name: Build with OpenBLAS and Ruy - if: matrix.backend == 'openblas' - run: | - wget https://github.com/xianyi/OpenBLAS/archive/v0.3.13.tar.gz - tar xzvf v0.3.13.tar.gz - cd OpenBLAS-0.3.13 - make TARGET=ARMV8 NO_LAPACK=1 -j $(nproc) - sudo make PREFIX=/usr/local install -j $(nproc) - cd .. - export OpenBLAS_HOME=/usr/local - cmake \ - -DOPENMP_RUNTIME=COMP \ - -DCMAKE_INSTALL_PREFIX=$PWD/install \ - -DWITH_MKL=OFF \ - -DWITH_OPENBLAS=ON \ - -DWITH_RUY=ON \ - -DBUILD_TESTS=ON \ - . - make -j $(nproc) install - - - name: Build Ruy - if: matrix.backend == 'ruy' - run: | - CMAKE_EXTRA_OPTIONS='-DCMAKE_OSX_ARCHITECTURES=arm64 -DWITH_ACCELERATE=ON -DWITH_MKL=OFF -DOPENMP_RUNTIME=NONE -DWITH_RUY=ON' - cmake -DCMAKE_POLICY_VERSION_MINIMUM=3.5 \ - -DCMAKE_INSTALL_RPATH_USE_LINK_PATH=ON \ - -DCMAKE_INSTALL_PREFIX=$PWD/install \ - -DBUILD_TESTS=ON \ - $CMAKE_EXTRA_OPTIONS \ - . - make -j $(nproc) install - - - name: Download test data - run: | - wget https://opennmt-models.s3.amazonaws.com/transliteration-aren-all.tar.gz - tar xf transliteration-aren-all.tar.gz -C tests/data/models/ - - - name: Test - run: | - tests/ctranslate2_test tests/data - - build-python-wheels: - runs-on: ${{ matrix.os }} - strategy: - matrix: - os: [ubuntu-22.04, windows-2022] - arch: [auto64] - include: - - os: ubuntu-22.04 - arch: aarch64 - - os: macos-15 - arch: arm64 - - os: macos-15-intel - arch: x86_64 - - steps: - - uses: actions/checkout@v4 - with: - submodules: recursive - - - uses: docker/setup-qemu-action@v2 - if: ${{ matrix.arch == 'aarch64' }} - name: Set up QEMU - - - name: Build wheels - uses: pypa/cibuildwheel@v3.2.1 - with: - package-dir: python - output-dir: python/wheelhouse - env: - CIBW_ENVIRONMENT_PASS_LINUX: CIBW_ARCHS - CIBW_ENVIRONMENT_WINDOWS: CTRANSLATE2_ROOT='${{ github.workspace }}\install' - CIBW_ENVIRONMENT_MACOS: "CTRANSLATE2_ROOT='/usr/local' MACOSX_DEPLOYMENT_TARGET=11.00" - CIBW_BEFORE_ALL_LINUX: python/tools/prepare_build_environment_linux.sh - CIBW_BEFORE_ALL_MACOS: python/tools/prepare_build_environment_macos.sh - CIBW_BEFORE_ALL_WINDOWS: bash python/tools/prepare_build_environment_windows.sh - CIBW_BEFORE_BUILD: pip install -r python/install_requirements.txt - CIBW_MANYLINUX_X86_64_IMAGE: manylinux_2_28 - CIBW_MANYLINUX_AARCH64_IMAGE: manylinux_2_28 - CIBW_ARCHS: ${{ matrix.arch }} - CIBW_SKIP: "*-musllinux_*" - - - name: Upload Python wheels - uses: actions/upload-artifact@v4 - with: - name: python-wheels-${{ runner.os }}-${{ matrix.arch }} - path: python/wheelhouse - - build-python-wheels-rocm: - runs-on: ${{ matrix.os }} - strategy: - matrix: - os: [ubuntu-24.04, windows-2025] - - steps: - - uses: actions/checkout@v6 - with: - submodules: recursive - - - name: Build wheels - uses: pypa/cibuildwheel@v3.2.1 - with: - package-dir: python - output-dir: python/wheelhouse - env: - CIBW_ENVIRONMENT_LINUX: ROCM_PATH=/opt/rocm LD_LIBRARY_PATH=/opt/rocm/lib/llvm/lib:$LD_LIBRARY_PATH - CIBW_ENVIRONMENT_WINDOWS: CTRANSLATE2_ROOT='${{ github.workspace }}\install' - CIBW_BEFORE_ALL_LINUX: python/tools/prepare_build_environment_linux_rocm.sh - CIBW_BEFORE_ALL_WINDOWS: bash python/tools/prepare_build_environment_windows_rocm.sh - CIBW_BEFORE_BUILD: pip install -r python/install_requirements.txt - CIBW_MANYLINUX_X86_64_IMAGE: manylinux_2_28 - CIBW_ARCHS: auto64 - CIBW_SKIP: "*-musllinux_*" - CIBW_REPAIR_WHEEL_COMMAND_LINUX: 'auditwheel repair -w {dest_dir} --exclude "/opt/rocm/lib/lib*" {wheel}' - - - name: Upload Python wheels - uses: actions/upload-artifact@v6 - with: - name: rocm-python-wheels-${{ runner.os }} - path: python/wheelhouse - - - # We could test the Python wheels using cibuildwheel but we prefer to run the tests outside - # the build environment to ensure wheels correctly embed all dependencies. - test-python-wheels: - needs: [build-python-wheels] - runs-on: ${{ matrix.os }} - strategy: - matrix: - include: - - os: ubuntu-22.04 - artifact_pattern: python-wheels-Linux-auto64 - wheel_pattern: "*cp310*manylinux*x86_64.whl" - - - os: ubuntu-22.04-arm - artifact_pattern: python-wheels-Linux-aarch64 - wheel_pattern: "*cp310*manylinux*_aarch64.whl" - - - os: windows-2022 - artifact_pattern: python-wheels-Windows-auto64 - wheel_pattern: "*cp310*win*amd64.whl" - - - os: macos-15 - artifact_pattern: python-wheels-macOS-arm64 - wheel_pattern: "*cp310*macosx*arm64.whl" - - steps: - - name: Set up Python 3.10.11 - uses: actions/setup-python@v5 - with: - python-version: "3.10.11" - - - uses: actions/checkout@v4 - - - name: Prepare test environment - shell: bash - run: | - ./python/tools/prepare_test_environment.sh - - - name: Download Python wheels - uses: actions/download-artifact@v4 - with: - pattern: ${{ matrix.artifact_pattern }} - merge-multiple: true - path: . - - - name: Install wheel - shell: bash - run: | - pip install ${{ matrix.wheel_pattern }} - - - name: Test Python wheel - run: | - pytest -v python/tests/ --ignore=python/tests/test_opennmt_tf.py - - - check-python-style: - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v4 - - - name: Set up Python 3.10.11 - uses: actions/setup-python@v5 - with: - python-version: "3.10.11" - - - name: Install dependencies - run: | - python -m pip install black==22.* flake8==3.8.* isort==5.* - - - name: Check code format with Black - working-directory: python - run: | - black --check . - - - name: Check imports order with isort - working-directory: python - run: | - isort --check-only . - - - name: Check code style with Flake8 - working-directory: python - if: ${{ always() }} - run: | - flake8 . - - - publish-python-wheels-on-pypi: - if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') - needs: [build-and-test-cpp-x86_64, build-and-test-cpp-arm64, build-python-wheels, test-python-wheels, check-python-style] - runs-on: ubuntu-22.04 - - steps: - - name: Download Python wheels - uses: actions/download-artifact@v4 - with: - pattern: python-wheels-* - merge-multiple: true - path: . - - - name: Publish Python wheels to PyPI - uses: pypa/gh-action-pypi-publish@release/v1 - with: - user: __token__ - password: ${{ secrets.PYPI_API_TOKEN }} - packages-dir: . - build-and-push-docker-images: runs-on: ${{ matrix.os }} @@ -426,65 +69,3 @@ jobs: if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') run: | ./docker/build_all.sh ${GITHUB_REF##*/v} 1 ${{ matrix.gpu }} ${{ matrix.suffix }} - - - build-and-deploy-docs: - runs-on: ubuntu-latest - needs: [check-python-style, build-python-wheels, build-python-wheels-rocm] - - permissions: - contents: write - steps: - - uses: actions/checkout@v4 - - name: Set up Python 3.10.11 - uses: actions/setup-python@v5 - with: - python-version: "3.10.11" - - name: Download CTranslate2 wheels - uses: actions/download-artifact@v4 - with: - pattern: python-wheels-${{ runner.os }}-* - merge-multiple: true - path: . - - name: Install CTranslate2 wheel - run: | - pip install *cp310*manylinux*x86_64.whl - - name: Install dependencies to build docs - working-directory: docs - run: | - python -m pip install -r requirements.txt - - name: Build docs - working-directory: docs - run: | - python generate.py python - sphinx-build . build - - name: Deploy docs - if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') - uses: JamesIves/github-pages-deploy-action@v4 - with: - folder: docs/build - clean: true - - name: Download ROCm wheels and zip for release - if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: | - mkdir -p release-zips - - gh run download ${{ github.run_id }} \ - --name rocm-python-wheels-Linux \ - --dir temp-linux - zip -r release-zips/rocm-python-wheels-Linux.zip temp-linux/* - rm -rf temp-linux - - gh run download ${{ github.run_id }} \ - --name rocm-python-wheels-Windows \ - --dir temp-windows - zip -r release-zips/rocm-python-wheels-Windows.zip temp-windows/* - rm -rf temp-windows - - - name: Create GitHub Release - if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') - uses: softprops/action-gh-release@v2 - with: - files: release-zips/*.zip From bc2fe56c43b570fd8a4dba505aa5d8320d7f6c40 Mon Sep 17 00:00:00 2001 From: Nicolas Boganski Date: Wed, 4 Mar 2026 12:35:01 +0100 Subject: [PATCH 08/10] Fix build_all.sh --- docker/build_all.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/build_all.sh b/docker/build_all.sh index 6ce5b2f93..131352a56 100755 --- a/docker/build_all.sh +++ b/docker/build_all.sh @@ -47,7 +47,7 @@ build() if [ "$GPU" == "rocm" ]; then build Dockerfile_rocm ubuntu22.04-rocm7.2 elif [ "$GPU" == "cudaarm" ]; then - build Dockerfile_openblas ubuntu22.04-openblas + build Dockerfile_cuda_arm ubuntu22.04-openblas else build Dockerfile ubuntu22.04-cuda12.8 fi From a6e4cd29155cb2b28321bf32d75f05b7814d2e6a Mon Sep 17 00:00:00 2001 From: Nicolas Boganski Date: Wed, 4 Mar 2026 12:41:06 +0100 Subject: [PATCH 09/10] Fix build_all.sh 2 --- docker/Dockerfile_cuda_arm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/Dockerfile_cuda_arm b/docker/Dockerfile_cuda_arm index 0e74f2180..abb064007 100644 --- a/docker/Dockerfile_cuda_arm +++ b/docker/Dockerfile_cuda_arm @@ -14,7 +14,7 @@ WORKDIR /root RUN python3 -m pip --no-cache-dir install cmake==3.22.* ENV ONEDNN_VERSION=3.1.1 -RUN wget -q https://github.com/uxlffoundation/oneDNN/archive/refs/tags/v${ONEDNN_VERSION}.tar.gz && \ +RUN wget -q https://github.com/uxlfoundation/oneDNN/archive/refs/tags/v${ONEDNN_VERSION}.tar.gz && \ tar xf *.tar.gz && \ rm *.tar.gz && \ cd oneDNN-* && \ From bb789330515c7fd543f99fa67cf8410318085d87 Mon Sep 17 00:00:00 2001 From: Nicolas Boganski Date: Wed, 4 Mar 2026 13:28:54 +0100 Subject: [PATCH 10/10] Fix Dockerfile_cuda_arm --- docker/Dockerfile_cuda_arm | 13 ++++++------- docker/build_all.sh | 2 +- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/docker/Dockerfile_cuda_arm b/docker/Dockerfile_cuda_arm index abb064007..9b242c20d 100644 --- a/docker/Dockerfile_cuda_arm +++ b/docker/Dockerfile_cuda_arm @@ -42,7 +42,6 @@ COPY python python COPY CMakeLists.txt . ARG CXX_FLAGS -ENV CXX_FLAGS=${CXX_FLAGS:-"-msse4.1"} ARG CUDA_NVCC_FLAGS ENV CUDA_NVCC_FLAGS=${CUDA_NVCC_FLAGS:-"-Xfatbin=-compress-all"} ARG CUDA_ARCH_LIST @@ -53,7 +52,7 @@ ENV LD_LIBRARY_PATH=/usr/local/lib/:${LD_LIBRARY_PATH} RUN mkdir build_tmp && \ cd build_tmp && \ cmake -DCMAKE_INSTALL_PREFIX=${CTRANSLATE2_ROOT} \ - -DWITH_CUDA=ON -DWITH_CUDNN=ON -DWITH_MKL=ON -DWITH_DNNL=ON -DOPENMP_RUNTIME=COMP \ + -DWITH_CUDA=ON -DWITH_CUDNN=ON -DWITH_MKL=OFF -DWITH_DNNL=ON -DOPENMP_RUNTIME=COMP \ -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS="${CXX_FLAGS}" \ -DCUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS}" -DCUDA_ARCH_LIST="${CUDA_ARCH_LIST}" -DWITH_TENSOR_PARALLEL=ON .. && \ VERBOSE=1 make -j$(nproc) install @@ -71,15 +70,15 @@ FROM nvidia/cuda:12.8.1-base-ubuntu22.04 # See e.g. https://github.com/NVIDIA/nvidia-docker/issues/1515 RUN apt-get update && \ apt-get install -y --no-install-recommends \ - libcublas-12-8=12.8.4.1-1 \ - libcudnn9-cuda-12=9.10.2.21-1 \ - libnccl2=2.26.2-1+cuda12.8 \ - libopenmpi3=4.1.2-2ubuntu1 \ + libcublas-12-8 \ + libcudnn9-cuda-12 \ + libnccl2 \ + libopenmpi3 \ openmpi-bin \ libgomp1 \ python3-pip \ && \ - apt-get purge -y cuda-compat-12-8 && \ + apt-get purge -y cuda-compat-12-8 || true && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* diff --git a/docker/build_all.sh b/docker/build_all.sh index 131352a56..03cf31e67 100755 --- a/docker/build_all.sh +++ b/docker/build_all.sh @@ -47,7 +47,7 @@ build() if [ "$GPU" == "rocm" ]; then build Dockerfile_rocm ubuntu22.04-rocm7.2 elif [ "$GPU" == "cudaarm" ]; then - build Dockerfile_cuda_arm ubuntu22.04-openblas + build Dockerfile_cuda_arm ubuntu22.04-cuda12.8 else build Dockerfile ubuntu22.04-cuda12.8 fi