Skip to content

Fix MTP text requests with mmproj loaded #33

Fix MTP text requests with mmproj loaded

Fix MTP text requests with mmproj loaded #33

Workflow file for this run

# Workflow display name.
name: Publish Docker image

# Triggers:
#   - pushes to the main branch or to any branch matching v*
#   - pushes of tags matching v*
#   - manual runs (workflow_dispatch)
on:
  push:
    branches:
      - main
      - "v*"
    tags:
      - "v*"
  workflow_dispatch:
# Runs are grouped per workflow + ref. The cancel-in-progress expression is
# true only for branch refs, so a newer branch run replaces an in-progress one
# while runs for tag refs are not cancelled.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: ${{ github.ref_type == 'branch' }}
# Workflow-level GITHUB_TOKEN scopes:
#   contents: read  - check out the repository
#   packages: write - push images to the package registry
permissions:
  contents: read
  packages: write
jobs:
  # Computes the image repository, the version string, and the publish/release
  # flags that the build job consumes through job outputs.
  docker-meta:
    name: Docker metadata
    runs-on: ubuntu-24.04
    # This job only checks out and inspects the repository, so it does not
    # need the workflow-level packages: write scope.
    permissions:
      contents: read
    outputs:
      image_repo: ${{ steps.meta.outputs.image_repo }}
      version: ${{ steps.meta.outputs.version }}
      ref_name_safe: ${{ steps.meta.outputs.ref_name_safe }}
      short_sha: ${{ steps.meta.outputs.short_sha }}
      publish: ${{ steps.meta.outputs.publish }}
      release: ${{ steps.meta.outputs.release }}
    steps:
      - name: Check out the repo
        uses: actions/checkout@v6
        with:
          # Full history: the script below runs `git tag --points-at` and
          # `git merge-base --is-ancestor`.
          fetch-depth: 0

      - name: Resolve image metadata
        id: meta
        shell: bash
        run: |
          set -euo pipefail

          publish="false"
          release="false"
          short_sha="${GITHUB_SHA::12}"

          # Highest (version-sorted) v* tag pointing at this commit, if any.
          exact_tag="$(git tag --points-at "${GITHUB_SHA}" --list 'v*' | sort -V | tail -n 1 || true)"

          # Lower-case the ref name, collapse characters outside [a-z0-9_.-]
          # into '-', and trim leading/trailing '.' and '-'.
          ref_name_safe="$(printf '%s' "${GITHUB_REF_NAME}" | tr '[:upper:]' '[:lower:]' | sed -E 's#[^a-z0-9_.-]+#-#g; s#^[.-]+##; s#[.-]+$##')"
          if [[ -z "${ref_name_safe}" ]]; then
            ref_name_safe="${short_sha}"
          fi

          if [[ "${GITHUB_REF}" == refs/tags/v* ]]; then
            # Tag push: release only when the tagged commit is reachable
            # from origin/main.
            version="${GITHUB_REF_NAME}"
            git fetch origin +refs/heads/main:refs/remotes/origin/main --no-tags
            if git merge-base --is-ancestor "${GITHUB_SHA}" refs/remotes/origin/main; then
              publish="true"
              release="true"
            else
              echo "Tag ${GITHUB_REF_NAME} is not on origin/main; skipping Docker publish."
            fi
          elif [[ "${GITHUB_REF}" == "refs/heads/main" && -n "${exact_tag}" ]]; then
            # Push to main whose head commit already carries a v* tag.
            version="${exact_tag}"
            publish="true"
            release="true"
          elif [[ "${GITHUB_REF}" == "refs/heads/main" || "${GITHUB_REF}" == refs/heads/v* ]]; then
            # Untagged push to main or a v* branch: publish without release tags.
            version="${ref_name_safe}-${short_sha}"
            publish="true"
          elif [[ "${GITHUB_EVENT_NAME}" == "workflow_dispatch" ]]; then
            # Manual run on any other ref: publish without release tags.
            version="${ref_name_safe}-${short_sha}"
            publish="true"
          else
            version="${ref_name_safe}-${short_sha}"
          fi

          # Image repository is built from the lower-cased owner and repo name.
          owner="$(echo "${GITHUB_REPOSITORY_OWNER}" | tr '[:upper:]' '[:lower:]')"
          repo="$(echo "${GITHUB_REPOSITORY#*/}" | tr '[:upper:]' '[:lower:]')"
          image_repo="ghcr.io/${owner}/${repo}"

          echo "image_repo=${image_repo}" >> "${GITHUB_OUTPUT}"
          echo "version=${version}" >> "${GITHUB_OUTPUT}"
          echo "ref_name_safe=${ref_name_safe}" >> "${GITHUB_OUTPUT}"
          echo "short_sha=${short_sha}" >> "${GITHUB_OUTPUT}"
          echo "publish=${publish}" >> "${GITHUB_OUTPUT}"
          echo "release=${release}" >> "${GITHUB_OUTPUT}"

          echo "Using image repository: ${image_repo}"
          echo "Using version: ${version}"
          echo "Publish Docker images: ${publish}"
          echo "Release Docker tags: ${release}"

  # Builds and pushes one server image per matrix entry. Skipped entirely
  # unless docker-meta set publish=true.
  build:
    name: Build server${{ matrix.config.display_suffix }}
    needs: docker-meta
    if: ${{ needs.docker-meta.outputs.publish == 'true' }}
    runs-on: ubuntu-24.04
    strategy:
      # A failing variant does not cancel the other variants.
      fail-fast: false
      matrix:
        # Per-variant settings:
        #   tag_suffixes    - space-separated tag suffixes; "none" means no suffix
        #   cache_name      - distinguishes the registry build-cache tag
        #   free_disk_space - whether to run the disk clean-up step first
        # The optional cuda_*, ubuntu_version and sycl_build_target keys are
        # forwarded as Docker build args only when set.
        config:
          - name: cpu
            display_suffix: " CPU"
            dockerfile: .devops/cpu.Dockerfile
            tag_suffixes: "none -cpu"
            cache_name: cpu
            platforms: linux/amd64,linux/arm64
            free_disk_space: false
          - name: cuda12
            display_suffix: " CUDA 12.4"
            dockerfile: .devops/cuda.Dockerfile
            cuda_version: "12.4.1"
            ubuntu_version: "22.04"
            cuda_arch: "default"
            cuda_build_target: "llama-server"
            tag_suffixes: "-cuda -cuda12"
            cache_name: cuda12
            platforms: linux/amd64
            free_disk_space: true
          - name: cuda13
            display_suffix: " CUDA 13.1"
            dockerfile: .devops/cuda.Dockerfile
            cuda_version: "13.1.1"
            ubuntu_version: "24.04"
            cuda_arch: "default"
            cuda_build_target: "llama-server"
            tag_suffixes: "-cuda13"
            cache_name: cuda13
            platforms: linux/amd64
            free_disk_space: true
          - name: rocm
            display_suffix: " ROCm"
            dockerfile: .devops/rocm.Dockerfile
            tag_suffixes: "-rocm"
            cache_name: rocm
            platforms: linux/amd64
            free_disk_space: true
          - name: vulkan
            display_suffix: " Vulkan"
            dockerfile: .devops/vulkan.Dockerfile
            tag_suffixes: "-vulkan"
            cache_name: vulkan
            platforms: linux/amd64
            free_disk_space: false
          - name: sycl
            display_suffix: " SYCL"
            dockerfile: .devops/intel.Dockerfile
            sycl_build_target: "llama-server"
            tag_suffixes: "-sycl"
            cache_name: sycl
            platforms: linux/amd64
            free_disk_space: true
    steps:
      - name: Check out the repo
        uses: actions/checkout@v6
        with:
          fetch-depth: 0

      # Emulation is only set up for variants whose platform list includes arm64.
      - name: Set up QEMU
        if: ${{ contains(matrix.config.platforms, 'linux/arm64') }}
        uses: docker/setup-qemu-action@v3

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd  # v4

      - name: Log in to GHCR
        uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2  # v4
        with:
          registry: ghcr.io
          username: ${{ github.repository_owner }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Free disk space
        if: ${{ matrix.config.free_disk_space == true }}
        uses: ggml-org/free-disk-space@v1.3.1
        with:
          tool-cache: false
          android: true
          dotnet: true
          haskell: true
          large-packages: true
          docker-images: true
          swap-storage: true

      # UTC timestamp reused for the BUILD_DATE build arg and the
      # image "created" annotation.
      - name: Get build date
        id: build_date
        run: |
          echo "date=$(date -u +"%Y-%m-%dT%H:%M:%SZ")" >> "${GITHUB_OUTPUT}"

      - name: Determine Docker tags
        id: tags
        shell: bash
        # Values are passed through the environment rather than interpolated
        # into the script text.
        env:
          IMAGE_REPO: ${{ needs.docker-meta.outputs.image_repo }}
          VERSION: ${{ needs.docker-meta.outputs.version }}
          REF_NAME_SAFE: ${{ needs.docker-meta.outputs.ref_name_safe }}
          SHORT_SHA: ${{ needs.docker-meta.outputs.short_sha }}
          RELEASE_TAGS: ${{ needs.docker-meta.outputs.release }}
          TAG_SUFFIXES: ${{ matrix.config.tag_suffixes }}
          CACHE_NAME: ${{ matrix.config.cache_name }}
        run: |
          set -euo pipefail

          tags=()
          # TAG_SUFFIXES is intentionally unquoted so it splits on whitespace.
          for suffix in ${TAG_SUFFIXES}; do
            # "none" is the placeholder for the unsuffixed tag.
            if [[ "${suffix}" == "none" ]]; then
              suffix=""
            fi
            if [[ "${RELEASE_TAGS}" == "true" ]]; then
              # Release: floating tag plus a version-pinned tag.
              tags+=("${IMAGE_REPO}:server${suffix}")
              tags+=("${IMAGE_REPO}:server${suffix}-${VERSION}")
            else
              # Non-release: floating per-ref "-dev" tag plus a commit-pinned tag.
              tags+=("${IMAGE_REPO}:server${suffix}-${REF_NAME_SAFE}-dev")
              tags+=("${IMAGE_REPO}:server${suffix}-${REF_NAME_SAFE}-${SHORT_SHA}")
            fi
          done

          # "tags" is written as a multi-line output, one tag per line.
          {
            echo "tags<<EOF"
            printf '%s\n' "${tags[@]}"
            echo "EOF"
            echo "cache_tag=${IMAGE_REPO}:buildcache-server-${CACHE_NAME}"
          } >> "${GITHUB_OUTPUT}"

          printf 'Publishing tags:\n'
          printf ' %s\n' "${tags[@]}"

      - name: Build and push Docker image
        uses: docker/build-push-action@d08e5c354a6adb9ed34480a06d141179aa583294  # v7
        with:
          context: .
          platforms: ${{ matrix.config.platforms }}
          file: ${{ matrix.config.dockerfile }}
          target: server
          push: true
          tags: ${{ steps.tags.outputs.tags }}
          provenance: false
          # The last five lines expand to an empty line when the matrix entry
          # does not define the corresponding key.
          build-args: |
            BUILD_DATE=${{ steps.build_date.outputs.date }}
            APP_VERSION=${{ needs.docker-meta.outputs.version }}
            APP_REVISION=${{ github.sha }}
            IMAGE_URL=${{ github.server_url }}/${{ github.repository }}
            IMAGE_SOURCE=${{ github.server_url }}/${{ github.repository }}
            ${{ matrix.config.ubuntu_version && format('UBUNTU_VERSION={0}', matrix.config.ubuntu_version) || '' }}
            ${{ matrix.config.cuda_version && format('CUDA_VERSION={0}', matrix.config.cuda_version) || '' }}
            ${{ matrix.config.cuda_arch && format('CUDA_DOCKER_ARCH={0}', matrix.config.cuda_arch) || '' }}
            ${{ matrix.config.cuda_build_target && format('CUDA_BUILD_TARGET={0}', matrix.config.cuda_build_target) || '' }}
            ${{ matrix.config.sycl_build_target && format('SYCL_BUILD_TARGET={0}', matrix.config.sycl_build_target) || '' }}
          annotations: |
            manifest:org.opencontainers.image.created=${{ steps.build_date.outputs.date }}
            manifest:org.opencontainers.image.version=${{ needs.docker-meta.outputs.version }}
            manifest:org.opencontainers.image.revision=${{ github.sha }}
            manifest:org.opencontainers.image.title=BeeLlama.cpp
            manifest:org.opencontainers.image.description=BeeLlama.cpp GGUF inference with DFlash, TurboQuant, and TCQ cache types
            manifest:org.opencontainers.image.url=${{ github.server_url }}/${{ github.repository }}
            manifest:org.opencontainers.image.source=${{ github.server_url }}/${{ github.repository }}
          # Registry-backed build cache, one cache tag per variant.
          cache-from: type=registry,ref=${{ steps.tags.outputs.cache_tag }}
          cache-to: type=registry,ref=${{ steps.tags.outputs.cache_tag }},mode=max