Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
6292a2a
ci: migrate gpu-runner-setup-v2 changes from FlowMesh_dev
Qruixuan May 1, 2026
408a4f5
chore: migrate ci/gpu-runner-setup-v2 docker + shared-copy changes fr…
Qruixuan May 1, 2026
afde467
revert: restore Dockerfiles and unit-tests.yml to FlowMesh main versions
Qruixuan May 2, 2026
0b38e8c
feat: add GPU requirements install + HF import error capture
Qruixuan May 2, 2026
aee6149
feat: add Docker CI compose infrastructure
Qruixuan May 2, 2026
9567d4b
feat: add CI workflow and runner setup guide
Qruixuan May 2, 2026
6600c68
feat: add local CI runner script and fix template output destinations
Qruixuan May 2, 2026
37bef38
fix: redesign CI compose for FlowMesh's single-server architecture
Qruixuan May 3, 2026
b20602a
fix: update CI workflow and run_local.sh for single-server architecture
Qruixuan May 3, 2026
9c5733d
fix: correct worker networking — host-mode workers need localhost URL…
Qruixuan May 3, 2026
62f2fca
feat: add tests/integration/test_e2e.py — E2E smoke test for CI
Qruixuan May 3, 2026
f3325f3
fix: use host bind-mount for worker results to avoid _VolumeInitializer
Qruixuan May 3, 2026
915cbf7
fix: persist HF model cache across CI runs via host bind-mount
Qruixuan May 3, 2026
aa36f34
fix: revert HF cache to named volume (same as FlowMesh_dev), bump vLL…
Qruixuan May 3, 2026
b6d1e2c
fix: replace gated llama model with open Qwen model in dag_inference.…
claude May 3, 2026
344d783
fix: harden ci.yml for zizmor pedantic audit
claude May 4, 2026
e6af601
style: apply isort and black fixes
claude May 4, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
496 changes: 496 additions & 0 deletions .github/workflows/ci.yml

Large diffs are not rendered by default.

85 changes: 85 additions & 0 deletions docker/ci.compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# docker/ci.compose.yml — CI integration test stack (single-host, no GPU)
#
# Brings up a fully isolated FlowMesh environment for each CI run.
# FlowMesh uses a single server container (HTTP API port 8000 + gRPC
# supervisor port 50051); no separate host or database service needed.
#
# Workers are spawned by the server's Docker adapter with network_mode: host.
# They connect to gRPC at localhost:50051 and HTTP at http://localhost:8000.
# Ports 8000 and 50051 MUST therefore be bound on the Docker host machine.
#
# RESULTS_DIR is set to an absolute host path so workers can write results
# without relying on the _VolumeInitializer busybox chown mechanism.
# Caller must create /tmp/flowmesh-ci-results with chmod 777 before 'up'.
#
# NOTE: No ports are published in this base file. Add them via an overlay:
#   - Fixed (GitHub Actions / bare docker compose): docker/ci.ports.fixed.yml
#   - Dynamic (run_local.sh): overlay generated at runtime by the script

# Services of the CI stack: two Redis instances plus the FlowMesh server,
# all attached to the ci-net network declared in this file.
services:
  # Redis instance the server reaches through REDIS_CONTROL_URL.
  redis_control:
    image: redis:7-alpine
    command: ["redis-server", "--loglevel", "warning"]
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 3s
      timeout: 2s
      retries: 10
    networks: [ci-net]

  # Redis instance the server reaches through REDIS_TELEMETRY_URL.
  redis_telemetry:
    image: redis:7-alpine
    command: ["redis-server", "--loglevel", "warning"]
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 3s
      timeout: 2s
      retries: 10
    networks: [ci-net]

  server:
    build:
      # Relative paths resolve against this file's directory (docker/), so
      # the build context is the repository root.
      context: ..
      dockerfile: src/server/Dockerfile
    # Start the server only after both Redis healthchecks report healthy.
    depends_on:
      redis_control:
        condition: service_healthy
      redis_telemetry:
        condition: service_healthy
    environment:
      REDIS_CONTROL_URL: "redis://redis_control:6379/0"
      REDIS_TELEMETRY_URL: "redis://redis_telemetry:6379/0"
      FLOWMESH_API_KEY: "flm-ci-00000000000000000000000000000000"
      # Workers run with network_mode: host, so FLOWMESH_BASE_URL must be
      # reachable from the Docker host (not only from inside the
      # compose-internal ci-net network).
      FLOWMESH_BASE_URL: "http://localhost:8000"
      SERVER_HOST: "server"
      NODE_NAMESPACE: "ci"
      NODE_CLUSTER: "ci-cluster"
      NODE_ALIAS: "ci-server"
      LOG_LEVEL: "INFO"
      # Worker spawning via Docker
      ENABLE_SUPERVISOR: "true"
      FLOWMESH_REGISTRY: "ci"
      FLOWMESH_VERSION: "latest"
      WORKER_CONFIG_PATH: "/etc/flowmesh/worker_config.yaml"
      # Absolute host path for worker results (chmod 777 before 'up').
      # Using an absolute path bypasses the _VolumeInitializer busybox chown
      # so workers (UID 10001) can write without depending on image pulls.
      RESULTS_DIR: "/tmp/flowmesh-ci-results"
      # Pass HuggingFace token through so workers can download gated models.
      # The empty value is intentional: Compose takes the value from the
      # environment that runs `docker compose` and leaves the variable
      # unset in the container when it is not defined there.
      HF_TOKEN:
    volumes:
      # Host Docker socket, used by the server to spawn worker containers.
      - /var/run/docker.sock:/var/run/docker.sock
      # Worker definitions, mounted read-only at WORKER_CONFIG_PATH.
      - ./ci.worker_config.yaml:/etc/flowmesh/worker_config.yaml:ro
    healthcheck:
      # NOTE(review): assumes curl is installed in the server image -
      # confirm against src/server/Dockerfile.
      test: ["CMD", "curl", "-sf", "http://localhost:8000/healthz"]
      interval: 5s
      timeout: 3s
      start_period: 20s
      retries: 12
    networks: [ci-net]

networks:
  # Isolated per-run network with default settings. Compose prefixes the
  # name with the project (-p ci-$RUN_ID), giving <project>_ci-net.
  ci-net: {}
11 changes: 11 additions & 0 deletions docker/ci.gpu_worker_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Worker definitions for the GPU CI run; mounted into the server container
# at /etc/flowmesh/worker_config.yaml by docker/ci.worker.gpu.yml.

# Defaults shared by every worker entry below.
default_worker_config:
  # Heartbeat interval - presumably seconds; confirm against the worker
  # config schema.
  hb_interval: 30

workers:
  # One Docker-provided GPU worker, started together with the server.
  - provider: docker
    init_on_start: true
    worker_config:
      worker_alias: ci-worker-gpu
      worker_type: gpu
      # CUDA device indices handed to this worker (device 0 only).
      cuda_devices: [0]
      # NOTE(review): confirm SSH access is really needed on a CI worker.
      enable_ssh: true
19 changes: 19 additions & 0 deletions docker/ci.ports.fixed.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# docker/ci.ports.fixed.yml — Fixed host-port bindings for CI environments
#
# Include alongside ci.compose.yml when running without run_local.sh
# (e.g. GitHub Actions or a dedicated CI machine where ports 8000/50051
# are guaranteed to be free):
#
# docker compose -p ci-$RUN_ID \
# -f docker/ci.compose.yml \
# -f docker/ci.ports.fixed.yml \
# up -d --build --wait
#
# run_local.sh generates its own dynamic-port overlay instead; this file
# is not used by that script.

services:
  server:
    # Publish the server on fixed host ports. Values are quoted so the
    # host:container pairs are always read as strings.
    ports:
      # HTTP API
      - "8000:8000"
      # gRPC supervisor
      - "50051:50051"
23 changes: 23 additions & 0 deletions docker/ci.worker.gpu.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# docker/ci.worker.gpu.yml — GPU worker overlay for CI
#
# Overlay on top of ci.compose.yml for GPU runner (luyao3, RTX 5080).
# Overrides the worker config to use the GPU image and passes the
# compose network name so GPU workers can reach the server by hostname.
#
# Pre-build the GPU worker image before running compose:
# docker build -f src/worker/docker/Dockerfile.cuda \
# -t ci/flowmesh_worker:latest-gpu .
#
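# Usage (this command publishes no host ports; also pass
# -f docker/ci.ports.fixed.yml when 8000/50051 must be bound on the host):
#   docker compose -p ci-$RUN_ID \
#     -f docker/ci.compose.yml \
#     -f docker/ci.worker.gpu.yml \
#     up -d --build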

services:
  server:
    environment:
      # Same path as in ci.compose.yml; repeated so the overlay documents
      # which file the volume below replaces.
      WORKER_CONFIG_PATH: "/etc/flowmesh/worker_config.yaml"
      # Name of the per-run compose network (<project>_ci-net).
      # COMPOSE_PROJECT_NAME is expected to resolve to the -p project name.
      WORKER_DOCKER_NETWORK: "${COMPOSE_PROJECT_NAME}_ci-net"
    volumes:
      # Same container path as the mount in ci.compose.yml, so the GPU
      # worker config takes the place of the CPU one.
      - ./ci.gpu_worker_config.yaml:/etc/flowmesh/worker_config.yaml:ro
9 changes: 9 additions & 0 deletions docker/ci.worker_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Worker definitions for the default (CPU) CI run; mounted into the server
# container at /etc/flowmesh/worker_config.yaml by docker/ci.compose.yml.

# Defaults shared by every worker entry below.
default_worker_config:
  # Heartbeat interval - presumably seconds; confirm against the worker
  # config schema.
  hb_interval: 30

workers:
  # One Docker-provided CPU worker, started together with the server.
  - provider: docker
    init_on_start: true
    worker_config:
      worker_alias: ci-worker-cpu
      worker_type: cpu
Loading