# Merge Queue Checks for refs/heads/main (#89)
# NOTE(review): the two GitHub web-view boilerplate lines that followed the title
# ("This file contains hidden or bidirectional Unicode text ..." and
# "Learn more about bidirectional Unicode characters") were page chrome from the
# rendered view, not part of the workflow; they are preserved here as comments.
name: Merge Queue Checks
run-name: Merge Queue Checks for ${{ github.ref }}

on:
  workflow_dispatch:
  merge_group:
    # NOTE(review): GitHub documents only `branches`/`branches-ignore` filters for
    # the `merge_group` event; this `paths-ignore` filter is likely ignored — confirm.
    paths-ignore:
      - "docs/**"
  schedule:
    - cron: "0 0 * * *" # Runs at 00:00 UTC every day

# When triggered from the merge queue, cancel any existing workflow runs for the same PR branch
# Otherwise, use the unique run id for the concurrency group, to prevent anything from getting cancelled
concurrency:
  group: ${{ github.event_name == 'merge_group' && format('{0}-{1}', github.workflow, github.ref) || github.run_id }}
  cancel-in-progress: true

# Provider credentials and endpoints shared by every job in this workflow.
# Expression spacing is normalized to `${{ secrets.X }}` throughout for consistency.
env:
  ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
  AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
  AWS_REGION: "us-east-1"
  AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
  AZURE_AI_FOUNDRY_API_KEY: ${{ secrets.AZURE_AI_FOUNDRY_API_KEY }}
  AZURE_OPENAI_API_BASE: ${{ secrets.AZURE_OPENAI_API_BASE }}
  AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }}
  AZURE_OPENAI_DEPLOYMENT_ID: ${{ secrets.AZURE_OPENAI_DEPLOYMENT_ID }}
  DEEPSEEK_API_KEY: ${{ secrets.DEEPSEEK_API_KEY }}
  FIREWORKS_ACCOUNT_ID: ${{ secrets.FIREWORKS_ACCOUNT_ID }}
  FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }}
  FORCE_COLOR: 1
  GCP_STORAGE_ACCESS_KEY_ID: ${{ secrets.GCP_STORAGE_ACCESS_KEY_ID }}
  GCP_STORAGE_SECRET_ACCESS_KEY: ${{ secrets.GCP_STORAGE_SECRET_ACCESS_KEY }}
  GCP_VERTEX_CREDENTIALS_PATH: ${{ github.workspace }}/gcp_jwt_key.json
  GOOGLE_AI_STUDIO_API_KEY: ${{ secrets.GOOGLE_AI_STUDIO_API_KEY }}
  GOOGLE_APPLICATION_CREDENTIALS: ${{ github.workspace }}/gcp_jwt_key.json
  GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
  HYPERBOLIC_API_KEY: ${{ secrets.HYPERBOLIC_API_KEY }}
  MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
  MODAL_KEY: ${{ secrets.MODAL_KEY }}
  MODAL_SECRET: ${{ secrets.MODAL_SECRET }}
  OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
  OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
  R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }}
  R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }}
  SGLANG_API_KEY: ${{ secrets.SGLANG_API_KEY }}
  TGI_API_KEY: ${{ secrets.TGI_API_KEY }}
  TOGETHER_API_KEY: ${{ secrets.TOGETHER_API_KEY }}
  VLLM_API_BASE: ${{ secrets.VLLM_API_BASE }}
  VLLM_API_KEY: ${{ secrets.VLLM_API_KEY }}
  VLLM_MODEL_NAME: "microsoft/Phi-3.5-mini-instruct"
  VOYAGE_API_KEY: ${{ secrets.VOYAGE_API_KEY }}
  XAI_API_KEY: ${{ secrets.XAI_API_KEY }}
jobs:
  # Delegate the batch-inference tests to their reusable workflow,
  # forwarding every repository secret to it.
  batch-tests:
    if: github.repository == 'tensorzero/tensorzero'
    uses: ./.github/workflows/batch-test.yml
    secrets: inherit
| live-tests: | |
| name: "live-tests (batch_writes: ${{ matrix.batch_writes }})" | |
| runs-on: namespace-profile-tensorzero-16x32 | |
| if: github.repository == 'tensorzero/tensorzero' | |
| permissions: | |
| # Permission to checkout the repository | |
| contents: read | |
| # Permission to fetch GitHub OIDC token authentication | |
| id-token: write | |
| timeout-minutes: 45 | |
| strategy: | |
| matrix: | |
| batch_writes: [true, false] | |
| steps: | |
| - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 | |
| - name: Install gdb | |
| run: sudo apt-get update && sudo apt-get install -y gdb | |
| - name: Warm up Modal instances | |
| run: | | |
| curl -H "Modal-Key: $MODAL_KEY" -H "Modal-Secret: $MODAL_SECRET" https://tensorzero--vllm-inference-vllm-inference.modal.run/docs > vllm_modal_logs.txt & | |
| curl -H "Modal-Key: $MODAL_KEY" -H "Modal-Secret: $MODAL_SECRET" https://tensorzero--sglang-inference-sglang-inference.modal.run/ > sglang_modal_logs.txt & | |
| # TODO: Re-enable once we can switch to a T4 GPU | |
| # curl -H "Modal-Key: $MODAL_KEY" -H "Modal-Secret: $MODAL_SECRET" https://tensorzero--vllm-gpt-oss-20b-serve.modal.run/ > vllm_gpt_oss_modal_logs.txt & | |
| - name: Cleanup disk space | |
| run: ./ci/free-disk-space.sh | |
| - name: Update Rust | |
| run: | | |
| for attempt in 1 2 3; do | |
| if rustup update stable && rustup default stable; then | |
| break | |
| fi | |
| if [ $attempt -eq 3 ]; then | |
| echo "Failed to update Rust after 3 attempts" | |
| exit 1 | |
| fi | |
| sleep $((10 * attempt)) | |
| done | |
| shell: bash | |
| - name: Configure Namespace-powered Buildx | |
| uses: namespacelabs/nscloud-setup-buildx-action@84ca8c58fdf372d6a4750476cd09b7b96ee778ca | |
| - name: Install Rust toolchain | |
| run: | | |
| for attempt in 1 2 3; do | |
| if rustup toolchain install stable && rustup default stable; then | |
| break | |
| fi | |
| if [ $attempt -eq 3 ]; then | |
| echo "Failed to install Rust toolchain after 3 attempts" | |
| exit 1 | |
| fi | |
| sleep $((10 * attempt)) | |
| done | |
| shell: bash | |
| # Start testing workload identity federation credentials once the SDK adds support: https://github.com/googleapis/google-cloud-rust/issues/1342 | |
| # - uses: 'google-github-actions/auth@v2' | |
| # with: | |
| # project_id: 'tensozero-public' | |
| # workload_identity_provider: 'projects/454541351720/locations/global/workloadIdentityPools/github/providers/tensorzero' | |
| - name: Print Rust version | |
| run: rustc --version | |
| - name: Install uv | |
| run: curl -LsSf --retry 2 --retry-delay 10 --retry-max-time 60 https://astral.sh/uv/0.6.17/install.sh | sh | |
| - name: Download ClickHouse fixtures | |
| run: uv run ./ui/fixtures/download-fixtures.py | |
| - name: Install pnpm | |
| run: | | |
| for attempt in 1 2 3; do | |
| if npm install -g pnpm@latest; then | |
| break | |
| fi | |
| if [ $attempt -eq 3 ]; then | |
| echo "Failed to install pnpm after 3 attempts" | |
| exit 1 | |
| fi | |
| sleep $((10 * attempt)) | |
| done | |
| shell: bash | |
| - name: Configure Namespace cache for Python (uv), and pnpm | |
| uses: namespacelabs/nscloud-cache-action@2f50e7d0f70475e6f59a55ba0f05eec9108e77cc | |
| with: | |
| cache: | | |
| pnpm | |
| uv | |
| - name: Install JS dependencies | |
| run: pnpm install --frozen-lockfile | |
| - name: Login to DockerHub | |
| uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 | |
| with: | |
| username: ${{ secrets.DOCKERHUB_USERNAME }} | |
| password: ${{ secrets.DOCKERHUB_TOKEN }} | |
| - name: Install cargo-nextest | |
| uses: taiki-e/install-action@d12e869b89167df346dd0ff65da342d1fb1202fb | |
| with: | |
| tool: cargo-nextest | |
| - name: Write GCP JWT key to file | |
| env: | |
| GCP_JWT_KEY: ${{ secrets.GCP_JWT_KEY }} | |
| run: echo "$GCP_JWT_KEY" > $GITHUB_WORKSPACE/gcp_jwt_key.json | |
| - name: Set up TENSORZERO_CLICKHOUSE_URL for E2E tests | |
| run: | | |
| echo "TENSORZERO_CLICKHOUSE_URL=http://chuser:chpassword@localhost:8123/tensorzero_e2e_tests" >> $GITHUB_ENV | |
| echo "TENSORZERO_CLICKHOUSE_BATCH_WRITES=${{ matrix.batch_writes }}" >> $GITHUB_ENV | |
| - name: Configure batch writes in tensorzero.toml | |
| if: matrix.batch_writes == true | |
| run: | | |
| echo "[gateway.observability.batch_writes]" >> tensorzero-core/tests/e2e/tensorzero.toml | |
| echo "enabled = true" >> tensorzero-core/tests/e2e/tensorzero.toml | |
| echo "flush_interval_ms = 80" >> tensorzero-core/tests/e2e/tensorzero.toml | |
| echo "__force_allow_embedded_batch_writes = true" >> tensorzero-core/tests/e2e/tensorzero.toml | |
| - name: Download provider-proxy cache | |
| # When running as a cron job, don't use the provider-proxy cache. | |
| # The cron job is used to gather information about provider flakiness. | |
| if: github.event_name != 'schedule' | |
| run: | | |
| AWS_ACCESS_KEY_ID=$R2_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY=$R2_SECRET_ACCESS_KEY ./ci/download-provider-proxy-cache.sh | |
| - name: Launch ClickHouse container for E2E tests | |
| run: | | |
| docker compose -f tensorzero-core/tests/e2e/docker-compose.yml up --build -d --wait | |
| - name: Print ClickHouse container logs | |
| if: always() | |
| run: | | |
| docker compose -f tensorzero-core/tests/e2e/docker-compose.yml logs -t | |
| - name: Launch the provider-proxy cache for E2E tests | |
| run: | | |
| ./ci/run-provider-proxy.sh ci | |
| - name: Launch the gateway for E2E tests | |
| run: | | |
| TENSORZERO_E2E_PROXY="http://localhost:3003" cargo run-e2e > e2e_logs.txt 2>&1 & | |
| while ! curl -s -f http://localhost:3000/health >/dev/null 2>&1; do | |
| echo "Waiting for gateway to be healthy..." | |
| sleep 1 | |
| done | |
| echo "GATEWAY_PID=$!" >> $GITHUB_ENV | |
| # We set 'TENSORZERO_E2E_PROXY' here so that embedded gateway tests can use it | |
| # The 'CARGO_NEXTEST_FLAKY_TESTS' variable allows us to mark tests as flaky without merging a PR (if a provider happens to break or goes down) | |
| # We run the tests without the flaky tests, and require them to pass | |
| - name: Run all tests (including E2E tests) | |
| # When running from a cron job (the 'schedule' event), use '--no-fail-fast' so that we get full coverage of flaky providers. | |
| run: | | |
| TENSORZERO_E2E_PROXY="http://localhost:3003" cargo test-e2e ${{ vars.CARGO_NEXTEST_EXTRA_ARGS }} -E "not (${{ vars.CARGO_NEXTEST_FLAKY_TESTS }})" ${{ github.event_name == 'schedule' && '--no-fail-fast' || '' }} | |
| # As a separate step, we run just the flaky tests, and allow them to fail. | |
| # This lets us see if any flaky tests have started succeeding (by looking at the job output), | |
| # so that we can decide to mark them as non-flaky. | |
| # - name: Run flaky E2E tests | |
| # run: | | |
| # TENSORZERO_E2E_PROXY="http://localhost:3003" cargo test-e2e --no-fail-fast ${{ vars.CARGO_NEXTEST_EXTRA_ARGS }} -E "${{ vars.CARGO_NEXTEST_FLAKY_TESTS }}" | |
| # continue-on-error: true | |
| - name: Install Python for python async client tests | |
| run: uv python install 3.9 | |
| - name: "Python: PyO3 Client: pytest" | |
| working-directory: clients/python | |
| run: | | |
| # Start the test in background and capture its PID | |
| bash ./test.sh --verbose & | |
| TEST_PID=$! | |
| echo "Started test.sh with PID: $TEST_PID" | |
| # Wait for 5 minutes (300 seconds) | |
| for i in {1..300}; do | |
| if ! kill -0 $TEST_PID 2>/dev/null; then | |
| echo "Test completed normally" | |
| wait $TEST_PID | |
| exit $? | |
| fi | |
| sleep 1 | |
| done | |
| echo "Test has been running for 5 minutes, capturing backtraces..." | |
| # Get all processes related to our test | |
| echo "=== Process tree ===" | |
| ps -ef | grep -E "(test\.sh|pytest|python)" | grep -v grep || true | |
| echo "=== Capturing backtraces with gdb ===" | |
| # Find all python processes that might be related to our test | |
| PYTHON_PIDS=$(pgrep -f "python.*pytest" || true) | |
| if [ -n "$PYTHON_PIDS" ]; then | |
| for pid in $PYTHON_PIDS; do | |
| echo "--- Backtrace for Python process $pid ---" | |
| gdb -p $pid --batch \ | |
| -ex "set pagination off" \ | |
| -ex "thread apply all bt" \ | |
| -ex "info threads" \ | |
| -ex "detach" \ | |
| -ex "quit" 2>&1 || true | |
| echo "" | |
| done | |
| else | |
| echo "No Python processes found" | |
| fi | |
| exit 1 | |
| - name: "Node.js: OpenAI Client: test" | |
| working-directory: clients/openai-node | |
| run: | | |
| pnpm run test | |
| - name: Install Go | |
| uses: actions/setup-go@29694d72cd5e7ef3b09496b39f28a942af47737e | |
| with: | |
| go-version: "1.24" | |
| - name: "Go: OpenAI Client: test" | |
| working-directory: clients/openai-go/tests | |
| run: go test -v | |
| - name: "Python: Recipes: pytest" | |
| working-directory: recipes | |
| run: | | |
| uv run pytest | |
| - name: Terminate the gateway | |
| run: | | |
| echo "Killing gateway with pid $GATEWAY_PID" | |
| kill $GATEWAY_PID | |
| - name: Check e2e logs for deprecation warnings | |
| run: | | |
| ! grep -i "Deprecation Warning" e2e_logs.txt | |
| - name: Print e2e logs | |
| if: always() | |
| run: cat e2e_logs.txt | |
| - name: Print provider-proxy logs | |
| if: always() | |
| run: cat provider_proxy_logs.txt | |
| - name: Print vLLM modal logs | |
| if: always() | |
| run: cat vllm_modal_logs.txt | |
| - name: Print SGLang modal logs | |
| if: always() | |
| run: cat sglang_modal_logs.txt | |
| - name: Print vLLM GPT-OSS modal logs | |
| if: always() | |
| continue-on-error: true | |
| run: cat vllm_gpt_oss_modal_logs.txt | |
| - name: Upload provider-proxy cache | |
| # Only upload the cache when we're running from a 'good' run | |
| # (a merge queue entry which passed this check, or a cron job) | |
| # This prevents manual workflow runs from modifying the cache | |
| if: github.event_name == 'merge_group' || github.event_name == 'schedule' | |
| run: | | |
| AWS_ACCESS_KEY_ID=$R2_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY=$R2_SECRET_ACCESS_KEY ./ci/upload-provider-proxy-cache.sh | |
| check-production-docker-container: | |
| runs-on: ubuntu-latest | |
| if: github.repository == 'tensorzero/tensorzero' | |
| steps: | |
| - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 | |
| - name: Build Docker container for production deployment tests | |
| run: docker build -t tensorzero/gateway -f gateway/Dockerfile . | |
| - name: Launch ClickHouse container for E2E tests | |
| run: | | |
| # 'docker compose' will exit with status code 1 if any container exits, even if the container exits with status code 0 | |
| docker compose -f tensorzero-core/tests/e2e/docker-compose.yml up -d --wait || true | |
| - name: Set up .env file for production deployment tests | |
| run: | | |
| echo "OPENAI_API_KEY=${{ secrets.OPENAI_API_KEY }}" > examples/production-deployment/.env | |
| echo "TENSORZERO_CLICKHOUSE_URL=http://chuser:chpassword@host.docker.internal:8123/tensorzero" >> examples/production-deployment/.env | |
| echo "TENSORZERO_DISABLE_PSEUDONYMOUS_USAGE_ANALYTICS=1" >> examples/production-deployment/.env | |
| - name: Run docker compose for production deployment tests | |
| run: docker compose -f examples/production-deployment/docker-compose.yml up -d --wait | |
| - name: Run inference for production deployment tests | |
| run: examples/production-deployment/run.sh | |
| - name: Print Docker compose logs | |
| if: always() | |
| run: | | |
| docker compose -f examples/production-deployment/docker-compose.yml logs -t | |
| - name: Take down docker compose for production deployment tests | |
| run: | | |
| docker compose -f examples/production-deployment/docker-compose.yml down | |
| docker compose -f tensorzero-core/tests/e2e/docker-compose.yml down | |
| # Test that the ui e2e tests still pass after we regenerate the model inference cache | |
| ui-tests-e2e-regen-model-inference-cache: | |
| if: github.repository == 'tensorzero/tensorzero' | |
| uses: ./.github/workflows/ui-tests-e2e-model-inference-cache.yml | |
| with: | |
| regen_cache: true | |
| secrets: | |
| S3_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} | |
| S3_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} | |
| OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} | |
| FIREWORKS_ACCOUNT_ID: ${{ secrets.FIREWORKS_ACCOUNT_ID }} | |
| FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }} | |
| ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} | |
| # See 'ci/README.md' at the repository root for more details. | |
| check-all-live-tests-passed: | |
| if: always() && github.repository == 'tensorzero/tensorzero' | |
| needs: | |
| [ | |
| check-production-docker-container, | |
| ui-tests-e2e-regen-model-inference-cache, | |
| live-tests, | |
| batch-tests, | |
| ] | |
| runs-on: ubuntu-latest | |
| steps: | |
| # When running in the merge queue, jobs should never be skipped. | |
| # In a scheduled run, some jobs may be intentionally skipped, as we only care about regenerating the model inference cache. | |
| - if: ${{ contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') || contains(needs.*.result, 'skipped') || (github.event_name == 'merge_group' && contains(needs.*.result, 'skipped')) }} | |
| run: exit 1 |