Tests

[MNT] Dockerized tests for CI runs using localhost #2183

Workflow file for this run

	---
	# Test workflow. Triggered manually, by pushes to main/develop, by
	# version tags, and by pull requests that target main/develop.
	name: Tests

	on:
	  workflow_dispatch:

	  push:
	    branches:
	      - main
	      - develop
	    tags:
	      # Three-component version tags such as v1.2.3.
	      - "v*.*.*"

	  pull_request:
	    branches:
	      - main
	      - develop

	# Runs are grouped per workflow and pull-request number (falling back to
	# the ref when the event is not a pull request); starting a new run in a
	# group cancels the one still in progress.
	concurrency:
	  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
	  cancel-in-progress: true

	jobs:
	  # Matrix test job. The base matrix runs the sklearn-marked tests on
	  # ubuntu for every supported Python / scikit-learn pair; the `include`
	  # entries add full runs (ubuntu 3.14, Windows, coverage, pandas 2).
	  # On ubuntu the job also brings up the OpenML services with Docker
	  # before the tests run.
	  test:
	    name: (${{ matrix.os }},Py${{ matrix.python-version }},sk${{ matrix.scikit-learn }}${{ matrix.pandas-version != '' && format(',pd:{0}', matrix.pandas-version) || '' }},sk-only:${{ matrix.sklearn-only }})
	    runs-on: ${{ matrix.os }}

	    strategy:
	      fail-fast: false
	      matrix:
	        python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
	        # Wildcard patch versions; these must be spelled exactly like the
	        # values used in `exclude` below for the exclusions to match.
	        scikit-learn: ["1.3.*", "1.4.*", "1.5.*", "1.6.*", "1.7.*"]
	        os: [ubuntu-latest]
	        sklearn-only: ["true"]

	        exclude:
	          # incompatible version combinations
	          - python-version: "3.13"
	            scikit-learn: "1.3.*"
	          - python-version: "3.13"
	            scikit-learn: "1.4.*"
	          - python-version: "3.14"
	            scikit-learn: "1.3.*"
	          - python-version: "3.14"
	            scikit-learn: "1.4.*"

	        include:
	          # Full test run on ubuntu, 3.14
	          - os: ubuntu-latest
	            python-version: "3.14"
	            scikit-learn: "1.7.*"
	            sklearn-only: "false"

	          # Full test run on Windows
	          - os: windows-latest
	            python-version: "3.12"
	            scikit-learn: "1.5.*"
	            sklearn-only: "false"

	          # Coverage run
	          - os: ubuntu-latest
	            python-version: "3.12"
	            scikit-learn: "1.5.*"
	            sklearn-only: "false"
	            code-cov: true

	          # Pandas 2 run
	          - os: ubuntu-latest
	            python-version: "3.12"
	            scikit-learn: "1.5.*"
	            sklearn-only: "false"
	            pandas-version: "2.*"
	            code-cov: false

	    steps:
	      - uses: actions/checkout@v6
	        with:
	          fetch-depth: 2

	      - name: Setup Python ${{ matrix.python-version }}
	        uses: actions/setup-python@v5
	        with:
	          python-version: ${{ matrix.python-version }}

	      - name: Install test dependencies, scikit-learn, and optional pandas
	        shell: bash
	        run: |
	          python -m pip install --upgrade pip
	          # Quoted so bash never glob-expands `.[test]` or the `*` wildcard.
	          pip install -e ".[test]" "scikit-learn==${{ matrix.scikit-learn }}"

	          if [ "${{ matrix.pandas-version }}" != "" ]; then
	            echo "Installing specific pandas version: ${{ matrix.pandas-version }}"
	            pip install "pandas==${{ matrix.pandas-version }}"
	          fi

	      # Records `git status` so the final step can detect files the tests
	      # left behind.
	      - name: Store repository status
	        id: status-before
	        if: matrix.os != 'windows-latest'
	        run: |
	          git_status=$(git status --porcelain -b)
	          # Delimiter form: a plain NAME=value line cannot carry the
	          # multi-line output git prints when the tree is not clean.
	          {
	            echo "BEFORE<<__BEFORE_STATUS_EOF__"
	            echo "$git_status"
	            echo "__BEFORE_STATUS_EOF__"
	          } >> "$GITHUB_ENV"
	          echo "Repository status before tests: $git_status"

	      # Only the ubuntu legs start the Docker services, and `sudo` with
	      # /etc/hosts is not available on the Windows runner.
	      - name: Configure Host Network (The "Magic" Step)
	        if: matrix.os == 'ubuntu-latest'
	        run: |
	          # Map 'nginx' to localhost so the Host machine can resolve the URLs in the database
	          echo "127.0.0.1 nginx" | sudo tee -a /etc/hosts

	      - name: Clone Services & Apply Universal Patch
	        if: matrix.os == 'ubuntu-latest'
	        run: |
	          git clone --depth 1 https://github.com/openml/services.git
	          cd services
	          # An identity is required for the merge commits created below.
	          git config user.email "ci@openml.org"
	          git config user.name "CI"
	          git fetch origin pull/13/head:pr-13 && git merge pr-13 --no-edit
	          git fetch origin pull/15/head:pr-15 && git merge pr-15 --no-edit

	          # === PATCH 1: Use 'nginx' hostname ===
	          # This works inside Docker (DNS) and on Host (via the /etc/hosts entry added earlier)
	          sed -i 's/localhost:8000/nginx:8000/g' config/database/update.sh

	          # === PATCH 2: Fix Path Mismatch ===
	          # Ensure we use /data/ which Nginx recognizes
	          sed -i 's|/minio/|/data/|g' config/database/update.sh

	          echo "=== Patched Update Script ==="
	          # grep exits non-zero (failing the step) if patch 1 left no 'nginx' line.
	          grep "nginx" config/database/update.sh

	      - name: Start Docker Services
	        if: matrix.os == 'ubuntu-latest'
	        working-directory: ./services
	        run: |
	          sudo systemctl stop mysql.service
	          docker compose --profile rest-api --profile minio --profile evaluation-engine up -d --build

	          echo "1. Waiting for Database population..."
	          docker wait openml-test-database-setup

	          echo "2. Waiting for Elasticsearch (this is the slow part)..."
	          # Wait up to 5 minutes for the ES container to report healthy
	          timeout 300s bash -c 'until [ "$(docker inspect -f {{.State.Health.Status}} openml-elasticsearch)" == "healthy" ]; do sleep 5; done'

	          echo "3. Waiting for PHP API..."
	          # Wait up to 5 minutes for the PHP container to report healthy
	          timeout 300s bash -c 'until [ "$(docker inspect -f {{.State.Health.Status}} openml-php-rest-api)" == "healthy" ]; do sleep 5; done'

	      - name: Finalize Setup & Verify Splits
	        if: matrix.os == 'ubuntu-latest'
	        run: |
	          echo "1. Forcing Elasticsearch Indexing Sync (With Auth)..."
	          # We append the default test API key (AD0...0) to authorize the admin action
	          curl -s "http://nginx:8000/api/v1/xml/admin/index/sync?api_key=AD000000000000000000000000000000" || echo "Sync request failed"

	          echo "2. Waiting for Evaluation Engine to process Task 119..."
	          echo "Targeting Task 119 (The primary failure point)."

	          # 30 polls with a 10 s sleep between them: about 5 minutes in total.
	          count=0
	          while [ $count -lt 30 ]; do
	            code=$(curl -s -o /dev/null -w "%{http_code}" http://nginx:8000/api_splits/get/119/Task_119_splits.arff)
	            if [ "$code" == "200" ]; then
	              echo "✅ SUCCESS: Task 119 splits are ready!"
	              exit 0
	            fi
	            echo " ... waiting for split generation (Current Status: $code)"
	            sleep 10
	            count=$((count+1))
	          done

	          echo "❌ ERROR: Evaluation Engine timed out."

	          echo "=== DEBUG: LISTING LOG DIR ==="
	          docker exec openml-evaluation-engine ls -R /logs/ || echo "Dir empty"

	          echo "=== DEBUG: DUMPING ALL LOGS ==="
	          # Use wildcard to catch whatever the filename actually is
	          docker exec openml-evaluation-engine sh -c "cat /logs/*.log" || echo "Could not read logs"

	          # Also check the cron log again, just in case
	          docker exec openml-evaluation-engine cat /cron.log || echo "Cron log empty"

	          exit 1

	      - name: Verify API and Splits
	        if: matrix.os == 'ubuntu-latest'
	        run: |
	          echo "Checking Data API..."
	          timeout 60s bash -c 'until [ "$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8000/api/v1/xml/data/1)" == "200" ]; do sleep 5; done'

	          echo "Checking Task Splits (The 412 Killer)..."
	          # If this fails, the evaluation engine is broken
	          timeout 120s bash -c 'until [ "$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8000/api_splits/get/1/Task_1_splits.arff)" == "200" ]; do
	            echo "Splits not ready yet. Waiting..."
	            sleep 5
	          done'

	          echo "System is fully operational."

	      # Diagnostics only: runs when an earlier step has failed and never
	      # forces a failure itself, so a healthy setup proceeds to the tests.
	      # Every command is best-effort; none of them needs the ./services
	      # checkout, so no working-directory is set.
	      - name: Dump service diagnostics on failure
	        if: failure() && matrix.os == 'ubuntu-latest'
	        run: |
	          echo "---------------------------------------------------"
	          echo "1. PROBING: Can we reach the API at all?"
	          timeout 60s bash -c 'until curl -sSf http://localhost:8000/api/v1/xml/data/1 > /dev/null; do sleep 5; done' || echo "WARNING: Main API is slow/down"

	          echo "---------------------------------------------------"
	          echo "2. PROBING: Waiting for Task 119 Splits (The Failure Point)..."
	          # We wait 60s. If it works, great. If not, we want the logs.
	          timeout 60s bash -c 'until curl -sSf http://localhost:8000/api_splits/get/119/Task_119_splits.arff > /dev/null; do
	            echo " ... file not ready yet"
	            sleep 5
	          done' || echo "FAILURE: Task 119 splits were NOT generated."

	          echo "---------------------------------------------------"
	          echo "3.DUMPING EVALUATION ENGINE LOGS (STDOUT)"
	          docker logs openml-evaluation-engine || echo "Could not read evaluation engine logs"

	          echo "---------------------------------------------------"
	          echo "4.DUMPING INTERNAL CRON LOGS (The Hidden Logs)"
	          # The engine runs via cron, so the real errors are often in this file, NOT in docker logs
	          docker exec openml-evaluation-engine cat /cron.log || echo "Could not read /cron.log"

	          echo "---------------------------------------------------"
	          echo "5.DUMPING PHP API LOGS (Why did it throw 412?)"
	          docker logs openml-php-rest-api | grep "412" -B 5 -A 5 || echo "No 412 errors found in logs?"

	          echo "---------------------------------------------------"
	          echo "6.CHECKING NETWORK (Can the container see Nginx?)"
	          # Requests the API from inside the evaluation-engine container
	          docker exec openml-evaluation-engine curl -v http://localhost:8000/api/v1/xml/data/1 || echo "Container cannot connect to localhost:8000"

	      - name: Show installed dependencies
	        run: python -m pip list

	      - name: Run tests on Ubuntu Test
	        if: matrix.os == 'ubuntu-latest'
	        run: |
	          # Coverage flags are only added for the matrix entry with code-cov: true.
	          codecov=""
	          if [ "${{ matrix.code-cov }}" = "true" ]; then
	            codecov="--cov=openml --long --cov-report=xml"
	          fi

	          if [ "${{ matrix.sklearn-only }}" = "true" ]; then
	            marks="sklearn and not production"
	          else
	            marks="not production"
	          fi

	          # $codecov is intentionally unquoted so it splits into separate flags.
	          pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks"

	      - name: Run tests on Ubuntu Production
	        if: matrix.os == 'ubuntu-latest'
	        run: |
	          # Coverage flags are only added for the matrix entry with code-cov: true.
	          codecov=""
	          if [ "${{ matrix.code-cov }}" = "true" ]; then
	            codecov="--cov=openml --long --cov-report=xml"
	          fi

	          if [ "${{ matrix.sklearn-only }}" = "true" ]; then
	            marks="sklearn and production"
	          else
	            marks="production"
	          fi

	          # $codecov is intentionally unquoted so it splits into separate flags.
	          pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks"

	      - name: Run tests on Windows
	        if: matrix.os == 'windows-latest'
	        run: | # we need a separate step because of the bash-specific if-statement in the previous one.
	          pytest -n 4 --durations=20 --dist load -sv --reruns 5 --reruns-delay 1 -m "not uses_test_server"

	      # always(): upload the report even when a test step failed.
	      - name: Upload coverage
	        if: matrix.code-cov && always()
	        uses: codecov/codecov-action@v4
	        with:
	          files: coverage.xml
	          token: ${{ secrets.CODECOV_TOKEN }}
	          fail_ci_if_error: true
	          verbose: true

	      # Removes the services checkout cloned into the workspace so it does
	      # not show up as a leftover in the check below.
	      - name: Cleanup Docker setup
	        if: matrix.os == 'ubuntu-latest' && always()
	        run: |
	          sudo rm -rf services

	      - name: Check for files left behind by test
	        if: matrix.os != 'windows-latest' && always()
	        run: |
	          # BEFORE is exported through GITHUB_ENV by "Store repository status";
	          # reading it from the environment keeps its content out of the script text.
	          before="$BEFORE"
	          after="$(git status --porcelain -b)"
	          if [[ "$before" != "$after" ]]; then
	            echo "git status from before: $before"
	            echo "git status from after: $after"
	            echo "Not all generated files have been deleted!"
	            exit 1
	          fi

	  # The three jobs below only echo two lines and always succeed.
	  # NOTE(review): presumably they keep checks with these exact names
	  # reporting a result — confirm before renaming or removing them.
	  dummy_windows_py_sk024:
	    name: (windows-latest, Py, sk0.24.*, sk-only:false)
	    runs-on: ubuntu-latest
	    steps:
	      - name: Dummy step
	        run: |
	          echo "This is a temporary dummy job."
	          echo "Always succeeds."

	  # NOTE(review): the job id says "windows" but the display name says
	  # ubuntu-latest; both are left unchanged here.
	  dummy_windows_py_sk023:
	    name: (ubuntu-latest, Py3.8, sk0.23.1, sk-only:false)
	    runs-on: ubuntu-latest
	    steps:
	      - name: Dummy step
	        run: |
	          echo "This is a temporary dummy job."
	          echo "Always succeeds."

	  dummy_docker:
	    name: docker
	    runs-on: ubuntu-latest
	    steps:
	      - name: Dummy step
	        run: |
	          echo "This is a temporary dummy docker job."
	          echo "Always succeeds."

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[MNT] Dockerized tests for CI runs using localhost #2183

Workflow file

[MNT] Dockerized tests for CI runs using localhost #2183

Uh oh!

Workflow file for this run