From 1e40be17ccd9baf07dd5ed6ff070afe852770a40 Mon Sep 17 00:00:00 2001 From: Abhishek Date: Thu, 26 Feb 2026 23:02:55 +0530 Subject: [PATCH 1/6] [MNT] Diagnose and address long test runtimes (#1633) - Add global per-test timeout (600s) to pytest config - CI: report all test durations (--durations=0) for diagnosis - CI: add explicit --timeout=600 to prevent hanging tests - Optimize verify_cache_state fixture: scope function -> module - Add scripts/profile_tests.sh for local duration profiling --- .github/workflows/test.yml | 6 +++--- pyproject.toml | 1 + scripts/profile_tests.sh | 27 +++++++++++++++++++++++++++ tests/conftest.py | 10 ++++++---- 4 files changed, 37 insertions(+), 7 deletions(-) create mode 100755 scripts/profile_tests.sh diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index dc0995fc6..44fccc2e7 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -153,7 +153,7 @@ jobs: marks="not production_server" fi - pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks" + pytest -n 4 --durations=0 --timeout=600 --dist load -sv $codecov -o log_cli=true -m "$marks" - name: Run tests on Ubuntu Production if: matrix.os == 'ubuntu-latest' @@ -171,14 +171,14 @@ jobs: marks="production_server" fi - pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks" + pytest -n 4 --durations=0 --timeout=600 --dist load -sv $codecov -o log_cli=true -m "$marks" - name: Run tests on Windows if: matrix.os == 'windows-latest' env: OPENML_TEST_SERVER_ADMIN_KEY: ${{ secrets.OPENML_TEST_SERVER_ADMIN_KEY }} run: | # we need a separate step because of the bash-specific if-statement in the previous one. 
- pytest -n 4 --durations=20 --dist load -sv --reruns 5 --reruns-delay 1 -m "not test_server" + pytest -n 4 --durations=0 --timeout=600 --dist load -sv --reruns 5 --reruns-delay 1 -m "not test_server" - name: Upload coverage if: matrix.code-cov && always() diff --git a/pyproject.toml b/pyproject.toml index 8c463968b..91235ba04 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -130,6 +130,7 @@ log_level="DEBUG" testpaths = ["tests"] minversion = "7.0" xfail_strict = true +timeout = 600 filterwarnings=[ "ignore:the matrix subclass:PendingDeprecationWarning" ] diff --git a/scripts/profile_tests.sh b/scripts/profile_tests.sh new file mode 100755 index 000000000..593700cff --- /dev/null +++ b/scripts/profile_tests.sh @@ -0,0 +1,27 @@ +#!/bin/bash +# Profile test durations to diagnose slow tests (Issue #1633) +# Usage: ./scripts/profile_tests.sh [marker_filter] +# +# Examples: +# ./scripts/profile_tests.sh # non-server tests +# ./scripts/profile_tests.sh "production_server" # production server tests only +# ./scripts/profile_tests.sh "sklearn" # sklearn tests only + +set -euo pipefail + +MARKER_FILTER="${1:-not production_server and not test_server}" + +echo "=== OpenML Test Duration Profiler ===" +echo "Marker filter: $MARKER_FILTER" +echo "Timeout per test: 300s" +echo "" + +pytest \ + --durations=0 \ + --timeout=300 \ + -q \ + -m "$MARKER_FILTER" \ + 2>&1 | tee test_durations_report.txt + +echo "" +echo "=== Report saved to test_durations_report.txt ===" diff --git a/tests/conftest.py b/tests/conftest.py index 1359e6247..bbb486b3d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -106,7 +106,8 @@ def delete_remote_files(tracker, flow_names) -> None: if "flow" in tracker: to_sort = list(zip(tracker["flow"], flow_names)) flow_deletion_order = [ - entity_id for entity_id, _ in sorted(to_sort, key=lambda x: len(x[1]), reverse=True) + entity_id + for entity_id, _ in sorted(to_sort, key=lambda x: len(x[1]), reverse=True) ] tracker["flow"] = [flow_deletion_order[1] 
for flow_id, _ in flow_deletion_order] @@ -275,7 +276,7 @@ def test_apikey_v2() -> str: return openml.config.get_test_servers()[APIVersion.V2]["apikey"] -@pytest.fixture(autouse=True, scope="function") +@pytest.fixture(autouse=True, scope="module") def verify_cache_state(test_files_directory) -> Iterator[None]: assert_static_test_cache_correct(test_files_directory) yield @@ -324,11 +325,12 @@ def with_test_cache(test_files_directory, request): openml.config.set_root_cache_directory(_root_cache_directory) if tmp_cache.exists(): shutil.rmtree(tmp_cache) - + @pytest.fixture def static_cache_dir(): - return Path(__file__).parent / "files" + return Path(__file__).parent / "files" + @pytest.fixture def workdir(tmp_path): From 0644d2c1f901ea5e264a1b2ef91a519c7ff02794 Mon Sep 17 00:00:00 2001 From: Abhishek Date: Sun, 1 Mar 2026 21:36:51 +0530 Subject: [PATCH 2/6] Address review feedback: revert CI/conftest changes, improve profile script - Revert CI workflow to original --durations=20 (no timeout) - Remove global timeout from pyproject.toml - Revert conftest.py verify_cache_state scope to function - Update profile_tests.sh: accept CLI args (-m, -d, -t, -o) with defaults --- .github/workflows/test.yml | 6 +++--- pyproject.toml | 1 - scripts/profile_tests.sh | 44 +++++++++++++++++++++++++++++--------- tests/conftest.py | 2 +- 4 files changed, 38 insertions(+), 15 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 44fccc2e7..dc0995fc6 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -153,7 +153,7 @@ jobs: marks="not production_server" fi - pytest -n 4 --durations=0 --timeout=600 --dist load -sv $codecov -o log_cli=true -m "$marks" + pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks" - name: Run tests on Ubuntu Production if: matrix.os == 'ubuntu-latest' @@ -171,14 +171,14 @@ jobs: marks="production_server" fi - pytest -n 4 --durations=0 --timeout=600 --dist load -sv $codecov -o 
log_cli=true -m "$marks" + pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks" - name: Run tests on Windows if: matrix.os == 'windows-latest' env: OPENML_TEST_SERVER_ADMIN_KEY: ${{ secrets.OPENML_TEST_SERVER_ADMIN_KEY }} run: | # we need a separate step because of the bash-specific if-statement in the previous one. - pytest -n 4 --durations=0 --timeout=600 --dist load -sv --reruns 5 --reruns-delay 1 -m "not test_server" + pytest -n 4 --durations=20 --dist load -sv --reruns 5 --reruns-delay 1 -m "not test_server" - name: Upload coverage if: matrix.code-cov && always() diff --git a/pyproject.toml b/pyproject.toml index 91235ba04..8c463968b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -130,7 +130,6 @@ log_level="DEBUG" testpaths = ["tests"] minversion = "7.0" xfail_strict = true -timeout = 600 filterwarnings=[ "ignore:the matrix subclass:PendingDeprecationWarning" ] diff --git a/scripts/profile_tests.sh b/scripts/profile_tests.sh index 593700cff..88e6f0ad7 100755 --- a/scripts/profile_tests.sh +++ b/scripts/profile_tests.sh @@ -1,27 +1,51 @@ #!/bin/bash # Profile test durations to diagnose slow tests (Issue #1633) -# Usage: ./scripts/profile_tests.sh [marker_filter] +# +# Usage: ./scripts/profile_tests.sh [options] +# +# Options: +# -m MARKER Pytest marker filter (default: "not production_server and not test_server") +# -d DURATION Number of slowest durations to show, 0 for all (default: 20) +# -t TIMEOUT Per-test timeout in seconds (default: 300) +# -o OUTPUT Output file path for the report (default: test_durations_report.txt) # # Examples: -# ./scripts/profile_tests.sh # non-server tests -# ./scripts/profile_tests.sh "production_server" # production server tests only -# ./scripts/profile_tests.sh "sklearn" # sklearn tests only +# ./scripts/profile_tests.sh +# ./scripts/profile_tests.sh -m "production_server" -d 0 -t 600 +# ./scripts/profile_tests.sh -m "sklearn" -o sklearn_report.txt set -euo pipefail -MARKER_FILTER="${1:-not 
production_server and not test_server}" +# Default values +MARKER_FILTER="not production_server and not test_server" +DURATIONS=20 +TIMEOUT=300 +OUTPUT_FILE="test_durations_report.txt" + +# Parse command line arguments +while getopts "m:d:t:o:" opt; do + case $opt in + m) MARKER_FILTER="$OPTARG" ;; + d) DURATIONS="$OPTARG" ;; + t) TIMEOUT="$OPTARG" ;; + o) OUTPUT_FILE="$OPTARG" ;; + *) echo "Usage: $0 [-m marker] [-d durations] [-t timeout] [-o output_file]" && exit 1 ;; + esac +done echo "=== OpenML Test Duration Profiler ===" echo "Marker filter: $MARKER_FILTER" -echo "Timeout per test: 300s" +echo "Durations to show: $DURATIONS" +echo "Timeout per test: ${TIMEOUT}s" +echo "Output file: $OUTPUT_FILE" echo "" pytest \ - --durations=0 \ - --timeout=300 \ + --durations="$DURATIONS" \ + --timeout="$TIMEOUT" \ -q \ -m "$MARKER_FILTER" \ - 2>&1 | tee test_durations_report.txt + 2>&1 | tee "$OUTPUT_FILE" echo "" -echo "=== Report saved to test_durations_report.txt ===" +echo "=== Report saved to $OUTPUT_FILE ===" diff --git a/tests/conftest.py b/tests/conftest.py index bbb486b3d..03aaafe2d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -276,7 +276,7 @@ def test_apikey_v2() -> str: return openml.config.get_test_servers()[APIVersion.V2]["apikey"] -@pytest.fixture(autouse=True, scope="module") +@pytest.fixture(autouse=True, scope="function") def verify_cache_state(test_files_directory) -> Iterator[None]: assert_static_test_cache_correct(test_files_directory) yield From 37d605c9641d9b881d15ab1e2c18b3f8d5c20b2e Mon Sep 17 00:00:00 2001 From: Abhishek Date: Sun, 1 Mar 2026 22:11:47 +0530 Subject: [PATCH 3/6] Update profile_tests.sh: add -n workers, --dist=load, remove -q - Add -n flag for parallel workers (default: 4) - Add --dist=load to distribute tests across workers - Remove -q flag for full pytest output - Mimics exact pytest command used in CI --- scripts/profile_tests.sh | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git 
a/scripts/profile_tests.sh b/scripts/profile_tests.sh index 88e6f0ad7..05a8cd2fe 100755 --- a/scripts/profile_tests.sh +++ b/scripts/profile_tests.sh @@ -7,12 +7,13 @@ # -m MARKER Pytest marker filter (default: "not production_server and not test_server") # -d DURATION Number of slowest durations to show, 0 for all (default: 20) # -t TIMEOUT Per-test timeout in seconds (default: 300) +# -n WORKERS Number of parallel workers (default: 4) # -o OUTPUT Output file path for the report (default: test_durations_report.txt) # # Examples: # ./scripts/profile_tests.sh # ./scripts/profile_tests.sh -m "production_server" -d 0 -t 600 -# ./scripts/profile_tests.sh -m "sklearn" -o sklearn_report.txt +# ./scripts/profile_tests.sh -m "sklearn" -n 2 -o sklearn_report.txt set -euo pipefail @@ -20,16 +21,18 @@ set -euo pipefail MARKER_FILTER="not production_server and not test_server" DURATIONS=20 TIMEOUT=300 +NUM_WORKERS=4 OUTPUT_FILE="test_durations_report.txt" # Parse command line arguments -while getopts "m:d:t:o:" opt; do +while getopts "m:d:t:n:o:" opt; do case $opt in m) MARKER_FILTER="$OPTARG" ;; d) DURATIONS="$OPTARG" ;; t) TIMEOUT="$OPTARG" ;; + n) NUM_WORKERS="$OPTARG" ;; o) OUTPUT_FILE="$OPTARG" ;; - *) echo "Usage: $0 [-m marker] [-d durations] [-t timeout] [-o output_file]" && exit 1 ;; + *) echo "Usage: $0 [-m marker] [-d durations] [-t timeout] [-n workers] [-o output_file]" && exit 1 ;; esac done @@ -37,13 +40,15 @@ echo "=== OpenML Test Duration Profiler ===" echo "Marker filter: $MARKER_FILTER" echo "Durations to show: $DURATIONS" echo "Timeout per test: ${TIMEOUT}s" +echo "Workers: $NUM_WORKERS" echo "Output file: $OUTPUT_FILE" echo "" pytest \ + --dist=load \ + -n="$NUM_WORKERS" \ --durations="$DURATIONS" \ --timeout="$TIMEOUT" \ - -q \ -m "$MARKER_FILTER" \ 2>&1 | tee "$OUTPUT_FILE" From 30e62827faee82e4b82abc93aba627dd71cd6f99 Mon Sep 17 00:00:00 2001 From: Abhishek Date: Wed, 20 May 2026 20:14:51 +0530 Subject: [PATCH 4/6] Remove profile_tests.sh script Per 
review feedback from @PGijsbers: drop the brittle wrapper script in favor of documenting example pytest invocations directly in CONTRIBUTING.md, which keeps things flexible (e.g. --setup-only, running specific files/classes). --- scripts/profile_tests.sh | 56 ---------------------------------------- 1 file changed, 56 deletions(-) delete mode 100755 scripts/profile_tests.sh diff --git a/scripts/profile_tests.sh b/scripts/profile_tests.sh deleted file mode 100755 index 05a8cd2fe..000000000 --- a/scripts/profile_tests.sh +++ /dev/null @@ -1,56 +0,0 @@ -#!/bin/bash -# Profile test durations to diagnose slow tests (Issue #1633) -# -# Usage: ./scripts/profile_tests.sh [options] -# -# Options: -# -m MARKER Pytest marker filter (default: "not production_server and not test_server") -# -d DURATION Number of slowest durations to show, 0 for all (default: 20) -# -t TIMEOUT Per-test timeout in seconds (default: 300) -# -n WORKERS Number of parallel workers (default: 4) -# -o OUTPUT Output file path for the report (default: test_durations_report.txt) -# -# Examples: -# ./scripts/profile_tests.sh -# ./scripts/profile_tests.sh -m "production_server" -d 0 -t 600 -# ./scripts/profile_tests.sh -m "sklearn" -n 2 -o sklearn_report.txt - -set -euo pipefail - -# Default values -MARKER_FILTER="not production_server and not test_server" -DURATIONS=20 -TIMEOUT=300 -NUM_WORKERS=4 -OUTPUT_FILE="test_durations_report.txt" - -# Parse command line arguments -while getopts "m:d:t:n:o:" opt; do - case $opt in - m) MARKER_FILTER="$OPTARG" ;; - d) DURATIONS="$OPTARG" ;; - t) TIMEOUT="$OPTARG" ;; - n) NUM_WORKERS="$OPTARG" ;; - o) OUTPUT_FILE="$OPTARG" ;; - *) echo "Usage: $0 [-m marker] [-d durations] [-t timeout] [-n workers] [-o output_file]" && exit 1 ;; - esac -done - -echo "=== OpenML Test Duration Profiler ===" -echo "Marker filter: $MARKER_FILTER" -echo "Durations to show: $DURATIONS" -echo "Timeout per test: ${TIMEOUT}s" -echo "Workers: $NUM_WORKERS" -echo "Output file: $OUTPUT_FILE" -echo
"" - -pytest \ - --dist=load \ - -n="$NUM_WORKERS" \ - --durations="$DURATIONS" \ - --timeout="$TIMEOUT" \ - -m "$MARKER_FILTER" \ - 2>&1 | tee "$OUTPUT_FILE" - -echo "" -echo "=== Report saved to $OUTPUT_FILE ===" From 7412079db8e19bfff7ff1d2a260a6d88be25c49c Mon Sep 17 00:00:00 2001 From: Abhishek Date: Wed, 20 May 2026 20:17:41 +0530 Subject: [PATCH 5/6] Refactor flow deletion order sorting in conftest.py: Revert formatting-only changes in tests/conftest.py Per @PGijsbers review: this PR shouldn't touch conftest.py since the changes were purely formatting. Restoring the file to match main. --- tests/conftest.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 03aaafe2d..1359e6247 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -106,8 +106,7 @@ def delete_remote_files(tracker, flow_names) -> None: if "flow" in tracker: to_sort = list(zip(tracker["flow"], flow_names)) flow_deletion_order = [ - entity_id - for entity_id, _ in sorted(to_sort, key=lambda x: len(x[1]), reverse=True) + entity_id for entity_id, _ in sorted(to_sort, key=lambda x: len(x[1]), reverse=True) ] tracker["flow"] = [flow_deletion_order[1] for flow_id, _ in flow_deletion_order] @@ -325,12 +324,11 @@ def with_test_cache(test_files_directory, request): openml.config.set_root_cache_directory(_root_cache_directory) if tmp_cache.exists(): shutil.rmtree(tmp_cache) - + @pytest.fixture def static_cache_dir(): - return Path(__file__).parent / "files" - + return Path(__file__).parent / "files" @pytest.fixture def workdir(tmp_path): From c5b905df4cb20b65163c8a94b20a10b4d1a299e8 Mon Sep 17 00:00:00 2001 From: Abhishek Date: Wed, 20 May 2026 20:20:59 +0530 Subject: [PATCH 6/6] Add diagnostics for slow tests in CONTRIBUTING.md: Document how to diagnose slow tests in CONTRIBUTING.md Added section on diagnosing slow tests with pytest. Following @PGijsbers's suggestion, add a 'Diagnosing Slow Tests' subsection to the testing documentation
with example pytest invocations (--durations, --timeout, --setup-only, marker filters, xdist) instead of shipping a dedicated wrapper script. This keeps the setup flexible: contributors can scope their investigation to specific files/classes or use any pytest argument they need. --- CONTRIBUTING.md | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index d194525ef..38b13825b 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -107,6 +107,32 @@ $env:OPENML_TEST_SERVER_ADMIN_KEY = "admin-key" export OPENML_TEST_SERVER_ADMIN_KEY="admin-key" ``` +#### Diagnosing Slow Tests + +If you suspect a test (or the suite as a whole) is running too slowly, `pytest` already exposes everything you need to investigate it. A few invocations that are useful when looking into test runtimes: + +```bash +# Show the 20 slowest tests (use 0 to list every test's duration) +pytest tests --durations=20 + +# Fail any test that exceeds the given timeout (requires pytest-timeout) +pytest tests --timeout=600 + +# Investigate only fixture/setup costs without actually running the tests +pytest tests --setup-only + +# Profile a specific module, class, or test +pytest tests/test_datasets/test_dataset.py --durations=0 + +# Skip the slow live-server tests while profiling locally +pytest tests --durations=0 -m "not production_server and not test_server" + +# Run the suite in parallel to reproduce CI behaviour (requires pytest-xdist) +pytest tests -n 4 --dist=load --durations=0 +``` + +Combining these with the marker filters (`production_server`, `test_server`, `sklearn`) makes it straightforward to narrow the investigation down to the slow tests without changing project configuration. 
+ ### Pull Request Checklist You can go to the `openml-python` GitHub repository to create the pull request by [comparing the branch](https://github.com/openml/openml-python/compare) from your fork with the `main` branch of the `openml-python` repository. When creating a pull request, make sure to follow the comments and structured provided by the template on GitHub. @@ -214,4 +240,4 @@ When dependencies are installed, run ```bash mkdocs serve ``` -This will open a preview of the website. \ No newline at end of file +This will open a preview of the website.