Merge main

JATAYU000 · JATAYU000 · commit 68ea96c7b0d9 · 2026-05-23T16:53:22.000+05:30
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -107,6 +107,32 @@ $env:OPENML_TEST_SERVER_ADMIN_KEY = "admin-key"
 export OPENML_TEST_SERVER_ADMIN_KEY="admin-key"
 ```
 
+#### Diagnosing Slow Tests
+
+If you suspect a test (or the suite as a whole) is running too slowly, `pytest` already exposes everything you need to investigate it. The following invocations are useful when looking into test runtimes:
+
+```bash
+# Show the 20 slowest tests (use 0 to list every test's duration)
+pytest tests --durations=20
+
+# Fail any test that runs longer than the given timeout in seconds (requires pytest-timeout)
+pytest tests --timeout=600
+
+# Set up fixtures without executing the test bodies, to isolate fixture/setup problems
+pytest tests --setup-only
+
+# Profile a specific module, class, or test
+pytest tests/test_datasets/test_dataset.py --durations=0
+
+# Skip the slow live-server tests while profiling locally
+pytest tests --durations=0 -m "not production_server and not test_server"
+
+# Run the suite in parallel to reproduce CI behaviour (requires pytest-xdist)
+pytest tests -n 4 --dist=load --durations=0
+```
+
+Combining these with the marker filters (`production_server`, `test_server`, `sklearn`) makes it straightforward to narrow the investigation down to the slow tests without changing project configuration.
+
 ### Pull Request Checklist
 
 You can go to the `openml-python` GitHub repository to create the pull request by [comparing the branch](https://github.com/openml/openml-python/compare) from your fork with the `main` branch of the `openml-python` repository. When creating a pull request, make sure to follow the comments and structured provided by the template on GitHub.
@@ -214,4 +240,4 @@ When dependencies are installed, run
 ```bash
 mkdocs serve
 ```
-This will open a preview of the website.
+This will open a preview of the website.
diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py
@@ -259,6 +259,11 @@ def get_datasets(
     -------
     datasets : list of datasets
         A list of dataset objects.
+
+    Examples
+    --------
+    >>> import openml
+    >>> datasets = openml.datasets.get_datasets([1, 2, 3])  # doctest: +SKIP
     """
     datasets = []
     for dataset_id in dataset_ids:
@@ -341,6 +346,13 @@ def get_dataset(
     -------
     dataset : :class:`openml.OpenMLDataset`
         The downloaded dataset.
+
+    Examples
+    --------
+    >>> import openml
+    >>> dataset = openml.datasets.get_dataset(1)  # doctest: +SKIP
+    >>> dataset = openml.datasets.get_dataset("iris", version=1)  # doctest: +SKIP
+    >>> dataset = openml.datasets.get_dataset(1, download_data=True)  # doctest: +SKIP
     """
     if download_all_files:
         warnings.warn(
diff --git a/openml/runs/functions.py b/openml/runs/functions.py
@@ -103,6 +103,15 @@ def run_model_on_task(  # noqa: PLR0913
         Result of the run.
     flow : OpenMLFlow (optional, only if `return_flow` is True).
         Flow generated from the model.
+
+    Examples
+    --------
+    >>> import openml
+    >>> import openml_sklearn  # doctest: +SKIP
+    >>> from sklearn.tree import DecisionTreeClassifier  # doctest: +SKIP
+    >>> clf = DecisionTreeClassifier()  # doctest: +SKIP
+    >>> task = openml.tasks.get_task(6)  # doctest: +SKIP
+    >>> run = openml.runs.run_model_on_task(clf, task)  # doctest: +SKIP
     """
     if avoid_duplicate_runs is None:
         avoid_duplicate_runs = openml.config.avoid_duplicate_runs
@@ -558,9 +567,14 @@ def _run_task_get_arffcontent(  # noqa: PLR0915, PLR0912, C901
     )  # job_rvals contain the output of all the runs with one-to-one correspondence with `jobs`
 
     for n_fit, rep_no, fold_no, sample_no in jobs:
-        pred_y, proba_y, test_indices, test_y, inner_trace, user_defined_measures_fold = job_rvals[
-            n_fit - 1
-        ]
+        (
+            pred_y,
+            proba_y,
+            test_indices,
+            test_y,
+            inner_trace,
+            user_defined_measures_fold,
+        ) = job_rvals[n_fit - 1]
 
         if inner_trace is not None:
             traces.append(inner_trace)
@@ -845,7 +859,10 @@ def get_run(run_id: int, ignore_cache: bool = False) -> OpenMLRun:  # noqa: FBT0
     return _create_run_from_xml(run_xml)
 
 
-def _create_run_from_xml(xml: str, from_server: bool = True) -> OpenMLRun:  # noqa: PLR0915, PLR0912, C901, FBT002
+def _create_run_from_xml(  # noqa: PLR0915, PLR0912, C901
+    xml: str,
+    from_server: bool = True,  # noqa: FBT002
+) -> OpenMLRun:
     """Create a run object from xml returned from server.
 
     Parameters
diff --git a/openml/study/functions.py b/openml/study/functions.py
@@ -29,6 +29,12 @@ def get_suite(suite_id: int | str) -> OpenMLBenchmarkSuite:
     -------
     OpenMLSuite
         The OpenML suite object
+
+    Examples
+    --------
+    >>> import openml
+    >>> suite = openml.study.get_suite(99)  # doctest: +SKIP
+    >>> suite = openml.study.get_suite("OpenML-CC18")  # doctest: +SKIP
     """
     study = _get_study(suite_id, entity_type="task")
     assert isinstance(study, OpenMLBenchmarkSuite)
@@ -58,6 +64,11 @@ def get_study(
     -------
     OpenMLStudy
         The OpenML study object
+
+    Examples
+    --------
+    >>> import openml
+    >>> study = openml.study.get_study(1)  # doctest: +SKIP
     """
     if study_id == "OpenML100":
         message = (
@@ -108,7 +119,10 @@ def _get_study(id_: int | str, entity_type: str) -> BaseStudy:
     tags = []
     if "oml:tag" in result_dict:
         for tag in result_dict["oml:tag"]:
-            current_tag = {"name": tag["oml:name"], "write_access": tag["oml:write_access"]}
+            current_tag = {
+                "name": tag["oml:name"],
+                "write_access": tag["oml:write_access"],
+            }
             if "oml:window_start" in tag:
                 current_tag["window_start"] = tag["oml:window_start"]
             tags.append(current_tag)
@@ -209,6 +223,15 @@ def create_study(
     -------
     OpenMLStudy
         A local OpenML study object (call publish method to upload to server)
+
+    Examples
+    --------
+    >>> import openml
+    >>> study = openml.study.create_study(  # doctest: +SKIP
+    ...     name="My Study",
+    ...     description="A study on decision trees",
+    ...     run_ids=[1, 2, 3],
+    ... )
     """
     return OpenMLStudy(
         study_id=None,
diff --git a/openml/tasks/functions.py b/openml/tasks/functions.py
@@ -338,7 +338,11 @@ def get_tasks(
     tasks = []
     for task_id in task_ids:
         tasks.append(
-            get_task(task_id, download_data=download_data, download_qualities=download_qualities)
+            get_task(
+                task_id,
+                download_data=download_data,
+                download_qualities=download_qualities,
+            )
         )
     return tasks
 
@@ -369,6 +373,12 @@ def get_task(
     Returns
     -------
     task: OpenMLTask
+
+    Examples
+    --------
+    >>> import openml
+    >>> task = openml.tasks.get_task(1)  # doctest: +SKIP
+    >>> task = openml.tasks.get_task(1, download_splits=True)  # doctest: +SKIP
     """
     if not isinstance(task_id, int):
         raise TypeError(f"Task id should be integer, is {type(task_id)}")