Skip to content

Commit 68ea96c

Browse files
committed
Merge main
2 parents ad17680 + 1f6fed4 commit 68ea96c

5 files changed

Lines changed: 95 additions & 7 deletions

File tree

CONTRIBUTING.md

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,32 @@ $env:OPENML_TEST_SERVER_ADMIN_KEY = "admin-key"
107107
export OPENML_TEST_SERVER_ADMIN_KEY="admin-key"
108108
```
109109
110+
#### Diagnosing Slow Tests
111+
112+
If you suspect a test (or the suite as a whole) is running too slowly, `pytest` already exposes everything you need to investigate it. A few invocations that are useful when looking into test runtimes:
113+
114+
```bash
115+
# Show the 20 slowest tests (use 0 to list every test's duration)
116+
pytest tests --durations=20
117+
118+
# Fail any test that exceeds the given timeout in seconds (requires pytest-timeout)
119+
pytest tests --timeout=600
120+
121+
# Investigate only fixture/setup costs without actually running the tests
122+
pytest tests --setup-only
123+
124+
# Profile a specific module, class, or test
125+
pytest tests/test_datasets/test_dataset.py --durations=0
126+
127+
# Skip the slow live-server tests while profiling locally
128+
pytest tests --durations=0 -m "not production_server and not test_server"
129+
130+
# Run the suite in parallel to reproduce CI behaviour (requires pytest-xdist)
131+
pytest tests -n 4 --dist=load --durations=0
132+
```
133+
134+
Combining these with the marker filters (`production_server`, `test_server`, `sklearn`) makes it straightforward to narrow the investigation down to the slow tests without changing project configuration.
135+
110136
### Pull Request Checklist
111137
112138
You can go to the `openml-python` GitHub repository to create the pull request by [comparing the branch](https://github.com/openml/openml-python/compare) from your fork with the `main` branch of the `openml-python` repository. When creating a pull request, make sure to follow the comments and structure provided by the template on GitHub.
@@ -214,4 +240,4 @@ When dependencies are installed, run
214240
```bash
215241
mkdocs serve
216242
```
217-
This will open a preview of the website.
243+
This will open a preview of the website.

openml/datasets/functions.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,11 @@ def get_datasets(
259259
-------
260260
datasets : list of datasets
261261
A list of dataset objects.
262+
263+
Examples
264+
--------
265+
>>> import openml
266+
>>> datasets = openml.datasets.get_datasets([1, 2, 3]) # doctest: +SKIP
262267
"""
263268
datasets = []
264269
for dataset_id in dataset_ids:
@@ -341,6 +346,13 @@ def get_dataset(
341346
-------
342347
dataset : :class:`openml.OpenMLDataset`
343348
The downloaded dataset.
349+
350+
Examples
351+
--------
352+
>>> import openml
353+
>>> dataset = openml.datasets.get_dataset(1) # doctest: +SKIP
354+
>>> dataset = openml.datasets.get_dataset("iris", version=1) # doctest: +SKIP
355+
>>> dataset = openml.datasets.get_dataset(1, download_data=True) # doctest: +SKIP
344356
"""
345357
if download_all_files:
346358
warnings.warn(

openml/runs/functions.py

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,15 @@ def run_model_on_task( # noqa: PLR0913
103103
Result of the run.
104104
flow : OpenMLFlow (optional, only if `return_flow` is True).
105105
Flow generated from the model.
106+
107+
Examples
108+
--------
109+
>>> import openml
110+
>>> import openml_sklearn # doctest: +SKIP
111+
>>> from sklearn.tree import DecisionTreeClassifier # doctest: +SKIP
112+
>>> clf = DecisionTreeClassifier() # doctest: +SKIP
113+
>>> task = openml.tasks.get_task(6) # doctest: +SKIP
114+
>>> run = openml.runs.run_model_on_task(clf, task) # doctest: +SKIP
106115
"""
107116
if avoid_duplicate_runs is None:
108117
avoid_duplicate_runs = openml.config.avoid_duplicate_runs
@@ -558,9 +567,14 @@ def _run_task_get_arffcontent( # noqa: PLR0915, PLR0912, C901
558567
) # job_rvals contain the output of all the runs with one-to-one correspondence with `jobs`
559568

560569
for n_fit, rep_no, fold_no, sample_no in jobs:
561-
pred_y, proba_y, test_indices, test_y, inner_trace, user_defined_measures_fold = job_rvals[
562-
n_fit - 1
563-
]
570+
(
571+
pred_y,
572+
proba_y,
573+
test_indices,
574+
test_y,
575+
inner_trace,
576+
user_defined_measures_fold,
577+
) = job_rvals[n_fit - 1]
564578

565579
if inner_trace is not None:
566580
traces.append(inner_trace)
@@ -845,7 +859,10 @@ def get_run(run_id: int, ignore_cache: bool = False) -> OpenMLRun: # noqa: FBT0
845859
return _create_run_from_xml(run_xml)
846860

847861

848-
def _create_run_from_xml(xml: str, from_server: bool = True) -> OpenMLRun: # noqa: PLR0915, PLR0912, C901, FBT002
862+
def _create_run_from_xml( # noqa: PLR0915, PLR0912, C901
863+
xml: str,
864+
from_server: bool = True, # noqa: FBT002
865+
) -> OpenMLRun:
849866
"""Create a run object from xml returned from server.
850867
851868
Parameters

openml/study/functions.py

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,12 @@ def get_suite(suite_id: int | str) -> OpenMLBenchmarkSuite:
2929
-------
3030
OpenMLBenchmarkSuite
3131
The OpenML suite object
32+
33+
Examples
34+
--------
35+
>>> import openml
36+
>>> suite = openml.study.get_suite(99) # doctest: +SKIP
37+
>>> suite = openml.study.get_suite("OpenML-CC18") # doctest: +SKIP
3238
"""
3339
study = _get_study(suite_id, entity_type="task")
3440
assert isinstance(study, OpenMLBenchmarkSuite)
@@ -58,6 +64,11 @@ def get_study(
5864
-------
5965
OpenMLStudy
6066
The OpenML study object
67+
68+
Examples
69+
--------
70+
>>> import openml
71+
>>> study = openml.study.get_study(1) # doctest: +SKIP
6172
"""
6273
if study_id == "OpenML100":
6374
message = (
@@ -108,7 +119,10 @@ def _get_study(id_: int | str, entity_type: str) -> BaseStudy:
108119
tags = []
109120
if "oml:tag" in result_dict:
110121
for tag in result_dict["oml:tag"]:
111-
current_tag = {"name": tag["oml:name"], "write_access": tag["oml:write_access"]}
122+
current_tag = {
123+
"name": tag["oml:name"],
124+
"write_access": tag["oml:write_access"],
125+
}
112126
if "oml:window_start" in tag:
113127
current_tag["window_start"] = tag["oml:window_start"]
114128
tags.append(current_tag)
@@ -209,6 +223,15 @@ def create_study(
209223
-------
210224
OpenMLStudy
211225
A local OpenML study object (call publish method to upload to server)
226+
227+
Examples
228+
--------
229+
>>> import openml
230+
>>> study = openml.study.create_study( # doctest: +SKIP
231+
... name="My Study",
232+
... description="A study on decision trees",
233+
... run_ids=[1, 2, 3],
234+
... )
212235
"""
213236
return OpenMLStudy(
214237
study_id=None,

openml/tasks/functions.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -338,7 +338,11 @@ def get_tasks(
338338
tasks = []
339339
for task_id in task_ids:
340340
tasks.append(
341-
get_task(task_id, download_data=download_data, download_qualities=download_qualities)
341+
get_task(
342+
task_id,
343+
download_data=download_data,
344+
download_qualities=download_qualities,
345+
)
342346
)
343347
return tasks
344348

@@ -369,6 +373,12 @@ def get_task(
369373
Returns
370374
-------
371375
task: OpenMLTask
376+
377+
Examples
378+
--------
379+
>>> import openml
380+
>>> task = openml.tasks.get_task(1) # doctest: +SKIP
381+
>>> task = openml.tasks.get_task(1, download_splits=True) # doctest: +SKIP
372382
"""
373383
if not isinstance(task_id, int):
374384
raise TypeError(f"Task id should be integer, is {type(task_id)}")

0 commit comments

Comments
 (0)