Skip to content

Commit 8d51fea

Browse files
committed
Fixes some tests
1 parent d789e83 commit 8d51fea

6 files changed

Lines changed: 46 additions & 29 deletions

File tree

openml/_api/clients/http.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from requests import Response
1313

1414
from openml.__version__ import __version__
15+
from openml.exceptions import OpenMLHashException
1516

1617
if TYPE_CHECKING:
1718
from openml._api.config import DelayMethod
@@ -132,7 +133,7 @@ def request(
132133
*,
133134
use_cache: bool = False,
134135
use_api_key: bool = False,
135-
md5_checksum: str | None,
136+
md5_checksum: str | None = None,
136137
**request_kwargs: Any,
137138
) -> Response:
138139
url = urljoin(self.server, urljoin(self.base_url, path))
@@ -178,7 +179,10 @@ def _verify_checksum(self, response: Response, md5_checksum: str) -> None:
178179
# ruff sees hashlib.md5 as insecure
179180
actual = hashlib.md5(response.content).hexdigest() # noqa: S324
180181
if actual != md5_checksum:
181-
raise ValueError(f"MD5 checksum mismatch: expected {md5_checksum}, got {actual}")
182+
raise OpenMLHashException(
183+
f"Checksum of downloaded file is unequal to the expected checksum {md5_checksum} "
184+
f"when downloading {response.url}.",
185+
)
182186

183187
def get(
184188
self,

openml/datasets/dataset.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -992,7 +992,8 @@ def _to_dict(self) -> dict[str, dict]:
992992
}
993993

994994

995-
def _read_features(features_file: Path) -> dict[int, OpenMLDataFeature]:
995+
def _read_features(features_file: str | Path) -> dict[int, OpenMLDataFeature]:
996+
features_file = Path(features_file)
996997
features_pickle_file = Path(_get_features_pickle_file(str(features_file)))
997998
try:
998999
with features_pickle_file.open("rb") as fh_binary:
@@ -1001,7 +1002,12 @@ def _read_features(features_file: Path) -> dict[int, OpenMLDataFeature]:
10011002
except: # noqa: E722
10021003
from openml._api import api_context
10031004

1004-
return api_context.backend.datasets.parse_features_file(features_file, features_pickle_file)
1005+
features = api_context.backend.datasets.parse_features_file(
1006+
features_file, features_pickle_file
1007+
)
1008+
with features_pickle_file.open("wb") as fh_binary:
1009+
pickle.dump(features, fh_binary)
1010+
return features
10051011

10061012

10071013
# TODO(eddiebergman): Should this really exist?
@@ -1025,6 +1031,9 @@ def _read_qualities(qualities_file: str | Path) -> dict[str, float]:
10251031
except: # noqa: E722
10261032
from openml._api import api_context
10271033

1028-
return api_context.backend.datasets.parse_qualities_file(
1034+
qualities = api_context.backend.datasets.parse_qualities_file(
10291035
qualities_file, qualities_pickle_file
10301036
)
1037+
with qualities_pickle_file.open("wb") as fh_binary:
1038+
pickle.dump(qualities, fh_binary)
1039+
return qualities

openml/datasets/functions.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1005,7 +1005,6 @@ def _get_dataset_arff(
10051005

10061006
# TODO remove cache dir
10071007
def _get_dataset_features_file(
1008-
did_cache_dir: str | Path | None, # noqa: ARG001
10091008
dataset_id: int,
10101009
) -> Path:
10111010
"""API call to load dataset features. Loads from cache or downloads them.
@@ -1035,7 +1034,6 @@ def _get_dataset_features_file(
10351034

10361035
# TODO remove cache dir
10371036
def _get_dataset_qualities_file(
1038-
did_cache_dir: str | Path | None, # noqa: ARG001
10391037
dataset_id: int,
10401038
) -> Path | None:
10411039
"""Get the path for the dataset qualities file, or None if no qualities exist.

tests/test_api/test_datasets.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
from __future__ import annotations
2+
3+
import pytest
4+
import pandas as pd
5+
import requests
6+
from openml.testing import TestBase
7+
from openml._api import api_context
8+
from openml._api.resources.datasets import DatasetsV1, DatasetsV2
9+
10+
class TestDatasetsEndpoints(TestBase):
11+
def setUp(self):
12+
super().setUp()
13+
self.v1_api = DatasetsV1(
14+
api_context.backend.datasets._http,
15+
api_context.backend.datasets._minio
16+
)
17+
self.v2_api = DatasetsV2(
18+
api_context.backend.datasets._http,
19+
api_context.backend.datasets._minio
20+
)
21+

tests/test_datasets/test_dataset.py

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -466,16 +466,3 @@ def test__read_qualities(static_cache_dir, workdir, mocker):
466466
assert pickle_mock.dump.call_count == 1
467467

468468

469-
470-
def test__check_qualities():
471-
qualities = [{"oml:name": "a", "oml:value": "0.5"}]
472-
qualities = openml.datasets.dataset._check_qualities(qualities)
473-
assert qualities["a"] == 0.5
474-
475-
qualities = [{"oml:name": "a", "oml:value": "null"}]
476-
qualities = openml.datasets.dataset._check_qualities(qualities)
477-
assert qualities["a"] != qualities["a"]
478-
479-
qualities = [{"oml:name": "a", "oml:value": None}]
480-
qualities = openml.datasets.dataset._check_qualities(qualities)
481-
assert qualities["a"] != qualities["a"]

tests/test_datasets/test_dataset_functions.py

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -441,8 +441,8 @@ def test__getarff_md5_issue(self):
441441

442442
self.assertRaisesRegex(
443443
OpenMLHashException,
444-
"Checksum of downloaded file is unequal to the expected checksum abc when downloading "
445-
"https://www.openml.org/data/download/61. Raised when downloading dataset 5.",
444+
"Checksum of downloaded file is unequal to the expected checksum abc "
445+
"when downloading https://www.openml.org/data/download/61.",
446446
_get_dataset_arff,
447447
description,
448448
)
@@ -451,17 +451,15 @@ def test__getarff_md5_issue(self):
451451

452452
@pytest.mark.uses_test_server()
453453
def test__get_dataset_features(self):
454-
features_file = _get_dataset_features_file(self.workdir, 2)
454+
features_file = _get_dataset_features_file(2)
455455
assert isinstance(features_file, Path)
456-
features_xml_path = self.workdir / "features.xml"
457-
assert features_xml_path.exists()
456+
assert features_file.exists()
458457

459458
@pytest.mark.uses_test_server()
460459
def test__get_dataset_qualities(self):
461-
qualities = _get_dataset_qualities_file(self.workdir, 2)
460+
qualities = _get_dataset_qualities_file(2)
462461
assert isinstance(qualities, Path)
463-
qualities_xml_path = self.workdir / "qualities.xml"
464-
assert qualities_xml_path.exists()
462+
assert qualities.exists()
465463

466464
@pytest.mark.uses_test_server()
467465
def test_get_dataset_force_refresh_cache(self):
@@ -565,7 +563,7 @@ def test__retrieve_class_labels(self):
565563
labels = openml.datasets.get_dataset(2).retrieve_class_labels(
566564
target_name="product-type",
567565
)
568-
assert labels == ["C", "H", "G"]
566+
assert labels == ["C", "G", "H"]
569567

570568
# Test workaround for string-typed class labels
571569
custom_ds = openml.datasets.get_dataset(2)

0 commit comments

Comments
 (0)