Skip to content

Commit dfa0ab7

Browse files
committed
Update todos, topic endpoints
1 parent 8933cd8 commit dfa0ab7

4 files changed

Lines changed: 46 additions & 18 deletions

File tree

openml/_api/http/client.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -267,7 +267,7 @@ def download_minio_bucket(self, source: str, destination: str | Path | None = No
267267
parsed_url = urllib.parse.urlparse(source)
268268

269269
# expect path format: /BUCKET/path/to/file.ext
270-
_, bucket, *prefixes, _file = parsed_url.path.split("/")
270+
_, bucket, *prefixes, _ = parsed_url.path.split("/")
271271
prefix = "/".join(prefixes)
272272

273273
client = minio.Minio(endpoint=parsed_url.netloc, secure=False)

openml/_api/resources/base.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -117,6 +117,12 @@ def download_dataset_arff(
117117
description: dict | OpenMLDataset,
118118
) -> Path: ...
119119

120+
@abstractmethod
121+
def add_topic(self, data_id: int, topic: str) -> int: ...
122+
123+
@abstractmethod
124+
def delete_topic(self, data_id: int, topic: str) -> int: ...
125+
120126

121127
class TasksAPI(ResourceAPI, ABC):
122128
@abstractmethod

openml/_api/resources/datasets.py

Lines changed: 22 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -28,11 +28,12 @@
2828
if TYPE_CHECKING:
2929
from requests import Response
3030

31-
import openml
3231

3332
import pandas as pd
3433
import xmltodict
3534

35+
import openml
36+
3637
logger = logging.getLogger(__name__)
3738

3839

@@ -693,6 +694,20 @@ def download_dataset_arff(
693694

694695
return output_file_path
695696

697+
def add_topic(self, data_id: int, topic: str) -> int:
698+
form_data = {"data_id": data_id, "topic": topic} # type: openml._api_calls.DATA_TYPE
699+
result_xml = openml._api_calls._perform_api_call("data/topicadd", "post", data=form_data)
700+
result = xmltodict.parse(result_xml)
701+
data_id = result["oml:data_topic"]["oml:id"]
702+
return int(data_id)
703+
704+
def delete_topic(self, data_id: int, topic: str) -> int:
705+
form_data = {"data_id": data_id, "topic": topic} # type: openml._api_calls.DATA_TYPE
706+
result_xml = openml._api_calls._perform_api_call("data/topicdelete", "post", data=form_data)
707+
result = xmltodict.parse(result_xml)
708+
data_id = result["oml:data_topic"]["oml:id"]
709+
return int(data_id)
710+
696711

697712
class DatasetsV2(DatasetsAPI):
698713
def get(
@@ -1100,3 +1115,9 @@ def download_dataset_arff(
11001115
raise e
11011116

11021117
return output_file_path
1118+
1119+
def add_topic(self, data_id: int, topic: str) -> int:
1120+
raise NotImplementedError()
1121+
1122+
def delete_topic(self, data_id: int, topic: str) -> int:
1123+
raise NotImplementedError()

openml/datasets/functions.py

Lines changed: 17 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -80,7 +80,7 @@ def list_datasets(
8080
8181
Parameters
8282
----------
83-
dataset_id : list, optional
83+
data_id : list, optional
8484
A list of data ids, to specify which datasets should be
8585
listed
8686
offset : int, optional
@@ -842,6 +842,7 @@ def data_feature_remove_ontology(data_id: int, index: int, ontology: str) -> boo
842842
return api_context.backend.datasets.feature_remove_ontology(data_id, index, ontology)
843843

844844

845+
# TODO used only in tests
845846
def _topic_add_dataset(data_id: int, topic: str) -> int:
846847
"""
847848
Adds a topic for a dataset.
@@ -858,15 +859,12 @@ def _topic_add_dataset(data_id: int, topic: str) -> int:
858859
-------
859860
Dataset id
860861
"""
861-
if not isinstance(data_id, int):
862-
raise TypeError(f"`data_id` must be of type `int`, not {type(data_id)}.")
863-
form_data = {"data_id": data_id, "topic": topic} # type: openml._api_calls.DATA_TYPE
864-
result_xml = openml._api_calls._perform_api_call("data/topicadd", "post", data=form_data)
865-
result = xmltodict.parse(result_xml)
866-
data_id = result["oml:data_topic"]["oml:id"]
867-
return int(data_id)
862+
from openml._api import api_context
863+
864+
return api_context.backend.datasets.add_topic(data_id, topic)
868865

869866

867+
# TODO used only in tests
870868
def _topic_delete_dataset(data_id: int, topic: str) -> int:
871869
"""
872870
Removes a topic from a dataset.
@@ -883,15 +881,12 @@ def _topic_delete_dataset(data_id: int, topic: str) -> int:
883881
-------
884882
Dataset id
885883
"""
886-
if not isinstance(data_id, int):
887-
raise TypeError(f"`data_id` must be of type `int`, not {type(data_id)}.")
888-
form_data = {"data_id": data_id, "topic": topic} # type: openml._api_calls.DATA_TYPE
889-
result_xml = openml._api_calls._perform_api_call("data/topicdelete", "post", data=form_data)
890-
result = xmltodict.parse(result_xml)
891-
data_id = result["oml:data_topic"]["oml:id"]
892-
return int(data_id)
884+
from openml._api import api_context
893885

886+
return api_context.backend.datasets.delete_topic(data_id, topic)
894887

888+
889+
# TODO used by tests only
895890
def _get_dataset_description(did_cache_dir: Path, dataset_id: int) -> dict[str, Any]:
896891
"""Get the dataset description as xml dictionary.
897892
@@ -935,6 +930,7 @@ def _get_dataset_description(did_cache_dir: Path, dataset_id: int) -> dict[str,
935930
return description # type: ignore
936931

937932

933+
# TODO remove cache dir
938934
def _get_dataset_parquet(
939935
description: dict | OpenMLDataset,
940936
cache_directory: Path | None = None, # noqa: ARG001
@@ -972,6 +968,7 @@ def _get_dataset_parquet(
972968
return api_context.backend.datasets.download_dataset_parquet(description, download_all_files)
973969

974970

971+
# TODO remove cache dir
975972
def _get_dataset_arff(
976973
description: dict | OpenMLDataset,
977974
cache_directory: Path | None = None, # noqa: ARG001
@@ -1003,6 +1000,7 @@ def _get_dataset_arff(
10031000
return api_context.backend.datasets.download_dataset_arff(description)
10041001

10051002

1003+
# TODO remove cache dir
10061004
def _get_dataset_features_file(
10071005
did_cache_dir: str | Path | None, # noqa: ARG001
10081006
dataset_id: int,
@@ -1033,6 +1031,7 @@ def _get_dataset_features_file(
10331031
return api_context.backend.datasets.download_features_file(dataset_id)
10341032

10351033

1034+
# TODO remove cache dir
10361035
def _get_dataset_qualities_file(
10371036
did_cache_dir: str | Path | None, # noqa: ARG001
10381037
dataset_id: int,
@@ -1060,9 +1059,10 @@ def _get_dataset_qualities_file(
10601059
# cache directory not used here anymore
10611060
from openml._api import api_context
10621061

1063-
return api_context.backend.datasets.download_features_file(dataset_id)
1062+
return api_context.backend.datasets.download_qualities_file(dataset_id)
10641063

10651064

1065+
# TODO used only in tests
10661066
def _get_online_dataset_arff(dataset_id: int) -> str | None:
10671067
"""Download the ARFF file for a given dataset id
10681068
from the OpenML website.
@@ -1085,6 +1085,7 @@ def _get_online_dataset_arff(dataset_id: int) -> str | None:
10851085
)
10861086

10871087

1088+
# TODO used only in tests
10881089
def _get_online_dataset_format(dataset_id: int) -> str:
10891090
"""Get the dataset format for a given dataset id from the OpenML website.
10901091

0 commit comments

Comments
 (0)