Skip to content

Commit bd85ec4

Browse files
committed
fix conflicts
2 parents 8d51fea + a354167 commit bd85ec4

9 files changed

Lines changed: 390 additions & 38 deletions

File tree

openml/_api/clients/http.py

Lines changed: 226 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2,20 +2,29 @@
22

33
import hashlib
44
import json
5+
import logging
6+
import math
7+
import random
58
import time
6-
from collections.abc import Callable
9+
import xml
10+
from collections.abc import Callable, Mapping
711
from pathlib import Path
8-
from typing import TYPE_CHECKING, Any
12+
from typing import Any
913
from urllib.parse import urlencode, urljoin, urlparse
1014

1115
import requests
16+
import xmltodict
1217
from requests import Response
1318

1419
from openml.__version__ import __version__
15-
from openml.exceptions import OpenMLHashException
16-
17-
if TYPE_CHECKING:
18-
from openml._api.config import DelayMethod
20+
from openml._api.config import RetryPolicy
21+
from openml.exceptions import (
22+
OpenMLHashException,
23+
OpenMLNotAuthorizedError,
24+
OpenMLServerError,
25+
OpenMLServerException,
26+
OpenMLServerNoResult,
27+
)
1928

2029

2130
class HTTPCache:
@@ -111,21 +120,202 @@ def __init__( # noqa: PLR0913
111120
api_key: str,
112121
timeout: int,
113122
retries: int,
114-
delay_method: DelayMethod,
115-
delay_time: int,
123+
retry_policy: RetryPolicy,
116124
cache: HTTPCache | None = None,
117125
) -> None:
118126
self.server = server
119127
self.base_url = base_url
120128
self.api_key = api_key
121129
self.timeout = timeout
122130
self.retries = retries
123-
self.delay_method = delay_method
124-
self.delay_time = delay_time
131+
self.retry_policy = retry_policy
125132
self.cache = cache
126133

134+
self.retry_func = (
135+
self._human_delay if retry_policy == RetryPolicy.HUMAN else self._robot_delay
136+
)
127137
self.headers: dict[str, str] = {"user-agent": f"openml-python/{__version__}"}
128138

139+
def _robot_delay(self, n: int) -> float:
    """Return the back-off delay for retry attempt ``n`` under the robot policy.

    The base wait follows a logistic curve in ``n`` scaled by 60, and
    Gaussian jitter with a standard deviation of one tenth of that wait is
    added on top. The result is never smaller than ``1.0``. The retry loop
    passes the value to ``time.sleep``, so it is a duration in seconds.
    """
    exponent = -(n * 0.5 - 4)
    sigmoid = 1 / (1 + math.exp(exponent))
    wait = sigmoid * 60
    # Jitter keeps many automated clients from retrying in lock-step.
    jitter = random.gauss(0, wait / 10)
    jittered = wait + jitter
    return max(1.0, jittered)
143+
144+
def _human_delay(self, n: int) -> float:
    """Return the back-off delay for retry attempt ``n`` under the human policy.

    The delay grows linearly with the attempt number and is never smaller
    than ``1.0``. The retry loop passes the value to ``time.sleep``, so it
    is a duration in seconds.

    The attempt number is coerced to ``float`` so the return type matches
    the annotation for every ``n``; without the coercion ``max(1.0, n)``
    hands back the ``int`` itself once ``n >= 2``.
    """
    return max(1.0, float(n))
146+
147+
def _parse_exception_response(
    self,
    response: Response,
) -> tuple[int | None, str]:
    """Extract the error code and a readable message from an error response.

    A response whose ``Content-Type`` header mentions ``json`` is read as
    JSON with the error under the ``detail`` key. Any other response is
    parsed as XML with the error under the ``oml:error`` element.

    Returns
    -------
    tuple[int | None, str]
        The error code converted to ``int`` (``None`` when the server sent
        no code) and the message. When both a message and additional
        information are present they are joined with ``" - "``; when
        neither is present the message is the empty string.
    """
    is_json = "json" in response.headers.get("Content-Type", "").lower()

    if is_json:
        error = response.json()["detail"]
        code_key, message_key, info_key = "code", "message", "additional_information"
    else:
        error = xmltodict.parse(response.text)["oml:error"]
        code_key, message_key, info_key = (
            "oml:code",
            "oml:message",
            "oml:additional_information",
        )

    raw_code = error.get(code_key)
    message = error.get(message_key)
    extra = error.get(info_key)

    code = None if raw_code is None else int(raw_code)

    if message and extra:
        return code, f"{message} - {extra}"
    # At most one of the two is usable here; fall back to "" when neither is.
    return code, message or extra or ""
179+
180+
def _raise_code_specific_error(
    self,
    code: int,
    message: str,
    url: str,
    files: Mapping[str, Any] | None,
) -> None:
    """Raise the exception that matches an OpenML server error code.

    Parameters
    ----------
    code
        Error code reported by the server.
    message
        Error message reported by the server.
    url
        URL of the failed call; included in the raised exception.
    files
        Files sent with the request, if any. For code 163 the
        ``description`` entry is prepended to the message.

    Raises
    ------
    OpenMLServerNoResult
        For the "no result" codes.
    OpenMLNotAuthorizedError
        For codes that indicate a missing or invalid API key.
    OpenMLServerException
        For every other code except 107.

    Notes
    -----
    Code 107 (database connection error) returns normally. Such errors are
    typically caused by high server load, so trying again might resolve
    them.
    """
    # 111 dataset descriptions, 372 datasets, 512 runs, 500 flows,
    # 482 tasks, 542 evaluations, 674 setups
    no_result_codes = (111, 372, 512, 500, 482, 542, 674)
    if code in no_result_codes:
        raise OpenMLServerNoResult(code=code, message=message, url=url)

    # 163: failure to validate flow XML (https://www.openml.org/api_docs#!/flow/post_flow)
    if code == 163 and files is not None and "description" in files:
        # files["description"] is the XML file description of the flow
        message = f"\n{files['description']}\n{message}"

    not_authorized_codes = (
        102,  # flow/exists post
        137,  # dataset post
        350,  # dataset/42 delete
        310,  # flow/<something> post
        320,  # flow/42 delete
        400,  # run/42 delete
        460,  # task/42 delete
    )
    if code in not_authorized_codes:
        auth_hint = (
            f"The API call {url} requires authentication via an API key.\nPlease configure "
            "OpenML-Python to use your API as described in this example:"
            "\nhttps://openml.github.io/openml-python/latest/examples/Basics/introduction_tutorial/#authentication"
        )
        raise OpenMLNotAuthorizedError(message=auth_hint)

    # DATABASE_CONNECTION_ERRCODE: the only code that is not propagated.
    if code == 107:
        return

    raise OpenMLServerException(code=code, message=message, url=url)
222+
223+
def _validate_response(
    self,
    method: str,
    url: str,
    files: Mapping[str, Any] | None,
    response: Response,
) -> Exception | None:
    """Inspect a server response and decide between success, retry and failure.

    Parameters
    ----------
    method
        HTTP method of the request. Only ``"GET"`` requests with an
        unparseable error body are treated as retryable.
    url
        URL that was called; used in log and error messages.
    files
        Files sent with the request, forwarded to
        ``_raise_code_specific_error``.
    response
        The response to validate.

    Returns
    -------
    Exception | None
        ``None`` when the status code is 200. Otherwise an exception
        describing a failure that may be retried: the parse error for a
        GET whose error body could not be parsed, or an
        ``OpenMLServerException`` when the server reported no code or a
        code that ``_raise_code_specific_error`` does not raise for.

    Raises
    ------
    OpenMLServerError
        If the URI is too long, if the error body of a non-GET request
        cannot be parsed, or if parsing fails in any unexpected way.
    Exception
        Whatever ``_raise_code_specific_error`` raises for the parsed code.
    """
    # Imported here because a bare ``import xml`` does not load the
    # ``xml.parsers.expat`` submodule on its own.
    from xml.parsers.expat import ExpatError

    if response.headers.get("Content-Encoding") != "gzip":
        logging.warning("Received uncompressed content from OpenML for %s.", url)

    if response.status_code == 200:
        return None

    if response.status_code == requests.codes.URI_TOO_LONG:
        raise OpenMLServerError(f"URI too long! ({url})")

    try:
        code, message = self._parse_exception_response(response)

    except (requests.exceptions.JSONDecodeError, ExpatError) as e:
        if method != "GET":
            extra = f"Status code: {response.status_code}\n{response.text}"
            raise OpenMLServerError(
                f"Unexpected server error when calling {url}. Please contact the "
                f"developers!\n{extra}"
            ) from e

        # Return immediately: ``code`` and ``message`` were never bound, so
        # falling through to the checks below would raise
        # UnboundLocalError instead of reporting a retryable failure.
        return e

    except Exception as e:
        # If we failed to parse it out, then something has gone wrong in
        # the body sent back from the server and there is little extra
        # information we can capture.
        raise OpenMLServerError(
            f"Unexpected server error when calling {url}. Please contact the developers!\n"
            f"Status code: {response.status_code}\n{response.text}",
        ) from e

    if code is not None:
        # Raises for every code except the retryable database error (107).
        self._raise_code_specific_error(
            code=code,
            message=message,
            url=url,
            files=files,
        )

    return OpenMLServerException(code=code, message=message, url=url)
278+
279+
def _request(  # noqa: PLR0913
    self,
    method: str,
    url: str,
    params: Mapping[str, Any],
    headers: Mapping[str, str],
    timeout: float | int,
    files: Mapping[str, Any] | None,
    **request_kwargs: Any,
) -> tuple[Response | None, Exception | None]:
    """Perform a single HTTP attempt and classify its outcome.

    Returns
    -------
    tuple[Response | None, Exception | None]
        ``(response, None)`` on success. ``(None, error)`` when the request
        failed with a chunked-encoding, connection or SSL error.
        ``(response, error)`` when a response arrived but
        ``_validate_response`` returned a retryable error for it.

    Raises
    ------
    Exception
        Anything ``_validate_response`` raises for a non-retryable failure.
    """
    transient_errors = (
        requests.exceptions.ChunkedEncodingError,
        requests.exceptions.ConnectionError,
        requests.exceptions.SSLError,
    )

    try:
        response = requests.request(
            method=method,
            url=url,
            params=params,
            headers=headers,
            timeout=timeout,
            files=files,
            **request_kwargs,
        )
    except transient_errors as exc:
        # No response object exists; report the failure so the caller can retry.
        return None, exc

    outcome = self._validate_response(
        method=method,
        url=url,
        files=files,
        response=response,
    )
    return response, outcome
318+
129319
def request(
130320
self,
131321
method: str,
@@ -137,6 +327,7 @@ def request(
137327
**request_kwargs: Any,
138328
) -> Response:
139329
url = urljoin(self.server, urljoin(self.base_url, path))
330+
retries = max(1, self.retries)
140331

141332
# prepare params
142333
params = request_kwargs.pop("params", {}).copy()
@@ -148,6 +339,9 @@ def request(
148339
headers.update(self.headers)
149340

150341
timeout = request_kwargs.pop("timeout", self.timeout)
342+
files = request_kwargs.pop("files", None)
343+
344+
use_cache = False
151345

152346
if use_cache and self.cache is not None:
153347
cache_key = self.cache.get_key(url, params)
@@ -158,14 +352,28 @@ def request(
158352
except Exception:
159353
raise # propagate unexpected cache errors
160354

161-
response = requests.request(
162-
method=method,
163-
url=url,
164-
params=params,
165-
headers=headers,
166-
timeout=timeout,
167-
**request_kwargs,
168-
)
355+
for retry_counter in range(1, retries + 1):
356+
response, retry_raise_e = self._request(
357+
method=method,
358+
url=url,
359+
params=params,
360+
headers=headers,
361+
timeout=timeout,
362+
files=files,
363+
**request_kwargs,
364+
)
365+
366+
# executed successfully
367+
if retry_raise_e is None:
368+
break
369+
# tries completed
370+
if retry_counter >= retries:
371+
raise retry_raise_e
372+
373+
delay = self.retry_func(retry_counter)
374+
time.sleep(delay)
375+
376+
assert response is not None
169377

170378
if md5_checksum is not None:
171379
self._verify_checksum(response, md5_checksum)

openml/_api/config.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from enum import Enum
55

66

7-
class DelayMethod(str, Enum):
7+
class RetryPolicy(str, Enum):
    """Strategy used to space out retries of failed requests.

    The HTTP client compares its configured policy against ``HUMAN`` and
    uses its linear ``_human_delay`` on a match; every other value selects
    the jittered ``_robot_delay``.
    """

    # Linear delay that follows the attempt number.
    HUMAN = "human"
    # Logistic back-off with random jitter.
    ROBOT = "robot"
1010

@@ -26,8 +26,7 @@ class APISettings:
2626
@dataclass
2727
class ConnectionConfig:
2828
retries: int = 3
29-
delay_method: DelayMethod = DelayMethod.HUMAN
30-
delay_time: int = 1 # seconds
29+
retry_policy: RetryPolicy = RetryPolicy.HUMAN
3130

3231

3332
@dataclass
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
from openml._api.resources.base.base import APIVersion, ResourceAPI, ResourceType
2+
from openml._api.resources.base.resources import DatasetsAPI, TasksAPI
3+
from openml._api.resources.base.versions import ResourceV1, ResourceV2
4+
5+
__all__ = [
6+
"APIVersion",
7+
"DatasetsAPI",
8+
"ResourceAPI",
9+
"ResourceType",
10+
"ResourceV1",
11+
"ResourceV2",
12+
"TasksAPI",
13+
]

openml/_api/resources/base/base.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
from __future__ import annotations
2+
3+
from abc import ABC, abstractmethod
4+
from enum import Enum
5+
from typing import TYPE_CHECKING
6+
7+
if TYPE_CHECKING:
8+
from openml._api.clients import HTTPClient
9+
10+
11+
class APIVersion(str, Enum):
    """Identifier of an API version; members compare equal to their string value."""

    V1 = "v1"
    V2 = "v2"
14+
15+
16+
class ResourceType(str, Enum):
    """Kind of resource an API class operates on.

    Members compare equal to their string value.
    """

    DATASET = "dataset"
    TASK = "task"
    TASK_TYPE = "task_type"
    EVALUATION_MEASURE = "evaluation_measure"
    ESTIMATION_PROCEDURE = "estimation_procedure"
    EVALUATION = "evaluation"
    FLOW = "flow"
    STUDY = "study"
    RUN = "run"
    SETUP = "setup"
    USER = "user"
28+
29+
30+
class ResourceAPI(ABC):
    """Abstract base class for resource APIs that talk through an HTTP client.

    Subclasses must implement ``delete`` and ``publish``. ``api_version``
    and ``resource_type`` are only annotated here, so they exist as
    attributes only once a subclass (or instance) assigns them.
    """

    api_version: APIVersion
    resource_type: ResourceType

    def __init__(self, http: HTTPClient):
        """Store the HTTP client this resource API uses."""
        self._http = http

    def _get_not_implemented_message(self, method_name: str | None = None) -> str:
        """Build the message explaining that an operation is unsupported.

        Parameters
        ----------
        method_name
            Name of the unsupported method; appended to the message when
            given.

        Returns
        -------
        str
            A message naming the concrete class, the API version and the
            resource type. ``"Unknown version"`` / ``"Unknown resource"``
            are used when the corresponding attribute is unset or has no
            ``name``.
        """
        # Look the attributes up defensively: the class-level annotations do
        # not create them, so ``self.api_version`` raises AttributeError on a
        # subclass that never assigned it, before any fallback could apply.
        version = getattr(getattr(self, "api_version", None), "name", "Unknown version")
        resource = getattr(getattr(self, "resource_type", None), "name", "Unknown resource")
        method_info = f" Method: {method_name}" if method_name else ""
        return (
            f"{self.__class__.__name__}: {version} API does not support this "
            f"functionality for resource: {resource}.{method_info}"
        )

    @abstractmethod
    def delete(self, resource_id: int) -> bool:
        """Delete the resource with the given id.

        NOTE(review): the meaning of the boolean result is not defined
        here; presumably it signals success, so confirm against the
        implementations.
        """
        ...

    @abstractmethod
    def publish(self) -> None:
        """Publish a resource; the details are left to subclasses."""
        ...

0 commit comments

Comments
 (0)