22
33import hashlib
44import json
5+ import logging
6+ import math
7+ import random
58import time
6- from collections .abc import Callable
9+ import xml
10+ from collections .abc import Callable , Mapping
711from pathlib import Path
8- from typing import TYPE_CHECKING , Any
12+ from typing import Any
913from urllib .parse import urlencode , urljoin , urlparse
1014
1115import requests
16+ import xmltodict
1217from requests import Response
1318
1419from openml .__version__ import __version__
15- from openml .exceptions import OpenMLHashException
16-
17- if TYPE_CHECKING :
18- from openml ._api .config import DelayMethod
20+ from openml ._api .config import RetryPolicy
21+ from openml .exceptions import (
22+ OpenMLHashException ,
23+ OpenMLNotAuthorizedError ,
24+ OpenMLServerError ,
25+ OpenMLServerException ,
26+ OpenMLServerNoResult ,
27+ )
1928
2029
2130class HTTPCache :
@@ -111,21 +120,202 @@ def __init__( # noqa: PLR0913
111120 api_key : str ,
112121 timeout : int ,
113122 retries : int ,
114- delay_method : DelayMethod ,
115- delay_time : int ,
123+ retry_policy : RetryPolicy ,
116124 cache : HTTPCache | None = None ,
117125 ) -> None :
118126 self .server = server
119127 self .base_url = base_url
120128 self .api_key = api_key
121129 self .timeout = timeout
122130 self .retries = retries
123- self .delay_method = delay_method
124- self .delay_time = delay_time
131+ self .retry_policy = retry_policy
125132 self .cache = cache
126133
134+ self .retry_func = (
135+ self ._human_delay if retry_policy == RetryPolicy .HUMAN else self ._robot_delay
136+ )
127137 self .headers : dict [str , str ] = {"user-agent" : f"openml-python/{ __version__ } " }
128138
def _robot_delay(self, n: int) -> float:
    """Return a jittered, logistic back-off delay in seconds.

    The base delay is a logistic curve in ``n`` scaled by 60, so it stays
    below 60 seconds before jitter. Gaussian noise with a standard deviation
    of one tenth of the base delay is added, and the result is never less
    than one second.

    Parameters
    ----------
    n : int
        Value the delay grows with (presumably the retry attempt number).

    Returns
    -------
    float
        Number of seconds to wait.
    """
    sigmoid = 1 / (1 + math.exp(-(n * 0.5 - 4)))
    base_delay = sigmoid * 60
    jitter = random.gauss(0, base_delay / 10)
    jittered = base_delay + jitter
    # Floor of one second, regardless of how the jitter falls.
    return jittered if jittered > 1.0 else 1.0
143+
def _human_delay(self, n: int) -> float:
    """Return a linear delay of ``n`` seconds, but never less than one second.

    Parameters
    ----------
    n : int
        Value used directly as the delay (presumably the retry attempt number).

    Returns
    -------
    float
        ``n`` when it exceeds one, otherwise ``1.0``.
    """
    return n if n > 1.0 else 1.0
146+
def _parse_exception_response(
    self,
    response: Response,
) -> tuple[int | None, str]:
    """Extract the error code and message from a server error response.

    The body is decoded as JSON when the ``Content-Type`` header mentions
    ``json`` (error details under ``"detail"``), and as XML via ``xmltodict``
    otherwise (error details under ``"oml:error"``).

    Parameters
    ----------
    response : Response
        The response whose body describes the error.

    Returns
    -------
    tuple[int | None, str]
        The error code converted with ``int`` (``None`` when absent) and a
        message built from the message and additional-information fields;
        the message is empty when both fields are missing or empty.
    """
    if "json" in response.headers.get("Content-Type", "").lower():
        error = response.json()["detail"]
        field_names = ("code", "message", "additional_information")
    else:
        error = xmltodict.parse(response.text)["oml:error"]
        field_names = ("oml:code", "oml:message", "oml:additional_information")

    raw_code, message, extra_info = (error.get(name) for name in field_names)
    code = None if raw_code is None else int(raw_code)

    if message and extra_info:
        return code, f"{message} - {extra_info} "
    # Whichever single field is present, or an empty string if neither is.
    return code, message or extra_info or ""
179+
def _raise_code_specific_error(
    self,
    code: int,
    message: str,
    url: str,
    files: Mapping[str, Any] | None,
) -> None:
    """Raise the exception matching an OpenML server error code.

    Parameters
    ----------
    code : int
        Error code reported by the server.
    message : str
        Error message reported by the server.
    url : str
        URL of the failing call, attached to the raised exception.
    files : Mapping[str, Any] | None
        Files sent with the request; the ``"description"`` entry is added to
        the message for code 163.

    Raises
    ------
    OpenMLServerNoResult
        For the "no result" codes.
    OpenMLNotAuthorizedError
        For codes of calls that require an API key.
    OpenMLServerException
        For every other code except 107.
    """
    # 512 for runs, 372 for datasets, 500 for flows
    # 482 for tasks, 542 for evaluations, 674 for setups
    # 111 for dataset descriptions
    no_result_codes = (111, 372, 512, 500, 482, 542, 674)
    if code in no_result_codes:
        raise OpenMLServerNoResult(code=code, message=message, url=url)

    # 163: failure to validate flow XML (https://www.openml.org/api_docs#!/flow/post_flow)
    flow_xml_rejected = code == 163
    if flow_xml_rejected and files is not None and "description" in files:
        # files['description'] is the XML file description of the flow
        message = f"\n {files['description']} \n {message} "

    not_authorized_codes = (
        102,  # flow/exists post
        137,  # dataset post
        350,  # dataset/42 delete
        310,  # flow/<something> post
        320,  # flow/42 delete
        400,  # run/42 delete
        460,  # task/42 delete
    )
    if code in not_authorized_codes:
        auth_hint = (
            f"The API call {url} requires authentication via an API key.\n Please configure "
            "OpenML-Python to use your API as described in this example:"
            "\n https://openml.github.io/openml-python/latest/examples/Basics/introduction_tutorial/#authentication"
        )
        raise OpenMLNotAuthorizedError(message=auth_hint)

    # 107 (DATABASE_CONNECTION_ERRCODE) represents a database connection
    # error. These are typically caused by high server load, which means
    # trying again might resolve the issue, so nothing is raised for it.
    if code == 107:
        return

    # Propagate all other server errors to the calling functions.
    raise OpenMLServerException(code=code, message=message, url=url)
222+
def _validate_response(
    self,
    method: str,
    url: str,
    files: Mapping[str, Any] | None,
    response: Response,
) -> Exception | None:
    """Check a server response and classify any failure.

    Parameters
    ----------
    method : str
        HTTP method of the request; an unparseable error body is only
        treated as retryable for ``"GET"``.
    url : str
        URL that was called, used in log and error messages.
    files : Mapping[str, Any] | None
        Files sent with the request, forwarded to the code-specific handler.
    response : Response
        The response to validate.

    Returns
    -------
    Exception | None
        ``None`` for a 200 response; otherwise the exception the caller
        should raise once it stops retrying.

    Raises
    ------
    OpenMLServerError
        For a too-long URI, or when the error body cannot be interpreted.
    OpenMLServerNoResult, OpenMLNotAuthorizedError, OpenMLServerException
        Raised by the code-specific handler for non-retryable error codes.
    """
    # Imported explicitly: a bare ``import xml`` does not load the
    # ``xml.parsers.expat`` submodule, so ``xml.parsers.expat.ExpatError``
    # only resolves if some other module happened to import it first.
    from xml.parsers.expat import ExpatError

    if response.headers.get("Content-Encoding") != "gzip":
        logging.warning(f"Received uncompressed content from OpenML for {url} .")

    if response.status_code == 200:
        return None

    if response.status_code == requests.codes.URI_TOO_LONG:
        raise OpenMLServerError(f"URI too long! ({url} )")

    # Bind these before parsing: when a GET body cannot be parsed, the checks
    # below still read them, and leaving them unbound would raise
    # UnboundLocalError instead of returning the retryable exception.
    code: int | None = None
    message = ""
    retry_raise_e: Exception | None = None

    try:
        code, message = self._parse_exception_response(response)

    except (requests.exceptions.JSONDecodeError, ExpatError) as e:
        if method != "GET":
            extra = f"Status code: {response.status_code} \n {response.text} "
            raise OpenMLServerError(
                f"Unexpected server error when calling {url} . Please contact the "
                f"developers!\n {extra} "
            ) from e

        # An unparseable body on a GET is handed back so the call can be retried.
        retry_raise_e = e

    except Exception as e:
        # If we failed to parse it out,
        # then something has gone wrong in the body we have sent back
        # from the server and there is little extra information we can capture.
        raise OpenMLServerError(
            f"Unexpected server error when calling {url} . Please contact the developers!\n "
            f"Status code: {response.status_code} \n {response.text} ",
        ) from e

    if code is not None:
        # Raises for every code except 107, which falls through to a retry.
        self._raise_code_specific_error(
            code=code,
            message=message,
            url=url,
            files=files,
        )

    if retry_raise_e is None:
        retry_raise_e = OpenMLServerException(code=code, message=message, url=url)

    return retry_raise_e
278+
def _request(  # noqa: PLR0913
    self,
    method: str,
    url: str,
    params: Mapping[str, Any],
    headers: Mapping[str, str],
    timeout: float | int,
    files: Mapping[str, Any] | None,
    **request_kwargs: Any,
) -> tuple[Response | None, Exception | None]:
    """Send one HTTP request and report whether it should be retried.

    Parameters
    ----------
    method, url, params, headers, timeout, files
        Forwarded unchanged to ``requests.request``.
    **request_kwargs : Any
        Additional keyword arguments forwarded to ``requests.request``.

    Returns
    -------
    tuple[Response | None, Exception | None]
        The response (``None`` when the connection itself failed) and the
        exception describing the failure (``None`` when the response
        validated successfully).
    """
    try:
        response = requests.request(
            method=method,
            url=url,
            params=params,
            headers=headers,
            timeout=timeout,
            files=files,
            **request_kwargs,
        )
    except (
        requests.exceptions.ChunkedEncodingError,
        requests.exceptions.ConnectionError,
        requests.exceptions.SSLError,
    ) as connection_error:
        # No response was obtained; hand the error back instead of raising.
        return None, connection_error

    validation_error = self._validate_response(
        method=method,
        url=url,
        files=files,
        response=response,
    )
    return response, validation_error
318+
129319 def request (
130320 self ,
131321 method : str ,
@@ -137,6 +327,7 @@ def request(
137327 ** request_kwargs : Any ,
138328 ) -> Response :
139329 url = urljoin (self .server , urljoin (self .base_url , path ))
330+ retries = max (1 , self .retries )
140331
141332 # prepare params
142333 params = request_kwargs .pop ("params" , {}).copy ()
@@ -148,6 +339,9 @@ def request(
148339 headers .update (self .headers )
149340
150341 timeout = request_kwargs .pop ("timeout" , self .timeout )
342+ files = request_kwargs .pop ("files" , None )
343+
344+ use_cache = False
151345
152346 if use_cache and self .cache is not None :
153347 cache_key = self .cache .get_key (url , params )
@@ -158,14 +352,28 @@ def request(
158352 except Exception :
159353 raise # propagate unexpected cache errors
160354
161- response = requests .request (
162- method = method ,
163- url = url ,
164- params = params ,
165- headers = headers ,
166- timeout = timeout ,
167- ** request_kwargs ,
168- )
355+ for retry_counter in range (1 , retries + 1 ):
356+ response , retry_raise_e = self ._request (
357+ method = method ,
358+ url = url ,
359+ params = params ,
360+ headers = headers ,
361+ timeout = timeout ,
362+ files = files ,
363+ ** request_kwargs ,
364+ )
365+
366+ # executed successfully
367+ if retry_raise_e is None :
368+ break
369+ # tries completed
370+ if retry_counter >= retries :
371+ raise retry_raise_e
372+
373+ delay = self .retry_func (retry_counter )
374+ time .sleep (delay )
375+
376+ assert response is not None
169377
170378 if md5_checksum is not None :
171379 self ._verify_checksum (response , md5_checksum )
0 commit comments