From cfca589663100fa1a4b098fb87d1a160ca987261 Mon Sep 17 00:00:00 2001 From: Adrian Stritzinger Date: Thu, 31 Jul 2025 18:06:01 +0200 Subject: [PATCH] fix(models): auto-retry 502 errors on AskUI Inference API & fix retrying logic - 3 instead of 2 retries (i.e. 4 instead of 3 attempts) - 30s, 60s, 120s wait between attempts --- src/askui/models/askui/google_genai_api.py | 19 +++++++++++++++++++ src/askui/models/askui/inference_api.py | 12 ++++++++---- 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/src/askui/models/askui/google_genai_api.py b/src/askui/models/askui/google_genai_api.py index 27af00df..f932bcb0 100644 --- a/src/askui/models/askui/google_genai_api.py +++ b/src/askui/models/askui/google_genai_api.py @@ -3,7 +3,9 @@ import google.genai as genai from google.genai import types as genai_types +from google.genai.errors import APIError from pydantic import ValidationError +from tenacity import retry, retry_if_exception, stop_after_attempt, wait_exponential from typing_extensions import override from askui.logger import logger @@ -18,6 +20,17 @@ ASKUI_MODEL_CHOICE_PREFIX_LEN = len(ASKUI_MODEL_CHOICE_PREFIX) +def _is_retryable_error(exception: BaseException) -> bool: + """Check if the exception is a retryable error (status codes 429, 502, or 529). + + The 502 status of the AskUI Inference API is usually temporary which is why we also + retry it. 
+ """ + if isinstance(exception, APIError): + return exception.code in (429, 502, 529) + return False + + def _extract_model_id(model_choice: str) -> str: if model_choice == ModelName.ASKUI: return ModelName.GEMINI__2_5__FLASH @@ -40,6 +53,12 @@ def __init__(self, settings: AskUiInferenceApiSettings | None = None) -> None: ), ) + @retry( + stop=stop_after_attempt(4), # 3 retries + wait=wait_exponential(multiplier=30, min=30, max=120), # 30s, 60s, 120s + retry=retry_if_exception(_is_retryable_error), + reraise=True, + ) @override def get( self, diff --git a/src/askui/models/askui/inference_api.py b/src/askui/models/askui/inference_api.py index e01d3e6e..d40c5150 100644 --- a/src/askui/models/askui/inference_api.py +++ b/src/askui/models/askui/inference_api.py @@ -32,9 +32,13 @@ def _is_retryable_error(exception: BaseException) -> bool: - """Check if the exception is a retryable error (status codes 429 or 529).""" + """Check if the exception is a retryable error (status codes 429, 502, or 529). + + The 502 status of the AskUI Inference API is usually temporary which is why we also + retry it. + """ if isinstance(exception, httpx.HTTPStatusError): - return exception.response.status_code in (429, 529) + return exception.response.status_code in (429, 502, 529) return False @@ -120,8 +124,8 @@ def _client(self) -> httpx.Client: ) @retry( - stop=stop_after_attempt(3), - wait=wait_exponential(multiplier=1, min=30, max=240), + stop=stop_after_attempt(4), # 3 retries + wait=wait_exponential(multiplier=30, min=30, max=120), # 30s, 60s, 120s retry=retry_if_exception(_is_retryable_error), reraise=True, )