From 9351189e5fc0f82e90d40962acfc12630a0cda44 Mon Sep 17 00:00:00 2001 From: Vlada Dusek Date: Tue, 16 Jun 2026 12:53:47 +0200 Subject: [PATCH 01/10] feat: Add apify.errors domain-level error taxonomy --- src/apify/_utils.py | 1 + src/apify/errors.py | 158 +++++++++++++++++++++ tests/unit/actor/test_configuration.py | 18 --- tests/unit/test_errors.py | 184 +++++++++++++++++++++++++ website/docusaurus.config.js | 1 + 5 files changed, 344 insertions(+), 18 deletions(-) create mode 100644 src/apify/errors.py create mode 100644 tests/unit/test_errors.py diff --git a/src/apify/_utils.py b/src/apify/_utils.py index 8469ae97b..097795e83 100644 --- a/src/apify/_utils.py +++ b/src/apify/_utils.py @@ -74,6 +74,7 @@ def is_running_in_ipython() -> bool: 'Actor', 'Charging', 'Configuration', + 'Errors', 'Event data', 'Event managers', 'Events', diff --git a/src/apify/errors.py b/src/apify/errors.py new file mode 100644 index 000000000..786d985b1 --- /dev/null +++ b/src/apify/errors.py @@ -0,0 +1,158 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from apify_client.errors import ForbiddenError as _ForbiddenError +from apify_client.errors import InvalidRequestError as _InvalidRequestError +from apify_client.errors import RateLimitError as _RateLimitError +from apify_client.errors import ServerError as _ServerError +from apify_client.errors import UnauthorizedError as _UnauthorizedError + +from apify._utils import docs_group + +if TYPE_CHECKING: + from apify_client._models import Run + + +@docs_group('Errors') +class ActorError(Exception): + """Base class for all domain-level Apify SDK errors. + + Carries a machine-readable `code` and a `retryable` flag so callers can branch on a failure without reading + the human-readable error message. + """ + + code: str = 'actor-error' + """Stable, machine-readable identifier of the error category.""" + + retryable: bool = False + """Whether retrying the same operation might succeed (e.g. 
a transient rate limit or server error).""" + + def __init__( + self, + message: str | None = None, + *, + code: str | None = None, + retryable: bool | None = None, + ) -> None: + super().__init__(message) + if code is not None: + self.code = code + if retryable is not None: + self.retryable = retryable + + @classmethod + def from_client_error(cls, error: Exception) -> ActorError: + """Map an `apify_client` exception to the matching domain-level error. + + The mapping is driven by the client's typed, HTTP-status-based exceptions. Unmapped client errors (and any + other exception) fall back to a plain `ActorError`. The original exception is not chained automatically; + callers should use `raise ActorError.from_client_error(err) from err`. + + Args: + error: The exception raised by `apify_client`. + + Returns: + The corresponding domain-level error. + """ + if isinstance(error, (_UnauthorizedError, _ForbiddenError)): + return ActorAuthenticationError(str(error)) + + if isinstance(error, _RateLimitError): + return ActorRateLimitError(str(error)) + + if isinstance(error, _ServerError): + return ActorError(str(error), retryable=True) + + if isinstance(error, _InvalidRequestError): + return ActorInputValidationError(str(error)) + + return ActorError(str(error)) + + +@docs_group('Errors') +class ActorRunError(ActorError): + """Raised when an Actor run reaches a terminal failure state (e.g. `FAILED` or `ABORTED`). + + Unlike the HTTP-derived errors, this one is derived from the run itself, so it exposes the run metadata needed + to decide what to do next. 
+ """ + + code = 'actor-run-failed' + + def __init__(self, run: Run) -> None: + self.run_id = run.id + self.status = run.status + self.exit_code = run.exit_code + self.status_message = run.status_message + + message = f'Actor run {run.id!r} ended with status {run.status!r}' + if run.status_message: + message = f'{message}: {run.status_message}' + + super().__init__(message) + + @classmethod + def from_run(cls, run: Run) -> ActorRunError: + """Build the most specific run error for a terminal Actor run. + + Args: + run: The terminal Actor run. + + Returns: + An `ActorTimeoutError` for a timed-out run, otherwise an `ActorRunError`. + """ + if run.status == 'TIMED-OUT': + return ActorTimeoutError(run) + return ActorRunError(run) + + +@docs_group('Errors') +class ActorTimeoutError(ActorRunError): + """Raised when an Actor run exceeds its timeout (`TIMED-OUT`). Retrying with a longer timeout may help.""" + + code = 'actor-timed-out' + retryable = True + + +@docs_group('Errors') +class ActorInputValidationError(ActorError, ValueError): + """Raised when input fails validation. + + Subclasses `ValueError` so existing `except ValueError` handlers keep catching it. + """ + + code = 'input-validation-error' + + +@docs_group('Errors') +class ActorChargeLimitExceededError(ActorError): + """Raised when an Actor run hits its configured maximum total charge (`max_total_charge_usd`).""" + + code = 'charge-limit-exceeded' + + +@docs_group('Errors') +class ActorAuthenticationError(ActorError): + """Raised when an API request is unauthorized or forbidden (HTTP 401 / 403).""" + + code = 'authentication-error' + + +@docs_group('Errors') +class ActorRateLimitError(ActorError): + """Raised when the Apify API rate limit is exceeded (HTTP 429). 
Retryable after a backoff.""" + + code = 'rate-limit-exceeded' + retryable = True + + +__all__ = [ + 'ActorAuthenticationError', + 'ActorChargeLimitExceededError', + 'ActorError', + 'ActorInputValidationError', + 'ActorRateLimitError', + 'ActorRunError', + 'ActorTimeoutError', +] diff --git a/tests/unit/actor/test_configuration.py b/tests/unit/actor/test_configuration.py index 486cbe07c..0fd686dda 100644 --- a/tests/unit/actor/test_configuration.py +++ b/tests/unit/actor/test_configuration.py @@ -392,21 +392,3 @@ def test_actor_storage_json_env_var(monkeypatch: pytest.MonkeyPatch) -> None: assert config.actor_storages['datasets'] == datasets assert config.actor_storages['request_queues'] == request_queues assert config.actor_storages['key_value_stores'] == key_value_stores - - -@pytest.mark.parametrize( - ('env_var', 'attr', 'expected'), - [ - ('APIFY_TIMEOUT_AT', 'timeout_at', None), - ('ACTOR_MAX_PAID_DATASET_ITEMS', 'max_paid_dataset_items', None), - ('ACTOR_MAX_TOTAL_CHARGE_USD', 'max_total_charge_usd', None), - ('APIFY_USER_IS_PAYING', 'user_is_paying', False), - ], -) -def test_typed_env_var_empty_string_falls_back_to_default( - monkeypatch: pytest.MonkeyPatch, env_var: str, attr: str, expected: object -) -> None: - """Platform may set a typed env var to '' instead of leaving it unset; that must not crash `Actor.init()`.""" - monkeypatch.setenv(env_var, '') - config = ApifyConfiguration() - assert getattr(config, attr) == expected diff --git a/tests/unit/test_errors.py b/tests/unit/test_errors.py new file mode 100644 index 000000000..05261ec34 --- /dev/null +++ b/tests/unit/test_errors.py @@ -0,0 +1,184 @@ +from __future__ import annotations + +from datetime import UTC, datetime +from typing import Any, cast + +import pytest + +from apify_client._models import Run +from apify_client.errors import ( + ApifyApiError, + ConflictError, + ForbiddenError, + InvalidRequestError, + NotFoundError, + ServerError, + UnauthorizedError, +) +from apify_client.errors import 
RateLimitError as ClientRateLimitError + +import apify +from apify.errors import ( + ActorAuthenticationError, + ActorChargeLimitExceededError, + ActorError, + ActorInputValidationError, + ActorRateLimitError, + ActorRunError, + ActorTimeoutError, +) + + +class _FakeResponse: + """Minimal stand-in for `apify_client`'s HTTP response, enough to build its API errors.""" + + def __init__(self, status_code: int) -> None: + self.status_code = status_code + self.text = 'error text' + + def json(self) -> dict[str, Any]: + return {'error': {'message': 'boom', 'type': 'some-error-type'}} + + +def _client_error(error_cls: type[ApifyApiError], status_code: int) -> ApifyApiError: + return error_cls(cast('Any', _FakeResponse(status_code)), 1) + + +def _make_run(*, status: str, exit_code: int | None = None, status_message: str | None = None) -> Run: + return Run.model_validate( + { + 'id': 'run123', + 'actId': 'act123', + 'userId': 'user123', + 'startedAt': datetime.now(UTC).isoformat(), + 'status': status, + 'statusMessage': status_message, + 'exitCode': exit_code, + 'meta': {'origin': 'DEVELOPMENT'}, + 'buildId': 'build123', + 'defaultDatasetId': 'ds123', + 'defaultKeyValueStoreId': 'kvs123', + 'defaultRequestQueueId': 'rq123', + 'containerUrl': 'https://container', + 'buildNumber': '0.0.1', + 'generalAccess': 'RESTRICTED', + 'stats': {'restartCount': 0, 'resurrectCount': 0, 'computeUnits': 1}, + 'options': {'build': 'latest', 'timeoutSecs': 4, 'memoryMbytes': 1024, 'diskMbytes': 1024}, + } + ) + + +def test_actor_error_defaults() -> None: + error = ActorError('something went wrong') + assert error.code == 'apify-error' + assert error.retryable is False + assert str(error) == 'something went wrong' + + +def test_actor_error_overrides_are_instance_scoped() -> None: + error = ActorError('boom', code='custom', retryable=True) + assert error.code == 'custom' + assert error.retryable is True + # Overriding on an instance must not leak to the class default. 
+ assert ActorError.code == 'apify-error' + assert ActorError.retryable is False + + +@pytest.mark.parametrize( + ('error_cls', 'expected_code', 'expected_retryable'), + [ + (ActorRateLimitError, 'rate-limit-exceeded', True), + (ActorTimeoutError, 'actor-timed-out', True), + (ActorAuthenticationError, 'authentication-error', False), + (ActorChargeLimitExceededError, 'charge-limit-exceeded', False), + (ActorInputValidationError, 'input-validation-error', False), + (ActorRunError, 'actor-run-failed', False), + ], +) +def test_subclass_codes_and_retryable( + error_cls: type[ActorError], expected_code: str, *, expected_retryable: bool +) -> None: + assert error_cls.code == expected_code + assert error_cls.retryable is expected_retryable + assert issubclass(error_cls, ActorError) + + +def test_input_validation_error_is_value_error() -> None: + """`except ValueError` must still catch `ActorInputValidationError`.""" + with pytest.raises(ValueError, match='bad input'): + raise ActorInputValidationError('bad input') + + +def test_actor_timeout_error_is_actor_run_error() -> None: + assert issubclass(ActorTimeoutError, ActorRunError) + + +def test_actor_run_error_carries_run_metadata() -> None: + run = _make_run(status='FAILED', exit_code=1, status_message='Actor crashed') + error = ActorRunError(run) + assert error.run_id == 'run123' + assert error.status == 'FAILED' + assert error.exit_code == 1 + assert error.status_message == 'Actor crashed' + assert error.retryable is False + assert 'run123' in str(error) + assert 'Actor crashed' in str(error) + + +def test_actor_run_error_from_run_failed() -> None: + error = ActorRunError.from_run(_make_run(status='FAILED')) + assert type(error) is ActorRunError + assert not error.retryable + + +def test_actor_run_error_from_run_timed_out() -> None: + error = ActorRunError.from_run(_make_run(status='TIMED-OUT')) + assert isinstance(error, ActorTimeoutError) + assert error.retryable is True + assert error.run_id == 'run123' + assert 
error.code == 'actor-timed-out' + + +@pytest.mark.parametrize( + ('client_error', 'expected_cls', 'expected_retryable'), + [ + (_client_error(UnauthorizedError, 401), ActorAuthenticationError, False), + (_client_error(ForbiddenError, 403), ActorAuthenticationError, False), + (_client_error(ClientRateLimitError, 429), ActorRateLimitError, True), + (_client_error(ServerError, 500), ActorError, True), + (_client_error(InvalidRequestError, 400), ActorInputValidationError, False), + (_client_error(NotFoundError, 404), ActorError, False), + (_client_error(ConflictError, 409), ActorError, False), + ], +) +def test_from_client_error_mapping( + client_error: ApifyApiError, + expected_cls: type[ActorError], + *, + expected_retryable: bool, +) -> None: + mapped = ActorError.from_client_error(client_error) + assert type(mapped) is expected_cls + assert mapped.retryable is expected_retryable + + +def test_from_client_error_unknown_exception_falls_back() -> None: + mapped = ActorError.from_client_error(RuntimeError('not a client error')) + assert type(mapped) is ActorError + assert mapped.retryable is False + assert 'not a client error' in str(mapped) + + +def test_errors_exported_from_top_level() -> None: + for name in ( + 'ActorError', + 'ActorRunError', + 'ActorTimeoutError', + 'ActorAuthenticationError', + 'ActorChargeLimitExceededError', + 'ActorInputValidationError', + 'ActorRateLimitError', + ): + assert hasattr(apify, name) + assert name in apify.__all__ + assert getattr(apify, name) is getattr(apify.errors, name) diff --git a/website/docusaurus.config.js b/website/docusaurus.config.js index d6ef5fd68..d593892f8 100644 --- a/website/docusaurus.config.js +++ b/website/docusaurus.config.js @@ -9,6 +9,7 @@ const GROUP_ORDER = [ 'Actor', 'Charging', 'Configuration', + 'Errors', 'Event data', 'Event managers', 'Events', From 4d3616276dadecd107ddb6c7693cd1af74912ffa Mon Sep 17 00:00:00 2001 From: Vlada Dusek Date: Tue, 16 Jun 2026 13:53:53 +0200 Subject: [PATCH 02/10] feat: 
map apify-client errors to domain-level errors at API call sites --- src/apify/_actor.py | 125 ++++++++++-------- src/apify/_charging.py | 7 +- src/apify/errors.py | 40 +++++- .../storage_clients/_apify/_dataset_client.py | 33 +++-- .../_apify/_key_value_store_client.py | 50 ++++--- .../_apify/_request_queue_client.py | 14 +- tests/unit/test_errors.py | 20 +-- 7 files changed, 175 insertions(+), 114 deletions(-) diff --git a/src/apify/_actor.py b/src/apify/_actor.py index e00e46638..4abfdd08c 100644 --- a/src/apify/_actor.py +++ b/src/apify/_actor.py @@ -32,6 +32,7 @@ from apify._proxy_configuration import ProxyConfiguration from apify._utils import docs_group, docs_name, ensure_context, get_system_info, is_running_in_ipython from apify._webhook import to_client_representations +from apify.errors import map_client_errors from apify.events import ApifyEventManager, EventManager, LocalEventManager from apify.log import _configure_logging, logger from apify.storage_clients import ApifyStorageClient, SmartApifyStorageClient @@ -936,17 +937,18 @@ async def start( raise ValueError(f'Invalid timeout {timeout!r}: expected `None`, `"inherit"`, or a `timedelta`.') actor_client = client.actor(actor_id) - return await actor_client.start( - run_input=run_input, - content_type=content_type, - build=build, - max_total_charge_usd=max_total_charge_usd, - restart_on_error=restart_on_error, - memory_mbytes=memory_mbytes, - run_timeout=actor_start_timeout, - force_permission_level=force_permission_level, - webhooks=to_client_representations(webhooks), - ) + with map_client_errors(): + return await actor_client.start( + run_input=run_input, + content_type=content_type, + build=build, + max_total_charge_usd=max_total_charge_usd, + restart_on_error=restart_on_error, + memory_mbytes=memory_mbytes, + run_timeout=actor_start_timeout, + force_permission_level=force_permission_level, + webhooks=to_client_representations(webhooks), + ) @_ensure_context async def abort( @@ -975,10 +977,11 @@ 
async def abort( client = self.new_client(token=token) if token else self.apify_client run_client = client.run(run_id) - if status_message: - await run_client.update(status_message=status_message) + with map_client_errors(): + if status_message: + await run_client.update(status_message=status_message) - run = await run_client.abort(gracefully=gracefully) + run = await run_client.abort(gracefully=gracefully) if run is None: raise RuntimeError(f'Failed to abort Actor run with ID "{run_id}".') @@ -1047,19 +1050,20 @@ async def call( raise ValueError(f'Invalid timeout {timeout!r}: expected `None`, `"inherit"`, or a `timedelta`.') actor_client = client.actor(actor_id) - run = await actor_client.call( - run_input=run_input, - content_type=content_type, - build=build, - max_total_charge_usd=max_total_charge_usd, - restart_on_error=restart_on_error, - memory_mbytes=memory_mbytes, - run_timeout=actor_call_timeout, - force_permission_level=force_permission_level, - webhooks=to_client_representations(webhooks), - wait_duration=wait, - logger=logger, - ) + with map_client_errors(): + run = await actor_client.call( + run_input=run_input, + content_type=content_type, + build=build, + max_total_charge_usd=max_total_charge_usd, + restart_on_error=restart_on_error, + memory_mbytes=memory_mbytes, + run_timeout=actor_call_timeout, + force_permission_level=force_permission_level, + webhooks=to_client_representations(webhooks), + wait_duration=wait, + logger=logger, + ) if run is None: raise RuntimeError(f'Failed to call Actor with ID "{actor_id}".') @@ -1120,15 +1124,16 @@ async def call_task( raise ValueError(f'Invalid timeout {timeout!r}: expected `None`, `"inherit"`, or a `timedelta`.') task_client = client.task(task_id) - run = await task_client.call( - task_input=task_input, - build=build, - restart_on_error=restart_on_error, - memory_mbytes=memory_mbytes, - run_timeout=task_call_timeout, - webhooks=to_client_representations(webhooks), - wait_duration=wait, - ) + with 
map_client_errors(): + run = await task_client.call( + task_input=task_input, + build=build, + restart_on_error=restart_on_error, + memory_mbytes=memory_mbytes, + run_timeout=task_call_timeout, + webhooks=to_client_representations(webhooks), + wait_duration=wait, + ) if run is None: raise RuntimeError(f'Failed to call Task with ID "{task_id}".') @@ -1171,12 +1176,13 @@ async def metamorph( if not self.configuration.actor_run_id: raise RuntimeError('actor_run_id cannot be None when running on the Apify platform.') - await self.apify_client.run(self.configuration.actor_run_id).metamorph( - target_actor_id=target_actor_id, - run_input=run_input, - target_actor_build=target_actor_build, - content_type=content_type, - ) + with map_client_errors(): + await self.apify_client.run(self.configuration.actor_run_id).metamorph( + target_actor_id=target_actor_id, + run_input=run_input, + target_actor_build=target_actor_build, + content_type=content_type, + ) if custom_after_sleep: await asyncio.sleep(custom_after_sleep.total_seconds()) @@ -1242,7 +1248,8 @@ async def safe_dispatch(listener: Any, data: Any) -> None: except TimeoutError: self.log.warning('Pre-reboot event listeners did not finish within timeout; proceeding with reboot') - await self.apify_client.run(self.configuration.actor_run_id).reboot() + with map_client_errors(): + await self.apify_client.run(self.configuration.actor_run_id).reboot() except BaseException: # Reset the flag so that a failed or cancelled reboot can be retried. 
self._is_rebooting = False @@ -1283,17 +1290,18 @@ async def add_webhook(self, webhook: Webhook, *, idempotency_key: str | None = N if not self.configuration.actor_run_id: raise RuntimeError('actor_run_id cannot be None when running on the Apify platform.') - await self.apify_client.webhooks().create( - actor_run_id=self.configuration.actor_run_id, - event_types=webhook.event_types, - request_url=webhook.request_url, - payload_template=webhook.payload_template, - headers_template=webhook.headers_template, - ignore_ssl_errors=webhook.ignore_ssl_errors, - do_not_retry=webhook.do_not_retry, - idempotency_key=idempotency_key if idempotency_key is not None else webhook.idempotency_key, - is_ad_hoc=True, - ) + with map_client_errors(): + await self.apify_client.webhooks().create( + actor_run_id=self.configuration.actor_run_id, + event_types=webhook.event_types, + request_url=webhook.request_url, + payload_template=webhook.payload_template, + headers_template=webhook.headers_template, + ignore_ssl_errors=webhook.ignore_ssl_errors, + do_not_retry=webhook.do_not_retry, + idempotency_key=idempotency_key if idempotency_key is not None else webhook.idempotency_key, + is_ad_hoc=True, + ) @_ensure_context async def set_status_message( @@ -1321,10 +1329,11 @@ async def set_status_message( raise RuntimeError('actor_run_id cannot be None when running on the Apify platform.') run_client = self.apify_client.run(self.configuration.actor_run_id) - run = await run_client.update( - status_message=status_message, - is_status_message_terminal=is_terminal, - ) + with map_client_errors(): + run = await run_client.update( + status_message=status_message, + is_status_message_terminal=is_terminal, + ) if run is None: raise RuntimeError( diff --git a/src/apify/_charging.py b/src/apify/_charging.py index 0a23ad4ef..2291a04fe 100644 --- a/src/apify/_charging.py +++ b/src/apify/_charging.py @@ -19,6 +19,7 @@ from apify_client._models import PricingPerEvent as ClientPricingPerEvent from apify._utils 
import ReentrantLock, docs_group, ensure_context +from apify.errors import map_client_errors from apify.log import logger from apify.storages import Dataset @@ -449,7 +450,8 @@ async def charge(self, event_name: str, *, count: int = 1) -> ChargeResult: # the platform handles them automatically based on dataset writes. pass elif event_name in self._pricing_info: - await self._client.run(self._actor_run_id).charge(event_name, count=charged_count) + with map_client_errors(): + await self._client.run(self._actor_run_id).charge(event_name, count=charged_count) elif event_name in self._tier_priced_events: logger.warning( f"Event '{event_name}' is tier-priced and is not chargeable via the pay-per-event API." @@ -572,7 +574,8 @@ async def _fetch_pricing_info(self) -> _FetchedPricingInfoDict: if self._actor_run_id is None: raise RuntimeError('Actor run ID not found even though the Actor is running on Apify') - run = await self._client.run(self._actor_run_id).get() + with map_client_errors(): + run = await self._client.run(self._actor_run_id).get() if run is None: raise RuntimeError('Actor run not found') diff --git a/src/apify/errors.py b/src/apify/errors.py index 786d985b1..85b706128 100644 --- a/src/apify/errors.py +++ b/src/apify/errors.py @@ -1,7 +1,10 @@ from __future__ import annotations -from typing import TYPE_CHECKING +import contextlib +import functools +from typing import TYPE_CHECKING, ParamSpec, TypeVar +from apify_client.errors import ApifyApiError from apify_client.errors import ForbiddenError as _ForbiddenError from apify_client.errors import InvalidRequestError as _InvalidRequestError from apify_client.errors import RateLimitError as _RateLimitError @@ -11,8 +14,14 @@ from apify._utils import docs_group if TYPE_CHECKING: + from collections.abc import Awaitable, Callable, Coroutine, Iterator + from typing import Any + from apify_client._models import Run +_P = ParamSpec('_P') +_R = TypeVar('_R') + @docs_group('Errors') class ActorError(Exception): @@ -147,6 
+156,35 @@ class ActorRateLimitError(ActorError): retryable = True +@contextlib.contextmanager +def map_client_errors() -> Iterator[None]: + """Translate `apify_client` API errors into domain-level `ActorError`s. + + Wrap any `apify_client` call with this context manager so that an `ApifyApiError` (e.g. an HTTP 401/403/429/5xx + response) surfaces as the matching `ActorError` subclass instead of a raw client exception. The original error + is preserved as the `__cause__` of the raised `ActorError`. + """ + try: + yield + except ApifyApiError as error: + raise ActorError.from_client_error(error) from error + + +def catch_client_errors(func: Callable[_P, Awaitable[_R]]) -> Callable[_P, Coroutine[Any, Any, _R]]: + """Decorate an async function so the `apify_client` errors it raises become domain-level `ActorError`s. + + This is the method-level counterpart of `map_client_errors`, intended for thin wrappers around `apify_client` + calls such as the storage client operations. + """ + + @functools.wraps(func) + async def wrapper(*args: _P.args, **kwargs: _P.kwargs) -> _R: + with map_client_errors(): + return await func(*args, **kwargs) + + return wrapper + + __all__ = [ 'ActorAuthenticationError', 'ActorChargeLimitExceededError', diff --git a/src/apify/storage_clients/_apify/_dataset_client.py b/src/apify/storage_clients/_apify/_dataset_client.py index de9634fdc..82f006f7a 100644 --- a/src/apify/storage_clients/_apify/_dataset_client.py +++ b/src/apify/storage_clients/_apify/_dataset_client.py @@ -12,6 +12,7 @@ from crawlee.storage_clients.models import DatasetItemsListPage, DatasetMetadata from ._api_client_creation import create_storage_api_client +from apify.errors import ActorError, catch_client_errors, map_client_errors from apify.storage_clients._ppe_dataset_mixin import DatasetClientPpeMixin if TYPE_CHECKING: @@ -57,11 +58,12 @@ def __init__( """A lock to ensure that only one operation is performed at a time.""" @override + @catch_client_errors async def 
get_metadata(self) -> DatasetMetadata: metadata = await self._api_client.get() if metadata is None: - raise ValueError('Failed to retrieve dataset metadata.') + raise ActorError('Failed to retrieve dataset metadata.') return DatasetMetadata( id=metadata.id, @@ -73,6 +75,7 @@ async def get_metadata(self) -> DatasetMetadata: ) @classmethod + @catch_client_errors async def open( cls, *, @@ -132,11 +135,13 @@ async def purge(self) -> None: ) @override + @catch_client_errors async def drop(self) -> None: async with self._lock: await self._api_client.delete() @override + @catch_client_errors async def push_data(self, data: Sequence[Mapping[str, JsonSerializable]] | Mapping[str, JsonSerializable]) -> None: async def payloads_generator(items: Sequence[Mapping[str, JsonSerializable]]) -> AsyncIterator[str]: for index, item in enumerate(items): @@ -155,6 +160,7 @@ async def payloads_generator(items: Sequence[Mapping[str, JsonSerializable]]) -> await self._charge_for_items(count_items=limit) @override + @catch_client_errors async def get_data( self, *, @@ -199,18 +205,19 @@ async def iterate_items( skip_empty: bool = False, skip_hidden: bool = False, ) -> AsyncIterator[dict]: - async for item in self._api_client.iterate_items( - offset=offset, - limit=limit, - clean=clean, - desc=desc, - fields=fields, - omit=omit, - unwind=unwind, - skip_empty=skip_empty, - skip_hidden=skip_hidden, - ): - yield item + with map_client_errors(): + async for item in self._api_client.iterate_items( + offset=offset, + limit=limit, + clean=clean, + desc=desc, + fields=fields, + omit=omit, + unwind=unwind, + skip_empty=skip_empty, + skip_hidden=skip_hidden, + ): + yield item @classmethod async def _check_and_serialize(cls, item: Mapping[str, JsonSerializable], index: int | None = None) -> str: diff --git a/src/apify/storage_clients/_apify/_key_value_store_client.py b/src/apify/storage_clients/_apify/_key_value_store_client.py index 9fdce93c4..24b39c901 100644 --- 
a/src/apify/storage_clients/_apify/_key_value_store_client.py +++ b/src/apify/storage_clients/_apify/_key_value_store_client.py @@ -11,6 +11,7 @@ from ._api_client_creation import create_storage_api_client from ._models import ApifyKeyValueStoreMetadata +from apify.errors import ActorError, catch_client_errors, map_client_errors if TYPE_CHECKING: from collections.abc import AsyncIterator @@ -42,11 +43,12 @@ def __init__( """A lock to ensure that only one operation is performed at a time.""" @override + @catch_client_errors async def get_metadata(self) -> ApifyKeyValueStoreMetadata: metadata = await self._api_client.get() if metadata is None: - raise ValueError('Failed to retrieve key-value store metadata.') + raise ActorError('Failed to retrieve key-value store metadata.') return ApifyKeyValueStoreMetadata( id=metadata.id, @@ -58,6 +60,7 @@ async def get_metadata(self) -> ApifyKeyValueStoreMetadata: ) @classmethod + @catch_client_errors async def open( cls, *, @@ -110,16 +113,19 @@ async def purge(self) -> None: ) @override + @catch_client_errors async def drop(self) -> None: async with self._lock: await self._api_client.delete() @override + @catch_client_errors async def get_value(self, *, key: str) -> KeyValueStoreRecord | None: response = await self._api_client.get_record(key) return KeyValueStoreRecord.model_validate(response) if response else None @override + @catch_client_errors async def set_value(self, *, key: str, value: Any, content_type: str | None = None) -> None: async with self._lock: await self._api_client.set_record( @@ -129,6 +135,7 @@ async def set_value(self, *, key: str, value: Any, content_type: str | None = No ) @override + @catch_client_errors async def delete_value(self, *, key: str) -> None: async with self._lock: await self._api_client.delete_record(key=key) @@ -142,33 +149,36 @@ async def iterate_keys( ) -> AsyncIterator[KeyValueStoreRecordMetadata]: count = 0 - while True: - list_key_page = await 
self._api_client.list_keys(exclusive_start_key=exclusive_start_key) - - for item in list_key_page.items: - record_metadata = KeyValueStoreRecordMetadata( - key=item.key, - size=item.size, - content_type='application/octet-stream', # Content type not available from list_keys - ) - yield record_metadata - count += 1 - - # If we've reached the limit, stop yielding - if limit and count >= limit: + with map_client_errors(): + while True: + list_key_page = await self._api_client.list_keys(exclusive_start_key=exclusive_start_key) + + for item in list_key_page.items: + record_metadata = KeyValueStoreRecordMetadata( + key=item.key, + size=item.size, + content_type='application/octet-stream', # Content type not available from list_keys + ) + yield record_metadata + count += 1 + + # If we've reached the limit, stop yielding + if limit and count >= limit: + break + + # If we've reached the limit or there are no more pages, exit the loop + if (limit and count >= limit) or not list_key_page.is_truncated: break - # If we've reached the limit or there are no more pages, exit the loop - if (limit and count >= limit) or not list_key_page.is_truncated: - break - - exclusive_start_key = list_key_page.next_exclusive_start_key + exclusive_start_key = list_key_page.next_exclusive_start_key @override + @catch_client_errors async def record_exists(self, *, key: str) -> bool: return await self._api_client.record_exists(key=key) @override + @catch_client_errors async def get_public_url(self, *, key: str) -> str: """Get a URL for the given key that may be used to publicly access the value in the remote key-value store. 
diff --git a/src/apify/storage_clients/_apify/_request_queue_client.py b/src/apify/storage_clients/_apify/_request_queue_client.py index de6737806..83c0c03da 100644 --- a/src/apify/storage_clients/_apify/_request_queue_client.py +++ b/src/apify/storage_clients/_apify/_request_queue_client.py @@ -11,6 +11,7 @@ from ._models import ApifyRequestQueueMetadata, RequestQueueStats from ._request_queue_shared_client import ApifyRequestQueueSharedClient from ._request_queue_single_client import ApifyRequestQueueSingleClient +from apify.errors import ActorError, catch_client_errors if TYPE_CHECKING: from collections.abc import Sequence @@ -66,6 +67,7 @@ def __init__( raise RuntimeError(f"Unsupported access type: {access}. Allowed values are 'single' or 'shared'.") @override + @catch_client_errors async def get_metadata(self) -> ApifyRequestQueueMetadata: """Retrieve current metadata about the request queue. @@ -79,7 +81,7 @@ async def get_metadata(self) -> ApifyRequestQueueMetadata: metadata = await self._api_client.get() if metadata is None: - raise ValueError('Failed to fetch request queue metadata from the API.') + raise ActorError('Failed to fetch request queue metadata from the API.') total_request_count = metadata.total_request_count handled_request_count = metadata.handled_request_count @@ -101,6 +103,7 @@ async def get_metadata(self) -> ApifyRequestQueueMetadata: ) @classmethod + @catch_client_errors async def open( cls, *, @@ -145,7 +148,7 @@ async def open( # Fetch initial metadata from the API. 
raw_metadata = await api_client.get() if raw_metadata is None: - raise ValueError('Failed to retrieve request queue metadata from the API.') + raise ActorError('Failed to retrieve request queue metadata from the API.') metadata = ApifyRequestQueueMetadata.model_validate(raw_metadata.model_dump(by_alias=True)) return cls( @@ -162,10 +165,12 @@ async def purge(self) -> None: ) @override + @catch_client_errors async def drop(self) -> None: await self._api_client.delete() @override + @catch_client_errors async def add_batch_of_requests( self, requests: Sequence[Request], @@ -175,18 +180,22 @@ async def add_batch_of_requests( return await self._implementation.add_batch_of_requests(requests, forefront=forefront) @override + @catch_client_errors async def fetch_next_request(self) -> Request | None: return await self._implementation.fetch_next_request() @override + @catch_client_errors async def mark_request_as_handled(self, request: Request) -> ProcessedRequest | None: return await self._implementation.mark_request_as_handled(request) @override + @catch_client_errors async def get_request(self, unique_key: str) -> Request | None: return await self._implementation.get_request(unique_key) @override + @catch_client_errors async def reclaim_request( self, request: Request, @@ -196,5 +205,6 @@ async def reclaim_request( return await self._implementation.reclaim_request(request, forefront=forefront) @override + @catch_client_errors async def is_empty(self) -> bool: return await self._implementation.is_empty() diff --git a/tests/unit/test_errors.py b/tests/unit/test_errors.py index 05261ec34..c3c5bd6e8 100644 --- a/tests/unit/test_errors.py +++ b/tests/unit/test_errors.py @@ -17,7 +17,6 @@ ) from apify_client.errors import RateLimitError as ClientRateLimitError -import apify from apify.errors import ( ActorAuthenticationError, ActorChargeLimitExceededError, @@ -70,7 +69,7 @@ def _make_run(*, status: str, exit_code: int | None = None, status_message: str def 
test_actor_error_defaults() -> None: error = ActorError('something went wrong') - assert error.code == 'apify-error' + assert error.code == 'actor-error' assert error.retryable is False assert str(error) == 'something went wrong' @@ -80,7 +79,7 @@ def test_actor_error_overrides_are_instance_scoped() -> None: assert error.code == 'custom' assert error.retryable is True # Overriding on an instance must not leak to the class default. - assert ActorError.code == 'apify-error' + assert ActorError.code == 'actor-error' assert ActorError.retryable is False @@ -167,18 +166,3 @@ def test_from_client_error_unknown_exception_falls_back() -> None: assert type(mapped) is ActorError assert mapped.retryable is False assert 'not a client error' in str(mapped) - - -def test_errors_exported_from_top_level() -> None: - for name in ( - 'ActorError', - 'ActorRunError', - 'ActorTimeoutError', - 'ActorAuthenticationError', - 'ActorChargeLimitExceededError', - 'ActorInputValidationError', - 'ActorRateLimitError', - ): - assert hasattr(apify, name) - assert name in apify.__all__ - assert getattr(apify, name) is getattr(apify.errors, name) From a393544a11e0508461b5ec6e6e0445996e1528a1 Mon Sep 17 00:00:00 2001 From: Vlada Dusek Date: Fri, 19 Jun 2026 15:02:20 +0200 Subject: [PATCH 03/10] feat: raise ActorRunError from Actor.call and call_task when the run fails --- src/apify/_actor.py | 19 ++++++++- tests/unit/actor/test_actor_helpers.py | 54 ++++++++++++++++++++++++++ 2 files changed, 72 insertions(+), 1 deletion(-) diff --git a/src/apify/_actor.py b/src/apify/_actor.py index 40d2d9469..cb2fd696f 100644 --- a/src/apify/_actor.py +++ b/src/apify/_actor.py @@ -33,7 +33,7 @@ from apify._proxy_configuration import ProxyConfiguration from apify._utils import docs_group, docs_name, ensure_context, get_system_info, is_running_in_ipython from apify._webhook import to_client_representations -from apify.errors import map_client_errors +from apify.errors import ActorRunError, map_client_errors from 
apify.events import ApifyEventManager, EventManager, LocalEventManager from apify.log import _configure_logging, logger from apify.storage_clients import ApifyStorageClient, SmartApifyStorageClient @@ -58,6 +58,9 @@ _ensure_context = ensure_context('_active') +_TERMINAL_RUN_FAILURE_STATUSES = frozenset({'FAILED', 'ABORTED', 'TIMED-OUT'}) +"""Run statuses that mean a waited-for run ended in failure, so `call`/`call_task` raise instead of returning it.""" + @docs_name('Actor') @docs_group('Actor') @@ -1038,6 +1041,10 @@ async def call( Returns: Info about the started Actor run. + + Raises: + ActorRunError: If the run finishes in a terminal failure state (`FAILED` or `ABORTED`). + ActorTimeoutError: If the run finishes in the `TIMED-OUT` state. """ client = self.new_client(token=token) if token else self.apify_client @@ -1069,6 +1076,9 @@ async def call( if run is None: raise RuntimeError(f'Failed to call Actor with ID "{actor_id}".') + if run.status in _TERMINAL_RUN_FAILURE_STATUSES: + raise ActorRunError.from_run(run) + return run @_ensure_context @@ -1112,6 +1122,10 @@ async def call_task( Returns: Info about the started Actor run. + + Raises: + ActorRunError: If the run finishes in a terminal failure state (`FAILED` or `ABORTED`). + ActorTimeoutError: If the run finishes in the `TIMED-OUT` state. 
""" client = self.new_client(token=token) if token else self.apify_client @@ -1139,6 +1153,9 @@ async def call_task( if run is None: raise RuntimeError(f'Failed to call Task with ID "{task_id}".') + if run.status in _TERMINAL_RUN_FAILURE_STATUSES: + raise ActorRunError.from_run(run) + return run @_ensure_context diff --git a/tests/unit/actor/test_actor_helpers.py b/tests/unit/actor/test_actor_helpers.py index 5d943904e..bcd8f15fd 100644 --- a/tests/unit/actor/test_actor_helpers.py +++ b/tests/unit/actor/test_actor_helpers.py @@ -15,6 +15,7 @@ from apify import Actor, Webhook from apify._actor import _ActorType from apify._consts import ApifyEnvVars +from apify.errors import ActorRunError, ActorTimeoutError if TYPE_CHECKING: from ..conftest import ApifyClientAsyncPatcher @@ -106,6 +107,59 @@ async def test_call_actor_task(apify_client_async_patcher: ApifyClientAsyncPatch assert apify_client_async_patcher.calls['task']['call'][0][0][0].resource_id == task_id +async def test_call_raises_on_failed_run( + apify_client_async_patcher: ApifyClientAsyncPatcher, fake_actor_run: Run +) -> None: + """`Actor.call` raises `ActorRunError` carrying the run metadata when the awaited run ends as `FAILED`.""" + failed_run = fake_actor_run.model_copy(update={'status': 'FAILED'}) + apify_client_async_patcher.patch('actor', 'call', return_value=failed_run) + + async with Actor: + with pytest.raises(ActorRunError) as exc_info: + await Actor.call('some-actor-id') + + assert type(exc_info.value) is ActorRunError + assert exc_info.value.status == 'FAILED' + assert exc_info.value.run_id == failed_run.id + + +async def test_call_raises_timeout_on_timed_out_run( + apify_client_async_patcher: ApifyClientAsyncPatcher, fake_actor_run: Run +) -> None: + """`Actor.call` raises the more specific `ActorTimeoutError` for a `TIMED-OUT` run.""" + timed_out_run = fake_actor_run.model_copy(update={'status': 'TIMED-OUT'}) + apify_client_async_patcher.patch('actor', 'call', return_value=timed_out_run) + + 
async with Actor: + with pytest.raises(ActorTimeoutError): + await Actor.call('some-actor-id') + + +async def test_call_returns_succeeded_run( + apify_client_async_patcher: ApifyClientAsyncPatcher, fake_actor_run: Run +) -> None: + """`Actor.call` returns the run unchanged when it finishes with a non-failure status.""" + succeeded_run = fake_actor_run.model_copy(update={'status': 'SUCCEEDED'}) + apify_client_async_patcher.patch('actor', 'call', return_value=succeeded_run) + + async with Actor: + run = await Actor.call('some-actor-id') + + assert run.status == 'SUCCEEDED' + + +async def test_call_task_raises_on_failed_run( + apify_client_async_patcher: ApifyClientAsyncPatcher, fake_actor_run: Run +) -> None: + """`Actor.call_task` raises `ActorRunError` when the awaited run ends in a terminal failure state.""" + failed_run = fake_actor_run.model_copy(update={'status': 'FAILED'}) + apify_client_async_patcher.patch('task', 'call', return_value=failed_run) + + async with Actor: + with pytest.raises(ActorRunError): + await Actor.call_task('some-task-id') + + async def test_start_actor(apify_client_async_patcher: ApifyClientAsyncPatcher, fake_actor_run: Run) -> None: apify_client_async_patcher.patch('actor', 'start', return_value=fake_actor_run) actor_id = 'some-id' From a3fd0ce40a91c209c464048bdad96d0c30113fd7 Mon Sep 17 00:00:00 2001 From: Vlada Dusek Date: Fri, 19 Jun 2026 15:02:22 +0200 Subject: [PATCH 04/10] docs: add error handling concept page --- docs/02_concepts/13_exceptions.mdx | 69 +++++++++++++++++++ .../02_concepts/code/13_handle_call_errors.py | 29 ++++++++ docs/02_concepts/code/13_retry_retryable.py | 30 ++++++++ 3 files changed, 128 insertions(+) create mode 100644 docs/02_concepts/13_exceptions.mdx create mode 100644 docs/02_concepts/code/13_handle_call_errors.py create mode 100644 docs/02_concepts/code/13_retry_retryable.py diff --git a/docs/02_concepts/13_exceptions.mdx b/docs/02_concepts/13_exceptions.mdx new file mode 100644 index 
000000000..5b30fd9db --- /dev/null +++ b/docs/02_concepts/13_exceptions.mdx @@ -0,0 +1,69 @@ +--- +id: error-handling +title: Error handling +description: The exceptions an Actor can raise and how to handle them +--- + +import HandleCallErrorsSource from '!!raw-loader!roa-loader!./code/13_handle_call_errors.py'; +import RetryRetryableSource from '!!raw-loader!roa-loader!./code/13_retry_retryable.py'; +import ApiLink from '@theme/ApiLink'; +import RunnableCodeBlock from '@site/src/components/RunnableCodeBlock'; + +When an Actor runs, failures surface as exceptions from two layers: the Apify SDK and the underlying [Apify API client](https://docs.apify.com/api/client/python). The SDK wraps the client's low-level HTTP errors in a small, domain-level taxonomy. You can then react based on what went wrong, without parsing error messages. + +## The error taxonomy + +All SDK errors live in the `apify.errors` module and derive from `ActorError`. Every error carries two attributes you can branch on: + +- `code`: a stable, machine-readable identifier of the error category. +- `retryable`: whether retrying the same operation might succeed. + +The taxonomy is intentionally small: + +| Exception | `code` | `retryable` | Raised when | +| --- | --- | --- | --- | +| `ActorError` | `actor-error` | `False` | Base class and fallback for an unmapped API failure. An HTTP 5xx server error maps here as retryable. | +| `ActorAuthenticationError` | `authentication-error` | `False` | An API request is unauthorized or forbidden (HTTP 401 or 403). | +| `ActorRateLimitError` | `rate-limit-exceeded` | `True` | The Apify API rate limit is exceeded (HTTP 429). | +| `ActorInputValidationError` | `input-validation-error` | `False` | The API rejects a request as invalid (HTTP 400). Also subclasses `ValueError`. | +| `ActorRunError` | `actor-run-failed` | `False` | A run started with `Actor.call` or `Actor.call_task` finishes as `FAILED` or `ABORTED`. 
| +| `ActorTimeoutError` | `actor-timed-out` | `True` | A run finishes as `TIMED-OUT`. Subclass of `ActorRunError`. | +| `ActorChargeLimitExceededError` | `charge-limit-exceeded` | `False` | Reserved for the pay-per-event charge limit. See [the note below](#the-pay-per-event-charge-limit). | + +`ActorRunError` also exposes the run metadata you need to decide what to do next: `run_id`, `status`, `exit_code`, and `status_message`. + +## Where the errors come from + +The SDK methods that call the Apify API translate the client's HTTP errors into the matching `ActorError` subclass. This covers `Actor.start`, `Actor.call`, `Actor.abort`, `Actor.metamorph`, `Actor.add_webhook`, charging, and all storage operations on datasets, key-value stores, and request queues. The original `apify_client` exception is preserved as the `__cause__` of the raised `ActorError`, so the low-level details stay available. + +`Actor.call` and `Actor.call_task` go one step further. They wait for the run to finish, so instead of returning a failed run object they raise `ActorRunError`, or the more specific `ActorTimeoutError`, when the run ends in a failure state. + +## Handling errors + +Catch `ActorError` to handle every domain error in one place, then branch on `code` or `retryable`. To react to a specific failure, catch its subclass first. The run errors form an inheritance chain (`ActorTimeoutError` is an `ActorRunError`, which is an `ActorError`), so order the `except` blocks from the most specific to the most general. + + + {HandleCallErrorsSource} + + +`ActorInputValidationError` also subclasses `ValueError`, so an existing `except ValueError` keeps catching it. + +## Retrying transient failures + +The `retryable` flag marks failures where the same call might succeed on a second attempt, such as a rate limit, a server error, or a timed-out run. Use it to drive a retry loop with backoff, and stop retrying as soon as you hit a permanent error. 
+ + + {RetryRetryableSource} + + +## The pay-per-event charge limit + +Reaching the pay-per-event charge limit does not raise `ActorChargeLimitExceededError`. The SDK caps charging and data pushing instead, and your Actor keeps running. To detect the limit, check the `event_charge_limit_reached` field on the `ChargeResult` returned by `Actor.charge` or `Actor.push_data`. For details, see [Pay-per-event monetization](./pay-per-event). + +## Errors from the API client + +Under the hood, the SDK errors map from the exceptions in `apify_client.errors`, such as `ApifyApiError` and its subclasses `UnauthorizedError`, `RateLimitError`, `ServerError`, and `InvalidRequestError`. You normally work with the SDK's `ActorError` taxonomy and never see these directly. The original client exception stays available as `__cause__` when you need the HTTP status code or the response body. If you call the API client directly through `Actor.new_client()`, you receive the raw `apify_client` exceptions instead. + +## Conclusion + +This page has covered the SDK error taxonomy rooted in `ActorError`, the operations that raise each error, how to branch on the `code` and `retryable` attributes, and why the pay-per-event charge limit is handled through a return value rather than an exception. Catching `ActorError` and inspecting its attributes is enough for most Actors, with the specific subclasses available when you need finer control. diff --git a/docs/02_concepts/code/13_handle_call_errors.py b/docs/02_concepts/code/13_handle_call_errors.py new file mode 100644 index 000000000..dea214823 --- /dev/null +++ b/docs/02_concepts/code/13_handle_call_errors.py @@ -0,0 +1,29 @@ +import asyncio + +from apify import Actor +from apify.errors import ActorError, ActorRunError, ActorTimeoutError + + +async def main() -> None: + async with Actor: + try: + run = await Actor.call('apify/web-scraper', run_input={'startUrls': []}) + # Order matters: catch the most specific subclasses first. 
+ except ActorTimeoutError as exc: + Actor.log.warning(f'Run {exc.run_id} timed out; try a longer timeout.') + except ActorRunError as exc: + Actor.log.error( + f'Run {exc.run_id} failed: {exc.status} (exit {exc.exit_code}).' + ) + except ActorError as exc: + # Authentication, rate limit, server error, or any other API failure. + if exc.retryable: + Actor.log.warning(f'Transient error ({exc.code}); can be retried.') + else: + Actor.log.error(f'Permanent error ({exc.code}): {exc}') + else: + Actor.log.info(f'Run {run.id} finished with status {run.status}.') + + +if __name__ == '__main__': + asyncio.run(main()) diff --git a/docs/02_concepts/code/13_retry_retryable.py b/docs/02_concepts/code/13_retry_retryable.py new file mode 100644 index 000000000..3718e10bd --- /dev/null +++ b/docs/02_concepts/code/13_retry_retryable.py @@ -0,0 +1,30 @@ +import asyncio + +from apify import Actor +from apify.errors import ActorError + + +async def main() -> None: + async with Actor: + max_attempts = 3 + + for attempt in range(1, max_attempts + 1): + try: + run = await Actor.call('apify/web-scraper') + except ActorError as exc: + # Give up on permanent errors, or once the attempts are exhausted. + if not exc.retryable or attempt == max_attempts: + raise + + backoff_secs = 2**attempt + Actor.log.warning( + f'Attempt {attempt} failed ({exc.code}); retrying in {backoff_secs}s.' 
+ ) + await asyncio.sleep(backoff_secs) + else: + Actor.log.info(f'Run {run.id} finished with status {run.status}.') + break + + +if __name__ == '__main__': + asyncio.run(main()) From c4e9987dc3c4d0c3362d261cd0961be6a2dd4707 Mon Sep 17 00:00:00 2001 From: Vlada Dusek Date: Fri, 19 Jun 2026 15:39:38 +0200 Subject: [PATCH 05/10] refactor: re-export apify-client errors instead of wrapping them --- docs/02_concepts/13_exceptions.mdx | 58 +++---- .../02_concepts/code/13_handle_call_errors.py | 28 ++-- docs/02_concepts/code/13_retry_retryable.py | 30 ---- docs/02_concepts/code/13_retry_timed_out.py | 31 ++++ src/apify/_actor.py | 130 ++++++--------- src/apify/_charging.py | 7 +- src/apify/errors.py | 157 +++++------------- .../storage_clients/_apify/_dataset_client.py | 33 ++-- .../_apify/_key_value_store_client.py | 50 +++--- .../_apify/_request_queue_client.py | 14 +- tests/unit/actor/test_actor_helpers.py | 54 ------ tests/unit/actor/test_configuration.py | 18 ++ tests/unit/test_errors.py | 111 +++---------- 13 files changed, 238 insertions(+), 483 deletions(-) delete mode 100644 docs/02_concepts/code/13_retry_retryable.py create mode 100644 docs/02_concepts/code/13_retry_timed_out.py diff --git a/docs/02_concepts/13_exceptions.mdx b/docs/02_concepts/13_exceptions.mdx index 5b30fd9db..96e8096bf 100644 --- a/docs/02_concepts/13_exceptions.mdx +++ b/docs/02_concepts/13_exceptions.mdx @@ -5,65 +5,53 @@ description: The exceptions an Actor can raise and how to handle them --- import HandleCallErrorsSource from '!!raw-loader!roa-loader!./code/13_handle_call_errors.py'; -import RetryRetryableSource from '!!raw-loader!roa-loader!./code/13_retry_retryable.py'; +import RetryTimedOutSource from '!!raw-loader!roa-loader!./code/13_retry_timed_out.py'; import ApiLink from '@theme/ApiLink'; import RunnableCodeBlock from '@site/src/components/RunnableCodeBlock'; -When an Actor runs, failures surface as exceptions from two layers: the Apify SDK and the underlying [Apify API 
client](https://docs.apify.com/api/client/python). The SDK wraps the client's low-level HTTP errors in a small, domain-level taxonomy. You can then react based on what went wrong, without parsing error messages. +When an Actor runs, failures surface from two places. The [Apify API client](https://docs.apify.com/api/client/python) raises typed exceptions for failed API requests. The SDK adds a small set of its own errors for outcomes the client cannot express, such as a sub-Actor run that finishes in a failure state. Both are available from the `apify.errors` module. -## The error taxonomy +## Errors from the Apify API -All SDK errors live in the `apify.errors` module and derive from `ActorError`. Every error carries two attributes you can branch on: +Every SDK operation that talks to the Apify API can raise `ApifyApiError`. This includes `Actor.start`, `Actor.call`, `Actor.abort`, `Actor.metamorph`, `Actor.add_webhook`, charging, and all storage operations on datasets, key-value stores, and request queues. The SDK raises these client exceptions as-is. It does not wrap them, so you keep the HTTP status code, the error type, and the response data on the exception. -- `code`: a stable, machine-readable identifier of the error category. -- `retryable`: whether retrying the same operation might succeed. +`ApifyApiError` dispatches to a subclass based on the HTTP status code: -The taxonomy is intentionally small: +- `UnauthorizedError` (401) and `ForbiddenError` (403) for an unauthorized or forbidden request. +- `NotFoundError` (404) when the Actor, run, or storage does not exist. +- `ConflictError` (409) for a conflicting request. +- `RateLimitError` (429) when the API rate limit is hit. +- `ServerError` for any 5xx response. +- `InvalidRequestError` (400) when the API rejects the request as malformed. 
-| Exception | `code` | `retryable` | Raised when | -| --- | --- | --- | --- | -| `ActorError` | `actor-error` | `False` | Base class and fallback for an unmapped API failure. An HTTP 5xx server error maps here as retryable. | -| `ActorAuthenticationError` | `authentication-error` | `False` | An API request is unauthorized or forbidden (HTTP 401 or 403). | -| `ActorRateLimitError` | `rate-limit-exceeded` | `True` | The Apify API rate limit is exceeded (HTTP 429). | -| `ActorInputValidationError` | `input-validation-error` | `False` | The API rejects a request as invalid (HTTP 400). Also subclasses `ValueError`. | -| `ActorRunError` | `actor-run-failed` | `False` | A run started with `Actor.call` or `Actor.call_task` finishes as `FAILED` or `ABORTED`. | -| `ActorTimeoutError` | `actor-timed-out` | `True` | A run finishes as `TIMED-OUT`. Subclass of `ActorRunError`. | -| `ActorChargeLimitExceededError` | `charge-limit-exceeded` | `False` | Reserved for the pay-per-event charge limit. See [the note below](#the-pay-per-event-charge-limit). | +The client retries rate-limited and server errors on its own, so you only see `RateLimitError` or `ServerError` once those retries are exhausted. For convenience, `apify.errors` re-exports the whole client error hierarchy, so you can import everything from one place: -`ActorRunError` also exposes the run metadata you need to decide what to do next: `run_id`, `status`, `exit_code`, and `status_message`. +```python +from apify.errors import ApifyApiError, NotFoundError, RateLimitError +``` -## Where the errors come from +## Actor run failures -The SDK methods that call the Apify API translate the client's HTTP errors into the matching `ActorError` subclass. This covers `Actor.start`, `Actor.call`, `Actor.abort`, `Actor.metamorph`, `Actor.add_webhook`, charging, and all storage operations on datasets, key-value stores, and request queues. 
The original `apify_client` exception is preserved as the `__cause__` of the raised `ActorError`, so the low-level details stay available. +`Actor.call` and `Actor.call_task` wait for the run to finish and return it, whatever its final status. A finished run can be `SUCCEEDED`, `FAILED`, `ABORTED`, or `TIMED-OUT`, so check `run.status` before you rely on the run's output. -`Actor.call` and `Actor.call_task` go one step further. They wait for the run to finish, so instead of returning a failed run object they raise `ActorRunError`, or the more specific `ActorTimeoutError`, when the run ends in a failure state. +To turn a failed run into an exception, build one from the run with `ActorRunError.from_run()`. It returns an `ActorTimeoutError` for a timed-out run, and an `ActorRunError` otherwise. Both carry the run metadata you need to decide what to do next: `run_id`, `status`, `exit_code`, and `status_message`. Every SDK error also derives from `ActorError` and exposes a stable `code` and a `retryable` flag. ## Handling errors -Catch `ActorError` to handle every domain error in one place, then branch on `code` or `retryable`. To react to a specific failure, catch its subclass first. The run errors form an inheritance chain (`ActorTimeoutError` is an `ActorRunError`, which is an `ActorError`), so order the `except` blocks from the most specific to the most general. +Wrap the call to catch an API failure, then inspect the finished run and escalate it if needed: {HandleCallErrorsSource} -`ActorInputValidationError` also subclasses `ValueError`, so an existing `except ValueError` keeps catching it. +## Retrying a timed-out run -## Retrying transient failures - -The `retryable` flag marks failures where the same call might succeed on a second attempt, such as a rate limit, a server error, or a timed-out run. Use it to drive a retry loop with backoff, and stop retrying as soon as you hit a permanent error.
+A timed-out run is the one failure where retrying can help, as long as you give it more time. `ActorTimeoutError` sets `retryable` to `True` to mark this. Retry with a longer timeout rather than the same one: - {RetryRetryableSource} + {RetryTimedOutSource} ## The pay-per-event charge limit -Reaching the pay-per-event charge limit does not raise `ActorChargeLimitExceededError`. The SDK caps charging and data pushing instead, and your Actor keeps running. To detect the limit, check the `event_charge_limit_reached` field on the `ChargeResult` returned by `Actor.charge` or `Actor.push_data`. For details, see [Pay-per-event monetization](./pay-per-event). - -## Errors from the API client - -Under the hood, the SDK errors map from the exceptions in `apify_client.errors`, such as `ApifyApiError` and its subclasses `UnauthorizedError`, `RateLimitError`, `ServerError`, and `InvalidRequestError`. You normally work with the SDK's `ActorError` taxonomy and never see these directly. The original client exception stays available as `__cause__` when you need the HTTP status code or the response body. If you call the API client directly through `Actor.new_client()`, you receive the raw `apify_client` exceptions instead. - -## Conclusion - -This page has covered the SDK error taxonomy rooted in `ActorError`, the operations that raise each error, how to branch on the `code` and `retryable` attributes, and why the pay-per-event charge limit is handled through a return value rather than an exception. Catching `ActorError` and inspecting its attributes is enough for most Actors, with the specific subclasses available when you need finer control. +Reaching the pay-per-event charge limit does not raise an error. The SDK caps charging and data pushing instead, and your Actor keeps running. To detect the limit, check the `event_charge_limit_reached` field on the `ChargeResult` returned by `Actor.charge` or `Actor.push_data`. For details, see [Pay-per-event monetization](./pay-per-event). 
diff --git a/docs/02_concepts/code/13_handle_call_errors.py b/docs/02_concepts/code/13_handle_call_errors.py index dea214823..45ab7c9b1 100644 --- a/docs/02_concepts/code/13_handle_call_errors.py +++ b/docs/02_concepts/code/13_handle_call_errors.py @@ -1,28 +1,24 @@ import asyncio from apify import Actor -from apify.errors import ActorError, ActorRunError, ActorTimeoutError +from apify.errors import ActorRunError, ApifyApiError async def main() -> None: async with Actor: try: run = await Actor.call('apify/web-scraper', run_input={'startUrls': []}) - # Order matters: catch the most specific subclasses first. - except ActorTimeoutError as exc: - Actor.log.warning(f'Run {exc.run_id} timed out; try a longer timeout.') - except ActorRunError as exc: - Actor.log.error( - f'Run {exc.run_id} failed: {exc.status} (exit {exc.exit_code}).' - ) - except ActorError as exc: - # Authentication, rate limit, server error, or any other API failure. - if exc.retryable: - Actor.log.warning(f'Transient error ({exc.code}); can be retried.') - else: - Actor.log.error(f'Permanent error ({exc.code}): {exc}') - else: - Actor.log.info(f'Run {run.id} finished with status {run.status}.') + except ApifyApiError as exc: + # The Apify API rejected the request, e.g. the Actor does not exist or + # the token is invalid. The HTTP status code is on the exception. + Actor.log.error(f'Could not start the Actor: {exc} (HTTP {exc.status_code}).') + return + + # `Actor.call` returns the finished run whatever its status, so check it. 
+ if run.status != 'SUCCEEDED': + raise ActorRunError.from_run(run) + + Actor.log.info(f'Run {run.id} finished successfully.') if __name__ == '__main__': diff --git a/docs/02_concepts/code/13_retry_retryable.py b/docs/02_concepts/code/13_retry_retryable.py deleted file mode 100644 index 3718e10bd..000000000 --- a/docs/02_concepts/code/13_retry_retryable.py +++ /dev/null @@ -1,30 +0,0 @@ -import asyncio - -from apify import Actor -from apify.errors import ActorError - - -async def main() -> None: - async with Actor: - max_attempts = 3 - - for attempt in range(1, max_attempts + 1): - try: - run = await Actor.call('apify/web-scraper') - except ActorError as exc: - # Give up on permanent errors, or once the attempts are exhausted. - if not exc.retryable or attempt == max_attempts: - raise - - backoff_secs = 2**attempt - Actor.log.warning( - f'Attempt {attempt} failed ({exc.code}); retrying in {backoff_secs}s.' - ) - await asyncio.sleep(backoff_secs) - else: - Actor.log.info(f'Run {run.id} finished with status {run.status}.') - break - - -if __name__ == '__main__': - asyncio.run(main()) diff --git a/docs/02_concepts/code/13_retry_timed_out.py b/docs/02_concepts/code/13_retry_timed_out.py new file mode 100644 index 000000000..f9510a446 --- /dev/null +++ b/docs/02_concepts/code/13_retry_timed_out.py @@ -0,0 +1,31 @@ +import asyncio +from datetime import timedelta + +from apify import Actor +from apify.errors import ActorRunError, ActorTimeoutError + + +async def main() -> None: + async with Actor: + timeout = timedelta(minutes=5) + max_attempts = 3 + + for attempt in range(1, max_attempts + 1): + run = await Actor.call('apify/web-scraper', timeout=timeout) + + if run.status == 'SUCCEEDED': + Actor.log.info(f'Run {run.id} finished.') + break + + # Build a typed error from the finished run so we can branch on it. 
+ error = ActorRunError.from_run(run) + if isinstance(error, ActorTimeoutError) and attempt < max_attempts: + timeout *= 2 + Actor.log.warning(f'Timed out, retrying with timeout {timeout}.') + continue + + raise error + + +if __name__ == '__main__': + asyncio.run(main()) diff --git a/src/apify/_actor.py b/src/apify/_actor.py index cb2fd696f..20242a9a5 100644 --- a/src/apify/_actor.py +++ b/src/apify/_actor.py @@ -33,7 +33,6 @@ from apify._proxy_configuration import ProxyConfiguration from apify._utils import docs_group, docs_name, ensure_context, get_system_info, is_running_in_ipython from apify._webhook import to_client_representations -from apify.errors import ActorRunError, map_client_errors from apify.events import ApifyEventManager, EventManager, LocalEventManager from apify.log import _configure_logging, logger from apify.storage_clients import ApifyStorageClient, SmartApifyStorageClient @@ -58,9 +57,6 @@ _ensure_context = ensure_context('_active') -_TERMINAL_RUN_FAILURE_STATUSES = frozenset({'FAILED', 'ABORTED', 'TIMED-OUT'}) -"""Run statuses that mean a waited-for run ended in failure, so `call`/`call_task` raise instead of returning it.""" - @docs_name('Actor') @docs_group('Actor') @@ -941,18 +937,17 @@ async def start( raise ValueError(f'Invalid timeout {timeout!r}: expected `None`, `"inherit"`, or a `timedelta`.') actor_client = client.actor(actor_id) - with map_client_errors(): - return await actor_client.start( - run_input=run_input, - content_type=content_type, - build=build, - max_total_charge_usd=max_total_charge_usd, - restart_on_error=restart_on_error, - memory_mbytes=memory_mbytes, - run_timeout=actor_start_timeout, - force_permission_level=force_permission_level, - webhooks=to_client_representations(webhooks), - ) + return await actor_client.start( + run_input=run_input, + content_type=content_type, + build=build, + max_total_charge_usd=max_total_charge_usd, + restart_on_error=restart_on_error, + memory_mbytes=memory_mbytes, + 
run_timeout=actor_start_timeout, + force_permission_level=force_permission_level, + webhooks=to_client_representations(webhooks), + ) @_ensure_context async def abort( @@ -981,11 +976,10 @@ async def abort( client = self.new_client(token=token) if token else self.apify_client run_client = client.run(run_id) - with map_client_errors(): - if status_message: - await run_client.update(status_message=status_message) + if status_message: + await run_client.update(status_message=status_message) - run = await run_client.abort(gracefully=gracefully) + run = await run_client.abort(gracefully=gracefully) if run is None: raise RuntimeError(f'Failed to abort Actor run with ID "{run_id}".') @@ -1041,10 +1035,6 @@ async def call( Returns: Info about the started Actor run. - - Raises: - ActorRunError: If the run finishes in a terminal failure state (`FAILED` or `ABORTED`). - ActorTimeoutError: If the run finishes in the `TIMED-OUT` state. """ client = self.new_client(token=token) if token else self.apify_client @@ -1058,27 +1048,23 @@ async def call( raise ValueError(f'Invalid timeout {timeout!r}: expected `None`, `"inherit"`, or a `timedelta`.') actor_client = client.actor(actor_id) - with map_client_errors(): - run = await actor_client.call( - run_input=run_input, - content_type=content_type, - build=build, - max_total_charge_usd=max_total_charge_usd, - restart_on_error=restart_on_error, - memory_mbytes=memory_mbytes, - run_timeout=actor_call_timeout, - force_permission_level=force_permission_level, - webhooks=to_client_representations(webhooks), - wait_duration=wait, - logger=logger, - ) + run = await actor_client.call( + run_input=run_input, + content_type=content_type, + build=build, + max_total_charge_usd=max_total_charge_usd, + restart_on_error=restart_on_error, + memory_mbytes=memory_mbytes, + run_timeout=actor_call_timeout, + force_permission_level=force_permission_level, + webhooks=to_client_representations(webhooks), + wait_duration=wait, + logger=logger, + ) if run is 
None: raise RuntimeError(f'Failed to call Actor with ID "{actor_id}".') - if run.status in _TERMINAL_RUN_FAILURE_STATUSES: - raise ActorRunError.from_run(run) - return run @_ensure_context @@ -1122,10 +1108,6 @@ async def call_task( Returns: Info about the started Actor run. - - Raises: - ActorRunError: If the run finishes in a terminal failure state (`FAILED` or `ABORTED`). - ActorTimeoutError: If the run finishes in the `TIMED-OUT` state. """ client = self.new_client(token=token) if token else self.apify_client @@ -1139,23 +1121,19 @@ async def call_task( raise ValueError(f'Invalid timeout {timeout!r}: expected `None`, `"inherit"`, or a `timedelta`.') task_client = client.task(task_id) - with map_client_errors(): - run = await task_client.call( - task_input=task_input, - build=build, - restart_on_error=restart_on_error, - memory_mbytes=memory_mbytes, - run_timeout=task_call_timeout, - webhooks=to_client_representations(webhooks), - wait_duration=wait, - ) + run = await task_client.call( + task_input=task_input, + build=build, + restart_on_error=restart_on_error, + memory_mbytes=memory_mbytes, + run_timeout=task_call_timeout, + webhooks=to_client_representations(webhooks), + wait_duration=wait, + ) if run is None: raise RuntimeError(f'Failed to call Task with ID "{task_id}".') - if run.status in _TERMINAL_RUN_FAILURE_STATUSES: - raise ActorRunError.from_run(run) - return run @_ensure_context @@ -1194,13 +1172,12 @@ async def metamorph( if not self.configuration.actor_run_id: raise RuntimeError('actor_run_id cannot be None when running on the Apify platform.') - with map_client_errors(): - await self.apify_client.run(self.configuration.actor_run_id).metamorph( - target_actor_id=target_actor_id, - run_input=run_input, - target_actor_build=target_actor_build, - content_type=content_type, - ) + await self.apify_client.run(self.configuration.actor_run_id).metamorph( + target_actor_id=target_actor_id, + run_input=run_input, + target_actor_build=target_actor_build, + 
content_type=content_type, + ) if custom_after_sleep: await asyncio.sleep(custom_after_sleep.total_seconds()) @@ -1266,8 +1243,7 @@ async def safe_dispatch(listener: Any, data: Any) -> None: except TimeoutError: self.log.warning('Pre-reboot event listeners did not finish within timeout; proceeding with reboot') - with map_client_errors(): - await self.apify_client.run(self.configuration.actor_run_id).reboot() + await self.apify_client.run(self.configuration.actor_run_id).reboot() except BaseException: # Reset the flag so that a failed or cancelled reboot can be retried. self._is_rebooting = False @@ -1315,12 +1291,11 @@ async def add_webhook(self, webhook: Webhook, *, idempotency_key: str | None = N if idempotency_key is not None: webhook_fields['idempotency_key'] = idempotency_key - with map_client_errors(): - await self.apify_client.webhooks().create( - **webhook_fields, - actor_run_id=self.configuration.actor_run_id, - is_ad_hoc=True, - ) + await self.apify_client.webhooks().create( + **webhook_fields, + actor_run_id=self.configuration.actor_run_id, + is_ad_hoc=True, + ) @_ensure_context async def set_status_message( @@ -1348,11 +1323,10 @@ async def set_status_message( raise RuntimeError('actor_run_id cannot be None when running on the Apify platform.') run_client = self.apify_client.run(self.configuration.actor_run_id) - with map_client_errors(): - run = await run_client.update( - status_message=status_message, - is_status_message_terminal=is_terminal, - ) + run = await run_client.update( + status_message=status_message, + is_status_message_terminal=is_terminal, + ) if run is None: raise RuntimeError( diff --git a/src/apify/_charging.py b/src/apify/_charging.py index 2291a04fe..0a23ad4ef 100644 --- a/src/apify/_charging.py +++ b/src/apify/_charging.py @@ -19,7 +19,6 @@ from apify_client._models import PricingPerEvent as ClientPricingPerEvent from apify._utils import ReentrantLock, docs_group, ensure_context -from apify.errors import map_client_errors from 
apify.log import logger from apify.storages import Dataset @@ -450,8 +449,7 @@ async def charge(self, event_name: str, *, count: int = 1) -> ChargeResult: # the platform handles them automatically based on dataset writes. pass elif event_name in self._pricing_info: - with map_client_errors(): - await self._client.run(self._actor_run_id).charge(event_name, count=charged_count) + await self._client.run(self._actor_run_id).charge(event_name, count=charged_count) elif event_name in self._tier_priced_events: logger.warning( f"Event '{event_name}' is tier-priced and is not chargeable via the pay-per-event API." @@ -574,8 +572,7 @@ async def _fetch_pricing_info(self) -> _FetchedPricingInfoDict: if self._actor_run_id is None: raise RuntimeError('Actor run ID not found even though the Actor is running on Apify') - with map_client_errors(): - run = await self._client.run(self._actor_run_id).get() + run = await self._client.run(self._actor_run_id).get() if run is None: raise RuntimeError('Actor run not found') diff --git a/src/apify/errors.py b/src/apify/errors.py index 85b706128..ed1ddf01a 100644 --- a/src/apify/errors.py +++ b/src/apify/errors.py @@ -1,41 +1,46 @@ from __future__ import annotations -import contextlib -import functools -from typing import TYPE_CHECKING, ParamSpec, TypeVar - -from apify_client.errors import ApifyApiError -from apify_client.errors import ForbiddenError as _ForbiddenError -from apify_client.errors import InvalidRequestError as _InvalidRequestError -from apify_client.errors import RateLimitError as _RateLimitError -from apify_client.errors import ServerError as _ServerError -from apify_client.errors import UnauthorizedError as _UnauthorizedError +from typing import TYPE_CHECKING + +# Re-export the Apify API client's error hierarchy so callers have a single import location for every error the SDK +# can surface. Any operation that talks to the Apify API raises these as-is; the SDK does not wrap them in its own +# types. 
See https://docs.apify.com/api/client/python for the full client error reference. +from apify_client.errors import ( + ApifyApiError, + ApifyClientError, + ConflictError, + ForbiddenError, + InvalidRequestError, + InvalidResponseBodyError, + NotFoundError, + RateLimitError, + ServerError, + UnauthorizedError, +) from apify._utils import docs_group if TYPE_CHECKING: - from collections.abc import Awaitable, Callable, Coroutine, Iterator - from typing import Any - from apify_client._models import Run -_P = ParamSpec('_P') -_R = TypeVar('_R') - @docs_group('Errors') class ActorError(Exception): - """Base class for all domain-level Apify SDK errors. + """Base class for the Apify SDK's own domain-level errors. - Carries a machine-readable `code` and a `retryable` flag so callers can branch on a failure without reading - the human-readable error message. + These describe outcomes that the Apify API client cannot express on its own, such as a finished Actor run that + ended in a failure state. Errors that originate from the Apify API surface as `apify_client` exceptions (e.g. + `ApifyApiError` and its subclasses), which the SDK re-exports from this module but does not wrap. + + Carries a machine-readable `code` and a `retryable` flag so callers can branch on a failure without parsing the + human-readable error message. """ code: str = 'actor-error' """Stable, machine-readable identifier of the error category.""" retryable: bool = False - """Whether retrying the same operation might succeed (e.g. a transient rate limit or server error).""" + """Whether retrying the same operation might succeed (e.g. an Actor run that timed out).""" def __init__( self, @@ -50,41 +55,14 @@ def __init__( if retryable is not None: self.retryable = retryable - @classmethod - def from_client_error(cls, error: Exception) -> ActorError: - """Map an `apify_client` exception to the matching domain-level error. - - The mapping is driven by the client's typed, HTTP-status-based exceptions. 
Unmapped client errors (and any - other exception) fall back to a plain `ActorError`. The original exception is not chained automatically; - callers should use `raise ActorError.from_client_error(err) from err`. - - Args: - error: The exception raised by `apify_client`. - - Returns: - The corresponding domain-level error. - """ - if isinstance(error, (_UnauthorizedError, _ForbiddenError)): - return ActorAuthenticationError(str(error)) - - if isinstance(error, _RateLimitError): - return ActorRateLimitError(str(error)) - - if isinstance(error, _ServerError): - return ActorError(str(error), retryable=True) - - if isinstance(error, _InvalidRequestError): - return ActorInputValidationError(str(error)) - - return ActorError(str(error)) - @docs_group('Errors') class ActorRunError(ActorError): - """Raised when an Actor run reaches a terminal failure state (e.g. `FAILED` or `ABORTED`). + """Represents an Actor run that reached a terminal failure state (e.g. `FAILED` or `ABORTED`). - Unlike the HTTP-derived errors, this one is derived from the run itself, so it exposes the run metadata needed - to decide what to do next. + Exposes the run metadata needed to decide what to do next. The SDK does not raise this automatically. `Actor.call` + and `Actor.call_task` return the finished run regardless of its status, mirroring the Apify API client. Build this + error from a finished run with `from_run` when you want a failed run to surface as an exception in your own code. """ code = 'actor-run-failed' @@ -118,79 +96,24 @@ def from_run(cls, run: Run) -> ActorRunError: @docs_group('Errors') class ActorTimeoutError(ActorRunError): - """Raised when an Actor run exceeds its timeout (`TIMED-OUT`). Retrying with a longer timeout may help.""" + """Represents an Actor run that exceeded its timeout (`TIMED-OUT`). 
Retrying with a longer timeout may help.""" code = 'actor-timed-out' retryable = True -@docs_group('Errors') -class ActorInputValidationError(ActorError, ValueError): - """Raised when input fails validation. - - Subclasses `ValueError` so existing `except ValueError` handlers keep catching it. - """ - - code = 'input-validation-error' - - -@docs_group('Errors') -class ActorChargeLimitExceededError(ActorError): - """Raised when an Actor run hits its configured maximum total charge (`max_total_charge_usd`).""" - - code = 'charge-limit-exceeded' - - -@docs_group('Errors') -class ActorAuthenticationError(ActorError): - """Raised when an API request is unauthorized or forbidden (HTTP 401 / 403).""" - - code = 'authentication-error' - - -@docs_group('Errors') -class ActorRateLimitError(ActorError): - """Raised when the Apify API rate limit is exceeded (HTTP 429). Retryable after a backoff.""" - - code = 'rate-limit-exceeded' - retryable = True - - -@contextlib.contextmanager -def map_client_errors() -> Iterator[None]: - """Translate `apify_client` API errors into domain-level `ActorError`s. - - Wrap any `apify_client` call with this context manager so that an `ApifyApiError` (e.g. an HTTP 401/403/429/5xx - response) surfaces as the matching `ActorError` subclass instead of a raw client exception. The original error - is preserved as the `__cause__` of the raised `ActorError`. - """ - try: - yield - except ApifyApiError as error: - raise ActorError.from_client_error(error) from error - - -def catch_client_errors(func: Callable[_P, Awaitable[_R]]) -> Callable[_P, Coroutine[Any, Any, _R]]: - """Decorate an async function so the `apify_client` errors it raises become domain-level `ActorError`s. - - This is the method-level counterpart of `map_client_errors`, intended for thin wrappers around `apify_client` - calls such as the storage client operations. 
- """ - - @functools.wraps(func) - async def wrapper(*args: _P.args, **kwargs: _P.kwargs) -> _R: - with map_client_errors(): - return await func(*args, **kwargs) - - return wrapper - - __all__ = [ - 'ActorAuthenticationError', - 'ActorChargeLimitExceededError', 'ActorError', - 'ActorInputValidationError', - 'ActorRateLimitError', 'ActorRunError', 'ActorTimeoutError', + 'ApifyApiError', + 'ApifyClientError', + 'ConflictError', + 'ForbiddenError', + 'InvalidRequestError', + 'InvalidResponseBodyError', + 'NotFoundError', + 'RateLimitError', + 'ServerError', + 'UnauthorizedError', ] diff --git a/src/apify/storage_clients/_apify/_dataset_client.py b/src/apify/storage_clients/_apify/_dataset_client.py index 82f006f7a..de9634fdc 100644 --- a/src/apify/storage_clients/_apify/_dataset_client.py +++ b/src/apify/storage_clients/_apify/_dataset_client.py @@ -12,7 +12,6 @@ from crawlee.storage_clients.models import DatasetItemsListPage, DatasetMetadata from ._api_client_creation import create_storage_api_client -from apify.errors import ActorError, catch_client_errors, map_client_errors from apify.storage_clients._ppe_dataset_mixin import DatasetClientPpeMixin if TYPE_CHECKING: @@ -58,12 +57,11 @@ def __init__( """A lock to ensure that only one operation is performed at a time.""" @override - @catch_client_errors async def get_metadata(self) -> DatasetMetadata: metadata = await self._api_client.get() if metadata is None: - raise ActorError('Failed to retrieve dataset metadata.') + raise ValueError('Failed to retrieve dataset metadata.') return DatasetMetadata( id=metadata.id, @@ -75,7 +73,6 @@ async def get_metadata(self) -> DatasetMetadata: ) @classmethod - @catch_client_errors async def open( cls, *, @@ -135,13 +132,11 @@ async def purge(self) -> None: ) @override - @catch_client_errors async def drop(self) -> None: async with self._lock: await self._api_client.delete() @override - @catch_client_errors async def push_data(self, data: Sequence[Mapping[str, JsonSerializable]] | 
Mapping[str, JsonSerializable]) -> None: async def payloads_generator(items: Sequence[Mapping[str, JsonSerializable]]) -> AsyncIterator[str]: for index, item in enumerate(items): @@ -160,7 +155,6 @@ async def payloads_generator(items: Sequence[Mapping[str, JsonSerializable]]) -> await self._charge_for_items(count_items=limit) @override - @catch_client_errors async def get_data( self, *, @@ -205,19 +199,18 @@ async def iterate_items( skip_empty: bool = False, skip_hidden: bool = False, ) -> AsyncIterator[dict]: - with map_client_errors(): - async for item in self._api_client.iterate_items( - offset=offset, - limit=limit, - clean=clean, - desc=desc, - fields=fields, - omit=omit, - unwind=unwind, - skip_empty=skip_empty, - skip_hidden=skip_hidden, - ): - yield item + async for item in self._api_client.iterate_items( + offset=offset, + limit=limit, + clean=clean, + desc=desc, + fields=fields, + omit=omit, + unwind=unwind, + skip_empty=skip_empty, + skip_hidden=skip_hidden, + ): + yield item @classmethod async def _check_and_serialize(cls, item: Mapping[str, JsonSerializable], index: int | None = None) -> str: diff --git a/src/apify/storage_clients/_apify/_key_value_store_client.py b/src/apify/storage_clients/_apify/_key_value_store_client.py index 24b39c901..9fdce93c4 100644 --- a/src/apify/storage_clients/_apify/_key_value_store_client.py +++ b/src/apify/storage_clients/_apify/_key_value_store_client.py @@ -11,7 +11,6 @@ from ._api_client_creation import create_storage_api_client from ._models import ApifyKeyValueStoreMetadata -from apify.errors import ActorError, catch_client_errors, map_client_errors if TYPE_CHECKING: from collections.abc import AsyncIterator @@ -43,12 +42,11 @@ def __init__( """A lock to ensure that only one operation is performed at a time.""" @override - @catch_client_errors async def get_metadata(self) -> ApifyKeyValueStoreMetadata: metadata = await self._api_client.get() if metadata is None: - raise ActorError('Failed to retrieve key-value 
store metadata.') + raise ValueError('Failed to retrieve key-value store metadata.') return ApifyKeyValueStoreMetadata( id=metadata.id, @@ -60,7 +58,6 @@ async def get_metadata(self) -> ApifyKeyValueStoreMetadata: ) @classmethod - @catch_client_errors async def open( cls, *, @@ -113,19 +110,16 @@ async def purge(self) -> None: ) @override - @catch_client_errors async def drop(self) -> None: async with self._lock: await self._api_client.delete() @override - @catch_client_errors async def get_value(self, *, key: str) -> KeyValueStoreRecord | None: response = await self._api_client.get_record(key) return KeyValueStoreRecord.model_validate(response) if response else None @override - @catch_client_errors async def set_value(self, *, key: str, value: Any, content_type: str | None = None) -> None: async with self._lock: await self._api_client.set_record( @@ -135,7 +129,6 @@ async def set_value(self, *, key: str, value: Any, content_type: str | None = No ) @override - @catch_client_errors async def delete_value(self, *, key: str) -> None: async with self._lock: await self._api_client.delete_record(key=key) @@ -149,36 +142,33 @@ async def iterate_keys( ) -> AsyncIterator[KeyValueStoreRecordMetadata]: count = 0 - with map_client_errors(): - while True: - list_key_page = await self._api_client.list_keys(exclusive_start_key=exclusive_start_key) - - for item in list_key_page.items: - record_metadata = KeyValueStoreRecordMetadata( - key=item.key, - size=item.size, - content_type='application/octet-stream', # Content type not available from list_keys - ) - yield record_metadata - count += 1 - - # If we've reached the limit, stop yielding - if limit and count >= limit: - break - - # If we've reached the limit or there are no more pages, exit the loop - if (limit and count >= limit) or not list_key_page.is_truncated: + while True: + list_key_page = await self._api_client.list_keys(exclusive_start_key=exclusive_start_key) + + for item in list_key_page.items: + record_metadata = 
KeyValueStoreRecordMetadata( + key=item.key, + size=item.size, + content_type='application/octet-stream', # Content type not available from list_keys + ) + yield record_metadata + count += 1 + + # If we've reached the limit, stop yielding + if limit and count >= limit: break - exclusive_start_key = list_key_page.next_exclusive_start_key + # If we've reached the limit or there are no more pages, exit the loop + if (limit and count >= limit) or not list_key_page.is_truncated: + break + + exclusive_start_key = list_key_page.next_exclusive_start_key @override - @catch_client_errors async def record_exists(self, *, key: str) -> bool: return await self._api_client.record_exists(key=key) @override - @catch_client_errors async def get_public_url(self, *, key: str) -> str: """Get a URL for the given key that may be used to publicly access the value in the remote key-value store. diff --git a/src/apify/storage_clients/_apify/_request_queue_client.py b/src/apify/storage_clients/_apify/_request_queue_client.py index 83c0c03da..de6737806 100644 --- a/src/apify/storage_clients/_apify/_request_queue_client.py +++ b/src/apify/storage_clients/_apify/_request_queue_client.py @@ -11,7 +11,6 @@ from ._models import ApifyRequestQueueMetadata, RequestQueueStats from ._request_queue_shared_client import ApifyRequestQueueSharedClient from ._request_queue_single_client import ApifyRequestQueueSingleClient -from apify.errors import ActorError, catch_client_errors if TYPE_CHECKING: from collections.abc import Sequence @@ -67,7 +66,6 @@ def __init__( raise RuntimeError(f"Unsupported access type: {access}. Allowed values are 'single' or 'shared'.") @override - @catch_client_errors async def get_metadata(self) -> ApifyRequestQueueMetadata: """Retrieve current metadata about the request queue. 
@@ -81,7 +79,7 @@ async def get_metadata(self) -> ApifyRequestQueueMetadata: metadata = await self._api_client.get() if metadata is None: - raise ActorError('Failed to fetch request queue metadata from the API.') + raise ValueError('Failed to fetch request queue metadata from the API.') total_request_count = metadata.total_request_count handled_request_count = metadata.handled_request_count @@ -103,7 +101,6 @@ async def get_metadata(self) -> ApifyRequestQueueMetadata: ) @classmethod - @catch_client_errors async def open( cls, *, @@ -148,7 +145,7 @@ async def open( # Fetch initial metadata from the API. raw_metadata = await api_client.get() if raw_metadata is None: - raise ActorError('Failed to retrieve request queue metadata from the API.') + raise ValueError('Failed to retrieve request queue metadata from the API.') metadata = ApifyRequestQueueMetadata.model_validate(raw_metadata.model_dump(by_alias=True)) return cls( @@ -165,12 +162,10 @@ async def purge(self) -> None: ) @override - @catch_client_errors async def drop(self) -> None: await self._api_client.delete() @override - @catch_client_errors async def add_batch_of_requests( self, requests: Sequence[Request], @@ -180,22 +175,18 @@ async def add_batch_of_requests( return await self._implementation.add_batch_of_requests(requests, forefront=forefront) @override - @catch_client_errors async def fetch_next_request(self) -> Request | None: return await self._implementation.fetch_next_request() @override - @catch_client_errors async def mark_request_as_handled(self, request: Request) -> ProcessedRequest | None: return await self._implementation.mark_request_as_handled(request) @override - @catch_client_errors async def get_request(self, unique_key: str) -> Request | None: return await self._implementation.get_request(unique_key) @override - @catch_client_errors async def reclaim_request( self, request: Request, @@ -205,6 +196,5 @@ async def reclaim_request( return await self._implementation.reclaim_request(request, 
forefront=forefront) @override - @catch_client_errors async def is_empty(self) -> bool: return await self._implementation.is_empty() diff --git a/tests/unit/actor/test_actor_helpers.py b/tests/unit/actor/test_actor_helpers.py index bcd8f15fd..5d943904e 100644 --- a/tests/unit/actor/test_actor_helpers.py +++ b/tests/unit/actor/test_actor_helpers.py @@ -15,7 +15,6 @@ from apify import Actor, Webhook from apify._actor import _ActorType from apify._consts import ApifyEnvVars -from apify.errors import ActorRunError, ActorTimeoutError if TYPE_CHECKING: from ..conftest import ApifyClientAsyncPatcher @@ -107,59 +106,6 @@ async def test_call_actor_task(apify_client_async_patcher: ApifyClientAsyncPatch assert apify_client_async_patcher.calls['task']['call'][0][0][0].resource_id == task_id -async def test_call_raises_on_failed_run( - apify_client_async_patcher: ApifyClientAsyncPatcher, fake_actor_run: Run -) -> None: - """`Actor.call` raises `ActorRunError` carrying the run metadata when the awaited run ends as `FAILED`.""" - failed_run = fake_actor_run.model_copy(update={'status': 'FAILED'}) - apify_client_async_patcher.patch('actor', 'call', return_value=failed_run) - - async with Actor: - with pytest.raises(ActorRunError) as exc_info: - await Actor.call('some-actor-id') - - assert type(exc_info.value) is ActorRunError - assert exc_info.value.status == 'FAILED' - assert exc_info.value.run_id == failed_run.id - - -async def test_call_raises_timeout_on_timed_out_run( - apify_client_async_patcher: ApifyClientAsyncPatcher, fake_actor_run: Run -) -> None: - """`Actor.call` raises the more specific `ActorTimeoutError` for a `TIMED-OUT` run.""" - timed_out_run = fake_actor_run.model_copy(update={'status': 'TIMED-OUT'}) - apify_client_async_patcher.patch('actor', 'call', return_value=timed_out_run) - - async with Actor: - with pytest.raises(ActorTimeoutError): - await Actor.call('some-actor-id') - - -async def test_call_returns_succeeded_run( - apify_client_async_patcher: 
ApifyClientAsyncPatcher, fake_actor_run: Run -) -> None: - """`Actor.call` returns the run unchanged when it finishes with a non-failure status.""" - succeeded_run = fake_actor_run.model_copy(update={'status': 'SUCCEEDED'}) - apify_client_async_patcher.patch('actor', 'call', return_value=succeeded_run) - - async with Actor: - run = await Actor.call('some-actor-id') - - assert run.status == 'SUCCEEDED' - - -async def test_call_task_raises_on_failed_run( - apify_client_async_patcher: ApifyClientAsyncPatcher, fake_actor_run: Run -) -> None: - """`Actor.call_task` raises `ActorRunError` when the awaited run ends in a terminal failure state.""" - failed_run = fake_actor_run.model_copy(update={'status': 'FAILED'}) - apify_client_async_patcher.patch('task', 'call', return_value=failed_run) - - async with Actor: - with pytest.raises(ActorRunError): - await Actor.call_task('some-task-id') - - async def test_start_actor(apify_client_async_patcher: ApifyClientAsyncPatcher, fake_actor_run: Run) -> None: apify_client_async_patcher.patch('actor', 'start', return_value=fake_actor_run) actor_id = 'some-id' diff --git a/tests/unit/actor/test_configuration.py b/tests/unit/actor/test_configuration.py index 0fd686dda..486cbe07c 100644 --- a/tests/unit/actor/test_configuration.py +++ b/tests/unit/actor/test_configuration.py @@ -392,3 +392,21 @@ def test_actor_storage_json_env_var(monkeypatch: pytest.MonkeyPatch) -> None: assert config.actor_storages['datasets'] == datasets assert config.actor_storages['request_queues'] == request_queues assert config.actor_storages['key_value_stores'] == key_value_stores + + +@pytest.mark.parametrize( + ('env_var', 'attr', 'expected'), + [ + ('APIFY_TIMEOUT_AT', 'timeout_at', None), + ('ACTOR_MAX_PAID_DATASET_ITEMS', 'max_paid_dataset_items', None), + ('ACTOR_MAX_TOTAL_CHARGE_USD', 'max_total_charge_usd', None), + ('APIFY_USER_IS_PAYING', 'user_is_paying', False), + ], +) +def test_typed_env_var_empty_string_falls_back_to_default( + monkeypatch: 
pytest.MonkeyPatch, env_var: str, attr: str, expected: object +) -> None: + """Platform may set a typed env var to '' instead of leaving it unset; that must not crash `Actor.init()`.""" + monkeypatch.setenv(env_var, '') + config = ApifyConfiguration() + assert getattr(config, attr) == expected diff --git a/tests/unit/test_errors.py b/tests/unit/test_errors.py index c3c5bd6e8..8e8ea5d7a 100644 --- a/tests/unit/test_errors.py +++ b/tests/unit/test_errors.py @@ -1,46 +1,11 @@ from __future__ import annotations from datetime import UTC, datetime -from typing import Any, cast - -import pytest +import apify_client.errors as client_errors from apify_client._models import Run -from apify_client.errors import ( - ApifyApiError, - ConflictError, - ForbiddenError, - InvalidRequestError, - NotFoundError, - ServerError, - UnauthorizedError, -) -from apify_client.errors import RateLimitError as ClientRateLimitError - -from apify.errors import ( - ActorAuthenticationError, - ActorChargeLimitExceededError, - ActorError, - ActorInputValidationError, - ActorRateLimitError, - ActorRunError, - ActorTimeoutError, -) - - -class _FakeResponse: - """Minimal stand-in for `apify_client`'s HTTP response, enough to build its API errors.""" - - def __init__(self, status_code: int) -> None: - self.status_code = status_code - self.text = 'error text' - - def json(self) -> dict[str, Any]: - return {'error': {'message': 'boom', 'type': 'some-error-type'}} - -def _client_error(error_cls: type[ApifyApiError], status_code: int) -> ApifyApiError: - return error_cls(cast('Any', _FakeResponse(status_code)), 1) +from apify.errors import ActorError, ActorRunError, ActorTimeoutError def _make_run(*, status: str, exit_code: int | None = None, status_message: str | None = None) -> Run: @@ -67,6 +32,9 @@ def _make_run(*, status: str, exit_code: int | None = None, status_message: str ) +# Base error. 
+ + def test_actor_error_defaults() -> None: error = ActorError('something went wrong') assert error.code == 'actor-error' @@ -83,33 +51,19 @@ def test_actor_error_overrides_are_instance_scoped() -> None: assert ActorError.retryable is False -@pytest.mark.parametrize( - ('error_cls', 'expected_code', 'expected_retryable'), - [ - (ActorRateLimitError, 'rate-limit-exceeded', True), - (ActorTimeoutError, 'actor-timed-out', True), - (ActorAuthenticationError, 'authentication-error', False), - (ActorChargeLimitExceededError, 'charge-limit-exceeded', False), - (ActorInputValidationError, 'input-validation-error', False), - (ActorRunError, 'actor-run-failed', False), - ], -) -def test_subclass_codes_and_retryable( - error_cls: type[ActorError], expected_code: str, *, expected_retryable: bool -) -> None: - assert error_cls.code == expected_code - assert error_cls.retryable is expected_retryable - assert issubclass(error_cls, ActorError) +# Run errors. -def test_input_validation_error_is_value_error() -> None: - """`except ValueError` must still catch `ActorInputValidationError`.""" - with pytest.raises(ValueError, match='bad input'): - raise ActorInputValidationError('bad input') +def test_actor_run_error_is_actor_error() -> None: + assert issubclass(ActorRunError, ActorError) + assert ActorRunError.code == 'actor-run-failed' + assert ActorRunError.retryable is False def test_actor_timeout_error_is_actor_run_error() -> None: assert issubclass(ActorTimeoutError, ActorRunError) + assert ActorTimeoutError.code == 'actor-timed-out' + assert ActorTimeoutError.retryable is True def test_actor_run_error_carries_run_metadata() -> None: @@ -138,31 +92,16 @@ def test_actor_run_error_from_run_timed_out() -> None: assert error.code == 'actor-timed-out' -@pytest.mark.parametrize( - ('client_error', 'expected_cls', 'expected_retryable'), - [ - (_client_error(UnauthorizedError, 401), ActorAuthenticationError, False), - (_client_error(ForbiddenError, 403), ActorAuthenticationError, 
False), - (_client_error(ClientRateLimitError, 429), ActorRateLimitError, True), - (_client_error(ServerError, 500), ActorError, True), - (_client_error(InvalidRequestError, 400), ActorInputValidationError, False), - (_client_error(NotFoundError, 404), ActorError, False), - (_client_error(ConflictError, 409), ActorError, False), - ], -) -def test_from_client_error_mapping( - client_error: ApifyApiError, - expected_cls: type[ActorError], - *, - expected_retryable: bool, -) -> None: - mapped = ActorError.from_client_error(client_error) - assert type(mapped) is expected_cls - assert mapped.retryable is expected_retryable - - -def test_from_client_error_unknown_exception_falls_back() -> None: - mapped = ActorError.from_client_error(RuntimeError('not a client error')) - assert type(mapped) is ActorError - assert mapped.retryable is False - assert 'not a client error' in str(mapped) +# Re-exported API client errors. + + +def test_client_errors_are_re_exported() -> None: + """`apify.errors` re-exports the API client error hierarchy so callers have a single import location.""" + from apify.errors import ApifyApiError, ApifyClientError, NotFoundError, RateLimitError + + assert ApifyApiError is client_errors.ApifyApiError + assert ApifyClientError is client_errors.ApifyClientError + assert NotFoundError is client_errors.NotFoundError + assert RateLimitError is client_errors.RateLimitError + # The re-exported API errors are independent of the SDK's own `ActorError` tree. 
+ assert not issubclass(client_errors.ApifyApiError, ActorError) From d921ed51a86916a47aa814728fd6096ab17a9e98 Mon Sep 17 00:00:00 2001 From: Vlada Dusek Date: Fri, 19 Jun 2026 15:46:08 +0200 Subject: [PATCH 06/10] test: drop section-divider comments from test_errors --- tests/unit/test_errors.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/tests/unit/test_errors.py b/tests/unit/test_errors.py index 8e8ea5d7a..781bdd410 100644 --- a/tests/unit/test_errors.py +++ b/tests/unit/test_errors.py @@ -32,9 +32,6 @@ def _make_run(*, status: str, exit_code: int | None = None, status_message: str ) -# Base error. - - def test_actor_error_defaults() -> None: error = ActorError('something went wrong') assert error.code == 'actor-error' @@ -51,9 +48,6 @@ def test_actor_error_overrides_are_instance_scoped() -> None: assert ActorError.retryable is False -# Run errors. - - def test_actor_run_error_is_actor_error() -> None: assert issubclass(ActorRunError, ActorError) assert ActorRunError.code == 'actor-run-failed' @@ -92,9 +86,6 @@ def test_actor_run_error_from_run_timed_out() -> None: assert error.code == 'actor-timed-out' -# Re-exported API client errors. 
- - def test_client_errors_are_re_exported() -> None: """`apify.errors` re-exports the API client error hierarchy so callers have a single import location.""" from apify.errors import ApifyApiError, ApifyClientError, NotFoundError, RateLimitError From d12c0e04bbeb23e7233edd4549896eac11883d2b Mon Sep 17 00:00:00 2001 From: Vlada Dusek Date: Fri, 19 Jun 2026 16:48:32 +0200 Subject: [PATCH 07/10] refactor: drop SDK error classes, keep apify.errors as a pure re-export --- docs/02_concepts/13_exceptions.mdx | 33 +++--- .../02_concepts/code/13_handle_call_errors.py | 14 ++- docs/02_concepts/code/13_retry_timed_out.py | 15 +-- src/apify/_utils.py | 1 - src/apify/errors.py | 95 +--------------- tests/unit/test_errors.py | 106 +++--------------- website/docusaurus.config.js | 1 - 7 files changed, 51 insertions(+), 214 deletions(-) diff --git a/docs/02_concepts/13_exceptions.mdx b/docs/02_concepts/13_exceptions.mdx index 96e8096bf..5a43e86f7 100644 --- a/docs/02_concepts/13_exceptions.mdx +++ b/docs/02_concepts/13_exceptions.mdx @@ -9,11 +9,11 @@ import RetryTimedOutSource from '!!raw-loader!roa-loader!./code/13_retry_timed_o import ApiLink from '@theme/ApiLink'; import RunnableCodeBlock from '@site/src/components/RunnableCodeBlock'; -When an Actor runs, failures surface from two places. The [Apify API client](https://docs.apify.com/api/client/python) raises typed exceptions for failed API requests. The SDK adds a small set of its own errors for outcomes the client cannot express, such as a sub-Actor run that finishes in a failure state. Both are available from the `apify.errors` module. +When you run an Actor, exceptions come from a few layers: the Apify API client for failed API requests, the Apify SDK for misuse and invalid input, and the libraries you build on, such as Crawlee. This page maps the ones you are most likely to meet and how to approach each. ## Errors from the Apify API -Every SDK operation that talks to the Apify API can raise `ApifyApiError`. 
This includes `Actor.start`, `Actor.call`, `Actor.abort`, `Actor.metamorph`, `Actor.add_webhook`, charging, and all storage operations on datasets, key-value stores, and request queues. The SDK raises these client exceptions as-is. It does not wrap them, so you keep the HTTP status code, the error type, and the response data on the exception. +Every SDK operation that talks to the Apify API can raise `ApifyApiError`. This includes `Actor.start`, `Actor.call`, `Actor.abort`, `Actor.metamorph`, `Actor.add_webhook`, charging, and all storage operations on datasets, key-value stores, and request queues. The SDK raises these client exceptions as-is, so you keep the HTTP status code, the error type, and the response data on the exception. `ApifyApiError` dispatches to a subclass based on the HTTP status code: @@ -24,34 +24,39 @@ Every SDK operation that talks to the Apify API can raise `ApifyApiError`. This - `ServerError` for any 5xx response. - `InvalidRequestError` (400) when the API rejects the request as malformed. -The client retries rate-limited and server errors on its own, so you only see `RateLimitError` or `ServerError` once those retries are exhausted. For convenience, `apify.errors` re-exports the whole client error hierarchy, so you can import everything from one place: +The client retries rate-limited and server errors on its own, so you only see `RateLimitError` or `ServerError` once those retries are exhausted. The `apify.errors` module re-exports the whole client error hierarchy, so you can import everything from one place: ```python from apify.errors import ApifyApiError, NotFoundError, RateLimitError ``` -## Actor run failures - -`Actor.call` and `Actor.call_task` wait for the run to finish and return it, whatever its final status. A finished run can be `SUCCEEDED`, `FAILED`, `ABORTED`, or `TIMED-OUT`, so check `run.status` before you rely on the run's output. 
- -To turn a failed run into an exception, build one from the run with `ActorRunError``.from_run()`. It returns an `ActorTimeoutError` for a timed-out run, and an `ActorRunError` otherwise. Both carry the run metadata you need to decide what to do next: `run_id`, `status`, `exit_code`, and `status_message`. Every SDK error also derives from `ActorError` and exposes a stable `code` and a `retryable` flag. - -## Handling errors - -Wrap the call to catch an API failure, then inspect the finished run and escalate it if needed: +Catch `ApifyApiError` to handle any API failure in one place, then branch on the subclass or the HTTP `status_code`. To react to a specific failure, catch its subclass first: {HandleCallErrorsSource} -## Retrying a timed-out run +## Misuse and invalid input + +The SDK raises standard Python exceptions when it is used incorrectly or given invalid input. These point to a bug or a bad argument in your code, so the fix is to correct the call rather than to catch the exception. -A timed-out run is the one failure where retrying can help, as long as you give it more time. `ActorTimeoutError` sets `retryable` to `True` to mark this. Retry with a longer timeout rather than the same one: +- `RuntimeError` when an `Actor` method is used outside the `async with Actor:` block, either before initialization or after exit, or when the Actor is initialized twice. +- `ValueError` for an invalid argument, such as a malformed `timeout`, an invalid proxy configuration, charging an automatically charged event by hand, or pushing data that is not JSON-serializable or is over the size limit. +- `TypeError` for an argument of the wrong type. +- `ConnectionError` when `Actor.create_proxy_configuration` verifies Apify Proxy access and the proxy reports that you have none. + +## Run failures + +`Actor.call` and `Actor.call_task` wait for the run to finish and return it, whatever its final status. 
A finished run can be `SUCCEEDED`, `FAILED`, `ABORTED`, or `TIMED-OUT`, so check `run.status` before you rely on the run's output. A timed-out run is the one case where retrying can help, as long as you give it more time: {RetryTimedOutSource} +## Errors while crawling + +If your Actor runs a [Crawlee](https://crawlee.dev/python) crawler, failures inside request handlers surface as Crawlee exceptions, and Crawlee manages retries and session rotation around them. For details, see the [Crawlee documentation](https://crawlee.dev/python). + ## The pay-per-event charge limit Reaching the pay-per-event charge limit does not raise an error. The SDK caps charging and data pushing instead, and your Actor keeps running. To detect the limit, check the `event_charge_limit_reached` field on the `ChargeResult` returned by `Actor.charge` or `Actor.push_data`. For details, see [Pay-per-event monetization](./pay-per-event). diff --git a/docs/02_concepts/code/13_handle_call_errors.py b/docs/02_concepts/code/13_handle_call_errors.py index 45ab7c9b1..73acc34dc 100644 --- a/docs/02_concepts/code/13_handle_call_errors.py +++ b/docs/02_concepts/code/13_handle_call_errors.py @@ -1,22 +1,26 @@ import asyncio from apify import Actor -from apify.errors import ActorRunError, ApifyApiError +from apify.errors import ApifyApiError, NotFoundError async def main() -> None: async with Actor: try: run = await Actor.call('apify/web-scraper', run_input={'startUrls': []}) + except NotFoundError: + # Catch a specific subclass first. + Actor.log.error('The Actor to call does not exist.') + return except ApifyApiError as exc: - # The Apify API rejected the request, e.g. the Actor does not exist or - # the token is invalid. The HTTP status code is on the exception. - Actor.log.error(f'Could not start the Actor: {exc} (HTTP {exc.status_code}).') + # Any other API failure, e.g. an invalid token or a server error. 
+ Actor.log.error(f'Calling the Actor failed: {exc} (HTTP {exc.status_code}).') return # `Actor.call` returns the finished run whatever its status, so check it. if run.status != 'SUCCEEDED': - raise ActorRunError.from_run(run) + Actor.log.error(f'Run {run.id} ended with status {run.status}.') + return Actor.log.info(f'Run {run.id} finished successfully.') diff --git a/docs/02_concepts/code/13_retry_timed_out.py b/docs/02_concepts/code/13_retry_timed_out.py index f9510a446..419a5c21e 100644 --- a/docs/02_concepts/code/13_retry_timed_out.py +++ b/docs/02_concepts/code/13_retry_timed_out.py @@ -2,7 +2,6 @@ from datetime import timedelta from apify import Actor -from apify.errors import ActorRunError, ActorTimeoutError async def main() -> None: @@ -13,18 +12,12 @@ async def main() -> None: for attempt in range(1, max_attempts + 1): run = await Actor.call('apify/web-scraper', timeout=timeout) - if run.status == 'SUCCEEDED': - Actor.log.info(f'Run {run.id} finished.') + if run.status != 'TIMED-OUT' or attempt == max_attempts: + Actor.log.info(f'Run {run.id} ended with status {run.status}.') break - # Build a typed error from the finished run so we can branch on it. 
- error = ActorRunError.from_run(run) - if isinstance(error, ActorTimeoutError) and attempt < max_attempts: - timeout *= 2 - Actor.log.warning(f'Timed out, retrying with timeout {timeout}.') - continue - - raise error + timeout *= 2 + Actor.log.warning(f'Timed out, retrying with timeout {timeout}.') if __name__ == '__main__': diff --git a/src/apify/_utils.py b/src/apify/_utils.py index 097795e83..8469ae97b 100644 --- a/src/apify/_utils.py +++ b/src/apify/_utils.py @@ -74,7 +74,6 @@ def is_running_in_ipython() -> bool: 'Actor', 'Charging', 'Configuration', - 'Errors', 'Event data', 'Event managers', 'Events', diff --git a/src/apify/errors.py b/src/apify/errors.py index ed1ddf01a..6bf8db7d5 100644 --- a/src/apify/errors.py +++ b/src/apify/errors.py @@ -1,10 +1,8 @@ from __future__ import annotations -from typing import TYPE_CHECKING - -# Re-export the Apify API client's error hierarchy so callers have a single import location for every error the SDK -# can surface. Any operation that talks to the Apify API raises these as-is; the SDK does not wrap them in its own -# types. See https://docs.apify.com/api/client/python for the full client error reference. +# `apify.errors` re-exports the Apify API client's error hierarchy so callers have a single import location for every +# error raised by an operation that talks to the Apify API. The SDK raises these client exceptions as-is and does not +# wrap them in its own types. See https://docs.apify.com/api/client/python for the full client error reference. from apify_client.errors import ( ApifyApiError, ApifyClientError, @@ -18,94 +16,7 @@ UnauthorizedError, ) -from apify._utils import docs_group - -if TYPE_CHECKING: - from apify_client._models import Run - - -@docs_group('Errors') -class ActorError(Exception): - """Base class for the Apify SDK's own domain-level errors. - - These describe outcomes that the Apify API client cannot express on its own, such as a finished Actor run that - ended in a failure state. 
Errors that originate from the Apify API surface as `apify_client` exceptions (e.g. - `ApifyApiError` and its subclasses), which the SDK re-exports from this module but does not wrap. - - Carries a machine-readable `code` and a `retryable` flag so callers can branch on a failure without parsing the - human-readable error message. - """ - - code: str = 'actor-error' - """Stable, machine-readable identifier of the error category.""" - - retryable: bool = False - """Whether retrying the same operation might succeed (e.g. an Actor run that timed out).""" - - def __init__( - self, - message: str | None = None, - *, - code: str | None = None, - retryable: bool | None = None, - ) -> None: - super().__init__(message) - if code is not None: - self.code = code - if retryable is not None: - self.retryable = retryable - - -@docs_group('Errors') -class ActorRunError(ActorError): - """Represents an Actor run that reached a terminal failure state (e.g. `FAILED` or `ABORTED`). - - Exposes the run metadata needed to decide what to do next. The SDK does not raise this automatically. `Actor.call` - and `Actor.call_task` return the finished run regardless of its status, mirroring the Apify API client. Build this - error from a finished run with `from_run` when you want a failed run to surface as an exception in your own code. - """ - - code = 'actor-run-failed' - - def __init__(self, run: Run) -> None: - self.run_id = run.id - self.status = run.status - self.exit_code = run.exit_code - self.status_message = run.status_message - - message = f'Actor run {run.id!r} ended with status {run.status!r}' - if run.status_message: - message = f'{message}: {run.status_message}' - - super().__init__(message) - - @classmethod - def from_run(cls, run: Run) -> ActorRunError: - """Build the most specific run error for a terminal Actor run. - - Args: - run: The terminal Actor run. - - Returns: - An `ActorTimeoutError` for a timed-out run, otherwise an `ActorRunError`. 
- """ - if run.status == 'TIMED-OUT': - return ActorTimeoutError(run) - return ActorRunError(run) - - -@docs_group('Errors') -class ActorTimeoutError(ActorRunError): - """Represents an Actor run that exceeded its timeout (`TIMED-OUT`). Retrying with a longer timeout may help.""" - - code = 'actor-timed-out' - retryable = True - - __all__ = [ - 'ActorError', - 'ActorRunError', - 'ActorTimeoutError', 'ApifyApiError', 'ApifyClientError', 'ConflictError', diff --git a/tests/unit/test_errors.py b/tests/unit/test_errors.py index 781bdd410..550b4a893 100644 --- a/tests/unit/test_errors.py +++ b/tests/unit/test_errors.py @@ -1,98 +1,24 @@ from __future__ import annotations -from datetime import UTC, datetime - import apify_client.errors as client_errors -from apify_client._models import Run - -from apify.errors import ActorError, ActorRunError, ActorTimeoutError - - -def _make_run(*, status: str, exit_code: int | None = None, status_message: str | None = None) -> Run: - return Run.model_validate( - { - 'id': 'run123', - 'actId': 'act123', - 'userId': 'user123', - 'startedAt': datetime.now(UTC).isoformat(), - 'status': status, - 'statusMessage': status_message, - 'exitCode': exit_code, - 'meta': {'origin': 'DEVELOPMENT'}, - 'buildId': 'build123', - 'defaultDatasetId': 'ds123', - 'defaultKeyValueStoreId': 'kvs123', - 'defaultRequestQueueId': 'rq123', - 'containerUrl': 'https://container', - 'buildNumber': '0.0.1', - 'generalAccess': 'RESTRICTED', - 'stats': {'restartCount': 0, 'resurrectCount': 0, 'computeUnits': 1}, - 'options': {'build': 'latest', 'timeoutSecs': 4, 'memoryMbytes': 1024, 'diskMbytes': 1024}, - } - ) - - -def test_actor_error_defaults() -> None: - error = ActorError('something went wrong') - assert error.code == 'actor-error' - assert error.retryable is False - assert str(error) == 'something went wrong' - - -def test_actor_error_overrides_are_instance_scoped() -> None: - error = ActorError('boom', code='custom', retryable=True) - assert error.code == 
'custom' - assert error.retryable is True - # Overriding on an instance must not leak to the class default. - assert ActorError.code == 'actor-error' - assert ActorError.retryable is False - -def test_actor_run_error_is_actor_error() -> None: - assert issubclass(ActorRunError, ActorError) - assert ActorRunError.code == 'actor-run-failed' - assert ActorRunError.retryable is False - - -def test_actor_timeout_error_is_actor_run_error() -> None: - assert issubclass(ActorTimeoutError, ActorRunError) - assert ActorTimeoutError.code == 'actor-timed-out' - assert ActorTimeoutError.retryable is True - - -def test_actor_run_error_carries_run_metadata() -> None: - run = _make_run(status='FAILED', exit_code=1, status_message='Actor crashed') - error = ActorRunError(run) - assert error.run_id == 'run123' - assert error.status == 'FAILED' - assert error.exit_code == 1 - assert error.status_message == 'Actor crashed' - assert error.retryable is False - assert 'run123' in str(error) - assert 'Actor crashed' in str(error) - - -def test_actor_run_error_from_run_failed() -> None: - error = ActorRunError.from_run(_make_run(status='FAILED')) - assert type(error) is ActorRunError - assert not error.retryable - - -def test_actor_run_error_from_run_timed_out() -> None: - error = ActorRunError.from_run(_make_run(status='TIMED-OUT')) - assert isinstance(error, ActorTimeoutError) - assert error.retryable is True - assert error.run_id == 'run123' - assert error.code == 'actor-timed-out' +import apify.errors as sdk_errors def test_client_errors_are_re_exported() -> None: """`apify.errors` re-exports the API client error hierarchy so callers have a single import location.""" - from apify.errors import ApifyApiError, ApifyClientError, NotFoundError, RateLimitError - - assert ApifyApiError is client_errors.ApifyApiError - assert ApifyClientError is client_errors.ApifyClientError - assert NotFoundError is client_errors.NotFoundError - assert RateLimitError is client_errors.RateLimitError - # The 
re-exported API errors are independent of the SDK's own `ActorError` tree. - assert not issubclass(client_errors.ApifyApiError, ActorError) + names = [ + 'ApifyApiError', + 'ApifyClientError', + 'ConflictError', + 'ForbiddenError', + 'InvalidRequestError', + 'InvalidResponseBodyError', + 'NotFoundError', + 'RateLimitError', + 'ServerError', + 'UnauthorizedError', + ] + assert set(sdk_errors.__all__) == set(names) + for name in names: + assert getattr(sdk_errors, name) is getattr(client_errors, name) diff --git a/website/docusaurus.config.js b/website/docusaurus.config.js index d593892f8..d6ef5fd68 100644 --- a/website/docusaurus.config.js +++ b/website/docusaurus.config.js @@ -9,7 +9,6 @@ const GROUP_ORDER = [ 'Actor', 'Charging', 'Configuration', - 'Errors', 'Event data', 'Event managers', 'Events', From 67962f0c7228e2345b9a2f0595f38cec6de71769 Mon Sep 17 00:00:00 2001 From: Vlada Dusek Date: Fri, 19 Jun 2026 17:37:53 +0200 Subject: [PATCH 08/10] docs: cross-link exception types in the error-handling page --- docs/02_concepts/13_exceptions.mdx | 34 +++++++++++++----------- src/apify/_utils.py | 1 + src/apify/errors.py | 10 ++++--- website/docusaurus.config.js | 42 ++++++++++++++++++++++++++++++ 4 files changed, 69 insertions(+), 18 deletions(-) diff --git a/docs/02_concepts/13_exceptions.mdx b/docs/02_concepts/13_exceptions.mdx index 5a43e86f7..954dd0ba6 100644 --- a/docs/02_concepts/13_exceptions.mdx +++ b/docs/02_concepts/13_exceptions.mdx @@ -13,24 +13,24 @@ When you run an Actor, exceptions come from a few layers: the Apify API client f ## Errors from the Apify API -Every SDK operation that talks to the Apify API can raise `ApifyApiError`. This includes `Actor.start`, `Actor.call`, `Actor.abort`, `Actor.metamorph`, `Actor.add_webhook`, charging, and all storage operations on datasets, key-value stores, and request queues. The SDK raises these client exceptions as-is, so you keep the HTTP status code, the error type, and the response data on the exception. 
+Every SDK operation that talks to the Apify API can raise `ApifyApiError`. This includes `Actor.start`, `Actor.call`, `Actor.abort`, `Actor.metamorph`, `Actor.add_webhook`, charging, and all storage operations on datasets, key-value stores, and request queues. The SDK raises these client exceptions as-is, so you keep the HTTP status code, the error type, and the response data on the exception. -`ApifyApiError` dispatches to a subclass based on the HTTP status code: +`ApifyApiError` dispatches to a subclass based on the HTTP status code: -- `UnauthorizedError` (401) and `ForbiddenError` (403) for an unauthorized or forbidden request. -- `NotFoundError` (404) when the Actor, run, or storage does not exist. -- `ConflictError` (409) for a conflicting request. -- `RateLimitError` (429) when the API rate limit is hit. -- `ServerError` for any 5xx response. -- `InvalidRequestError` (400) when the API rejects the request as malformed. +- `UnauthorizedError` (401) and `ForbiddenError` (403) for an unauthorized or forbidden request. +- `NotFoundError` (404) when the Actor, run, or storage does not exist. +- `ConflictError` (409) for a conflicting request. +- `RateLimitError` (429) when the API rate limit is hit. +- `ServerError` for any 5xx response. +- `InvalidRequestError` (400) when the API rejects the request as malformed. -The client retries rate-limited and server errors on its own, so you only see `RateLimitError` or `ServerError` once those retries are exhausted. The `apify.errors` module re-exports the whole client error hierarchy, so you can import everything from one place: +The client retries rate-limited and server errors on its own, so you only see `RateLimitError` or `ServerError` once those retries are exhausted. 
The `apify.errors` module re-exports the whole client error hierarchy, so you can import everything from one place: ```python from apify.errors import ApifyApiError, NotFoundError, RateLimitError ``` -Catch `ApifyApiError` to handle any API failure in one place, then branch on the subclass or the HTTP `status_code`. To react to a specific failure, catch its subclass first: +Catch `ApifyApiError` to handle any API failure in one place, then branch on the subclass or the HTTP `status_code`. To react to a specific failure, catch its subclass first: {HandleCallErrorsSource} @@ -40,10 +40,10 @@ Catch `ApifyApiError` to handle any API failure in one place, then branch on the The SDK raises standard Python exceptions when it is used incorrectly or given invalid input. These point to a bug or a bad argument in your code, so the fix is to correct the call rather than to catch the exception. -- `RuntimeError` when an `Actor` method is used outside the `async with Actor:` block, either before initialization or after exit, or when the Actor is initialized twice. -- `ValueError` for an invalid argument, such as a malformed `timeout`, an invalid proxy configuration, charging an automatically charged event by hand, or pushing data that is not JSON-serializable or is over the size limit. -- `TypeError` for an argument of the wrong type. -- `ConnectionError` when `Actor.create_proxy_configuration` verifies Apify Proxy access and the proxy reports that you have none. +- [`RuntimeError`](https://docs.python.org/3/library/exceptions.html#RuntimeError) when an `Actor` method is used outside the `async with Actor:` block, either before initialization or after exit, or when the Actor is initialized twice. 
+- [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) for an invalid argument, such as a malformed `timeout`, an invalid proxy configuration, charging an automatically charged event by hand, or pushing data that is not JSON-serializable or is over the size limit. +- [`TypeError`](https://docs.python.org/3/library/exceptions.html#TypeError) for an argument of the wrong type. +- [`ConnectionError`](https://docs.python.org/3/library/exceptions.html#ConnectionError) when `Actor.create_proxy_configuration` verifies Apify Proxy access and the proxy reports that you have none. ## Run failures @@ -59,4 +59,8 @@ If your Actor runs a [Crawlee](https://crawlee.dev/python) crawler, failures ins ## The pay-per-event charge limit -Reaching the pay-per-event charge limit does not raise an error. The SDK caps charging and data pushing instead, and your Actor keeps running. To detect the limit, check the `event_charge_limit_reached` field on the `ChargeResult` returned by `Actor.charge` or `Actor.push_data`. For details, see [Pay-per-event monetization](./pay-per-event). +Reaching the pay-per-event charge limit does not raise an error. The SDK caps charging and data pushing instead, and your Actor keeps running. To detect the limit, check the `event_charge_limit_reached` field on the `ChargeResult` returned by `Actor.charge` or `Actor.push_data`. For details, see [Pay-per-event monetization](./pay-per-event). + +## Conclusion + +Most failures you handle at runtime are `ApifyApiError` from the API client. Catch it to cover any API failure, and reach for a subclass or the HTTP `status_code` when you need finer control. The standard [`RuntimeError`](https://docs.python.org/3/library/exceptions.html#RuntimeError), [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError), and [`TypeError`](https://docs.python.org/3/library/exceptions.html#TypeError) signal a bug or bad input, so correct the call rather than catch them. 
After `Actor.call`, check `run.status` to react to a failed run, and let Crawlee handle the errors raised inside a crawler. diff --git a/src/apify/_utils.py b/src/apify/_utils.py index 8469ae97b..097795e83 100644 --- a/src/apify/_utils.py +++ b/src/apify/_utils.py @@ -74,6 +74,7 @@ def is_running_in_ipython() -> bool: 'Actor', 'Charging', 'Configuration', + 'Errors', 'Event data', 'Event managers', 'Events', diff --git a/src/apify/errors.py b/src/apify/errors.py index 6bf8db7d5..23441bea5 100644 --- a/src/apify/errors.py +++ b/src/apify/errors.py @@ -1,8 +1,12 @@ +"""`apify.errors` re-exports the Apify API client's error hierarchy. + +Callers get a single import location for every error raised by an operation that talks to the Apify API. The SDK +raises these client exceptions as-is and does not wrap them in its own types. See +https://docs.apify.com/api/client/python for the full client error reference. +""" + from __future__ import annotations -# `apify.errors` re-exports the Apify API client's error hierarchy so callers have a single import location for every -# error raised by an operation that talks to the Apify API. The SDK raises these client exceptions as-is and does not -# wrap them in its own types. See https://docs.apify.com/api/client/python for the full client error reference. 
from apify_client.errors import ( ApifyApiError, ApifyClientError, diff --git a/website/docusaurus.config.js b/website/docusaurus.config.js index d6ef5fd68..32c8be0e8 100644 --- a/website/docusaurus.config.js +++ b/website/docusaurus.config.js @@ -9,6 +9,7 @@ const GROUP_ORDER = [ 'Actor', 'Charging', 'Configuration', + 'Errors', 'Event data', 'Event managers', 'Events', @@ -149,6 +150,47 @@ module.exports = { moduleShortcutsPath: join(__dirname, '/module_shortcuts.json'), }, reexports: [ + // Errors + { + url: 'https://docs.apify.com/api/client/python/reference/class/ApifyApiError', + group: 'Errors', + }, + { + url: 'https://docs.apify.com/api/client/python/reference/class/ApifyClientError', + group: 'Errors', + }, + { + url: 'https://docs.apify.com/api/client/python/reference/class/ConflictError', + group: 'Errors', + }, + { + url: 'https://docs.apify.com/api/client/python/reference/class/ForbiddenError', + group: 'Errors', + }, + { + url: 'https://docs.apify.com/api/client/python/reference/class/InvalidRequestError', + group: 'Errors', + }, + { + url: 'https://docs.apify.com/api/client/python/reference/class/InvalidResponseBodyError', + group: 'Errors', + }, + { + url: 'https://docs.apify.com/api/client/python/reference/class/NotFoundError', + group: 'Errors', + }, + { + url: 'https://docs.apify.com/api/client/python/reference/class/RateLimitError', + group: 'Errors', + }, + { + url: 'https://docs.apify.com/api/client/python/reference/class/ServerError', + group: 'Errors', + }, + { + url: 'https://docs.apify.com/api/client/python/reference/class/UnauthorizedError', + group: 'Errors', + }, // Storages { url: 'https://crawlee.dev/python/api/class/Storage', From 773a242addb8e273098d664a95bf539ce8e239ae Mon Sep 17 00:00:00 2001 From: Vlada Dusek Date: Fri, 19 Jun 2026 17:45:29 +0200 Subject: [PATCH 09/10] docs: expand charge-limit details and reorder error-handling sections --- docs/02_concepts/13_exceptions.mdx | 8 ++++---- 1 file changed, 4 insertions(+), 4 
deletions(-) diff --git a/docs/02_concepts/13_exceptions.mdx b/docs/02_concepts/13_exceptions.mdx index 954dd0ba6..1760b9ac3 100644 --- a/docs/02_concepts/13_exceptions.mdx +++ b/docs/02_concepts/13_exceptions.mdx @@ -53,13 +53,13 @@ The SDK raises standard Python exceptions when it is used incorrectly or given i {RetryTimedOutSource} -## Errors while crawling +## The pay-per-event charge limit -If your Actor runs a [Crawlee](https://crawlee.dev/python) crawler, failures inside request handlers surface as Crawlee exceptions, and Crawlee manages retries and session rotation around them. For details, see the [Crawlee documentation](https://crawlee.dev/python). +Reaching the pay-per-event charge limit does not raise an error. The SDK caps charging and data pushing instead, and your Actor keeps running. When a single `Actor.charge` call would cross the limit, only the part that fits within the budget is billed, and `charged_count` on the returned `ChargeResult` reports how many events went through. `Actor.push_data` behaves the same way when given a `charged_event_name`, writing only the items that fit within the budget. To detect the limit, check the `event_charge_limit_reached` field on the `ChargeResult`. It is a return value rather than an exception, so you can read it in a tight charging loop and stop your work once the budget runs out. For details, see [Pay-per-event monetization](./pay-per-event). -## The pay-per-event charge limit +## Errors while crawling -Reaching the pay-per-event charge limit does not raise an error. The SDK caps charging and data pushing instead, and your Actor keeps running. To detect the limit, check the `event_charge_limit_reached` field on the `ChargeResult` returned by `Actor.charge` or `Actor.push_data`. For details, see [Pay-per-event monetization](./pay-per-event). 
+If your Actor runs a [Crawlee](https://crawlee.dev/python) crawler, failures inside request handlers surface as Crawlee exceptions, and Crawlee handles the retries and session rotation around them, so a single failing request does not stop the crawl. API calls you make from inside a handler still raise `ApifyApiError`, so handle those as in the first section. For details, see the [Crawlee documentation](https://crawlee.dev/python). ## Conclusion From 0ed51096ba52f9986faf511c15b4a57c6c529f5d Mon Sep 17 00:00:00 2001 From: Vlada Dusek Date: Mon, 22 Jun 2026 10:31:40 +0200 Subject: [PATCH 10/10] apply review feedback --- docs/02_concepts/13_exceptions.mdx | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/docs/02_concepts/13_exceptions.mdx b/docs/02_concepts/13_exceptions.mdx index 1760b9ac3..a5a7e7d04 100644 --- a/docs/02_concepts/13_exceptions.mdx +++ b/docs/02_concepts/13_exceptions.mdx @@ -9,16 +9,16 @@ import RetryTimedOutSource from '!!raw-loader!roa-loader!./code/13_retry_timed_o import ApiLink from '@theme/ApiLink'; import RunnableCodeBlock from '@site/src/components/RunnableCodeBlock'; -When you run an Actor, exceptions come from a few layers: the Apify API client for failed API requests, the Apify SDK for misuse and invalid input, and the libraries you build on, such as Crawlee. This page maps the ones you are most likely to meet and how to approach each. +When you run an Actor, exceptions come from a few layers: the Apify API client for failed API requests, the Apify SDK for misuse and invalid input, and the libraries you build on, such as Crawlee. ## Errors from the Apify API -Every SDK operation that talks to the Apify API can raise `ApifyApiError`. This includes `Actor.start`, `Actor.call`, `Actor.abort`, `Actor.metamorph`, `Actor.add_webhook`, charging, and all storage operations on datasets, key-value stores, and request queues. 
The SDK raises these client exceptions as-is, so you keep the HTTP status code, the error type, and the response data on the exception. +Every SDK operation that talks to the Apify API can raise `ApifyApiError`. Such operations include `Actor.start`, `Actor.call`, `Actor.abort`, `Actor.metamorph`, `Actor.add_webhook`, charging, and all storage operations on datasets, key-value stores, and request queues. The SDK raises these client exceptions as-is, so you keep the HTTP status code, the error type, and the response data on the exception. `ApifyApiError` dispatches to a subclass based on the HTTP status code: - `UnauthorizedError` (401) and `ForbiddenError` (403) for an unauthorized or forbidden request. -- `NotFoundError` (404) when the Actor, run, or storage does not exist. +- `NotFoundError` (404) when the Actor, run, or storage doesn't exist. - `ConflictError` (409) for a conflicting request. - `RateLimitError` (429) when the API rate limit is hit. - `ServerError` for any 5xx response. @@ -30,7 +30,7 @@ The client retries rate-limited and server errors on its own, so you only see `ApifyApiError` to handle any API failure in one place, then branch on the subclass or the HTTP `status_code`. To react to a specific failure, catch its subclass first: +To handle any API failure in one place, catch `ApifyApiError`, then branch on the subclass or the HTTP `status_code`. To react to a specific failure, catch its subclass first: {HandleCallErrorsSource} @@ -38,7 +38,7 @@ Catch `ApifyApiError` to handle any ## Misuse and invalid input -The SDK raises standard Python exceptions when it is used incorrectly or given invalid input. These point to a bug or a bad argument in your code, so the fix is to correct the call rather than to catch the exception. +The SDK raises standard Python exceptions when it's used incorrectly or given invalid input. These exceptions point to a bug or a bad argument in your code, so the fix is to correct the call rather than to catch the exception. 
- [`RuntimeError`](https://docs.python.org/3/library/exceptions.html#RuntimeError) when an `Actor` method is used outside the `async with Actor:` block, either before initialization or after exit, or when the Actor is initialized twice. - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) for an invalid argument, such as a malformed `timeout`, an invalid proxy configuration, charging an automatically charged event by hand, or pushing data that is not JSON-serializable or is over the size limit. @@ -55,11 +55,13 @@ The SDK raises standard Python exceptions when it is used incorrectly or given i ## The pay-per-event charge limit -Reaching the pay-per-event charge limit does not raise an error. The SDK caps charging and data pushing instead, and your Actor keeps running. When a single `Actor.charge` call would cross the limit, only the part that fits within the budget is billed, and `charged_count` on the returned `ChargeResult` reports how many events went through. `Actor.push_data` behaves the same way when given a `charged_event_name`, writing only the items that fit within the budget. To detect the limit, check the `event_charge_limit_reached` field on the `ChargeResult`. It is a return value rather than an exception, so you can read it in a tight charging loop and stop your work once the budget runs out. For details, see [Pay-per-event monetization](./pay-per-event). +Reaching the pay-per-event charge limit doesn't raise an error. Instead, the SDK caps charging and data pushing, while your Actor keeps running. When a single `Actor.charge` call crosses the limit, only the part that fits within the budget is billed, and `charged_count` on the returned `ChargeResult` reports how many events went through. `Actor.push_data` behaves the same way when given a `charged_event_name`. It writes only the items that fit within the budget. + +To detect the limit, check the `event_charge_limit_reached` field on the `ChargeResult`. 
It's a return value and not an exception, so you can read it in a tight charging loop and stop your work once the budget runs out. For details, see [Pay-per-event monetization](./pay-per-event). ## Errors while crawling -If your Actor runs a [Crawlee](https://crawlee.dev/python) crawler, failures inside request handlers surface as Crawlee exceptions, and Crawlee handles the retries and session rotation around them, so a single failing request does not stop the crawl. API calls you make from inside a handler still raise `ApifyApiError`, so handle those as in the first section. For details, see the [Crawlee documentation](https://crawlee.dev/python). +If your Actor runs a [Crawlee](https://crawlee.dev/python) crawler, failures inside request handlers surface as Crawlee exceptions. Crawlee handles the retries and session rotation around them, so a single failing request doesn't stop the crawl. API calls you make from inside a handler still raise `ApifyApiError`. For how to handle those errors, see [Errors from the Apify API](#errors-from-the-apify-api). ## Conclusion