diff --git a/docs/02_concepts/13_exceptions.mdx b/docs/02_concepts/13_exceptions.mdx new file mode 100644 index 00000000..a5a7e7d0 --- /dev/null +++ b/docs/02_concepts/13_exceptions.mdx @@ -0,0 +1,68 @@ +--- +id: error-handling +title: Error handling +description: The exceptions an Actor can raise and how to handle them +--- + +import HandleCallErrorsSource from '!!raw-loader!roa-loader!./code/13_handle_call_errors.py'; +import RetryTimedOutSource from '!!raw-loader!roa-loader!./code/13_retry_timed_out.py'; +import ApiLink from '@theme/ApiLink'; +import RunnableCodeBlock from '@site/src/components/RunnableCodeBlock'; + +When you run an Actor, exceptions come from a few layers: the Apify API client for failed API requests, the Apify SDK for misuse and invalid input, and the libraries you build on, such as Crawlee. + +## Errors from the Apify API + +Every SDK operation that talks to the Apify API can raise `ApifyApiError`. Such operations include `Actor.start`, `Actor.call`, `Actor.abort`, `Actor.metamorph`, `Actor.add_webhook`, charging, and all storage operations on datasets, key-value stores, and request queues. The SDK raises these client exceptions as-is, so you keep the HTTP status code, the error type, and the response data on the exception. + +`ApifyApiError` dispatches to a subclass based on the HTTP status code: + +- `UnauthorizedError` (401) and `ForbiddenError` (403) for an unauthorized or forbidden request. +- `NotFoundError` (404) when the Actor, run, or storage doesn't exist. +- `ConflictError` (409) for a conflicting request. +- `RateLimitError` (429) when the API rate limit is hit. +- `ServerError` for any 5xx response. +- `InvalidRequestError` (400) when the API rejects the request as malformed. + +The client retries rate-limited and server errors on its own, so you only see `RateLimitError` or `ServerError` once those retries are exhausted. 
The `apify.errors` module re-exports the whole client error hierarchy, so you can import everything from one place: + +```python +from apify.errors import ApifyApiError, NotFoundError, RateLimitError +``` + +To handle any API failure in one place, catch `ApifyApiError`, then branch on the subclass or the HTTP `status_code`. To react to a specific failure, catch its subclass first: + +<RunnableCodeBlock className="language-python" language="python"> + {HandleCallErrorsSource} +</RunnableCodeBlock> + +## Misuse and invalid input + +The SDK raises standard Python exceptions when it's used incorrectly or given invalid input. These exceptions point to a bug or a bad argument in your code, so the fix is to correct the call rather than to catch the exception. + +- [`RuntimeError`](https://docs.python.org/3/library/exceptions.html#RuntimeError) when an `Actor` method is used outside the `async with Actor:` block, either before initialization or after exit, or when the Actor is initialized twice. +- [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) for an invalid argument, such as a malformed `timeout`, an invalid proxy configuration, charging an automatically charged event by hand, or pushing data that is not JSON-serializable or is over the size limit. +- [`TypeError`](https://docs.python.org/3/library/exceptions.html#TypeError) for an argument of the wrong type. +- [`ConnectionError`](https://docs.python.org/3/library/exceptions.html#ConnectionError) when `Actor.create_proxy_configuration` verifies Apify Proxy access and the proxy reports that you have none. + +## Run failures + +`Actor.call` and `Actor.call_task` wait for the run to finish and return it, whatever its final status. A finished run can be `SUCCEEDED`, `FAILED`, `ABORTED`, or `TIMED-OUT`, so check `run.status` before you rely on the run's output. 
A timed-out run is the one case where retrying can help, as long as you give it more time: + +<RunnableCodeBlock className="language-python" language="python"> + {RetryTimedOutSource} +</RunnableCodeBlock> + +## The pay-per-event charge limit + +Reaching the pay-per-event charge limit doesn't raise an error. Instead, the SDK caps charging and data pushing, while your Actor keeps running. When a single `Actor.charge` call crosses the limit, only the part that fits within the budget is billed, and `charged_count` on the returned `ChargeResult` reports how many events went through. `Actor.push_data` behaves the same way when given a `charged_event_name`. It writes only the items that fit within the budget. + +To detect the limit, check the `event_charge_limit_reached` field on the `ChargeResult`. It's a return value and not an exception, so you can read it in a tight charging loop and stop your work once the budget runs out. For details, see [Pay-per-event monetization](./pay-per-event). + +## Errors while crawling + +If your Actor runs a [Crawlee](https://crawlee.dev/python) crawler, failures inside request handlers surface as Crawlee exceptions. Crawlee handles the retries and session rotation around them, so a single failing request doesn't stop the crawl. API calls you make from inside a handler still raise `ApifyApiError`. For how to handle those errors, see [Errors from the Apify API](#errors-from-the-apify-api). + +## Conclusion + +Most failures you handle at runtime are `ApifyApiError` from the API client. Catch it to cover any API failure, and reach for a subclass or the HTTP `status_code` when you need finer control. The standard [`RuntimeError`](https://docs.python.org/3/library/exceptions.html#RuntimeError), [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError), and [`TypeError`](https://docs.python.org/3/library/exceptions.html#TypeError) signal a bug or bad input, so correct the call rather than catch them. 
After `Actor.call`, check `run.status` to react to a failed run, and let Crawlee handle the errors raised inside a crawler. diff --git a/docs/02_concepts/code/13_handle_call_errors.py b/docs/02_concepts/code/13_handle_call_errors.py new file mode 100644 index 00000000..73acc34d --- /dev/null +++ b/docs/02_concepts/code/13_handle_call_errors.py @@ -0,0 +1,29 @@ +import asyncio + +from apify import Actor +from apify.errors import ApifyApiError, NotFoundError + + +async def main() -> None: + async with Actor: + try: + run = await Actor.call('apify/web-scraper', run_input={'startUrls': []}) + except NotFoundError: + # Catch a specific subclass first. + Actor.log.error('The Actor to call does not exist.') + return + except ApifyApiError as exc: + # Any other API failure, e.g. an invalid token or a server error. + Actor.log.error(f'Calling the Actor failed: {exc} (HTTP {exc.status_code}).') + return + + # `Actor.call` returns the finished run whatever its status, so check it. + if run.status != 'SUCCEEDED': + Actor.log.error(f'Run {run.id} ended with status {run.status}.') + return + + Actor.log.info(f'Run {run.id} finished successfully.') + + +if __name__ == '__main__': + asyncio.run(main()) diff --git a/docs/02_concepts/code/13_retry_timed_out.py b/docs/02_concepts/code/13_retry_timed_out.py new file mode 100644 index 00000000..419a5c21 --- /dev/null +++ b/docs/02_concepts/code/13_retry_timed_out.py @@ -0,0 +1,24 @@ +import asyncio +from datetime import timedelta + +from apify import Actor + + +async def main() -> None: + async with Actor: + timeout = timedelta(minutes=5) + max_attempts = 3 + + for attempt in range(1, max_attempts + 1): + run = await Actor.call('apify/web-scraper', timeout=timeout) + + if run.status != 'TIMED-OUT' or attempt == max_attempts: + Actor.log.info(f'Run {run.id} ended with status {run.status}.') + break + + timeout *= 2 + Actor.log.warning(f'Timed out, retrying with timeout {timeout}.') + + +if __name__ == '__main__': + asyncio.run(main()) 
diff --git a/src/apify/_utils.py b/src/apify/_utils.py index 8469ae97..097795e8 100644 --- a/src/apify/_utils.py +++ b/src/apify/_utils.py @@ -74,6 +74,7 @@ def is_running_in_ipython() -> bool: 'Actor', 'Charging', 'Configuration', + 'Errors', 'Event data', 'Event managers', 'Events', diff --git a/src/apify/errors.py b/src/apify/errors.py new file mode 100644 index 00000000..23441bea --- /dev/null +++ b/src/apify/errors.py @@ -0,0 +1,34 @@ +"""`apify.errors` re-exports the Apify API client's error hierarchy. + +Callers get a single import location for every error raised by an operation that talks to the Apify API. The SDK +raises these client exceptions as-is and does not wrap them in its own types. See +https://docs.apify.com/api/client/python for the full client error reference. +""" + +from __future__ import annotations + +from apify_client.errors import ( + ApifyApiError, + ApifyClientError, + ConflictError, + ForbiddenError, + InvalidRequestError, + InvalidResponseBodyError, + NotFoundError, + RateLimitError, + ServerError, + UnauthorizedError, +) + +__all__ = [ + 'ApifyApiError', + 'ApifyClientError', + 'ConflictError', + 'ForbiddenError', + 'InvalidRequestError', + 'InvalidResponseBodyError', + 'NotFoundError', + 'RateLimitError', + 'ServerError', + 'UnauthorizedError', +] diff --git a/tests/unit/test_errors.py b/tests/unit/test_errors.py new file mode 100644 index 00000000..550b4a89 --- /dev/null +++ b/tests/unit/test_errors.py @@ -0,0 +1,24 @@ +from __future__ import annotations + +import apify_client.errors as client_errors + +import apify.errors as sdk_errors + + +def test_client_errors_are_re_exported() -> None: + """`apify.errors` re-exports the API client error hierarchy so callers have a single import location.""" + names = [ + 'ApifyApiError', + 'ApifyClientError', + 'ConflictError', + 'ForbiddenError', + 'InvalidRequestError', + 'InvalidResponseBodyError', + 'NotFoundError', + 'RateLimitError', + 'ServerError', + 'UnauthorizedError', + ] + assert 
set(sdk_errors.__all__) == set(names) + for name in names: + assert getattr(sdk_errors, name) is getattr(client_errors, name) diff --git a/website/docusaurus.config.js b/website/docusaurus.config.js index d6ef5fd6..32c8be0e 100644 --- a/website/docusaurus.config.js +++ b/website/docusaurus.config.js @@ -9,6 +9,7 @@ const GROUP_ORDER = [ 'Actor', 'Charging', 'Configuration', + 'Errors', 'Event data', 'Event managers', 'Events', @@ -149,6 +150,47 @@ module.exports = { moduleShortcutsPath: join(__dirname, '/module_shortcuts.json'), }, reexports: [ + // Errors + { + url: 'https://docs.apify.com/api/client/python/reference/class/ApifyApiError', + group: 'Errors', + }, + { + url: 'https://docs.apify.com/api/client/python/reference/class/ApifyClientError', + group: 'Errors', + }, + { + url: 'https://docs.apify.com/api/client/python/reference/class/ConflictError', + group: 'Errors', + }, + { + url: 'https://docs.apify.com/api/client/python/reference/class/ForbiddenError', + group: 'Errors', + }, + { + url: 'https://docs.apify.com/api/client/python/reference/class/InvalidRequestError', + group: 'Errors', + }, + { + url: 'https://docs.apify.com/api/client/python/reference/class/InvalidResponseBodyError', + group: 'Errors', + }, + { + url: 'https://docs.apify.com/api/client/python/reference/class/NotFoundError', + group: 'Errors', + }, + { + url: 'https://docs.apify.com/api/client/python/reference/class/RateLimitError', + group: 'Errors', + }, + { + url: 'https://docs.apify.com/api/client/python/reference/class/ServerError', + group: 'Errors', + }, + { + url: 'https://docs.apify.com/api/client/python/reference/class/UnauthorizedError', + group: 'Errors', + }, // Storages { url: 'https://crawlee.dev/python/api/class/Storage',