From c13aa95b1f73db9e19cfc26afc1e72470772287a Mon Sep 17 00:00:00 2001 From: Vlada Dusek Date: Fri, 19 Jun 2026 15:36:09 +0200 Subject: [PATCH 1/2] refactor: polish logging across the codebase Standardize internal modules on getLogger(__name__), rebalance log levels (no-op platform-only calls error->warning), and trim Scrapy trace-debug noise ahead of the next major release. --- src/apify/_actor.py | 14 ++++++++------ src/apify/_charging.py | 5 ++++- src/apify/_proxy_configuration.py | 4 +++- src/apify/events/_apify_event_manager.py | 6 ++++-- src/apify/scrapy/middlewares/apify_proxy.py | 7 ------- src/apify/scrapy/pipelines/actor_dataset_push.py | 4 ---- src/apify/scrapy/requests.py | 12 ------------ src/apify/scrapy/scheduler.py | 9 +-------- .../storage_clients/_apify/_alias_resolving.py | 3 +-- .../_file_system/_key_value_store_client.py | 2 +- tests/unit/actor/test_actor_helpers.py | 6 +++--- tests/unit/actor/test_actor_lifecycle.py | 6 +++--- tests/unit/events/test_apify_event_manager.py | 2 +- tests/unit/scrapy/test_scheduler.py | 4 ++-- 14 files changed, 31 insertions(+), 53 deletions(-) diff --git a/src/apify/_actor.py b/src/apify/_actor.py index 20242a9a5..23c70e9d2 100644 --- a/src/apify/_actor.py +++ b/src/apify/_actor.py @@ -1162,7 +1162,7 @@ async def metamorph( custom_after_sleep: How long to sleep for after the metamorph, to wait for the container to be stopped. """ if not self.is_at_home(): - self.log.error('Actor.metamorph() is only supported when running on the Apify platform.') + self.log.warning('Actor.metamorph() is only supported when running on the Apify platform.') return if custom_after_sleep is None: @@ -1172,6 +1172,7 @@ async def metamorph( if not self.configuration.actor_run_id: raise RuntimeError('actor_run_id cannot be None when running on the Apify platform.') + self.log.debug(f'Metamorphing the Actor run into target Actor {target_actor_id!r}.') await self.apify_client.run(self.configuration.actor_run_id).metamorph( target_actor_id=target_actor_id, run_input=run_input, @@ -1198,7 +1199,7 @@ async def reboot( custom_after_sleep: How long to sleep for after the reboot, to wait for the container to be stopped. """ if not self.is_at_home(): - self.log.error('Actor.reboot() is only supported when running on the Apify platform.') + self.log.warning('Actor.reboot() is only supported when running on the Apify platform.') return if self._is_rebooting: @@ -1243,6 +1244,7 @@ async def safe_dispatch(listener: Any, data: Any) -> None: except TimeoutError: self.log.warning('Pre-reboot event listeners did not finish within timeout; proceeding with reboot') + self.log.debug('Rebooting the Actor run.') await self.apify_client.run(self.configuration.actor_run_id).reboot() except BaseException: # Reset the flag so that a failed or cancelled reboot can be retried. @@ -1277,7 +1279,7 @@ async def add_webhook(self, webhook: Webhook, *, idempotency_key: str | None = N ) if not self.is_at_home(): - self.log.error('Actor.add_webhook() is only supported when running on the Apify platform.') + self.log.warning('Actor.add_webhook() is only supported when running on the Apify platform.') return # If is_at_home() is True, config.actor_run_id is always set @@ -1454,9 +1456,9 @@ def _get_remaining_time(self) -> timedelta | None: return max(self.configuration.timeout_at - datetime.now(tz=UTC), timedelta(0)) self.log.warning( - 'Using `inherit` argument is only possible when the Actor' - ' is running on the Apify platform and when the timeout for the Actor run is set. ' - f'{self.is_at_home()=}, {self.configuration.timeout_at=}' + 'Using the `inherit` argument is only possible when the Actor is running on the Apify platform and ' + 'the timeout for the Actor run is set.', + extra={'is_at_home': self.is_at_home(), 'timeout_at': self.configuration.timeout_at}, ) return None diff --git a/src/apify/_charging.py b/src/apify/_charging.py index 0a23ad4ef..9bbdeb3e4 100644 --- a/src/apify/_charging.py +++ b/src/apify/_charging.py @@ -5,6 +5,7 @@ from dataclasses import dataclass from datetime import UTC, datetime from decimal import Decimal +from logging import getLogger from typing import TYPE_CHECKING, Literal, Protocol, TypedDict from pydantic import ConfigDict @@ -19,7 +20,6 @@ from apify_client._models import PricingPerEvent as ClientPricingPerEvent from apify._utils import ReentrantLock, docs_group, ensure_context -from apify.log import logger from apify.storages import Dataset if TYPE_CHECKING: @@ -29,6 +29,8 @@ from apify._configuration import Configuration +logger = getLogger(__name__) + charging_manager_ctx: ContextVar[ChargingManager | None] = ContextVar('charging_manager_ctx', default=None) """Holds the current `ChargingManager` instance, if any. @@ -450,6 +452,7 @@ async def charge(self, event_name: str, *, count: int = 1) -> ChargeResult: pass elif event_name in self._pricing_info: await self._client.run(self._actor_run_id).charge(event_name, count=charged_count) + logger.debug(f"Charged {charged_count} occurrence(s) of event '{event_name}'.") elif event_name in self._tier_priced_events: logger.warning( f"Event '{event_name}' is tier-priced and is not chargeable via the pay-per-event API." diff --git a/src/apify/_proxy_configuration.py b/src/apify/_proxy_configuration.py index fec4d4ddf..aaedb08ac 100644 --- a/src/apify/_proxy_configuration.py +++ b/src/apify/_proxy_configuration.py @@ -3,6 +3,7 @@ import json import re from dataclasses import dataclass, field +from logging import getLogger from re import Pattern from typing import TYPE_CHECKING, Any @@ -22,13 +23,14 @@ from apify._configuration import Configuration from apify._consts import ApifyEnvVars from apify._utils import docs_group -from apify.log import logger if TYPE_CHECKING: from apify_client import ApifyClientAsync from apify import Request +logger = getLogger(__name__) + APIFY_PROXY_VALUE_REGEX = re.compile(r'^[\w._~]+$') COUNTRY_CODE_REGEX = re.compile(r'^[A-Z]{2}$') # ISO 3166-2 subdivision codes are 1-3 uppercase alphanumeric characters, e.g. 'CA', 'NSW', '9' (Wien, AT-9) diff --git a/src/apify/events/_apify_event_manager.py b/src/apify/events/_apify_event_manager.py index c4bf6e6df..e7b21da8a 100644 --- a/src/apify/events/_apify_event_manager.py +++ b/src/apify/events/_apify_event_manager.py @@ -3,6 +3,7 @@ import asyncio import contextlib import time +from logging import getLogger from typing import TYPE_CHECKING, Annotated, Self import websockets.asyncio.client @@ -17,7 +18,6 @@ from apify._utils import docs_group from apify.events._types import DeprecatedEvent, EventMessage, SystemInfoEventData, UnknownEvent -from apify.log import logger if TYPE_CHECKING: from collections.abc import Generator @@ -28,6 +28,8 @@ from apify._configuration import Configuration +logger = getLogger(__name__) + event_data_adapter = TypeAdapter[EventMessage | DeprecatedEvent | UnknownEvent]( Annotated[EventMessage, Discriminator('name')] | DeprecatedEvent | UnknownEvent ) @@ -195,7 +197,7 @@ async def _handle_platform_message(self, message: str | bytes) -> None: return if isinstance(parsed_message, UnknownEvent): - logger.info( + logger.debug( f'Unknown message received: event_name={parsed_message.name}, event_data={parsed_message.data}' ) return diff --git a/src/apify/scrapy/middlewares/apify_proxy.py b/src/apify/scrapy/middlewares/apify_proxy.py index de061f202..7665bfcfa 100644 --- a/src/apify/scrapy/middlewares/apify_proxy.py +++ b/src/apify/scrapy/middlewares/apify_proxy.py @@ -72,7 +72,6 @@ async def process_request(self, request: Request) -> None: Raises: ValueError: If username and password are not provided in the proxy URL. """ - Actor.log.debug(f'ApifyHttpProxyMiddleware.process_request: request={request}') url = await self._get_new_proxy_url() if not (url.username and url.password): @@ -82,8 +81,6 @@ async def process_request(self, request: Request) -> None: basic_auth_header = get_basic_auth_header(url.username, url.password) request.headers[b'Proxy-Authorization'] = basic_auth_header - Actor.log.debug(f'ApifyHttpProxyMiddleware.process_request: updated request.meta={request.meta}') - def process_exception( self, request: Request, @@ -100,10 +97,6 @@ def process_exception( process_exception() methods of installed middleware, until no middleware is left and the default exception handling kicks in. """ - Actor.log.debug( - f'ApifyHttpProxyMiddleware.process_exception: request={request}, exception={exception}', - ) - if isinstance(exception, TunnelError): Actor.log.warning( f'ApifyHttpProxyMiddleware: TunnelError occurred for request="{request}", ' diff --git a/src/apify/scrapy/pipelines/actor_dataset_push.py b/src/apify/scrapy/pipelines/actor_dataset_push.py index b3a801874..0f38d4bc9 100644 --- a/src/apify/scrapy/pipelines/actor_dataset_push.py +++ b/src/apify/scrapy/pipelines/actor_dataset_push.py @@ -1,6 +1,5 @@ from __future__ import annotations -from logging import getLogger from typing import TYPE_CHECKING from itemadapter.adapter import ItemAdapter @@ -10,8 +9,6 @@ if TYPE_CHECKING: from scrapy import Item -logger = getLogger(__name__) - class ActorDatasetPushPipeline: """A Scrapy pipeline for pushing items to an Actor's default dataset. @@ -25,6 +22,5 @@ async def process_item( ) -> Item: """Pushes the provided Scrapy item to the Actor's default dataset.""" item_dict = ItemAdapter(item).asdict() - logger.debug(f'Pushing item={item_dict} to the dataset.') await Actor.push_data(item_dict) return item diff --git a/src/apify/scrapy/requests.py b/src/apify/scrapy/requests.py index 5a115cf66..8bf99ec52 100644 --- a/src/apify/scrapy/requests.py +++ b/src/apify/scrapy/requests.py @@ -61,8 +61,6 @@ def to_apify_request(scrapy_request: ScrapyRequest, spider: Spider) -> ApifyRequ logger.warning('Failed to convert to Apify request: Scrapy request must be a ScrapyRequest instance.') return None - logger.debug(f'to_apify_request was called (scrapy_request={scrapy_request})...') - # Configuration to behave as similarly as possible to Scrapy's default RFPDupeFilter. # # The body is stored twice on purpose: as `payload` (used for the extended unique key) and inside the serialized @@ -150,7 +148,6 @@ def to_apify_request(scrapy_request: ScrapyRequest, spider: Spider) -> ApifyRequ # a second base64 layer would only add ~33% overhead on the enqueue path. apify_request.user_data['scrapy_request'] = scrapy_request_json - logger.debug(f'scrapy_request was converted to the apify_request={apify_request}') return apify_request @@ -171,14 +168,10 @@ def to_scrapy_request(apify_request: ApifyRequest, spider: Spider) -> ScrapyRequ if not isinstance(cast('Any', apify_request), ApifyRequest): raise TypeError('apify_request must be an apify.Request instance') - logger.debug(f'to_scrapy_request was called (apify_request={apify_request})...') - # If the apify_request comes from the Scrapy if 'scrapy_request' in apify_request.user_data: # Deserialize the Scrapy ScrapyRequest from the apify_request by parsing the stored JSON and reconstructing # the Scrapy ScrapyRequest object from its dictionary representation. - logger.debug('Restoring the Scrapy ScrapyRequest from the apify_request...') - scrapy_request_json = apify_request.user_data['scrapy_request'] if not isinstance(scrapy_request_json, str): raise TypeError('the stored scrapy_request must be a string') @@ -194,8 +187,6 @@ def to_scrapy_request(apify_request: ApifyRequest, spider: Spider) -> ScrapyRequ if not isinstance(scrapy_request, ScrapyRequest): raise TypeError('scrapy_request must be an instance of the ScrapyRequest class') - logger.debug(f'Scrapy ScrapyRequest successfully reconstructed (scrapy_request={scrapy_request})...') - # Update the meta field with the meta field from the apify_request meta = scrapy_request.meta or {} meta.update({'apify_request_unique_key': apify_request.unique_key}) @@ -204,8 +195,6 @@ def to_scrapy_request(apify_request: ApifyRequest, spider: Spider) -> ScrapyRequ # If the apify_request comes directly from the Scrapy, typically start URLs. else: - logger.debug('Gonna create a new Scrapy ScrapyRequest (cannot be restored)') - scrapy_request = ScrapyRequest( url=apify_request.url, method=apify_request.method, @@ -222,5 +211,4 @@ def to_scrapy_request(apify_request: ApifyRequest, spider: Spider) -> ScrapyRequ if apify_request.user_data: scrapy_request.meta['userData'] = apify_request.user_data - logger.debug(f'an apify_request was converted to the scrapy_request={scrapy_request}') return scrapy_request diff --git a/src/apify/scrapy/scheduler.py b/src/apify/scrapy/scheduler.py index 0df47d503..0646d3d6f 100644 --- a/src/apify/scrapy/scheduler.py +++ b/src/apify/scrapy/scheduler.py @@ -134,17 +134,14 @@ def enqueue_request(self, request: Request) -> bool: Returns: True if the request was successfully enqueued, False otherwise. """ - logger.debug(f'ApifyScheduler.enqueue_request was called (scrapy_request={request})...') - if not isinstance(self.spider, Spider): raise TypeError('self.spider must be an instance of the Spider class') apify_request = to_apify_request(request, spider=self.spider) if apify_request is None: - logger.error(f'Request {request} could not be converted to Apify request.') + logger.warning(f'Request {request} could not be converted to Apify request; skipping it.') return False - logger.debug(f'Converted to apify_request: {apify_request}') if not isinstance(self._rq, RequestQueue): raise TypeError('self._rq must be an instance of the RequestQueue class') @@ -156,7 +153,6 @@ def enqueue_request(self, request: Request) -> bool: logger.exception('Failed to enqueue the request to the request queue.') raise - logger.debug(f'rq.add_request result: {result}') return not bool(result.was_already_present) def next_request(self) -> Request | None: @@ -165,7 +161,6 @@ def next_request(self) -> Request | None: Returns: The next request, or None if there are no more requests. """ - logger.debug('next_request called...') if not isinstance(self._rq, RequestQueue): raise TypeError('self._rq must be an instance of the RequestQueue class') @@ -177,7 +172,6 @@ def next_request(self) -> Request | None: logger.exception('Failed to fetch the next request from the request queue.') raise - logger.debug(f'Fetched apify_request: {apify_request}') if apify_request is None: return None @@ -206,5 +200,4 @@ def next_request(self) -> Request | None: if scrapy_request is None: return None - logger.debug(f'Converted to scrapy_request: {scrapy_request}') return scrapy_request diff --git a/src/apify/storage_clients/_apify/_alias_resolving.py b/src/apify/storage_clients/_apify/_alias_resolving.py index 56e13ebbd..39fa3b995 100644 --- a/src/apify/storage_clients/_apify/_alias_resolving.py +++ b/src/apify/storage_clients/_apify/_alias_resolving.py @@ -1,6 +1,5 @@ from __future__ import annotations -import logging from asyncio import Lock from functools import cached_property from logging import getLogger @@ -225,7 +224,7 @@ async def store_mapping(self, storage_id: str) -> None: alias_map[self._storage_key] = storage_id if not self._configuration.is_at_home: - logging.getLogger(__name__).debug( + logger.debug( 'AliasResolver storage limited retention is only supported on Apify platform. Storage is not exported.' ) return diff --git a/src/apify/storage_clients/_file_system/_key_value_store_client.py b/src/apify/storage_clients/_file_system/_key_value_store_client.py index 09d7194ab..bfce86a74 100644 --- a/src/apify/storage_clients/_file_system/_key_value_store_client.py +++ b/src/apify/storage_clients/_file_system/_key_value_store_client.py @@ -122,7 +122,7 @@ async def _create_missing_metadata_for_input_file(key: str, record_path: Path) - try: content = await asyncio.to_thread(record_path.read_bytes) except FileNotFoundError: - logger.warning(f'Input file disparaged on path: "{record_path}"') + logger.warning(f'Input file disappeared from path: "{record_path}"') return # Figure out the metadata from the file content diff --git a/tests/unit/actor/test_actor_helpers.py b/tests/unit/actor/test_actor_helpers.py index 5d943904e..78c567c6b 100644 --- a/tests/unit/actor/test_actor_helpers.py +++ b/tests/unit/actor/test_actor_helpers.py @@ -138,7 +138,7 @@ async def test_metamorph_fails_locally(caplog: pytest.LogCaptureFixture) -> None matching = [r for r in caplog.records if 'Actor.metamorph()' in r.message] assert len(matching) == 1 - assert matching[0].levelname == 'ERROR' + assert matching[0].levelname == 'WARNING' assert 'only supported when running on the Apify platform' in matching[0].message @@ -149,7 +149,7 @@ async def test_reboot_fails_locally(caplog: pytest.LogCaptureFixture) -> None: matching = [r for r in caplog.records if 'Actor.reboot()' in r.message] assert len(matching) == 1 - assert matching[0].levelname == 'ERROR' + assert matching[0].levelname == 'WARNING' assert 'only supported when running on the Apify platform' in matching[0].message @@ -165,7 +165,7 @@ async def test_add_webhook_fails_locally(caplog: pytest.LogCaptureFixture) -> No matching = [r for r in caplog.records if 'Actor.add_webhook()' in r.message] assert len(matching) == 1 - assert matching[0].levelname == 'ERROR' + assert matching[0].levelname == 'WARNING' assert 'only supported when running on the Apify platform' in matching[0].message diff --git a/tests/unit/actor/test_actor_lifecycle.py b/tests/unit/actor/test_actor_lifecycle.py index 0575a211b..d1bcdcbf6 100644 --- a/tests/unit/actor/test_actor_lifecycle.py +++ b/tests/unit/actor/test_actor_lifecycle.py @@ -164,14 +164,14 @@ async def test_fail_without_init_raises_runtime_error() -> None: await Actor().fail() -async def test_reboot_in_local_environment_logs_error_message( +async def test_reboot_in_local_environment_logs_warning_message( actor: _ActorType, caplog: pytest.LogCaptureFixture, ) -> None: - """Test that reboot() logs an error when not running on the Apify platform.""" + """Test that reboot() logs a warning when not running on the Apify platform.""" await actor.reboot() - # Check that the error message was logged + # Check that the warning message was logged assert 'Actor.reboot() is only supported when running on the Apify platform.' in caplog.text diff --git a/tests/unit/events/test_apify_event_manager.py b/tests/unit/events/test_apify_event_manager.py index 18928614c..8f7239668 100644 --- a/tests/unit/events/test_apify_event_manager.py +++ b/tests/unit/events/test_apify_event_manager.py @@ -355,7 +355,7 @@ def listener(data: Any) -> None: async def test_unknown_event_is_logged(monkeypatch: pytest.MonkeyPatch, caplog: pytest.LogCaptureFixture) -> None: """Test that unknown events are logged and not emitted.""" - caplog.set_level(logging.INFO, logger='apify') + caplog.set_level(logging.DEBUG, logger='apify') async with ( _platform_ws_server(monkeypatch) as (connected_ws_clients, client_connected), ApifyEventManager(Configuration.get_global_configuration()), diff --git a/tests/unit/scrapy/test_scheduler.py b/tests/unit/scrapy/test_scheduler.py index cfe5b48af..a7cc4445d 100644 --- a/tests/unit/scrapy/test_scheduler.py +++ b/tests/unit/scrapy/test_scheduler.py @@ -56,13 +56,13 @@ def test_enqueue_request_skips_non_serializable_request( scheduler: ApifyScheduler, caplog: pytest.LogCaptureFixture, ) -> None: - """A request that cannot be converted (non-serializable meta) is not enqueued: returns False and logs an error.""" + """A request that cannot be converted (non-serializable meta) is not enqueued: returns False and logs a warning.""" rq = cast('mock.MagicMock', scheduler._rq) # A set in `meta` is not JSON-serializable, so `to_apify_request` returns None. scrapy_request = Request(url='https://example.com', meta={'tags': {'a', 'b'}}) - with caplog.at_level(logging.ERROR, logger='apify.scrapy.scheduler'): + with caplog.at_level(logging.WARNING, logger='apify.scrapy.scheduler'): result = scheduler.enqueue_request(scrapy_request) assert result is False From d4abac229d20407fd25c8f6a9b16127775fda690 Mon Sep 17 00:00:00 2001 From: Vlada Dusek Date: Mon, 22 Jun 2026 10:34:00 +0200 Subject: [PATCH 2/2] add upgrading guide --- docs/04_upgrading/upgrading_to_v4.md | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/docs/04_upgrading/upgrading_to_v4.md b/docs/04_upgrading/upgrading_to_v4.md index a650b9325..2b9471f5b 100644 --- a/docs/04_upgrading/upgrading_to_v4.md +++ b/docs/04_upgrading/upgrading_to_v4.md @@ -244,4 +244,17 @@ yield scrapy.Request(url, meta={'since': datetime(2024, 1, 1)}) # After (v4): store a JSON-serializable value. yield scrapy.Request(url, meta={'since': datetime(2024, 1, 1).isoformat()}) -```Pickle +``` + +## Logging changes + +The typed public API is unchanged, so your code keeps working without edits. Only log output changes. Adjust your setup if you parse logs, route them by logger name, or assert on log records, for example in tests. + +### Rebalanced log levels + +- The local no-op messages from `Actor.metamorph()`, `Actor.reboot()`, and `Actor.add_webhook()`, logged when the Actor runs outside the Apify platform, are now `warning` instead of `error`. The Scrapy scheduler message for a request that can't be converted to an Apify request changed the same way. +- The "unknown platform event" message is now `debug` instead of `info`. + +### Module-qualified logger names + +The charging, proxy, and events modules now log under their own child loggers (`apify._charging`, `apify._proxy_configuration`, and `apify.events._apify_event_manager`) instead of the generic `apify` logger. These child loggers still propagate to `apify`, so a handler attached to `apify` keeps receiving their records. Update only configuration that targets the exact `apify` logger name for these messages.