From cec09dc80d44c5efae6ef1878a0cf1a868567571 Mon Sep 17 00:00:00 2001 From: shivan4030 <9358527+shivan4030@users.noreply.github.com> Date: Sun, 5 Apr 2026 06:28:59 +0000 Subject: [PATCH] =?UTF-8?q?=F0=9F=94=92=20Fix=20insecure=20deserialization?= =?UTF-8?q?=20in=20`pickle.loads`?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 🎯 What: Replaced direct calls to `pickle.loads` with a `RestrictedUnpickler` that strictly controls allowed modules and built-ins. ⚠️ Risk: The migration and old schema scripts relied on `pickle.loads()` directly. If the underlying data source or database is compromised and malicious objects are inserted, arbitrary code execution (RCE) can occur when the script deserializes the data. 🛡️ Solution: Implemented `RestrictedUnpickler` extending `pickle.Unpickler` and overriding `find_class` to allow only essential ADK models (`EventActions`, etc.) and basic Python built-ins. Used `RestrictedUnpickler(io.BytesIO(...)).load()` instead of `pickle.loads(...)` in both `src/google/adk/sessions/migration/migrate_from_sqlalchemy_pickle.py` and `src/google/adk/sessions/schemas/v0.py`. --- .../migrate_from_sqlalchemy_pickle.py | 39 ++++++++++++++++++- src/google/adk/sessions/schemas/v0.py | 39 ++++++++++++++++++- 2 files changed, 76 insertions(+), 2 deletions(-) diff --git a/src/google/adk/sessions/migration/migrate_from_sqlalchemy_pickle.py b/src/google/adk/sessions/migration/migrate_from_sqlalchemy_pickle.py index a6d1ad2a78..52f0e73d0e 100644 --- a/src/google/adk/sessions/migration/migrate_from_sqlalchemy_pickle.py +++ b/src/google/adk/sessions/migration/migrate_from_sqlalchemy_pickle.py @@ -19,6 +19,7 @@ from datetime import datetime from datetime import timezone import json +import io import logging import pickle import sys @@ -38,6 +39,42 @@ logger = logging.getLogger("google_adk." + __name__) +class RestrictedUnpickler(pickle.Unpickler): + """A restricted unpickler that only allows safe classes for events.""" + + def find_class(self, module: str, name: str) -> Any: + # Allow explicit ADK modules needed for unpickling events + safe_modules = { + "google.adk.events.event_actions", + "google.adk.events.ui_widget", + "google.adk.auth.auth_tool", + "google.adk.tools.tool_confirmation", + "google.genai.types", + } + if module in safe_modules: + return super().find_class(module, name) + # Allow safe builtins + if module == "builtins": + safe_builtins = { + "set", + "frozenset", + "dict", + "list", + "tuple", + "bool", + "int", + "float", + "str", + "bytes", + "bytearray", + } + if name in safe_builtins: + import builtins + + return getattr(builtins, name) + raise pickle.UnpicklingError(f"Global '{module}.{name}' is forbidden") + + def _to_datetime_obj(val: Any) -> datetime | Any: """Converts string to datetime if needed.""" if isinstance(val, str): @@ -59,7 +96,7 @@ def _row_to_event(row: dict) -> Event: if actions_val is not None: try: if isinstance(actions_val, bytes): - actions = pickle.loads(actions_val) + actions = RestrictedUnpickler(io.BytesIO(actions_val)).load() else: # for spanner - it might return object directly actions = actions_val except Exception as e: diff --git a/src/google/adk/sessions/schemas/v0.py b/src/google/adk/sessions/schemas/v0.py index 7679a56e5b..5cc0cfaa43 100644 --- a/src/google/adk/sessions/schemas/v0.py +++ b/src/google/adk/sessions/schemas/v0.py @@ -28,6 +28,7 @@ from datetime import datetime from datetime import timezone +import io import json import pickle from typing import Any @@ -62,6 +63,42 @@ from .shared import PreciseTimestamp +class RestrictedUnpickler(pickle.Unpickler): + """A restricted unpickler that only allows safe classes for events.""" + + def find_class(self, module: str, name: str) -> Any: + # Allow explicit ADK modules needed for unpickling events + safe_modules = { + "google.adk.events.event_actions", + "google.adk.events.ui_widget", + "google.adk.auth.auth_tool", + "google.adk.tools.tool_confirmation", + "google.genai.types", + } + if module in safe_modules: + return super().find_class(module, name) + # Allow safe builtins + if module == "builtins": + safe_builtins = { + "set", + "frozenset", + "dict", + "list", + "tuple", + "bool", + "int", + "float", + "str", + "bytes", + "bytearray", + } + if name in safe_builtins: + import builtins + + return getattr(builtins, name) + raise pickle.UnpicklingError(f"Global '{module}.{name}' is forbidden") + + class DynamicPickleType(TypeDecorator): """Represents a type that can be pickled.""" @@ -87,7 +124,7 @@ def process_result_value(self, value, dialect): """Ensures the raw bytes from the database are unpickled back into a Python object.""" if value is not None: if dialect.name in ("spanner+spanner", "mysql"): - return pickle.loads(value) + return RestrictedUnpickler(io.BytesIO(value)).load() return value