Skip to content

Commit 40e6cbf

Browse files
fix: skip JSON pre-parsing for str union annotations in pre_parse_json
The `pre_parse_json` method used an identity check (`field_info.annotation is not str`) to decide whether to attempt `json.loads` on string values. For union types like `str | None`, this check passed because `str | None` is not identical to `str`, causing string values to be unnecessarily JSON-parsed. This could corrupt string identifiers (UUIDs, etc.) when the parsed result was a non-scalar type like a dict or list. Replace the identity check with a `_should_pre_parse_json` helper that inspects union annotations. Unions containing only simple scalar types (str, int, float, bool, NoneType) skip pre-parsing entirely, while complex unions like `list[str] | None` still benefit from it. Closes #1873
1 parent d5b9155 commit 40e6cbf

File tree

2 files changed

+89
-1
lines changed

2 files changed

+89
-1
lines changed

src/mcp/server/mcpserver/utilities/func_metadata.py

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,7 @@ def pre_parse_json(self, data: dict[str, Any]) -> dict[str, Any]:
148148
continue
149149

150150
field_info = key_to_field_info[data_key]
151-
if isinstance(data_value, str) and field_info.annotation is not str:
151+
if isinstance(data_value, str) and _should_pre_parse_json(field_info.annotation):
152152
try:
153153
pre_parsed = json.loads(data_value)
154154
except json.JSONDecodeError:
@@ -416,6 +416,30 @@ def _try_create_model_and_schema(
416416
return None, None, False
417417

418418

419+
420+
_SIMPLE_TYPES: frozenset[type] = frozenset({str, int, float, bool, type(None)})
421+
422+
423+
def _should_pre_parse_json(annotation: Any) -> bool:
424+
"""Return True if the annotation may benefit from JSON pre-parsing.
425+
426+
For unions containing only simple scalar types (str, int, float, bool, None),
427+
pre-parsing is skipped because json.loads would corrupt string values that
428+
happen to look like JSON objects or arrays -- e.g. a UUID passed as a string
429+
should stay a string even if the annotation is ``str | None``.
430+
431+
Complex unions like ``list[str] | None`` still need pre-parsing so that a
432+
JSON-encoded list arriving as a string can be deserialized before Pydantic
433+
validation.
434+
"""
435+
if annotation is str:
436+
return False
437+
origin = get_origin(annotation)
438+
if origin is not None and is_union_origin(origin):
439+
return any(arg not in _SIMPLE_TYPES for arg in get_args(annotation))
440+
return True
441+
442+
419443
_no_default = object()
420444

421445

tests/server/mcpserver/test_func_metadata.py

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -551,6 +551,70 @@ def handle_json_payload(payload: str, strict_mode: bool = False) -> str:
551551
assert result == f"Handled payload of length {len(json_array_payload)}"
552552

553553

554+
555+
def test_str_union_pre_parse_preserves_strings():
556+
"""Regression test for #1873: pre_parse_json must not JSON-parse strings
557+
when the annotation is a union of simple types like str | None.
558+
559+
UUIDs and other string identifiers that start with digits were being
560+
corrupted because json.loads would partially parse them as numbers.
561+
"""
562+
563+
def func_optional_str(value: str | None = None) -> str: # pragma: no cover
564+
return str(value)
565+
566+
meta = func_metadata(func_optional_str)
567+
568+
# A JSON object string must be preserved as-is for str | None
569+
json_obj = '{"database": "postgres", "port": 5432}'
570+
assert meta.pre_parse_json({"value": json_obj})["value"] == json_obj
571+
572+
# A JSON array string must be preserved as-is for str | None
573+
json_array = '["item1", "item2"]'
574+
assert meta.pre_parse_json({"value": json_array})["value"] == json_array
575+
576+
# Plain strings are unaffected
577+
assert meta.pre_parse_json({"value": "hello"})["value"] == "hello"
578+
579+
# UUID-like strings must never be corrupted
580+
uuid_val = "58aa9efd-faad-4901-89e8-99e807a1a2d6"
581+
assert meta.pre_parse_json({"value": uuid_val})["value"] == uuid_val
582+
583+
# UUID with scientific-notation-like prefix must be preserved
584+
uuid_sci = "3400e37e-b251-49d9-91b0-f8dd8602ff7e"
585+
assert meta.pre_parse_json({"value": uuid_sci})["value"] == uuid_sci
586+
587+
588+
def test_complex_union_still_pre_parses():
589+
"""Ensure complex unions like list[str] | None still benefit from
590+
JSON pre-parsing so that serialized lists are deserialized properly.
591+
"""
592+
593+
def func_optional_list(items: list[str] | None = None) -> str: # pragma: no cover
594+
return str(items)
595+
596+
meta = func_metadata(func_optional_list)
597+
assert meta.pre_parse_json({"items": '["a", "b", "c"]'})["items"] == ["a", "b", "c"]
598+
599+
600+
@pytest.mark.anyio
601+
async def test_str_union_uuid_end_to_end():
602+
"""End-to-end test: a str | None parameter receives the exact UUID string."""
603+
604+
def update_task(task_id: str | None = None) -> str: # pragma: no cover
605+
return f"got {task_id}"
606+
607+
meta = func_metadata(update_task)
608+
uuid_val = "58aa9efd-faad-4901-89e8-99e807a1a2d6"
609+
result = await meta.call_fn_with_arg_validation(
610+
update_task,
611+
fn_is_async=False,
612+
arguments_to_validate={"task_id": uuid_val},
613+
arguments_to_pass_directly=None,
614+
)
615+
assert result == f"got {uuid_val}"
616+
617+
554618
# Tests for structured output functionality
555619

556620

0 commit comments

Comments
 (0)