From 55f1d048646e94f50c78b1a68be43bfb0caf5f70 Mon Sep 17 00:00:00 2001 From: Sasa Junuzovic <44276455+microsasa@users.noreply.github.com> Date: Wed, 22 Apr 2026 07:12:25 -0700 Subject: [PATCH 1/6] fix: use typed ToolExecutionData accessor in _first_pass (#1030) Replace raw dict access + isinstance check with the typed ev.as_tool_execution().model accessor in _first_pass, matching the pattern used by other event types in the same function. The raw dict access bypassed ToolExecutionData validation, risking silent contract divergence if the model field shape ever changes. The typed accessor ensures tool_model extraction follows the same Pydantic-validated path as all other event data. Wrap with try/except (ValidationError, ValueError) consistent with error handling used elsewhere in _first_pass for other event types. Tests: - Add test_first_pass_tool_model_from_typed_accessor: validates tool_model is correctly populated via the typed accessor. - Add test_first_pass_tool_model_validation_error: malformed data with a valid model string but invalid toolTelemetry is rejected by Pydantic validation, leaving tool_model as None. - Remove test_no_pydantic_validation_for_tool_model: no longer applicable since the fix intentionally uses Pydantic validation. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/copilot_usage/parser.py | 11 ++++--- tests/copilot_usage/test_parser.py | 53 ++++++++++++++++++++++-------- 2 files changed, 46 insertions(+), 18 deletions(-) diff --git a/src/copilot_usage/parser.py b/src/copilot_usage/parser.py index f0ed8f87..5144c344 100644 --- a/src/copilot_usage/parser.py +++ b/src/copilot_usage/parser.py @@ -795,12 +795,15 @@ def _first_pass(events: list[SessionEvent]) -> _FirstPassResult: for idx, ev in enumerate(events): etype = ev.type - # Fast path: once tool_model is found, skip all tool-complete events - # with a single None-check instead of traversing the full elif chain. + # Once tool_model is found, skip all tool-complete events with a + # single None-check instead of traversing the full elif chain. if etype == EventType.TOOL_EXECUTION_COMPLETE: if tool_model is None: - m = ev.data.get("model") - if isinstance(m, str) and m: + try: + m = ev.as_tool_execution().model + except (ValidationError, ValueError): + m = None + if m: tool_model = m continue if etype not in _FIRST_PASS_EVENT_TYPES: diff --git a/tests/copilot_usage/test_parser.py b/tests/copilot_usage/test_parser.py index 76568f72..4d47c422 100644 --- a/tests/copilot_usage/test_parser.py +++ b/tests/copilot_usage/test_parser.py @@ -6782,28 +6782,53 @@ def test_malformed_tool_event_skipped(self, tmp_path: Path) -> None: fp = _first_pass(events) assert fp.tool_model == "gpt-5.1" - def test_no_pydantic_validation_for_tool_model(self) -> None: - """Optimised path reads model via dict lookup, not Pydantic validation. + def test_first_pass_tool_model_from_typed_accessor(self) -> None: + """_first_pass extracts tool_model via the typed as_tool_execution() accessor. - Builds 1 000 TOOL_EXECUTION_COMPLETE events without a ``model`` key - (worst-case) and asserts that ``ToolExecutionData.model_validate`` is - never called — proving the hot loop avoids the Pydantic round-trip. + Creates a TOOL_EXECUTION_COMPLETE event with a valid ``model`` value + and asserts that ``_first_pass`` populates ``tool_model`` correctly + through the typed ``ToolExecutionData`` accessor rather than raw + dict access. """ events: list[SessionEvent] = [ SessionEvent( type=EventType.TOOL_EXECUTION_COMPLETE, - data={"toolCallId": f"tc-{i}", "success": True}, - id=f"ev-tool-{i}", + data={ + "toolCallId": "tc-typed", + "model": "claude-sonnet-4", + "success": True, + }, + id="ev-typed", timestamp=None, parentId=None, - ) - for i in range(1_000) + ), ] - with patch.object( - ToolExecutionData, "model_validate", wraps=ToolExecutionData.model_validate - ) as mock_validate: - fp = _first_pass(events) - assert mock_validate.call_count == 0 + fp = _first_pass(events) + assert fp.tool_model == "claude-sonnet-4" + + def test_first_pass_tool_model_validation_error(self) -> None: + """Malformed data that passes isinstance(str) but fails typed validation. + + Creates a TOOL_EXECUTION_COMPLETE event whose ``model`` is a valid + string (would pass the old ``isinstance(m, str)`` check) but whose + ``toolTelemetry`` is invalid, causing ``as_tool_execution()`` to + raise ``ValidationError``. Asserts ``tool_model`` remains ``None`` + and no exception escapes. + """ + events: list[SessionEvent] = [ + SessionEvent( + type=EventType.TOOL_EXECUTION_COMPLETE, + data={ + "toolCallId": "tc-bad", + "model": "gpt-5.1", + "toolTelemetry": "not-a-valid-telemetry-object", + }, + id="ev-bad", + timestamp=None, + parentId=None, + ), + ] + fp = _first_pass(events) assert fp.tool_model is None From 38ceef8932a20fa4da6ed99acfae95df3ae9350e Mon Sep 17 00:00:00 2001 From: Sasa Junuzovic <44276455+microsasa@users.noreply.github.com> Date: Wed, 22 Apr 2026 07:26:15 -0700 Subject: [PATCH 2/6] fix: address review comments - Narrow except clause to ValidationError only (ValueError is unreachable since the event type is already checked before calling as_tool_execution) - Add debug logging on validation failure, matching the pattern used by SESSION_START and SESSION_SHUTDOWN branches in _first_pass Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/copilot_usage/parser.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/copilot_usage/parser.py b/src/copilot_usage/parser.py index 5144c344..1502c938 100644 --- a/src/copilot_usage/parser.py +++ b/src/copilot_usage/parser.py @@ -801,7 +801,13 @@ def _first_pass(events: list[SessionEvent]) -> _FirstPassResult: if tool_model is None: try: m = ev.as_tool_execution().model - except (ValidationError, ValueError): + except ValidationError as exc: + logger.debug( + "event {} — could not parse {} event ({}), skipping", + idx, + ev.type, + exc.error_count(), + ) m = None if m: tool_model = m From d5e1d8bbd0c1b351ea234cbbd8ff0b91fa1ab3cd Mon Sep 17 00:00:00 2001 From: Sasa Junuzovic <44276455+microsasa@users.noreply.github.com> Date: Wed, 22 Apr 2026 08:13:29 -0700 Subject: [PATCH 3/6] fix: update test to expect debug log for bad tool events MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The test_tool_execution_complete_bad_data_silently_skipped test asserted that no debug logs were emitted for malformed tool.execution_complete events. This was correct when _first_pass used raw dict access (ev.data.get("model")), but the PR switched to the typed as_tool_execution() accessor which now catches ValidationError and emits a debug log — matching the pattern used for session.start and session.shutdown handlers. Rename the test to test_tool_execution_complete_bad_data_logs_debug and flip the assertion from 'not any' to 'any' so it expects the debug message. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- tests/copilot_usage/test_parser.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/tests/copilot_usage/test_parser.py b/tests/copilot_usage/test_parser.py index 4d47c422..b240ce18 100644 --- a/tests/copilot_usage/test_parser.py +++ b/tests/copilot_usage/test_parser.py @@ -3785,13 +3785,15 @@ def test_session_shutdown_validation_error_logs_debug(self, tmp_path: Path) -> N for msg in log_messages ) - def test_tool_execution_complete_bad_data_silently_skipped( + def test_tool_execution_complete_bad_data_logs_debug( self, tmp_path: Path ) -> None: - """Bad tool.execution_complete data → silently skipped (no Pydantic validation). + """Bad tool.execution_complete data → debug log emitted. - The optimised path reads ``ev.data.get("model")`` directly; non-string - or missing values are ignored without raising or logging. + The typed ``as_tool_execution()`` accessor raises ``ValidationError`` + for malformed payloads; the handler logs a debug message and skips + the event, matching the pattern used for session.start and + session.shutdown. """ from loguru import logger @@ -3818,7 +3820,7 @@ def test_tool_execution_complete_bad_data_silently_skipped( # Malformed event is harmlessly ignored; summary still builds. assert summary is not None - assert not any( + assert any( "could not parse" in msg and "tool.execution_complete" in msg for msg in log_messages ) From 6aee7e1981d2f13f02ba2fd1245356350ac4a118 Mon Sep 17 00:00:00 2001 From: Sasa Junuzovic Date: Wed, 22 Apr 2026 22:28:11 -0700 Subject: [PATCH 4/6] fix: ruff format on test_parser.py Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- tests/copilot_usage/test_parser.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/copilot_usage/test_parser.py b/tests/copilot_usage/test_parser.py index b240ce18..6148ce8d 100644 --- a/tests/copilot_usage/test_parser.py +++ b/tests/copilot_usage/test_parser.py @@ -3785,9 +3785,7 @@ def test_session_shutdown_validation_error_logs_debug(self, tmp_path: Path) -> N for msg in log_messages ) - def test_tool_execution_complete_bad_data_logs_debug( - self, tmp_path: Path - ) -> None: + def test_tool_execution_complete_bad_data_logs_debug(self, tmp_path: Path) -> None: """Bad tool.execution_complete data → debug log emitted. The typed ``as_tool_execution()`` accessor raises ``ValidationError`` From 21d64c31bb66bdbfb8191e5f7ed4652c25d46994 Mon Sep 17 00:00:00 2001 From: Sasa Junuzovic <44276455+microsasa@users.noreply.github.com> Date: Wed, 22 Apr 2026 22:43:25 -0700 Subject: [PATCH 5/6] fix: address review comments Add defensive ValueError catch to TOOL_EXECUTION_COMPLETE handler in _first_pass, matching the documented exceptions of as_tool_execution(). The etype guard makes ValueError currently unreachable, but catching it prevents escape if the guard is ever restructured. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/copilot_usage/parser.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/copilot_usage/parser.py b/src/copilot_usage/parser.py index 1502c938..e9c68d0d 100644 --- a/src/copilot_usage/parser.py +++ b/src/copilot_usage/parser.py @@ -809,6 +809,17 @@ def _first_pass(events: list[SessionEvent]) -> _FirstPassResult: exc.error_count(), ) m = None + except ValueError as exc: + # Defensive: currently unreachable because the etype + # guard above prevents type mismatches, but caught so + # a refactor of the guard can't let ValueError escape. + logger.debug( + "event {} — could not parse {} event ({}), skipping", + idx, + ev.type, + exc, + ) + m = None if m: tool_model = m continue From fa3534376fc1ece264aeee7dee642035898e47cc Mon Sep 17 00:00:00 2001 From: Sasa Junuzovic <44276455+microsasa@users.noreply.github.com> Date: Wed, 22 Apr 2026 23:07:55 -0700 Subject: [PATCH 6/6] fix: add test for defensive ValueError handler in _first_pass MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The diff-coverage check (≥90% on new lines) failed because the except-ValueError handler in _first_pass is explicitly unreachable defensive code with no test exercising it. Add a test that patches as_tool_execution to raise ValueError, verifying the handler keeps tool_model as None. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- tests/copilot_usage/test_parser.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tests/copilot_usage/test_parser.py b/tests/copilot_usage/test_parser.py index 6148ce8d..450c696e 100644 --- a/tests/copilot_usage/test_parser.py +++ b/tests/copilot_usage/test_parser.py @@ -6831,6 +6831,32 @@ def test_first_pass_tool_model_validation_error(self) -> None: fp = _first_pass(events) assert fp.tool_model is None + def test_first_pass_tool_model_value_error(self) -> None: + """Defensive ValueError handler keeps tool_model None. + + The ``except ValueError`` branch in ``_first_pass`` is currently + unreachable because the ``etype`` guard prevents type mismatches, + but it exists so a future refactor cannot let ``ValueError`` escape. + We exercise it by patching ``as_tool_execution`` to raise + ``ValueError``. + """ + events: list[SessionEvent] = [ + SessionEvent( + type=EventType.TOOL_EXECUTION_COMPLETE, + data={"toolCallId": "tc-ve", "model": "gpt-5.1"}, + id="ev-ve", + timestamp=None, + parentId=None, + ), + ] + with patch.object( + SessionEvent, + "as_tool_execution", + side_effect=ValueError("simulated type mismatch"), + ): + fp = _first_pass(events) + assert fp.tool_model is None + # --------------------------------------------------------------------------- # Issue #509 — mtime-based session cache