fix: Make sure cross-context cleanup doesn't raise an error (#58)

AbhiPrasad · viadezo1er · commit c104e90ebaca · 2026-03-10T17:11:03.000-07:00
diff --git a/AGENTS.md b/AGENTS.md
@@ -58,6 +58,8 @@ make test-core
 nox -l
 ```
 
+For larger or cross-cutting changes, also run `make pylint` from `py/` before handing work off.
+
 Targeted wrapper/session runs:
 
 ```bash
@@ -77,6 +79,7 @@ Key facts:
 - `test_core` runs without optional vendor packages.
 - wrapper coverage is split across dedicated nox sessions by provider/version.
 - `pylint` installs the broad dependency surface before checking files.
+- `cd py && make pylint` runs only `pylint`; `cd py && make lint` runs pre-commit hooks first and then `pylint`.
 - `test-wheel` is a wheel sanity check and requires a built wheel first.
 
 When changing behavior, run the narrowest affected session first, then expand only if needed.
diff --git a/Makefile b/Makefile
@@ -24,9 +24,12 @@ test-core:
 test-wheel:
 	mise exec -- $(MAKE) -C py test-wheel
 
-lint pylint:
+lint:
 	mise exec -- $(MAKE) -C py lint
 
+pylint:
+	mise exec -- $(MAKE) -C py pylint
+
 nox: test
 
 help:
@@ -35,8 +38,8 @@ help:
 	@echo "  fixup        - Run pre-commit hooks across the repo"
 	@echo "  install-deps - Install Python SDK dependencies via py/Makefile"
 	@echo "  install-dev  - Install pinned tools and create/update the repo env via mise"
-	@echo "  lint         - Run Python SDK lint checks via py/Makefile"
-	@echo "  pylint       - Alias for lint"
+	@echo "  lint         - Run pre-commit hooks plus Python SDK pylint via py/Makefile"
+	@echo "  pylint       - Run Python SDK pylint only via py/Makefile"
 	@echo "  nox          - Alias for test"
 	@echo "  test         - Run the Python SDK nox matrix via py/Makefile"
 	@echo "  test-core    - Run Python SDK core tests via py/Makefile"
diff --git a/py/Makefile b/py/Makefile
@@ -2,7 +2,7 @@ PYTHON ?= python
 UV := $(PYTHON) -m uv
 UV_VERSION := $(shell awk '$$1=="uv" { print $$2 }' ../.tool-versions)
 
-.PHONY: lint test test-wheel _template-version clean fixup build verify-build verify help install-build-deps install-dev install-optional test-core _check-git-clean
+.PHONY: lint pylint test test-wheel _template-version clean fixup build verify-build verify help install-build-deps install-dev install-optional test-core _check-git-clean
 
 clean:
 	rm -rf build dist
@@ -14,6 +14,9 @@ fixup:
 lint: fixup
 	nox -s pylint
 
+pylint:
+	nox -s pylint
+
 test:
 	nox -x
 
@@ -69,6 +72,7 @@ help:
 	@echo "  install-build-deps  - Install build dependencies for CI"
 	@echo "  install-dev         - Install package in development mode with all dependencies"
-	@echo "  lint                - Run pylint checks"
+	@echo "  lint                - Run pre-commit hooks, then pylint checks"
+	@echo "  pylint              - Run pylint without pre-commit hooks"
 	@echo "  test                - Run all tests"
 	@echo "  test-core           - Run core tests only"
 	@echo "  test-wheel          - Run tests against built wheel"
diff --git a/py/src/braintrust/context.py b/py/src/braintrust/context.py
@@ -103,7 +103,10 @@ def set_current_span(self, span_object: Any) -> Any:
     def unset_current_span(self, context_token: Any = None) -> None:
         """Unset the current active span."""
         if context_token:
-            self._current_span.reset(context_token)
+            try:
+                self._current_span.reset(context_token)
+            except ValueError:
+                self._current_span.set(None)
         else:
             self._current_span.set(None)
 
diff --git a/py/src/braintrust/test_context.py b/py/src/braintrust/test_context.py
@@ -753,6 +753,27 @@ async def task_work():
     )
 
 
+@pytest.mark.asyncio
+async def test_unset_current_span_with_cross_context_token_falls_back_to_clear():
+    """Cross-context cleanup should not raise if the token can't be reset."""
+    from braintrust.context import BraintrustContextManager
+
+    context_manager = BraintrustContextManager()
+    token = context_manager.set_current_span("parent")
+    result = {}
+
+    async def other_task():
+        try:
+            context_manager.unset_current_span(token)
+            result["outcome"] = "ok"
+        except Exception as e:
+            result["outcome"] = f"{type(e).__name__}: {e}"
+
+    await asyncio.create_task(other_task())
+
+    assert result["outcome"] == "ok"
+
+
 @pytest.mark.asyncio
 async def test_async_generator_early_break_context_token(test_logger, with_memory_logger):
     """
diff --git a/py/src/braintrust/wrappers/claude_agent_sdk/test_wrapper.py b/py/src/braintrust/wrappers/claude_agent_sdk/test_wrapper.py
@@ -5,6 +5,12 @@
 the actual Claude Agent SDK.
 """
 
+import asyncio
+import gc
+import sys
+import types
+from typing import Type
+
 import pytest
 
 # Try to import the Claude Agent SDK - skip tests if not available
@@ -19,6 +25,7 @@
 from braintrust import logger
 from braintrust.span_types import SpanTypeAttribute
 from braintrust.test_helpers import init_test_logger
+from braintrust.wrappers.claude_agent_sdk import setup_claude_agent_sdk
 from braintrust.wrappers.claude_agent_sdk._wrapper import (
     _create_client_wrapper_class,
     _create_tool_wrapper_class,
@@ -292,3 +299,110 @@ class TestAutoInstrumentClaudeAgentSDK:
     def test_auto_instrument_claude_agent_sdk(self):
         """Test auto_instrument patches Claude Agent SDK and creates spans."""
         verify_autoinstrument_script("test_auto_claude_agent_sdk.py")
+
+
+class _FakeClaudeAgentOptions:
+    def __init__(self, model, permission_mode=None):
+        self.model = model
+        self.permission_mode = permission_mode
+
+
+class _FakeMessage:
+    def __init__(self, content):
+        self.content = content
+
+
+class _FakeResultMessage:
+    def __init__(self):
+        self.usage = types.SimpleNamespace(input_tokens=1, output_tokens=1, cache_creation_input_tokens=0)
+        self.num_turns = 1
+        self.session_id = "session-123"
+
+
+class _FakeClaudeSDKClient:
+    def __init__(self, options):
+        self.options = options
+        self._prompt = None
+
+    async def __aenter__(self):
+        return self
+
+    async def __aexit__(self, *args):
+        return None
+
+    async def query(self, prompt):
+        self._prompt = prompt
+
+    async def receive_response(self):
+        yield _FakeMessage("Hello")
+        await asyncio.sleep(0)
+        yield _FakeResultMessage()
+
+
+class _FakeClaudeSdkModule(types.ModuleType):
+    ClaudeSDKClient: Type[_FakeClaudeSDKClient]
+    ClaudeAgentOptions: Type[_FakeClaudeAgentOptions]
+    SdkMcpTool = None
+    tool = None
+
+
+class _FakeConsumerModule(types.ModuleType):
+    ClaudeSDKClient: Type[_FakeClaudeSDKClient]
+    ClaudeAgentOptions: Type[_FakeClaudeAgentOptions]
+
+
+def _install_fake_claude_sdk(monkeypatch):
+    fake_module = _FakeClaudeSdkModule("claude_agent_sdk")
+    fake_module.ClaudeSDKClient = _FakeClaudeSDKClient
+    fake_module.ClaudeAgentOptions = _FakeClaudeAgentOptions
+    monkeypatch.setitem(sys.modules, "claude_agent_sdk", fake_module)
+    return fake_module
+
+
+@pytest.mark.asyncio
+async def test_setup_claude_agent_sdk_repro_import_before_setup(memory_logger, monkeypatch):
+    """Regression test for https://github.com/braintrustdata/braintrust-sdk-python/issues/7."""
+    assert not memory_logger.pop()
+
+    fake_sdk = _install_fake_claude_sdk(monkeypatch)
+    consumer_module_name = "test_issue7_repro_module"
+    consumer_module = _FakeConsumerModule(consumer_module_name)
+    consumer_module.ClaudeSDKClient = fake_sdk.ClaudeSDKClient
+    consumer_module.ClaudeAgentOptions = fake_sdk.ClaudeAgentOptions
+    monkeypatch.setitem(sys.modules, consumer_module_name, consumer_module)
+
+    # Mirror the reported import pattern:
+    # from claude_agent_sdk import ClaudeSDKClient, ClaudeAgentOptions
+    assert setup_claude_agent_sdk(project=PROJECT_NAME, api_key=logger.TEST_API_KEY)
+    assert consumer_module.ClaudeSDKClient is not _FakeClaudeSDKClient
+
+    loop_errors = []
+    received_types = []
+
+    async def main():
+        loop = asyncio.get_running_loop()
+        loop.set_exception_handler(lambda loop, ctx: loop_errors.append(ctx.get("exception") or ctx.get("message")))
+
+        options = consumer_module.ClaudeAgentOptions(
+            model="claude-sonnet-4-20250514",
+            permission_mode="bypassPermissions",
+        )
+        async with consumer_module.ClaudeSDKClient(options=options) as client:
+            await client.query("Hello")
+            async for message in client.receive_response():
+                received_types.append(type(message).__name__)
+
+        await asyncio.sleep(0)
+        gc.collect()
+        await asyncio.sleep(0.01)
+
+    await main()
+
+    assert loop_errors == []
+    assert received_types == ["_FakeMessage", "_FakeResultMessage"]
+
+    spans = memory_logger.pop()
+    task_spans = [s for s in spans if s["span_attributes"]["type"] == SpanTypeAttribute.TASK]
+    assert len(task_spans) == 1
+    assert task_spans[0]["span_attributes"]["name"] == "Claude Agent"
+    assert task_spans[0]["input"] == "Hello"