From 82a7bd22e83cb843859efe45bca061cfb017b324 Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 12 Jun 2026 20:44:53 +0000 Subject: [PATCH] Collapse websockets binary-frame hex dumps in -vv output MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A realtime session logs dozens of '> BINARY fb ff ...' lines per second whose payload is PCM audio — meaningless as hex even after websockets' own 75-char cap. The -vv formatter now rewrites websockets binary/CONT frame records to keep just the direction and metadata, e.g. '> BINARY [9600 bytes]'. Other loggers and TEXT frames are untouched. https://claude.ai/code/session_01TBiiKi2mruovPieXh2ezHi --- aai_cli/debuglog.py | 16 +++++++++++++++- tests/test_debuglog.py | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 1 deletion(-) diff --git a/aai_cli/debuglog.py b/aai_cli/debuglog.py index bc25535e..e8681582 100644 --- a/aai_cli/debuglog.py +++ b/aai_cli/debuglog.py @@ -14,6 +14,11 @@ sites, because the leak comes from *library* logs — websockets logs the raw Authorization header at DEBUG during the handshake. +Binary frames are summarized, not hex-dumped: a realtime session logs dozens of +``> BINARY fb ff …`` lines per second whose payload is PCM audio — meaningless +as hex — so the formatter collapses them to ``> BINARY [9600 bytes]``, keeping +the direction and size that make ``-vv`` useful for debugging the wire. + Stdlib-only on purpose: ``config`` (a Rich-free library layer) registers secrets here, so this module must not pull in Rich via ``output``/``theme``. """ @@ -21,6 +26,7 @@ from __future__ import annotations import logging +import re import sys _MASK = "[redacted]" @@ -28,14 +34,22 @@ _verbosity = 0 _secrets: set[str] = set() +# A websockets frame log's hex payload: opcode, hex byte pairs (possibly elided +# with "..." by the library's own 75-char cap), then the "[N bytes]" metadata. +# CONT is included because a fragmented binary message continues as CONT frames. +_BINARY_FRAME_HEX = re.compile(r"\b(BINARY|CONT) [0-9a-f][0-9a-f. ]* \[") + class _RedactingFormatter(logging.Formatter): - """Formats records normally, then masks every registered secret.""" + """Formats records normally, then masks every registered secret and + collapses websockets binary-frame hex dumps to their byte count.""" def format(self, record: logging.LogRecord) -> str: text = super().format(record) for secret in _secrets: text = text.replace(secret, _MASK) + if record.name.partition(".")[0] == "websockets": + text = _BINARY_FRAME_HEX.sub(r"\1 [", text) return text diff --git a/tests/test_debuglog.py b/tests/test_debuglog.py index 07f29e44..5d299365 100644 --- a/tests/test_debuglog.py +++ b/tests/test_debuglog.py @@ -81,6 +81,41 @@ def test_registered_secrets_are_masked_in_every_record(capsys): assert "authorization: [redacted]" in err +def test_binary_frame_hex_is_collapsed_to_byte_count(capsys): + debuglog.enable(2) + logging.getLogger("websockets.client").debug( + "> BINARY fb ff fe ff fb ff f5 ff ef ff 00 00 fd ff fd ff " + "... 0d 00 f7 ff 01 00 0c 00 [9600 bytes]" + ) + err = capsys.readouterr().err + assert "[websockets.client] > BINARY [9600 bytes]" in err + assert "fb ff" not in err + + +def test_real_websockets_binary_frame_rendering_is_collapsed(capsys): + # Pin the regex to the library's actual frame format: a Frame's __str__ is + # what websockets interpolates into its DEBUG records, elision and all. + from websockets.frames import OP_BINARY, OP_CONT, Frame + + debuglog.enable(2) + log = logging.getLogger("websockets.client") + log.debug("> %s", Frame(OP_BINARY, bytes(9600), fin=False)) + log.debug("> %s", Frame(OP_CONT, b"\xfb\xff\x0d\x00")) + err = capsys.readouterr().err + assert "> BINARY [9600 bytes, continued]" in err + assert "> CONT [binary, 4 bytes]" in err + assert "00 00" not in err + + +def test_text_frames_and_other_loggers_keep_their_payload(capsys): + debuglog.enable(2) + logging.getLogger("websockets.client").debug("> TEXT '{\"audio\": true}' [16 bytes]") + logging.getLogger("httpx").debug("> BINARY fb ff 0d 00 [4 bytes]") + err = capsys.readouterr().err + assert "> TEXT '{\"audio\": true}' [16 bytes]" in err + assert "[httpx] > BINARY fb ff 0d 00 [4 bytes]" in err + + def test_register_secret_ignores_empty_values(): debuglog.register_secret(None) debuglog.register_secret("")