Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion aai_cli/debuglog.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,28 +14,42 @@
sites, because the leak comes from *library* logs — websockets logs the raw
Authorization header at DEBUG during the handshake.

Binary frames are summarized, not hex-dumped: a realtime session logs dozens of
``> BINARY fb ff …`` lines per second whose payload is PCM audio — meaningless
as hex — so the formatter collapses them to ``> BINARY [9600 bytes]``, keeping
the direction and size that make ``-vv`` useful for debugging the wire.

Stdlib-only on purpose: ``config`` (a Rich-free library layer) registers
secrets here, so this module must not pull in Rich via ``output``/``theme``.
"""

from __future__ import annotations

import logging
import re
import sys

_MASK = "[redacted]"

_verbosity = 0
_secrets: set[str] = set()

# A websockets frame log's hex payload: opcode, hex byte pairs (possibly elided
# with "..." by the library's own 75-char cap), then the "[N bytes]" metadata.
# CONT is included because a fragmented binary message continues as CONT frames.
_BINARY_FRAME_HEX = re.compile(r"\b(BINARY|CONT) [0-9a-f][0-9a-f. ]* \[")


class _RedactingFormatter(logging.Formatter):
"""Formats records normally, then masks every registered secret."""
"""Formats records normally, then masks every registered secret and
collapses websockets binary-frame hex dumps to their byte count."""

def format(self, record: logging.LogRecord) -> str:
text = super().format(record)
for secret in _secrets:
text = text.replace(secret, _MASK)
if record.name.partition(".")[0] == "websockets":
text = _BINARY_FRAME_HEX.sub(r"\1 [", text)
return text


Expand Down
35 changes: 35 additions & 0 deletions tests/test_debuglog.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,41 @@ def test_registered_secrets_are_masked_in_every_record(capsys):
assert "authorization: [redacted]" in err


def test_binary_frame_hex_is_collapsed_to_byte_count(capsys):
debuglog.enable(2)
logging.getLogger("websockets.client").debug(
"> BINARY fb ff fe ff fb ff f5 ff ef ff 00 00 fd ff fd ff "
"... 0d 00 f7 ff 01 00 0c 00 [9600 bytes]"
)
err = capsys.readouterr().err
assert "[websockets.client] > BINARY [9600 bytes]" in err
assert "fb ff" not in err


def test_real_websockets_binary_frame_rendering_is_collapsed(capsys):
# Pin the regex to the library's actual frame format: a Frame's __str__ is
# what websockets interpolates into its DEBUG records, elision and all.
from websockets.frames import OP_BINARY, OP_CONT, Frame

debuglog.enable(2)
log = logging.getLogger("websockets.client")
log.debug("> %s", Frame(OP_BINARY, bytes(9600), fin=False))
log.debug("> %s", Frame(OP_CONT, b"\xfb\xff\x0d\x00"))
err = capsys.readouterr().err
assert "> BINARY [9600 bytes, continued]" in err
assert "> CONT [binary, 4 bytes]" in err
assert "00 00" not in err


def test_text_frames_and_other_loggers_keep_their_payload(capsys):
debuglog.enable(2)
logging.getLogger("websockets.client").debug("> TEXT '{\"audio\": true}' [16 bytes]")
logging.getLogger("httpx").debug("> BINARY fb ff 0d 00 [4 bytes]")
err = capsys.readouterr().err
assert "> TEXT '{\"audio\": true}' [16 bytes]" in err
assert "[httpx] > BINARY fb ff 0d 00 [4 bytes]" in err


def test_register_secret_ignores_empty_values():
debuglog.register_secret(None)
debuglog.register_secret("")
Expand Down
Loading