From 6b113761be8b8d34eeed90f5adaa36d04c6890ab Mon Sep 17 00:00:00 2001 From: "MagicMock/mock.effective_git_name/126962641680240" Date: Mon, 18 May 2026 08:03:18 +0000 Subject: [PATCH] feat(pebble): send ntfy title as native UTF-8 via RFC 2047 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit #73 stopped the silent UnicodeEncodeError on the ntfy Title header by ASCII-coercing the title (em dash → "-", smart quotes → "'", etc.) — correct in the moment but lossy: emoji disappear, German umlauts ("Müller") collapse to "?", and Claude has to mentally avoid Unicode when summarising a Pebble outcome. Switch to standards-based UTF-8 instead. httpx still serialises header values as latin-1, but ntfy accepts RFC 2047 encoded-words (=?utf-8?[bq]?...?=) and decodes them server-side. Encoding happens at the point of header construction via stdlib email.header.Header; the stored NotificationPayload.title and the pebble.notify_title OTel span attribute both keep the raw Unicode so logs and traces stay readable. ASCII titles pass through verbatim — no wire overhead, no behaviour change for the framework fallback titles ("Pebble: timeout", etc.). Co-Authored-By: Claude Opus 4.7 --- src/clayde/webhook/notify.py | 48 ++++++------- tests/test_webhook_notify.py | 129 ++++++++++++++++++++++++++++------- 2 files changed, 127 insertions(+), 50 deletions(-) diff --git a/src/clayde/webhook/notify.py b/src/clayde/webhook/notify.py index 8d1f064..c0aa5a3 100644 --- a/src/clayde/webhook/notify.py +++ b/src/clayde/webhook/notify.py @@ -7,6 +7,7 @@ from __future__ import annotations import logging +from email.header import Header import httpx from pydantic import BaseModel, field_validator @@ -16,36 +17,30 @@ log = logging.getLogger("clayde.webhook.notify") -# ntfy header values are sent through httpx, which encodes headers as -# latin-1. 
Anything outside that range raises UnicodeEncodeError before -# the request goes out, so the user never sees the notification. We -# normalise common typographic Unicode to ASCII and replace anything -# left over with '?'. -_UNICODE_TO_ASCII = str.maketrans({ - "—": "-", # em dash - "–": "-", # en dash - "−": "-", # minus sign - "‘": "'", # left single quote - "’": "'", # right single quote / apostrophe - "“": '"', # left double quote - "”": '"', # right double quote - "…": "...", # ellipsis - " ": " ", # non-breaking space -}) +def _encode_header_value(text: str) -> str: + """Encode a header value safely for httpx. - -def _to_ascii(text: str) -> str: - """Coerce arbitrary text to safe ASCII for use in HTTP headers.""" - return text.translate(_UNICODE_TO_ASCII).encode("ascii", "replace").decode("ascii") + httpx serialises header values as latin-1, so raw non-ASCII strings + raise UnicodeEncodeError before the request leaves the process. ntfy + accepts RFC 2047 encoded-words (``=?utf-8?b?...?=``) and decodes + them server-side, so we route non-ASCII through that. ASCII titles + pass through verbatim — keeps log/trace output readable and avoids + pointless wire overhead. + """ + try: + text.encode("ascii") + except UnicodeEncodeError: + return Header(text, charset="utf-8").encode(linesep="") # linesep="" keeps the value on one physical line: by default Header.encode() folds long values with "\n", which is not allowed inside an HTTP header value. + return text class NotificationPayload(BaseModel): """Outcome of a Pebble run, as emitted by Claude in the JSON tail. Title is clamped to 40 chars and body to 300 chars at construction - time so accidental over-long values never propagate to ntfy headers. - Title is additionally coerced to ASCII because it travels as an HTTP - header and httpx rejects non-latin-1 header values. + time so accidental over-long values never propagate to ntfy. The + title is stored as the raw Unicode string the user/Claude produced; + RFC 2047 encoding for the actual HTTP header happens in ``send_ntfy``.
""" title: str @@ -55,9 +50,7 @@ class NotificationPayload(BaseModel): @field_validator("title", mode="before") @classmethod def _clamp_title(cls, v): - if not isinstance(v, str): - return v - return _to_ascii(v)[:40] + return v[:40] if isinstance(v, str) else v @field_validator("body", mode="before") @classmethod @@ -77,13 +70,14 @@ async def send_ntfy( """POST to ntfy.sh. Best-effort: errors are logged + OTel-annotated, never raised.""" url = f"{base_url.rstrip('/')}/{topic}" headers = { - "Title": title, + "Title": _encode_header_value(title), "Priority": "3" if success else "5", "Tags": "white_check_mark" if success else "rotating_light", } tracer = get_tracer() with tracer.start_as_current_span("clayde.pebble.notify") as span: span.set_attribute("pebble.notify_topic", topic) + # Span attribute holds the raw Unicode title for readable traces. span.set_attribute("pebble.notify_title", title) span.set_attribute("pebble.outcome_success", success) try: diff --git a/tests/test_webhook_notify.py b/tests/test_webhook_notify.py index 7e01809..1228759 100644 --- a/tests/test_webhook_notify.py +++ b/tests/test_webhook_notify.py @@ -2,11 +2,14 @@ from __future__ import annotations +import base64 +import re + import httpx import pytest import respx -from clayde.webhook.notify import NotificationPayload, send_ntfy +from clayde.webhook.notify import NotificationPayload, _encode_header_value, send_ntfy def test_notification_payload_clamps_length(): @@ -15,6 +18,15 @@ def test_notification_payload_clamps_length(): assert len(p.body) == 300 +def test_notification_payload_clamps_length_with_unicode(): + # Character-count clamp, not byte-count — verify multibyte chars still + # count as one position. 
+ p = NotificationPayload(title="ü" * 100, body="ß" * 1000, success=True) + assert len(p.title) == 40 + assert len(p.body) == 300 + assert p.title == "ü" * 40 + + def test_notification_payload_accepts_short(): p = NotificationPayload(title="hi", body="all good", success=True) assert p.title == "hi" @@ -22,34 +34,41 @@ def test_notification_payload_accepts_short(): assert p.success is True -def test_notification_payload_em_dash_in_title_normalised(): - # Real prod failure: em dash in title raised UnicodeEncodeError when - # httpx serialised the header as latin-1. - p = NotificationPayload(title="Thomas Stegger — plant prefs saved", body="ok", success=True) - assert "—" not in p.title - assert p.title == "Thomas Stegger - plant prefs saved" - # Must round-trip cleanly through latin-1 (the header codec httpx uses). - p.title.encode("latin-1") +def test_notification_payload_preserves_unicode(): + # Raw Unicode is kept as-is; RFC 2047 encoding happens in send_ntfy. + p = NotificationPayload(title="Müller — Notiz", body="ok", success=True) + assert p.title == "Müller — Notiz" -def test_notification_payload_smart_quotes_in_title_normalised(): - p = NotificationPayload(title="“hi” ‘there’", body="ok", success=True) - assert p.title == '"hi" \'there\'' +def test_encode_header_value_passes_ascii_through(): + assert _encode_header_value("plain ascii") == "plain ascii" -def test_notification_payload_unknown_unicode_in_title_replaced(): - p = NotificationPayload(title="emoji \U0001f600 tail", body="ok", success=True) - assert "\U0001f600" not in p.title - p.title.encode("ascii") +_RFC2047_WORD = re.compile(r"=\?utf-8\?[bq]\?[^?]*\?=", re.IGNORECASE) -def test_notification_payload_ascii_coercion_runs_before_clamp(): - # "..." (3 chars) replaces "…" (1 char); clamp comes after, so a - # title that fit pre-replacement may not fit after — and that's fine. 
- long = "a" * 38 + "…" # 39 chars in, 41 chars after replacement - p = NotificationPayload(title=long, body="ok", success=True) - assert len(p.title) == 40 - p.title.encode("ascii") +def test_encode_header_value_rfc2047_encodes_unicode(): + out = _encode_header_value("Thomas Stegger — plant prefs saved") + # email.header.Header emits =?utf-8?[bq]?...?= encoded words; B and Q + # are both valid RFC 2047 forms and ntfy decodes either. + assert _RFC2047_WORD.search(out) + decoded = _decode_rfc2047(out) + assert decoded == "Thomas Stegger — plant prefs saved" + # Result must be ASCII-only so httpx can serialise it as a header. + out.encode("ascii") + + +def _decode_rfc2047(encoded: str) -> str: + """Decode an RFC 2047 encoded-word string back to its Unicode form.""" + from email.header import decode_header + parts = decode_header(encoded) + out = [] + for chunk, charset in parts: + if isinstance(chunk, bytes): + out.append(chunk.decode(charset or "ascii")) + else: + out.append(chunk) + return "".join(out) @pytest.mark.asyncio @@ -68,6 +87,7 @@ async def test_send_ntfy_success_headers(): ) assert route.called req = route.calls.last.request + # ASCII title passes through verbatim. assert req.headers["title"] == "pong" assert req.headers["priority"] == "3" assert req.headers["tags"] == "white_check_mark" @@ -93,6 +113,69 @@ async def test_send_ntfy_uses_failure_priority_and_tags_when_success_false(): assert req.headers["tags"] == "rotating_light" +@pytest.mark.asyncio +@respx.mock +async def test_send_ntfy_encodes_unicode_title_as_rfc2047(): + route = respx.post("https://ntfy.sh/abc123").mock( + return_value=httpx.Response(200, json={"id": "msg1"}) + ) + title = "Thomas Stegger — plant prefs saved" + await send_ntfy( + title=title, + body="ok", + success=True, + base_url="https://ntfy.sh", + topic="abc123", + timeout_s=5, + ) + req = route.calls.last.request + header = req.headers["title"] + # Must be ASCII-only so httpx can transmit it. 
+ header.encode("ascii") + assert _RFC2047_WORD.search(header) + assert _decode_rfc2047(header) == title + + +@pytest.mark.asyncio +@respx.mock +async def test_send_ntfy_handles_emoji_title(): + route = respx.post("https://ntfy.sh/abc123").mock( + return_value=httpx.Response(200, json={"id": "msg1"}) + ) + title = "\U0001f600 done" + await send_ntfy( + title=title, + body="ok", + success=True, + base_url="https://ntfy.sh", + topic="abc123", + timeout_s=5, + ) + req = route.calls.last.request + header = req.headers["title"] + header.encode("ascii") + assert _decode_rfc2047(header) == title + + +@pytest.mark.asyncio +@respx.mock +async def test_send_ntfy_handles_german_umlauts_title(): + route = respx.post("https://ntfy.sh/abc123").mock( + return_value=httpx.Response(200, json={"id": "msg1"}) + ) + title = "Müller — Notiz gespeichert" + await send_ntfy( + title=title, + body="ok", + success=True, + base_url="https://ntfy.sh", + topic="abc123", + timeout_s=5, + ) + req = route.calls.last.request + assert _decode_rfc2047(req.headers["title"]) == title + + @pytest.mark.asyncio @respx.mock async def test_send_ntfy_swallows_errors():