email(v1): polish — fix 3 LOW review findings

Sbussiso · claude · Sbussiso · commit 88b7c7bcbc69 · 2026-05-03T00:27:07.000-07:00
The cosmetic + defense-in-depth fixes from the v1 code review.
None of the three are exploitable today; all three remove future
footguns and improve operator-facing accuracy.

LOW — Worker tick summary over-counted mid-retry failures.
``run_one_tick`` was incrementing ``summary[status]`` based on the
per-attempt outcome from ``_process_row``, ignoring the fact that
``_finalize_row`` may have flipped the row back to 'pending' for
another attempt.  A row that retried twice and succeeded was
showing up as ``failed=2 sent=1`` across three ticks instead of
``sent=1`` in the tick that actually succeeded.  The numbers in
operator log streams now reflect terminal outcomes only — mid-
retry rows count zero in any bucket and pop into the right
bucket on the tick they finally land.  Pinned by an extension to
``test_worker_retries_on_transient_failure`` and a sum-across-
ticks check in ``test_worker_gives_up_at_max_attempts``.

LOW — Subject template didn't strip embedded CR/LF.
``email_templates.render`` called ``.strip()`` on the rendered
subject, which only trims edges.  An embedded ``\r\nBcc: ...``
in a notification.title (operator-controlled camera names; AI-
agent-supplied incident titles) would survive into the subject
line.  Resend's API rejects header injection today so this isn't
exploitable, but a future provider swap that forwards subjects
raw to SMTP would turn it into a Bcc-injection vector.  Added
the explicit replace pass and ``test_render_strips_embedded_
newlines_from_subject`` to pin it.

LOW — Unsubscribe HTML pages didn't escape the substituted
``kind`` and ``frontend`` values.  The pages use ``str.format()``
(not Jinja2, so autoescape doesn't apply) and a token whose
``kind`` claim contained ``<script>...</script>`` would render
the raw script tag.  Practically unreachable — forging the JWT
requires CLERK_SECRET_KEY, at which point the attacker has root
— but added a one-line ``_safe_html`` helper and applied it to
every interpolation site.  ``test_unsubscribe_endpoint_html_
escapes_kind`` forges a token (using the same secret the test
process holds) to prove the escape actually fires.

Full suite: 408 passed (was 406; +2 from the two new explicit
tests).  All six review fixes (1 CRITICAL + 2 HIGH + 3 LOW)
now landed.  Ready to push when the operator-side Resend setup
is done.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
diff --git a/backend/app/api/notifications.py b/backend/app/api/notifications.py
@@ -866,6 +866,22 @@ async def update_email_preferences(
 </body></html>"""
 
 
+def _safe_html(s: str) -> str:
+    """HTML-escape a string for embedding in the unsubscribe pages.
+
+    The unsubscribe templates use ``str.format()`` (not Jinja2) so
+    autoescape doesn't apply.  Anything we substitute that COULD
+    contain user-controlled content needs explicit escaping.
+
+    Realistically the values come from a verified JWT (``kind``) and
+    server config (``frontend``), so neither is reachable by an
+    attacker without already having the Clerk secret.  But escaping
+    is one line and removes a future footgun.
+    """
+    import html as _html
+    return _html.escape(s or "", quote=True)
+
+
 @router.get("/email/unsubscribe", response_class=HTMLResponse)
 @limiter.limit("60/minute")
 async def email_unsubscribe(
@@ -897,11 +913,12 @@ async def email_unsubscribe(
     limits, so explicit @limiter.limit is required here.
     """
     frontend = (settings.FRONTEND_URL or "").rstrip("/")
+    frontend_safe = _safe_html(frontend)
 
     decoded = verify_token(t)
     if decoded is None:
         return HTMLResponse(
-            _UNSUBSCRIBE_HTML_ERROR.format(frontend=frontend),
+            _UNSUBSCRIBE_HTML_ERROR.format(frontend=frontend_safe),
             status_code=400,
         )
 
@@ -912,7 +929,9 @@ async def email_unsubscribe(
     cfg = _EMAIL_KIND_TO_SETTING.get(kind)
     if cfg is None:
         return HTMLResponse(
-            _UNSUBSCRIBE_HTML_OK.format(kind=kind, frontend=frontend),
+            _UNSUBSCRIBE_HTML_OK.format(
+                kind=_safe_html(kind), frontend=frontend_safe,
+            ),
         )
 
     setting_key, _default = cfg
@@ -941,5 +960,7 @@ async def email_unsubscribe(
     # → "camera offline" is friendlier than "email_camera_offline".
     pretty_kind = kind.replace("_", " ")
     return HTMLResponse(
-        _UNSUBSCRIBE_HTML_OK.format(kind=pretty_kind, frontend=frontend),
+        _UNSUBSCRIBE_HTML_OK.format(
+            kind=_safe_html(pretty_kind), frontend=frontend_safe,
+        ),
     )
diff --git a/backend/app/core/email_templates.py b/backend/app/core/email_templates.py
@@ -142,6 +142,14 @@ def render(
         env, f"{kind}.subject.txt.j2", context,
         fallback=f"[SourceBox Sentry] {notif_proxy.title}",
     ).strip()
+    # Defense in depth — strip embedded CR/LF.  ``.strip()`` only
+    # trims edges, not embedded newlines.  Resend's API rejects
+    # subjects with header injection today, but if a future provider
+    # swap forwards subjects raw to SMTP, an embedded ``\r\nBcc: ...``
+    # in a notification.title (which flows from operator-controlled
+    # camera names + AI agent-supplied incident titles) would be a
+    # header-injection vector.  Cheap belt-and-suspenders.
+    subject = subject.replace("\r", "").replace("\n", " ")
     body_text = _render_or_fallback(
         env, f"{kind}.body.txt.j2", context,
         fallback=_generic_body_text(notif_proxy, dash, unsubscribe_url),
diff --git a/backend/app/core/email_worker.py b/backend/app/core/email_worker.py
@@ -114,9 +114,16 @@ def run_one_tick(db: Session) -> dict:
         status, message_id, error = outcome
         _finalize_row(db, row, status, message_id, error)
         _write_log(db, row, status, message_id, error)
-        summary[status if status != "sent" else "sent"] = (
-            summary.get(status, 0) + 1
-        )
+        # Count by TERMINAL state, not per-attempt outcome.  A row
+        # that fails twice and succeeds on the third attempt should
+        # show up as sent=1 in the tick that actually succeeded, not
+        # failed=2 sent=1 across three ticks.  Mid-retry rows
+        # (status flipped back to 'pending' by _finalize_row) are
+        # uncounted — they'll show up in the bucket they end up in
+        # eventually.
+        terminal = row.status
+        if terminal in ("sent", "failed", "suppressed"):
+            summary[terminal] = summary.get(terminal, 0) + 1
 
     db.commit()
     return summary
diff --git a/backend/tests/test_email_templates.py b/backend/tests/test_email_templates.py
@@ -206,3 +206,26 @@ def test_render_uses_dashboard_url_override():
         dashboard_url="https://override.example.com",
     )
     assert "https://override.example.com" in body_text
+
+
+def test_render_strips_embedded_newlines_from_subject():
+    """A title containing CR/LF (operator-controlled camera name OR
+    AI-agent-supplied incident title) must NOT leak into the rendered
+    subject as embedded newlines.  Resend's API rejects subject
+    header injection today, but a future provider swap that forwards
+    subjects raw to SMTP would turn this into a Bcc-injection vector.
+
+    Exercises the `\\r\\n` (CRLF) separator used for header injection."""
+    notif = _fake_notif(title="Front Door\r\nBcc: attacker@evil.test")
+
+    subject, _, _ = email_templates.render(
+        "camera_offline", notif,
+        unsubscribe_url="https://x.test/u",
+    )
+
+    assert "\r" not in subject
+    assert "\n" not in subject
+    # The original characters survive (just as spaces / removed CRs)
+    # so the alert remains intelligible to the recipient.
+    assert "Front Door" in subject
+    assert "Bcc: attacker@evil.test" in subject
diff --git a/backend/tests/test_email_worker.py b/backend/tests/test_email_worker.py
@@ -222,7 +222,14 @@ def test_worker_logs_suppressed_outcomes(db, stub_send):
 
 def test_worker_retries_on_transient_failure(db, stub_send, monkeypatch):
     """First attempt fails → row stays 'pending' with attempts=1.
-    Second tick succeeds → row flips to 'sent' with attempts=2."""
+    Second tick succeeds → row flips to 'sent' with attempts=2.
+
+    Also pins the summary-counts contract: a row that retries
+    before succeeding shows up as sent=1 in the tick that actually
+    succeeded, NOT as failed=1 + sent=1 across both ticks.  The
+    summary line is what hits operator log streams; counting per-
+    attempt would falsely imply more failures than actually
+    happened to anyone reading the steady-state log."""
     # Make sure MAX_ATTEMPTS is high enough.
     monkeypatch.setattr(email_worker.settings, "EMAIL_MAX_ATTEMPTS", 3)
 
@@ -232,38 +239,56 @@ def test_worker_retries_on_transient_failure(db, stub_send, monkeypatch):
         EmailSendResult(ok=True, message_id="msg_retry"),
     ]
 
-    # Tick 1: fails
-    email_worker.run_one_tick(db)
+    # Tick 1: fails — row stays pending, summary counts NOTHING
+    # (mid-retry isn't a terminal outcome).
+    summary1 = email_worker.run_one_tick(db)
     db.refresh(row)
     assert row.status == "pending"
     assert row.attempts == 1
     assert "ConnectionError" in (row.error or "")
+    assert summary1.get("failed", 0) == 0  # not yet a terminal failure
+    assert summary1.get("sent", 0) == 0
 
-    # Tick 2: succeeds
-    email_worker.run_one_tick(db)
+    # Tick 2: succeeds — summary now reports the eventual outcome.
+    summary2 = email_worker.run_one_tick(db)
     db.refresh(row)
     assert row.status == "sent"
     assert row.attempts == 2
     assert row.resend_message_id == "msg_retry"
     assert row.error is None
+    assert summary2["sent"] == 1
+    assert summary2.get("failed", 0) == 0
 
 
 def test_worker_gives_up_at_max_attempts(db, stub_send, monkeypatch):
     """After EMAIL_MAX_ATTEMPTS failures, row is marked 'failed'
-    permanently — no more retries for that row."""
+    permanently — no more retries for that row.
+
+    Summary contract: only the FINAL tick (which marks the row
+    terminally failed) increments summary['failed']; the prior
+    two failed-but-retrying ticks count nothing."""
     monkeypatch.setattr(email_worker.settings, "EMAIL_MAX_ATTEMPTS", 3)
 
     row = _make_outbox_row(db, recipient="alice@example.com")
     stub_send.default = EmailSendResult(ok=False, error="HTTP 500")
 
+    summaries = []
     for _ in range(3):
-        email_worker.run_one_tick(db)
+        summaries.append(email_worker.run_one_tick(db))
         db.refresh(row)
 
     assert row.status == "failed"
     assert row.attempts == 3
     assert row.error == "HTTP 500"
 
+    # Ticks 1 + 2 produced no terminal counts; tick 3 produced one
+    # terminal failure.  Total across all ticks: failed=1, NOT failed=3.
+    total_failed = sum(s.get("failed", 0) for s in summaries)
+    assert total_failed == 1, (
+        f"expected 1 terminal failure across all ticks, got {total_failed} "
+        f"(per-tick: {[s.get('failed', 0) for s in summaries]})"
+    )
+
     # Fourth tick should pick up nothing — row is no longer 'pending'.
     stub_send.calls.clear()
     email_worker.run_one_tick(db)
diff --git a/backend/tests/test_notifications.py b/backend/tests/test_notifications.py
@@ -1078,3 +1078,41 @@ def test_unsubscribe_endpoint_idempotent(unauthenticated_client, db):
     assert r1.status_code == 200
     assert r2.status_code == 200
     assert Setting.get(db, "org_test123", "email_camera_offline") == "false"
+
+
+def test_unsubscribe_endpoint_html_escapes_kind(unauthenticated_client):
+    """The unsubscribe HTML page builds via str.format() (not Jinja2,
+    so autoescape doesn't apply).  Even though the kind value flows
+    from a verified JWT — making this practically unreachable without
+    the Clerk secret — anything substituted into the page must be
+    HTML-escaped as defense in depth.
+
+    This test forges a token by signing a kind containing HTML; it
+    succeeds only because the test process knows the secret.  In
+    production an attacker would need CLERK_SECRET_KEY to reach
+    this code path, at which point they have everything anyway —
+    but the escape protects against future paths that might surface
+    less-trusted input here."""
+    import jwt
+    from app.core.email_unsubscribe import _get_secret
+
+    # Use an unknown kind so we hit the "not in _EMAIL_KIND_TO_SETTING"
+    # path that interpolates the raw kind verbatim (the known-kind
+    # path uses a pretty-printed version which would obscure the
+    # escape).
+    bad_kind = "<script>alert('xss')</script>"
+    token = jwt.encode(
+        {"org_id": "org_x", "kind": bad_kind, "sub": "email-unsubscribe"},
+        _get_secret(),
+        algorithm="HS256",
+    )
+
+    resp = unauthenticated_client.get(
+        f"/api/notifications/email/unsubscribe?t={token}"
+    )
+
+    body = resp.text
+    # Escaped form must be present.
+    assert "&lt;script&gt;" in body
+    # Raw form must NOT appear in the rendered HTML.
+    assert "<script>alert" not in body