From f6912410371552adb87ca1aa21cc1155c909e5b8 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Wed, 13 May 2026 03:59:34 +0000
Subject: [PATCH 1/3] Add cross-project orchestrator scaffolding (Phase 0)

Client folder template (CLAUDE.md, playbook.md, contacts.yaml + working
subdirs), the cross-client global-playbook, a routing/risk config schema,
on-disk file-format spec, and a phased build runbook. Code for the Managed
Agent, ingress Worker, whatsmeow bridge, and Plaud connector lands in later
phases; real config and client data stay out of the repo per .gitignore.

https://claude.ai/code/session_011AfTrSBHggjMUePW9LWaoi
---
 orchestrator/.gitignore                       |  23 +++
 orchestrator/FORMATS.md                       | 163 ++++++++++++++++
 orchestrator/README.md                        | 176 ++++++++++++++++++
 .../clients/_client-template/CLAUDE.md        |  49 +++++
 .../clients/_client-template/contacts.yaml    |  46 +++++
 .../_client-template/decisions/.gitkeep       |   0
 .../_client-template/examples/.gitkeep        |   0
 .../_client-template/feedback/.gitkeep        |   0
 .../clients/_client-template/inbox/.gitkeep   |   0
 .../_client-template/meetings/.gitkeep        |   0
 .../clients/_client-template/outbox/.gitkeep  |   0
 .../pending-approval/.gitkeep                 |   0
 .../clients/_client-template/playbook.md      |  56 ++++++
 orchestrator/global-playbook.md               |  66 +++++++
 orchestrator/routing.example.yaml             |  64 +++++++
 15 files changed, 643 insertions(+)
 create mode 100644 orchestrator/.gitignore
 create mode 100644 orchestrator/FORMATS.md
 create mode 100644 orchestrator/README.md
 create mode 100644 orchestrator/clients/_client-template/CLAUDE.md
 create mode 100644 orchestrator/clients/_client-template/contacts.yaml
 create mode 100644 orchestrator/clients/_client-template/decisions/.gitkeep
 create mode 100644 orchestrator/clients/_client-template/examples/.gitkeep
 create mode 100644 orchestrator/clients/_client-template/feedback/.gitkeep
 create mode 100644 orchestrator/clients/_client-template/inbox/.gitkeep
 create mode 100644 orchestrator/clients/_client-template/meetings/.gitkeep
 create mode 100644 orchestrator/clients/_client-template/outbox/.gitkeep
 create mode 100644 orchestrator/clients/_client-template/pending-approval/.gitkeep
 create mode 100644 orchestrator/clients/_client-template/playbook.md
 create mode 100644 orchestrator/global-playbook.md
 create mode 100644 orchestrator/routing.example.yaml
diff --git a/orchestrator/.gitignore b/orchestrator/.gitignore
new file mode 100644
index 0000000..39607f7
--- /dev/null
+++ b/orchestrator/.gitignore
@@ -0,0 +1,23 @@
+# Real config and client data never go in the repo — only schemas/templates do.
+routing.yaml
+clients/*/
+!clients/_client-template/
+
+# Secrets / credentials of any kind
+*.key
+*.pem
+*-credentials.json
+*-service-account*.json
+.env
+.env.*
+.dev.vars
+
+# whatsmeow session store
+*.db
+session/
+
+# build artifacts (added in later phases)
+node_modules/
+dist/
+.wrangler/
+bridge/bin/
diff --git a/orchestrator/FORMATS.md b/orchestrator/FORMATS.md
new file mode 100644
index 0000000..b897f51
--- /dev/null
+++ b/orchestrator/FORMATS.md
@@ -0,0 +1,163 @@
+# File formats inside a client folder
+
+These are the on-disk formats the orchestrator agent reads and writes inside each
+`Clients/<X>/` folder. Kept here so the agent definition (Phase 1) and you (when
+reviewing in cowork) share one spec. `<id>` is a short stable id derived from the
+source message id; `<date>` is `YYYY-MM-DD`; `<slug>` is a short kebab-case
+summary.
+
+## `inbox/<ts>-<source>-<id>.md` — an incoming item
+
+`<ts>` is `YYYYMMDDTHHMMSSZ`. `<source>` ∈ `gmail | whatsapp | plaud`.
+
+```markdown
+---
+id: 9f3a2b
+source: gmail
+received_at: 2026-05-13T09:14:00Z
+client: acme
+classified_risk: high            # low | med | high
+routed_by: rule                  # rule | classifier
+sender: "Jane Counsel <jane.counsel@lawfirm.com>"
+subject: "Re: Project Falcon — revised SPA schedule 3"
+thread_ref: "<gmail-thread-id>"
+status: staged                   # staged | sent | logged | superseded
+ref: pending-approval/9f3a2b.md  # where the resulting draft/decision lives
+---
+
+<verbatim body of the email / WhatsApp text / Plaud-transcript-derived note>
+```
+
+Raw `inbox/` copies are expired after processing per the retention policy — only
+the distilled `decisions/` entry is kept long-term.
+
+## `pending-approval/<id>.md` — a draft awaiting sign-off
+
+```markdown
+---
+id: 9f3a2b
+client: acme
+created_at: 2026-05-13T09:14:30Z
+in_reply_to: inbox/20260513T091400Z-gmail-9f3a2b.md
+channel: gmail                   # how the reply would be sent
+to: "jane.counsel@lawfirm.com"
+risk: high
+rationale: "Touches SPA schedule 3 (contract terms) — material per global + client playbook."
+suggested_action: "Send the drafted reply as-is, or edit below first."
+---
+
+## Draft reply
+
+<the proposed email / WhatsApp text — edit this before approving if you want>
+
+## Why this draft
+
+<2–4 lines: what changed, what the agent is proposing, what (if anything) it
+wasn't sure about>
+```
+
+Approve by replying `/approve <id>` (WhatsApp/email) or, in the cowork project,
+telling Claude "send the proposal in pending-approval/<id>". On send: the reply
+goes out via the Gmail/whatsmeow MCP, this file moves to `outbox/<id>.md`, and a
+`decisions/` entry is appended. Reject with `/reject <id> <reason>`.
+
+## `outbox/<id>.md` — audit copy of something already sent
+
+Same front-matter as `pending-approval/` plus `sent_at` and `message_id` of the
+outbound message (so the ingress can dedupe it and not re-process it).
+
+## `decisions/<date>-<slug>.md` — the append-only ledger
+
+```markdown
+---
+date: 2026-05-13
+client: acme
+kind: reply                      # reply | amendment | meeting | lesson | route-fix
+ref: outbox/9f3a2b.md
+auto: false                      # true if auto-sent without approval
+---
+
+**What happened:** Replied to Jane Counsel confirming we accept the revised
+wording of SPA schedule 3 §2 but flagging the indemnity cap in §4 still needs
+the client's sign-off.
+
+**Why:** Client had already approved the §2 change verbally (see 2026-05-10
+meeting note); §4 is unresolved.
+
+**Follow-ups:** Chase client on §4 indemnity cap.
+```
+
+## `meetings/<date>-<slug>.md` — a Plaud transcript, processed
+
+```markdown
+---
+date: 2026-05-13
+client: acme
+source: plaud
+recording_id: "<plaud-id>"
+participants: ["<you>", "Acme CEO", "Acme GC"]
+title: "Project Falcon — weekly sync"
+---
+
+## Summary
+<3–8 lines>
+
+## Decisions made in the meeting
+- <decision> — also written to decisions/<date>-<slug>.md
+
+## Action items
+- [ ] <owner> — <action> — <due>
+- ...
+
+## Follow-ups drafted
+- pending-approval/<id>.md — email to <person> re <thing>
+
+## Transcript
+<full transcript, or a reference if kept elsewhere per retention policy>
+```
+
+## `feedback/<id>.json` — a labeled correction (the learning loop)
+
+```json
+{
+  "id": "9f3a2b",
+  "ts": "2026-05-13T11:02:00Z",
+  "channel": "gmail",
+  "client": "acme",
+  "input_summary": "Counsel email re revised SPA schedule 3",
+  "agent_classification": { "client": "acme", "risk": "high", "routed_by": "rule" },
+  "agent_output": "<draft the agent produced>",
+  "your_final_output": "<what you actually sent, if you edited it>",
+  "your_action": "approved_with_edits",
+  "diff": "<concise draft → final diff, or null>",
+  "your_reason": null,
+  "derived_lesson": "Client wants schedule references written as 'Sch. 3' not 'Schedule 3'."
+}
+```
+
+`your_action` ∈ `approved | approved_with_edits | rejected | undone | re_routed |
+lesson_rejected`. Records are kept as summaries + diffs, not full transcripts.
+The weekly `mode: reflect` run reads recent records, finds patterns recurring
+≥2×, and proposes edits to `playbook.md` / `CLAUDE.md` / `routing.yaml` /
+`global-playbook.md` / `examples/` — each proposal arrives as a
+`pending-approval/` item.
+
+## `examples/<slug>.md` — a curated (situation → ideal reply) pair
+
+```markdown
+---
+client: acme
+tags: ["scheduling", "counsel"]
+added: 2026-05-13
+source: outbox/9f3a2b.md          # where this exemplar came from
+---
+
+## Situation
+<short description of the incoming context>
+
+## Ideal reply
+<the reply you'd want sent in this situation>
+```
+
+The drafter retrieves the closest 2–3 of these by similarity and includes them
+in-context when drafting.
diff --git a/orchestrator/README.md b/orchestrator/README.md
new file mode 100644
index 0000000..10fb02c
--- /dev/null
+++ b/orchestrator/README.md
@@ -0,0 +1,176 @@
+# Cross-project orchestrator
+
+An always-on orchestrator that watches Gmail, WhatsApp (via whatsmeow), and Plaud
+meeting recordings, routes each new item to the relevant client engagement, and
+either acts autonomously on low-risk items or stages a proposal for approval on
+material ones. State for each client lives in a cloud-storage folder that the
+matching Claude Desktop cowork project reads as its project files.
+
+The full design — architecture, components, costs, privacy levers, phased
+rollout, verification, and open questions — is in
+[`/root/.claude/plans/i-am-using-cowork-lexical-globe.md`](../../../root/.claude/plans/i-am-using-cowork-lexical-globe.md)
+(the approved plan). This directory holds the buildable artifacts.
+
+## What's here now (Phase 0)
+
+```
+orchestrator/
+├── README.md                     # this file — the build runbook
+├── global-playbook.md            # cross-client rules the agent always reads
+├── routing.example.yaml          # routing/risk config schema, with examples
+└── clients/
+    └── _client-template/         # copy this per client into your cloud-storage root
+        ├── CLAUDE.md             # client persona / deal context / tone — fill in per client
+        ├── playbook.md           # per-client SOPs + what counts as "material"
+        ├── contacts.yaml         # senders that matter + Plaud title patterns
+        ├── inbox/                # agent drops incoming items here
+        ├── meetings/             # Plaud transcripts + extracted action items
+        ├── outbox/               # sent replies (audit copy)
+        ├── pending-approval/     # drafts / proposed amendments awaiting sign-off
+        ├── decisions/            # append-only ledger of what was done and why
+        ├── feedback/             # labeled corrections (for the learning loop)
+        └── examples/             # curated (situation → ideal reply) pairs
+```
+
+Later phases (Managed Agent definition, Cloudflare Worker ingress, the Hetzner
+whatsmeow bridge, the Plaud connector, the reflection job) get added under
+`orchestrator/agent/`, `orchestrator/ingress/`, `orchestrator/bridge/`, and
+`orchestrator/connectors/` as we work through the rollout.
+
+## Build runbook
+
+Steps marked **[you]** require your own accounts/infra and can't be done from a
+coding session. Steps marked **[code]** produce artifacts in this repo.
+
+### Phase 0 — skeleton + pilot migration
+
+1. **[you]** Confirm which connectors Claude Desktop cowork supports as a project
+   file source. The default assumed here is **Google Workspace Drive** (Workspace
+   account, EU data region). If cowork only offers something else (GitHub repo,
+   OneDrive, Notion), tell me and we adjust. *This is the gating open question.*
+2. **[you]** In your cloud-storage root, create a `Clients/` folder and copy
+   `clients/_client-template/` into it as `Clients/_Client Template/`.
+3. **[you]** Pick one real engagement as the pilot (referred to below as `Acme`).
+   Copy `Clients/_Client Template/` → `Clients/Acme/`.
+4. **[you]** In the Acme cowork project, run the two prompts noted at the top of
+   `CLAUDE.md` and `playbook.md` to self-extract the context and the operating
+   rules; paste the results in. Fill `contacts.yaml`.
+5. **[you]** Connect the `Clients/Acme/` folder to the Acme cowork project as a
+   file source.
+6. **[you]** Create a Google service account (or equivalent) that the future
+   Managed Agent will use; share `Clients/` to it with edit access. Keep the key
+   safe — it does **not** go in this repo.
+7. **Verify:** drop a test file `Clients/Acme/inbox/2026-05-08-test.md`; open the
+   Acme cowork project; confirm it appears as a project file.
+
+### Phase 1 — Managed Agent core  *(next coding session)*
+
+8. **[you]** Confirm you have access to the Claude Developer Platform / Managed
+   Agents (note: a personal Max subscription does **not** cover this — see the
+   "Costs & subscription" section of the plan; the Max-only alternative is a
+   self-hosted `claude -p` runner, which is a different build).
+9. **[code]** Add `orchestrator/agent/` — system prompt, MCP server config
+   (Google Drive + Gmail to start), input schema, outcome/rubric, single-agent v1.
+10. **[you]** Create the Managed Agent in your account from that definition; wire
+    the Drive + Gmail MCP servers.
+11. **Verify:** invoke the agent manually with a synthetic pricing-email payload;
+    confirm it writes `inbox/<id>.md` and stages `pending-approval/<id>.md` with a
+    sensible rationale; a synthetic "confirming Tuesday 3pm" payload classifies
+    low-risk.
+
+### Phase 2 — Gmail ingress
+
+12. **[you]** GCP project: enable Gmail API + Pub/Sub; `users.watch` on INBOX →
+    topic → push subscription. OAuth consent for `gmail.modify` (testing mode is
+    fine for solo use).
+13. **[code]** Add `orchestrator/ingress/` — Cloudflare Worker: `/webhook/gmail`
+    (verify push, pull history deltas, dedupe by `messageId`, invoke agent),
+    `/webhook/agent` (completion → email notification). KV for `historyId` +
+    processed IDs.
+14. **[you]** Deploy the Worker; set secrets (`wrangler secret put ...`).
+15. **Verify:** send a real email from a configured sender; within ~60s expect
+    `pending-approval/<id>.md` in Drive + an email ping.
+
+### Phase 3 — routing + auto-send
+
+16. **[code]** `routing.yaml` (from `routing.example.yaml`), per-client
+    `playbook.md`, risk threshold, low-risk auto-send, `/approve` handling in the
+    Worker.
+17. **[you]** Onboard 2–3 clients (repeat Phase 0 steps 3–5 per client; add each
+    to `routing.yaml`).
+18. **Verify:** low-risk pattern → direct send + notification, no
+    pending-approval. Pricing email → pending-approval + ping; `/approve <id>` →
+    reply sent, file moved to `outbox/`, `decisions/` appended. Flip a client's
+    `risk_threshold` to `low`; confirm the low-risk message now stages.
+
+### Phase 4 — WhatsApp
+
+19. **[you]** Provision a Hetzner CX-series VM in an EU datacenter; hardened
+    Debian, LUKS full-disk encryption, SSH keys only + fail2ban.
+20. **[you]** Decide on a WhatsApp number (a dedicated business number is
+    recommended — whatsmeow's multi-device protocol carries a ban risk on
+    personal accounts).
+21. **[code]** Add `orchestrator/bridge/` — Go process: open whatsmeow session,
+    subscribe to `*events.Message`, POST relevant messages to the ingress; also
+    serve the whatsmeow MCP over token-gated HTTPS/SSE for the agent to send
+    replies.
+22. **[you]** Deploy the bridge to the VM; scan the WhatsApp QR; attach the
+    remote whatsmeow MCP to the Managed Agent; wire `/webhook/whatsapp`.
+23. **Verify:** send a WhatsApp from a configured number; bridge fires
+    `/webhook/whatsapp`; agent processes; low-risk reply arrives via whatsmeow;
+    the cloud agent can reach the remote whatsmeow MCP.
+
+### Phase 5 — Plaud
+
+24. **[you]** Confirm what Plaud exposes: native webhook/API → email-forward →
+    Notion/Drive sync (in that order of preference).
+25. **[code]** Add `orchestrator/connectors/plaud/` matching whatever Plaud
+    offers; wire `/webhook/plaud`; add `meetings/` handling + `plaud_title_patterns`
+    routing. Transcripts default to `pending-approval/`.
+26. **Verify:** record a short test meeting whose title matches an Acme
+    `plaud_title_pattern`; confirm the connector fires, `meetings/<date>-<slug>.md`
+    is written with summary + action items, follow-up draft lands in
+    `pending-approval/`. A non-matching meeting lands in `triage`.
+
+### Phase 6 — feedback capture
+
+27. **[code]** On every `/approve`, `/reject`, `/undo`, edit-then-send (from
+    cowork), and re-route, write `feedback/<id>.json` (agent input, agent output,
+    your final output, your action, diff, reason, derived lesson). No behavior
+    change yet — collect for a couple of weeks.
+28. **Verify:** approve one draft unchanged, edit-then-send another, `/reject` a
+    third with a reason; confirm three `feedback/*.json` records with the right
+    `your_action`, `diff`, `your_reason`.
+
+### Phase 7 — reflection & few-shot
+
+29. **[code]** `mode: reflect` in the agent; weekly cron in the ingress;
+    example-bank retrieval in the drafter; `global-playbook.md` updates from
+    cross-client patterns; learning metrics in the digest. Proposed edits flow
+    through `pending-approval/`.
+30. **Verify:** seed ~10 synthetic `feedback/` records with a repeated pattern;
+    run `mode: reflect`; confirm a `pending-approval/` proposal adds a rule to
+    `playbook.md`; approve it; confirm `playbook.md` updated, `decisions/` entry
+    logged, a later test item handled per the new rule; reject a different
+    proposal with a reason; confirm it logs a `feedback/` record and doesn't
+    reappear.
+
+### Phase 8 — polish
+
+31. **[code]** Daily digest email; `decisions/`/`meetings/` search; WhatsApp
+    `/approve` one-tap; Drive reconcile job for cowork-initiated sends; per-client
+    monthly token caps + daily spend alert.
+
+## Secrets — never commit
+
+Gmail refresh token, Anthropic API key, whatsmeow session DB, Drive
+service-account key, remote-MCP bearer tokens, Cloudflare API token. Keep these
+in Cloudflare Worker secrets / the VM's encrypted secret store. This repo holds
+**only** code, config schemas, and templates.
+
+## Loop-prevention reminder
+
+The agent's own outbound replies must not retrigger inbox processing: dedupe on
+Gmail `messageId` / WhatsApp message ID, and tag outbound mail (a Gmail label)
+and WhatsApp messages (a marker) so the ingress skips them — including
+cowork-initiated sends.
diff --git a/orchestrator/clients/_client-template/CLAUDE.md b/orchestrator/clients/_client-template/CLAUDE.md
new file mode 100644
index 0000000..f1180cf
--- /dev/null
+++ b/orchestrator/clients/_client-template/CLAUDE.md
@@ -0,0 +1,49 @@
+<!--
+  CLIENT CONTEXT FILE — fill this in per client.
+
+  Fastest way to populate it: open this client's existing cowork project and ask:
+
+    "Summarize this engagement so a new assistant could pick it up cold:
+     - the client (who they are, what they do)
+     - my role and what I've been engaged to do
+     - key people on their side and on any counterparty side (names, roles)
+     - current status of the deal/work and what's outstanding
+     - the important decisions made so far and why
+     - how I like replies written for this client (tone, length, sign-off,
+       anything to always include or never say)"
+
+  Paste the result below, then trim. The orchestrator agent reads this on every
+  item for this client. The agent may also propose edits to this file via the
+  reflection run — those come to you as a pending-approval item.
+-->
+
+# <Client name> — engagement context
+
+## Who they are
+<one short paragraph>
+
+## My role
+<what you've been engaged to do; "commercial advisor on ..." etc.>
+
+## Key people
+- **<Name>** — <role>, their side. <email/phone if relevant>
+- **<Name>** — <role>, their side.
+- **<Name>** — <role>, counterparty side.
+- (Detailed contact routing lives in `contacts.yaml`; this is just the cast.)
+
+## Current status
+<where the deal/work stands; the live workstreams; what's blocked on whom>
+
+## Decisions so far
+<the material things already agreed and the reasoning — keep this current; the
+append-only ledger in `decisions/` has the full history with dates>
+
+## How to write for this client
+- Tone: <e.g. formal / warm-professional / very terse>
+- Length: <e.g. short — 3–5 sentences max unless substantive>
+- Sign-off: <e.g. "Best, <you>" / matches the thread>
+- Always: <e.g. CC the project manager on anything touching timelines>
+- Never: <e.g. quote a number without my sign-off / commit to a date>
+
+## Notes / quirks
+<anything else that would trip up someone handling this engagement>
diff --git a/orchestrator/clients/_client-template/contacts.yaml b/orchestrator/clients/_client-template/contacts.yaml
new file mode 100644
index 0000000..84f84c3
--- /dev/null
+++ b/orchestrator/clients/_client-template/contacts.yaml
@@ -0,0 +1,46 @@
+# Contacts that matter for this client engagement.
+#
+# Used for routing (does this email/WhatsApp/meeting belong to this client?) and
+# to give the drafter the right context about who it's replying to. Routing rules
+# in ../../routing.yaml reference these — keep the two consistent (this file is
+# the human-readable detail; routing.yaml is what the matcher uses).
+#
+# `always_review: true` means: any reply to/from this person is MATERIAL,
+# regardless of topic — it gets staged in pending-approval/, never auto-sent.
+
+client: "<Client name>"
+
+contacts:
+
+  - name: "<Full name>"
+    role: "<e.g. CEO>"
+    side: client            # client | counterparty | adviser | internal
+    emails:
+      - "<name@client.com>"
+    phones:
+      - "<+15551234567>"    # E.164
+    always_review: false
+
+  - name: "<Full name>"
+    role: "<e.g. General Counsel>"
+    side: client
+    emails:
+      - "<gc@client.com>"
+    always_review: true     # legal — stage everything
+
+  - name: "<Full name>"
+    role: "<e.g. their M&A lawyer>"
+    side: counterparty
+    emails:
+      - "<partner@lawfirm.com>"
+    always_review: true
+
+# Plaud meeting routing for this client (substring match on the meeting title,
+# and/or participant email domains). Mirror these into routing.yaml's
+# plaud_title_patterns / plaud_participant_emails for the actual matcher.
+plaud:
+  title_patterns:
+    - "<Client name>"
+    - "<Project codename>"
+  participant_emails:
+    - "@client.com"
diff --git a/orchestrator/clients/_client-template/decisions/.gitkeep b/orchestrator/clients/_client-template/decisions/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/orchestrator/clients/_client-template/examples/.gitkeep b/orchestrator/clients/_client-template/examples/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/orchestrator/clients/_client-template/feedback/.gitkeep b/orchestrator/clients/_client-template/feedback/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/orchestrator/clients/_client-template/inbox/.gitkeep b/orchestrator/clients/_client-template/inbox/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/orchestrator/clients/_client-template/meetings/.gitkeep b/orchestrator/clients/_client-template/meetings/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/orchestrator/clients/_client-template/outbox/.gitkeep b/orchestrator/clients/_client-template/outbox/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/orchestrator/clients/_client-template/pending-approval/.gitkeep b/orchestrator/clients/_client-template/pending-approval/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/orchestrator/clients/_client-template/playbook.md b/orchestrator/clients/_client-template/playbook.md
new file mode 100644
index 0000000..a6f43da
--- /dev/null
+++ b/orchestrator/clients/_client-template/playbook.md
@@ -0,0 +1,56 @@
+<!--
+  CLIENT PLAYBOOK — the operating rules for this engagement. This is the single
+  most important file for risk classification: it tells the orchestrator what
+  counts as "material" (→ stage for your approval) versus routine (→ may be
+  auto-sent), plus any client-specific tone/process rules.
+
+  Fastest way to populate it: open this client's cowork project and ask:
+
+    "What are the standing rules for how I handle this engagement? Cover:
+     - pricing/fee bands and what I can say about price without checking with the
+       client vs. what always needs sign-off
+     - which topics are sensitive enough that any reply must be reviewed first
+     - which people, if they email, mean 'always review before sending'
+     - standard documents I send as-is (and which aren't standard)
+     - any process rules (who to CC, escalation triggers, response-time norms)"
+
+  Paste the result below, then edit into the sections. The agent reads this on
+  every item, alongside the cross-client `global-playbook.md`. The reflection run
+  proposes additions here from your feedback — approved via pending-approval.
+-->
+
+# <Client name> — playbook
+
+## Always MATERIAL for this client (→ stage in pending-approval, never auto-send)
+<Start from the global rules in ../../global-playbook.md, then add anything
+client-specific, e.g.:>
+- Any mention of the <SPA / specific contract> or its schedules.
+- Anything from <specific person> — always review.
+- Any number relating to <the transaction / the budget>.
+- <...>
+
+## Safe to auto-send for this client (LOW risk)
+<Only widen beyond the global LOW criteria if you're confident, e.g.:>
+- Confirming or rescheduling internal calls with <named contacts>.
+- Sending the standard <NDA / engagement letter> when explicitly asked for it.
+- Acknowledging receipt of documents.
+- <...>
+
+## Pricing / commercial guardrails
+- <e.g. "Day rate is RM X; never quote below RM Y without client sign-off.">
+- <e.g. "Do not discuss the success-fee structure over email at all — stage it.">
+
+## Standard documents
+- <Doc name> — standard, may be sent as-is on request: <where it lives>.
+- <Doc name> — NOT standard, always review before sending.
+
+## Process rules
+- CC on <topic>: <who>.
+- Escalate to <whom> immediately if: <triggers>.
+- Response-time norm: <e.g. "acknowledge within the day, full reply within 48h">.
+
+## Tone (overrides global defaults if different)
+<only if this client differs from global-playbook.md's defaults>
+
+## Open issues to keep in mind
+<live questions the agent should be aware of when drafting>
diff --git a/orchestrator/global-playbook.md b/orchestrator/global-playbook.md
new file mode 100644
index 0000000..09db593
--- /dev/null
+++ b/orchestrator/global-playbook.md
@@ -0,0 +1,66 @@
+# Global playbook
+
+Cross-client rules the orchestrator agent reads on **every** item, in addition to
+the matched client's `playbook.md`. Keep this short and high-confidence — these
+override nothing client-specific; they only set the floor. New entries here
+should come from the weekly reflection run (patterns seen across ≥2 clients) and
+be approved like any other change.
+
+## Always treat as MATERIAL (→ stage in pending-approval, never auto-send)
+
+- Anything about price, fees, rates, discounts, payment terms, or milestone
+  payments.
+- Anything about scope, deliverables, timelines that affect a deadline, or change
+  requests.
+- Contract terms: NDAs beyond the standard template, MSAs, SOWs, SPAs, term
+  sheets, indemnities, liability caps, IP ownership, exclusivity,
+  non-compete/non-solicit, termination clauses.
+- Equity, options, valuation, cap-table, or anything with a number that could
+  appear in a financing.
+- Headcount, hiring, redundancies, or comp.
+- Anything a counterparty's lawyer sends or is copied on.
+- Anything that commits the client to a meeting, call, or deadline with a third
+  party that isn't a simple "yes, that time works".
+- Anything where you're not sure — default to MATERIAL.
+
+## Safe to auto-send (LOW risk) — only when ALL of these hold
+
+- It's a reply within an existing thread to a known contact (in `contacts.yaml`).
+- It conveys no new commercial position — it confirms, acknowledges, schedules,
+  forwards, or sends an already-approved standard document.
+- It contains no numbers that matter, no contract language, no scope/timeline
+  change.
+- The client's `playbook.md` doesn't flag the topic or the sender as always-ask.
+- The client's `risk_threshold` permits auto-send.
+
+## Tone and form (defaults — client `playbook.md` overrides)
+
+- Professional, concise, no filler. Get to the point in the first sentence.
+- Don't over-apologize, don't over-explain, don't speculate on the client's
+  behalf about anything commercial.
+- Match the thread's register and language.
+- Never invent facts, figures, dates, or commitments. If a reply needs a fact you
+  don't have, that alone makes it MATERIAL — stage it with a note about what's
+  missing.
+
+## Handling meeting transcripts (Plaud)
+
+- A transcript is new context, not something to reply to. Summarize it, extract
+  action items / commitments / decisions, and update `decisions/`.
+- Any follow-up email or amendment that comes out of a meeting is almost always
+  MATERIAL — stage it. Only auto-send a follow-up if it's purely "sending the
+  notes / confirming the next slot" and `playbook.md` whitelists it.
+
+## Routing
+
+- If you can't confidently route an item to a client, send it to `triage` —
+  never guess.
+- A re-route by the user is a routing signal: log it to `feedback/` so the
+  reflection run can propose a `routing.yaml` rule.
+
+## Privacy / data handling
+
+- Send the classifier only what it needs: sender + subject + a short snippet.
+- Send full bodies/transcripts to the drafter only when actually drafting.
+- Keep `feedback/<id>.json` as one-line summaries + diffs, not full transcripts.
+- Don't write secrets or full credentials into any file in a client folder.
diff --git a/orchestrator/routing.example.yaml b/orchestrator/routing.example.yaml
new file mode 100644
index 0000000..b66bbd7
--- /dev/null
+++ b/orchestrator/routing.example.yaml
@@ -0,0 +1,64 @@
+# Routing + risk config for the orchestrator.
+#
+# Copy to routing.yaml and fill in real values. The agent reads this to decide
+# which client an inbound item belongs to and whether it may be auto-sent.
+#
+# Matching order:
+#   1. Rule match on this file: sender email/domain, sender phone, or — for Plaud
+#      transcripts — a meeting-title pattern or a participant email.
+#   2. If no rule matches, the classifier reads the candidate clients' playbook.md
+#      files and picks one (or `triage`) with a rationale.
+#
+# risk_threshold semantics: an item the classifier rates BELOW this level may be
+# auto-sent; at or above it, the item is staged in pending-approval/. So:
+#   - risk_threshold: high  → only HIGH items stage; LOW and MED auto-send.
+#   - risk_threshold: med   → MED and HIGH stage; only LOW auto-sends.   (recommended default)
+#   - risk_threshold: low   → everything stages; nothing auto-sends.     (safest)
+
+default_project: triage          # where unrouted items go
+
+# Optional global caps (the agent / ingress enforce these; alerts when exceeded).
+limits:
+  per_client_monthly_token_cap: 2000000   # soft cap; pause + notify when hit
+  daily_spend_alert_usd: 5
+
+projects:
+
+  acme:
+    drive_folder_id: "REPLACE_WITH_DRIVE_FOLDER_ID"   # ID of Clients/Acme/ in your cloud store
+    display_name: "Acme Corp"
+    emails:
+      - "@acme.com"                 # any sender at this domain
+      - "ceo@acme.io"               # a specific external address
+      - "jane.counsel@lawfirm.com"  # their lawyer — note: counsel mail is MATERIAL by global rule
+    phones:
+      - "+15551234567"              # E.164; matched against whatsmeow sender JID
+    plaud_title_patterns:
+      - "Acme"                      # substring match on the Plaud meeting title
+      - "Project Falcon"            # the engagement's internal codename
+    plaud_participant_emails:
+      - "@acme.com"
+    risk_threshold: med
+
+  beta:
+    drive_folder_id: "REPLACE_WITH_DRIVE_FOLDER_ID"
+    display_name: "Beta Holdings"
+    emails:
+      - "@beta.io"
+    phones:
+      - "+15557654321"
+    plaud_title_patterns:
+      - "Beta"
+    risk_threshold: high            # this client: only HIGH (material) items stage; LOW and MED auto-send
+
+  # Add one block per client. Onboarding a client = create Clients/<Name>/ from
+  # the template, fill CLAUDE.md/playbook.md/contacts.yaml, connect it to the
+  # cowork project, then add a block here.
+
+# The triage "project" is a real folder + cowork project where unrouted items
+# land. Re-routing from triage is logged to feedback/ so the reflection run can
+# propose a new rule above.
+triage:
+  drive_folder_id: "REPLACE_WITH_DRIVE_FOLDER_ID"
+  display_name: "Triage"
+  risk_threshold: low              # never auto-send anything from triage

From c9afe275ecc1d6bb502a95ce02ea0bb031fe0a5a Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Wed, 13 May 2026 04:02:32 +0000
Subject: [PATCH 2/3] Add Managed Agent definition (Phase 1)

The orchestrator "brain": agent.yaml (model, MCP servers, input schema, success
rubric, completion webhook) + system-prompt.md covering process/approve/reject/
reflect modes, plus a README with instantiation steps and verification payloads.
v1 is a single agent doing classify -> act inline; sub-agent fan-out is deferred.

https://claude.ai/code/session_011AfTrSBHggjMUePW9LWaoi
---
 orchestrator/agent/README.md        |  77 ++++++++++++++++
 orchestrator/agent/agent.yaml       | 127 ++++++++++++++++++++++++++
 orchestrator/agent/system-prompt.md | 132 ++++++++++++++++++++++++++++
 3 files changed, 336 insertions(+)
 create mode 100644 orchestrator/agent/README.md
 create mode 100644 orchestrator/agent/agent.yaml
 create mode 100644 orchestrator/agent/system-prompt.md

diff --git a/orchestrator/agent/README.md b/orchestrator/agent/README.md
new file mode 100644
index 0000000..f245f81
--- /dev/null
+++ b/orchestrator/agent/README.md
@@ -0,0 +1,77 @@
+# Managed Agent — orchestrator brain (Phase 1)
+
+The autonomous "brain": classify → route → act for each inbound item, plus
+approve / reject / reflect modes. Runs on the Claude Developer Platform (Managed
+Agents) — **a personal Max subscription does not cover this**; see the plan's
+"Costs & subscription" section for the Max-only alternative (a self-hosted
+`claude -p` runner using this same `system-prompt.md`).
+
+## Files
+
+- `agent.yaml` — portable agent definition: model, MCP servers, input schema,
+  success rubric, completion webhook. Map onto the platform's actual config
+  format when you create the agent.
+- `system-prompt.md` — the system prompt (referenced by `agent.yaml`).
+
+## To instantiate (Phase 1 steps from the runbook)
+
+1. Confirm you have Managed Agents access on your Anthropic account.
+2. Create the agent from `agent.yaml` + `system-prompt.md`.
+3. Attach MCP servers: **Google Drive** (service-account creds scoped to
+   `Clients/`, EU data region) and **Gmail** (`gmail.modify` scope). Leave
+   **whatsmeow** unattached until Phase 4.
+4. Upload `FORMATS.md` (from the orchestrator repo root) so the agent can
+   reference the on-disk formats, and make sure `Clients/global-playbook.md` and
+   `Clients/routing.yaml` exist in Drive.
+5. Set `max_turns` (20) and the per-client token caps from `routing.yaml`.
+6. Note the agent's invoke endpoint/credentials — the ingress Worker (Phase 2)
+   will call it; the agent's completion webhook points back at the Worker's
+   `/webhook/agent`.
+
+## Verify (Phase 1)
+
+Invoke the agent manually (no ingress yet) with a synthetic payload:
+
+```json
+{
+  "mode": "process",
+  "channel": "gmail",
+  "item_id": "test-pricing-1",
+  "sender": "ceo@acme.io",
+  "subject": "Re: Project Falcon — proposed fee",
+  "body": "Can you confirm the day rate we discussed? And can we lock it for 6 months?",
+  "thread_ref": "test-thread-1",
+  "occurred_at": "2026-05-13T09:00:00Z"
+}
+```
+
+Expect: `Clients/Acme/inbox/...test-pricing-1.md` written, classified **HIGH**
+(pricing), and `Clients/Acme/pending-approval/test-pricing-1.md` written with a
+draft + a rationale that names the pricing/lock-in as the reason. Then try a
+low-risk payload:
+
+```json
+{
+  "mode": "process",
+  "channel": "gmail",
+  "item_id": "test-sched-1",
+  "sender": "ceo@acme.io",
+  "subject": "Re: weekly sync",
+  "body": "Tuesday 3pm works for me — see you then.",
+  "thread_ref": "test-thread-2",
+  "occurred_at": "2026-05-13T09:05:00Z"
+}
+```
+
+Expect: classified **LOW**; with Acme's `risk_threshold: med` it would auto-send
+a brief confirmation. Gmail send is already wired, so if you don't want a
+real send during testing, point it at a test thread or temporarily set Acme's
+`risk_threshold: low` so it stages instead. Confirm the `outbox/` + `decisions/`
+(or `pending-approval/`) artifacts and the one-line completion summary.
+
+## v1 scope
+
+One agent, doing everything inline. Splitting into a Haiku classifier sub-agent +
+a Sonnet drafter sub-agent is a later optimisation (do it if classification cost
+or draft quality warrants). Don't build the sub-agent fan-out until the single
+agent loop is proven end-to-end.
diff --git a/orchestrator/agent/agent.yaml b/orchestrator/agent/agent.yaml
new file mode 100644
index 0000000..8da028c
--- /dev/null
+++ b/orchestrator/agent/agent.yaml
@@ -0,0 +1,127 @@
+# Managed Agent definition for the cross-project orchestrator (v1: single agent).
+#
+# This is the portable description of the agent. When you create the agent on the
+# Claude Developer Platform, map these fields onto whatever the Managed Agents
+# config format expects — the substance (system prompt, MCP servers, input
+# schema, success rubric, model) is what matters; field names may differ.
+#
+# v1 keeps it to ONE agent doing classify → act inline. Split into a Haiku
+# classifier sub-agent + a Sonnet drafter sub-agent later only if cost/quality
+# needs it (the plan's Phase 1 note).
+
+name: cross-project-orchestrator
+version: 1
+
+# Default model for the agent's reasoning/drafting. Use a Sonnet-tier model so
+# drafts are good; the agent is told to keep classification cheap (and you can
+# move classification to a Haiku sub-agent later).
+model: claude-sonnet-4-6
+
+system_prompt_file: ./system-prompt.md
+
+# Hard ceiling per invocation so a runaway run can't burn budget.
+max_turns: 20
+
+# MCP servers the agent may use. Wire these up in the platform; the agent's
+# tool access is pre-authorized (no permission prompts mid-run).
+#   - google_drive: read/write the Clients/ tree (service-account credentials,
+#     scoped to the Clients/ folder; EU data region).
+#   - gmail: read threads, send replies, apply the "processed" label.
+#   - whatsmeow: send WhatsApp replies — REMOTE server hosted on the Hetzner
+#     bridge VM, behind a bearer token. Not attached until Phase 4.
+mcp_servers:
+  - id: google_drive
+    purpose: "Read/write the per-client folders under Clients/ (state substrate)."
+    required: true
+  - id: gmail
+    purpose: "Read email threads, send replies, label processed mail."
+    required: true
+  - id: whatsmeow
+    purpose: "Send WhatsApp replies. Remote (Hetzner bridge), token-gated."
+    required: false   # attached in Phase 4
+
+# What the ingress (Cloudflare Worker) passes in on each invocation.
+input_schema:
+  type: object
+  required: [mode]
+  properties:
+    mode:
+      enum: [process, approve, reject, reflect]
+    # --- mode: process ---
+    channel:        { enum: [gmail, whatsapp, plaud] }
+    item_id:        { type: string, description: "Stable id derived from the source message/recording id." }
+    sender:         { type: string, description: "Email address or display name (gmail/whatsapp)." }
+    participants:   { type: array,  items: { type: string }, description: "Plaud meeting participants." }
+    subject:        { type: string, description: "Email subject or Plaud meeting title." }
+    body:           { type: string, description: "Email body / WhatsApp text / Plaud transcript+summary." }
+    thread_ref:     { type: string, description: "Provider thread id, for sending the reply in-thread." }
+    occurred_at:    { type: string, format: date-time }
+    routing_hint:   { type: object, description: "Pre-computed rule match from routing.yaml, if any: {client, matched_by}." }
+    # --- mode: approve | reject ---
+    ref_id:         { type: string, description: "The pending-approval/<ref_id>.md being approved/rejected." }
+    edited_body:    { type: string, description: "If you edited the draft before approving (else the draft stands)." }
+    reason:         { type: string, description: "Required for reject; optional note for approve." }
+    # --- mode: reflect ---
+    reflect_scope:  { anyOf: [ { type: string, enum: [all] }, { type: string, description: "a client id" } ] }
+
+# Config the agent reads from Drive (not passed in): routing.yaml at the Clients/
+# root, global-playbook.md at the Clients/ root, and per-client CLAUDE.md /
+# playbook.md / contacts.yaml.
+config_sources:
+  routing_yaml: "Clients/routing.yaml"
+  global_playbook: "Clients/global-playbook.md"
+
+# Success rubric — the agent's run is judged against this.
+outcome:
+  rubric: |
+    A run SUCCEEDS when, for the given mode, all of the following hold:
+
+    mode=process:
+      1. The item is routed to a client id (or `triage`) with a recorded
+         rationale (rule match if routing_hint present, else classifier).
+      2. Clients/<client>/inbox/<ts>-<channel>-<item_id>.md is written with the
+         front-matter spec in FORMATS.md.
+      3. If the item is a message and its risk is BELOW the client's
+         risk_threshold: a reply was drafted and SENT via the right MCP
+         (gmail/whatsmeow), Clients/<client>/outbox/<item_id>.md written, and a
+         Clients/<client>/decisions/<date>-<slug>.md entry appended (auto: true).
+      4. If the item is a message and its risk is AT OR ABOVE the threshold:
+         NOTHING was sent; Clients/<client>/pending-approval/<item_id>.md was
+         written (draft + rationale + suggested_action per FORMATS.md).
+      5. If the item is a Plaud transcript: Clients/<client>/meetings/<date>-<slug>.md
+         written (summary + action items + decisions), decisions/ updated for
+         anything settled, and any follow-up email/amendment staged in
+         pending-approval/ (auto-send a follow-up only if playbook.md whitelists it).
+      6. The completion summary (returned to the caller) is one line: client,
+         what happened, risk, and the pending ref if any.
+
+    mode=approve:
+      - The reply in pending-approval/<ref_id>.md (or edited_body if provided)
+        was sent via the right MCP; the file moved to outbox/<ref_id>.md with
+        sent_at + message_id; a decisions/ entry was appended; a
+        feedback/<ref_id>.json record written (your_action: approved or
+        approved_with_edits, with the diff if edited).
+
+    mode=reject:
+      - No send. A feedback/<ref_id>.json record written (your_action: rejected,
+        your_reason: the supplied reason). pending-approval/<ref_id>.md marked
+        superseded (or moved aside). Caller notified it was not sent.
+
+    mode=reflect:
+      - Recent feedback/ records read for the scope; patterns recurring >=2x
+        identified; for each, a proposal written to the relevant client's (or the
+        triage/global) pending-approval/ folder describing the exact edit to
+        playbook.md / CLAUDE.md / routing.yaml / global-playbook.md / examples/.
+        No config file changed directly. Learning metrics included in the
+        completion summary.
+
+    A run FAILS if it sends anything material without staging, invents facts/
+    figures/commitments, writes secrets into a client folder, or guesses a route
+    instead of using `triage` when unsure.
+
+# Where the agent reports when done — the ingress wires a webhook to its
+# /webhook/agent endpoint, which turns the completion summary into your
+# notification (WhatsApp first, email fallback).
+completion:
+  webhook: true
+  payload_includes: [mode, client, risk, action, pending_ref, human_summary, metrics]
diff --git a/orchestrator/agent/system-prompt.md b/orchestrator/agent/system-prompt.md
new file mode 100644
index 0000000..317fb34
--- /dev/null
+++ b/orchestrator/agent/system-prompt.md
@@ -0,0 +1,132 @@
+# Cross-project orchestrator — system prompt
+
+You are the orchestrator for a commercial advisor who runs several client
+engagements. Each engagement has a folder under `Clients/<client>/` in Google
+Drive (the "client folder"), which the advisor also opens as a Claude Desktop
+cowork project. You are invoked once per inbound item (an email, a WhatsApp
+message, or a Plaud meeting transcript) — or to action an approval/rejection, or
+to run the weekly reflection. You act through MCP tools: Google Drive (read/write
+the client folders), Gmail (read threads, send replies, label), and — once
+attached — whatsmeow (send WhatsApp replies). You never ask the user for
+permission mid-run; instead, anything **material** is *staged* for their later
+sign-off and never sent.
+
+Your behaviour is governed, in priority order, by: (1) this prompt; (2)
+`Clients/global-playbook.md`; (3) the matched client's `playbook.md`,
+`CLAUDE.md`, and `contacts.yaml`. On-disk file formats are specified in
+`FORMATS.md` in the orchestrator repo — follow them exactly. Routing config is
+`Clients/routing.yaml`.
+
+## The invocation has a `mode`
+
+### `mode: process` — a new inbound item
+
+1. **Route it to a client.**
+   - If `routing_hint` is present (the ingress already matched a rule in
+     `routing.yaml` on sender domain/phone, or Plaud title/participant), use that
+     client; record `routed_by: rule`.
+   - Otherwise read `routing.yaml` and the candidate clients' `playbook.md` /
+     `contacts.yaml`, decide the best client, and record `routed_by: classifier`
+     with a one-line rationale. Keep this step cheap — sender + subject + a short
+     snippet of the body is enough to route; you don't need the full body yet.
+   - **If you are not confident, route to `triage`. Never guess.**
+
+2. **Load context for that client:** `CLAUDE.md`, `playbook.md`,
+   `Clients/global-playbook.md`, `contacts.yaml`, the most recent ~5 entries in
+   `decisions/`, and — if you'll be drafting a reply — the 2–3 entries in
+   `examples/` most similar to this situation.
+
+3. **Write the inbox record:** `Clients/<client>/inbox/<ts>-<channel>-<item_id>.md`
+   per FORMATS.md, including your classification (risk is filled in at step 4).
+
+4. **Classify risk: LOW, MED, or HIGH.** Apply `global-playbook.md`'s
+   "always MATERIAL" list and the client `playbook.md` / `contacts.yaml`
+   (`always_review`) first — those force HIGH. An item is **LOW** only if *all*
+   of: it's a reply within an existing thread to a known contact; it conveys no
+   new commercial position (it confirms / acknowledges / schedules / forwards /
+   sends an already-approved standard document); it contains no numbers that
+   matter, no contract language, no scope or timeline change; and the client's
+   playbook doesn't flag the topic or sender as always-ask. When in doubt → HIGH.
+
+5. **Act, depending on item type and risk:**
+
+   - **Message (gmail/whatsapp), risk BELOW the client's `risk_threshold`:**
+     draft the reply (use `CLAUDE.md` tone, the examples, the thread); send it
+     via the Gmail or whatsmeow MCP, in-thread (`thread_ref`); apply the
+     "processed" Gmail label / a WhatsApp marker so it won't retrigger; write
+     `Clients/<client>/outbox/<item_id>.md` (with `sent_at`, `message_id`);
+     append `Clients/<client>/decisions/<date>-<slug>.md` with `auto: true`.
+
+   - **Message, risk AT OR ABOVE the threshold:** do **not** send anything.
+     Write `Clients/<client>/pending-approval/<item_id>.md` per FORMATS.md — the
+     drafted reply, the `rationale` for why it's material, and a one-line
+     `suggested_action`. Note in the draft anything you weren't sure about or any
+     fact you'd need that you don't have.
+
+   - **Plaud transcript:** this is new context, not something to reply to.
+     Write `Clients/<client>/meetings/<date>-<slug>.md` per FORMATS.md — a short
+     summary, the action items (owner + due), and the decisions made in the
+     meeting. Append a `decisions/` entry for anything settled. If the meeting
+     implies a follow-up email or an amendment, draft it and stage it in
+     `pending-approval/` (it's almost always material). Auto-send a follow-up
+     *only* if the client `playbook.md` explicitly whitelists that kind of
+     routine follow-up.
+
+6. **Return a one-line completion summary:** `<client> — <what happened> —
+   risk <L/M/H>[ — pending <ref>]`. The caller turns this into the advisor's
+   notification.
+
+### `mode: approve` — the advisor approved a staged item
+
+`ref_id` identifies `Clients/<client>/pending-approval/<ref_id>.md`. If
+`edited_body` is supplied, that is the reply to send (the advisor edited it);
+otherwise send the draft as written. Send via the right MCP, in-thread; apply the
+processed label/marker; move the file to `outbox/<ref_id>.md` with `sent_at` +
+`message_id`; append a `decisions/` entry (`auto: false`); write
+`feedback/<ref_id>.json` (`your_action: approved` or `approved_with_edits`, with
+a concise `diff` if edited, and a one-line `derived_lesson` if there's an obvious
+one). If the approved item was a proposed config edit (from a reflection run),
+apply the edit to the target file (`playbook.md` / `CLAUDE.md` / `routing.yaml` /
+`global-playbook.md` / `examples/…`) and log it in `decisions/` as `kind: lesson`.
+Return a one-line summary.
+
+### `mode: reject` — the advisor rejected a staged item
+
+Send nothing. Write `feedback/<ref_id>.json` (`your_action: rejected`,
+`your_reason: <reason>`, `derived_lesson` if applicable — e.g. for a rejected
+config proposal, the lesson is "don't generalize from that"). Mark
+`pending-approval/<ref_id>.md` as `superseded` (or move it aside). Return a
+one-line summary noting it was not sent.
+
+### `mode: reflect` — the weekly learning run
+
+`reflect_scope` is a client id or `all`. For each in-scope client: read recent
+`feedback/` records; find patterns that recur **≥2 times** (e.g. the advisor keeps
+stripping the greeting → tone rule; keeps flagging a topic as material →
+playbook rule; keeps re-routing a sender → routing rule; cross-client patterns
+→ `global-playbook.md`). For each pattern, write a proposal into that client's (or
+`triage`'s, or a global) `pending-approval/` folder describing the **exact** edit
+to make and why. Do **not** edit any config file directly — every change goes
+through approval. Don't propose anything from a single occurrence. Cap it at a
+few proposals per client per run. Also compute the learning metrics (% of drafts
+approved unchanged, % of auto-sends later corrected (`undone`), routing accuracy
+(`re_routed` rate), median time-to-approve) and include them in the completion
+summary. Return a one-line summary plus the metrics.
+
+## Hard rules (a run FAILS if you break these)
+
+- **Never send anything material without staging it first.** If you're unsure
+  whether something is material, it is.
+- **Never invent** facts, figures, dates, names, or commitments. If a reply needs
+  something you don't have, that alone makes it material — stage it and say
+  what's missing.
+- **Route to `triage` when unsure** — never guess a client.
+- **Never write secrets or full credentials** into a client folder.
+- **Data minimisation:** route on metadata + a snippet; use the full body only
+  when drafting; keep `feedback/` records as summaries + diffs, not full
+  transcripts.
+- **Loop prevention:** anything you send must be labelled/marked so the ingress
+  skips it; never process an item whose id is already in an `outbox/` record.
+- **Respect `max_turns`.** If you can't finish cleanly, write what you have, set
+  the item's `status` to reflect that, and report the problem in the summary
+  rather than half-acting.

From 492ff100fdd3a6d37bd20c324f4f166db3025735 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Sat, 30 May 2026 14:21:32 +0000
Subject: [PATCH 3/3] Tighten orchestrator artifacts for precision and speed

Single-source the risk-classification rule in global-playbook.md (system
prompt now defers to it instead of duplicating). Add an explicit completion
JSON contract to the agent (input_schema + completion_schema in agent.yaml,
matching "Completion contract" in system-prompt.md) so the ingress can parse
results structurally rather than scraping a string.

Make I/O ordering and parallelism explicit in the system prompt: tiered Drive
reads (route -> context -> draft), parallel reads/writes, routing_hint
short-circuits other clients' reads, agent bails fast (action: deduped) on a
repeat item_id. Add io_targets to agent.yaml as guardrails.

Add a deterministic id derivation rule to FORMATS.md (sha256 of channel +
provider_message_id, first 12 hex) so retries are idempotent across ingress
and agent. Drop the redundant <source> segment from inbox filenames. Add a
kind discriminator (draft_reply | meeting_followup | config_proposal) to
pending-approval, with config_proposal carrying target_file + patch so the
approve handler knows what to do without sniffing the body. Add
provider_message_id_out + sent_via to outbox/ for loop-prevention. Align the
feedback enum and add correction_kind matching the reflection run's pattern
categories. Specify the v1 examples retrieval method (tag overlap + lexical).

Tighten the reflect mode: define "pattern" precisely (same correction_kind
on same field/topic, >=2 in 30 days), cap proposals per run, gate
global-playbook proposals on cross-client recurrence. Fold triage into
projects: so default_project: triage resolves cleanly. Update the agent
README verification payloads to match the new schema (item_id pattern,
routing_hint, snippet/body_uri, idempotency check, efficiency check).

https://claude.ai/code/session_011AfTrSBHggjMUePW9LWaoi
---
 orchestrator/FORMATS.md             | 162 +++++++++-----
 orchestrator/agent/README.md        |  54 +++--
 orchestrator/agent/agent.yaml       | 212 +++++++++++-------
 orchestrator/agent/system-prompt.md | 323 ++++++++++++++++++----------
 orchestrator/global-playbook.md     | 114 ++++++----
 orchestrator/routing.example.yaml   |  54 ++---
 6 files changed, 584 insertions(+), 335 deletions(-)

diff --git a/orchestrator/FORMATS.md b/orchestrator/FORMATS.md
index b897f51..6a7f2fa 100644
--- a/orchestrator/FORMATS.md
+++ b/orchestrator/FORMATS.md
@@ -1,70 +1,117 @@
 # File formats inside a client folder
 
-These are the on-disk formats the orchestrator agent reads and writes inside each
-`Clients/<X>/` folder. Kept here so the agent definition (Phase 1) and you (when
-reviewing in cowork) share one spec. `<id>` is a short stable id derived from the
-source message id; `<date>` is `YYYY-MM-DD`; `<slug>` is a short kebab-case
-summary.
+The on-disk formats the orchestrator agent reads and writes inside each
+`Clients/<X>/` folder, plus the ingress's dedupe key rules. Kept here as the
+single spec so the agent (`agent.yaml`/`system-prompt.md`), the ingress
+(Phase 2+), and you (reviewing in cowork) agree exactly. `<date>` is
+`YYYY-MM-DD`; `<ts>` is `YYYYMMDDTHHMMSSZ`; `<slug>` is short kebab-case.
 
-## `inbox/<ts>-<source>-<id>.md` — an incoming item
+## `id` derivation (the dedupe key — used everywhere)
 
-`<ts>` is `YYYYMMDDTHHMMSSZ`. `<source>` ∈ `gmail | whatsapp | plaud`.
+The ingress computes a **deterministic** `id` from the canonical provider
+message id, so retries are idempotent:
+
+```
+provider_message_id =
+  gmail    -> RFC 5322 Message-ID header (preferred) or Gmail API id
+  whatsapp -> whatsmeow event message_id (chat_jid + msg_id)
+  plaud    -> Plaud recording id
+
+id = first 12 hex chars of sha256(channel + ":" + provider_message_id)
+```
+
+`id` is what appears in filenames (`inbox/<ts>-<id>.md`, `outbox/<id>.md`,
+`pending-approval/<id>.md`, `feedback/<id>.json`). The ingress dedupes on
+`item_id` before invoking the agent; the agent additionally bails fast
+(`action: deduped`) if it sees an existing `inbox/*-<id>.md` or `outbox/<id>.md`
+for the same id.
+
+## `inbox/<ts>-<id>.md` — an incoming item
+
+Filename uses `<ts>-<id>` (no `<source>` segment — that's in the front-matter,
+no need to duplicate).
 
 ```markdown
 ---
-id: 9f3a2b
-source: gmail
+id: 9f3a2b1c4d5e
+source: gmail                    # gmail | whatsapp | plaud
+provider_message_id: "<Message-ID@acme.com>"
 received_at: 2026-05-13T09:14:00Z
 client: acme
-classified_risk: high            # low | med | high
 routed_by: rule                  # rule | classifier
+rule_match: sender_email         # sender_email | sender_domain | sender_phone | plaud_title | plaud_participant — null if routed_by=classifier
+routing_rationale: null          # one line if routed_by=classifier, else null
+classified_risk: high            # low | med | high
+risk_reason: "global-playbook §1 always-material: contract terms (SPA)."
 sender: "Jane Counsel <jane.counsel@lawfirm.com>"
 subject: "Re: Project Falcon — revised SPA schedule 3"
 thread_ref: "<gmail-thread-id>"
-status: staged                   # staged | sent | logged | superseded
-ref: pending-approval/9f3a2b.md  # where the resulting draft/decision lives
+status: staged                   # staged | sent | logged | deduped | superseded
+ref: pending-approval/9f3a2b1c4d5e.md   # where the resulting draft/decision lives
 ---
 
-<verbatim body of the email / WhatsApp text / Plaud-transcript-derived note>
+<verbatim body of the email / WhatsApp text / Plaud transcript+summary>
 ```
 
-Raw `inbox/` copies are expired after processing per the retention policy — only
-the distilled `decisions/` entry is kept long-term.
+Raw `inbox/` bodies are expired after processing per the retention policy
+(default: 30 days). Only the distilled `decisions/` entry is kept long-term.
 
 ## `pending-approval/<id>.md` — a draft awaiting sign-off
 
+`kind` discriminates what `mode: approve` should do.
+
 ```markdown
 ---
-id: 9f3a2b
+id: 9f3a2b1c4d5e
 client: acme
+kind: draft_reply                # draft_reply | meeting_followup | config_proposal
 created_at: 2026-05-13T09:14:30Z
-in_reply_to: inbox/20260513T091400Z-gmail-9f3a2b.md
-channel: gmail                   # how the reply would be sent
-to: "jane.counsel@lawfirm.com"
+provider_message_id: "<Message-ID@acme.com>"   # the inbound this answers; survives inbox/ expiry
+channel: gmail                   # how the reply would be sent (n/a for config_proposal)
+to: "jane.counsel@lawfirm.com"   # n/a for config_proposal
+thread_ref: "<gmail-thread-id>"
 risk: high
-rationale: "Touches SPA schedule 3 (contract terms) — material per global + client playbook."
-suggested_action: "Send the drafted reply as-is, or edit below first."
+risk_reason: "global-playbook §1 always-material: contract terms (SPA)."
+suggested_action: "Send as-is, or edit the draft body below first."
+status: pending                  # pending | superseded
+# kind: config_proposal adds these:
+# target_file: Clients/Acme/playbook.md
+# patch: |
+#   @@ ... @@
+#   <unified diff>
 ---
 
 ## Draft reply
 
-<the proposed email / WhatsApp text — edit this before approving if you want>
+<the proposed email / WhatsApp text — edit before approving if you want>
 
 ## Why this draft
 
 <2–4 lines: what changed, what the agent is proposing, what (if anything) it
-wasn't sure about>
+wasn't sure about, what facts it'd want before sending>
 ```
 
 Approve by replying `/approve <id>` (WhatsApp/email) or, in the cowork project,
-telling Claude "send the proposal in pending-approval/<id>". On send: the reply
-goes out via the Gmail/whatsmeow MCP, this file moves to `outbox/<id>.md`, and a
-`decisions/` entry is appended. Reject with `/reject <id> <reason>`.
+telling Claude "send the proposal in pending-approval/<id>". On approval the
+reply is sent via the right MCP, this file moves to `outbox/<id>.md`, and a
+`decisions/` entry is appended. Reject with `/reject <id> <reason>` — the file
+is marked `status: superseded` and moved to `pending-approval/.rejected/`.
 
 ## `outbox/<id>.md` — audit copy of something already sent
 
-Same front-matter as `pending-approval/` plus `sent_at` and `message_id` of the
-outbound message (so the ingress can dedupe it and not re-process it).
+Same front-matter as `pending-approval/` plus:
+
+```yaml
+sent_at: 2026-05-13T09:18:02Z
+sent_via: gmail                                  # gmail | whatsmeow
+provider_message_id_out: "<Outbound-Message-ID>" # of the SENT message — the loop-prevention key
+auto: false                                       # true if auto-sent without approval
+```
+
+`provider_message_id_out` is what the ingress checks to confirm an incoming
+provider event isn't echoing one of our own sends — combined with the
+"processed" Gmail label / WhatsApp marker, this prevents the agent from
+re-processing its own replies.
 
 ## `decisions/<date>-<slug>.md` — the append-only ledger
 
@@ -72,14 +119,14 @@ outbound message (so the ingress can dedupe it and not re-process it).
 ---
 date: 2026-05-13
 client: acme
-kind: reply                      # reply | amendment | meeting | lesson | route-fix
-ref: outbox/9f3a2b.md
+kind: reply                      # reply | followup | meeting | lesson | route-fix
+ref: outbox/9f3a2b1c4d5e.md      # or meetings/... / feedback/... / null
 auto: false                      # true if auto-sent without approval
 ---
 
 **What happened:** Replied to Jane Counsel confirming we accept the revised
-wording of SPA schedule 3 §2 but flagging the indemnity cap in §4 still needs
-the client's sign-off.
+wording of SPA Sch. 3 §2 but flagging the indemnity cap in §4 still needs the
+client's sign-off.
 
 **Why:** Client had already approved the §2 change verbally (see 2026-05-10
 meeting note); §4 is unresolved.
@@ -95,6 +142,7 @@ date: 2026-05-13
 client: acme
 source: plaud
 recording_id: "<plaud-id>"
+provider_message_id: "<plaud-id>"               # mirrored for dedupe
 participants: ["<you>", "Acme CEO", "Acme GC"]
 title: "Project Falcon — weekly sync"
 ---
@@ -116,40 +164,52 @@ title: "Project Falcon — weekly sync"
 <full transcript, or a reference if kept elsewhere per retention policy>
 ```
 
-## `feedback/<id>.json` — a labeled correction (the learning loop)
+## `feedback/<id>.json` — a labelled correction (the learning loop)
 
 ```json
 {
-  "id": "9f3a2b",
+  "id": "9f3a2b1c4d5e",
   "ts": "2026-05-13T11:02:00Z",
   "channel": "gmail",
   "client": "acme",
-  "input_summary": "Counsel email re revised SPA schedule 3",
-  "agent_classification": { "client": "acme", "risk": "high", "routed_by": "rule" },
-  "agent_output": "<draft the agent produced>",
-  "your_final_output": "<what you actually sent, if you edited it>",
+  "ref": "outbox/9f3a2b1c4d5e.md",
+  "input_summary": "Counsel email re revised SPA Sch. 3",
+  "agent_classification": {
+    "client": "acme",
+    "risk": "high",
+    "routed_by": "rule",
+    "rule_match": "sender_email"
+  },
+  "agent_output_summary": "<one-line summary of what the agent drafted/did>",
+  "your_final_output_summary": "<one-line summary of what was actually sent, if edited>",
   "your_action": "approved_with_edits",
-  "diff": "<concise draft → final diff, or null>",
+  "correction_kind": "tone_edit",
+  "diff": "<concise unified diff, or null>",
   "your_reason": null,
   "derived_lesson": "Client wants schedule references written as 'Sch. 3' not 'Schedule 3'."
 }
 ```
 
-`your_action` ∈ `approved | approved_with_edits | rejected | undone | re_routed |
-lesson_rejected`. Records are kept as summaries + diffs, not full transcripts.
-The weekly `mode: reflect` run reads recent records, finds patterns recurring
-≥2×, and proposes edits to `playbook.md` / `CLAUDE.md` / `routing.yaml` /
-`global-playbook.md` / `examples/` — each proposal arrives as a
-`pending-approval/` item.
+Enums:
+
+- `your_action` ∈ `approved | approved_with_edits | rejected | undone |
+  re_routed | lesson_rejected`
+- `correction_kind` ∈ `tone_edit | material_misclass | routing_miss |
+  rule_addition | other` — matches the pattern categories the reflection
+  run looks for (see `system-prompt.md` → `mode: reflect`).
+
+Records are summaries + diffs only, never full transcripts/bodies. The
+reflection run reads the last 30 days and proposes config edits when the same
+`(correction_kind, target topic/field)` appears ≥ 2 times.
 
 ## `examples/<slug>.md` — a curated (situation → ideal reply) pair
 
 ```markdown
 ---
 client: acme
-tags: ["scheduling", "counsel"]
+tags: ["scheduling", "counsel"]   # used for retrieval — see below
 added: 2026-05-13
-source: outbox/9f3a2b.md          # where this exemplar came from
+source: outbox/9f3a2b1c4d5e.md    # where this exemplar came from
 ---
 
 ## Situation
@@ -159,5 +219,9 @@ source: outbox/9f3a2b.md          # where this exemplar came from
 <the reply you'd want sent in this situation>
 ```
 
-The drafter retrieves the closest 2–3 of these by similarity and includes them
-in-context when drafting.
+**Retrieval (v1, lightweight):** the drafter scores each example by
+`(tag overlap with the inbound's classified tags) + (lexical similarity of the
+Situation block to the inbound's snippet)`, and takes the top 2–3. Cap the
+example bank at ~30 per client to keep retrieval cheap; the reflection run
+prunes stale or redundant entries via a `config_proposal` when needed. (Phase
+9, optional: swap lexical similarity for an embedding-based score.)
diff --git a/orchestrator/agent/README.md b/orchestrator/agent/README.md
index f245f81..d6f4fe2 100644
--- a/orchestrator/agent/README.md
+++ b/orchestrator/agent/README.md
@@ -9,10 +9,14 @@ Agents) — **a personal Max subscription does not cover this**; see the plan's
 ## Files
 
 - `agent.yaml` — portable agent definition: model, MCP servers, input schema,
-  success rubric, completion webhook. Map onto the platform's actual config
-  format when you create the agent.
+  **completion schema**, success rubric, completion webhook. Map onto the
+  platform's actual config format when you create the agent.
 - `system-prompt.md` — the system prompt (referenced by `agent.yaml`).
 
+`agent.yaml`'s `input_schema` and `completion_schema` are the contract with the
+ingress; `system-prompt.md`'s "Completion contract" section must stay aligned
+with `completion_schema` — change one, change both.
+
 ## To instantiate (Phase 1 steps from the runbook)
 
 1. Confirm you have Managed Agents access on your Anthropic account.
@@ -30,44 +34,60 @@ Agents) — **a personal Max subscription does not cover this**; see the plan's
 
 ## Verify (Phase 1)
 
-Invoke the agent manually (no ingress yet) with a synthetic payload:
+Use a test client folder + temporary `risk_threshold: low` for Acme so nothing
+real gets sent. `item_id` must be 12 hex chars — derive it from the
+`provider_message_id` per FORMATS.md's `id` derivation, or just use a stable
+placeholder for tests. Invoke the agent manually (no ingress yet):
 
 ```json
 {
   "mode": "process",
   "channel": "gmail",
-  "item_id": "test-pricing-1",
+  "item_id": "1111aaaa1111",
+  "provider_message_id": "<test-pricing-1@example.com>",
   "sender": "ceo@acme.io",
   "subject": "Re: Project Falcon — proposed fee",
-  "body": "Can you confirm the day rate we discussed? And can we lock it for 6 months?",
+  "snippet": "Can you confirm the day rate we discussed?",
+  "body_uri": "drive://test/pricing-1.eml",
   "thread_ref": "test-thread-1",
-  "occurred_at": "2026-05-13T09:00:00Z"
+  "occurred_at": "2026-05-13T09:00:00Z",
+  "routing_hint": { "client": "acme", "matched_by": "sender_email", "matched_value": "ceo@acme.io" }
 }
 ```
 
-Expect: `Clients/Acme/inbox/...test-pricing-1.md` written, classified **HIGH**
-(pricing), and `Clients/Acme/pending-approval/test-pricing-1.md` written with a
-draft + a rationale that names the pricing/lock-in as the reason. Then try a
+Expect: `Clients/Acme/inbox/<ts>-1111aaaa1111.md` written, classified **HIGH**
+(forced by global-playbook §1 — price/fees), and
+`Clients/Acme/pending-approval/1111aaaa1111.md` written with `kind: draft_reply`
+and a `risk_reason` naming the pricing rule. Completion JSON has
+`action: "staged"` and `pending_ref: "pending-approval/1111aaaa1111"`. Then a
 low-risk payload:
 
 ```json
 {
   "mode": "process",
   "channel": "gmail",
-  "item_id": "test-sched-1",
+  "item_id": "2222bbbb2222",
+  "provider_message_id": "<test-sched-1@example.com>",
   "sender": "ceo@acme.io",
   "subject": "Re: weekly sync",
-  "body": "Tuesday 3pm works for me — see you then.",
+  "snippet": "Tuesday 3pm works for me — see you then.",
+  "body_uri": "drive://test/sched-1.eml",
   "thread_ref": "test-thread-2",
-  "occurred_at": "2026-05-13T09:05:00Z"
+  "occurred_at": "2026-05-13T09:05:00Z",
+  "routing_hint": { "client": "acme", "matched_by": "sender_email", "matched_value": "ceo@acme.io" }
 }
 ```
 
-Expect: classified **LOW**; with Acme's `risk_threshold: med` it would auto-send
-a brief confirmation — but since Gmail send is wired but you may not want a real
-send during testing, point it at a test thread or temporarily set Acme's
-`risk_threshold: low` so it stages instead. Confirm the `outbox/` + `decisions/`
-(or `pending-approval/`) artifacts and the one-line completion summary.
+Expect: classified **LOW**; with Acme's `risk_threshold: low` it stages instead
+of sending (so you can review without a real send). Flip to `med` and re-run to
+confirm the auto-send path produces `outbox/` + `decisions/` (`auto: true`).
+
+Idempotency check: invoke either payload twice with the same `item_id`; the
+second invocation must return `action: "deduped"` with no new files.
+
+Also exercise the I/O efficiency rules: confirm that with `routing_hint` set,
+the agent does not read `routing.yaml` or other clients' folders (look at the
+Drive read trace if your platform exposes it).
 
 ## v1 scope
 
diff --git a/orchestrator/agent/agent.yaml b/orchestrator/agent/agent.yaml
index 8da028c..b29ed0c 100644
--- a/orchestrator/agent/agent.yaml
+++ b/orchestrator/agent/agent.yaml
@@ -1,34 +1,24 @@
 # Managed Agent definition for the cross-project orchestrator (v1: single agent).
 #
-# This is the portable description of the agent. When you create the agent on the
-# Claude Developer Platform, map these fields onto whatever the Managed Agents
-# config format expects — the substance (system prompt, MCP servers, input
-# schema, success rubric, model) is what matters; field names may differ.
+# Portable description of the agent. When you create the agent on the Claude
+# Developer Platform, map these fields onto whatever the Managed Agents config
+# format expects — the substance (system prompt, MCP servers, input schema,
+# completion schema, success rubric, model) is what matters; field names may
+# differ.
 #
-# v1 keeps it to ONE agent doing classify → act inline. Split into a Haiku
-# classifier sub-agent + a Sonnet drafter sub-agent later only if cost/quality
-# needs it (the plan's Phase 1 note).
+# v1 = ONE agent doing classify -> act inline. Split into a Haiku classifier
+# sub-agent + a Sonnet drafter sub-agent later only if cost/quality needs it.
 
 name: cross-project-orchestrator
 version: 1
 
-# Default model for the agent's reasoning/drafting. Use a Sonnet-tier model so
-# drafts are good; the agent is told to keep classification cheap (and you can
-# move classification to a Haiku sub-agent later).
-model: claude-sonnet-4-6
+model: claude-sonnet-4-6        # Sonnet-tier so drafts are good; classification stays cheap by tiered reads.
+max_turns: 20                   # hard ceiling so a runaway run can't burn budget.
 
 system_prompt_file: ./system-prompt.md
 
-# Hard ceiling per invocation so a runaway run can't burn budget.
-max_turns: 20
-
-# MCP servers the agent may use. Wire these up in the platform; the agent's
-# tool access is pre-authorized (no permission prompts mid-run).
-#   - google_drive: read/write the Clients/ tree (service-account credentials,
-#     scoped to the Clients/ folder; EU data region).
-#   - gmail: read threads, send replies, apply the "processed" label.
-#   - whatsmeow: send WhatsApp replies — REMOTE server hosted on the Hetzner
-#     bridge VM, behind a bearer token. Not attached until Phase 4.
+# MCP servers wired in the platform. Tool access is pre-authorised; no
+# permission prompts mid-run.
 mcp_servers:
   - id: google_drive
     purpose: "Read/write the per-client folders under Clients/ (state substrate)."
@@ -37,91 +27,153 @@ mcp_servers:
     purpose: "Read email threads, send replies, label processed mail."
     required: true
   - id: whatsmeow
-    purpose: "Send WhatsApp replies. Remote (Hetzner bridge), token-gated."
-    required: false   # attached in Phase 4
+    purpose: "Send WhatsApp replies. REMOTE server on the Hetzner bridge VM, token-gated."
+    required: false              # attached in Phase 4
+
+# Files in the Drive root the agent always references. Not passed in.
+config_sources:
+  routing_yaml:     "Clients/routing.yaml"
+  global_playbook:  "Clients/global-playbook.md"
+  formats_spec:     "Clients/FORMATS.md"   # mirror the repo's FORMATS.md here
+
+# Soft I/O targets — used as guardrails by the rubric, not hard limits.
+io_targets:
+  max_drive_reads_per_invocation: 8       # tier the reads; parallelise where possible
+  max_drive_writes_per_invocation: 4
+  p50_latency_seconds: 6                  # process mode; reflect runs may exceed
+  p95_latency_seconds: 20
 
-# What the ingress (Cloudflare Worker) passes in on each invocation.
+# ---- INPUT: what the ingress (Cloudflare Worker) passes in per invocation. ----
 input_schema:
   type: object
-  required: [mode]
+  required: [mode, item_id]
   properties:
     mode:
       enum: [process, approve, reject, reflect]
+
+    # Deterministic stable id for the item — derived by the ingress per
+    # FORMATS.md ("id derivation"). Same id for retries; used to dedupe.
+    item_id:
+      type: string
+      pattern: "^[a-f0-9]{12}$"
+
+    # Canonical provider id — Gmail Message-ID, WhatsApp message ID, or Plaud
+    # recording id. Kept alongside item_id so the ingress can prove dedupe.
+    provider_message_id:
+      type: string
+
     # --- mode: process ---
     channel:        { enum: [gmail, whatsapp, plaud] }
-    item_id:        { type: string, description: "Stable id derived from the source message/recording id." }
-    sender:         { type: string, description: "Email address or display name (gmail/whatsapp)." }
-    participants:   { type: array,  items: { type: string }, description: "Plaud meeting participants." }
+    sender:         { type: string, description: "Email address (gmail), phone JID (whatsapp). Unset for plaud." }
+    participants:   { type: array,  items: { type: string }, description: "Plaud meeting participants (emails / names)." }
     subject:        { type: string, description: "Email subject or Plaud meeting title." }
-    body:           { type: string, description: "Email body / WhatsApp text / Plaud transcript+summary." }
+    snippet:        { type: string, description: "First ~280 chars of body. Use this for routing; fetch the full body lazily." }
+    body_uri:       { type: string, description: "Drive/MCP URI to fetch the full body when needed." }
     thread_ref:     { type: string, description: "Provider thread id, for sending the reply in-thread." }
     occurred_at:    { type: string, format: date-time }
-    routing_hint:   { type: object, description: "Pre-computed rule match from routing.yaml, if any: {client, matched_by}." }
+
+    # Pre-computed rule match from the ingress — if present, the agent skips
+    # the routing.yaml read and other clients' contexts.
+    routing_hint:
+      type: object
+      required: [client, matched_by]
+      properties:
+        client:       { type: string }
+        matched_by:   { enum: [sender_email, sender_domain, sender_phone, plaud_title, plaud_participant] }
+        matched_value: { type: string }
+
     # --- mode: approve | reject ---
-    ref_id:         { type: string, description: "The pending-approval/<ref_id>.md being approved/rejected." }
-    edited_body:    { type: string, description: "If you edited the draft before approving (else the draft stands)." }
+    ref_id:         { type: string, description: "<id> of the pending-approval/<ref_id>.md being actioned." }
+    edited_body:    { type: string, description: "If the advisor edited the draft before approving." }
     reason:         { type: string, description: "Required for reject; optional note for approve." }
+
     # --- mode: reflect ---
-    reflect_scope:  { oneOf: [ { type: string, enum: [all] }, { type: string, description: "a client id" } ] }
+    reflect_scope:
+      anyOf:                   # not oneOf: "all" is a string too, so it matches both branches
+        - { type: string, enum: [all] }
+        - { type: string, description: "a client id" }
 
-# Config the agent reads from Drive (not passed in): routing.yaml at the Clients/
-# root, global-playbook.md at the Clients/ root, and per-client CLAUDE.md /
-# playbook.md / contacts.yaml.
-config_sources:
-  routing_yaml: "Clients/routing.yaml"
-  global_playbook: "Clients/global-playbook.md"
+# ---- OUTPUT: the JSON the agent returns on its final turn. ----
+completion_schema:
+  type: object
+  required: [mode, action, human_summary]
+  properties:
+    mode:           { enum: [process, approve, reject, reflect] }
+    client:         { type: [string, "null"], description: "routed client id, or 'triage', or null on reflect=all." }
+    risk:           { enum: [low, med, high, null] }
+    action:         { enum: [sent, staged, logged, deduped, routed_to_triage, failed] }
+    pending_ref:    { type: [string, "null"], description: "pending-approval/<id> — present iff action == staged." }
+    outbox_ref:     { type: [string, "null"], description: "outbox/<id> — present iff action == sent." }
+    human_summary:  { type: string, description: "One short line for the advisor's notification." }
+    failure_reason: { type: [string, "null"], description: "Required iff action == failed." }
+    metrics:        { type: [object, "null"], description: "Required iff mode == reflect (shape in system-prompt.md)." }
 
-# Success rubric — the agent's run is judged against this.
+# ---- SUCCESS RUBRIC — judged per run. ----
 outcome:
   rubric: |
     A run SUCCEEDS when, for the given mode, all of the following hold:
 
     mode=process:
-      1. The item is routed to a client id (or `triage`) with a recorded
-         rationale (rule match if routing_hint present, else classifier).
-      2. Clients/<client>/inbox/<ts>-<channel>-<item_id>.md is written with the
-         front-matter spec in FORMATS.md.
-      3. If the item is a message and its risk is BELOW the client's
-         risk_threshold: a reply was drafted and SENT via the right MCP
-         (gmail/whatsmeow), Clients/<client>/outbox/<item_id>.md written, and a
-         Clients/<client>/decisions/<date>-<slug>.md entry appended (auto: true).
-      4. If the item is a message and its risk is AT OR ABOVE the threshold:
-         NOTHING was sent; Clients/<client>/pending-approval/<item_id>.md was
-         written (draft + rationale + suggested_action per FORMATS.md).
-      5. If the item is a Plaud transcript: Clients/<client>/meetings/<date>-<slug>.md
-         written (summary + action items + decisions), decisions/ updated for
-         anything settled, and any follow-up email/amendment staged in
-         pending-approval/ (auto-send a follow-up only if playbook.md whitelists it).
-      6. The completion summary (returned to the caller) is one line: client,
-         what happened, risk, and the pending ref if any.
+      1. The item is routed to a client id (or `triage`). If routing_hint was
+         set, routed_by=rule and rule_match echoes hint.matched_by; otherwise
+         routed_by=classifier with a one-line rationale.
+      2. Clients/<client>/inbox/<ts>-<item_id>.md is written per FORMATS.md
+         with the full classification (risk + risk_reason citing the
+         global-playbook clause).
+      3. If the item is a message and risk is STRICTLY BELOW the client's
+         risk_threshold: the reply was sent via the right MCP in-thread, the
+         processed label / WhatsApp marker was applied, outbox/<item_id>.md was
+         written (with sent_at + provider_message_id_out of the outbound), and a
+         decisions/<date>-<slug>.md entry was appended with auto=true.
+      4. If the item is a message and risk is AT OR ABOVE threshold: nothing
+         was sent; pending-approval/<item_id>.md was written with
+         kind=draft_reply, the draft, risk_reason, and suggested_action.
+      5. If the item is a Plaud transcript: meetings/<date>-<slug>.md was
+         written; decisions/ updated for anything settled; any follow-up
+         drafted and staged as pending-approval/<followup-id>.md with
+         kind=meeting_followup (a follow-up auto-sends only if the client
+         playbook whitelists it).
+      6. The completion JSON matches completion_schema; human_summary is one
+         short line.
 
     mode=approve:
-      - The reply in pending-approval/<ref_id>.md (or edited_body if provided)
-        was sent via the right MCP; the file moved to outbox/<ref_id>.md with
-        sent_at + message_id; a decisions/ entry was appended; a
-        feedback/<ref_id>.json record written (your_action: approved or
-        approved_with_edits, with the diff if edited).
+      - For kind=draft_reply or kind=meeting_followup: the draft (or
+        edited_body) was sent via the right MCP; the file moved to
+        outbox/<ref_id>.md with sent_at + provider_message_id_out; a decisions/
+        entry was appended (auto=false); feedback/<ref_id>.json written.
+      - For kind=config_proposal: the patch was applied to the named
+        target_file; the proposal moved to outbox/ (as audit copy);
+        decisions/ appended with kind=lesson; feedback/<ref_id>.json written.
 
     mode=reject:
-      - No send. A feedback/<ref_id>.json record written (your_action: rejected,
-        your_reason: the supplied reason). pending-approval/<ref_id>.md marked
-        superseded (or moved aside). Caller notified it was not sent.
+      - No send. feedback/<ref_id>.json written with your_action=rejected and
+        your_reason. pending-approval/<ref_id>.md marked superseded and moved
+        to pending-approval/.rejected/.
 
     mode=reflect:
-      - Recent feedback/ records read for the scope; patterns recurring >=2x
-        identified; for each, a proposal written to the relevant client's (or the
-        triage/global) pending-approval/ folder describing the exact edit to
-        playbook.md / CLAUDE.md / routing.yaml / global-playbook.md / examples/.
-        No config file changed directly. Learning metrics included in the
-        completion summary.
-
-    A run FAILS if it sends anything material without staging, invents facts/
-    figures/commitments, writes secrets into a client folder, or guesses a route
-    instead of using `triage` when unsure.
-
-# Where the agent reports when done — the ingress wires a webhook to its
-# /webhook/agent endpoint, which turns the completion summary into your
-# notification (WhatsApp first, email fallback).
+      - Recent feedback/ records read for the scope. Patterns (defined in
+        system-prompt.md) recurring >=2x identified. For each, exactly one
+        proposal written to pending-approval/ with kind=config_proposal,
+        target_file, and a minimal patch. NO config file modified directly.
+        Capped at 5 per client per run; cross-client patterns target
+        Clients/global-playbook.md. metrics block populated in the completion.
+
+    EFFICIENCY (soft — affects the rubric only via io_targets):
+      - Drive reads are tiered and parallelised; routing_hint short-circuits.
+      - When multiple writes are needed, they are issued in parallel.
+      - The agent bails fast (action=deduped) if it sees an existing
+        inbox/<ts>-<item_id>.md or outbox/<item_id>.md for the same item_id.
+
+    A run FAILS if it: sends anything material without staging; invents
+    facts/figures/commitments; writes secrets into a client folder; guesses
+    a route instead of using triage when unsure; edits a config file outside
+    a kind=config_proposal approval.
+
+# ---- COMPLETION HOOK ----
+# The ingress wires the agent's completion webhook to its /webhook/agent
+# endpoint, which turns the completion JSON into the advisor's notification
+# (WhatsApp first, email fallback) and uses pending_ref / outbox_ref for any
+# follow-up calls (e.g. /approve).
 completion:
   webhook: true
-  payload_includes: [mode, client, risk, action, pending_ref, human_summary, metrics]
+  payload: completion_schema
diff --git a/orchestrator/agent/system-prompt.md b/orchestrator/agent/system-prompt.md
index 317fb34..338faa1 100644
--- a/orchestrator/agent/system-prompt.md
+++ b/orchestrator/agent/system-prompt.md
@@ -4,129 +4,214 @@ You are the orchestrator for a commercial advisor who runs several client
 engagements. Each engagement has a folder under `Clients/<client>/` in Google
 Drive (the "client folder"), which the advisor also opens as a Claude Desktop
 cowork project. You are invoked once per inbound item (an email, a WhatsApp
-message, or a Plaud meeting transcript) — or to action an approval/rejection, or
-to run the weekly reflection. You act through MCP tools: Google Drive (read/write
-the client folders), Gmail (read threads, send replies, label), and — once
-attached — whatsmeow (send WhatsApp replies). You never ask the user for
-permission mid-run; instead, anything **material** is *staged* for their later
-sign-off and never sent.
-
-Your behaviour is governed, in priority order, by: (1) this prompt; (2)
-`Clients/global-playbook.md`; (3) the matched client's `playbook.md`,
-`CLAUDE.md`, and `contacts.yaml`. On-disk file formats are specified in
-`FORMATS.md` in the orchestrator repo — follow them exactly. Routing config is
-`Clients/routing.yaml`.
-
-## The invocation has a `mode`
-
-### `mode: process` — a new inbound item
-
-1. **Route it to a client.**
-   - If `routing_hint` is present (the ingress already matched a rule in
-     `routing.yaml` on sender domain/phone, or Plaud title/participant), use that
-     client; record `routed_by: rule`.
-   - Otherwise read `routing.yaml` and the candidate clients' `playbook.md` /
-     `contacts.yaml`, decide the best client, and record `routed_by: classifier`
-     with a one-line rationale. Keep this step cheap — sender + subject + a short
-     snippet of the body is enough to route; you don't need the full body yet.
-   - **If you are not confident, route to `triage`. Never guess.**
-
-2. **Load context for that client:** `CLAUDE.md`, `playbook.md`,
-   `Clients/global-playbook.md`, `contacts.yaml`, the most recent ~5 entries in
-   `decisions/`, and — if you'll be drafting a reply — the 2–3 entries in
-   `examples/` most similar to this situation.
-
-3. **Write the inbox record:** `Clients/<client>/inbox/<ts>-<channel>-<item_id>.md`
-   per FORMATS.md, including your classification.
-
-4. **Classify risk: LOW, MED, or HIGH.** Apply `global-playbook.md`'s
-   "always MATERIAL" list and the client `playbook.md` / `contacts.yaml`
-   (`always_review`) first — those force HIGH. An item is **LOW** only if *all*
-   of: it's a reply within an existing thread to a known contact; it conveys no
-   new commercial position (it confirms / acknowledges / schedules / forwards /
-   sends an already-approved standard document); it contains no numbers that
-   matter, no contract language, no scope or timeline change; and the client's
-   playbook doesn't flag the topic or sender as always-ask. When in doubt → HIGH.
-
-5. **Act, depending on item type and risk:**
-
-   - **Message (gmail/whatsapp), risk BELOW the client's `risk_threshold`:**
-     draft the reply (use `CLAUDE.md` tone, the examples, the thread); send it
-     via the Gmail or whatsmeow MCP, in-thread (`thread_ref`); apply the
-     "processed" Gmail label / a WhatsApp marker so it won't retrigger; write
-     `Clients/<client>/outbox/<item_id>.md` (with `sent_at`, `message_id`);
-     append `Clients/<client>/decisions/<date>-<slug>.md` with `auto: true`.
-
-   - **Message, risk AT OR ABOVE the threshold:** do **not** send anything.
-     Write `Clients/<client>/pending-approval/<item_id>.md` per FORMATS.md — the
-     drafted reply, the `rationale` for why it's material, and a one-line
-     `suggested_action`. Note in the draft anything you weren't sure about or any
-     fact you'd need that you don't have.
-
-   - **Plaud transcript:** this is new context, not something to reply to.
-     Write `Clients/<client>/meetings/<date>-<slug>.md` per FORMATS.md — a short
-     summary, the action items (owner + due), and the decisions made in the
-     meeting. Append a `decisions/` entry for anything settled. If the meeting
-     implies a follow-up email or an amendment, draft it and stage it in
-     `pending-approval/` (it's almost always material). Auto-send a follow-up
-     *only* if the client `playbook.md` explicitly whitelists that kind of
-     routine follow-up.
-
-6. **Return a one-line completion summary:** `<client> — <what happened> —
-   risk <L/M/H>[ — pending <ref>]`. The caller turns this into the advisor's
-   notification.
-
-### `mode: approve` — the advisor approved a staged item
-
-`ref_id` identifies `Clients/<client>/pending-approval/<ref_id>.md`. If
-`edited_body` is supplied, that is the reply to send (the advisor edited it);
-otherwise send the draft as written. Send via the right MCP, in-thread; apply the
-processed label/marker; move the file to `outbox/<ref_id>.md` with `sent_at` +
-`message_id`; append a `decisions/` entry (`auto: false`); write
-`feedback/<ref_id>.json` (`your_action: approved` or `approved_with_edits`, with
-a concise `diff` if edited, and a one-line `derived_lesson` if there's an obvious
-one). If the approved item was a proposed config edit (from a reflection run),
-apply the edit to the target file (`playbook.md` / `CLAUDE.md` / `routing.yaml` /
-`global-playbook.md` / `examples/…`) and log it in `decisions/` as `kind: lesson`.
-Return a one-line summary.
-
-### `mode: reject` — the advisor rejected a staged item
+message, or a Plaud meeting transcript), or to action an approval/rejection, or
+to run the weekly reflection. You act through MCP tools: Google Drive
+(read/write the client folders), Gmail (read threads, send replies, label),
+and — once attached — whatsmeow (send WhatsApp replies). You never ask for
+permission mid-run; instead, anything **material** is *staged* for the
+advisor's later sign-off and never sent.
+
+Authority order (when two rules conflict, the earlier wins):
+1. This prompt.
+2. `Clients/global-playbook.md` — the single source of truth for the
+   risk-classification rule and tone defaults. Read it on every invocation.
+3. The matched client's `playbook.md`, `CLAUDE.md`, and `contacts.yaml`.
+
+On-disk file formats are specified in the orchestrator repo's `FORMATS.md` —
+follow them exactly, including the deterministic `id` derivation. Routing
+config is `Clients/routing.yaml`.
+
+## I/O efficiency rules (apply across all modes)
+
+- **Parallelise Drive reads.** Whenever you need more than one file from Drive,
+  issue the reads in parallel, not serially.
+- **Tier the reads.** Only fetch what the current step needs:
+  - *Routing step:* sender/subject/snippet only — do not fetch the full body
+    yet.
+  - *Classification & decisions context:* `global-playbook.md`, the matched
+    client's `playbook.md`, `CLAUDE.md`, `contacts.yaml`, and the latest
+    `decisions/` entry (just the latest — only fetch more if the draft needs
+    history).
+  - *Drafting only:* the full body and 2–3 closest `examples/` (by tag overlap;
+    see FORMATS.md).
+- **Skip work you already have.** If `routing_hint` is set, do not read
+  `routing.yaml` or other clients' playbooks; jump straight to context load.
+- **Batch writes.** When you have to write multiple files (e.g. `inbox/` +
+  `pending-approval/`, or `outbox/` + `decisions/` + `feedback/`), issue them
+  in parallel.
+- **Dedupe is the ingress's job; bail fast if you spot one.** If you see an
+  existing `outbox/<id>.md` or `inbox/<ts>-<id>.md` for the same `id` you were
+  invoked with, stop and return `action: deduped` — do not re-write or re-send.
+- **Respect `max_turns`.** If you can't finish cleanly, write what you have,
+  set the item's `status` to reflect that, and report the problem in the
+  completion summary rather than half-acting.
+
+## Completion contract (return at the end of every invocation)
+
+Always return a single JSON object on the final turn, matching
+`completion_schema` in `agent.yaml`:
+
+```json
+{
+  "mode": "process",                       // echo of input mode
+  "client": "acme",                        // routed client id, or "triage"
+  "risk": "high",                          // "low" | "med" | "high" | null (n/a for reflect)
+  "action": "staged",                      // see enum below
+  "pending_ref": "pending-approval/<id>",  // present iff action == staged
+  "outbox_ref": "outbox/<id>",             // present iff action == sent
+  "human_summary": "Acme — pricing reply staged for Jane Counsel.",
+  "metrics": null                          // populated only on mode=reflect
+}
+```
+
+`action` ∈ `sent | staged | logged | deduped | routed_to_triage | failed`.
+The ingress turns `human_summary` + `risk` into the advisor's notification
+(WhatsApp first, email fallback) and uses the refs for follow-up calls.
+
+---
+
+## `mode: process` — a new inbound item
+
+1. **Route.**
+   - If `routing_hint` is set: use `routing_hint.client`, record
+     `routed_by: rule` and `rule_match: <hint.matched_by>`. **Skip reading
+     `routing.yaml` and other clients' files.**
+   - Else: read `routing.yaml`; if no rule matches, pick the best client (or
+     `triage`) from sender + subject + a short snippet only, recording
+     `routed_by: classifier` and a one-line rationale. **Route to `triage`
+     when not confident — never guess.**
+
+2. **Load context for the chosen client (parallel reads):** `CLAUDE.md`,
+   `playbook.md`, `Clients/global-playbook.md`, `contacts.yaml`, and the
+   single latest `decisions/` entry. Defer `examples/` until you know you'll
+   draft.
+
+3. **Write `Clients/<client>/inbox/<ts>-<id>.md`** per FORMATS.md, with the
+   full classification you'll fill in next.
+
+4. **Classify risk** (LOW / MED / HIGH) per the rule in
+   `Clients/global-playbook.md` "Risk classification rule". Do not duplicate
+   that logic here — apply it. Record the matched rule clause in the inbox
+   record's `risk_reason`.
+
+5. **Act:**
+
+   - **Message (gmail / whatsapp), risk strictly BELOW `risk_threshold`:**
+     fetch the full body if you don't have it; draft the reply using
+     `CLAUDE.md` tone + 2–3 closest `examples/` + the thread context. Send via
+     the right MCP, in-thread (`thread_ref`); apply the processed Gmail label
+     / WhatsApp marker so the ingress will skip it. In parallel: write
+     `outbox/<id>.md` (with `sent_at` + `provider_message_id` of the
+     outbound — that's the loop-prevention key) and append
+     `decisions/<date>-<slug>.md` (`auto: true`). Return `action: sent`.
+
+   - **Message, risk AT OR ABOVE threshold:** do **not** send. Write
+     `pending-approval/<id>.md` per FORMATS.md, with `kind: draft_reply`, the
+     drafted reply, the `risk_reason`, and a one-line `suggested_action`.
+     Note in the draft body anything you weren't sure about or any fact you'd
+     need that you don't have. Return `action: staged`.
+
+   - **Plaud transcript:** write `meetings/<date>-<slug>.md` per FORMATS.md
+     (short summary, action items with owner + due, decisions made). Append
+     `decisions/` entries for anything settled in the meeting. If the meeting
+     implies a follow-up email / amendment, draft it and write
+     `pending-approval/<followup-id>.md` with `kind: meeting_followup`
+     (it's almost always HIGH). Auto-send a follow-up only if the client
+     `playbook.md` explicitly whitelists that kind of routine follow-up.
+     Return `action: sent` if sent, `staged` if staged, otherwise `logged`.
+
+6. **Return the completion JSON.**
+
+---
+
+## `mode: approve` — the advisor approved a staged item
+
+Load `Clients/<client>/pending-approval/<ref_id>.md`. Read its `kind`:
+
+- **`kind: draft_reply` or `kind: meeting_followup`** — the body to send is
+  `edited_body` if supplied, otherwise the draft as written. Send via the
+  right MCP, in-thread; apply the processed label / WhatsApp marker. In
+  parallel: move the file to `outbox/<ref_id>.md` (set `sent_at`,
+  `provider_message_id`); append `decisions/<date>-<slug>.md` (`auto: false`);
+  write `feedback/<ref_id>.json` with `your_action: approved` or
+  `approved_with_edits` (with a concise `diff` if edited) and a one-line
+  `derived_lesson` if there's an obvious one.
+
+- **`kind: config_proposal`** — apply the `patch` to the `target_file` named
+  in the front-matter (one of `playbook.md`, `CLAUDE.md`,
+  `Clients/routing.yaml`, `Clients/global-playbook.md`, or an `examples/<slug>.md`).
+  Move the proposal to `outbox/<ref_id>.md` (no send happened, but it's the
+  audit copy); append `decisions/<date>-<slug>.md` with `kind: lesson`; write
+  `feedback/<ref_id>.json` (`your_action: approved`).
+
+Return `action: sent` (for replies/followups) or `action: logged` (for config
+proposals).
+
+## `mode: reject` — the advisor rejected a staged item
 
 Send nothing. Write `feedback/<ref_id>.json` (`your_action: rejected`,
-`your_reason: <reason>`, `derived_lesson` if applicable — e.g. for a rejected
-config proposal, the lesson is "don't generalize from that"). Mark
-`pending-approval/<ref_id>.md` as `superseded` (or move it aside). Return a
-one-line summary noting it was not sent.
-
-### `mode: reflect` — the weekly learning run
-
-`reflect_scope` is a client id or `all`. For each in-scope client: read recent
-`feedback/` records; find patterns that recur **≥2 times** (e.g. you keep
-stripping the greeting → tone rule; you keep flagging a topic as material →
-playbook rule; you keep re-routing a sender → routing rule; cross-client patterns
-→ `global-playbook.md`). For each pattern, write a proposal into that client's (or
-`triage`'s, or a global) `pending-approval/` folder describing the **exact** edit
-to make and why. Do **not** edit any config file directly — every change goes
-through approval. Don't propose anything from a single occurrence. Cap it at a
-few proposals per client per run. Also compute the learning metrics (% of drafts
-approved unchanged, % of auto-sends later corrected (`undone`), routing accuracy
-(`re_routed` rate), median time-to-approve) and include them in the completion
-summary. Return a one-line summary plus the metrics.
+`your_reason: <reason>`, plus `derived_lesson` if applicable — e.g. for a
+rejected `config_proposal`, the lesson is "don't generalise from that"). Set
+`status: superseded` in the front-matter of `pending-approval/<ref_id>.md` and
+move it to `pending-approval/.rejected/<ref_id>.md`. Return `action: logged`.
+
+## `mode: reflect` — the weekly learning run
+
+`reflect_scope` is a client id or `all`. For each in-scope client, read
+`feedback/` records from the last 30 days and identify **patterns**.
+
+A pattern, precisely, is **the same kind of correction targeting the same
+field / topic appearing ≥ 2 times in the last 30 days**, where "kind of
+correction" is one of:
+
+- `tone_edit` — recurring small edits to the same phrasing aspect (greeting,
+  sign-off, register, length).
+- `material_misclass` — items rated LOW that the advisor later flagged `undone`
+  or re-classified HIGH on approval, all sharing a topic / keyword.
+- `routing_miss` — items re-routed (`re_routed`) sharing a sender domain /
+  Plaud title pattern.
+- `rule_addition` — a topic / phrase that recurs in `derived_lesson` strings.
+
+For each pattern, write **one** proposal into the relevant
+`pending-approval/<id>.md` with `kind: config_proposal`, naming the exact
+`target_file` and a minimal `patch` (a `unified-diff` block or, for YAML, a
+small structured edit). **Do not edit any config file directly.** Cap at 5
+proposals per client per run. Cross-client patterns (the same correction kind
+seen across ≥ 2 clients) go to `Clients/Triage/pending-approval/` with a
+`target_file: Clients/global-playbook.md`.
+
+Compute and return the `metrics` block in the completion JSON:
+
+```json
+"metrics": {
+  "window_days": 30,
+  "items_total": 124,
+  "drafts_approved_unchanged_pct": 0.62,
+  "auto_sent_corrected_pct": 0.04,
+  "routing_accuracy_pct": 0.96,
+  "median_time_to_approve_minutes": 38,
+  "patterns_proposed": 7
+}
+```
+
+Return `action: logged`.
+
+---
 
 ## Hard rules (a run FAILS if you break these)
 
-- **Never send anything material without staging it first.** If you're unsure
-  whether something is material, it is.
-- **Never invent** facts, figures, dates, names, or commitments. If a reply needs
-  something you don't have, that alone makes it material — stage it and say
-  what's missing.
+- **Never send anything material without staging it first.** If unsure → MED at
+  minimum → stage.
+- **Never invent** facts, figures, dates, names, or commitments. Missing facts
+  → MED at minimum → stage with a note about what's missing.
 - **Route to `triage` when unsure** — never guess a client.
 - **Never write secrets or full credentials** into a client folder.
-- **Data minimisation:** route on metadata + a snippet; use the full body only
-  when drafting; keep `feedback/` records as summaries + diffs, not full
-  transcripts.
-- **Loop prevention:** anything you send must be labelled/marked so the ingress
-  skips it; never process an item whose id is already in an `outbox/` record.
-- **Respect `max_turns`.** If you can't finish cleanly, write what you have, set
-  the item's `status` to reflect that, and report the problem in the summary
-  rather than half-acting.
+- **Data minimisation:** route on metadata + snippet; fetch the full body only
+  to draft or classify a borderline item; keep `feedback/` records as
+  summaries + diffs, not full transcripts.
+- **Loop prevention:** every send must be labelled / marked so the ingress
+  skips it (Gmail "processed" label, WhatsApp marker text); the outbound's own
+  `provider_message_id` must be recorded in `outbox/`. Bail fast on a
+  duplicate `id`.
+- **No config file edits except via `mode: approve` on a `config_proposal`.**
+  The reflection run only proposes; only approval applies.
diff --git a/orchestrator/global-playbook.md b/orchestrator/global-playbook.md
index 09db593..99e1ac5 100644
--- a/orchestrator/global-playbook.md
+++ b/orchestrator/global-playbook.md
@@ -1,66 +1,92 @@
 # Global playbook
 
-Cross-client rules the orchestrator agent reads on **every** item, in addition to
-the matched client's `playbook.md`. Keep this short and high-confidence — these
-override nothing client-specific, they only set the floor. New entries here
-should come from the weekly reflection run (patterns seen across ≥2 clients) and
-be approved like any other change.
-
-## Always treat as MATERIAL (→ stage in pending-approval, never auto-send)
-
-- Anything about price, fees, rates, discounts, payment terms, or milestone
-  payments.
-- Anything about scope, deliverables, timelines that affect a deadline, or change
-  requests.
-- Contract terms: NDAs beyond the standard template, MSAs, SOWs, SPAs, term
-  sheets, indemnities, liability caps, IP ownership, exclusivity,
-  non-compete/non-solicit, termination clauses.
-- Equity, options, valuation, cap-table, or anything with a number that could
-  appear in a financing.
-- Headcount, hiring, redundancies, or comp.
-- Anything a counterparty's lawyer sends or is copied on.
-- Anything that commits the client to a meeting, call, or deadline with a third
-  party that isn't a simple "yes, that time works".
-- Anything where you're not sure — default to MATERIAL.
-
-## Safe to auto-send (LOW risk) — only when ALL of these hold
-
-- It's a reply within an existing thread to a known contact (in `contacts.yaml`).
-- It conveys no new commercial position — it confirms, acknowledges, schedules,
-  forwards, or sends an already-approved standard document.
-- It contains no numbers that matter, no contract language, no scope/timeline
-  change.
-- The client's `playbook.md` doesn't flag the topic or the sender as always-ask.
-- The client's `risk_threshold` permits auto-send.
+Cross-client rules the orchestrator agent reads on **every** item, in addition
+to the matched client's `playbook.md`. This file is the **single source of
+truth** for the risk-classification rule and the tone defaults —
+`system-prompt.md` defers to what's here so the rules don't drift between two
+places. Keep this short and high-confidence; new entries should come from the
+weekly reflection run (a pattern seen across ≥2 clients) and be approved like
+any other change.
+
+## Risk classification rule
+
+Decide LOW / MED / HIGH in this order. First match wins.
+
+1. **Forced HIGH — if ANY of these holds.** No other check needed.
+   - The matched client's `contacts.yaml` flags the sender as
+     `always_review: true`.
+   - The matched client's `playbook.md` flags the topic, the sender, or a phrase
+     in the body as always-ask / material.
+   - The item touches any of the always-material categories in step 2.
+
+2. **Always-material categories (any hit → HIGH via step 1).** Anything about:
+   - **Price / fees:** day rate, retainer, success fee, discounts, payment
+     terms, milestone payments, late fees.
+   - **Scope / timeline / deliverables:** change requests, anything that moves a
+     deadline by more than the playbook's slack, scope additions, exclusions.
+   - **Contract terms:** NDAs beyond the standard template, MSAs, SOWs, SPAs,
+     LOIs, term sheets, indemnities, liability caps, IP ownership, exclusivity,
+     non-compete/non-solicit, termination, governing law.
+   - **Cap table / financing:** equity, options, valuation, dilution,
+     anti-dilution, pre-emption, drag/tag, anything with a number that could
+     appear in a financing.
+   - **People:** headcount, hiring, redundancies, comp.
+   - **Counterparty legal:** anything a counterparty's lawyer sends or is copied
+     on (their party only — your own counsel may be in `contacts.yaml`).
+   - **External commitments:** anything that commits the client to a meeting,
+     call, or deadline with a third party that isn't a simple "yes, that time
+     works".
+
+3. **LOW — only if ALL of these hold.**
+   - It's a reply within an existing thread to a known contact (in
+     `contacts.yaml`).
+   - It conveys no new commercial position — it confirms, acknowledges,
+     schedules, forwards, or sends an already-approved standard document.
+   - It contains no numbers that matter, no contract language, and no scope or
+     timeline change.
+   - The client's `playbook.md` doesn't flag the topic or the sender as
+     always-ask.
+   - The classifier is confident (not uncertain about routing, parties, or
+     topic).
+
+4. **Otherwise MED.** Default to MED whenever an item isn't clearly LOW and
+   isn't covered by a forced-HIGH rule. **When in doubt → MED at minimum, HIGH
+   if it could move money or commitments.**
+
+`risk_threshold` in `routing.yaml` gates auto-send: an item rated **strictly
+below** the threshold may be auto-sent; at or above it, it stages in
+`pending-approval/`. So a client with `risk_threshold: med` auto-sends LOW only
+and stages MED + HIGH.
 
 ## Tone and form (defaults — client `playbook.md` overrides)
 
 - Professional, concise, no filler. Get to the point in the first sentence.
-- Don't over-apologize, don't over-explain, don't speculate on the client's
+- Don't over-apologise, don't over-explain, don't speculate on the client's
   behalf about anything commercial.
 - Match the thread's register and language.
-- Never invent facts, figures, dates, or commitments. If a reply needs a fact you
-  don't have, that alone makes it MATERIAL — stage it with a note about what's
-  missing.
+- Never invent facts, figures, dates, or commitments. If a reply needs a fact
+  you don't have, that alone makes it MED at minimum — stage it with a note
+  about what's missing.
 
 ## Handling meeting transcripts (Plaud)
 
-- A transcript is new context, not something to reply to. Summarize it, extract
+- A transcript is new context, not something to reply to. Summarise it, extract
   action items / commitments / decisions, and update `decisions/`.
 - Any follow-up email or amendment that comes out of a meeting is almost always
-  MATERIAL — stage it. Only auto-send a follow-up if it's purely "sending the
-  notes / confirming the next slot" and `playbook.md` whitelists it.
+  HIGH — stage it. Only auto-send a follow-up if it's purely "sending the
+  notes / confirming the next slot" and the client `playbook.md` whitelists it.
 
 ## Routing
 
 - If you can't confidently route an item to a client, send it to `triage` —
   never guess.
-- A re-route by the user is a routing signal: log it to `feedback/` so the
-  reflection run can propose a `routing.yaml` rule.
+- A user re-route is a routing signal: log it to `feedback/` so the reflection
+  run can propose a `routing.yaml` rule.
 
 ## Privacy / data handling
 
-- Send the classifier only what it needs: sender + subject + a short snippet.
-- Send full bodies/transcripts to the drafter only when actually drafting.
+- Route on metadata + a short snippet, not the full body.
+- Fetch the full body only when actually drafting or classifying borderline
+  items.
 - Keep `feedback/<id>.json` as one-line summaries + diffs, not full transcripts.
-- Don't write secrets or full credentials into any file in a client folder.
+- Never write secrets or full credentials into any file in a client folder.
diff --git a/orchestrator/routing.example.yaml b/orchestrator/routing.example.yaml
index b66bbd7..043e097 100644
--- a/orchestrator/routing.example.yaml
+++ b/orchestrator/routing.example.yaml
@@ -1,23 +1,29 @@
 # Routing + risk config for the orchestrator.
 #
-# Copy to routing.yaml and fill in real values. The agent reads this to decide
-# which client an inbound item belongs to and whether it may be auto-sent.
+# Copy to routing.yaml and fill in real values. The ingress reads this to attempt
+# a rule match on every inbound item BEFORE invoking the agent — when a rule
+# matches, the ingress passes routing_hint to the agent and the agent skips
+# loading other clients' contexts (a big speed win on the hot path).
 #
 # Matching order:
-#   1. Rule match on this file: sender email/domain, sender phone, or — for Plaud
-#      transcripts — a meeting-title pattern or a participant email.
-#   2. If no rule matches, the classifier reads the candidate clients' playbook.md
-#      files and picks one (or `triage`) with a rationale.
+#   1. Exact email match in projects[*].emails (sender_email)
+#   2. Email domain match: "@acme.com" matches anyone @acme.com (sender_domain)
+#   3. Phone match in projects[*].phones (sender_phone) — for whatsapp
+#   4. Plaud meeting title contains a string in projects[*].plaud_title_patterns
+#      (plaud_title), OR a participant's email matches projects[*].plaud_participant_emails
+#      (plaud_participant)
+#   5. No match -> ingress invokes the agent with no routing_hint; the agent
+#      classifies, or routes to default_project ("triage") if not confident.
 #
-# risk_threshold semantics: an item the classifier rates BELOW this level may be
-# auto-sent; at or above it, the item is staged in pending-approval/. So:
-#   - risk_threshold: high  → only HIGH items stage; LOW and MED auto-send.
-#   - risk_threshold: med   → MED and HIGH stage; only LOW auto-sends.   (recommended default)
-#   - risk_threshold: low   → everything stages; nothing auto-sends.     (safest)
+# risk_threshold gates auto-send. An item the agent rates STRICTLY BELOW the
+# threshold may be auto-sent; AT OR ABOVE it, it stages in pending-approval/.
+#   - risk_threshold: high  -> only HIGH stages; LOW + MED auto-send.   (loose)
+#   - risk_threshold: med   -> MED + HIGH stage; only LOW auto-sends.   (recommended)
+#   - risk_threshold: low   -> everything stages; nothing auto-sends.   (safest)
 
 default_project: triage          # where unrouted items go
 
-# Optional global caps (the agent / ingress enforce these; alerts when exceeded).
+# Optional global caps. The ingress enforces them and alerts you when exceeded.
 limits:
   per_client_monthly_token_cap: 2000000   # soft cap; pause + notify when hit
   daily_spend_alert_usd: 5
@@ -25,16 +31,16 @@ limits:
 projects:
 
   acme:
-    drive_folder_id: "REPLACE_WITH_DRIVE_FOLDER_ID"   # ID of Clients/Acme/ in your cloud store
+    drive_folder_id: "REPLACE_WITH_DRIVE_FOLDER_ID"   # ID of Clients/Acme/
     display_name: "Acme Corp"
     emails:
       - "@acme.com"                 # any sender at this domain
       - "ceo@acme.io"               # a specific external address
-      - "jane.counsel@lawfirm.com"  # their lawyer — note: counsel mail is MATERIAL by global rule
+      - "jane.counsel@lawfirm.com"  # their lawyer — note: counsel mail is forced HIGH by global-playbook
     phones:
       - "+15551234567"              # E.164; matched against whatsmeow sender JID
     plaud_title_patterns:
-      - "Acme"                      # substring match on the Plaud meeting title
+      - "Acme"
       - "Project Falcon"            # the engagement's internal codename
     plaud_participant_emails:
       - "@acme.com"
@@ -51,14 +57,10 @@ projects:
       - "Beta"
     risk_threshold: high            # this client: stage everything material, never auto-send
 
-  # Add one block per client. Onboarding a client = create Clients/<Name>/ from
-  # the template, fill CLAUDE.md/playbook.md/contacts.yaml, connect it to the
-  # cowork project, then add a block here.
-
-# The triage "project" is a real folder + cowork project where unrouted items
-# land. Re-routing from triage is logged to feedback/ so the reflection run can
-# propose a new rule above.
-triage:
-  drive_folder_id: "REPLACE_WITH_DRIVE_FOLDER_ID"
-  display_name: "Triage"
-  risk_threshold: low              # never auto-send anything from triage
+  # The default destination for unrouted items. Mirror it in cowork as a Triage
+  # project; re-routes from here are logged to feedback/ so the reflection run
+  # can propose a routing.yaml rule.
+  triage:
+    drive_folder_id: "REPLACE_WITH_DRIVE_FOLDER_ID"
+    display_name: "Triage"
+    risk_threshold: low             # never auto-send anything from triage