From 9c96fed8292e85ab4f8d22b3f3d29cb368ec0297 Mon Sep 17 00:00:00 2001 From: Michael Whatcott Date: Thu, 7 May 2026 16:27:31 -0600 Subject: [PATCH 01/50] Proposal --- ...-05-07_13-06-13-proposal-import-harness.md | 291 ++++++++++++++++++ 1 file changed, 291 insertions(+) create mode 100644 doc/work-sessions/2026/2026-05-07_13-06-13-proposal-import-harness.md diff --git a/doc/work-sessions/2026/2026-05-07_13-06-13-proposal-import-harness.md b/doc/work-sessions/2026/2026-05-07_13-06-13-proposal-import-harness.md new file mode 100644 index 0000000..b685c5e --- /dev/null +++ b/doc/work-sessions/2026/2026-05-07_13-06-13-proposal-import-harness.md @@ -0,0 +1,291 @@ +--- +name: Import harness + sqladapter into messaging/v3 +description: Proposal to lift the infra/harness pipeline into github.com/smarty/messaging/v3/handlers/harness and relocate the SQL-bound dispatcher/writer/recovery as a sqladapter subpackage. +type: plot +--- + +# Proposal: Import `infra/harness` into `messaging/v3/handlers/harness` + +## Background + +A second project (working copy under `billing-context/domain-transformation-phase-9-chunk-C/code/infra`) has grown a staged, pipeline-based message-handling "harness" that we now want to promote into the shared `github.com/smarty/messaging/v3` module so it can be reused. The source split is: + +- `infra/harness/*` — a generic, store-and-forward pipeline built from goroutine stages connected by buffered channels. Stages: `Entrypoint → Execution → Serialization (fan-out) → Persistence → Completion → Broadcast → Terminal`. Supporting code: `fanout.go`, `pool.go`, `routing.go`, `scanner.go`. +- `infra/*` — supporting types and a **reference implementation** of the `Writer` / `Dispatcher` interfaces plus a `Recover` function, all coupled to the same MySQL `Messages` table that `sqlmq/_schema_mysql.sql` already defines (`id`, `dispatched`, `type`, `payload`). + +The target module (`github.com/smarty/messaging/v3`) already has a `handlers/` namespace (`multi`, `retry`, `sqltx`, `transactional`) and reusable pipeline/streaming infrastructure (`streaming/`, `batch/`). The harness belongs there as another composable handler pattern. + +Decisions locked in during /plot and /replot: + +1. **Scope:** bring in both the generic harness and the SQL-backed reference implementation, but **invert the nesting**: harness at `handlers/harness`, SQL reference impl at `handlers/harness/sqladapter`. +2. **Contracts:** reuse `messaging.Handler` and `messaging.Listener` from the root `contracts.go` instead of re-declaring them inside the harness package. +3. **Serialization:** the pipeline holds an unexported `serializer` collaborator with a `Serialize(out io.Writer, in any) error` signature (close to `encoding/json/v2`'s `MarshalWrite`); the caller supplies one. The `goexperiment.jsonv2` build tag is dropped. **No serializer implementations ship with this module** — callers own encoding. +4. **Wire-up:** `New(...)` uses the module's functional-options convention (`Options singleton` pattern — see `handlers/retry/config.go`, `batch/config.go`, `handlers/transactional/config.go`). +5. **Test library:** bring in `github.com/smarty/gunit/v2` alongside the existing v1 dep. Both major versions coexist until a later, separate module-wide upgrade to v2. + +## Approach + +### 1. Destination layout + +``` +github.com/smarty/messaging/v3/ +├── contracts.go (unchanged; we'll reuse Handler, Listener, ListenCloser) +├── handlers/ +│ ├── ...existing sub-packages... +│ └── harness/ (NEW — generic pipeline; only New + Options are exported) +│ ├── contracts.go (Writer, Dispatcher, Monitor — exported collaborator types; executor, applicator, serializer — unexported internals; event structs + error sentinels) +│ ├── config.go (New(...) + Options singleton + option func + nop) +│ ├── message.go (Message value type — lifted from infra/message.go) +│ ├── pipeline.go (unexported build(ctx, cfg) wiring) +│ ├── 00_entrypoint.go / _test.go (unexported entrypoint type + newEntrypoint) +│ ├── 01_execution.go / _test.go (unexported execution + newExecution) +│ ├── 02_serialization.go / _test.go (unexported serialization + newSerialization; no build tag) +│ ├── 03_persistence.go / _test.go (unexported persistence + newPersistence) +│ ├── 04_completion.go / _test.go (unexported completion + newCompletion) +│ ├── 05_broadcast.go / _test.go (unexported broadcast + newBroadcast) +│ ├── 06_terminal.go / _test.go (unexported terminal + newTerminal) +│ ├── fanout.go (unexported fanIn + newFanIn + newFanOut) +│ ├── pool.go +│ ├── routing.go / routing_test.go (unexported router + newRouter) +│ ├── scanner.go +│ └── pipeline_test.go +│ └── sqladapter/ (NEW — SQL reference impl) +│ ├── dispatcher.go / _test.go +│ ├── writer.go / _test.go +│ ├── recovery.go / _test.go +│ └── contracts.go (Logger interface) +``` + +### 2. Contract changes + +The current `infra/harness/contracts.go` declares: + +```go +type Handler interface{ Handle(context.Context, ...any) } +type Listener interface{ Listen() } +``` + +These are structurally identical to the ones in `messaging/contracts.go`, so we delete the local versions and have `harness.New(...)` return `messaging.Handler` + `[]messaging.Listener`. The `entrypoint`'s `Close() error` + `Listen()` also already satisfies `messaging.ListenCloser`. + +The package's collaborator interfaces split into two groups: + +- **Exported** (caller supplies a real implementation via `Options.*`): `Writer`, `Dispatcher`, `Monitor`. These are the external boundaries of the pipeline. +- **Unexported** (internal to the package): `executor`, `applicator`, `serializer`, plus every pipeline-stage struct (`entrypoint`, `execution`, `serialization`, `persistence`, `completion`, `broadcast`, `terminal`), the `router`/`newRouter`, and the `fanIn`. The only way into the package is `New(...)` + `Options.*`. `Executor` and `Applicator` get unexported because callers never instantiate them directly — the pipeline discovers them reflectively via `router` from the domain types supplied through `Options.Types(...)`. + +The pluggable encoder lives as an unexported interface: + +```go +// serializer encodes a single message's value onto the supplied writer. The +// signature intentionally mirrors encoding/json/v2's MarshalWrite. Implementations +// must be safe for concurrent use — the pipeline runs multiple serialization workers. +type serializer interface { + Serialize(out io.Writer, in any) error +} +``` + +The `serialization` stage stops importing any encoder directly; its constructor takes a `serializer`: + +```go +func newSerialization(monitor Monitor, enc serializer, input, output chan *unitOfWork) *serialization +``` + +**No serializer implementations ship with this module.** Callers provide their own (for example, a thin wrapper around `encoding/json/v2`'s `MarshalWrite`, `encoding/json`'s `NewEncoder(w).Encode(v)`, protobuf, etc.). Keeping the type unexported forces callers to supply the collaborator through the functional option (see §8) rather than constructing a stage directly. + +### 3. Events and monitor + +The `Monitor` interface and its event types (`BatchInFlight`, `BatchComplete`, `UnitOfWorkInFlight`, `UnitOfWorkComplete`, `SerializationError`, `PersistenceError`, `BroadcastError`) move from `infra/contracts.go` to `handlers/harness/contracts.go` — all exported so callers can type-switch in their `Monitor.Track(any)` implementation. The error sentinels (`ErrSerialization`, `ErrPersistence`, `ErrBroadcast` — renamed from `ErrJSONSerialization` since we're format-agnostic) move with them. `Logger` is no longer needed inside `harness` itself (only `sqladapter` uses it); it moves into `sqladapter/contracts.go`. + +### 4. `Message` type placement + +`infra.Message` becomes `harness.Message` at `handlers/harness/message.go`. Its fields (`ID`, `Value`, `Type`, `Content`, `ContentType`, `ContentEncoding`, `Stored`, `Dispatched`) stay intact. The SQL adapter references `harness.Message` rather than duplicating. + +### 5. SQL adapter (`handlers/harness/sqladapter`) + +Direct port of `infra/dispatcher.go`, `infra/writer.go`, `infra/recovery.go`, with these edits: + +- `package infra` → `package sqladapter`. +- Import path `root/code/infra` (implied reference) drops; cross-package uses switch to `github.com/smarty/messaging/v3/handlers/harness`. +- `messaging.Connector` / `messaging.Dispatch` references resolve against the **same** module now (no path change needed — they already import `github.com/smarty/messaging/v3`). +- Package doc comment labels it as a reference implementation that targets the `Messages` table defined by `sqlmq/_schema_mysql.sql` in this same module (columns `id`, `dispatched`, `type`, `payload`). That schema coupling is not new — `sqlmq/dispatch_store.go` already writes to the same table. The adapter is simply a second reader/writer of the same schema. +- Preserve the existing `TODO` comments (double-encoding note in `dispatcher.go`, pagination note in `recovery.go`) — those are known issues, not something this import should silently fix. +- Keep `legacyWrite func(context.Context, *sql.Tx, ...any)` escape hatch on `Writer`, but document it as deprecated in the package comment. + +### 6. Test infrastructure + +The source tests use **gunit v2** (`github.com/smarty/gunit/v2`) along with `assert/better` + `assert/should`. The target module currently pins **gunit v1** (`github.com/smarty/gunit v1.6.0`). + +**Plan:** add `github.com/smarty/gunit/v2` to `go.mod` and port the tests with their v2 imports intact. The module will temporarily depend on both major versions. A later, separate initiative will migrate the rest of the module's tests from v1 to v2, at which point v1 can be dropped — that migration is out of scope for this import. + +If any imported test uses assertions that don't exist in v2 (or have different semantics), adapt as minimally as possible; do not rewrite assertions that already work. + +### 7. `go.mod` updates + +- Add `github.com/smarty/gunit/v2` (latest compatible version). Both v1 and v2 will be present until the future module-wide migration. +- `go.uber.org/goleak` is in source `go.sum` — confirm during import whether any ported test actually uses it; add only if needed. +- No other new dependencies. `io` and (if caller demonstrations are ever added in docs) stdlib-only. + +### 8. Functional-options wire-up + +The existing `New(ctx, monitor, executor, writer, dispatcher)` signature is replaced with the module's functional-options pattern. **Only `ctx` is positional**; every collaborator — including the domain types that drive routing — flows through options. The source `Executor`/`Applicator` interfaces become unexported (`executor`, `applicator`); the reflective `router` discovers real implementations from the domain types supplied via `Options.Types(...)` and the pipeline never asks the caller for an `Executor` directly. Following the convention in `handlers/retry/config.go` and `batch/config.go`: + +```go +// handlers/harness/config.go +package harness + +import ( + "context" + "io" + + "github.com/smarty/messaging/v3" +) + +func New(ctx context.Context, options ...option) (messaging.Handler, []messaging.Listener) { + var cfg configuration + for _, apply := range Options.defaults(options...) { + apply(&cfg) + } + return build(ctx, cfg) // internal wiring lives in pipeline.go +} + +var Options singleton + +type singleton struct{} +type option func(*configuration) + +type configuration struct { + Monitor Monitor + Serializer serializer + Writer Writer + Dispatcher Dispatcher + Types []any // domain types (handlers/applicators); passed to newRouter + BatchCapacity int // channel buffer, default 1024 + UnitSize int // max completions per unit, default 64 + SerializerCount int // fan-out worker count, default 4 +} + +// Types registers the domain objects whose Execute.../Apply... methods drive +// the pipeline. They are passed verbatim to newRouter(...) at build time. +func (singleton) Types(value ...any) option { return func(c *configuration) { c.Types = value } } +func (singleton) Monitor(value Monitor) option { return func(c *configuration) { c.Monitor = value } } +func (singleton) Serializer(value serializer) option { return func(c *configuration) { c.Serializer = value } } +func (singleton) Writer(value Writer) option { return func(c *configuration) { c.Writer = value } } +func (singleton) Dispatcher(value Dispatcher) option { return func(c *configuration) { c.Dispatcher = value } } +func (singleton) BatchCapacity(value int) option { return func(c *configuration) { c.BatchCapacity = value } } +func (singleton) UnitSize(value int) option { return func(c *configuration) { c.UnitSize = value } } +func (singleton) SerializerCount(value int) option { return func(c *configuration) { c.SerializerCount = value } } + +func (singleton) defaults(options ...option) []option { + var blank = nop{} + return append([]option{ + Options.Monitor(blank), + Options.Serializer(blank), + Options.Writer(blank), + Options.Dispatcher(blank), + Options.BatchCapacity(1024), + Options.UnitSize(64), + Options.SerializerCount(4), + }, options...) +} + +// nop satisfies every collaborator interface so New(...) can be called with +// zero options and still produce a runnable (if inert) pipeline. Callers +// override whichever collaborators they care about via Options.*. +type nop struct{} + +func (nop) Track(any) {} +func (nop) Serialize(io.Writer, any) error { return nil } +func (nop) Write(context.Context, ...any) error { return nil } +func (nop) Dispatch(context.Context, ...any) error { return nil } +func (nop) Execute(any, func(...any)) {} +``` + +Design notes: + +- `ctx` stays positional because the pipeline's lifetime is tied to it (see Trade-offs); everything else is an option. +- `Options.Types(...)` is the single entry point for registering domain behavior. `build(ctx, cfg)` constructs the internal router via `newRouter(cfg.Types...)` and passes it into the `execution` stage as its `executor` collaborator. Callers never hold a reference to the router. +- `Monitor`, `Serializer`, `Writer`, and `Dispatcher` all default to a shared `nop{}` implementation. A caller can invoke `New(ctx)` with zero options and get a runnable, inert pipeline; real deployments override whichever collaborators they care about via `Options.*`. This matches the defaulting style in `handlers/retry` and `handlers/transactional` (where `Logger` and `Monitor` default to nop). +- Buffer/worker tunables, previously hard-coded, are now options with the existing values as defaults. +- `build(ctx, cfg)` internally performs the current `New(...)`'s channel wiring; it lives in `pipeline.go` and stays unexported. +- Every pipeline-stage struct and its constructor is unexported (`entrypoint`/`newEntrypoint`, `execution`/`newExecution`, `serialization`/`newSerialization`, `persistence`/`newPersistence`, `completion`/`newCompletion`, `broadcast`/`newBroadcast`, `terminal`/`newTerminal`, `fanIn`/`newFanIn`/`newFanOut`, `router`/`newRouter`). The only way into the package is `New(...)` + `Options.*`. + +### 9. Alternatives considered + +- **Flat layout, no sqladapter split.** Rejected: conflates a generic pipeline with a schema-coupled reference impl; callers targeting a different schema would be forced to copy-paste rather than compose. +- **Top-level `/harness` package (sibling to `handlers/`).** Rejected: the user prefers nesting under `handlers/` where similar composable handler patterns live. +- **Absorb `Message`, `Monitor`, events into the root `messaging` package.** Rejected: these are harness-specific concepts (the root `Delivery`/`Dispatch` types are the message abstraction for the rest of the module); promoting them would blur the boundary. +- **Keep `jsonv2` build tag.** Rejected in favor of pluggable serialization. A caller who wants jsonv2 writes a short adapter; the pipeline stays encoder-agnostic. +- **Ship a JSON serializer helper subpackage.** Rejected per /replot: this module deliberately provides no serializer implementations. Callers own encoding. +- **Positional-argument constructor.** Rejected in favor of functional options for consistency with `retry`, `batch`, and `transactional`. +- **Expose stage types / `Executor` / `Router`.** Rejected: callers have no reason to construct a single stage or wire their own router. Keeping everything except `New`, `Options`, `Message`, the collaborator interfaces, and the event/error types unexported narrows the supported surface and lets us refactor internals without breaking users. +- **Positional `executor` argument.** Rejected: the source's `Executor` interface is really just "anything whose `Execute...(msg, broadcast)` methods the reflective router can see." Exposing a positional `Executor` parameter implied that callers build their own; `Options.Types(...)` more honestly describes the actual input (a list of domain objects) and lets the router stay private. + +## Trade-offs & Risks + +- **Context handling.** `Persistence` and `Broadcast` both capture the `ctx` supplied to `New(...)` and use it for every downstream Write/Dispatch call — so the pipeline's lifetime is tied to a single root context. This matches the source but is worth confirming explicitly; if the v3 conventions expect per-handle contexts to flow through, the stage signatures need adjustment. **Open question** for the user — proposal currently preserves source behavior. +- **Retry-forever semantics.** Persistence and Broadcast retry indefinitely with a 1-second sleep (and TODOs for exponential backoff). Imported as-is. `handlers/retry` already exists in the target module and uses more sophisticated logic; over time we may want to delegate to it, but not in this import. +- **Channel capacity defaults.** Back-pressure into the entrypoint's `Handle` (which holds a read lock while sending) can block callers under heavy load. Now tunable via `Options.BatchCapacity`, but the default of 1024 should be documented with this characteristic. +- **`reflect`-based routing.** The internal router's `Execute` mutates a shared `exclusions` slice without locking — fine in the current pipeline (routing runs inside `execution` which a single goroutine calls per message). Because the router is now unexported and constructed once per `New(...)` call, the sharing concern goes away. Add a doc comment on the internal type anyway so future contributors don't regress this invariant. +- **Dual gunit majors.** Temporarily carrying both `github.com/smarty/gunit` and `github.com/smarty/gunit/v2` increases `go.sum` noise and means new tests in *this* import use v2 while older tests elsewhere in the module still use v1. The divergence is explicit and scoped; the follow-up module-wide upgrade resolves it. +- **Silent-by-default collaborators.** Because `Serializer`, `Writer`, and `Dispatcher` all default to a `nop{}` that returns `nil`, forgetting to wire a real implementation produces a pipeline that silently drops work rather than surfacing an error. Likewise, forgetting `Options.Types(...)` produces a pipeline whose router has no registered handlers. Callers must remember to set these. Mitigate by documenting this in the package doc comment and the `New(...)` godoc, and by making the `Options.*` entries the obvious next step in any example. + +## Implementation Checklist + +### Phase 1: Scaffolding and contracts + +- [ ] Create directory `handlers/harness/` and `handlers/harness/sqladapter/`. +- [ ] Add `github.com/smarty/gunit/v2` to `go.mod`; run `go mod tidy`. +- [ ] Write `handlers/harness/contracts.go` with the **exported** surface (`Writer`, `Dispatcher`, `Monitor`, event structs `BatchInFlight`, `BatchComplete`, `UnitOfWorkInFlight`, `UnitOfWorkComplete`, `SerializationError`, `PersistenceError`, `BroadcastError`, sentinels `ErrSerialization`, `ErrPersistence`, `ErrBroadcast`) plus the **unexported** internal interfaces (`executor`, `applicator`, `serializer`) and value types (`batch`, `unitOfWork`). +- [ ] Write `handlers/harness/message.go` — copy `Message` struct with doc comments intact. +- [ ] Write `handlers/harness/pool.go` — copy verbatim (no external deps). +- [ ] Write `handlers/harness/scanner.go` — copy verbatim. +- [ ] Write `handlers/harness/fanout.go` — rename `fanIn` / `newFanIn` / `newFanOut` to stay unexported (already lowercase in source). +- [ ] Run `make compile` — confirm the package compiles (no tests yet). + +### Phase 2: Port stages bottom-up, TDD each one + +Work stage-by-stage from the terminal stage (simplest) upward, since downstream stages have no dependencies on upstream stages. All tests use `gunit/v2` imports as-is from source. **All stage types and constructors are renamed to unexported forms** (`Terminal` → `terminal`, `NewTerminal` → `newTerminal`, etc.); since the tests live in the same package, `_test.go` files can see them. + +- [ ] Copy `06_terminal_test.go` from source; rename referenced types to lowercase. Run the terminal test — expect **failure** (`terminal` type doesn't exist yet in this package). +- [ ] Port `06_terminal.go` as `terminal` / `newTerminal`. Run tests; confirm passing. +- [ ] Copy `04_completion_test.go`; lowercase the type references. Run — expect failure. +- [ ] Port `04_completion.go` as `completion` / `newCompletion`. Run tests; confirm passing. +- [ ] Copy `05_broadcast_test.go`; lowercase the type references. Run — expect failure. +- [ ] Port `05_broadcast.go` as `broadcast` / `newBroadcast`. Run; confirm passing. +- [ ] Copy `03_persistence_test.go`; lowercase the type references. Run — expect failure. +- [ ] Port `03_persistence.go` as `persistence` / `newPersistence`. Run; confirm passing. +- [ ] **Rewrite** `02_serialization_test.go` to use a fake `serializer` (not jsonv2). Test should cover: success path writes to `message.Content`; serializer error is reported via monitor as `SerializationError` with `ErrSerialization` wrapped. Run — expect failure. +- [ ] Port `02_serialization.go` as `serialization` / `newSerialization` with the new `Serialize(io.Writer, any) error` signature; drop the `//go:build goexperiment.jsonv2` tag. Run; confirm passing. +- [ ] Copy `01_execution_test.go`; lowercase the type references. Run — expect failure. +- [ ] Port `01_execution.go` as `execution` / `newExecution`. Run; confirm passing. +- [ ] Copy `00_entrypoint_test.go`; lowercase the type references. Confirm it exercises `Close` + `Listen` semantics. Run — expect failure. +- [ ] Port `00_entrypoint.go` as `entrypoint` / `newEntrypoint`. Run; confirm passing. + +### Phase 3: Routing, pipeline, and functional-options config + +- [ ] Copy `routing_test.go`; lowercase references (`Router` → `router`, `NewRouter` → `newRouter`). Run — expect failure. +- [ ] Port `routing.go` as `router` / `newRouter` with unexported `executor` / `applicator` interfaces; update `scanner.go`'s signature to match. Run; confirm passing. +- [ ] Write `handlers/harness/pipeline.go` as the unexported `build(ctx, cfg)` function; it constructs all the channels, calls `newRouter(cfg.Types...)`, wires every stage, and returns `messaging.Handler` + `[]messaging.Listener`. +- [ ] Write `handlers/harness/config.go` with `New(ctx, options...)` (no positional executor), `Options singleton`, `option` type, `configuration` struct (including `Types []any`), and per-option setters per §8, including `Options.Types(...)`. +- [ ] Write a `config_test.go` that asserts defaults (`BatchCapacity=1024`, `UnitSize=64`, `SerializerCount=4`, and that `Monitor`, `Serializer`, `Writer`, and `Dispatcher` all default to the shared `nop{}` and behave inertly when the pipeline runs with no options supplied). Also assert that `Options.Types(...)` populates `configuration.Types` verbatim. +- [ ] Adapt the source `pipeline_test.go` to call the new functional-options `New(...)`: the fixture registers itself via `Options.Types(this)` (it implements the `Execute...` method), and supplies fake `Writer`/`Dispatcher`/`serializer`/`Monitor` via `Options.*`. Run — expect failure if anything is still misaligned, then make green. +- [ ] Run the full harness test suite with `-race`; confirm no goroutine leaks and all tests pass. + +### Phase 4: SQL adapter + +- [ ] Copy `infra/dispatcher_test.go` to `handlers/harness/sqladapter/dispatcher_test.go`. Rewrite imports (`package infra` → `sqladapter`, `infra.Message` → `harness.Message`; gunit imports stay v2). Run — expect failure. +- [ ] Port `dispatcher.go` to `sqladapter/dispatcher.go`. Add package-level doc comment labeling it as a reference implementation targeting the `Messages` table defined by `sqlmq/_schema_mysql.sql`. Preserve existing TODOs. Run; confirm passing. +- [ ] Copy `writer_test.go` → `sqladapter/writer_test.go`, rewrite imports. Run — expect failure. +- [ ] Port `writer.go`. Preserve `legacyWrite` escape hatch with a deprecation note in the godoc. Run; confirm passing. +- [ ] Copy `recovery_test.go` → `sqladapter/recovery_test.go`, rewrite imports. Run — expect failure. +- [ ] Port `recovery.go`. Preserve existing TODOs about pagination and Listener conversion. Run; confirm passing. +- [ ] Add `sqladapter/contracts.go` with the `Logger` interface (moved from `infra/contracts.go`). + +### Phase 5: Module hygiene + +- [ ] Run `make test` — full module test suite with `-race -covermode=atomic`. Confirm green. +- [ ] Run `go mod tidy`. Confirm the only new direct dep is `github.com/smarty/gunit/v2`. +- [ ] Inspect `go.sum` diff — confirm no surprising indirect additions. +- [ ] Grep the new packages for references to `root/code/infra` — confirm zero. +- [ ] Grep the new packages for any `goexperiment.jsonv2` build tags — confirm zero. +- [ ] Add short package-level doc comments (`// Package harness provides a staged, store-and-forward message-handling pipeline...` etc.) on `harness` and `sqladapter`. +- [ ] Self-review diff for any stray `package infra` / wrong package declarations, unused imports, and leftover `TODO: pool ...` comments that should stay vs. be addressed now (keep them — they're load-bearing signals for future work). \ No newline at end of file From d7fd035b077d77118e303519484b84f9b8200d2b Mon Sep 17 00:00:00 2001 From: Michael Whatcott Date: Thu, 7 May 2026 16:52:29 -0600 Subject: [PATCH 02/50] Initial implementation of harness handler pipeline. --- ...-05-07_13-06-13-proposal-import-harness.md | 88 ++++---- go.mod | 4 + go.sum | 6 + handlers/harness/00_entrypoint.go | 69 ++++++ handlers/harness/00_entrypoint_test.go | 116 ++++++++++ handlers/harness/01_execution.go | 51 +++++ handlers/harness/01_execution_test.go | 127 +++++++++++ handlers/harness/02_serialization.go | 40 ++++ handlers/harness/02_serialization_test.go | 133 +++++++++++ handlers/harness/03_persistence.go | 54 +++++ handlers/harness/03_persistence_test.go | 143 ++++++++++++ handlers/harness/04_completion.go | 23 ++ handlers/harness/04_completion_test.go | 79 +++++++ handlers/harness/05_broadcast.go | 54 +++++ handlers/harness/05_broadcast_test.go | 143 ++++++++++++ handlers/harness/06_terminal.go | 14 ++ handlers/harness/06_terminal_test.go | 51 +++++ handlers/harness/config.go | 99 +++++++++ handlers/harness/config_test.go | 90 ++++++++ handlers/harness/contracts.go | 77 +++++++ handlers/harness/fanout.go | 46 ++++ handlers/harness/message.go | 37 ++++ handlers/harness/pipeline.go | 49 +++++ handlers/harness/pipeline_test.go | 204 +++++++++++++++++ handlers/harness/pool.go | 13 ++ handlers/harness/routing.go | 65 ++++++ handlers/harness/routing_test.go | 208 ++++++++++++++++++ handlers/harness/scanner.go | 48 ++++ handlers/harness/sqladapter/contracts.go | 5 + handlers/harness/sqladapter/dispatcher.go | 89 ++++++++ .../harness/sqladapter/dispatcher_test.go | 137 ++++++++++++ handlers/harness/sqladapter/recovery.go | 54 +++++ handlers/harness/sqladapter/recovery_test.go | 99 +++++++++ handlers/harness/sqladapter/testdb_test.go | 90 ++++++++ handlers/harness/sqladapter/writer.go | 95 ++++++++ handlers/harness/sqladapter/writer_test.go | 165 ++++++++++++++ 36 files changed, 2821 insertions(+), 44 deletions(-) create mode 100644 handlers/harness/00_entrypoint.go create mode 100644 handlers/harness/00_entrypoint_test.go create mode 100644 handlers/harness/01_execution.go create mode 100644 handlers/harness/01_execution_test.go create mode 100644 handlers/harness/02_serialization.go create mode 100644 handlers/harness/02_serialization_test.go create mode 100644 handlers/harness/03_persistence.go create mode 100644 handlers/harness/03_persistence_test.go create mode 100644 handlers/harness/04_completion.go create mode 100644 handlers/harness/04_completion_test.go create mode 100644 handlers/harness/05_broadcast.go create mode 100644 handlers/harness/05_broadcast_test.go create mode 100644 handlers/harness/06_terminal.go create mode 100644 handlers/harness/06_terminal_test.go create mode 100644 handlers/harness/config.go create mode 100644 handlers/harness/config_test.go create mode 100644 handlers/harness/contracts.go create mode 100644 handlers/harness/fanout.go create mode 100644 handlers/harness/message.go create mode 100644 handlers/harness/pipeline.go create mode 100644 handlers/harness/pipeline_test.go create mode 100644 handlers/harness/pool.go create mode 100644 handlers/harness/routing.go create mode 100644 handlers/harness/routing_test.go create mode 100644 handlers/harness/scanner.go create mode 100644 handlers/harness/sqladapter/contracts.go create mode 100644 handlers/harness/sqladapter/dispatcher.go create mode 100644 handlers/harness/sqladapter/dispatcher_test.go create mode 100644 handlers/harness/sqladapter/recovery.go create mode 100644 handlers/harness/sqladapter/recovery_test.go create mode 100644 handlers/harness/sqladapter/testdb_test.go create mode 100644 handlers/harness/sqladapter/writer.go create mode 100644 handlers/harness/sqladapter/writer_test.go diff --git a/doc/work-sessions/2026/2026-05-07_13-06-13-proposal-import-harness.md b/doc/work-sessions/2026/2026-05-07_13-06-13-proposal-import-harness.md index b685c5e..2869fda 100644 --- a/doc/work-sessions/2026/2026-05-07_13-06-13-proposal-import-harness.md +++ b/doc/work-sessions/2026/2026-05-07_13-06-13-proposal-import-harness.md @@ -8,7 +8,7 @@ type: plot ## Background -A second project (working copy under `billing-context/domain-transformation-phase-9-chunk-C/code/infra`) has grown a staged, pipeline-based message-handling "harness" that we now want to promote into the shared `github.com/smarty/messaging/v3` module so it can be reused. The source split is: +A second project (working copy under `-context/domain-transformation-phase-9-chunk-C/code/infra`) has grown a staged, pipeline-based message-handling "harness" that we now want to promote into the shared `github.com/smarty/messaging/v3` module so it can be reused. The source split is: - `infra/harness/*` — a generic, store-and-forward pipeline built from goroutine stages connected by buffered channels. Stages: `Entrypoint → Execution → Serialization (fan-out) → Persistence → Completion → Broadcast → Terminal`. Supporting code: `fanout.go`, `pool.go`, `routing.go`, `scanner.go`. - `infra/*` — supporting types and a **reference implementation** of the `Writer` / `Dispatcher` interfaces plus a `Recover` function, all coupled to the same MySQL `Messages` table that `sqlmq/_schema_mysql.sql` already defines (`id`, `dispatched`, `type`, `payload`). @@ -232,60 +232,60 @@ Design notes: ### Phase 1: Scaffolding and contracts -- [ ] Create directory `handlers/harness/` and `handlers/harness/sqladapter/`. -- [ ] Add `github.com/smarty/gunit/v2` to `go.mod`; run `go mod tidy`. -- [ ] Write `handlers/harness/contracts.go` with the **exported** surface (`Writer`, `Dispatcher`, `Monitor`, event structs `BatchInFlight`, `BatchComplete`, `UnitOfWorkInFlight`, `UnitOfWorkComplete`, `SerializationError`, `PersistenceError`, `BroadcastError`, sentinels `ErrSerialization`, `ErrPersistence`, `ErrBroadcast`) plus the **unexported** internal interfaces (`executor`, `applicator`, `serializer`) and value types (`batch`, `unitOfWork`). -- [ ] Write `handlers/harness/message.go` — copy `Message` struct with doc comments intact. -- [ ] Write `handlers/harness/pool.go` — copy verbatim (no external deps). -- [ ] Write `handlers/harness/scanner.go` — copy verbatim. -- [ ] Write `handlers/harness/fanout.go` — rename `fanIn` / `newFanIn` / `newFanOut` to stay unexported (already lowercase in source). -- [ ] Run `make compile` — confirm the package compiles (no tests yet). +- [x] Create directory `handlers/harness/` and `handlers/harness/sqladapter/`. +- [x] Add `github.com/smarty/gunit/v2` to `go.mod`; run `go mod tidy`. (Temporarily removed by tidy with no importers; will return automatically once Phase 2 tests are ported.) +- [x] Write `handlers/harness/contracts.go` with the **exported** surface (`Writer`, `Dispatcher`, `Monitor`, event structs `BatchInFlight`, `BatchComplete`, `UnitOfWorkInFlight`, `UnitOfWorkComplete`, `SerializationError`, `PersistenceError`, `BroadcastError`, sentinels `ErrSerialization`, `ErrPersistence`, `ErrBroadcast`) plus the **unexported** internal interfaces (`executor`, `applicator`, `serializer`) and value types (`batch`, `unitOfWork`). +- [x] Write `handlers/harness/message.go` — copy `Message` struct with doc comments intact. +- [x] Write `handlers/harness/pool.go` — copy verbatim (no external deps). +- [x] Write `handlers/harness/scanner.go` — copy verbatim (signatures adjusted to use unexported `executor`/`applicator`). +- [x] Write `handlers/harness/fanout.go` — `fanIn` / `newFanIn` / `newFanOut` stay unexported; `stationFactory` now returns `messaging.Listener` and uses unexported `unitOfWork`. +- [x] Run `make compile` — confirm the package compiles (no tests yet). ### Phase 2: Port stages bottom-up, TDD each one Work stage-by-stage from the terminal stage (simplest) upward, since downstream stages have no dependencies on upstream stages. All tests use `gunit/v2` imports as-is from source. **All stage types and constructors are renamed to unexported forms** (`Terminal` → `terminal`, `NewTerminal` → `newTerminal`, etc.); since the tests live in the same package, `_test.go` files can see them. -- [ ] Copy `06_terminal_test.go` from source; rename referenced types to lowercase. Run the terminal test — expect **failure** (`terminal` type doesn't exist yet in this package). -- [ ] Port `06_terminal.go` as `terminal` / `newTerminal`. Run tests; confirm passing. -- [ ] Copy `04_completion_test.go`; lowercase the type references. Run — expect failure. -- [ ] Port `04_completion.go` as `completion` / `newCompletion`. Run tests; confirm passing. -- [ ] Copy `05_broadcast_test.go`; lowercase the type references. Run — expect failure. -- [ ] Port `05_broadcast.go` as `broadcast` / `newBroadcast`. Run; confirm passing. -- [ ] Copy `03_persistence_test.go`; lowercase the type references. Run — expect failure. -- [ ] Port `03_persistence.go` as `persistence` / `newPersistence`. Run; confirm passing. -- [ ] **Rewrite** `02_serialization_test.go` to use a fake `serializer` (not jsonv2). Test should cover: success path writes to `message.Content`; serializer error is reported via monitor as `SerializationError` with `ErrSerialization` wrapped. Run — expect failure. -- [ ] Port `02_serialization.go` as `serialization` / `newSerialization` with the new `Serialize(io.Writer, any) error` signature; drop the `//go:build goexperiment.jsonv2` tag. Run; confirm passing. -- [ ] Copy `01_execution_test.go`; lowercase the type references. Run — expect failure. -- [ ] Port `01_execution.go` as `execution` / `newExecution`. Run; confirm passing. -- [ ] Copy `00_entrypoint_test.go`; lowercase the type references. Confirm it exercises `Close` + `Listen` semantics. Run — expect failure. -- [ ] Port `00_entrypoint.go` as `entrypoint` / `newEntrypoint`. Run; confirm passing. +- [x] Copy `06_terminal_test.go` from source; rename referenced types to lowercase. Run the terminal test — expect **failure** (`terminal` type doesn't exist yet in this package). +- [x] Port `06_terminal.go` as `terminal` / `newTerminal`. Run tests; confirm passing. +- [x] Copy `04_completion_test.go`; lowercase the type references. Run — expect failure. +- [x] Port `04_completion.go` as `completion` / `newCompletion`. Run tests; confirm passing. +- [x] Copy `05_broadcast_test.go`; lowercase the type references. Run — expect failure. +- [x] Port `05_broadcast.go` as `broadcast` / `newBroadcast`. Run; confirm passing. +- [x] Copy `03_persistence_test.go`; lowercase the type references. Run — expect failure. +- [x] Port `03_persistence.go` as `persistence` / `newPersistence`. Run; confirm passing. +- [x] **Rewrite** `02_serialization_test.go` to use a fake `serializer` (not jsonv2). Test should cover: success path writes to `message.Content`; serializer error is reported via monitor as `SerializationError` with `ErrSerialization` wrapped. Run — expect failure. +- [x] Port `02_serialization.go` as `serialization` / `newSerialization` with the new `Serialize(io.Writer, any) error` signature; drop the `//go:build goexperiment.jsonv2` tag. Run; confirm passing. +- [x] Copy `01_execution_test.go`; lowercase the type references. Run — expect failure. +- [x] Port `01_execution.go` as `execution` / `newExecution`. Run; confirm passing. +- [x] Copy `00_entrypoint_test.go`; lowercase the type references. Confirm it exercises `Close` + `Listen` semantics. Run — expect failure. +- [x] Port `00_entrypoint.go` as `entrypoint` / `newEntrypoint`. Run; confirm passing. ### Phase 3: Routing, pipeline, and functional-options config -- [ ] Copy `routing_test.go`; lowercase references (`Router` → `router`, `NewRouter` → `newRouter`). Run — expect failure. -- [ ] Port `routing.go` as `router` / `newRouter` with unexported `executor` / `applicator` interfaces; update `scanner.go`'s signature to match. Run; confirm passing. -- [ ] Write `handlers/harness/pipeline.go` as the unexported `build(ctx, cfg)` function; it constructs all the channels, calls `newRouter(cfg.Types...)`, wires every stage, and returns `messaging.Handler` + `[]messaging.Listener`. -- [ ] Write `handlers/harness/config.go` with `New(ctx, options...)` (no positional executor), `Options singleton`, `option` type, `configuration` struct (including `Types []any`), and per-option setters per §8, including `Options.Types(...)`. -- [ ] Write a `config_test.go` that asserts defaults (`BatchCapacity=1024`, `UnitSize=64`, `SerializerCount=4`, and that `Monitor`, `Serializer`, `Writer`, and `Dispatcher` all default to the shared `nop{}` and behave inertly when the pipeline runs with no options supplied). Also assert that `Options.Types(...)` populates `configuration.Types` verbatim. -- [ ] Adapt the source `pipeline_test.go` to call the new functional-options `New(...)`: the fixture registers itself via `Options.Types(this)` (it implements the `Execute...` method), and supplies fake `Writer`/`Dispatcher`/`serializer`/`Monitor` via `Options.*`. Run — expect failure if anything is still misaligned, then make green. -- [ ] Run the full harness test suite with `-race`; confirm no goroutine leaks and all tests pass. +- [x] Copy `routing_test.go`; lowercase references (`Router` → `router`, `NewRouter` → `newRouter`). Run — expect failure. +- [x] Port `routing.go` as `router` / `newRouter` with unexported `executor` / `applicator` interfaces; update `scanner.go`'s signature to match. Run; confirm passing. +- [x] Write `handlers/harness/pipeline.go` as the unexported `build(ctx, cfg)` function; it constructs all the channels, calls `newRouter(cfg.Types...)`, wires every stage, and returns `messaging.Handler` + `[]messaging.Listener`. +- [x] Write `handlers/harness/config.go` with `New(ctx, options...)` (no positional executor), `Options singleton`, `option` type, `configuration` struct (including `Types []any`), and per-option setters per §8, including `Options.Types(...)`. +- [x] Write a `config_test.go` that asserts defaults (`BatchCapacity=1024`, `UnitSize=64`, `SerializerCount=4`, and that `Monitor`, `Serializer`, `Writer`, and `Dispatcher` all default to the shared `nop{}` and behave inertly when the pipeline runs with no options supplied). Also assert that `Options.Types(...)` populates `configuration.Types` verbatim. +- [x] Adapt the source `pipeline_test.go` to call the new functional-options `New(...)`: the fixture registers itself via `Options.Types(this)` (it implements the `Execute...` method), and supplies fake `Writer`/`Dispatcher`/`serializer`/`Monitor` via `Options.*`. Run — expect failure if anything is still misaligned, then make green. +- [x] Run the full harness test suite with `-race`; confirm no goroutine leaks and all tests pass. ### Phase 4: SQL adapter -- [ ] Copy `infra/dispatcher_test.go` to `handlers/harness/sqladapter/dispatcher_test.go`. Rewrite imports (`package infra` → `sqladapter`, `infra.Message` → `harness.Message`; gunit imports stay v2). Run — expect failure. -- [ ] Port `dispatcher.go` to `sqladapter/dispatcher.go`. Add package-level doc comment labeling it as a reference implementation targeting the `Messages` table defined by `sqlmq/_schema_mysql.sql`. Preserve existing TODOs. Run; confirm passing. -- [ ] Copy `writer_test.go` → `sqladapter/writer_test.go`, rewrite imports. Run — expect failure. -- [ ] Port `writer.go`. Preserve `legacyWrite` escape hatch with a deprecation note in the godoc. Run; confirm passing. -- [ ] Copy `recovery_test.go` → `sqladapter/recovery_test.go`, rewrite imports. Run — expect failure. -- [ ] Port `recovery.go`. Preserve existing TODOs about pagination and Listener conversion. Run; confirm passing. -- [ ] Add `sqladapter/contracts.go` with the `Logger` interface (moved from `infra/contracts.go`). +- [x] Copy `infra/dispatcher_test.go` to `handlers/harness/sqladapter/dispatcher_test.go`. Rewrite imports (`package infra` → `sqladapter`, `infra.Message` → `harness.Message`; gunit imports stay v2). Also added `testdb_test.go` with local `openTestDatabase`/`ensureDatabaseReadiness` helpers (pointing at `sqlmq/_schema_mysql.sql`) since the source's `db-connector` dep is out of scope for this module. Run — expect failure. +- [x] Port `dispatcher.go` to `sqladapter/dispatcher.go`. Add package-level doc comment labeling it as a reference implementation targeting the `Messages` table defined by `sqlmq/_schema_mysql.sql`. Preserve existing TODOs. Run; confirm passing. +- [x] Copy `writer_test.go` → `sqladapter/writer_test.go`, rewrite imports. Replaced the external `billing` registry types and `openTestDatabase` dep with local test-only `orderReceived`/`orderApproved` structs and shared helpers from `testdb_test.go`. Run — expect failure. +- [x] Port `writer.go`. Preserve `legacyWrite` escape hatch with a deprecation note in the godoc. Run; confirm passing. +- [x] Copy `recovery_test.go` → `sqladapter/recovery_test.go`, rewrite imports. Run — expect failure. +- [x] Port `recovery.go`. Preserve existing TODOs about pagination and Listener conversion. Run; confirm passing. +- [x] Add `sqladapter/contracts.go` with the `Logger` interface (moved from `infra/contracts.go`). (Added earlier alongside dispatcher.go so it would compile.) ### Phase 5: Module hygiene -- [ ] Run `make test` — full module test suite with `-race -covermode=atomic`. Confirm green. -- [ ] Run `go mod tidy`. Confirm the only new direct dep is `github.com/smarty/gunit/v2`. -- [ ] Inspect `go.sum` diff — confirm no surprising indirect additions. -- [ ] Grep the new packages for references to `root/code/infra` — confirm zero. -- [ ] Grep the new packages for any `goexperiment.jsonv2` build tags — confirm zero. -- [ ] Add short package-level doc comments (`// Package harness provides a staged, store-and-forward message-handling pipeline...` etc.) on `harness` and `sqladapter`. -- [ ] Self-review diff for any stray `package infra` / wrong package declarations, unused imports, and leftover `TODO: pool ...` comments that should stay vs. be addressed now (keep them — they're load-bearing signals for future work). \ No newline at end of file +- [x] Run `make test` — full module test suite with `-race -covermode=atomic`. Confirm green. (All packages pass; harness 99.5%, sqladapter 0% under `-short` since its integration tests require a live MySQL — they pass end-to-end against a local DB when run without `-short`.) +- [x] Run `go mod tidy`. Confirms two new direct deps: `github.com/smarty/gunit/v2` (planned) and `github.com/go-sql-driver/mysql` (test-only driver added during Phase 4 when we chose not to bring in `db-connector` — imported via `testdb_test.go`'s `_` alias, so it participates only in test builds). One new indirect: `filippo.io/edwards25519` (transitively required by the MySQL driver). +- [x] Inspect `go.sum` diff — only `filippo.io/edwards25519` and gunit/v2 additions, both explained by the items above. +- [x] Grep the new packages for references to `root/code/infra` — confirm zero. +- [x] Grep the new packages for any `goexperiment.jsonv2` build tags — confirm zero. +- [x] Add short package-level doc comments (`// Package harness provides a staged, store-and-forward message-handling pipeline...` etc.) on `harness` and `sqladapter`. (`harness` on config.go, `sqladapter` on dispatcher.go.) +- [x] Self-review diff for any stray `package infra` / wrong package declarations, unused imports, and leftover `TODO: pool ...` comments that should stay vs. be addressed now (keep them — they're load-bearing signals for future work). \ No newline at end of file diff --git a/go.mod b/go.mod index c5db3f0..ee048a7 100644 --- a/go.mod +++ b/go.mod @@ -3,6 +3,10 @@ module github.com/smarty/messaging/v3 go 1.25 require ( + github.com/go-sql-driver/mysql v1.10.0 github.com/rabbitmq/amqp091-go v1.10.0 github.com/smarty/gunit v1.6.0 + github.com/smarty/gunit/v2 v2.1.0 ) + +require filippo.io/edwards25519 v1.2.0 // indirect diff --git a/go.sum b/go.sum index efb676e..80bb6e0 100644 --- a/go.sum +++ b/go.sum @@ -1,6 +1,12 @@ +filippo.io/edwards25519 v1.2.0 h1:crnVqOiS4jqYleHd9vaKZ+HKtHfllngJIiOpNpoJsjo= +filippo.io/edwards25519 v1.2.0/go.mod h1:xzAOLCNug/yB62zG1bQ8uziwrIqIuxhctzJT18Q77mc= +github.com/go-sql-driver/mysql v1.10.0 h1:Q+1LV8DkHJvSYAdR83XzuhDaTykuDx0l6fkXxoWCWfw= +github.com/go-sql-driver/mysql v1.10.0/go.mod h1:M+cqaI7+xxXGG9swrdeUIoPG3Y3KCkF0pZej+SK+nWk= github.com/rabbitmq/amqp091-go v1.10.0 h1:STpn5XsHlHGcecLmMFCtg7mqq0RnD+zFr4uzukfVhBw= github.com/rabbitmq/amqp091-go v1.10.0/go.mod h1:Hy4jKW5kQART1u+JkDTF9YYOQUHXqMuhrgxOEeS7G4o= github.com/smarty/gunit v1.6.0 h1:27yDmXz5ydI6bYN0A1ltJvtekRY6H3bQJZz0ifJIeVY= github.com/smarty/gunit v1.6.0/go.mod h1:4kEWyZ1xFTEwkEfCpjmIRejP9CHn2Q9F4NP6SmAR+fg= +github.com/smarty/gunit/v2 v2.1.0 h1:orbuo+Y7i50cI0oBYlpo1+crelNTX68PW41IdemT8Xs= +github.com/smarty/gunit/v2 v2.1.0/go.mod h1:8zgLMlQPLDFO5SVz7TrnUS1ADFiHHMChB8zPIQ4R4Ys= go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= diff --git a/handlers/harness/00_entrypoint.go b/handlers/harness/00_entrypoint.go new file mode 100644 index 0000000..2042581 --- /dev/null +++ b/handlers/harness/00_entrypoint.go @@ -0,0 +1,69 @@ +package harness + +import ( + "context" + "sync" +) + +type entrypoint struct { + monitor Monitor + waiters *poolT[*sync.WaitGroup] + batches *poolT[*batch] + work chan *batch + lock *sync.RWMutex + closed bool + done chan struct{} +} + +func newEntrypoint(monitor Monitor, work chan *batch) *entrypoint { + return &entrypoint{ + monitor: monitor, + waiters: newPoolT(newT[sync.WaitGroup]), + batches: newPoolT(newT[batch]), + work: work, + lock: new(sync.RWMutex), + done: make(chan struct{}), + } +} + +func (this *entrypoint) Handle(ctx context.Context, messages ...any) { + waiter := this.waiters.Get() + defer this.waiters.Put(waiter) + waiter.Add(1) + defer waiter.Wait() + + item := this.batches.Get() + item.ctx = ctx + item.messages = messages + item.complete = func() { + waiter.Done() + this.monitor.Track(batchComplete) + this.batches.Put(item) + } + + this.lock.RLock() + if !this.closed { + this.work <- item + this.monitor.Track(batchInFlight) + } + this.lock.RUnlock() +} + +// Listen blocks until Close is called so the entrypoint can be added as a dominoes listener. +// This guarantees Close is invoked during shutdown, which closes the work channel and lets the +// downstream pipeline stages drain naturally. +func (this *entrypoint) Listen() { <-this.done } + +func (this *entrypoint) Close() error { + this.lock.Lock() + if !this.closed { + close(this.work) + close(this.done) + this.closed = true + } + this.lock.Unlock() + return nil +} + +var batchInFlight BatchInFlight +var batchComplete BatchComplete diff --git a/handlers/harness/00_entrypoint_test.go b/handlers/harness/00_entrypoint_test.go new file mode 100644 index 0000000..a684ab3 --- /dev/null +++ b/handlers/harness/00_entrypoint_test.go @@ -0,0 +1,116 @@ +package harness + +import ( + "context" + "io" + "sync" + "testing" + + "github.com/smarty/gunit/v2" + "github.com/smarty/gunit/v2/assert/should" +) + +func TestEntrypointFixture(t *testing.T) { + gunit.Run(new(EntrypointFixture), t) +} + +type EntrypointFixture struct { + *gunit.Fixture + ctx context.Context + work chan *batch + subject *entrypoint + + trackMu sync.Mutex + tracked []any +} + +func (this *EntrypointFixture) Setup() { + this.ctx = context.WithValue(this.Context(), "testing", this.Name()) + this.work = make(chan *batch, 4) + this.subject = newEntrypoint(this, this.work) +} + +func (this *EntrypointFixture) Track(observation any) { + this.trackMu.Lock() + defer this.trackMu.Unlock() + this.tracked = append(this.tracked, observation) +} + +func (this *EntrypointFixture) TestImplementsCloser() { + var _ io.Closer = this.subject +} + +func (this *EntrypointFixture) TestHandlePushesBatchAndBlocksUntilCompletion() { + done := make(chan struct{}) + go func() { + this.subject.Handle(this.ctx, "msg-1", "msg-2") + close(done) + }() + + item := <-this.work + this.So(item.ctx.Value("testing"), should.Equal, this.Name()) + this.So(item.messages, should.Equal, []any{"msg-1", "msg-2"}) + + select { + case <-done: + this.Fatal("Handle returned before complete() was invoked") + default: + } + + item.complete() + <-done + + this.So(this.tracked, should.HaveLength, 2) + this.So(this.tracked, should.Contain, BatchInFlight{}) + this.So(this.tracked, should.Contain, BatchComplete{}) +} + +func (this *EntrypointFixture) TestHandleSerializesMultipleConcurrentCalls() { + done := make(chan struct{}, 3) + go func() { this.subject.Handle(this.ctx, "a"); done <- struct{}{} }() + go func() { this.subject.Handle(this.ctx, "b"); done <- struct{}{} }() + go func() { this.subject.Handle(this.ctx, "c"); done <- struct{}{} }() + + for range 3 { + item := <-this.work + this.So(item.ctx.Value("testing"), should.Equal, this.Name()) + item.complete() + <-done + } + + this.So(this.tracked, should.HaveLength, 6) + var inFlight int + for _, observation := range this.tracked { + switch observation.(type) { + case BatchInFlight: + inFlight++ + case BatchComplete: + inFlight-- + default: + this.Fatal("Unexpected observation:", observation) + } + } + this.So(inFlight, should.Equal, 0) +} + +func (this *EntrypointFixture) TestCloseReleasesListenAndClosesWorkChannel() { + listened := make(chan struct{}) + go func() { + this.subject.Listen() + close(listened) + }() + + this.So(this.subject.Close(), should.BeNil) + + <-listened + + _, open := <-this.work + this.So(open, should.BeFalse) + this.So(this.tracked, should.BeEmpty) +} + +func (this *EntrypointFixture) TestCloseIsIdempotent() { + this.So(this.subject.Close(), should.BeNil) + this.So(this.subject.Close(), should.BeNil) + this.So(this.tracked, should.BeEmpty) +} diff --git a/handlers/harness/01_execution.go b/handlers/harness/01_execution.go new file mode 100644 index 0000000..39e6862 --- /dev/null +++ b/handlers/harness/01_execution.go @@ -0,0 +1,51 @@ +package harness + +import ( + "bytes" +) + +type execution struct { + monitor Monitor + maxUnitSize int + input chan *batch + output chan *unitOfWork + executor executor +} + +func newExecution(monitor Monitor, maxUnitSize int, input chan *batch, output chan *unitOfWork, exec executor) *execution { + return &execution{ + monitor: monitor, + maxUnitSize: maxUnitSize, + input: input, + output: output, + executor: exec, + } +} + +func (this *execution) Listen() { + defer close(this.output) + + var unit *unitOfWork // TODO: pool for *unitOfWork (and this.monitor.Track(UnitOfWorkComplete{}) when putting back) + for item := range this.input { + if unit == nil { + unit = new(unitOfWork) + } + unit.completions = append(unit.completions, item.complete) + for _, message := range item.messages { + this.executor.Execute(message, func(outgoing ...any) { + for _, result := range outgoing { + record := &Message{Value: result, Content: bytes.NewBuffer(nil)} // TODO: pool for *Message + unit.results = append(unit.results, record) + } + }) + } + if len(unit.completions) < this.maxUnitSize && len(this.input) > 0 { + continue // more to do + } + this.monitor.Track(unitOfWorkInFlight) + this.output <- unit + unit = nil + } +} + +var unitOfWorkInFlight UnitOfWorkInFlight diff --git a/handlers/harness/01_execution_test.go b/handlers/harness/01_execution_test.go new file mode 100644 index 0000000..579ad99 --- /dev/null +++ b/handlers/harness/01_execution_test.go @@ -0,0 +1,127 @@ +package harness + +import ( + "sync" + "testing" + + "github.com/smarty/gunit/v2" + "github.com/smarty/gunit/v2/assert/should" +) + +func TestExecutionFixture(t *testing.T) { + gunit.Run(new(ExecutionFixture), t) +} + +type ExecutionFixture struct { + *gunit.Fixture + input chan *batch + output chan *unitOfWork + subject *execution + + executeMu sync.Mutex + executeCalls []any + executeOutputs [][]any + + tracked []any +} + +func (this *ExecutionFixture) Setup() { + this.input = make(chan *batch, 8) + this.output = make(chan *unitOfWork, 8) + this.subject = newExecution(this, 64, this.input, this.output, this) +} + +func (this *ExecutionFixture) Execute(message any, broadcast func(...any)) { + this.executeMu.Lock() + defer this.executeMu.Unlock() + this.executeCalls = append(this.executeCalls, message) + if len(this.executeOutputs) == 0 { + return + } + out := this.executeOutputs[0] + this.executeOutputs = this.executeOutputs[1:] + broadcast(out...) +} + +func (this *ExecutionFixture) Track(observation any) { + this.tracked = append(this.tracked, observation) +} + +func (this *ExecutionFixture) drain() (results []*unitOfWork) { + for unit := range this.output { + results = append(results, unit) + } + return results +} + +func (this *ExecutionFixture) TestSingleBatchProducesUnitOfWork() { + this.executeOutputs = [][]any{{"result-A"}} + this.input <- &batch{messages: []any{"msg-A"}, complete: func() {}} + close(this.input) + + go this.subject.Listen() + + units := this.drain() + this.So(len(units), should.Equal, 1) + this.So(this.executeCalls, should.Equal, []any{"msg-A"}) + this.So(len(units[0].results), should.Equal, 1) + this.So(units[0].results[0].Value, should.Equal, "result-A") + this.So(len(units[0].completions), should.Equal, 1) + this.So(this.tracked, should.Equal, []any{unitOfWorkInFlight}) +} + +func (this *ExecutionFixture) TestUnitFlushesWhenMaxUnitSizeReached() { + this.executeOutputs = [][]any{{"r1"}, {"r2"}, {"r3"}} + this.input <- &batch{messages: []any{"m1"}, complete: func() {}} + this.input <- &batch{messages: []any{"m2"}, complete: func() {}} + this.input <- &batch{messages: []any{"m3"}, complete: func() {}} + close(this.input) + + this.subject = newExecution(this, 2, this.input, this.output, this) + go this.subject.Listen() + + units := this.drain() + this.So(len(units), should.Equal, 2) + this.So(len(units[0].completions), should.Equal, 2) + this.So(len(units[1].completions), should.Equal, 1) + this.So(this.executeCalls, should.Equal, []any{"m1", "m2", "m3"}) + this.So(this.tracked, should.Equal, []any{unitOfWorkInFlight, unitOfWorkInFlight}) +} + +func (this *ExecutionFixture) TestEmptyExecutorOutputProducesUnitWithNoResults() { + this.input <- &batch{messages: []any{"silent"}, complete: func() {}} + close(this.input) + + go this.subject.Listen() + + units := this.drain() + this.So(len(units), should.Equal, 1) + this.So(units[0].results, should.BeEmpty) + this.So(this.executeCalls, should.Equal, []any{"silent"}) + this.So(this.tracked, should.Equal, []any{unitOfWorkInFlight}) +} + +func (this *ExecutionFixture) TestExecutorBroadcastsMultipleResults() { + this.executeOutputs = [][]any{{"r1", "r2", "r3"}} + this.input <- &batch{messages: []any{"msg"}, complete: func() {}} + close(this.input) + + go this.subject.Listen() + + units := this.drain() + this.So(len(units), should.Equal, 1) + this.So(len(units[0].results), should.Equal, 3) + this.So(units[0].results[0].Value, should.Equal, "r1") + this.So(units[0].results[1].Value, should.Equal, "r2") + this.So(units[0].results[2].Value, should.Equal, "r3") + this.So(this.tracked, should.Equal, []any{unitOfWorkInFlight}) +} + +func (this *ExecutionFixture) TestClosedInputClosesOutput() { + close(this.input) + go this.subject.Listen() + + _, open := <-this.output + this.So(open, should.BeFalse) + this.So(this.tracked, should.BeEmpty) +} diff --git a/handlers/harness/02_serialization.go b/handlers/harness/02_serialization.go new file mode 100644 index 0000000..5fc18a8 --- /dev/null +++ b/handlers/harness/02_serialization.go @@ -0,0 +1,40 @@ +package harness + +import ( + "fmt" +) + +type serialization struct { + monitor Monitor + serializer serializer + input chan *unitOfWork + output chan *unitOfWork +} + +func newSerialization(monitor Monitor, enc serializer, input, output chan *unitOfWork) *serialization { + return &serialization{ + monitor: monitor, + serializer: enc, + input: input, + output: output, + } +} + +func (this *serialization) Listen() { + var failure SerializationError + + defer close(this.output) + for unit := range this.input { + for _, message := range unit.results { + err := this.serializer.Serialize(message.Content, message.Value) + if err != nil { + failure.Error = fmt.Errorf("%w: %w", ErrSerialization, err) + failure.Value = message.Value + this.monitor.Track(failure) + } + failure.Error = nil + failure.Value = nil + } + this.output <- unit + } +} diff --git a/handlers/harness/02_serialization_test.go b/handlers/harness/02_serialization_test.go new file mode 100644 index 0000000..1bd15a8 --- /dev/null +++ b/handlers/harness/02_serialization_test.go @@ -0,0 +1,133 @@ +package harness + +import ( + "bytes" + "errors" + "io" + "testing" + + "github.com/smarty/gunit/v2" + "github.com/smarty/gunit/v2/assert/should" +) + +func TestSerializationFixture(t *testing.T) { + gunit.Run(new(SerializationFixture), t) +} + +type SerializationFixture struct { + *gunit.Fixture + input chan *unitOfWork + output chan *unitOfWork + subject *serialization + + tracked []any + + serializeCalls []any + serializeFail map[any]error +} + +func (this *SerializationFixture) Setup() { + this.input = make(chan *unitOfWork, 4) + this.output = make(chan *unitOfWork, 4) + this.serializeFail = make(map[any]error) + this.subject = newSerialization(this, this, this.input, this.output) +} + +func (this *SerializationFixture) Track(observation any) { + this.tracked = append(this.tracked, observation) +} + +func (this *SerializationFixture) Serialize(out io.Writer, in any) error { + this.serializeCalls = append(this.serializeCalls, in) + if err, ok := this.serializeFail[in]; ok { + return err + } + _, _ = out.Write([]byte("encoded:")) + switch v := in.(type) { + case string: + _, _ = out.Write([]byte(v)) + default: + _, _ = out.Write([]byte("value")) + } + return nil +} + +func (this *SerializationFixture) drain() (results []*unitOfWork) { + for unit := range this.output { + results = append(results, unit) + } + return results +} + +func (this *SerializationFixture) TestSerializesEachResultValueIntoContent() { + unit := &unitOfWork{results: []*Message{ + {Value: "alpha", Content: bytes.NewBuffer(nil)}, + {Value: "beta", Content: bytes.NewBuffer(nil)}, + }} + this.input <- unit + close(this.input) + + go this.subject.Listen() + + units := this.drain() + this.So(len(units), should.Equal, 1) + this.So(units[0].results[0].Content.String(), should.Equal, "encoded:alpha") + this.So(units[0].results[1].Content.String(), should.Equal, "encoded:beta") + this.So(this.serializeCalls, should.Equal, []any{"alpha", "beta"}) + this.So(this.tracked, should.BeEmpty) +} + +func (this *SerializationFixture) TestForwardsUnitsInOrder() { + this.input <- &unitOfWork{results: []*Message{{Value: "one", Content: bytes.NewBuffer(nil)}}} + this.input <- &unitOfWork{results: []*Message{{Value: "two", Content: bytes.NewBuffer(nil)}}} + close(this.input) + + go this.subject.Listen() + + units := this.drain() + this.So(len(units), should.Equal, 2) + this.So(units[0].results[0].Content.String(), should.Equal, "encoded:one") + this.So(units[1].results[0].Content.String(), should.Equal, "encoded:two") +} + +func (this *SerializationFixture) TestEmptyResultsForwardsCleanly() { + this.input <- &unitOfWork{} + close(this.input) + + go this.subject.Listen() + + units := this.drain() + this.So(len(units), should.Equal, 1) + this.So(units[0].results, should.BeEmpty) + this.So(this.tracked, should.BeEmpty) +} + +func (this *SerializationFixture) TestClosedInputClosesOutput() { + close(this.input) + go this.subject.Listen() + + _, open := <-this.output + this.So(open, should.BeFalse) + this.So(this.tracked, should.BeEmpty) +} + +func (this *SerializationFixture) TestSerializerErrorIsTracked() { + boom := errors.New("boom") + value := "unserializable" + this.serializeFail[value] = boom + unit := &unitOfWork{results: []*Message{ + {Value: value, Content: bytes.NewBuffer(nil)}, + }} + this.input <- unit + close(this.input) + + go this.subject.Listen() + + units := this.drain() + this.So(len(units), should.Equal, 1) + this.So(this.tracked, should.HaveLength, 1) + observation, ok := this.tracked[0].(SerializationError) + this.So(ok, should.BeTrue) + this.So(observation.Error, should.WrapError, ErrSerialization) + this.So(observation.Value, should.Equal, value) +} diff --git a/handlers/harness/03_persistence.go b/handlers/harness/03_persistence.go new file mode 100644 index 0000000..0e77ae9 --- /dev/null +++ b/handlers/harness/03_persistence.go @@ -0,0 +1,54 @@ +package harness + +import ( + "context" + "fmt" + "time" +) + +type persistence struct { + ctx context.Context + monitor Monitor + input chan *unitOfWork + output chan *unitOfWork + writer Writer + sleep func(time.Duration) + buffer []any +} + +func newPersistence(ctx context.Context, monitor Monitor, input, output chan *unitOfWork, writer Writer, sleep func(time.Duration)) *persistence { + return &persistence{ + ctx: ctx, + monitor: monitor, + input: input, + output: output, + writer: writer, + sleep: sleep, + buffer: make([]any, 0, 1024), + } +} + +func (this *persistence) Listen() { + var failure PersistenceError + + defer close(this.output) + for unit := range this.input { + for _, message := range unit.results { + this.buffer = append(this.buffer, message) + } + for attempt := 1; ; attempt++ { + err := this.writer.Write(this.ctx, this.buffer...) + if err == nil { + failure.Attempt = 0 + failure.Error = nil + break + } + failure.Attempt = attempt + failure.Error = fmt.Errorf("%w: %w", ErrPersistence, err) + this.monitor.Track(failure) + this.sleep(time.Second) // TODO: exponential back-off w/ jitter + } + this.output <- unit + this.buffer = this.buffer[:0] + } +} diff --git a/handlers/harness/03_persistence_test.go b/handlers/harness/03_persistence_test.go new file mode 100644 index 0000000..e99701b --- /dev/null +++ b/handlers/harness/03_persistence_test.go @@ -0,0 +1,143 @@ +package harness + +import ( + "context" + "errors" + "sync" + "testing" + "time" + + "github.com/smarty/gunit/v2" + "github.com/smarty/gunit/v2/assert/should" +) + +func TestPersistenceFixture(t *testing.T) { + gunit.Run(new(PersistenceFixture), t) +} + +type PersistenceFixture struct { + *gunit.Fixture + ctx context.Context + input chan *unitOfWork + output chan *unitOfWork + sleeps []time.Duration + subject *persistence + + writeMu sync.Mutex + writeCalls [][]any + writeFailCount int + + tracked []any +} + +func (this *PersistenceFixture) Setup() { + this.ctx = context.WithValue(this.Context(), "testing", this.Name()) + this.input = make(chan *unitOfWork, 4) + this.output = make(chan *unitOfWork, 4) + this.subject = newPersistence(this.ctx, this, this.input, this.output, this, this.sleep) +} + +func (this *PersistenceFixture) sleep(d time.Duration) { + this.sleeps = append(this.sleeps, d) +} + +func (this *PersistenceFixture) Track(observation any) { + this.tracked = append(this.tracked, observation) +} + +func (this *PersistenceFixture) Write(ctx context.Context, messages ...any) error { + this.So(ctx.Value("testing"), should.Equal, this.Name()) + this.writeMu.Lock() + defer this.writeMu.Unlock() + captured := make([]any, len(messages)) + copy(captured, messages) + this.writeCalls = append(this.writeCalls, captured) + if this.writeFailCount > 0 { + this.writeFailCount-- + return errors.New("write failure") + } + return nil +} + +func (this *PersistenceFixture) drain() (results []*unitOfWork) { + for unit := range this.output { + results = append(results, unit) + } + return results +} + +func (this *PersistenceFixture) TestWritesAllResultsThenForwardsUnit() { + m1 := &Message{Value: "a"} + m2 := &Message{Value: "b"} + this.input <- &unitOfWork{results: []*Message{m1, m2}} + close(this.input) + + go this.subject.Listen() + + units := this.drain() + this.So(len(units), should.Equal, 1) + this.So(len(this.writeCalls), should.Equal, 1) + this.So(this.writeCalls[0], should.Equal, []any{m1, m2}) + this.So(this.sleeps, should.BeEmpty) + this.So(this.tracked, should.BeEmpty) +} + +func (this *PersistenceFixture) TestEachUnitIsWrittenIndependently() { + m1 := &Message{Value: "a"} + m2 := &Message{Value: "b"} + this.input <- &unitOfWork{results: []*Message{m1}} + this.input <- &unitOfWork{results: []*Message{m2}} + close(this.input) + + go this.subject.Listen() + + units := this.drain() + this.So(len(units), should.Equal, 2) + this.So(len(this.writeCalls), should.Equal, 2) + this.So(this.writeCalls[0], should.Equal, []any{m1}) + this.So(this.writeCalls[1], should.Equal, []any{m2}) + this.So(this.tracked, should.BeEmpty) +} + +func (this *PersistenceFixture) TestEmptyResultsTriggersEmptyWrite() { + this.input <- &unitOfWork{} + close(this.input) + + go this.subject.Listen() + + units := this.drain() + this.So(len(units), should.Equal, 1) + this.So(len(this.writeCalls), should.Equal, 1) + this.So(this.writeCalls[0], should.BeEmpty) + this.So(this.tracked, should.BeEmpty) +} + +func (this *PersistenceFixture) TestRetriesUntilWriteSucceeds() { + this.writeFailCount = 2 + m := &Message{Value: "retried"} + this.input <- &unitOfWork{results: []*Message{m}} + close(this.input) + + go this.subject.Listen() + + units := this.drain() + this.So(len(units), should.Equal, 1) + this.So(len(this.writeCalls), should.Equal, 3) + this.So(this.sleeps, should.Equal, []time.Duration{time.Second, time.Second}) + this.So(this.tracked, should.HaveLength, 2) + for n, observation := range this.tracked { + failure, ok := observation.(PersistenceError) + this.So(ok, should.BeTrue) + this.So(failure.Error, should.WrapError, ErrPersistence) + this.So(failure.Attempt, should.Equal, n+1) + } +} + +func (this *PersistenceFixture) TestClosedInputClosesOutput() { + close(this.input) + go this.subject.Listen() + + _, open := <-this.output + this.So(open, should.BeFalse) + this.So(this.tracked, should.BeEmpty) +} diff --git a/handlers/harness/04_completion.go b/handlers/harness/04_completion.go new file mode 100644 index 0000000..ac6c076 --- /dev/null +++ b/handlers/harness/04_completion.go @@ -0,0 +1,23 @@ +package harness + +type completion struct { + input chan *unitOfWork + output chan *unitOfWork +} + +func newCompletion(input, output chan *unitOfWork) *completion { + return &completion{ + input: input, + output: output, + } +} + +func (this *completion) Listen() { + defer close(this.output) + for unit := range this.input { + for _, complete := range unit.completions { + complete() + } + this.output <- unit + } +} diff --git a/handlers/harness/04_completion_test.go b/handlers/harness/04_completion_test.go new file mode 100644 index 0000000..fd11aba --- /dev/null +++ b/handlers/harness/04_completion_test.go @@ -0,0 +1,79 @@ +package harness + +import ( + "testing" + + "github.com/smarty/gunit/v2" + "github.com/smarty/gunit/v2/assert/should" +) + +func TestCompletionFixture(t *testing.T) { + gunit.Run(new(CompletionFixture), t) +} + +type CompletionFixture struct { + *gunit.Fixture + input chan *unitOfWork + output chan *unitOfWork + subject *completion +} + +func (this *CompletionFixture) Setup() { + this.input = make(chan *unitOfWork, 4) + this.output = make(chan *unitOfWork, 4) + this.subject = newCompletion(this.input, this.output) +} + +func (this *CompletionFixture) drain() (results []*unitOfWork) { + for unit := range this.output { + results = append(results, unit) + } + return results +} + +func (this *CompletionFixture) TestCallsAllCompletionsThenForwards() { + var invocations []string + this.input <- &unitOfWork{completions: []func(){ + func() { invocations = append(invocations, "first") }, + func() { invocations = append(invocations, "second") }, + }} + close(this.input) + + go this.subject.Listen() + + units := this.drain() + this.So(len(units), should.Equal, 1) + this.So(invocations, should.Equal, []string{"first", "second"}) +} + +func (this *CompletionFixture) TestNoCompletionsForwardsCleanly() { + this.input <- &unitOfWork{} + close(this.input) + + go this.subject.Listen() + + units := this.drain() + this.So(len(units), should.Equal, 1) +} + +func (this *CompletionFixture) TestEachUnitFiresIndependently() { + var firstCalled, secondCalled int + this.input <- &unitOfWork{completions: []func(){func() { firstCalled++ }}} + this.input <- &unitOfWork{completions: []func(){func() { secondCalled++ }}} + close(this.input) + + go this.subject.Listen() + + units := this.drain() + this.So(len(units), should.Equal, 2) + this.So(firstCalled, should.Equal, 1) + this.So(secondCalled, should.Equal, 1) +} + +func (this *CompletionFixture) TestClosedInputClosesOutput() { + close(this.input) + go this.subject.Listen() + + _, open := <-this.output + this.So(open, should.BeFalse) +} diff --git a/handlers/harness/05_broadcast.go b/handlers/harness/05_broadcast.go new file mode 100644 index 0000000..30b7c55 --- /dev/null +++ b/handlers/harness/05_broadcast.go @@ -0,0 +1,54 @@ +package harness + +import ( + "context" + "fmt" + "time" +) + +type broadcast struct { + ctx context.Context + monitor Monitor + input chan *unitOfWork + output chan *unitOfWork + buffer []any + dispatcher Dispatcher + sleep func(time.Duration) +} + +func newBroadcast(ctx context.Context, monitor Monitor, input, output chan *unitOfWork, dispatcher Dispatcher, sleep func(time.Duration)) *broadcast { + return &broadcast{ + ctx: ctx, + monitor: monitor, + input: input, + output: output, + buffer: make([]any, 0, 1024), + dispatcher: dispatcher, + sleep: sleep, + } +} + +func (this *broadcast) Listen() { + var failure BroadcastError + + defer close(this.output) + for unit := range this.input { + for _, message := range unit.results { + this.buffer = append(this.buffer, message) + } + for attempt := 1; ; attempt++ { + err := this.dispatcher.Dispatch(this.ctx, this.buffer...) + if err == nil { + failure.Attempt = 0 + failure.Error = nil + break + } + failure.Attempt = attempt + failure.Error = fmt.Errorf("%w: %w", ErrBroadcast, err) + this.monitor.Track(failure) + this.sleep(time.Second) // TODO: exponential backoff w/ jitter + } + this.buffer = this.buffer[:0] + this.output <- unit + } +} diff --git a/handlers/harness/05_broadcast_test.go b/handlers/harness/05_broadcast_test.go new file mode 100644 index 0000000..b09809a --- /dev/null +++ b/handlers/harness/05_broadcast_test.go @@ -0,0 +1,143 @@ +package harness + +import ( + "context" + "errors" + "sync" + "testing" + "time" + + "github.com/smarty/gunit/v2" + "github.com/smarty/gunit/v2/assert/should" +) + +func TestBroadcastFixture(t *testing.T) { + gunit.Run(new(BroadcastFixture), t) +} + +type BroadcastFixture struct { + *gunit.Fixture + ctx context.Context + input chan *unitOfWork + output chan *unitOfWork + sleeps []time.Duration + subject *broadcast + + dispatchMu sync.Mutex + dispatchCalls [][]any + dispatchFailCount int + + tracked []any +} + +func (this *BroadcastFixture) Setup() { + this.ctx = context.WithValue(this.Context(), "testing", this.Name()) + this.input = make(chan *unitOfWork, 4) + this.output = make(chan *unitOfWork, 4) + this.subject = newBroadcast(this.ctx, this, this.input, this.output, this, this.sleep) +} + +func (this *BroadcastFixture) sleep(d time.Duration) { + this.sleeps = append(this.sleeps, d) +} + +func (this *BroadcastFixture) Track(observation any) { + this.tracked = append(this.tracked, observation) +} + +func (this *BroadcastFixture) Dispatch(ctx context.Context, messages ...any) error { + this.So(ctx.Value("testing"), should.Equal, this.Name()) + this.dispatchMu.Lock() + defer this.dispatchMu.Unlock() + captured := make([]any, len(messages)) + copy(captured, messages) + this.dispatchCalls = append(this.dispatchCalls, captured) + if this.dispatchFailCount > 0 { + this.dispatchFailCount-- + return errors.New("dispatch failure") + } + return nil +} + +func (this *BroadcastFixture) drain() (results []*unitOfWork) { + for unit := range this.output { + results = append(results, unit) + } + return results +} + +func (this *BroadcastFixture) TestDispatchesAllResultsThenForwardsUnit() { + m1 := &Message{Value: "a"} + m2 := &Message{Value: "b"} + this.input <- &unitOfWork{results: []*Message{m1, m2}} + close(this.input) + + go this.subject.Listen() + + units := this.drain() + this.So(len(units), should.Equal, 1) + this.So(len(this.dispatchCalls), should.Equal, 1) + this.So(this.dispatchCalls[0], should.Equal, []any{m1, m2}) + this.So(this.sleeps, should.BeEmpty) + this.So(this.tracked, should.BeEmpty) +} + +func (this *BroadcastFixture) TestEachUnitDispatchedIndependently() { + m1 := &Message{Value: "a"} + m2 := &Message{Value: "b"} + this.input <- &unitOfWork{results: []*Message{m1}} + this.input <- &unitOfWork{results: []*Message{m2}} + close(this.input) + + go this.subject.Listen() + + units := this.drain() + this.So(len(units), should.Equal, 2) + this.So(len(this.dispatchCalls), should.Equal, 2) + this.So(this.dispatchCalls[0], should.Equal, []any{m1}) + this.So(this.dispatchCalls[1], should.Equal, []any{m2}) + this.So(this.tracked, should.BeEmpty) +} + +func (this *BroadcastFixture) TestEmptyResultsTriggersEmptyDispatch() { + this.input <- &unitOfWork{} + close(this.input) + + go this.subject.Listen() + + units := this.drain() + this.So(len(units), should.Equal, 1) + this.So(len(this.dispatchCalls), should.Equal, 1) + this.So(this.dispatchCalls[0], should.BeEmpty) + this.So(this.tracked, should.BeEmpty) +} + +func (this *BroadcastFixture) TestRetriesUntilDispatchSucceeds() { + this.dispatchFailCount = 2 + m := &Message{Value: "retried"} + this.input <- &unitOfWork{results: []*Message{m}} + close(this.input) + + go this.subject.Listen() + + units := this.drain() + this.So(len(units), should.Equal, 1) + this.So(len(this.dispatchCalls), should.Equal, 3) + this.So(this.sleeps, should.Equal, []time.Duration{time.Second, time.Second}) + this.So(this.tracked, should.HaveLength, 2) + for n, observation := range this.tracked { + failure, ok := observation.(BroadcastError) + this.So(ok, should.BeTrue) + this.So(failure.Error, should.WrapError, ErrBroadcast) + this.So(failure.Attempt, should.Equal, n+1) + } +} + +func (this *BroadcastFixture) TestClosedInputClosesOutput() { + close(this.input) + go this.subject.Listen() + + _, open := <-this.output + this.So(open, should.BeFalse) + this.So(this.tracked, should.BeEmpty) +} diff --git a/handlers/harness/06_terminal.go b/handlers/harness/06_terminal.go new file mode 100644 index 0000000..3a09ec4 --- /dev/null +++ b/handlers/harness/06_terminal.go @@ -0,0 +1,14 @@ +package harness + +type terminal struct { + input chan *unitOfWork +} + +func newTerminal(input chan *unitOfWork) *terminal { + return &terminal{input: input} +} + +func (this *terminal) Listen() { + for range this.input { + } +} diff --git a/handlers/harness/06_terminal_test.go b/handlers/harness/06_terminal_test.go new file mode 100644 index 0000000..c53cf16 --- /dev/null +++ b/handlers/harness/06_terminal_test.go @@ -0,0 +1,51 @@ +package harness + +import ( + "testing" + + "github.com/smarty/gunit/v2" + "github.com/smarty/gunit/v2/assert/should" +) + +func TestTerminalFixture(t *testing.T) { + gunit.Run(new(TerminalFixture), t) +} + +type TerminalFixture struct { + *gunit.Fixture + input chan *unitOfWork + subject *terminal +} + +func (this *TerminalFixture) Setup() { + this.input = make(chan *unitOfWork, 4) + this.subject = newTerminal(this.input) +} + +func (this *TerminalFixture) TestDrainsInputUntilClosed() { + this.input <- &unitOfWork{} + this.input <- &unitOfWork{} + this.input <- &unitOfWork{} + close(this.input) + + done := make(chan struct{}) + go func() { + this.subject.Listen() + close(done) + }() + + <-done + this.So(len(this.input), should.Equal, 0) +} + +func (this *TerminalFixture) TestEmptyClosedInputReturnsImmediately() { + close(this.input) + + done := make(chan struct{}) + go func() { + this.subject.Listen() + close(done) + }() + + <-done +} diff --git a/handlers/harness/config.go b/handlers/harness/config.go new file mode 100644 index 0000000..2be3ff4 --- /dev/null +++ b/handlers/harness/config.go @@ -0,0 +1,99 @@ +// Package harness provides a staged, store-and-forward message-handling +// pipeline composed of goroutine stages (entrypoint, execution, serialization, +// persistence, completion, broadcast, terminal) connected by buffered channels. +// +// Callers register domain objects whose Execute.../Apply... methods drive the +// pipeline via Options.Types(...), and supply collaborators (Writer, Dispatcher, +// Serializer, Monitor) via the corresponding Options.*. All collaborators +// default to a no-op implementation, so omitting them produces a runnable but +// inert pipeline — useful for tests, but not for production. +// +// The only exported entry point is New(ctx, options...); every internal stage +// type is unexported and cannot be constructed directly by callers. +package harness + +import ( + "context" + "io" + + "github.com/smarty/messaging/v3" +) + +// New constructs a staged, store-and-forward message-handling pipeline. +// Register domain types (handlers/observers) via Options.Types, and wire +// real Writer, Dispatcher, Serializer, and Monitor collaborators via the +// corresponding Options.* functions. Collaborators default to a shared +// no-op implementation, so omitting them produces a runnable but inert +// pipeline — useful for tests, but not for production. +func New(ctx context.Context, options ...option) (messaging.Handler, []messaging.Listener) { + var cfg configuration + for _, apply := range Options.defaults(options...) { + apply(&cfg) + } + return build(ctx, cfg) +} + +var Options singleton + +type singleton struct{} +type option func(*configuration) + +type configuration struct { + Monitor Monitor + Serializer serializer + Writer Writer + Dispatcher Dispatcher + Types []any + BatchCapacity int + UnitSize int + SerializerCount int +} + +// Types registers the domain objects whose Execute.../Apply... methods drive +// the pipeline. They are passed verbatim to newRouter(...) at build time. +func (singleton) Types(value ...any) option { + return func(this *configuration) { this.Types = value } +} +func (singleton) Monitor(value Monitor) option { + return func(this *configuration) { this.Monitor = value } +} +func (singleton) Serializer(value serializer) option { + return func(this *configuration) { this.Serializer = value } +} +func (singleton) Writer(value Writer) option { + return func(this *configuration) { this.Writer = value } +} +func (singleton) Dispatcher(value Dispatcher) option { + return func(this *configuration) { this.Dispatcher = value } +} +func (singleton) BatchCapacity(value int) option { + return func(this *configuration) { this.BatchCapacity = value } +} +func (singleton) UnitSize(value int) option { + return func(this *configuration) { this.UnitSize = value } +} +func (singleton) SerializerCount(value int) option { + return func(this *configuration) { this.SerializerCount = value } +} + +func (singleton) defaults(options ...option) []option { + blank := nop{} + return append([]option{ + Options.Monitor(blank), + Options.Serializer(blank), + Options.Writer(blank), + Options.Dispatcher(blank), + Options.BatchCapacity(1024), + Options.UnitSize(64), + Options.SerializerCount(4), + }, options...) +} + +// nop satisfies every collaborator interface so New(...) can be called with +// zero options and still produce a runnable (if inert) pipeline. +type nop struct{} + +func (nop) Track(any) {} +func (nop) Serialize(io.Writer, any) error { return nil } +func (nop) Write(context.Context, ...any) error { return nil } +func (nop) Dispatch(context.Context, ...any) error { return nil } diff --git a/handlers/harness/config_test.go b/handlers/harness/config_test.go new file mode 100644 index 0000000..db36b69 --- /dev/null +++ b/handlers/harness/config_test.go @@ -0,0 +1,90 @@ +package harness + +import ( + "context" + "sync" + "testing" + + "github.com/smarty/gunit/v2" + "github.com/smarty/gunit/v2/assert/should" +) + +func TestConfigFixture(t *testing.T) { + gunit.Run(new(ConfigFixture), t) +} + +type ConfigFixture struct { + *gunit.Fixture +} + +func (this *ConfigFixture) apply(options ...option) configuration { + var cfg configuration + for _, item := range Options.defaults(options...) { + item(&cfg) + } + return cfg +} + +func (this *ConfigFixture) TestDefaultsPopulateCapacities() { + cfg := this.apply() + this.So(cfg.BatchCapacity, should.Equal, 1024) + this.So(cfg.UnitSize, should.Equal, 64) + this.So(cfg.SerializerCount, should.Equal, 4) +} + +func (this *ConfigFixture) TestDefaultCollaboratorsAreNop() { + cfg := this.apply() + this.So(cfg.Monitor, should.Equal, nop{}) + this.So(cfg.Serializer, should.Equal, nop{}) + this.So(cfg.Writer, should.Equal, nop{}) + this.So(cfg.Dispatcher, should.Equal, nop{}) +} + +func (this *ConfigFixture) TestTypesOptionStoresValuesVerbatim() { + cfg := this.apply(Options.Types("a", 42, struct{}{})) + this.So(cfg.Types, should.Equal, []any{"a", 42, struct{}{}}) +} + +func (this *ConfigFixture) TestTunableOptionsOverrideDefaults() { + cfg := this.apply( + Options.BatchCapacity(2), + Options.UnitSize(8), + Options.SerializerCount(3), + ) + this.So(cfg.BatchCapacity, should.Equal, 2) + this.So(cfg.UnitSize, should.Equal, 8) + this.So(cfg.SerializerCount, should.Equal, 3) +} + +func (this *ConfigFixture) TestCollaboratorOptionsOverrideDefaults() { + recorder := &recordingMonitor{} + cfg := this.apply(Options.Monitor(recorder)) + this.So(cfg.Monitor, should.Equal, recorder) +} + +func (this *ConfigFixture) TestZeroOptionsPipelineRunsInertly() { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + handler, listeners := New(ctx) + + done := make(chan struct{}) + go func() { + var wg sync.WaitGroup + for _, listener := range listeners { + wg.Go(listener.Listen) + } + wg.Wait() + close(done) + }() + + handler.Handle(ctx, "payload") + this.So(handler.(interface{ Close() error }).Close(), should.BeNil) + <-done +} + +type recordingMonitor struct{ observations []any } + +func (this *recordingMonitor) Track(observation any) { + this.observations = append(this.observations, observation) +} diff --git a/handlers/harness/contracts.go b/handlers/harness/contracts.go new file mode 100644 index 0000000..6ecf383 --- /dev/null +++ b/handlers/harness/contracts.go @@ -0,0 +1,77 @@ +package harness + +import ( + "context" + "errors" + "io" +) + +// Exported collaborator interfaces — callers supply real implementations via Options.*. + +type ( + Writer interface { + Write(ctx context.Context, messages ...any) error + } + Dispatcher interface { + Dispatch(ctx context.Context, messages ...any) error + } + Monitor interface { + Track(observation any) + } +) + +// Unexported internal interfaces — discovered reflectively from domain types +// supplied via Options.Types(...). + +type ( + executor interface { + Execute(message any, broadcast func(...any)) + } + applicator interface { + Apply(message any) + } + serializer interface { + Serialize(out io.Writer, in any) error + } +) + +// Monitor observations emitted by the pipeline. + +type ( + BatchInFlight struct{} + BatchComplete struct{} + UnitOfWorkInFlight struct{} + UnitOfWorkComplete struct{} + SerializationError struct { + Value any + Error error + } + PersistenceError struct { + Attempt int + Error error + } + BroadcastError struct { + Attempt int + Error error + } +) + +var ( + ErrSerialization = errors.New("serialization error") + ErrPersistence = errors.New("persistence error") + ErrBroadcast = errors.New("broadcast error") +) + +// Unexported value types shared across pipeline stages. + +type ( + batch struct { + ctx context.Context + messages []any + complete func() + } + unitOfWork struct { + results []*Message + completions []func() + } +) diff --git a/handlers/harness/fanout.go b/handlers/harness/fanout.go new file mode 100644 index 0000000..8d18dc0 --- /dev/null +++ b/handlers/harness/fanout.go @@ -0,0 +1,46 @@ +package harness + +import ( + "sync" + + "github.com/smarty/messaging/v3" +) + +type stationFactory func(in, out chan *unitOfWork) messaging.Listener + +func newFanOut(factory stationFactory, workerCount int, input, finalOutput chan *unitOfWork) []messaging.Listener { + var ( + listeners = make([]messaging.Listener, workerCount) + outputs = make([]chan *unitOfWork, workerCount) + ) + for i := range workerCount { + outputs[i] = make(chan *unitOfWork, 1024) + listeners[i] = factory(input, outputs[i]) + } + return append(listeners, newFanIn(outputs, finalOutput)) +} + +type fanIn struct { + inputs []chan *unitOfWork + output chan *unitOfWork +} + +func newFanIn(inputs []chan *unitOfWork, output chan *unitOfWork) *fanIn { + return &fanIn{ + inputs: inputs, + output: output, + } +} + +func (this *fanIn) Listen() { + defer close(this.output) + var wg sync.WaitGroup + defer wg.Wait() + for _, input := range this.inputs { + wg.Go(func() { + for unit := range input { + this.output <- unit + } + }) + } +} diff --git a/handlers/harness/message.go b/handlers/harness/message.go new file mode 100644 index 0000000..5cec769 --- /dev/null +++ b/handlers/harness/message.go @@ -0,0 +1,37 @@ +package harness + +import ( + "bytes" + "time" +) + +type Message struct { + // ID represents the unique ID of this message and its sequential place within a larger stream. + ID uint64 + + // Value contains the in-memory Go message structure. + Value any + + // Type is the name registered for the (Go) type of the Value. + // (i.e. 'subscription:subscription-renewed-v2'). + Type string + + // Content contains the serialized representation of the Go Value. + Content *bytes.Buffer + + // ContentType identifies the serialization method employed to represent the Content + // (i.e. 'application/json; charset=utf-8'). + ContentType string + + // ContentEncoding identifies the encoding of the Content + // (i.e. 'gzip'). + ContentEncoding string + + // Stored represents the instant the Type, Content, ContentType, and ContentEncoding + // were durably stored (a critically important step in processing--if it's not stored, it didn't happen). + Stored time.Time + + // Dispatched represents the instant the Type, Content, ContentType, and ContentEncoding + // were published to the external messaging system (if configured). + Dispatched time.Time +} diff --git a/handlers/harness/pipeline.go b/handlers/harness/pipeline.go new file mode 100644 index 0000000..bca8f88 --- /dev/null +++ b/handlers/harness/pipeline.go @@ -0,0 +1,49 @@ +package harness + +import ( + "context" + "time" + + "github.com/smarty/messaging/v3" +) + +func build(ctx context.Context, cfg configuration) (messaging.Handler, []messaging.Listener) { + var ( + batches = make(chan *batch, cfg.BatchCapacity) + work1 = make(chan *unitOfWork, cfg.BatchCapacity) + work2 = make(chan *unitOfWork, cfg.BatchCapacity) + work3 = make(chan *unitOfWork, cfg.BatchCapacity) + work4 = make(chan *unitOfWork, cfg.BatchCapacity) + work5 = make(chan *unitOfWork, cfg.BatchCapacity) + ) + + var ( + entry = newEntrypoint(cfg.Monitor, batches) + exec = newExecution(cfg.Monitor, cfg.UnitSize, batches, work1, newRouter(cfg.Types...)) + serializers = newFanOut(serializerFactory(cfg.Monitor, cfg.Serializer), cfg.SerializerCount, work1, work2) + persist = newPersistence(ctx, cfg.Monitor, work2, work3, cfg.Writer, time.Sleep) + complete = newCompletion(work3, work4) + bcast = newBroadcast(ctx, cfg.Monitor, work4, work5, cfg.Dispatcher, time.Sleep) + term = newTerminal(work5) + ) + + var listeners []messaging.Listener + listeners = append(listeners, + entry, + exec, + ) + listeners = append(listeners, serializers...) + listeners = append(listeners, + persist, + complete, + bcast, + term, + ) + return entry, listeners +} + +func serializerFactory(monitor Monitor, enc serializer) stationFactory { + return func(in, out chan *unitOfWork) messaging.Listener { + return newSerialization(monitor, enc, in, out) + } +} diff --git a/handlers/harness/pipeline_test.go b/handlers/harness/pipeline_test.go new file mode 100644 index 0000000..b1f39a6 --- /dev/null +++ b/handlers/harness/pipeline_test.go @@ -0,0 +1,204 @@ +package harness + +import ( + "context" + "io" + "sync" + "testing" + + "github.com/smarty/gunit/v2" + "github.com/smarty/gunit/v2/assert/better" + "github.com/smarty/gunit/v2/assert/should" + "github.com/smarty/messaging/v3" +) + +func TestPipelineFixture(t *testing.T) { + gunit.Run(new(PipelineFixture), t) +} + +type PipelineFixture struct { + *gunit.Fixture + ctx context.Context + handler messaging.Handler + listeners []messaging.Listener + wg sync.WaitGroup + + executeMu sync.Mutex + executeCalls []any + executeOutputs [][]any + + writeMu sync.Mutex + writeCalls [][]any + dispatchMu sync.Mutex + dispatchCalls [][]any + + trackMu sync.Mutex + tracked []any +} + +type commandType string + +func (this *PipelineFixture) Setup() { + this.ctx = context.WithValue(this.Context(), "testing", this.Name()) + this.handler, this.listeners = New(this.ctx, + Options.Types(this), + Options.Monitor(this), + Options.Serializer(this), + Options.Writer(this), + Options.Dispatcher(this), + ) + for _, listener := range this.listeners { + this.wg.Go(listener.Listen) + } +} + +// ExecuteCommand is picked up by scan() as the Execute-prefixed method driving +// the pipeline: every Handle(ctx, msg) call dispatches into this method. +func (this *PipelineFixture) ExecuteCommand(_ commandType, broadcast func(...any)) { + this.executeMu.Lock() + defer this.executeMu.Unlock() + if len(this.executeOutputs) == 0 { + return + } + out := this.executeOutputs[0] + this.executeOutputs = this.executeOutputs[1:] + broadcast(out...) +} + +// Execute wraps the fixture so it also satisfies the package-private executor +// interface directly — which is what scan() will pick up. We count calls here. +func (this *PipelineFixture) Execute(message any, broadcast func(...any)) { + this.executeMu.Lock() + this.executeCalls = append(this.executeCalls, message) + this.executeMu.Unlock() + this.ExecuteCommand(commandType(""), broadcast) +} + +func (this *PipelineFixture) Serialize(out io.Writer, _ any) error { + _, _ = out.Write([]byte("encoded")) + return nil +} + +func (this *PipelineFixture) Write(ctx context.Context, messages ...any) error { + this.So(ctx.Value("testing"), should.Equal, this.Name()) + this.writeMu.Lock() + defer this.writeMu.Unlock() + captured := make([]any, len(messages)) + copy(captured, messages) + this.writeCalls = append(this.writeCalls, captured) + return nil +} + +func (this *PipelineFixture) Dispatch(ctx context.Context, messages ...any) error { + this.So(ctx.Value("testing"), should.Equal, this.Name()) + this.dispatchMu.Lock() + defer this.dispatchMu.Unlock() + captured := make([]any, len(messages)) + copy(captured, messages) + this.dispatchCalls = append(this.dispatchCalls, captured) + return nil +} + +func (this *PipelineFixture) Track(observation any) { + this.trackMu.Lock() + defer this.trackMu.Unlock() + this.tracked = append(this.tracked, observation) +} + +func (this *PipelineFixture) countTracked() (batchInFlight, batchComplete, unitInFlight int) { + this.trackMu.Lock() + defer this.trackMu.Unlock() + for _, observation := range this.tracked { + switch observation.(type) { + case BatchInFlight: + batchInFlight++ + case BatchComplete: + batchComplete++ + case UnitOfWorkInFlight: + unitInFlight++ + } + } + return batchInFlight, batchComplete, unitInFlight +} + +func (this *PipelineFixture) shutdown() { + closer, ok := this.handler.(io.Closer) + this.So(ok, better.BeTrue) + this.So(closer.Close(), should.BeNil) + this.wg.Wait() +} + +func (this *PipelineFixture) TestPipelineRoutesMessageThroughExecutionPersistenceBroadcast() { + this.executeOutputs = [][]any{{"event-A", "event-B"}} + + this.handler.Handle(this.ctx, commandType("command-1")) + this.shutdown() + + this.So(len(this.executeCalls), should.Equal, 1) + + this.So(len(this.writeCalls), should.Equal, 1) + this.So(len(this.writeCalls[0]), should.Equal, 2) + this.So(this.writeCalls[0][0].(*Message).Value, should.Equal, "event-A") + this.So(this.writeCalls[0][1].(*Message).Value, should.Equal, "event-B") + + this.So(len(this.dispatchCalls), should.Equal, 1) + this.So(len(this.dispatchCalls[0]), should.Equal, 2) + this.So(this.dispatchCalls[0][0].(*Message).Value, should.Equal, "event-A") + this.So(this.dispatchCalls[0][1].(*Message).Value, should.Equal, "event-B") + + batchInFlight, batchComplete, unitInFlight := this.countTracked() + this.So(batchInFlight, should.Equal, 1) + this.So(batchComplete, should.Equal, 1) + this.So(unitInFlight, should.Equal, 1) +} + +func (this *PipelineFixture) TestPipelineHandlesMultipleMessagesAcrossHandleCalls() { + this.executeOutputs = [][]any{{"e1"}, {"e2"}, {"e3"}} + + this.handler.Handle(this.ctx, commandType("c1")) + this.handler.Handle(this.ctx, commandType("c2")) + this.handler.Handle(this.ctx, commandType("c3")) + this.shutdown() + + this.So(len(this.executeCalls), should.Equal, 3) + + var written []any + for _, call := range this.writeCalls { + for _, message := range call { + written = append(written, message.(*Message).Value) + } + } + this.So(written, should.Equal, []any{"e1", "e2", "e3"}) + + var dispatched []any + for _, call := range this.dispatchCalls { + for _, message := range call { + dispatched = append(dispatched, message.(*Message).Value) + } + } + this.So(dispatched, should.Equal, []any{"e1", "e2", "e3"}) + + batchInFlight, batchComplete, unitInFlight := this.countTracked() + this.So(batchInFlight, should.Equal, 3) + this.So(batchComplete, should.Equal, 3) + this.So(unitInFlight, should.BeGreaterThanOrEqualTo, 1) +} + +func (this *PipelineFixture) TestPipelineShutsDownWithNoTraffic() { + this.shutdown() + this.So(len(this.executeCalls), should.Equal, 0) + this.So(len(this.writeCalls), should.Equal, 0) + this.So(len(this.dispatchCalls), should.Equal, 0) + this.So(this.tracked, should.BeEmpty) +} + +func (this *PipelineFixture) TestPipelineSerializesEachBroadcastResult() { + this.executeOutputs = [][]any{{map[string]int{"value": 42}}} + + this.handler.Handle(this.ctx, commandType("go")) + this.shutdown() + + this.So(len(this.writeCalls), should.Equal, 1) + message := this.writeCalls[0][0].(*Message) + this.So(message.Content.Len() > 0, should.BeTrue) +} diff --git a/handlers/harness/pool.go b/handlers/harness/pool.go new file mode 100644 index 0000000..821d1d8 --- /dev/null +++ b/handlers/harness/pool.go @@ -0,0 +1,13 @@ +package harness + +import "sync" + +func newT[T any]() *T { return new(T) } + +type poolT[T any] struct{ pool *sync.Pool } + +func newPoolT[T any](new func() T) *poolT[T] { + return &poolT[T]{pool: &sync.Pool{New: func() any { return new() }}} +} +func (p *poolT[T]) Get() T { return p.pool.Get().(T) } +func (p *poolT[T]) Put(t T) { p.pool.Put(t) } diff --git a/handlers/harness/routing.go b/handlers/harness/routing.go new file mode 100644 index 0000000..02b24ae --- /dev/null +++ b/handlers/harness/routing.go @@ -0,0 +1,65 @@ +package harness + +import "reflect" + +// router dispatches messages to Execute*/Apply* methods discovered reflectively +// from the domain types registered via Options.Types(...). It is unexported +// because callers never hold one directly — build(ctx, cfg) constructs the sole +// instance per pipeline and feeds it into the execution stage as its executor +// collaborator. The router is not safe for concurrent use; the pipeline only +// calls Execute from within a single execution goroutine. +type router struct { + executors map[reflect.Type][]executor + applicators map[reflect.Type][]applicator + exclusions []exclusion +} +type exclusion struct { + message any + self applicator +} + +func newRouter(types ...any) *router { + executors, applicators := scan(types...) + return &router{ + executors: executors, + applicators: applicators, + } +} + +func (this *router) Apply(message any) { + for _, a := range this.applicators[reflect.TypeOf(message)] { + a.Apply(message) + } +} + +func (this *router) Execute(message any, broadcast func(...any)) { + for _, e := range this.executors[reflect.TypeOf(message)] { + e.Execute(message, func(results ...any) { + for _, result := range results { + self := this.selfApplicator(result, e) + if self != nil { + self.Apply(result) + } + this.exclusions = append(this.exclusions, exclusion{message: result, self: self}) + } + broadcast(results...) + }) + } + for _, e := range this.exclusions { + for _, a := range this.applicators[reflect.TypeOf(e.message)] { + if a == e.self { + continue + } + a.Apply(e.message) + } + } + this.exclusions = this.exclusions[:0] +} +func (this *router) selfApplicator(result any, e executor) applicator { + for _, a := range this.applicators[reflect.TypeOf(result)] { + if any(a) == any(e) { + return a + } + } + return nil +} diff --git a/handlers/harness/routing_test.go b/handlers/harness/routing_test.go new file mode 100644 index 0000000..ce74d2c --- /dev/null +++ b/handlers/harness/routing_test.go @@ -0,0 +1,208 @@ +package harness + +import ( + "testing" + + "github.com/smarty/gunit/v2" + "github.com/smarty/gunit/v2/assert/should" +) + +func TestRouterFixture(t *testing.T) { + gunit.Run(new(RouterFixture), t) +} + +type RouterFixture struct { + *gunit.Fixture + orchestrator *fakeOrchestrator + observer *fakeObserver + router *router +} + +func (this *RouterFixture) Setup() { + this.orchestrator = new(fakeOrchestrator) + this.observer = new(fakeObserver) + this.router = newRouter(this.orchestrator, this.observer, new(fakeBystander)) +} + +func (this *RouterFixture) TestExecuteRoutesToRegisteredExecutorAndForwardsBroadcast() { + var broadcast []any + this.router.Execute(messageA{value: "yo"}, func(out ...any) { + broadcast = append(broadcast, out...) + }) + + this.So(this.orchestrator.executed, should.Equal, []any{messageA{value: "yo"}}) + this.So(broadcast, should.Equal, []any{resultA{value: "yo"}}) +} + +func (this *RouterFixture) TestExecuteAppliesResultToSelfApplicator() { + this.router.Execute(messageA{value: "yo"}, func(...any) {}) + + this.So(this.orchestrator.applied, should.Equal, []any{resultA{value: "yo"}}) +} + +func (this *RouterFixture) TestExecuteAppliesResultToOtherApplicators() { + this.router.Execute(messageA{value: "yo"}, func(...any) {}) + + this.So(this.observer.applied, should.Equal, []any{resultA{value: "yo"}}) +} + +func (this *RouterFixture) TestExecuteAppliesEachResultExactlyOncePerApplicator() { + this.router.Execute(messageA{value: "yo"}, func(...any) {}) + + this.So(len(this.orchestrator.applied), should.Equal, 1) + this.So(len(this.observer.applied), should.Equal, 1) +} + +func (this *RouterFixture) TestExecuteUnknownTypeIsNoOp() { + var broadcast []any + this.router.Execute(unknownMsg{}, func(out ...any) { + broadcast = append(broadcast, out...) + }) + + this.So(broadcast, should.BeEmpty) + this.So(len(this.orchestrator.executed), should.Equal, 0) +} + +func (this *RouterFixture) TestExecuteIgnoresMethodsWithBadSignatures() { + var broadcast []any + this.router.Execute(bogusExecMsg{}, func(out ...any) { + broadcast = append(broadcast, out...) + }) + + this.So(broadcast, should.BeEmpty) + this.So(len(this.orchestrator.executed), should.Equal, 0) +} + +func (this *RouterFixture) TestApplyRoutesToAllRegisteredApplicators() { + this.router.Apply(resultA{value: "yay"}) + + this.So(this.orchestrator.applied, should.Equal, []any{resultA{value: "yay"}}) + this.So(this.observer.applied, should.Equal, []any{resultA{value: "yay"}}) +} + +func (this *RouterFixture) TestApplyIgnoresMethodsWithBadSignatures() { + this.router.Apply(bogusApplyMsg{}) + + this.So(len(this.orchestrator.applied), should.Equal, 0) + this.So(len(this.observer.applied), should.Equal, 0) +} + +func (this *RouterFixture) TestApplyUnknownTypeIsNoOp() { + this.router.Apply(unknownMsg{}) + + this.So(len(this.orchestrator.applied), should.Equal, 0) +} + +func (this *RouterFixture) TestExecuteResetsExclusionsBetweenInvocations() { + this.router.Execute(messageA{value: "first"}, func(...any) {}) + this.router.Execute(messageA{value: "second"}, func(...any) {}) + + this.So(this.orchestrator.applied, should.Equal, []any{ + resultA{value: "first"}, resultA{value: "second"}, + }) + this.So(this.observer.applied, should.Equal, []any{ + resultA{value: "first"}, resultA{value: "second"}, + }) +} + +// TestExecuteResultWithNoSelfApplicator covers the case where an executor produces +// a result whose type it does not apply itself, exercising selfApplicator's nil-return. +func (this *RouterFixture) TestExecuteResultWithNoSelfApplicator() { + var broadcast []any + this.router.Execute(messageB{value: "ping"}, func(out ...any) { + broadcast = append(broadcast, out...) + }) + + this.So(broadcast, should.Equal, []any{resultB{value: "ping"}}) + this.So(this.observer.appliedB, should.Equal, []any{resultB{value: "ping"}}) +} + +type ( + messageA struct{ value string } + resultA struct{ value string } + messageB struct{ value string } + resultB struct{ value string } + unknownMsg struct{} + bogusExecMsg struct{} + bogusApplyMsg struct{} +) + +// fakeOrchestrator satisfies both executor and applicator; its specific Execute*/Apply* +// methods exercise every branch of scan() — both the matching cases and the cases that +// must be filtered out. +type fakeOrchestrator struct { + executed []any + applied []any +} + +func (this *fakeOrchestrator) Execute(message any, broadcast func(...any)) { + switch typed := message.(type) { + case messageA: + this.ExecuteMessageA(typed, broadcast) + case messageB: + this.ExecuteMessageB(typed, broadcast) + } +} +func (this *fakeOrchestrator) Apply(message any) { + switch typed := message.(type) { + case resultA: + this.ApplyResultA(typed) + } +} +func (this *fakeOrchestrator) ExecuteMessageA(msg messageA, broadcast func(...any)) { + this.executed = append(this.executed, msg) + broadcast(resultA{value: msg.value}) +} +func (this *fakeOrchestrator) ExecuteMessageB(msg messageB, broadcast func(...any)) { + this.executed = append(this.executed, msg) + broadcast(resultB{value: msg.value}) +} +func (this *fakeOrchestrator) ApplyResultA(msg resultA) { + this.applied = append(this.applied, msg) +} + +func (this *fakeOrchestrator) Execute2() { + panic("scan() must skip methods with no message argument") +} +func (this *fakeOrchestrator) ExecuteBadReturn(_ bogusExecMsg, _ func(...any)) error { + panic("scan() must skip methods with non-zero return values") +} +func (this *fakeOrchestrator) ExecuteShortArgs(_ bogusExecMsg) { + panic("scan() must skip Execute methods that lack the broadcast func") +} +func (this *fakeOrchestrator) ExecuteBadLast(_ bogusExecMsg, _ string) { + panic("scan() must skip Execute methods whose last arg is not func(...any)") +} +func (this *fakeOrchestrator) ApplyBadReturn(_ bogusApplyMsg) error { + panic("scan() must skip Apply methods with non-zero return values") +} +func (this *fakeOrchestrator) ApplyExtraArg(_ bogusApplyMsg, _ string) { + panic("scan() must skip Apply methods with more than one argument") +} +func (this *fakeOrchestrator) Bogus(_ messageA) { + panic("scan() must skip methods that lack the Execute/Apply prefix") +} + +type fakeObserver struct { + applied []any + appliedB []any +} + +func (this *fakeObserver) Apply(message any) { + switch typed := message.(type) { + case resultA: + this.ApplyResultA(typed) + case resultB: + this.ApplyResultB(typed) + } +} +func (this *fakeObserver) ApplyResultA(msg resultA) { + this.applied = append(this.applied, msg) +} +func (this *fakeObserver) ApplyResultB(msg resultB) { + this.appliedB = append(this.appliedB, msg) +} + +type fakeBystander struct{} + +func (this *fakeBystander) Hello() {} diff --git a/handlers/harness/scanner.go b/handlers/harness/scanner.go new file mode 100644 index 0000000..91ff9ae --- /dev/null +++ b/handlers/harness/scanner.go @@ -0,0 +1,48 @@ +package harness + +import ( + "reflect" + "strings" +) + +func scan(types ...any) (executors map[reflect.Type][]executor, applicators map[reflect.Type][]applicator) { + executors = make(map[reflect.Type][]executor) + applicators = make(map[reflect.Type][]applicator) + for _, v := range types { + e, isExecutor := v.(executor) + a, isApplicator := v.(applicator) + if !isExecutor && !isApplicator { + continue + } + t := reflect.TypeOf(v) + for x := range t.NumMethod() { + method := t.Method(x) + argCount := method.Type.NumIn() + if argCount <= 1 { + continue + } + if method.Type.NumOut() > 0 { + continue + } + if isExecutor && strings.HasPrefix(method.Name, "Execute") && len(method.Name) > len("Execute") { + if argCount != 3 { + continue + } + lastArg := method.Type.In(argCount - 1) + if lastArg != reflect.TypeOf(func(...any) {}) { + continue + } + handledType := method.Type.In(1) + executors[handledType] = append(executors[handledType], e) + } + if isApplicator && strings.HasPrefix(method.Name, "Apply") && len(method.Name) > len("Apply") { + if argCount != 2 { + continue + } + handledType := method.Type.In(1) + applicators[handledType] = append(applicators[handledType], a) + } + } + } + return executors, applicators +} diff --git a/handlers/harness/sqladapter/contracts.go b/handlers/harness/sqladapter/contracts.go new file mode 100644 index 0000000..fd1af2b --- /dev/null +++ b/handlers/harness/sqladapter/contracts.go @@ -0,0 +1,5 @@ +package sqladapter + +type Logger interface { + Printf(format string, args ...any) +} diff --git a/handlers/harness/sqladapter/dispatcher.go b/handlers/harness/sqladapter/dispatcher.go new file mode 100644 index 0000000..1c877b6 --- /dev/null +++ b/handlers/harness/sqladapter/dispatcher.go @@ -0,0 +1,89 @@ +// Package sqladapter provides a reference implementation of the +// handlers/harness Writer and Dispatcher interfaces, bound to the `Messages` +// MySQL table defined by sqlmq/_schema_mysql.sql in this module (columns +// `id`, `dispatched`, `type`, `payload`). Callers running a different schema +// should copy and adapt these types. +package sqladapter + +import ( + "context" + "database/sql" + "fmt" + "strings" + + "github.com/smarty/messaging/v3" + "github.com/smarty/messaging/v3/handlers/harness" +) + +type Dispatcher struct { + connector messaging.Connector + handle *sql.DB + logger Logger +} + +func NewDispatcher(connector messaging.Connector, handle *sql.DB, logger Logger) *Dispatcher { + return &Dispatcher{ + connector: connector, + handle: handle, + logger: logger, + } +} + +func (this *Dispatcher) Dispatch(ctx context.Context, messages ...any) error { + if len(messages) == 0 { + return nil + } + if err := this.publish(ctx, messages); err != nil { + return err + } + return this.markDispatched(ctx, messages) +} + +func (this *Dispatcher) publish(ctx context.Context, messages []any) error { + connection, err := this.connector.Connect(ctx) + if err != nil { + return err + } + defer func() { _ = connection.Close() }() + + writer, err := connection.Writer(ctx) + if err != nil { + return err + } + defer func() { _ = writer.Close() }() + + dispatches := make([]messaging.Dispatch, 0, len(messages)) // TODO: reuse slice, pool dispatch struct + for _, raw := range messages { + message := raw.(*harness.Message) + // TODO: dedupe encoding work. The harness Serialization stage already encoded message.Value + // into message.Content for the Messages.payload column; passing message.Value here causes the + // transport connector's serialization layer to encode it a second time for the RMQ payload. + // Either pass the pre-encoded bytes through Dispatch.Payload/MessageType/ContentType and skip + // the connector's serialization for this writer, or drop the harness Serialization stage and + // let the connector own all encoding. + dispatches = append(dispatches, messaging.Dispatch{ + Durable: true, + Message: message.Value, + }) + } + _, err = writer.Write(ctx, dispatches...) + return err +} + +func (this *Dispatcher) markDispatched(ctx context.Context, messages []any) error { + var statement strings.Builder + statement.WriteString(`UPDATE Messages SET dispatched = NOW(3) WHERE id IN (`) + args := make([]any, 0, len(messages)) + for i, raw := range messages { + if i > 0 { + statement.WriteString(`,`) + } + statement.WriteString(`?`) + args = append(args, raw.(*harness.Message).ID) + } + statement.WriteString(`)`) + if _, err := this.handle.ExecContext(ctx, statement.String(), args...); err != nil { + return fmt.Errorf("mark dispatched: %w", err) + } + return nil +} diff --git a/handlers/harness/sqladapter/dispatcher_test.go b/handlers/harness/sqladapter/dispatcher_test.go new file mode 100644 index 0000000..41121b8 --- /dev/null +++ b/handlers/harness/sqladapter/dispatcher_test.go @@ -0,0 +1,137 @@ +package sqladapter + +import ( + "context" + "database/sql" + "errors" + "log" + "os" + "testing" + + "github.com/smarty/gunit/v2" + "github.com/smarty/gunit/v2/assert/should" + "github.com/smarty/messaging/v3" + "github.com/smarty/messaging/v3/handlers/harness" +) + +type dispatcherTestEvent struct { + AccountID uint64 + OrderID uint64 +} + +func TestDispatcherFixture(t *testing.T) { + if testing.Short() { + t.Skip("Skipping long-running database tests.") + } + ensureDatabaseReadiness(t) + gunit.Run(new(DispatcherFixture), t, gunit.Options.IntegrationTests()) +} + +type DispatcherFixture struct { + *gunit.Fixture + handle *sql.DB + connector *stubConnector + subject *Dispatcher +} + +func (this *DispatcherFixture) Setup() { + handle, err := openTestDatabase() + this.So(err, should.BeNil) + this.handle = handle + _, err = handle.Exec(`TRUNCATE TABLE Messages;`) + this.So(err, should.BeNil) + this.connector = newStubConnector() + this.subject = NewDispatcher(this.connector, handle, log.New(os.Stderr, "", 0)) +} + +func (this *DispatcherFixture) Teardown() { + _ = this.handle.Close() +} + +func (this *DispatcherFixture) seedMessage(value any) *harness.Message { + result, err := this.handle.Exec(`INSERT INTO Messages (type, payload) VALUES ('order-received', '{}')`) + this.So(err, should.BeNil) + id, err := result.LastInsertId() + this.So(err, should.BeNil) + return &harness.Message{ID: uint64(id), Value: value} +} + +func (this *DispatcherFixture) TestDispatch_PublishesAndMarksDispatched() { + event := dispatcherTestEvent{AccountID: 1, OrderID: 2} + message := this.seedMessage(event) + + err := this.subject.Dispatch(context.Background(), message) + + this.So(err, should.BeNil) + this.So(len(this.connector.published), should.Equal, 1) + this.So(this.connector.published[0].Message, should.Equal, event) + this.So(this.connector.published[0].Durable, should.BeTrue) + this.So(this.dispatchedTimestamp(message.ID), should.NOT.BeNil) +} + +func (this *DispatcherFixture) TestDispatch_PublishFails_ReturnsErrorWithoutMarkingDispatched() { + event := dispatcherTestEvent{AccountID: 1, OrderID: 2} + message := this.seedMessage(event) + this.connector.writeErr = errors.New("rmq down") + + err := this.subject.Dispatch(context.Background(), message) + + this.So(err, should.NOT.BeNil) + this.So(this.dispatchedTimestamp(message.ID), should.BeNil) +} + +func (this *DispatcherFixture) TestDispatch_NoMessages_NoOp() { + err := this.subject.Dispatch(context.Background()) + this.So(err, should.BeNil) + this.So(len(this.connector.published), should.Equal, 0) +} + +func (this *DispatcherFixture) dispatchedTimestamp(id uint64) *string { + var dispatched sql.NullString + err := this.handle.QueryRow(`SELECT dispatched FROM Messages WHERE id = ?`, id).Scan(&dispatched) + this.So(err, should.BeNil) + if dispatched.Valid { + s := dispatched.String + return &s + } + return nil +} + +// stubConnector mimics a transport connector. + +type stubConnector struct { + published []messaging.Dispatch + writeErr error +} + +func newStubConnector() *stubConnector { + return &stubConnector{} +} +func (this *stubConnector) Connect(ctx context.Context) (messaging.Connection, error) { + return &stubConnection{parent: this}, nil +} +func (this *stubConnector) Close() error { return nil } + +type stubConnection struct{ parent *stubConnector } + +func (this *stubConnection) Reader(ctx context.Context) (messaging.Reader, error) { + return nil, errors.New("not implemented") +} +func (this *stubConnection) Writer(ctx context.Context) (messaging.Writer, error) { + return &stubWriter{parent: this.parent}, nil +} +func (this *stubConnection) CommitWriter(ctx context.Context) (messaging.CommitWriter, error) { + return nil, errors.New("not implemented") +} +func (this *stubConnection) Close() error { return nil } + +type stubWriter struct{ parent *stubConnector } + +func (this *stubWriter) Write(ctx context.Context, dispatches ...messaging.Dispatch) (int, error) { + if this.parent.writeErr != nil { + return 0, this.parent.writeErr + } + this.parent.published = append(this.parent.published, dispatches...) + return len(dispatches), nil +} +func (this *stubWriter) Close() error { return nil } diff --git a/handlers/harness/sqladapter/recovery.go b/handlers/harness/sqladapter/recovery.go new file mode 100644 index 0000000..25ccde9 --- /dev/null +++ b/handlers/harness/sqladapter/recovery.go @@ -0,0 +1,54 @@ +package sqladapter + +import ( + "bytes" + "context" + "database/sql" + + "github.com/smarty/messaging/v3/handlers/harness" +) + +// Recover scans Messages WHERE dispatched IS NULL at startup, wraps each row +// as a *harness.Message, and feeds them through the Dispatcher (publish + mark dispatched). +// Intended to run synchronously during initialization, before the harness pipeline starts. +// +// TODO: pagination/streaming for large backlogs (currently loads everything into memory). +// TODO: make this a Listener +// TODO: retry w/ backoff +func Recover(ctx context.Context, handle *sql.DB, dispatcher *Dispatcher, logger Logger) error { + rows, err := handle.QueryContext(ctx, ` + SELECT id, type, payload + FROM Messages + WHERE dispatched IS NULL + ORDER BY id`) + if err != nil { + return err + } + defer func() { _ = rows.Close() }() + + var messages []any + for rows.Next() { + var ( + id uint64 + typeName string + payload []byte + ) + if err := rows.Scan(&id, &typeName, &payload); err != nil { + return err + } + messages = append(messages, &harness.Message{ + ID: id, + Type: typeName, + Content: bytes.NewBuffer(payload), + ContentType: "application/json", + }) + } + if err := rows.Err(); err != nil { + return err + } + if len(messages) == 0 { + return nil + } + logger.Printf("[INFO] Recovering %d undispatched message(s) from previous run.", len(messages)) + return dispatcher.Dispatch(ctx, messages...) +} diff --git a/handlers/harness/sqladapter/recovery_test.go b/handlers/harness/sqladapter/recovery_test.go new file mode 100644 index 0000000..1fe5872 --- /dev/null +++ b/handlers/harness/sqladapter/recovery_test.go @@ -0,0 +1,99 @@ +package sqladapter + +import ( + "context" + "database/sql" + "log" + "os" + "testing" + + "github.com/smarty/gunit/v2" + "github.com/smarty/gunit/v2/assert/should" +) + +func TestRecoveryFixture(t *testing.T) { + if testing.Short() { + t.Skip("Skipping long-running database tests.") + } + ensureDatabaseReadiness(t) + gunit.Run(new(RecoveryFixture), t, gunit.Options.IntegrationTests()) +} + +type RecoveryFixture struct { + *gunit.Fixture + handle *sql.DB + connector *stubConnector + dispatcher *Dispatcher +} + +func (this *RecoveryFixture) Setup() { + handle, err := openTestDatabase() + this.So(err, should.BeNil) + this.handle = handle + _, err = handle.Exec(`TRUNCATE TABLE Messages;`) + this.So(err, should.BeNil) + this.connector = newStubConnector() + this.dispatcher = NewDispatcher(this.connector, handle, log.New(os.Stderr, "", 0)) +} + +func (this *RecoveryFixture) Teardown() { + _ = this.handle.Close() +} + +func (this *RecoveryFixture) seedUndispatched(typeName string, payload string) uint64 { + result, err := this.handle.Exec(`INSERT INTO Messages (type, payload) VALUES (?, ?)`, typeName, payload) + this.So(err, should.BeNil) + id, err := result.LastInsertId() + this.So(err, should.BeNil) + return uint64(id) +} + +func (this *RecoveryFixture) seedDispatched(typeName string, payload string) uint64 { + result, err := this.handle.Exec(`INSERT INTO Messages (type, payload, dispatched) VALUES (?, ?, NOW(3))`, typeName, payload) + this.So(err, should.BeNil) + id, err := result.LastInsertId() + this.So(err, should.BeNil) + return uint64(id) +} + +func (this *RecoveryFixture) TestRecover_NoOrphans_NoOp() { + err := Recover(context.Background(), this.handle, this.dispatcher, log.New(os.Stderr, "", 0)) + + this.So(err, should.BeNil) + this.So(len(this.connector.published), should.Equal, 0) +} + +func (this *RecoveryFixture) TestRecover_DispatchesUndispatchedRowsInIDOrder() { + id1 := this.seedUndispatched("order-received", `{"order":1}`) + id2 := this.seedUndispatched("order-approved", `{"order":2}`) + _ = this.seedDispatched("order-received", `{"order":3}`) // already dispatched, must be skipped + + err := Recover(context.Background(), this.handle, this.dispatcher, log.New(os.Stderr, "", 0)) + + this.So(err, should.BeNil) + this.So(len(this.connector.published), should.Equal, 2) + this.So(this.dispatchedTimestamp(id1), should.NOT.BeNil) + this.So(this.dispatchedTimestamp(id2), should.NOT.BeNil) +} + +func (this *RecoveryFixture) TestRecover_PassesPayloadAndTypeIntoMessage() { + this.seedUndispatched("order-received", `{"order":1}`) + + err := Recover(context.Background(), this.handle, this.dispatcher, log.New(os.Stderr, "", 0)) + + this.So(err, should.BeNil) + this.So(len(this.connector.published), should.Equal, 1) + dispatch := this.connector.published[0] + this.So(dispatch.Durable, should.BeTrue) +} + +func (this *RecoveryFixture) dispatchedTimestamp(id uint64) *string { + var dispatched sql.NullString + err := this.handle.QueryRow(`SELECT dispatched FROM Messages WHERE id = ?`, id).Scan(&dispatched) + this.So(err, should.BeNil) + if dispatched.Valid { + s := dispatched.String + return &s + } + return nil +} diff --git a/handlers/harness/sqladapter/testdb_test.go b/handlers/harness/sqladapter/testdb_test.go new file mode 100644 index 0000000..b3bada4 --- /dev/null +++ b/handlers/harness/sqladapter/testdb_test.go @@ -0,0 +1,90 @@ +package sqladapter + +import ( + "cmp" + "database/sql" + "fmt" + "os" + "path/filepath" + "runtime" + "testing" + + _ "github.com/go-sql-driver/mysql" +) + +// TODO: move away from environment variables for connection parameters (just use sensible defaults from db-connector) + +// Integration tests in this package require a local MySQL server. Connection +// parameters are taken from environment variables (with sensible defaults): +// +// HARNESS_TEST_MYSQL_DSN — overrides the full DSN +// MYSQL_HOST — default 127.0.0.1 +// MYSQL_PORT — default 3306 +// MYSQL_USER — default root +// MYSQL_PASSWORD — default (empty) +// +// Tests run against a throwaway schema (default `messaging_harness_test`) which +// is dropped and re-created before each run. + +const testSchemaName = "messaging_harness_test" + +func ensureDatabaseReadiness(t *testing.T) { + bootstrap, err := openDSN(buildDSN("")) + if err != nil { + t.Fatal("Database not available (is mysql running?):", err) + } + defer func() { _ = bootstrap.Close() }() + if err := setupSchema(bootstrap); err != nil { + t.Fatal("Schema did not set up properly:", err) + } +} + +func openTestDatabase() (*sql.DB, error) { + return openDSN(buildDSN(testSchemaName)) +} + +func openDSN(dsn string) (*sql.DB, error) { + db, err := sql.Open("mysql", dsn) + if err != nil { + return nil, err + } + if err := db.Ping(); err != nil { + _ = db.Close() + return nil, err + } + return db, nil +} + +func buildDSN(schema string) string { + if override := os.Getenv("HARNESS_TEST_MYSQL_DSN"); override != "" { + return override + } + host := cmp.Or(os.Getenv("MYSQL_HOST"), "127.0.0.1") + port := cmp.Or(os.Getenv("MYSQL_PORT"), "3306") + user := cmp.Or(os.Getenv("MYSQL_USER"), "root") + password := os.Getenv("MYSQL_PASSWORD") + return fmt.Sprintf("%s:%s@tcp(%s:%s)/%s?multiStatements=true&parseTime=true", user, password, host, port, schema) +} + +func setupSchema(db *sql.DB) error { + statement := fmt.Sprintf("DROP SCHEMA IF EXISTS %s; CREATE SCHEMA %s; USE %s;", + testSchemaName, testSchemaName, testSchemaName) + if _, err := db.Exec(statement); err != nil { + return err + } + content, err := os.ReadFile(findSchemaFile()) + if err != nil { + return err + } + if _, err := db.Exec(string(content)); err != nil { + return err + } + return nil +} + +// findSchemaFile locates sqlmq/_schema_mysql.sql relative to this test file, +// since tests run from the package directory regardless of CWD. +func findSchemaFile() string { + _, thisFile, _, _ := runtime.Caller(0) + return filepath.Join(filepath.Dir(thisFile), "..", "..", "..", "sqlmq", "_schema_mysql.sql") +} diff --git a/handlers/harness/sqladapter/writer.go b/handlers/harness/sqladapter/writer.go new file mode 100644 index 0000000..074607f --- /dev/null +++ b/handlers/harness/sqladapter/writer.go @@ -0,0 +1,95 @@ +package sqladapter + +import ( + "cmp" + "context" + "database/sql" + "fmt" + "reflect" + "strings" + + "github.com/smarty/messaging/v3/handlers/harness" +) + +type Writer struct { + handle *sql.DB + typeNames map[reflect.Type]string + stride uint64 + logger Logger + legacyWrite func(context.Context, *sql.Tx, ...any) +} + +// NewWriter builds a Writer that inserts rows into the `Messages` table and +// invokes the supplied legacyWrite function inside the same transaction. +// +// Deprecated: the legacyWrite escape hatch is retained for migration from +// other projects and will be removed in a later release; new callers +// should supply a no-op function. +func NewWriter(handle *sql.DB, typeNames map[reflect.Type]string, stride uint64, logger Logger, legacyWrite func(context.Context, *sql.Tx, ...any)) *Writer { + return &Writer{ + handle: handle, + typeNames: typeNames, + stride: cmp.Or(stride, 1), + logger: logger, + legacyWrite: legacyWrite, + } +} + +func (this *Writer) Write(ctx context.Context, messages ...any) (err error) { + if len(messages) == 0 { + return nil + } + + tx, err := this.handle.BeginTx(ctx, nil) + if err != nil { + return err + } + defer func() { + if recovered := recover(); recovered != nil { + _ = tx.Rollback() + err = fmt.Errorf("panic during write: %v", recovered) + } else if err != nil { + _ = tx.Rollback() + } + }() + + if err := this.insertMessages(ctx, tx, messages); err != nil { + return err + } + + this.legacyWrite(ctx, tx, messages...) + + return tx.Commit() +} + +func (this *Writer) insertMessages(ctx context.Context, tx *sql.Tx, messages []any) error { + var statement strings.Builder // TODO: reuse statement builder + statement.WriteString(`INSERT INTO Messages (type, payload) VALUES `) + args := make([]any, 0, len(messages)*2) // TODO: reuse slice/buffer + for i, raw := range messages { + message := raw.(*harness.Message) + if message.Type == "" { + message.Type = this.typeNames[reflect.TypeOf(message.Value)] + } + if i > 0 { + statement.WriteString(`,`) + } + statement.WriteString(`(?, ?)`) + args = append(args, message.Type, message.Content.Bytes()) + } + result, err := tx.ExecContext(ctx, statement.String(), args...) + if err != nil { + return err + } + // https://dev.mysql.com/doc/refman/5.6/en/information-functions.html#function_last-insert-id + // > If you insert multiple rows using a single INSERT statement, LAST_INSERT_ID() returns the value + // > generated for the first inserted row only. + first, err := result.LastInsertId() + if err != nil { + return err + } + for i, raw := range messages { + raw.(*harness.Message).ID = uint64(first) + uint64(i)*this.stride + } + return nil +} diff --git a/handlers/harness/sqladapter/writer_test.go b/handlers/harness/sqladapter/writer_test.go new file mode 100644 index 0000000..af151ab --- /dev/null +++ b/handlers/harness/sqladapter/writer_test.go @@ -0,0 +1,165 @@ +package sqladapter + +import ( + "bytes" + "context" + "database/sql" + "encoding/json" + "reflect" + "testing" + "time" + + "github.com/smarty/gunit/v2" + "github.com/smarty/gunit/v2/assert/should" + "github.com/smarty/messaging/v3/handlers/harness" +) + +// Local test-only message types mirror the shape the source used. +type ( + orderReceived struct { + AccountID uint64 + OrderID uint64 + Timestamp time.Time + } + orderApproved struct { + AccountID uint64 + OrderID uint64 + Timestamp time.Time + } +) + +func TestWriterFixture(t *testing.T) { + if testing.Short() { + t.Skip("Skipping long-running database tests.") + } + ensureDatabaseReadiness(t) + gunit.Run(new(WriterFixture), t, gunit.Options.IntegrationTests()) +} + +type WriterFixture struct { + *gunit.Fixture + handle *sql.DB + subject *Writer + + legacyWriteCalls [][]any + legacyWritePanic any + testStride uint64 +} + +func (this *WriterFixture) Setup() { + handle, err := openTestDatabase() + this.So(err, should.BeNil) + this.handle = handle + this.legacyWriteCalls = nil + this.legacyWritePanic = nil + this.testStride = 7 + this.subject = NewWriter(handle, testTypeNames(), this.testStride, this, this.fakeLegacyWrite) + this.truncateTables() +} + +func (this *WriterFixture) fakeLegacyWrite(_ context.Context, _ *sql.Tx, messages ...any) { + this.legacyWriteCalls = append(this.legacyWriteCalls, messages) + if this.legacyWritePanic != nil { + panic(this.legacyWritePanic) + } +} + +func (this *WriterFixture) Teardown() { + _ = this.handle.Close() +} + +func (this *WriterFixture) truncateTables() { + _, err := this.handle.Exec(`TRUNCATE TABLE Messages;`) + this.So(err, should.BeNil) +} + +func (this *WriterFixture) TestWrite_InsertsMessageRowAndInvokesLegacyWrite() { + event := orderReceived{AccountID: 1, OrderID: 2, Timestamp: time.Now().UTC().Round(time.Second)} + message := serializedMessage(event, "") + + err := this.subject.Write(context.Background(), message) + + this.So(err, should.BeNil) + this.So(this.countMessages(), should.Equal, 1) + this.So(this.legacyWriteCalls, should.Equal, [][]any{{message}}) + this.So(message.Type, should.Equal, "order-received") +} + +func (this *WriterFixture) TestWrite_ResolvesMessageTypeFromRegistry() { + event := orderApproved{AccountID: 1, OrderID: 2, Timestamp: time.Now().UTC()} + message := serializedMessage(event, "") + + err := this.subject.Write(context.Background(), message) + + this.So(err, should.BeNil) + this.So(this.firstMessageType(), should.Equal, "order-approved") +} + +func (this *WriterFixture) TestWrite_LegacyWritePanic_RollsBackInsertedMessage() { + this.legacyWritePanic = "boom" + event := orderReceived{AccountID: 1, OrderID: 2, Timestamp: time.Now().UTC()} + + err := this.subject.Write(context.Background(), serializedMessage(event, "")) + + this.So(err, should.NOT.BeNil) + this.So(this.countMessages(), should.Equal, 0) +} + +func (this *WriterFixture) TestWrite_PreservesPreSetMessageType() { + event := orderReceived{AccountID: 1, OrderID: 2, Timestamp: time.Now().UTC()} + message := serializedMessage(event, "explicit.override") + + err := this.subject.Write(context.Background(), message) + + this.So(err, should.BeNil) + this.So(this.firstMessageType(), should.Equal, "explicit.override") +} + +func (this *WriterFixture) TestWrite_PopulatesMessageIDFromAutoincrement() { + event1 := orderReceived{AccountID: 1, OrderID: 2, Timestamp: time.Now().UTC()} + event2 := orderReceived{AccountID: 1, OrderID: 3, Timestamp: time.Now().UTC()} + m1 := serializedMessage(event1, "") + m2 := serializedMessage(event2, "") + + err := this.subject.Write(context.Background(), m1, m2) + + this.So(err, should.BeNil) + this.So(m1.ID, should.NOT.Equal, uint64(0)) + this.So(m2.ID, should.Equal, m1.ID+this.testStride) +} + +func (this *WriterFixture) TestWrite_NoMessages_NoOp() { + err := this.subject.Write(context.Background()) + this.So(err, should.BeNil) + this.So(this.countMessages(), should.Equal, 0) +} + +func (this *WriterFixture) countMessages() int { + var count int + err := this.handle.QueryRow(`SELECT COUNT(*) FROM Messages`).Scan(&count) + this.So(err, should.BeNil) + return count +} +func (this *WriterFixture) firstMessageType() string { + var t string + err := this.handle.QueryRow(`SELECT type FROM Messages ORDER BY id LIMIT 1`).Scan(&t) + this.So(err, should.BeNil) + return t +} + +func testTypeNames() map[reflect.Type]string { + return map[reflect.Type]string{ + reflect.TypeOf(orderReceived{}): "order-received", + reflect.TypeOf(orderApproved{}): "order-approved", + } +} + +func serializedMessage(value any, typeOverride string) *harness.Message { + payload, _ := json.Marshal(value) + return &harness.Message{ + Value: value, + Type: typeOverride, + Content: bytes.NewBuffer(payload), + ContentType: "application/json", + } +} From 138e8abf58e7e356f29580b323cea19dfcc1df61 Mon Sep 17 00:00:00 2001 From: Michael Whatcott Date: Thu, 7 May 2026 17:02:03 -0600 Subject: [PATCH 03/50] Renaming --- handlers/harness/pipeline.go | 44 +++++++++++++++---------------- handlers/harness/pipeline_test.go | 40 ++++++++++++++-------------- 2 files changed, 42 insertions(+), 42 deletions(-) diff --git a/handlers/harness/pipeline.go b/handlers/harness/pipeline.go index bca8f88..a14073d 100644 --- a/handlers/harness/pipeline.go +++ b/handlers/harness/pipeline.go @@ -7,42 +7,42 @@ import ( "github.com/smarty/messaging/v3" ) -func build(ctx context.Context, cfg configuration) (messaging.Handler, []messaging.Listener) { +func build(ctx context.Context, config configuration) (messaging.Handler, []messaging.Listener) { var ( - batches = make(chan *batch, cfg.BatchCapacity) - work1 = make(chan *unitOfWork, cfg.BatchCapacity) - work2 = make(chan *unitOfWork, cfg.BatchCapacity) - work3 = make(chan *unitOfWork, cfg.BatchCapacity) - work4 = make(chan *unitOfWork, cfg.BatchCapacity) - work5 = make(chan *unitOfWork, cfg.BatchCapacity) + batches = make(chan *batch, config.BatchCapacity) + work1 = make(chan *unitOfWork, config.BatchCapacity) + work2 = make(chan *unitOfWork, config.BatchCapacity) + work3 = make(chan *unitOfWork, config.BatchCapacity) + work4 = make(chan *unitOfWork, config.BatchCapacity) + work5 = make(chan *unitOfWork, config.BatchCapacity) ) var ( - entry = newEntrypoint(cfg.Monitor, batches) - exec = newExecution(cfg.Monitor, cfg.UnitSize, batches, work1, newRouter(cfg.Types...)) - serializers = newFanOut(serializerFactory(cfg.Monitor, cfg.Serializer), cfg.SerializerCount, work1, work2) - persist = newPersistence(ctx, cfg.Monitor, work2, work3, cfg.Writer, time.Sleep) - complete = newCompletion(work3, work4) - bcast = newBroadcast(ctx, cfg.Monitor, work4, work5, cfg.Dispatcher, time.Sleep) - term = newTerminal(work5) + entrypoint = newEntrypoint(config.Monitor, batches) + executor = newExecution(config.Monitor, config.UnitSize, batches, work1, newRouter(config.Types...)) + serializers = newFanOut(serializationFactory(config.Monitor, config.Serializer), config.SerializerCount, work1, work2) + persistence = newPersistence(ctx, config.Monitor, work2, work3, config.Writer, time.Sleep) + completion = newCompletion(work3, work4) + broadcast = newBroadcast(ctx, config.Monitor, work4, work5, config.Dispatcher, time.Sleep) + terminal = newTerminal(work5) ) var listeners []messaging.Listener listeners = append(listeners, - entry, - exec, + entrypoint, + executor, ) listeners = append(listeners, serializers...) listeners = append(listeners, - persist, - complete, - bcast, - term, + persistence, + completion, + broadcast, + terminal, ) - return entry, listeners + return entrypoint, listeners } -func serializerFactory(monitor Monitor, enc serializer) stationFactory { +func serializationFactory(monitor Monitor, enc serializer) stationFactory { return func(in, out chan *unitOfWork) messaging.Listener { return newSerialization(monitor, enc, in, out) } diff --git a/handlers/harness/pipeline_test.go b/handlers/harness/pipeline_test.go index b1f39a6..295998e 100644 --- a/handlers/harness/pipeline_test.go +++ b/handlers/harness/pipeline_test.go @@ -21,19 +21,19 @@ type PipelineFixture struct { ctx context.Context handler messaging.Handler listeners []messaging.Listener - wg sync.WaitGroup + waiter sync.WaitGroup - executeMu sync.Mutex + executeLock sync.Mutex executeCalls []any executeOutputs [][]any - writeMu sync.Mutex + writeLock sync.Mutex writeCalls [][]any - dispatchMu sync.Mutex + dispatchLock sync.Mutex dispatchCalls [][]any - trackMu sync.Mutex - tracked []any + trackLock sync.Mutex + tracked []any } type commandType string @@ -48,15 +48,15 @@ func (this *PipelineFixture) Setup() { Options.Dispatcher(this), ) for _, listener := range this.listeners { - this.wg.Go(listener.Listen) + this.waiter.Go(listener.Listen) } } // ExecuteCommand is picked up by scan() as the Execute-prefixed method driving // the pipeline: every Handle(ctx, msg) call dispatches into this method. func (this *PipelineFixture) ExecuteCommand(_ commandType, broadcast func(...any)) { - this.executeMu.Lock() - defer this.executeMu.Unlock() + this.executeLock.Lock() + defer this.executeLock.Unlock() if len(this.executeOutputs) == 0 { return } @@ -68,9 +68,9 @@ func (this *PipelineFixture) ExecuteCommand(_ commandType, broadcast func(...any // Execute wraps the fixture so it also satisfies the package-private executor // interface directly — which is what scan() will pick up. We count calls here. func (this *PipelineFixture) Execute(message any, broadcast func(...any)) { - this.executeMu.Lock() + this.executeLock.Lock() this.executeCalls = append(this.executeCalls, message) - this.executeMu.Unlock() + this.executeLock.Unlock() this.ExecuteCommand(commandType(""), broadcast) } @@ -81,8 +81,8 @@ func (this *PipelineFixture) Serialize(out io.Writer, _ any) error { func (this *PipelineFixture) Write(ctx context.Context, messages ...any) error { this.So(ctx.Value("testing"), should.Equal, this.Name()) - this.writeMu.Lock() - defer this.writeMu.Unlock() + this.writeLock.Lock() + defer this.writeLock.Unlock() captured := make([]any, len(messages)) copy(captured, messages) this.writeCalls = append(this.writeCalls, captured) @@ -91,8 +91,8 @@ func (this *PipelineFixture) Write(ctx context.Context, messages ...any) error { func (this *PipelineFixture) Dispatch(ctx context.Context, messages ...any) error { this.So(ctx.Value("testing"), should.Equal, this.Name()) - this.dispatchMu.Lock() - defer this.dispatchMu.Unlock() + this.dispatchLock.Lock() + defer this.dispatchLock.Unlock() captured := make([]any, len(messages)) copy(captured, messages) this.dispatchCalls = append(this.dispatchCalls, captured) @@ -100,14 +100,14 @@ func (this *PipelineFixture) Dispatch(ctx context.Context, messages ...any) erro } func (this *PipelineFixture) Track(observation any) { - this.trackMu.Lock() - defer this.trackMu.Unlock() + this.trackLock.Lock() + defer this.trackLock.Unlock() this.tracked = append(this.tracked, observation) } func (this *PipelineFixture) countTracked() (batchInFlight, batchComplete, unitInFlight int) { - this.trackMu.Lock() - defer this.trackMu.Unlock() + this.trackLock.Lock() + defer this.trackLock.Unlock() for _, observation := range this.tracked { switch observation.(type) { case BatchInFlight: @@ -125,7 +125,7 @@ func (this *PipelineFixture) shutdown() { closer, ok := this.handler.(io.Closer) this.So(ok, better.BeTrue) this.So(closer.Close(), should.BeNil) - this.wg.Wait() + this.waiter.Wait() } func (this *PipelineFixture) TestPipelineRoutesMessageThroughExecutionPersistenceBroadcast() { From 52e52523adf8ad7afe6b63102525c3688d901b18 Mon Sep 17 00:00:00 2001 From: Michael Whatcott Date: Thu, 7 May 2026 17:02:16 -0600 Subject: [PATCH 04/50] TODO --- handlers/harness/message.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/handlers/harness/message.go b/handlers/harness/message.go index 5cec769..a3e5837 100644 --- a/handlers/harness/message.go +++ b/handlers/harness/message.go @@ -5,6 +5,8 @@ import ( "time" ) +// TODO: unexport? + type Message struct { // ID represents the unique ID of this message and its sequential place within a larger stream. ID uint64 From 4b2c4e5db5866eedf7b4a5cf58d0d8d213fb0761 Mon Sep 17 00:00:00 2001 From: Michael Whatcott Date: Mon, 11 May 2026 13:35:59 -0600 Subject: [PATCH 05/50] Change comment so GoLand doesn't flag NewWriter as deprecated. --- handlers/harness/sqladapter/writer.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/handlers/harness/sqladapter/writer.go b/handlers/harness/sqladapter/writer.go index 074607f..1b448ba 100644 --- a/handlers/harness/sqladapter/writer.go +++ b/handlers/harness/sqladapter/writer.go @@ -22,7 +22,7 @@ type Writer struct { // NewWriter builds a Writer that inserts rows into the `Messages` table and // invokes the supplied legacyWrite function inside the same transaction. // -// Deprecated: the legacyWrite escape hatch is retained for migration from +// Deprecation warning: the legacyWrite escape hatch is retained for migration from // other projects and will be removed in a later release; new callers // should supply a no-op function. func NewWriter(handle *sql.DB, typeNames map[reflect.Type]string, stride uint64, logger Logger, legacyWrite func(context.Context, *sql.Tx, ...any)) *Writer { From df3f6d1459d82c13d0a480c3bd4e3bedb7946170 Mon Sep 17 00:00:00 2001 From: Michael Whatcott Date: Tue, 12 May 2026 14:06:41 -0600 Subject: [PATCH 06/50] Remove TODO --- handlers/harness/message.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/handlers/harness/message.go b/handlers/harness/message.go index a3e5837..5cec769 100644 --- a/handlers/harness/message.go +++ b/handlers/harness/message.go @@ -5,8 +5,6 @@ import ( "time" ) -// TODO: unexport? - type Message struct { // ID represents the unique ID of this message and its sequential place within a larger stream. ID uint64 From ed3675ac125d3eea3326ef3395d0472b4cd1c4e8 Mon Sep 17 00:00:00 2001 From: Michael Whatcott Date: Tue, 12 May 2026 14:06:52 -0600 Subject: [PATCH 07/50] Simplify test wireup. --- handlers/harness/sqladapter/testdb_test.go | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/handlers/harness/sqladapter/testdb_test.go b/handlers/harness/sqladapter/testdb_test.go index b3bada4..b106ecc 100644 --- a/handlers/harness/sqladapter/testdb_test.go +++ b/handlers/harness/sqladapter/testdb_test.go @@ -1,7 +1,6 @@ package sqladapter import ( - "cmp" "database/sql" "fmt" "os" @@ -14,14 +13,7 @@ import ( // TODO: move away from environment variables for connection parameters (just use sensible defaults from db-connector) -// Integration tests in this package require a local MySQL server. Connection -// parameters are taken from environment variables (with sensible defaults): -// -// HARNESS_TEST_MYSQL_DSN — overrides the full DSN -// MYSQL_HOST — default 127.0.0.1 -// MYSQL_PORT — default 3306 -// MYSQL_USER — default root -// MYSQL_PASSWORD — default (empty) +// Integration tests in this package require a local MySQL server. // // Tests run against a throwaway schema (default `messaging_harness_test`) which // is dropped and re-created before each run. @@ -56,14 +48,7 @@ func openDSN(dsn string) (*sql.DB, error) { } func buildDSN(schema string) string { - if override := os.Getenv("HARNESS_TEST_MYSQL_DSN"); override != "" { - return override - } - host := cmp.Or(os.Getenv("MYSQL_HOST"), "127.0.0.1") - port := cmp.Or(os.Getenv("MYSQL_PORT"), "3306") - user := cmp.Or(os.Getenv("MYSQL_USER"), "root") - password := os.Getenv("MYSQL_PASSWORD") - return fmt.Sprintf("%s:%s@tcp(%s:%s)/%s?multiStatements=true&parseTime=true", user, password, host, port, schema) + return fmt.Sprintf("%s:%s@tcp(%s:%s)/%s?multiStatements=true&parseTime=true", "root", "", "127.0.0.1", "3306", schema) } func setupSchema(db *sql.DB) error { From 5d3aea10e690c394ddf8d86cd66095d03125918f Mon Sep 17 00:00:00 2001 From: Michael Whatcott Date: Tue, 12 May 2026 14:13:48 -0600 Subject: [PATCH 08/50] Make target for database tests. --- Makefile | 10 ++++++++-- doc/docker-compose.yml | 13 +++++++++++++ sqlmq/_schema_mysql.sql => doc/mysql/schema.sql | 0 handlers/harness/sqladapter/dispatcher.go | 2 +- handlers/harness/sqladapter/testdb_test.go | 4 ++-- 5 files changed, 24 insertions(+), 5 deletions(-) create mode 100644 doc/docker-compose.yml rename sqlmq/_schema_mysql.sql => doc/mysql/schema.sql (100%) diff --git a/Makefile b/Makefile index 9b27578..f18e7ae 100755 --- a/Makefile +++ b/Makefile @@ -1,10 +1,16 @@ #!/usr/bin/make -f +fmt: + go mod tidy && go fmt ./... + test: fmt go test -timeout=1s -short -race -covermode=atomic ./... -fmt: - go mod tidy && go fmt ./... +test.db: test + go test -timeout=30s -race -covermode=atomic github.com/smarty/messaging/v3/handlers/harness/sqladapter + +test.db.local: + (docker compose -f doc/docker-compose.yml up --wait && $(MAKE) test.db --no-print-directory); docker compose -f doc/docker-compose.yml down compile: go build ./... diff --git a/doc/docker-compose.yml b/doc/docker-compose.yml new file mode 100644 index 0000000..0e42119 --- /dev/null +++ b/doc/docker-compose.yml @@ -0,0 +1,13 @@ +services: + mysql: + image: mysql:8.0 + environment: + MYSQL_ALLOW_EMPTY_PASSWORD: 1 + MYSQL_DATABASE: accounting + ports: + - 3306:3306 + healthcheck: + test: "mysql -uroot -e 'select 1'" + interval: 2s + timeout: 20s + retries: 10 diff --git a/sqlmq/_schema_mysql.sql b/doc/mysql/schema.sql similarity index 100% rename from sqlmq/_schema_mysql.sql rename to doc/mysql/schema.sql diff --git a/handlers/harness/sqladapter/dispatcher.go b/handlers/harness/sqladapter/dispatcher.go index 1c877b6..ff93e2f 100644 --- a/handlers/harness/sqladapter/dispatcher.go +++ b/handlers/harness/sqladapter/dispatcher.go @@ -1,6 +1,6 @@ // Package sqladapter provides a reference implementation of the // handlers/harness Writer and Dispatcher interfaces, bound to the `Messages` -// MySQL table defined by sqlmq/_schema_mysql.sql in this module (columns +// MySQL table defined by doc/mysql/schema.sql in this module (columns // `id`, `dispatched`, `type`, `payload`). Callers running a different schema // should copy and adapt these types. package sqladapter diff --git a/handlers/harness/sqladapter/testdb_test.go b/handlers/harness/sqladapter/testdb_test.go index b106ecc..efd74f3 100644 --- a/handlers/harness/sqladapter/testdb_test.go +++ b/handlers/harness/sqladapter/testdb_test.go @@ -67,9 +67,9 @@ func setupSchema(db *sql.DB) error { return nil } -// findSchemaFile locates sqlmq/_schema_mysql.sql relative to this test file, +// findSchemaFile locates doc/mysql/schema.sql relative to this test file, // since tests run from the package directory regardless of CWD. func findSchemaFile() string { _, thisFile, _, _ := runtime.Caller(0) - return filepath.Join(filepath.Dir(thisFile), "..", "..", "..", "sqlmq", "_schema_mysql.sql") + return filepath.Join(filepath.Dir(thisFile), "..", "..", "..", "doc", "mysql", "schema.sql") } From c43b5bcafbd29ce87a82e91e7afad54efee5c800 Mon Sep 17 00:00:00 2001 From: Michael Whatcott Date: Wed, 13 May 2026 16:56:09 -0600 Subject: [PATCH 09/50] Remove TODO --- handlers/harness/sqladapter/testdb_test.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/handlers/harness/sqladapter/testdb_test.go b/handlers/harness/sqladapter/testdb_test.go index efd74f3..44f69f4 100644 --- a/handlers/harness/sqladapter/testdb_test.go +++ b/handlers/harness/sqladapter/testdb_test.go @@ -11,8 +11,6 @@ import ( _ "github.com/go-sql-driver/mysql" ) -// TODO: move away from environment variables for connection parameters (just use sensible defaults from db-connector) - // Integration tests in this package require a local MySQL server. // // Tests run against a throwaway schema (default `messaging_harness_test`) which From 0ab0a802708613738ae40e67654d1c745568e538 Mon Sep 17 00:00:00 2001 From: Michael Whatcott Date: Wed, 13 May 2026 17:15:22 -0600 Subject: [PATCH 10/50] Implement paging/batching for recovery routine. --- .../harness/sqladapter/dispatcher_test.go | 6 ++-- handlers/harness/sqladapter/recovery.go | 24 +++++++++---- handlers/harness/sqladapter/recovery_test.go | 36 +++++++++++++++++-- 3 files changed, 55 insertions(+), 11 deletions(-) diff --git a/handlers/harness/sqladapter/dispatcher_test.go b/handlers/harness/sqladapter/dispatcher_test.go index 41121b8..8e848ae 100644 --- a/handlers/harness/sqladapter/dispatcher_test.go +++ b/handlers/harness/sqladapter/dispatcher_test.go @@ -100,8 +100,9 @@ func (this *DispatcherFixture) dispatchedTimestamp(id uint64) *string { // stubConnector mimics a transport connector. type stubConnector struct { - published []messaging.Dispatch - writeErr error + published []messaging.Dispatch + writeBatches []int + writeErr error } func newStubConnector() *stubConnector { @@ -131,6 +132,7 @@ func (this *stubWriter) Write(ctx context.Context, dispatches ...messaging.Dispa if this.parent.writeErr != nil { return 0, this.parent.writeErr } + this.parent.writeBatches = append(this.parent.writeBatches, len(dispatches)) this.parent.published = append(this.parent.published, dispatches...) return len(dispatches), nil } diff --git a/handlers/harness/sqladapter/recovery.go b/handlers/harness/sqladapter/recovery.go index 25ccde9..a509ab2 100644 --- a/handlers/harness/sqladapter/recovery.go +++ b/handlers/harness/sqladapter/recovery.go @@ -9,13 +9,14 @@ import ( ) // Recover scans Messages WHERE dispatched IS NULL at startup, wraps each row -// as a *harness.Message, and feeds them through the Dispatcher (publish + mark dispatched). -// Intended to run synchronously during initialization, before the harness pipeline starts. +// as a *harness.Message, and feeds them through the Dispatcher (publish + mark dispatched) +// in pages of batchSize. Intended to run synchronously during initialization, before +// the harness pipeline starts. // -// TODO: pagination/streaming for large backlogs (currently loads everything into memory). // TODO: make this a Listener // TODO: retry w/ backoff -func Recover(ctx context.Context, handle *sql.DB, dispatcher *Dispatcher, logger Logger) error { +func Recover(ctx context.Context, handle *sql.DB, dispatcher *Dispatcher, logger Logger, batchSize int) error { + logger.Printf("[INFO] Recovering undispatched message(s) from previous run...") rows, err := handle.QueryContext(ctx, ` SELECT id, type, payload FROM Messages @@ -26,6 +27,7 @@ func Recover(ctx context.Context, handle *sql.DB, dispatcher *Dispatcher, logger } defer func() { _ = rows.Close() }() + var total int var messages []any for rows.Next() { var ( @@ -36,12 +38,21 @@ func Recover(ctx context.Context, handle *sql.DB, dispatcher *Dispatcher, logger if err := rows.Scan(&id, &typeName, &payload); err != nil { return err } + total++ messages = append(messages, &harness.Message{ ID: id, Type: typeName, Content: bytes.NewBuffer(payload), ContentType: "application/json", }) + if len(messages) >= batchSize { + err := dispatcher.Dispatch(ctx, messages...) + if err != nil { + return err + } + clear(messages) + messages = messages[:0] + } } if err := rows.Err(); err != nil { return err @@ -49,6 +60,7 @@ func Recover(ctx context.Context, handle *sql.DB, dispatcher *Dispatcher, logger if len(messages) == 0 { return nil } - logger.Printf("[INFO] Recovering %d undispatched message(s) from previous run.", len(messages)) - return dispatcher.Dispatch(ctx, messages...) + err = dispatcher.Dispatch(ctx, messages...) + logger.Printf("[INFO] Recovering %d total undispatched message(s) from previous run.", total) + return err } diff --git a/handlers/harness/sqladapter/recovery_test.go b/handlers/harness/sqladapter/recovery_test.go index 1fe5872..d6ae4d4 100644 --- a/handlers/harness/sqladapter/recovery_test.go +++ b/handlers/harness/sqladapter/recovery_test.go @@ -57,7 +57,7 @@ func (this *RecoveryFixture) seedDispatched(typeName string, payload string) uin } func (this *RecoveryFixture) TestRecover_NoOrphans_NoOp() { - err := Recover(context.Background(), this.handle, this.dispatcher, log.New(os.Stderr, "", 0)) + err := Recover(context.Background(), this.handle, this.dispatcher, log.New(os.Stderr, "", 0), 1024) this.So(err, should.BeNil) this.So(len(this.connector.published), should.Equal, 0) @@ -68,7 +68,7 @@ func (this *RecoveryFixture) TestRecover_DispatchesUndispatchedRowsInIDOrder() { id2 := this.seedUndispatched("order-approved", `{"order":2}`) _ = this.seedDispatched("order-received", `{"order":3}`) // already dispatched, must be skipped - err := Recover(context.Background(), this.handle, this.dispatcher, log.New(os.Stderr, "", 0)) + err := Recover(context.Background(), this.handle, this.dispatcher, log.New(os.Stderr, "", 0), 1024) this.So(err, should.BeNil) this.So(len(this.connector.published), should.Equal, 2) @@ -79,7 +79,7 @@ func (this *RecoveryFixture) TestRecover_DispatchesUndispatchedRowsInIDOrder() { func (this *RecoveryFixture) TestRecover_PassesPayloadAndTypeIntoMessage() { this.seedUndispatched("order-received", `{"order":1}`) - err := Recover(context.Background(), this.handle, this.dispatcher, log.New(os.Stderr, "", 0)) + err := Recover(context.Background(), this.handle, this.dispatcher, log.New(os.Stderr, "", 0), 1024) this.So(err, should.BeNil) this.So(len(this.connector.published), should.Equal, 1) @@ -87,6 +87,36 @@ func (this *RecoveryFixture) TestRecover_PassesPayloadAndTypeIntoMessage() { this.So(dispatch.Durable, should.BeTrue) } +func (this *RecoveryFixture) TestRecover_RowsExceedBatchSize_FlushesInBatchesAndDispatchesAll() { + const batchSize = 3 + const total = 7 // 3 + 3 + 1 + ids := make([]uint64, 0, total) + for i := 0; i < total; i++ { + ids = append(ids, this.seedUndispatched("order-received", `{}`)) + } + + err := Recover(context.Background(), this.handle, this.dispatcher, log.New(os.Stderr, "", 0), batchSize) + + this.So(err, should.BeNil) + this.So(len(this.connector.published), should.Equal, total) + for _, id := range ids { + this.So(this.dispatchedTimestamp(id), should.NOT.BeNil) + } +} + +func (this *RecoveryFixture) TestRecover_RowCountIsMultipleOfBatchSize_FlushesUniformBatches() { + const batchSize = 3 + const total = 6 // exactly 2 batches of 3 + for i := 0; i < total; i++ { + this.seedUndispatched("order-received", `{}`) + } + + err := Recover(context.Background(), this.handle, this.dispatcher, log.New(os.Stderr, "", 0), batchSize) + + this.So(err, should.BeNil) + this.So(this.connector.writeBatches, should.Equal, []int{3, 3}) +} + func (this *RecoveryFixture) dispatchedTimestamp(id uint64) *string { var dispatched sql.NullString err := this.handle.QueryRow(`SELECT dispatched FROM Messages WHERE id = ?`, id).Scan(&dispatched) From 8f6c64487fd04d551687632cebd7c9e84e779e6e Mon Sep 17 00:00:00 2001 From: Michael Whatcott Date: Mon, 18 May 2026 16:03:14 -0600 Subject: [PATCH 11/50] A diagram of the pipeline. --- .../2026-05-14_pipeline-component-diagram.svg | 222 ++++++++++++++++++ 1 file changed, 222 insertions(+) create mode 100644 doc/work-sessions/2026/2026-05-14_pipeline-component-diagram.svg diff --git a/doc/work-sessions/2026/2026-05-14_pipeline-component-diagram.svg b/doc/work-sessions/2026/2026-05-14_pipeline-component-diagram.svg new file mode 100644 index 0000000..8371f00 --- /dev/null +++ b/doc/work-sessions/2026/2026-05-14_pipeline-component-diagram.svg @@ -0,0 +1,222 @@ + + + + + + + + + + + + + + + + + + + + Caller + Handle(ctx, messages...) + + + + + + 00 entrypoint + wraps messages → *batch + tracks BatchInFlight + + + + + + batches + chan *batch + + + + + + 01 execution + router.Execute(msg, broadcast) + accumulates *Message → *unitOfWork + + + + + + work1 + chan *unitOfWork + + + + + + 02 serialization + Serializer.Serialize(out, value) + fan-out workers + fanIn (not shown) + + + + + + work2 + chan *unitOfWork + + + + + + 03 persistence + Writer.Write(ctx, msgs...) + retry forever w/ sleep + + + + + + work3 + chan *unitOfWork + + + + + + 04 completion + invokes per-batch complete() + tracks BatchComplete + + + + + + work4 + chan *unitOfWork + + + + + + 05 broadcast + Dispatcher.Dispatch(ctx, msgs...) + retry forever w/ sleep + + + + + + work5 + chan *unitOfWork + + + + + + 06 terminal + drains the final channel + + + + + + + + + + + + + + + + + + + + + + + + + router + Execute*/Apply* methods + discovered from Options.Types + + + + + + Serializer + caller-supplied + Serialize(io.Writer, any) error + + + + + + Writer + caller-supplied + Write(ctx, ...any) error + + + + + + Dispatcher + caller-supplied + Dispatch(ctx, ...any) error + + + + + + + + + + + + + + + Monitor + Track(observation) + Batch{In,Complete} + UnitOfWork{In,Complete} + SerializationError + PersistenceError + BroadcastError + observed by all stages + + + + + + + + + + + + + + + complete() releases caller's WaitGroup + + + Channel buffer capacity = config.BatchCapacity (default 1024) + From 41ac49256b27e6320321f89f0437ee1f94fda652 Mon Sep 17 00:00:00 2001 From: Michael Whatcott Date: Fri, 29 May 2026 11:28:30 -0600 Subject: [PATCH 12/50] Draft of proposal to split HTTP pathway from RMQ. --- ...posal-harness-resilience-module-changes.md | 493 ++++++++++++++++++ 1 file changed, 493 insertions(+) create mode 100644 doc/work-sessions/2026/2026-05-28_15-30-32-proposal-harness-resilience-module-changes.md diff --git a/doc/work-sessions/2026/2026-05-28_15-30-32-proposal-harness-resilience-module-changes.md b/doc/work-sessions/2026/2026-05-28_15-30-32-proposal-harness-resilience-module-changes.md new file mode 100644 index 0000000..f08acee --- /dev/null +++ b/doc/work-sessions/2026/2026-05-28_15-30-32-proposal-harness-resilience-module-changes.md @@ -0,0 +1,493 @@ +--- +name: Harness pipeline resilience (module-local changes) +description: Implement the messaging/v3 harness side of the cross-repo "harness resilience and idempotency" proposal — split-path entrypoint (Handle vs HandleResult), depth-based admission shedding for HTTP, and split channel-buffer sizing (BatchCapacity vs UnitCapacity). Excludes billing-context HTTP shell changes and post-deploy observation, which belong to the consuming repo. +type: plot +--- + +# Proposal: Harness Pipeline Resilience — Module-Local Changes + +## Background + +The cross-repo proposal at +`billing-context/.../2026-05-15_23-02-32-proposal-harness-resilience-and-idempotency.md` +describes three operational changes layered on top of Chunk C of the incremental +domain transformation. Two of those changes ("two-path entrypoint" and "split +channel buffer sizing") live entirely inside this module +(`github.com/smarty/messaging/v3`); a third change ("HTTP shell outcome +mapping") lives in the consuming repo (`billing-context`) and is out of scope +here. + +This proposal scopes the work to just the parts that land in *this* repo: + +- All edits under `handlers/harness/` (entrypoint, contracts, config, pipeline, + fanout, and their tests). +- A documentation update to `doc/work-sessions/2026/2026-05-14_pipeline-component-diagram.svg` + to reflect the split capacity knobs and the new monitor observations. + +The HTTP shell wireup, integration tests, error definitions, monitor metric +registration, and post-deploy observation steps from the parent proposal happen +in `billing-context` *after* this repo's changes merge and a tagged version of +`messaging/v3` is published. They are explicitly **out of scope** for this +proposal — they will be picked up under a follow-on session in that repo. + +### Why the changes are needed (recap) + +Two operational concerns surfaced after Chunks A and B of the incremental +domain transformation merged: + +1. **HTTP requests stack up indefinitely during a database/RabbitMQ outage.** + `entrypoint.Handle` blocks on a per-call `sync.WaitGroup` until the + Completion stage fires; the caller's `context.Context` is captured into + `*batch` but never observed. Even when the HTTP client's deadline passes or + the load balancer cancels the request, the goroutine stays parked. +2. **The pipeline's six channels are all sized to one knob (`BatchCapacity`, + default 1024).** During an outage that lets tens of thousands of in-memory + domain mutations sit between Apply and durable storage. The in-memory + `Domain` drifts far ahead of what was ever stored, surfacing as "we said yes + to the client, then forgot" on restart. + +The harness-side fix has three independently-mergeable pieces: + +1. **Split `Handle` into two paths**: the existing `Handle(ctx, msgs...)` for + MQ/cron callers (preserves today's blocking, contract-honoring behavior), + and a new `HandleResult(ctx, message any) HandleOutcome` for HTTP callers + (honors `ctx.Done()`, applies admission shedding, returns a status outcome, + and accepts exactly one message per call). +2. **Depth-based admission inside `HandleResult`** — high-watermark check plus + hard-full backstop, both HTTP-only. +3. **Split channel-buffer sizing** — `BatchCapacity` continues to size the + caller-side `batches` channel; a new `UnitCapacity` (default 1) sizes + `work1`–`work5` and the per-worker fan-out outputs. + +The companion `AdjustOrder` domain idempotency change (separate proposal, +already merged in the consuming repo) makes it safe for `HandleResult` to +return early when the caller's `ctx` fires: the in-flight batch keeps +processing and durably stores, and a client retry collapses to a no-op once +the original batch has persisted. + +## Approach + +### Decision summary + +Two distinct, parallel methods on `*entrypoint`: + +| Method | Caller | Honors `ctx.Done()` | Applies shed | Return value | Message arity | +|---------------------------------------------------|----------|---------------------|--------------|-------------------------|-------------------| +| `Handle(ctx, messages ...any)` | MQ, cron | No | No | none (today's contract) | variadic (1..N) | +| `HandleResult(ctx, message any) HandleOutcome` | HTTP | Yes | Yes | `HandleOutcome` | exactly one | + +`HandleResult` takes a single `message any` (not variadic) because every HTTP +route in the consuming repo invokes the domain with exactly one command per +request. Constraining the signature at the entrypoint: + +- Eliminates the empty-slice / multi-message edge cases on the HTTP path + (no need to defend against `HandleResult(ctx)` or `HandleResult(ctx, a, b)` + in either the entrypoint or any call site). +- Tightens the worst-case in-memory work-in-progress bound: combined with + `UnitCapacity=1`, each in-flight HTTP request contributes exactly one input + message to a batch rather than potentially many. The `batches` channel + capacity now corresponds directly to a count of HTTP commands enqueued + rather than a count of caller invocations of arbitrary size. +- Surfaces the asymmetry plainly in the type system — MQ/cron may legitimately + need to deliver multiple events per call (e.g. broker batch deliveries), + HTTP does not. + +`HandleOutcome` is an `int`-backed enum: + +| Value | Meaning | +|-----------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `OutcomeAccepted` (0) | Batch admitted *and* completion fired before the caller's ctx expired. Mutation durably stored. | +| `OutcomeShed` (1) | Batch refused at admission (high-watermark, hard-full, or pipeline closed). Pipeline never saw the batch. | +| `OutcomeCallerDeparted` (2) | Batch admitted but the caller's `ctx` fired before completion. Pipeline still owns the batch and will durably store; caller does not know storage status yet. | + +Two new monitor observations: + +- `LoadShed{}` — emitted by `HandleResult` when admission is rejected. +- `CallerDeparted{}` — emitted by `HandleResult` when the caller's `ctx` fired + before completion. + +Two new configuration options with defaults: + +- `Options.UnitCapacity(int)` — default `1`. Sizes `work1`–`work5` and the + per-worker fan-out outputs. +- `Options.ShedThreshold(float64)` — default `0.80`. Fraction of the `batches` + channel capacity past which `HandleResult` sheds. + +### Detailed design + +#### 1. Two-path entrypoint with shared helpers + +Today's `00_entrypoint.go:29` has a single `Handle` whose body inlines waiter +acquisition, batch allocation, completion-callback wiring, the +admission-under-RWMutex sequence, and the `waiter.Wait()` block. The split +extracts three private helpers and gives each method its own admission and +waiting logic. The `prepare` helper keeps its variadic `...any` shape so +`Handle` can pass its argument through verbatim; `HandleResult` calls +`prepare(ctx, message)` with its single message, which Go promotes to a +one-element slice at the call site. + +```go +// prepare acquires a waiter, allocates a *batch from the pool, and wires up +// the completion callback. Returns the items the caller will need. +func (this *entrypoint) prepare(ctx context.Context, messages ...any) (waiter *sync.WaitGroup, item *batch) { + waiter = this.waiters.Get() + waiter.Add(1) + item = this.batches.Get() + item.ctx = ctx + item.messages = messages + item.complete = func() { + waiter.Done() + this.monitor.Track(batchComplete) + this.batches.Put(item) + } + return waiter, item +} + +// abandon releases waiter and pool entry when the item was never enqueued +// (i.e. complete() will never fire). +func (this *entrypoint) abandon(waiter *sync.WaitGroup, item *batch) { + waiter.Done() + this.batches.Put(item) +} + +// waiterDone wraps waiter.Wait() in a chan struct{} so it's select-able. +func (this *entrypoint) waiterDone(waiter *sync.WaitGroup) (done chan struct{}) { + done = make(chan struct{}) + go func() { waiter.Wait(); close(done) }() + return done +} +``` + +**Path A — `Handle` (MQ and cron):** preserves today's contract verbatim. + +```go +func (this *entrypoint) Handle(ctx context.Context, messages ...any) { + waiter, item := this.prepare(ctx, messages...) + defer this.waiters.Put(waiter) + + this.lock.RLock() + if this.closed { + this.lock.RUnlock() + this.abandon(waiter, item) + return + } + this.work <- item + this.monitor.Track(batchInFlight) + this.lock.RUnlock() + + waiter.Wait() +} +``` + +Properties: +- **No `ctx.Done()` honoring.** MQ deliveries don't carry a client deadline; + cron has its own scheduler-level guard. Returning early would cause + `streaming` to ack work that the pipeline never finished. +- **No load-shed.** Sending to `this.work` is a blocking channel send. + Back-pressure naturally propagates to the broker via prefetch limits and + unacked-message counts. +- **Hard-full backstop is gone for this path.** The only "shed" condition is + pipeline shutdown (`this.closed`) — exactly today's behavior. + +**Path B — `HandleResult` (HTTP):** new behavior, accepts a single message. + +```go +func (this *entrypoint) HandleResult(ctx context.Context, message any) HandleOutcome { + waiter, item := this.prepare(ctx, message) + defer this.waiters.Put(waiter) + + this.lock.RLock() + if this.closed { + this.lock.RUnlock() + this.abandon(waiter, item) + return OutcomeShed + } + if float64(len(this.work))/float64(cap(this.work)) >= this.shedThreshold { + this.lock.RUnlock() + this.abandon(waiter, item) + this.monitor.Track(loadShed) + return OutcomeShed + } + select { + case this.work <- item: + this.monitor.Track(batchInFlight) + default: + this.lock.RUnlock() + this.abandon(waiter, item) + this.monitor.Track(loadShed) + return OutcomeShed + } + this.lock.RUnlock() + + select { + case <-this.waiterDone(waiter): + return OutcomeAccepted + case <-ctx.Done(): + this.monitor.Track(callerDeparted) + return OutcomeCallerDeparted + } +} +``` + +Properties: +- **Single message per call.** The signature accepts exactly one `message any`, + matching every HTTP-driven domain command in the consuming repo. +- **Honors `ctx.Done()`** — HTTP request deadlines unblock the caller goroutine. +- **Applies high-watermark + hard-full shed** — fast-fail for HTTP without + blocking. +- **Outcome-returning** — caller (HTTP shell, in another repo) maps to + status codes. + +**Pool-entry lifecycle (consistent across both paths):** +- Success path: pipeline's Completion stage invokes `item.complete()`, which + calls `this.batches.Put(item)`. Neither method puts the item back itself. +- Caller-departed path (only reachable in `HandleResult`): the pipeline still + owns the item and will eventually invoke `item.complete()`. `HandleResult` + must not `Put` — the pool would receive the same item twice. +- Shed and closed-pipeline paths: the item was never enqueued, so `complete()` + will never fire. `abandon(waiter, item)` does the cleanup. + +**Critically, the batch is not abandoned by the pipeline when the caller +departs.** When `OutcomeCallerDeparted` is returned, the in-flight batch +keeps processing; `complete()` still fires; persistence still happens. Only +the HTTP caller's goroutine returns early. + +#### 2. Depth-based admission (HTTP-only, lives inside `HandleResult`) + +The high-watermark check (`len/cap >= shedThreshold`) and the hard-full +backstop (the `default` arm of the `select` on `this.work <- item`) both live +inside `HandleResult`. `Handle` (MQ/cron) implements neither — see §1 for why. + +`Options.ShedThreshold(value float64)` exposes the threshold; default `0.80`. +Setting it ≥ `1.0` disables high-watermark shedding (only the hard-full +backstop remains, also HTTP-only). + +The `len(chan)/cap(chan)` snapshot races with concurrent producers/consumers; +under heavy concurrency the threshold can be exceeded momentarily before the +next admission check fires. This is acceptable — the threshold is a soft +signal, not a hard limit; the hard backstop is the channel-full `default` +branch. + +#### 3. Split channel buffer sizing + +Today (`pipeline.go:11-18`): + +```go +batches = make(chan *batch, config.BatchCapacity) +work1 = make(chan *unitOfWork, config.BatchCapacity) +// ... work2..work5 same +``` + +Proposed: + +```go +batches = make(chan *batch, config.BatchCapacity) +work1 = make(chan *unitOfWork, config.UnitCapacity) +work2 = make(chan *unitOfWork, config.UnitCapacity) +work3 = make(chan *unitOfWork, config.UnitCapacity) +work4 = make(chan *unitOfWork, config.UnitCapacity) +work5 = make(chan *unitOfWork, config.UnitCapacity) +``` + +And in `fanout.go:17`, the per-worker output channels (currently hardcoded +to 1024) become `make(chan *unitOfWork, unitCapacity)` where `unitCapacity` is +threaded through `newFanOut`'s signature (or via the existing +`stationFactory` closure — to be decided at implementation time, lower-blast-radius +option preferred). + +`UnitCapacity` defaults to 1. Tunable via `Options.UnitCapacity(value int)`. +Setting it equal to `BatchCapacity` reproduces today's behavior. + +**Why default 1, not 0?** Fully unbuffered channels turn every stage handoff +into a synchronization barrier — stage N can't begin unit N+1 until stage N+1 +has received unit N. Buffer-1 lets stage N finish unit N+1 *while* stage N+1 +is processing unit N. Pipelining benefit saturates at depth ~1 since each +stage runs in a single goroutine (except serialization, which has its own +fan-out concurrency). + +**Bound on in-memory drift during an outage** — with `UnitCapacity=1` and +5 channels post-domain, plus the in-flight unit at each stage, the worst case +is ~10 units' worth of unpersisted mutations. At `UnitSize=64` that's +~640 batches' worth of broadcast results downstream of Execution. + +The single-message `HandleResult` signature *also* tightens the upstream +side: each HTTP-admitted batch on the `batches` channel now carries exactly +one input message rather than potentially many, so `BatchCapacity` becomes a +direct count of in-flight HTTP commands rather than a count of caller +invocations of arbitrary fan-out. Combined with the `UnitCapacity=1` +post-domain default, the total worst-case in-memory drift is meaningfully +smaller than the previous draft's estimate, while throughput pipelining is +preserved. + +### Non-goals + +- **Rewriting the pipeline.** The structure (Entrypoint → Execution → + Serialization → Persistence → Completion → Broadcast → Terminal) is + preserved verbatim. +- **Changing `messaging.Handler`.** `Handle(ctx, messages ...any)` keeps its + exact existing contract. `HandleResult` is a new method, not a replacement, + and intentionally has a different signature (single message, returning an + outcome). +- **Domain-layer changes.** The companion `AdjustOrder` idempotency change is + in another repo and is assumed merged before any consumer relies on + `OutcomeCallerDeparted`. Nothing in this proposal touches domain code. +- **Wireup/HTTP shell changes.** Phase 2 and Phase 4 of the parent proposal + live in `billing-context`. Out of scope here. +- **`harness/sqladapter` changes.** No code changes; just a regression check + via `go test`. + +### Files modified (this repo only) + +| Path | Action | Purpose | +|------------------------------------------|--------|---------------------------------------------------------------------------------------------------------------------------------------| +| `handlers/harness/00_entrypoint.go` | Modify | Split into two paths: `Handle` (MQ/cron, blocking, no shed, variadic) and `HandleResult` (HTTP, ctx-aware, shed-aware, single message); shared helpers | +| `handlers/harness/contracts.go` | Modify | New `HandleOutcome` enum; new `LoadShed`, `CallerDeparted` event types and unexported sentinel values | +| `handlers/harness/config.go` | Modify | `Options.UnitCapacity(int)`, `Options.ShedThreshold(float64)`; defaults 1, 0.80 | +| `handlers/harness/pipeline.go` | Modify | Use `UnitCapacity` for `work1`–`work5`; pass it into `newFanOut`; thread `ShedThreshold` into `newEntrypoint` | +| `handlers/harness/fanout.go` | Modify | Accept `unitCapacity` and use it for the per-worker output channels instead of the hardcoded 1024 | +| `handlers/harness/00_entrypoint_test.go` | Modify | New tests for context-aware return, shed behavior, outcome reporting (single-message API), plus pinning tests for the existing `Handle` contract | +| `handlers/harness/config_test.go` | Modify | Assert defaults for `UnitCapacity`, `ShedThreshold`; assert override setters | +| `handlers/harness/pipeline_test.go` | Modify | Adjust assertions if any depend on default channel sizes (none expected to break — defaults preserve external observable behavior) | +| `doc/work-sessions/2026/2026-05-14_pipeline-component-diagram.svg` | Modify | Reflect split `BatchCapacity`/`UnitCapacity` knobs, `LoadShed`/`CallerDeparted` Monitor observations, and `HandleOutcome` return path | + +### Alternatives considered + +- **Keep `HandleResult` variadic to mirror `Handle`.** Rejected — every HTTP + route invokes the domain with exactly one command, so a variadic signature + would invite empty-call and multi-call bugs at the boundary, and would + loosen the WIP bound that `UnitCapacity=1` is meant to enforce. A + single-`message any` parameter is both ergonomic at the HTTP call site and + load-bearing for the resilience story. +- **Single shared method that branches on caller type via a `ctx` value or + `Options.Source`.** Rejected — `streaming` acks unconditionally on clean + `Handle` return, so an MQ-side shed-then-return would silently drop + messages. The two paths require fundamentally different behavior, not + different parameter values. Two methods make the contract visible at every + call site. +- **Drop the `default` arm in `HandleResult`'s `select` and rely solely on + high-watermark shedding.** Rejected — keep `default` as a hard backstop in + case `shedThreshold` is misconfigured. +- **Apply shed to MQ as well, and panic on shed so the broker redelivers + after reconnect.** Rejected — noisy, tangles error handling, and produces + alarming log output in normal load-shed conditions. The blocking-channel-send + back-pressure path through MQ prefetch is the standard RabbitMQ flow-control + mechanism and works without instrumentation changes. +- **Add nack/error return to `messaging.Handler.Handle` itself.** Rejected as + out-of-scope — would touch every existing `messaging.Handler` implementation + across all consumers. A future messaging-library-level change could add + this; this proposal does not. +- **Inject a per-batch `ctx` through Persistence and Broadcast.** Rejected. + Per-batch ctx in retry-forever stages would unwind partially-completed work + and break the durability principle. The pipeline ctx (`harness.New(ctx, …)`) + is the right scope for those stages. +- **Keep `BatchCapacity` sizing all channels uniformly.** Rejected — preserves + the in-memory drift problem during outages. + +## Trade-offs & Risks + +- **`HandleResult` exposes a per-call result outside the `messaging.Handler` + abstraction.** Acceptable because the harness already exports `Monitor`, + `Writer`, `Dispatcher`, etc. — the package is explicitly the "single + ingress" abstraction. The new `HandleOutcome` type and the new method live + on `*entrypoint` and can be retrieved via type assertion or via a + small interface that the consumer defines. +- **Single-message HTTP signature is a hard constraint.** A hypothetical + future HTTP route that needed to submit multiple commands atomically would + not fit. Acceptable today — every existing HTTP route in `billing-context` + invokes the domain with exactly one command — and reversible later if + needed (the signature can grow back to variadic without breaking existing + call sites by switching to a slice or by introducing a sibling + `HandleResultBatch` method). +- **`waiterDone` allocates a goroutine and a channel per `HandleResult` call.** + This is on the HTTP path only and only when the call actually waits (i.e. is + not shed). The cost is a few hundred bytes and one goroutine for the + duration of the in-flight batch — well within the budget of an HTTP request. +- **`UnitCapacity=1` reduces normal-throughput headroom slightly.** Pipelining + is preserved (depth-1 buffer between stages) but bursty workloads that + previously absorbed into deep buffers will now apply backpressure earlier. + Mitigation: configurable; a deployment that prefers the old throughput + characteristic can set `Options.UnitCapacity(1024)`. +- **Caller-departed batches keep doing work the caller no longer cares about.** + Intentional — matches the durability principle. Combined with the merged + domain-layer idempotency change (in `billing-context`), repeated retries + collapse to no-ops after the first applies. From this module's perspective + this is purely a contract guarantee: "we will not unwind the in-flight + batch when the caller departs." +- **The shed-threshold as a fraction is inexact.** `len(chan)/cap(chan)` is + a snapshot that races with concurrent producers/consumers; under heavy + concurrency we can exceed the threshold momentarily before the next + admission check fires. Acceptable — soft signal, not a hard limit; hard + backstop is the channel-full `default` branch. +- **`Handle` and `HandleResult` share state (`this.work`, `this.lock`, + `this.closed`).** Two paths writing to the same channel under the same + RWMutex is fine; race-free under `-race`. Tests must cover the case where + both paths interleave on a shrunk-`BatchCapacity` fixture. +- **Cross-repo coordination.** This module's changes are backward-compatible + (new options have defaults; new method doesn't break existing + `messaging.Handler` callers). A consumer that doesn't yet adopt + `HandleResult` keeps working unchanged. The consuming repo's adoption is + sequenced after a tagged release. +- **Diagram drift.** The pipeline diagram is the canonical visual reference; + if the SVG isn't updated alongside the code, reviewers will form a stale + mental model. Mitigation: diagram update is in the checklist. + +## Implementation Checklist + +### Phase 1: Configuration plumbing (red/green) + +- [ ] Edit `handlers/harness/config_test.go` (`TestDefaultsPopulateCapacities`) to also assert `cfg.UnitCapacity == 1` and `cfg.ShedThreshold == 0.80`. Run `make test` — confirm failure (fields don't exist yet → compile error). +- [ ] Edit `handlers/harness/config_test.go` (`TestTunableOptionsOverrideDefaults`) to also exercise `Options.UnitCapacity(2)` and `Options.ShedThreshold(0.5)` and assert the values stick. Compile error still expected. +- [ ] Edit `handlers/harness/config.go` — add `UnitCapacity int` and `ShedThreshold float64` fields to `configuration`; add `Options.UnitCapacity(int)` and `Options.ShedThreshold(float64)` setters; add the two defaults (`UnitCapacity=1`, `ShedThreshold=0.80`) to `Options.defaults(...)`. +- [ ] Run `make test` — confirm config tests pass. + +### Phase 2: Pipeline rewires for split capacity (red/green) + +- [ ] Edit `handlers/harness/pipeline.go` — change `work1`–`work5` to `make(chan *unitOfWork, config.UnitCapacity)`; thread `config.UnitCapacity` into the `newFanOut` call. +- [ ] Edit `handlers/harness/fanout.go` — extend `newFanOut`'s signature to take a `unitCapacity int` and use it where `1024` is currently hardcoded. +- [ ] Run `make test` — pipeline tests should still pass under the new defaults; if any test depends on the old 1024 buffer it should be updated to set `Options.UnitCapacity(1024)` explicitly. + +### Phase 3: Monitor observations and outcome enum + +- [ ] Edit `handlers/harness/contracts.go` — add `HandleOutcome` type (`int`-backed enum: `OutcomeAccepted=0`, `OutcomeShed=1`, `OutcomeCallerDeparted=2`); add `LoadShed struct{}` and `CallerDeparted struct{}` event types alongside the existing `BatchInFlight`/`BatchComplete`/etc. +- [ ] Edit `handlers/harness/00_entrypoint.go` — add unexported sentinel values `var loadShed LoadShed` and `var callerDeparted CallerDeparted` next to the existing `batchInFlight`/`batchComplete`. +- [ ] Run `make test` — confirm the existing suite still compiles and passes. + +### Phase 4: Extract shared helpers (pure refactor — keep `Handle` behavior identical) + +- [ ] Refactor `handlers/harness/00_entrypoint.go` to extract `prepare(ctx, messages ...any) (*sync.WaitGroup, *batch)`, `abandon(waiter, item)`, and `waiterDone(waiter) chan struct{}` helpers; rewrite `Handle`'s body in terms of `prepare(ctx, messages...)` and the existing admission logic so it is observably identical. +- [ ] Run `make test` — all existing tests must still pass; this step changes no externally observable behavior. + +### Phase 5: Add `HandleResult` (TDD, HTTP path, single-message signature) + +- [ ] In `handlers/harness/00_entrypoint_test.go`, add `TestHandleResult_OutcomeAcceptedOnSuccess` — call `HandleResult(ctx, "msg")` with a single message; let the pipeline complete; assert outcome is `OutcomeAccepted`. Run `make test` — confirm failure (no `HandleResult` method yet → compile error). +- [ ] Add `HandleResult(ctx context.Context, message any) HandleOutcome` method on `*entrypoint` with the body shown in §1 of Approach. The method calls `this.prepare(ctx, message)` (Go promotes the single argument into the helper's variadic slot). Note: the entrypoint must hold its `shedThreshold` field — wire it through `newEntrypoint` from `pipeline.go`. Run `make test` — confirm `OutcomeAcceptedOnSuccess` passes. +- [ ] Add `TestHandleResult_OutcomeCallerDepartedOnContextCancel` — fixture with a writer that blocks forever; cancel the caller's `ctx`; assert outcome is `OutcomeCallerDeparted` and Monitor sees `CallerDeparted{}`. Run — confirm passing. +- [ ] Add `TestHandleResult_OutcomeShedAtHighWatermark` — fixture with `BatchCapacity=10`, `ShedThreshold=0.5`, writer that blocks forever; submit 5 messages via `HandleResult` (each call carries one message); assert the 6th-and-beyond return `OutcomeShed` and Monitor sees `LoadShed{}`. Run — confirm passing. +- [ ] Add `TestHandleResult_OutcomeShedOnHardFull` — `ShedThreshold=2.0` (high-watermark disabled); writer blocks; submit `BatchCapacity+1` messages via `HandleResult` (one per call); assert the overflowing one returns `OutcomeShed`. Run — confirm passing. +- [ ] Add `TestHandleResult_OutcomeShedReleasesPoolEntry` — submit a message that gets shed via `HandleResult`; assert the entrypoint's `batches` pool has the same length before and after (no leak). Run — confirm passing. +- [ ] Add `TestHandleResult_ClosedPipelineReturnsShed` — close the entrypoint; call `HandleResult(ctx, "msg")`; assert outcome is `OutcomeShed` and the pool entry is returned. Run — confirm passing. +- [ ] Add `TestHandleResult_BatchCarriesExactlyOneMessage` — `HandleResult(ctx, "only")`; intercept the resulting `*batch` on the work channel; assert `len(item.messages) == 1` and `item.messages[0] == "only"`. Run — confirm passing (this pins the single-message contract). + +### Phase 6: Pin the existing `Handle` contract (TDD, MQ/cron path) + +- [ ] Add `TestHandle_BlocksUntilDurable` — submit a batch via `Handle`; the writer takes a controlled delay to acknowledge; assert `Handle` returns only after the writer completes. Run — confirm passing (this pins the contract `streaming` depends on). +- [ ] Add `TestHandle_DoesNotShedAtHighWatermark` — fixture with `BatchCapacity=2`, `ShedThreshold=0.5`, writer that blocks forever; submit 5 batches via `Handle` (each in its own goroutine); assert all 5 are blocked (none returned). After unblocking the writer, all 5 should eventually return. Run — confirm passing. +- [ ] Add `TestHandle_DoesNotShedOnHardFull` — `BatchCapacity=2`; submit 3 batches via `Handle` from separate goroutines; assert the third blocks on the channel send rather than returning early. Run — confirm passing. +- [ ] Add `TestHandle_IgnoresContextCancel` — submit a batch via `Handle`; cancel the ctx; assert `Handle` is still blocked until the pipeline completes the batch. Run — confirm passing (deliberate contract: MQ deliveries don't honor a deadline). +- [ ] Add `TestHandle_ReturnsImmediatelyOnClosedPipeline` — close the entrypoint; call `Handle`; assert it returns within a few milliseconds (no panic, no block). Run — confirm passing (this is today's behavior, just being pinned). +- [ ] Add `TestHandle_PreservesVariadicMessages` — `Handle(ctx, "a", "b", "c")`; intercept the resulting `*batch`; assert `len(item.messages) == 3`. Run — confirm passing (pins that the variadic contract still works after the `prepare` refactor). + +### Phase 7: Race and integration sanity + +- [ ] Run the full harness test suite under `-race`: `go test -race ./handlers/harness/...`. Confirm green. +- [ ] Run `make test` (the project-level entry point that also runs `go mod tidy`, `go fmt ./...`). Confirm green. +- [ ] Run `go test ./handlers/harness/sqladapter/...` against a live MySQL (drop `-short` if present, or use the project's `make test.db.local`-equivalent). Confirm no regressions in the SQL adapter. + +### Phase 8: Documentation + +- [ ] Update `doc/work-sessions/2026/2026-05-14_pipeline-component-diagram.svg` to reflect: split `BatchCapacity`/`UnitCapacity` knobs, `LoadShed`/`CallerDeparted` Monitor observations, and the `HandleResult(ctx, message any) → HandleOutcome` return path. (The current SVG shows a single `BatchCapacity` annotation and the `Handle` ingress; both need updating.) +- [ ] Self-review the diff: confirm no `messaging.Handler` callers were inadvertently broken; confirm `Options.UnitCapacity(1024)` (the old default) reproduces today's runtime if a user wants it; confirm no domain code or sqladapter code was touched. + +### Out of scope (handled in `billing-context` follow-on) + +- HTTP shell wireup, error definitions, integration tests, `mapOutcome` helper, application-side monitor metric registration, post-deploy observation drills. These will be addressed in a separate proposal/session in the consuming repo after this module's changes merge and a version is tagged. From 742d4258c65dc324b92c43d1c2487b87bba20f1e Mon Sep 17 00:00:00 2001 From: Michael Whatcott Date: Fri, 29 May 2026 15:31:33 -0600 Subject: [PATCH 13/50] New draft of proposal: HandleResult -> Await (void) --- ...posal-harness-resilience-module-changes.md | 717 +++++++++++------- 1 file changed, 433 insertions(+), 284 deletions(-) diff --git a/doc/work-sessions/2026/2026-05-28_15-30-32-proposal-harness-resilience-module-changes.md b/doc/work-sessions/2026/2026-05-28_15-30-32-proposal-harness-resilience-module-changes.md index f08acee..6cf9bb1 100644 --- a/doc/work-sessions/2026/2026-05-28_15-30-32-proposal-harness-resilience-module-changes.md +++ b/doc/work-sessions/2026/2026-05-28_15-30-32-proposal-harness-resilience-module-changes.md @@ -1,6 +1,6 @@ --- name: Harness pipeline resilience (module-local changes) -description: Implement the messaging/v3 harness side of the cross-repo "harness resilience and idempotency" proposal — split-path entrypoint (Handle vs HandleResult), depth-based admission shedding for HTTP, and split channel-buffer sizing (BatchCapacity vs UnitCapacity). Excludes billing-context HTTP shell changes and post-deploy observation, which belong to the consuming repo. +description: Implement the messaging/v3 harness side of the cross-repo "harness resilience and idempotency" proposal — a void context-honoring HTTP entrypoint (Await) alongside today's Handle, a pre-flight admission gate (Admit) plus an in-module net/http shedding middleware (no scuter), a thin AsHTTPHandler decorator so HTTP shells need no changes, and split channel-buffer sizing (BatchCapacity vs UnitCapacity). Excludes per-service route wireup and post-deploy observation, which belong to the consuming repos. type: plot --- @@ -11,24 +11,24 @@ type: plot The cross-repo proposal at `billing-context/.../2026-05-15_23-02-32-proposal-harness-resilience-and-idempotency.md` describes three operational changes layered on top of Chunk C of the incremental -domain transformation. Two of those changes ("two-path entrypoint" and "split -channel buffer sizing") live entirely inside this module -(`github.com/smarty/messaging/v3`); a third change ("HTTP shell outcome -mapping") lives in the consuming repo (`billing-context`) and is out of scope -here. +domain transformation. The bulk of those changes live entirely inside this +module (`github.com/smarty/messaging/v3`); the only per-consumer work that +remains is the mechanical wrapping of each mutating route, which lives in the +~10 `*-context` services and is out of scope here. This proposal scopes the work to just the parts that land in *this* repo: - All edits under `handlers/harness/` (entrypoint, contracts, config, pipeline, - fanout, and their tests). + fanout, the new admission middleware + decorator, and their tests). - A documentation update to `doc/work-sessions/2026/2026-05-14_pipeline-component-diagram.svg` - to reflect the split capacity knobs and the new monitor observations. + to reflect the split capacity knobs, the admission gate, and the new monitor + observations. -The HTTP shell wireup, integration tests, error definitions, monitor metric -registration, and post-deploy observation steps from the parent proposal happen -in `billing-context` *after* this repo's changes merge and a tagged version of -`messaging/v3` is published. They are explicitly **out of scope** for this -proposal — they will be picked up under a follow-on session in that repo. +The per-service route wireup (calling `AsHTTPHandler` and `Admission` from each +service's routes table), integration tests, and post-deploy observation steps +from the parent proposal happen in each consuming repo *after* this repo's +changes merge and a tagged version of `messaging/v3` is published. They are +explicitly **out of scope** for this proposal. ### Why the changes are needed (recap) @@ -46,83 +46,105 @@ domain transformation merged: `Domain` drifts far ahead of what was ever stored, surfacing as "we said yes to the client, then forgot" on restart. -The harness-side fix has three independently-mergeable pieces: - -1. **Split `Handle` into two paths**: the existing `Handle(ctx, msgs...)` for - MQ/cron callers (preserves today's blocking, contract-honoring behavior), - and a new `HandleResult(ctx, message any) HandleOutcome` for HTTP callers - (honors `ctx.Done()`, applies admission shedding, returns a status outcome, - and accepts exactly one message per call). -2. **Depth-based admission inside `HandleResult`** — high-watermark check plus - hard-full backstop, both HTTP-only. +### Why a *void* HTTP entrypoint (the central design constraint) + +An earlier draft gave the HTTP entrypoint the signature +`HandleResult(ctx, message any) HandleOutcome`, returning an enum the caller +mapped to `503`/`504`. That return value would force **every** mutating HTTP +route to branch on the outcome — and to add the corresponding test cases to +maintain coverage. Across the ~10 `*-context` services there are **70+ +mutating routes**; replicating outcome-mapping logic (and its tests) at each +one is exactly the kind of accumulated, duplicated branching we want to avoid. + +Two observations dissolve the need for a return value: + +- **Shedding is a *pre-flight* decision, not a *result*.** It can be decided + before the route handler ever runs. A small `Admit() bool` predicate plus a + single HTTP middleware (written once, in this module) writes the `503` and + short-circuits the route. The route handler — and its tests — never see it. +- **A departed caller does not need a status override.** When the caller's + `ctx` fires mid-flight, the goroutine simply unblocks (fixing the leak in + concern #1) and the route writes its normal response to a client that is + already gone. No `504`, no buffering of the response, no extra machinery. The + in-flight batch keeps processing and durably stores regardless. + +That leaves the HTTP entrypoint **void**, identical in spirit to today's +`Handle`. The harness-side fix has these independently-mergeable pieces: + +1. **A void, context-honoring HTTP entrypoint** — `Await(ctx, message any)` + alongside the existing `Handle(ctx, messages ...any)`. `Await` honors + `ctx.Done()`, processes exactly one message, emits a `CallerDeparted` + observation when the caller leaves, and returns nothing. +2. **Pre-flight admission** — an `Admit() bool` predicate on the entrypoint + (high-watermark check against the `batches` channel) and an in-module + `Admission` HTTP middleware that writes a `503` (inline, raw `net/http`, no + `scuter` dependency) when the gate refuses. A thin `AsHTTPHandler` decorator + adapts `Await` to the `messaging.Handler` interface the HTTP shells already + depend on, so neither the shells nor their tests change. 3. **Split channel-buffer sizing** — `BatchCapacity` continues to size the caller-side `batches` channel; a new `UnitCapacity` (default 1) sizes `work1`–`work5` and the per-worker fan-out outputs. The companion `AdjustOrder` domain idempotency change (separate proposal, -already merged in the consuming repo) makes it safe for `HandleResult` to -return early when the caller's `ctx` fires: the in-flight batch keeps -processing and durably stores, and a client retry collapses to a no-op once -the original batch has persisted. +already merged in the consuming repos) makes it safe for `Await` to return +early when the caller's `ctx` fires: the in-flight batch keeps processing and +durably stores, and a client retry collapses to a no-op once the original batch +has persisted. ## Approach ### Decision summary -Two distinct, parallel methods on `*entrypoint`: +Three members on `*entrypoint`, plus two thin module-local adapters: + +| Member | Caller | Honors `ctx.Done()` | Sheds | Return | Arity | +|-------------------------------------|----------------------|---------------------|---------|--------|-------------| +| `Handle(ctx, messages ...any)` | MQ, cron | No | No | none | variadic | +| `Await(ctx, message any)` | HTTP (via decorator) | Yes | No | none | exactly one | +| `Admit() bool` | HTTP middleware | n/a | Decides | `bool` | n/a | -| Method | Caller | Honors `ctx.Done()` | Applies shed | Return value | Message arity | -|---------------------------------------------------|----------|---------------------|--------------|-------------------------|-------------------| -| `Handle(ctx, messages ...any)` | MQ, cron | No | No | none (today's contract) | variadic (1..N) | -| `HandleResult(ctx, message any) HandleOutcome` | HTTP | Yes | Yes | `HandleOutcome` | exactly one | +| Adapter (this module) | Role | +|--------------------------------------------|----------------------------------------------------------------------------------------| +| `AsHTTPHandler(Awaiter) messaging.Handler` | Wraps `Await` so HTTP shells keep depending on `messaging.Handler` (zero shell change) | +| `Admission(Admitter, http.Handler)` | Pre-flight gate; writes inline `503` (no scuter) when `Admit()` is false | -`HandleResult` takes a single `message any` (not variadic) because every HTTP -route in the consuming repo invokes the domain with exactly one command per -request. Constraining the signature at the entrypoint: +`Await` takes a single `message any` (not variadic) because every HTTP route in +every consuming service invokes the domain with exactly one command per +request. Constraining the signature here: -- Eliminates the empty-slice / multi-message edge cases on the HTTP path - (no need to defend against `HandleResult(ctx)` or `HandleResult(ctx, a, b)` - in either the entrypoint or any call site). +- Eliminates the empty-slice / multi-message edge cases on the HTTP path. - Tightens the worst-case in-memory work-in-progress bound: combined with `UnitCapacity=1`, each in-flight HTTP request contributes exactly one input - message to a batch rather than potentially many. The `batches` channel - capacity now corresponds directly to a count of HTTP commands enqueued - rather than a count of caller invocations of arbitrary size. -- Surfaces the asymmetry plainly in the type system — MQ/cron may legitimately - need to deliver multiple events per call (e.g. broker batch deliveries), - HTTP does not. - -`HandleOutcome` is an `int`-backed enum: - -| Value | Meaning | -|-----------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `OutcomeAccepted` (0) | Batch admitted *and* completion fired before the caller's ctx expired. Mutation durably stored. | -| `OutcomeShed` (1) | Batch refused at admission (high-watermark, hard-full, or pipeline closed). Pipeline never saw the batch. | -| `OutcomeCallerDeparted` (2) | Batch admitted but the caller's `ctx` fired before completion. Pipeline still owns the batch and will durably store; caller does not know storage status yet. | + message to a batch. The `batches` channel capacity now corresponds directly + to a count of HTTP commands enqueued rather than a count of caller + invocations of arbitrary size. +- Surfaces the asymmetry plainly — MQ/cron may legitimately deliver multiple + events per call (broker batch deliveries); HTTP does not. Two new monitor observations: -- `LoadShed{}` — emitted by `HandleResult` when admission is rejected. -- `CallerDeparted{}` — emitted by `HandleResult` when the caller's `ctx` fired - before completion. +- `LoadShed{}` — emitted by `Admit()` when it refuses on the high-watermark + check. (Refusal because the pipeline is `closed` is shutdown, not load, and + emits nothing.) +- `CallerDeparted{}` — emitted by `Await` when the caller's `ctx` fired before + completion (whether during enqueue or during the wait). Two new configuration options with defaults: - `Options.UnitCapacity(int)` — default `1`. Sizes `work1`–`work5` and the per-worker fan-out outputs. - `Options.ShedThreshold(float64)` — default `0.80`. Fraction of the `batches` - channel capacity past which `HandleResult` sheds. + channel capacity at or past which `Admit()` refuses. ### Detailed design -#### 1. Two-path entrypoint with shared helpers +#### 1. Shared helpers (pure refactor of today's `Handle`) Today's `00_entrypoint.go:29` has a single `Handle` whose body inlines waiter acquisition, batch allocation, completion-callback wiring, the -admission-under-RWMutex sequence, and the `waiter.Wait()` block. The split -extracts three private helpers and gives each method its own admission and -waiting logic. The `prepare` helper keeps its variadic `...any` shape so -`Handle` can pass its argument through verbatim; `HandleResult` calls +admission-under-RWMutex sequence, and the wait. The split extracts three +private helpers shared by `Handle` and `Await`. `prepare` keeps its variadic +`...any` shape so `Handle` passes its argument through verbatim; `Await` calls `prepare(ctx, message)` with its single message, which Go promotes to a one-element slice at the call site. @@ -130,145 +152,236 @@ one-element slice at the call site. // prepare acquires a waiter, allocates a *batch from the pool, and wires up // the completion callback. Returns the items the caller will need. func (this *entrypoint) prepare(ctx context.Context, messages ...any) (waiter *sync.WaitGroup, item *batch) { - waiter = this.waiters.Get() - waiter.Add(1) - item = this.batches.Get() - item.ctx = ctx - item.messages = messages - item.complete = func() { - waiter.Done() - this.monitor.Track(batchComplete) - this.batches.Put(item) - } - return waiter, item + waiter = this.waiters.Get() + waiter.Add(1) + item = this.batches.Get() + item.ctx = ctx + item.messages = messages + item.complete = func() { + waiter.Done() + this.monitor.Track(batchComplete) + this.batches.Put(item) + } + return waiter, item } // abandon releases waiter and pool entry when the item was never enqueued // (i.e. complete() will never fire). func (this *entrypoint) abandon(waiter *sync.WaitGroup, item *batch) { - waiter.Done() - this.batches.Put(item) + waiter.Done() + this.batches.Put(item) } // waiterDone wraps waiter.Wait() in a chan struct{} so it's select-able. func (this *entrypoint) waiterDone(waiter *sync.WaitGroup) (done chan struct{}) { - done = make(chan struct{}) - go func() { waiter.Wait(); close(done) }() - return done + done = make(chan struct{}) + go func() { waiter.Wait(); close(done) }() + return done } ``` -**Path A — `Handle` (MQ and cron):** preserves today's contract verbatim. +#### 2. Path A — `Handle` (MQ and cron): preserves today's contract verbatim ```go func (this *entrypoint) Handle(ctx context.Context, messages ...any) { - waiter, item := this.prepare(ctx, messages...) - defer this.waiters.Put(waiter) - - this.lock.RLock() - if this.closed { - this.lock.RUnlock() - this.abandon(waiter, item) - return - } - this.work <- item - this.monitor.Track(batchInFlight) - this.lock.RUnlock() - - waiter.Wait() + waiter, item := this.prepare(ctx, messages...) + defer this.waiters.Put(waiter) + + this.lock.RLock() + if this.closed { + this.lock.RUnlock() + this.abandon(waiter, item) + return + } + this.work <- item + this.monitor.Track(batchInFlight) + this.lock.RUnlock() + + waiter.Wait() } ``` Properties: - **No `ctx.Done()` honoring.** MQ deliveries don't carry a client deadline; cron has its own scheduler-level guard. Returning early would cause - `streaming` to ack work that the pipeline never finished. + `streaming` to ack work the pipeline never finished. - **No load-shed.** Sending to `this.work` is a blocking channel send. - Back-pressure naturally propagates to the broker via prefetch limits and - unacked-message counts. -- **Hard-full backstop is gone for this path.** The only "shed" condition is - pipeline shutdown (`this.closed`) — exactly today's behavior. + Back-pressure propagates to the broker via prefetch limits and unacked + counts. +- **The only "shed" condition is pipeline shutdown** (`this.closed`) — exactly + today's behavior. -**Path B — `HandleResult` (HTTP):** new behavior, accepts a single message. +#### 3. Path B — `Await` (HTTP): void, context-honoring, single message ```go -func (this *entrypoint) HandleResult(ctx context.Context, message any) HandleOutcome { - waiter, item := this.prepare(ctx, message) - defer this.waiters.Put(waiter) - - this.lock.RLock() - if this.closed { - this.lock.RUnlock() - this.abandon(waiter, item) - return OutcomeShed - } - if float64(len(this.work))/float64(cap(this.work)) >= this.shedThreshold { - this.lock.RUnlock() - this.abandon(waiter, item) - this.monitor.Track(loadShed) - return OutcomeShed - } - select { - case this.work <- item: - this.monitor.Track(batchInFlight) - default: - this.lock.RUnlock() - this.abandon(waiter, item) - this.monitor.Track(loadShed) - return OutcomeShed - } - this.lock.RUnlock() - - select { - case <-this.waiterDone(waiter): - return OutcomeAccepted - case <-ctx.Done(): - this.monitor.Track(callerDeparted) - return OutcomeCallerDeparted - } +func (this *entrypoint) Await(ctx context.Context, message any) { + waiter, item := this.prepare(ctx, message) + defer this.waiters.Put(waiter) + + this.lock.RLock() + if this.closed { + this.lock.RUnlock() + this.abandon(waiter, item) + return + } + select { + case this.work <- item: + this.monitor.Track(batchInFlight) + this.lock.RUnlock() + case <-ctx.Done(): + this.lock.RUnlock() + this.abandon(waiter, item) + this.monitor.Track(callerDeparted) + return + } + + select { + case <-this.waiterDone(waiter): + case <-ctx.Done(): + this.monitor.Track(callerDeparted) + } } ``` Properties: -- **Single message per call.** The signature accepts exactly one `message any`, - matching every HTTP-driven domain command in the consuming repo. -- **Honors `ctx.Done()`** — HTTP request deadlines unblock the caller goroutine. -- **Applies high-watermark + hard-full shed** — fast-fail for HTTP without - blocking. -- **Outcome-returning** — caller (HTTP shell, in another repo) maps to - status codes. - -**Pool-entry lifecycle (consistent across both paths):** -- Success path: pipeline's Completion stage invokes `item.complete()`, which - calls `this.batches.Put(item)`. Neither method puts the item back itself. -- Caller-departed path (only reachable in `HandleResult`): the pipeline still - owns the item and will eventually invoke `item.complete()`. `HandleResult` - must not `Put` — the pool would receive the same item twice. -- Shed and closed-pipeline paths: the item was never enqueued, so `complete()` - will never fire. `abandon(waiter, item)` does the cleanup. +- **Void.** No outcome to return — shedding is handled pre-flight by the + middleware (see §4), and a departed caller just unblocks. +- **Single message per call.** +- **Honors `ctx.Done()` in both the enqueue and the wait.** This is what fixes + the indefinite-stacking concern: an HTTP request whose client deadline passes + (or whose load balancer cancels) unblocks promptly instead of parking. +- **No hard-full backstop.** The ctx-honoring send already bounds the enqueue + wait, so there is no need for a non-blocking `default` arm — and dropping it + avoids a silent post-admit shed that would leave `command.Result` zero and + cause the shell to emit a wrong status (e.g. 404). `Admit()` is the watermark + gate; the ctx-honoring send is the backstop. + +**Pool-entry lifecycle:** +- Success / wait-departed path: the batch was enqueued, so the pipeline owns it + and will invoke `item.complete()` (which `Put`s it). `Await` must **not** + `Put` — the pool would receive the same item twice. (On wait-departure the + pipeline still completes the batch; only the HTTP goroutine returns early.) +- Enqueue-departed and closed paths: the item was never enqueued, so + `complete()` will never fire; `abandon(waiter, item)` does the cleanup. + +QUESTION: would it be cleaner if the entrypoint were to `Put` the batch after `item.complete()` fires (releasing the wait group)? Would this still be 'correct'? **Critically, the batch is not abandoned by the pipeline when the caller -departs.** When `OutcomeCallerDeparted` is returned, the in-flight batch -keeps processing; `complete()` still fires; persistence still happens. Only -the HTTP caller's goroutine returns early. +departs.** When `ctx` fires after enqueue, the in-flight batch keeps +processing; `complete()` still fires; persistence still happens. Only the HTTP +caller's goroutine returns early. -#### 2. Depth-based admission (HTTP-only, lives inside `HandleResult`) +#### 4. Pre-flight admission: `Admit()` + `Admission` middleware + `AsHTTPHandler` -The high-watermark check (`len/cap >= shedThreshold`) and the hard-full -backstop (the `default` arm of the `select` on `this.work <- item`) both live -inside `HandleResult`. `Handle` (MQ/cron) implements neither — see §1 for why. +The high-watermark check lives in a side-effect-light predicate on the +entrypoint: + +```go +func (this *entrypoint) Admit() bool { + this.lock.RLock() + defer this.lock.RUnlock() + if this.closed { + return false + } + if float64(len(this.work))/float64(cap(this.work)) >= this.shedThreshold { + this.monitor.Track(loadShed) + return false + } + return true +} +``` `Options.ShedThreshold(value float64)` exposes the threshold; default `0.80`. -Setting it ≥ `1.0` disables high-watermark shedding (only the hard-full -backstop remains, also HTTP-only). +Setting it ≥ `1.0` disables high-watermark shedding (only the closed-pipeline +refusal remains). + +The middleware and the decorator live in this module (new file +`handlers/harness/admission.go`) and depend only on the standard library and +`messaging/v3` — **never `scuter`**. The `503` response is flushed inline with +raw `net/http`: + +CORRECTION: please do not mention scuter in the source code comments. This module should have zero awareness of that module. + +```go +type ( + Admitter interface { + Admit() bool + } + Awaiter interface { + Await(ctx context.Context, message any) + } +) + +// Admission refuses overloaded requests before the wrapped handler runs, +// writing an inline 503 (no scuter dependency). Wrap each mutating route. +func Admission(gate Admitter, inner http.Handler) http.Handler { + return http.HandlerFunc(func(response http.ResponseWriter, request *http.Request) { + if gate.Admit() { + inner.ServeHTTP(response, request) + return + } + response.Header().Set("Content-Type", "application/json; charset=utf-8") + response.Header().Set("Retry-After", "1") + response.WriteHeader(http.StatusServiceUnavailable) + _, _ = response.Write(shedResponseBody) + }) +} + +// AsHTTPHandler adapts the void, context-honoring Await to the +// messaging.Handler interface the HTTP shells already depend on, so no shell +// (and no shell test) changes. +func AsHTTPHandler(target Awaiter) messaging.Handler { + return &httpAdapter{target: target} +} + +type httpAdapter struct { + target Awaiter +} + +func (this *httpAdapter) Handle(ctx context.Context, messages ...any) { + for _, message := range messages { + this.target.Await(ctx, message) + } +} + +var shedResponseBody = []byte(`{"errors":[{"message":"service overloaded"}]}`) +``` + +**Why this keeps the route handlers (and their tests) untouched.** Each +consuming service's HTTP shells already build a command, call +`handler.Handle(ctx, command)` on a `messaging.Handler`, and map the *mutated +command's* result field (`command.Result`) to a `scuter.ResponseOption` — they +read no return value from `Handle` today. Two seams preserve that exactly: + +- `AsHTTPHandler(domainHandler)` is substituted for the raw handler when the + write shells are constructed, so `Handle` now routes through `Await` + (ctx-honoring, single-message) without the shell knowing. +- `Admission(gate, shell)` wraps each mutating route in the routes table, so + the `503` is decided before the shell runs. + +Illustrative consumer wireup (out of scope, shown for context only): + +```go +gate := domainHandler.(harness.Admitter) +httpHandler := harness.AsHTTPHandler(domainHandler.(harness.Awaiter)) +// ... +{"POST /admin/orders", harness.Admission(gate, NewAdminApproveOrderShell(httpHandler))}, +{"PUT /admin/accounts/:account/orders/:order/adjustments", harness.Admission(gate, NewAdminAdjustOrderShell(httpHandler))}, +``` -The `len(chan)/cap(chan)` snapshot races with concurrent producers/consumers; -under heavy concurrency the threshold can be exceeded momentarily before the -next admission check fires. This is acceptable — the threshold is a soft -signal, not a hard limit; the hard backstop is the channel-full `default` -branch. +Each of the 70+ mutating routes across the ~10 services becomes a mechanical +one-line wrap — no per-route outcome logic, no per-route tests. The `503`/ +departed behavior is tested **once**, here, against the middleware and the +decorator. -#### 3. Split channel buffer sizing +**Race note.** `Admit()`'s `len(chan)/cap(chan)` snapshot races with concurrent +producers/consumers, and there is a TOCTOU window between `Admit()` returning +true and `Await`'s enqueue. Both are acceptable: the threshold is a soft signal, +and the ctx-honoring send means that even if the channel fills in the race +window, the request either drains normally or unblocks on `ctx.Done()` — it +never produces a wrong status the way a silent post-admit shed would. + +#### 5. Split channel buffer sizing Today (`pipeline.go:11-18`): @@ -289,11 +402,10 @@ work4 = make(chan *unitOfWork, config.UnitCapacity) work5 = make(chan *unitOfWork, config.UnitCapacity) ``` -And in `fanout.go:17`, the per-worker output channels (currently hardcoded -to 1024) become `make(chan *unitOfWork, unitCapacity)` where `unitCapacity` is -threaded through `newFanOut`'s signature (or via the existing -`stationFactory` closure — to be decided at implementation time, lower-blast-radius -option preferred). +And in `fanout.go:17`, the per-worker output channels (currently hardcoded to +1024) become `make(chan *unitOfWork, unitCapacity)` where `unitCapacity` is +threaded through `newFanOut`'s signature (lower-blast-radius option preferred +at implementation time). `UnitCapacity` defaults to 1. Tunable via `Options.UnitCapacity(value int)`. Setting it equal to `BatchCapacity` reproduces today's behavior. @@ -305,19 +417,15 @@ is processing unit N. Pipelining benefit saturates at depth ~1 since each stage runs in a single goroutine (except serialization, which has its own fan-out concurrency). -**Bound on in-memory drift during an outage** — with `UnitCapacity=1` and -5 channels post-domain, plus the in-flight unit at each stage, the worst case -is ~10 units' worth of unpersisted mutations. At `UnitSize=64` that's -~640 batches' worth of broadcast results downstream of Execution. +**Bound on in-memory drift during an outage** — with `UnitCapacity=1` and 5 +channels post-domain, plus the in-flight unit at each stage, the worst case is +~10 units' worth of unpersisted mutations. At `UnitSize=64` that's ~640 +batches' worth of broadcast results downstream of Execution. -The single-message `HandleResult` signature *also* tightens the upstream -side: each HTTP-admitted batch on the `batches` channel now carries exactly -one input message rather than potentially many, so `BatchCapacity` becomes a -direct count of in-flight HTTP commands rather than a count of caller -invocations of arbitrary fan-out. Combined with the `UnitCapacity=1` -post-domain default, the total worst-case in-memory drift is meaningfully -smaller than the previous draft's estimate, while throughput pipelining is -preserved. +The single-message `Await` signature *also* tightens the upstream side: each +HTTP-admitted batch on the `batches` channel now carries exactly one input +message, so `BatchCapacity` becomes a direct count of in-flight HTTP commands +rather than a count of caller invocations of arbitrary fan-out. ### Non-goals @@ -325,111 +433,138 @@ preserved. Serialization → Persistence → Completion → Broadcast → Terminal) is preserved verbatim. - **Changing `messaging.Handler`.** `Handle(ctx, messages ...any)` keeps its - exact existing contract. `HandleResult` is a new method, not a replacement, - and intentionally has a different signature (single message, returning an - outcome). + exact existing contract. `Await` is a new method, not a replacement, and + intentionally has a different signature (single message, void). +- **Returning a status outcome from the HTTP path.** Explicitly rejected — see + Background and Alternatives. Shedding is decided pre-flight; departed callers + just unblock. +- **Buffering the HTTP response to override status post-hoc.** Not done — a + departed caller is already gone, so the shell's normal write to a dead + connection is harmless, and no `504` is emitted. +- **Per-service route wireup.** Calling `AsHTTPHandler` / `Admission` from each + service's routes table happens in the consuming repos, after a tagged + release. Out of scope here. - **Domain-layer changes.** The companion `AdjustOrder` idempotency change is - in another repo and is assumed merged before any consumer relies on - `OutcomeCallerDeparted`. Nothing in this proposal touches domain code. -- **Wireup/HTTP shell changes.** Phase 2 and Phase 4 of the parent proposal - live in `billing-context`. Out of scope here. + in the consuming repos and is assumed merged before any consumer relies on a + departed-caller batch persisting. Nothing in this proposal touches domain + code. - **`harness/sqladapter` changes.** No code changes; just a regression check via `go test`. ### Files modified (this repo only) -| Path | Action | Purpose | -|------------------------------------------|--------|---------------------------------------------------------------------------------------------------------------------------------------| -| `handlers/harness/00_entrypoint.go` | Modify | Split into two paths: `Handle` (MQ/cron, blocking, no shed, variadic) and `HandleResult` (HTTP, ctx-aware, shed-aware, single message); shared helpers | -| `handlers/harness/contracts.go` | Modify | New `HandleOutcome` enum; new `LoadShed`, `CallerDeparted` event types and unexported sentinel values | -| `handlers/harness/config.go` | Modify | `Options.UnitCapacity(int)`, `Options.ShedThreshold(float64)`; defaults 1, 0.80 | -| `handlers/harness/pipeline.go` | Modify | Use `UnitCapacity` for `work1`–`work5`; pass it into `newFanOut`; thread `ShedThreshold` into `newEntrypoint` | -| `handlers/harness/fanout.go` | Modify | Accept `unitCapacity` and use it for the per-worker output channels instead of the hardcoded 1024 | -| `handlers/harness/00_entrypoint_test.go` | Modify | New tests for context-aware return, shed behavior, outcome reporting (single-message API), plus pinning tests for the existing `Handle` contract | -| `handlers/harness/config_test.go` | Modify | Assert defaults for `UnitCapacity`, `ShedThreshold`; assert override setters | -| `handlers/harness/pipeline_test.go` | Modify | Adjust assertions if any depend on default channel sizes (none expected to break — defaults preserve external observable behavior) | -| `doc/work-sessions/2026/2026-05-14_pipeline-component-diagram.svg` | Modify | Reflect split `BatchCapacity`/`UnitCapacity` knobs, `LoadShed`/`CallerDeparted` Monitor observations, and `HandleOutcome` return path | +| Path | Action | Purpose | +|--------------------------------------------------------------------|--------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `handlers/harness/00_entrypoint.go` | Modify | Extract `prepare`/`abandon`/`waiterDone`; keep `Handle` behavior identical; add void `Await(ctx, message any)` and `Admit() bool`; add `shedThreshold` field; add `loadShed`/`callerDeparted` sentinels | +| `handlers/harness/admission.go` | Add | `Admitter`/`Awaiter` interfaces; `Admission(gate, inner) http.Handler` (inline `503`, no scuter); `AsHTTPHandler(target) messaging.Handler` decorator; `shedResponseBody` | +| `handlers/harness/contracts.go` | Modify | New `LoadShed` and `CallerDeparted` event types alongside the existing `BatchInFlight`/`BatchComplete`/etc. | +| `handlers/harness/config.go` | Modify | `Options.UnitCapacity(int)`, `Options.ShedThreshold(float64)`; defaults 1, 0.80 | +| `handlers/harness/pipeline.go` | Modify | Use `UnitCapacity` for `work1`–`work5`; pass it into `newFanOut`; thread `ShedThreshold` into `newEntrypoint` | +| `handlers/harness/fanout.go` | Modify | Accept `unitCapacity` and use it for the per-worker output channels instead of the hardcoded 1024 | +| `handlers/harness/00_entrypoint_test.go` | Modify | New tests for `Await` (void, ctx-honoring, single message, pool lifecycle) and `Admit` (watermark/closed/threshold), plus pinning tests for `Handle` | +| `handlers/harness/admission_test.go` | Add | Tests for `Admission` (pass-through when admitted; inline `503` body/headers when refused) and `AsHTTPHandler` (forwards to `Await`) | +| `handlers/harness/config_test.go` | Modify | Assert defaults for `UnitCapacity`, `ShedThreshold`; assert override setters | +| `handlers/harness/pipeline_test.go` | Modify | Adjust assertions if any depend on default channel sizes (none expected to break — defaults preserve external observable behavior) | +| `doc/work-sessions/2026/2026-05-14_pipeline-component-diagram.svg` | Modify | Reflect split `BatchCapacity`/`UnitCapacity` knobs, the `Admit` gate + `Admission` middleware, `LoadShed`/`CallerDeparted` observations, and the void `Await` ingress | ### Alternatives considered -- **Keep `HandleResult` variadic to mirror `Handle`.** Rejected — every HTTP - route invokes the domain with exactly one command, so a variadic signature - would invite empty-call and multi-call bugs at the boundary, and would - loosen the WIP bound that `UnitCapacity=1` is meant to enforce. A - single-`message any` parameter is both ergonomic at the HTTP call site and - load-bearing for the resilience story. -- **Single shared method that branches on caller type via a `ctx` value or +- **Return a `HandleOutcome` enum from the HTTP path and map it per-route.** + Rejected — this is the motivating problem. Across ~10 `*-context` services + and 70+ mutating routes, each route would replicate `503`/`504` branching and + the tests to cover it. Centralizing the decision in a pre-flight gate + + middleware keeps the route handlers void and tested once. +- **Buffer the HTTP `ResponseWriter` in middleware to override status after the + shell runs (the "Option A" / `504` path).** Rejected — `scuter` flushes the + response *inside* each shell, so a post-hoc override requires wrapping the + writer in a buffering shim in this module. The only payoff would be a precise + `504`, but a departed caller's connection is already gone, so the shell's + normal write is harmless and no override is needed. More machinery, no real + benefit. +- **Panic on shed + recover in middleware.** Rejected — shedding is a + *high-frequency* condition precisely during an outage, so this would panic on + a large fraction of requests and blur the "panic = bug = 500 + page someone" + semantics that recovery middleware exists to serve. The parent proposal + rejected panic-on-shed for the MQ path for the same noise reasons. +- **Put the shedding middleware in each consuming repo.** Rejected — 10 + services would duplicate the middleware and its tests. It lives once, here. +- **Let the middleware reference `scuter` for the `503` envelope.** Rejected — + this module must not depend on `scuter`. The `503` is flushed inline with raw + `net/http`; the body matches the common `{"errors":[...]}` shape consumers + already emit, with a `Retry-After: 1` header. +- **Keep a hard-full `default` backstop inside `Await`.** Rejected — without a + return value a silent post-admit shed would leave `command.Result` zero and + the shell would emit a wrong status (e.g. 404). The ctx-honoring send bounds + the enqueue wait without it; `Admit()` is the watermark gate. +- **Single shared method branching on caller type via a `ctx` value or `Options.Source`.** Rejected — `streaming` acks unconditionally on clean - `Handle` return, so an MQ-side shed-then-return would silently drop - messages. The two paths require fundamentally different behavior, not - different parameter values. Two methods make the contract visible at every - call site. -- **Drop the `default` arm in `HandleResult`'s `select` and rely solely on - high-watermark shedding.** Rejected — keep `default` as a hard backstop in - case `shedThreshold` is misconfigured. -- **Apply shed to MQ as well, and panic on shed so the broker redelivers - after reconnect.** Rejected — noisy, tangles error handling, and produces - alarming log output in normal load-shed conditions. The blocking-channel-send - back-pressure path through MQ prefetch is the standard RabbitMQ flow-control - mechanism and works without instrumentation changes. -- **Add nack/error return to `messaging.Handler.Handle` itself.** Rejected as - out-of-scope — would touch every existing `messaging.Handler` implementation - across all consumers. A future messaging-library-level change could add - this; this proposal does not. + `Handle` return, so an MQ-side shed-then-return would silently drop messages. + The two paths require fundamentally different behavior. Separate members make + the contract visible at every wiring site. - **Inject a per-batch `ctx` through Persistence and Broadcast.** Rejected. Per-batch ctx in retry-forever stages would unwind partially-completed work and break the durability principle. The pipeline ctx (`harness.New(ctx, …)`) is the right scope for those stages. - **Keep `BatchCapacity` sizing all channels uniformly.** Rejected — preserves the in-memory drift problem during outages. +- **Add nack/error return to `messaging.Handler.Handle` itself.** Rejected as + out-of-scope — would touch every existing `messaging.Handler` implementation + across all consumers. ## Trade-offs & Risks -- **`HandleResult` exposes a per-call result outside the `messaging.Handler` - abstraction.** Acceptable because the harness already exports `Monitor`, - `Writer`, `Dispatcher`, etc. — the package is explicitly the "single - ingress" abstraction. The new `HandleOutcome` type and the new method live - on `*entrypoint` and can be retrieved via type assertion or via a - small interface that the consumer defines. -- **Single-message HTTP signature is a hard constraint.** A hypothetical - future HTTP route that needed to submit multiple commands atomically would - not fit. Acceptable today — every existing HTTP route in `billing-context` - invokes the domain with exactly one command — and reversible later if - needed (the signature can grow back to variadic without breaking existing - call sites by switching to a slice or by introducing a sibling - `HandleResultBatch` method). -- **`waiterDone` allocates a goroutine and a channel per `HandleResult` call.** - This is on the HTTP path only and only when the call actually waits (i.e. is - not shed). The cost is a few hundred bytes and one goroutine for the - duration of the in-flight batch — well within the budget of an HTTP request. +- **`Await`, `Admit`, `Awaiter`, `Admitter`, and the two adapters are exported + surface outside the `messaging.Handler` abstraction.** Acceptable — the + harness already exports `Monitor`, `Writer`, `Dispatcher`, etc.; it is + explicitly the "single ingress" abstraction. The consumer reaches `Await`/ + `Admit` via the exported interfaces (type assertion on the handler `New` + returns). + +TWEAK: Since `Await` is called through middleware defined in the same package, let's un-export `Await`. + +- **This module now imports `net/http`** (in `admission.go`). Minor — it is a + standard-library dependency, isolated to the admission file. If desired at + implementation time, the middleware and decorator can move to a sibling + subpackage (e.g. `handlers/harness/admission`) to keep `net/http` out of the + core pipeline package; the `Admitter`/`Awaiter` interfaces would stay in + `harness`. Lower-churn option (single package) is preferred unless review + objects. +- **Single-message HTTP signature is a hard constraint.** A hypothetical future + HTTP route needing to submit multiple commands atomically would not fit. + Acceptable today — every existing HTTP route invokes the domain with exactly + one command — and reversible later (a sibling `AwaitBatch` could be added + without breaking existing call sites). +- **`waiterDone` allocates a goroutine and a channel per `Await` call.** On the + HTTP path only. The cost is a few hundred bytes and one goroutine for the + duration of the in-flight batch — well within an HTTP request's budget. On a + wait-departure the goroutine parks until the pipeline eventually completes the + batch, then exits. - **`UnitCapacity=1` reduces normal-throughput headroom slightly.** Pipelining is preserved (depth-1 buffer between stages) but bursty workloads that - previously absorbed into deep buffers will now apply backpressure earlier. - Mitigation: configurable; a deployment that prefers the old throughput - characteristic can set `Options.UnitCapacity(1024)`. + previously absorbed into deep buffers now apply backpressure earlier. + Mitigation: configurable; set `Options.UnitCapacity(1024)` to reproduce + today's characteristic. - **Caller-departed batches keep doing work the caller no longer cares about.** Intentional — matches the durability principle. Combined with the merged - domain-layer idempotency change (in `billing-context`), repeated retries - collapse to no-ops after the first applies. From this module's perspective - this is purely a contract guarantee: "we will not unwind the in-flight - batch when the caller departs." -- **The shed-threshold as a fraction is inexact.** `len(chan)/cap(chan)` is - a snapshot that races with concurrent producers/consumers; under heavy - concurrency we can exceed the threshold momentarily before the next - admission check fires. Acceptable — soft signal, not a hard limit; hard - backstop is the channel-full `default` branch. -- **`Handle` and `HandleResult` share state (`this.work`, `this.lock`, - `this.closed`).** Two paths writing to the same channel under the same - RWMutex is fine; race-free under `-race`. Tests must cover the case where - both paths interleave on a shrunk-`BatchCapacity` fixture. + domain-layer idempotency change, repeated retries collapse to no-ops after + the first applies. From this module's perspective this is a contract + guarantee: "we will not unwind the in-flight batch when the caller departs." +- **The shed-threshold as a fraction is inexact, and there is a TOCTOU window + between `Admit()` and `Await`'s enqueue.** Acceptable — soft signal, not a + hard limit; and the ctx-honoring send means a race-window channel-full never + produces a wrong status (it drains or unblocks on `ctx.Done()`). +- **`Handle` and `Await` share state (`this.work`, `this.lock`, `this.closed`).** + Two paths writing the same channel under the same RWMutex is fine; race-free + under `-race`. Tests must cover both paths interleaving on a shrunk-capacity + fixture. - **Cross-repo coordination.** This module's changes are backward-compatible - (new options have defaults; new method doesn't break existing - `messaging.Handler` callers). A consumer that doesn't yet adopt - `HandleResult` keeps working unchanged. The consuming repo's adoption is - sequenced after a tagged release. -- **Diagram drift.** The pipeline diagram is the canonical visual reference; - if the SVG isn't updated alongside the code, reviewers will form a stale - mental model. Mitigation: diagram update is in the checklist. + (new options have defaults; new members don't break existing + `messaging.Handler` callers). A consumer that doesn't yet wrap its routes + keeps working unchanged. Per-service adoption is sequenced after a tagged + release. +- **Diagram drift.** The pipeline diagram is the canonical visual reference; if + the SVG isn't updated alongside the code, reviewers will form a stale mental + model. Mitigation: diagram update is in the checklist. ## Implementation Checklist @@ -446,48 +581,62 @@ preserved. - [ ] Edit `handlers/harness/fanout.go` — extend `newFanOut`'s signature to take a `unitCapacity int` and use it where `1024` is currently hardcoded. - [ ] Run `make test` — pipeline tests should still pass under the new defaults; if any test depends on the old 1024 buffer it should be updated to set `Options.UnitCapacity(1024)` explicitly. -### Phase 3: Monitor observations and outcome enum +### Phase 3: Monitor observations and interfaces -- [ ] Edit `handlers/harness/contracts.go` — add `HandleOutcome` type (`int`-backed enum: `OutcomeAccepted=0`, `OutcomeShed=1`, `OutcomeCallerDeparted=2`); add `LoadShed struct{}` and `CallerDeparted struct{}` event types alongside the existing `BatchInFlight`/`BatchComplete`/etc. +- [ ] Edit `handlers/harness/contracts.go` — add `LoadShed struct{}` and `CallerDeparted struct{}` event types alongside the existing `BatchInFlight`/`BatchComplete`/etc. - [ ] Edit `handlers/harness/00_entrypoint.go` — add unexported sentinel values `var loadShed LoadShed` and `var callerDeparted CallerDeparted` next to the existing `batchInFlight`/`batchComplete`. - [ ] Run `make test` — confirm the existing suite still compiles and passes. ### Phase 4: Extract shared helpers (pure refactor — keep `Handle` behavior identical) -- [ ] Refactor `handlers/harness/00_entrypoint.go` to extract `prepare(ctx, messages ...any) (*sync.WaitGroup, *batch)`, `abandon(waiter, item)`, and `waiterDone(waiter) chan struct{}` helpers; rewrite `Handle`'s body in terms of `prepare(ctx, messages...)` and the existing admission logic so it is observably identical. +- [ ] Refactor `handlers/harness/00_entrypoint.go` to extract `prepare(ctx, messages ...any) (*sync.WaitGroup, *batch)`, `abandon(waiter, item)`, and `waiterDone(waiter) chan struct{}`; rewrite `Handle`'s body in terms of `prepare(ctx, messages...)` so it is observably identical. - [ ] Run `make test` — all existing tests must still pass; this step changes no externally observable behavior. -### Phase 5: Add `HandleResult` (TDD, HTTP path, single-message signature) +### Phase 5: Add `Await` (TDD, void HTTP path, single message) + +- [ ] Add `TestAwait_ReturnsAfterCompletion` — call `Await(ctx, "msg")` with a single message; let the pipeline complete; assert the call returns and the batch was processed. Run `make test` — confirm failure (no `Await` method yet → compile error). Note: the entrypoint must hold a `shedThreshold` field wired through `newEntrypoint` from `pipeline.go`. +- [ ] Add `Await(ctx context.Context, message any)` on `*entrypoint` with the body shown in §3 of Approach. Run — confirm `ReturnsAfterCompletion` passes. +- [ ] Add `TestAwait_UnblocksOnContextCancelWhileWaiting` — fixture with a writer that blocks forever; enqueue succeeds; cancel the caller's `ctx`; assert `Await` returns, Monitor sees `CallerDeparted{}`, and the batch is **not** abandoned (pipeline still owns it). Run — confirm passing. +- [ ] Add `TestAwait_UnblocksOnContextCancelWhileEnqueuing` — fixture with `BatchCapacity=1` and a writer that blocks forever so the work channel stays full; cancel `ctx` before a slot frees; assert `Await` returns, Monitor sees `CallerDeparted{}`, and the pool entry is restored (the never-enqueued batch is abandoned). Run — confirm passing. +- [ ] Add `TestAwait_BatchCarriesExactlyOneMessage` — `Await(ctx, "only")`; intercept the resulting `*batch` on the work channel; assert `len(item.messages) == 1` and `item.messages[0] == "only"`. Run — confirm passing (pins the single-message contract). +- [ ] Add `TestAwait_ClosedPipelineReturnsImmediately` — close the entrypoint; call `Await(ctx, "msg")`; assert it returns within a few milliseconds and the pool entry is returned. Run — confirm passing. + +### Phase 6: Add `Admit()` gate (TDD) + +- [ ] Add `TestAdmit_TrueWhenBelowThreshold` — fresh entrypoint, empty work channel; assert `Admit()` is true. Run — confirm failure (no `Admit` yet → compile error). +- [ ] Add `Admit() bool` on `*entrypoint` with the body shown in §4. Run — confirm `TrueWhenBelowThreshold` passes. +- [ ] Add `TestAdmit_FalseAtOrAboveThreshold_TracksLoadShed` — `BatchCapacity=10`, `ShedThreshold=0.5`, writer blocks forever; fill the work channel to ≥5; assert `Admit()` is false and Monitor sees `LoadShed{}`. Run — confirm passing. +- [ ] Add `TestAdmit_FalseWhenClosed_NoLoadShed` — close the entrypoint; assert `Admit()` is false and Monitor sees **no** `LoadShed{}` (shutdown, not load). Run — confirm passing. +- [ ] Add `TestAdmit_ThresholdAtOrAboveOneDisablesWatermark` — `ShedThreshold=2.0`; fill the channel; assert `Admit()` stays true until the pipeline is closed. Run — confirm passing. + +### Phase 7: Add `AsHTTPHandler` decorator and `Admission` middleware (TDD, in-module, no scuter) -- [ ] In `handlers/harness/00_entrypoint_test.go`, add `TestHandleResult_OutcomeAcceptedOnSuccess` — call `HandleResult(ctx, "msg")` with a single message; let the pipeline complete; assert outcome is `OutcomeAccepted`. Run `make test` — confirm failure (no `HandleResult` method yet → compile error). -- [ ] Add `HandleResult(ctx context.Context, message any) HandleOutcome` method on `*entrypoint` with the body shown in §1 of Approach. The method calls `this.prepare(ctx, message)` (Go promotes the single argument into the helper's variadic slot). Note: the entrypoint must hold its `shedThreshold` field — wire it through `newEntrypoint` from `pipeline.go`. Run `make test` — confirm `OutcomeAcceptedOnSuccess` passes. -- [ ] Add `TestHandleResult_OutcomeCallerDepartedOnContextCancel` — fixture with a writer that blocks forever; cancel the caller's `ctx`; assert outcome is `OutcomeCallerDeparted` and Monitor sees `CallerDeparted{}`. Run — confirm passing. -- [ ] Add `TestHandleResult_OutcomeShedAtHighWatermark` — fixture with `BatchCapacity=10`, `ShedThreshold=0.5`, writer that blocks forever; submit 5 messages via `HandleResult` (each call carries one message); assert the 6th-and-beyond return `OutcomeShed` and Monitor sees `LoadShed{}`. Run — confirm passing. -- [ ] Add `TestHandleResult_OutcomeShedOnHardFull` — `ShedThreshold=2.0` (high-watermark disabled); writer blocks; submit `BatchCapacity+1` messages via `HandleResult` (one per call); assert the overflowing one returns `OutcomeShed`. Run — confirm passing. -- [ ] Add `TestHandleResult_OutcomeShedReleasesPoolEntry` — submit a message that gets shed via `HandleResult`; assert the entrypoint's `batches` pool has the same length before and after (no leak). Run — confirm passing. -- [ ] Add `TestHandleResult_ClosedPipelineReturnsShed` — close the entrypoint; call `HandleResult(ctx, "msg")`; assert outcome is `OutcomeShed` and the pool entry is returned. Run — confirm passing. -- [ ] Add `TestHandleResult_BatchCarriesExactlyOneMessage` — `HandleResult(ctx, "only")`; intercept the resulting `*batch` on the work channel; assert `len(item.messages) == 1` and `item.messages[0] == "only"`. Run — confirm passing (this pins the single-message contract). +- [ ] Add `TestAsHTTPHandler_ForwardsSingleMessageToAwait` (in `admission_test.go`) — a fake `Awaiter` records calls; `AsHTTPHandler(fake).Handle(ctx, "x")`; assert exactly one `Await(ctx, "x")`. Run — confirm failure (no `AsHTTPHandler` yet). +- [ ] Add the `Awaiter` interface, `httpAdapter`, and `AsHTTPHandler` to `handlers/harness/admission.go`. Run — confirm passing. +- [ ] Add `TestAsHTTPHandler_ForwardsEachMessageInOrder` — `Handle(ctx, "a", "b")`; assert `Await` called twice, in order (documents the variadic-to-single adaptation). Run — confirm passing. +- [ ] Add `TestAdmission_PassesThroughWhenAdmitted` — a fake `Admitter` returning true wraps a recording inner handler; serve a request; assert the inner handler ran and its response is preserved. Run — confirm failure (no `Admission` yet). +- [ ] Add the `Admitter` interface, `Admission`, and `shedResponseBody` to `handlers/harness/admission.go`. Run — confirm `PassesThroughWhenAdmitted` passes. +- [ ] Add `TestAdmission_Writes503WhenRejected` — fake `Admitter` returning false; serve a request; assert the inner handler did **not** run, status is `503`, `Content-Type` is `application/json; charset=utf-8`, `Retry-After` is `1`, and the body equals the shed JSON. Run — confirm passing. -### Phase 6: Pin the existing `Handle` contract (TDD, MQ/cron path) +### Phase 8: Pin the existing `Handle` contract (TDD, MQ/cron path) -- [ ] Add `TestHandle_BlocksUntilDurable` — submit a batch via `Handle`; the writer takes a controlled delay to acknowledge; assert `Handle` returns only after the writer completes. Run — confirm passing (this pins the contract `streaming` depends on). -- [ ] Add `TestHandle_DoesNotShedAtHighWatermark` — fixture with `BatchCapacity=2`, `ShedThreshold=0.5`, writer that blocks forever; submit 5 batches via `Handle` (each in its own goroutine); assert all 5 are blocked (none returned). After unblocking the writer, all 5 should eventually return. Run — confirm passing. -- [ ] Add `TestHandle_DoesNotShedOnHardFull` — `BatchCapacity=2`; submit 3 batches via `Handle` from separate goroutines; assert the third blocks on the channel send rather than returning early. Run — confirm passing. +- [ ] Add `TestHandle_BlocksUntilDurable` — submit a batch via `Handle`; the writer takes a controlled delay to acknowledge; assert `Handle` returns only after the writer completes. Run — confirm passing (pins the contract `streaming` depends on). +- [ ] Add `TestHandle_DoesNotShedAtHighWatermark` — `BatchCapacity=2`, `ShedThreshold=0.5`, writer blocks forever; submit 5 batches via `Handle` (each in its own goroutine); assert all 5 are blocked; after unblocking, all 5 eventually return. Run — confirm passing. - [ ] Add `TestHandle_IgnoresContextCancel` — submit a batch via `Handle`; cancel the ctx; assert `Handle` is still blocked until the pipeline completes the batch. Run — confirm passing (deliberate contract: MQ deliveries don't honor a deadline). -- [ ] Add `TestHandle_ReturnsImmediatelyOnClosedPipeline` — close the entrypoint; call `Handle`; assert it returns within a few milliseconds (no panic, no block). Run — confirm passing (this is today's behavior, just being pinned). -- [ ] Add `TestHandle_PreservesVariadicMessages` — `Handle(ctx, "a", "b", "c")`; intercept the resulting `*batch`; assert `len(item.messages) == 3`. Run — confirm passing (pins that the variadic contract still works after the `prepare` refactor). +- [ ] Add `TestHandle_ReturnsImmediatelyOnClosedPipeline` — close the entrypoint; call `Handle`; assert it returns within a few milliseconds (no panic, no block). Run — confirm passing. +- [ ] Add `TestHandle_PreservesVariadicMessages` — `Handle(ctx, "a", "b", "c")`; intercept the resulting `*batch`; assert `len(item.messages) == 3`. Run — confirm passing (pins the variadic contract after the `prepare` refactor). -### Phase 7: Race and integration sanity +### Phase 9: Race and integration sanity - [ ] Run the full harness test suite under `-race`: `go test -race ./handlers/harness/...`. Confirm green. - [ ] Run `make test` (the project-level entry point that also runs `go mod tidy`, `go fmt ./...`). Confirm green. - [ ] Run `go test ./handlers/harness/sqladapter/...` against a live MySQL (drop `-short` if present, or use the project's `make test.db.local`-equivalent). Confirm no regressions in the SQL adapter. -### Phase 8: Documentation +### Phase 10: Documentation -- [ ] Update `doc/work-sessions/2026/2026-05-14_pipeline-component-diagram.svg` to reflect: split `BatchCapacity`/`UnitCapacity` knobs, `LoadShed`/`CallerDeparted` Monitor observations, and the `HandleResult(ctx, message any) → HandleOutcome` return path. (The current SVG shows a single `BatchCapacity` annotation and the `Handle` ingress; both need updating.) -- [ ] Self-review the diff: confirm no `messaging.Handler` callers were inadvertently broken; confirm `Options.UnitCapacity(1024)` (the old default) reproduces today's runtime if a user wants it; confirm no domain code or sqladapter code was touched. +- [ ] Update `doc/work-sessions/2026/2026-05-14_pipeline-component-diagram.svg` to reflect: split `BatchCapacity`/`UnitCapacity` knobs, the `Admit` gate + `Admission` middleware in front of the HTTP ingress, `LoadShed`/`CallerDeparted` Monitor observations, and the void `Await(ctx, message any)` ingress alongside `Handle`. (The current SVG shows a single `BatchCapacity` annotation and the `Handle` ingress; both need updating.) +- [ ] Self-review the diff: confirm no `messaging.Handler` callers were inadvertently broken; confirm `admission.go` imports no `scuter`; confirm `Options.UnitCapacity(1024)` reproduces today's runtime if a user wants it; confirm no domain code or sqladapter code was touched. -### Out of scope (handled in `billing-context` follow-on) +### Out of scope (handled in each consuming repo's follow-on) -- HTTP shell wireup, error definitions, integration tests, `mapOutcome` helper, application-side monitor metric registration, post-deploy observation drills. These will be addressed in a separate proposal/session in the consuming repo after this module's changes merge and a version is tagged. +- Per-service route wireup (`AsHTTPHandler` + `Admission` in each routes table), application-side monitor metric registration, integration tests, and post-deploy observation drills. These are addressed per service after this module's changes merge and a version is tagged. From e974da3bdf99104faf9ce35d85984822d575c903 Mon Sep 17 00:00:00 2001 From: Michael Whatcott Date: Fri, 29 May 2026 15:51:59 -0600 Subject: [PATCH 14/50] Draft of proposal prioritizing minimal exported footprint. --- ...posal-harness-resilience-module-changes.md | 291 ++++++++++-------- 1 file changed, 159 insertions(+), 132 deletions(-) diff --git a/doc/work-sessions/2026/2026-05-28_15-30-32-proposal-harness-resilience-module-changes.md b/doc/work-sessions/2026/2026-05-28_15-30-32-proposal-harness-resilience-module-changes.md index 6cf9bb1..a9b4671 100644 --- a/doc/work-sessions/2026/2026-05-28_15-30-32-proposal-harness-resilience-module-changes.md +++ b/doc/work-sessions/2026/2026-05-28_15-30-32-proposal-harness-resilience-module-changes.md @@ -1,6 +1,6 @@ --- name: Harness pipeline resilience (module-local changes) -description: Implement the messaging/v3 harness side of the cross-repo "harness resilience and idempotency" proposal — a void context-honoring HTTP entrypoint (Await) alongside today's Handle, a pre-flight admission gate (Admit) plus an in-module net/http shedding middleware (no scuter), a thin AsHTTPHandler decorator so HTTP shells need no changes, and split channel-buffer sizing (BatchCapacity vs UnitCapacity). Excludes per-service route wireup and post-deploy observation, which belong to the consuming repos. +description: Implement the messaging/v3 harness side of the cross-repo "harness resilience and idempotency" proposal — a void context-honoring HTTP entrypoint (unexported await) alongside today's Handle, a pre-flight admission gate (unexported admit) plus an in-module net/http shedding middleware, a thin AsHTTPHandler decorator so HTTP shells need no changes, and split channel-buffer sizing (BatchCapacity vs UnitCapacity). Excludes per-service route wireup and post-deploy observation, which belong to the consuming repos. type: plot --- @@ -59,7 +59,7 @@ one is exactly the kind of accumulated, duplicated branching we want to avoid. Two observations dissolve the need for a return value: - **Shedding is a *pre-flight* decision, not a *result*.** It can be decided - before the route handler ever runs. A small `Admit() bool` predicate plus a + before the route handler ever runs. A small `admit() bool` predicate plus a single HTTP middleware (written once, in this module) writes the `503` and short-circuits the route. The route handler — and its tests — never see it. - **A departed caller does not need a status override.** When the caller's @@ -71,22 +71,24 @@ Two observations dissolve the need for a return value: That leaves the HTTP entrypoint **void**, identical in spirit to today's `Handle`. The harness-side fix has these independently-mergeable pieces: -1. **A void, context-honoring HTTP entrypoint** — `Await(ctx, message any)` - alongside the existing `Handle(ctx, messages ...any)`. `Await` honors - `ctx.Done()`, processes exactly one message, emits a `CallerDeparted` - observation when the caller leaves, and returns nothing. -2. **Pre-flight admission** — an `Admit() bool` predicate on the entrypoint - (high-watermark check against the `batches` channel) and an in-module - `Admission` HTTP middleware that writes a `503` (inline, raw `net/http`, no - `scuter` dependency) when the gate refuses. A thin `AsHTTPHandler` decorator - adapts `Await` to the `messaging.Handler` interface the HTTP shells already - depend on, so neither the shells nor their tests change. +1. **A void, context-honoring HTTP entrypoint** — an unexported + `await(ctx, message any)` alongside the existing + `Handle(ctx, messages ...any)`. `await` honors `ctx.Done()`, processes + exactly one message, emits a `CallerDeparted` observation when the caller + leaves, and returns nothing. +2. **Pre-flight admission** — an unexported `admit() bool` predicate on the + entrypoint (high-watermark check against the `batches` channel, used only by + the in-package middleware) and an in-module `Admission` HTTP middleware that + writes a `503` (inline, raw `net/http`) when the gate refuses. A thin + `AsHTTPHandler` decorator adapts `await` to the `messaging.Handler` interface + the HTTP shells already depend on, so neither the shells nor their tests + change. 3. **Split channel-buffer sizing** — `BatchCapacity` continues to size the caller-side `batches` channel; a new `UnitCapacity` (default 1) sizes `work1`–`work5` and the per-worker fan-out outputs. The companion `AdjustOrder` domain idempotency change (separate proposal, -already merged in the consuming repos) makes it safe for `Await` to return +already merged in the consuming repos) makes it safe for `await` to return early when the caller's `ctx` fires: the in-flight batch keeps processing and durably stores, and a client retry collapses to a no-op once the original batch has persisted. @@ -95,20 +97,27 @@ has persisted. ### Decision summary -Three members on `*entrypoint`, plus two thin module-local adapters: +Three methods on `*entrypoint` (two of them unexported), plus two thin +module-local adapters that are the *only* new exported surface: -| Member | Caller | Honors `ctx.Done()` | Sheds | Return | Arity | -|-------------------------------------|----------------------|---------------------|---------|--------|-------------| -| `Handle(ctx, messages ...any)` | MQ, cron | No | No | none | variadic | -| `Await(ctx, message any)` | HTTP (via decorator) | Yes | No | none | exactly one | -| `Admit() bool` | HTTP middleware | n/a | Decides | `bool` | n/a | +| Method | Visibility | Caller | Honors `ctx.Done()` | Sheds | Return | Arity | +|-------------------------------------|------------|----------------------|---------------------|---------|--------|-------------| +| `Handle(ctx, messages ...any)` | exported | MQ, cron | No | No | none | variadic | +| `await(ctx, message any)` | in-package | HTTP (via adapter) | Yes | No | none | exactly one | +| `admit() bool` | in-package | HTTP middleware | n/a | Decides | `bool` | n/a | -| Adapter (this module) | Role | -|--------------------------------------------|----------------------------------------------------------------------------------------| -| `AsHTTPHandler(Awaiter) messaging.Handler` | Wraps `Await` so HTTP shells keep depending on `messaging.Handler` (zero shell change) | -| `Admission(Admitter, http.Handler)` | Pre-flight gate; writes inline `503` (no scuter) when `Admit()` is false | +| Adapter (this module, exported) | Role | +|------------------------------------------------------------|--------------------------------------------------------------------------------------------| +| `AsHTTPHandler(messaging.Handler) messaging.Handler` | Wraps the handler's `await` so HTTP shells keep depending on `messaging.Handler` (zero shell change) | +| `Admission(messaging.Handler, http.Handler) http.Handler` | Pre-flight gate; writes inline `503` when `admit()` is false | -`Await` takes a single `message any` (not variadic) because every HTTP route in +`await`, `admit`, and the `awaiter`/`admitter` interfaces the adapters assert +against are all **unexported**. Because both the middleware and the decorator +live in this package, the consumer never names them — `AsHTTPHandler(handler)` +and `Admission(handler, shell)` are the entire integration vocabulary, and both +take/return the standard `messaging.Handler`/`http.Handler` types. + +`await` takes a single `message any` (not variadic) because every HTTP route in every consuming service invokes the domain with exactly one command per request. Constraining the signature here: @@ -123,10 +132,10 @@ request. Constraining the signature here: Two new monitor observations: -- `LoadShed{}` — emitted by `Admit()` when it refuses on the high-watermark +- `LoadShed{}` — emitted by `admit()` when it refuses on the high-watermark check. (Refusal because the pipeline is `closed` is shutdown, not load, and emits nothing.) -- `CallerDeparted{}` — emitted by `Await` when the caller's `ctx` fired before +- `CallerDeparted{}` — emitted by `await` when the caller's `ctx` fired before completion (whether during enqueue or during the wait). Two new configuration options with defaults: @@ -134,7 +143,7 @@ Two new configuration options with defaults: - `Options.UnitCapacity(int)` — default `1`. Sizes `work1`–`work5` and the per-worker fan-out outputs. - `Options.ShedThreshold(float64)` — default `0.80`. Fraction of the `batches` - channel capacity at or past which `Admit()` refuses. + channel capacity at or past which `admit()` refuses. ### Detailed design @@ -143,8 +152,8 @@ Two new configuration options with defaults: Today's `00_entrypoint.go:29` has a single `Handle` whose body inlines waiter acquisition, batch allocation, completion-callback wiring, the admission-under-RWMutex sequence, and the wait. The split extracts three -private helpers shared by `Handle` and `Await`. `prepare` keeps its variadic -`...any` shape so `Handle` passes its argument through verbatim; `Await` calls +private helpers shared by `Handle` and `await`. `prepare` keeps its variadic +`...any` shape so `Handle` passes its argument through verbatim; `await` calls `prepare(ctx, message)` with its single message, which Go promotes to a one-element slice at the call site. @@ -211,10 +220,10 @@ Properties: - **The only "shed" condition is pipeline shutdown** (`this.closed`) — exactly today's behavior. -#### 3. Path B — `Await` (HTTP): void, context-honoring, single message +#### 3. Path B — `await` (HTTP): void, context-honoring, single message ```go -func (this *entrypoint) Await(ctx context.Context, message any) { +func (this *entrypoint) await(ctx context.Context, message any) { waiter, item := this.prepare(ctx, message) defer this.waiters.Put(waiter) @@ -253,31 +262,43 @@ Properties: - **No hard-full backstop.** The ctx-honoring send already bounds the enqueue wait, so there is no need for a non-blocking `default` arm — and dropping it avoids a silent post-admit shed that would leave `command.Result` zero and - cause the shell to emit a wrong status (e.g. 404). `Admit()` is the watermark + cause the shell to emit a wrong status (e.g. 404). `admit()` is the watermark gate; the ctx-honoring send is the backstop. **Pool-entry lifecycle:** - Success / wait-departed path: the batch was enqueued, so the pipeline owns it - and will invoke `item.complete()` (which `Put`s it). `Await` must **not** + and will invoke `item.complete()` (which `Put`s it). `await` must **not** `Put` — the pool would receive the same item twice. (On wait-departure the pipeline still completes the batch; only the HTTP goroutine returns early.) - Enqueue-departed and closed paths: the item was never enqueued, so `complete()` will never fire; `abandon(waiter, item)` does the cleanup. -QUESTION: would it be cleaner if the entrypoint were to `Put` the batch after `item.complete()` fires (releasing the wait group)? Would this still be 'correct'? +**Note — why `complete()` owns the `Put`, not the entrypoint.** A tempting +simplification is to have `complete()` only release the waiter and let the +entrypoint `Put` the batch once its own wait returns. That works for `Handle` +(which always waits for completion) but is *incorrect* for `await`: on the +wait-departed path `await` returns on `ctx.Done()` **before** completion, so an +entrypoint-side `Put` would never run and the pooled `*batch` would leak (worse, +the pipeline's later `complete()` would mutate an item the entrypoint believed +it had reclaimed). Completion-owned `Put` is precisely what lets a single +cleanup rule cover both "caller waited" and "caller departed but the pipeline +finished later." The entrypoint only `Put`s — via `abandon` — on the paths +where the batch was *never* enqueued and `complete()` will therefore never fire. +So the current split isn't just cleaner; it's the only correct allocation of +the `Put`. **Critically, the batch is not abandoned by the pipeline when the caller departs.** When `ctx` fires after enqueue, the in-flight batch keeps processing; `complete()` still fires; persistence still happens. Only the HTTP caller's goroutine returns early. -#### 4. Pre-flight admission: `Admit()` + `Admission` middleware + `AsHTTPHandler` +#### 4. Pre-flight admission: `admit()` + `Admission` middleware + `AsHTTPHandler` The high-watermark check lives in a side-effect-light predicate on the entrypoint: ```go -func (this *entrypoint) Admit() bool { +func (this *entrypoint) admit() bool { this.lock.RLock() defer this.lock.RUnlock() if this.closed { @@ -297,26 +318,28 @@ refusal remains). The middleware and the decorator live in this module (new file `handlers/harness/admission.go`) and depend only on the standard library and -`messaging/v3` — **never `scuter`**. The `503` response is flushed inline with -raw `net/http`: - -CORRECTION: please do not mention scuter in the source code comments. This module should have zero awareness of that module. +`messaging/v3`. The `awaiter`/`admitter` interfaces are unexported; the adapters +accept the standard `messaging.Handler` returned by `New(...)` and assert it to +those interfaces internally (a failed assertion is a wireup-time programming +error and panics fast). The `503` response is flushed inline with raw +`net/http`: ```go type ( - Admitter interface { - Admit() bool + admitter interface { + admit() bool } - Awaiter interface { - Await(ctx context.Context, message any) + awaiter interface { + await(ctx context.Context, message any) } ) // Admission refuses overloaded requests before the wrapped handler runs, -// writing an inline 503 (no scuter dependency). Wrap each mutating route. -func Admission(gate Admitter, inner http.Handler) http.Handler { +// writing an inline 503. Wrap each mutating route with it. +func Admission(handler messaging.Handler, inner http.Handler) http.Handler { + gate := handler.(admitter) return http.HandlerFunc(func(response http.ResponseWriter, request *http.Request) { - if gate.Admit() { + if gate.admit() { inner.ServeHTTP(response, request) return } @@ -327,20 +350,20 @@ func Admission(gate Admitter, inner http.Handler) http.Handler { }) } -// AsHTTPHandler adapts the void, context-honoring Await to the -// messaging.Handler interface the HTTP shells already depend on, so no shell -// (and no shell test) changes. -func AsHTTPHandler(target Awaiter) messaging.Handler { - return &httpAdapter{target: target} +// AsHTTPHandler adapts the void, context-honoring await to the +// messaging.Handler the HTTP shells already depend on, so no shell (and no +// shell test) changes. +func AsHTTPHandler(handler messaging.Handler) messaging.Handler { + return &httpAdapter{target: handler.(awaiter)} } type httpAdapter struct { - target Awaiter + target awaiter } func (this *httpAdapter) Handle(ctx context.Context, messages ...any) { for _, message := range messages { - this.target.Await(ctx, message) + this.target.await(ctx, message) } } @@ -350,33 +373,34 @@ var shedResponseBody = []byte(`{"errors":[{"message":"service overloaded"}]}`) **Why this keeps the route handlers (and their tests) untouched.** Each consuming service's HTTP shells already build a command, call `handler.Handle(ctx, command)` on a `messaging.Handler`, and map the *mutated -command's* result field (`command.Result`) to a `scuter.ResponseOption` — they -read no return value from `Handle` today. Two seams preserve that exactly: +command's* result field (`command.Result`) to its response — they read no +return value from `Handle` today. Two seams preserve that exactly: -- `AsHTTPHandler(domainHandler)` is substituted for the raw handler when the - write shells are constructed, so `Handle` now routes through `Await` - (ctx-honoring, single-message) without the shell knowing. -- `Admission(gate, shell)` wraps each mutating route in the routes table, so +- `AsHTTPHandler(handler)` is substituted for the raw handler when the write + shells are constructed, so `Handle` now routes through `await` (ctx-honoring, + single-message) without the shell knowing. +- `Admission(handler, shell)` wraps each mutating route in the routes table, so the `503` is decided before the shell runs. Illustrative consumer wireup (out of scope, shown for context only): ```go -gate := domainHandler.(harness.Admitter) -httpHandler := harness.AsHTTPHandler(domainHandler.(harness.Awaiter)) +handler, listeners := harness.New(ctx, opts...) +httpHandler := harness.AsHTTPHandler(handler) // ... -{"POST /admin/orders", harness.Admission(gate, NewAdminApproveOrderShell(httpHandler))}, -{"PUT /admin/accounts/:account/orders/:order/adjustments", harness.Admission(gate, NewAdminAdjustOrderShell(httpHandler))}, +{"POST /admin/orders", harness.Admission(handler, NewAdminApproveOrderShell(httpHandler))}, +{"PUT /admin/accounts/:account/orders/:order/adjustments", harness.Admission(handler, NewAdminAdjustOrderShell(httpHandler))}, ``` -Each of the 70+ mutating routes across the ~10 services becomes a mechanical -one-line wrap — no per-route outcome logic, no per-route tests. The `503`/ -departed behavior is tested **once**, here, against the middleware and the -decorator. +The consumer performs no type assertions of its own — it passes the +`messaging.Handler` from `New(...)` straight into both adapters. Each of the 70+ +mutating routes across the ~10 services becomes a mechanical one-line wrap — no +per-route outcome logic, no per-route tests. The `503`/departed behavior is +tested **once**, here, against the middleware and the decorator. -**Race note.** `Admit()`'s `len(chan)/cap(chan)` snapshot races with concurrent -producers/consumers, and there is a TOCTOU window between `Admit()` returning -true and `Await`'s enqueue. Both are acceptable: the threshold is a soft signal, +**Race note.** `admit()`'s `len(chan)/cap(chan)` snapshot races with concurrent +producers/consumers, and there is a TOCTOU window between `admit()` returning +true and `await`'s enqueue. Both are acceptable: the threshold is a soft signal, and the ctx-honoring send means that even if the channel fills in the race window, the request either drains normally or unblocks on `ctx.Done()` — it never produces a wrong status the way a silent post-admit shed would. @@ -422,7 +446,7 @@ channels post-domain, plus the in-flight unit at each stage, the worst case is ~10 units' worth of unpersisted mutations. At `UnitSize=64` that's ~640 batches' worth of broadcast results downstream of Execution. -The single-message `Await` signature *also* tightens the upstream side: each +The single-message `await` signature *also* tightens the upstream side: each HTTP-admitted batch on the `batches` channel now carries exactly one input message, so `BatchCapacity` becomes a direct count of in-flight HTTP commands rather than a count of caller invocations of arbitrary fan-out. @@ -433,8 +457,9 @@ rather than a count of caller invocations of arbitrary fan-out. Serialization → Persistence → Completion → Broadcast → Terminal) is preserved verbatim. - **Changing `messaging.Handler`.** `Handle(ctx, messages ...any)` keeps its - exact existing contract. `Await` is a new method, not a replacement, and - intentionally has a different signature (single message, void). + exact existing contract. `await` is a new (unexported) method, not a + replacement, and intentionally has a different signature (single message, + void). - **Returning a status outcome from the HTTP path.** Explicitly rejected — see Background and Alternatives. Shedding is decided pre-flight; departed callers just unblock. @@ -455,17 +480,17 @@ rather than a count of caller invocations of arbitrary fan-out. | Path | Action | Purpose | |--------------------------------------------------------------------|--------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `handlers/harness/00_entrypoint.go` | Modify | Extract `prepare`/`abandon`/`waiterDone`; keep `Handle` behavior identical; add void `Await(ctx, message any)` and `Admit() bool`; add `shedThreshold` field; add `loadShed`/`callerDeparted` sentinels | -| `handlers/harness/admission.go` | Add | `Admitter`/`Awaiter` interfaces; `Admission(gate, inner) http.Handler` (inline `503`, no scuter); `AsHTTPHandler(target) messaging.Handler` decorator; `shedResponseBody` | +| `handlers/harness/00_entrypoint.go` | Modify | Extract `prepare`/`abandon`/`waiterDone`; keep `Handle` behavior identical; add unexported `await(ctx, message any)` and `admit() bool`; add `shedThreshold` field; add `loadShed`/`callerDeparted` sentinels | +| `handlers/harness/admission.go` | Add | Unexported `admitter`/`awaiter` interfaces; `Admission(messaging.Handler, http.Handler) http.Handler` (inline `503`); `AsHTTPHandler(messaging.Handler) messaging.Handler` decorator; `shedResponseBody` | | `handlers/harness/contracts.go` | Modify | New `LoadShed` and `CallerDeparted` event types alongside the existing `BatchInFlight`/`BatchComplete`/etc. | | `handlers/harness/config.go` | Modify | `Options.UnitCapacity(int)`, `Options.ShedThreshold(float64)`; defaults 1, 0.80 | | `handlers/harness/pipeline.go` | Modify | Use `UnitCapacity` for `work1`–`work5`; pass it into `newFanOut`; thread `ShedThreshold` into `newEntrypoint` | | `handlers/harness/fanout.go` | Modify | Accept `unitCapacity` and use it for the per-worker output channels instead of the hardcoded 1024 | -| `handlers/harness/00_entrypoint_test.go` | Modify | New tests for `Await` (void, ctx-honoring, single message, pool lifecycle) and `Admit` (watermark/closed/threshold), plus pinning tests for `Handle` | -| `handlers/harness/admission_test.go` | Add | Tests for `Admission` (pass-through when admitted; inline `503` body/headers when refused) and `AsHTTPHandler` (forwards to `Await`) | +| `handlers/harness/00_entrypoint_test.go` | Modify | New tests for `await` (void, ctx-honoring, single message, pool lifecycle) and `admit` (watermark/closed/threshold), plus pinning tests for `Handle` | +| `handlers/harness/admission_test.go` | Add | Tests for `Admission` (pass-through when admitted; inline `503` body/headers when refused) and `AsHTTPHandler` (forwards to `await`) | | `handlers/harness/config_test.go` | Modify | Assert defaults for `UnitCapacity`, `ShedThreshold`; assert override setters | | `handlers/harness/pipeline_test.go` | Modify | Adjust assertions if any depend on default channel sizes (none expected to break — defaults preserve external observable behavior) | -| `doc/work-sessions/2026/2026-05-14_pipeline-component-diagram.svg` | Modify | Reflect split `BatchCapacity`/`UnitCapacity` knobs, the `Admit` gate + `Admission` middleware, `LoadShed`/`CallerDeparted` observations, and the void `Await` ingress | +| `doc/work-sessions/2026/2026-05-14_pipeline-component-diagram.svg` | Modify | Reflect split `BatchCapacity`/`UnitCapacity` knobs, the `admit` gate + `Admission` middleware, `LoadShed`/`CallerDeparted` observations, and the void `await` ingress | ### Alternatives considered @@ -475,12 +500,12 @@ rather than a count of caller invocations of arbitrary fan-out. the tests to cover it. Centralizing the decision in a pre-flight gate + middleware keeps the route handlers void and tested once. - **Buffer the HTTP `ResponseWriter` in middleware to override status after the - shell runs (the "Option A" / `504` path).** Rejected — `scuter` flushes the - response *inside* each shell, so a post-hoc override requires wrapping the - writer in a buffering shim in this module. The only payoff would be a precise - `504`, but a departed caller's connection is already gone, so the shell's - normal write is harmless and no override is needed. More machinery, no real - benefit. + shell runs (the "Option A" / `504` path).** Rejected — the consuming shells + flush the response *inside* the handler, so a post-hoc override requires + wrapping the writer in a buffering shim in this module. The only payoff would + be a precise `504`, but a departed caller's connection is already gone, so the + shell's normal write is harmless and no override is needed. More machinery, no + real benefit. - **Panic on shed + recover in middleware.** Rejected — shedding is a *high-frequency* condition precisely during an outage, so this would panic on a large fraction of requests and blur the "panic = bug = 500 + page someone" @@ -488,18 +513,20 @@ rather than a count of caller invocations of arbitrary fan-out. rejected panic-on-shed for the MQ path for the same noise reasons. - **Put the shedding middleware in each consuming repo.** Rejected — 10 services would duplicate the middleware and its tests. It lives once, here. -- **Let the middleware reference `scuter` for the `503` envelope.** Rejected — - this module must not depend on `scuter`. The `503` is flushed inline with raw - `net/http`; the body matches the common `{"errors":[...]}` shape consumers - already emit, with a `Retry-After: 1` header. -- **Keep a hard-full `default` backstop inside `Await`.** Rejected — without a +- **Export `await`/`admit` (or their interfaces) for the consumer to call.** + Rejected — both are only ever invoked by the in-package `Admission` + middleware and `AsHTTPHandler` decorator, so keeping them unexported shrinks + the public surface to two functions and prevents consumers from reaching past + the adapters. The adapters take/return standard `messaging.Handler` and assert + to the unexported interfaces internally. +- **Keep a hard-full `default` backstop inside `await`.** Rejected — without a return value a silent post-admit shed would leave `command.Result` zero and the shell would emit a wrong status (e.g. 404). The ctx-honoring send bounds - the enqueue wait without it; `Admit()` is the watermark gate. + the enqueue wait without it; `admit()` is the watermark gate. - **Single shared method branching on caller type via a `ctx` value or `Options.Source`.** Rejected — `streaming` acks unconditionally on clean `Handle` return, so an MQ-side shed-then-return would silently drop messages. - The two paths require fundamentally different behavior. Separate members make + The two paths require fundamentally different behavior. Separate methods make the contract visible at every wiring site. - **Inject a per-batch `ctx` through Persistence and Broadcast.** Rejected. Per-batch ctx in retry-forever stages would unwind partially-completed work @@ -513,28 +540,28 @@ rather than a count of caller invocations of arbitrary fan-out. ## Trade-offs & Risks -- **`Await`, `Admit`, `Awaiter`, `Admitter`, and the two adapters are exported - surface outside the `messaging.Handler` abstraction.** Acceptable — the - harness already exports `Monitor`, `Writer`, `Dispatcher`, etc.; it is - explicitly the "single ingress" abstraction. The consumer reaches `Await`/ - `Admit` via the exported interfaces (type assertion on the handler `New` - returns). - -TWEAK: Since `Await` is called through middleware defined in the same package, let's un-export `Await`. - +- **The only new exported surface is two functions — `AsHTTPHandler` and + `Admission`.** Both take and return the standard `messaging.Handler` / + `http.Handler` types. The `await`/`admit` methods and the `awaiter`/`admitter` + interfaces they assert against are unexported, so the consumer never names + them; `AsHTTPHandler(New(...))` and `Admission(New(...), shell)` are the whole + integration vocabulary. The adapters type-assert the supplied handler to the + unexported interfaces internally and panic at wireup if handed something other + than the harness entrypoint — a deliberate fail-fast on misconfiguration. - **This module now imports `net/http`** (in `admission.go`). Minor — it is a standard-library dependency, isolated to the admission file. If desired at implementation time, the middleware and decorator can move to a sibling subpackage (e.g. `handlers/harness/admission`) to keep `net/http` out of the - core pipeline package; the `Admitter`/`Awaiter` interfaces would stay in - `harness`. Lower-churn option (single package) is preferred unless review - objects. + core pipeline package; the unexported `awaiter`/`admitter` interfaces would + then need to be exported (or the adapters constructed inside `harness` and + re-exported). Lower-churn option (single package, unexported interfaces) is + preferred unless review objects. - **Single-message HTTP signature is a hard constraint.** A hypothetical future HTTP route needing to submit multiple commands atomically would not fit. Acceptable today — every existing HTTP route invokes the domain with exactly - one command — and reversible later (a sibling `AwaitBatch` could be added + one command — and reversible later (a sibling `awaitBatch` could be added without breaking existing call sites). -- **`waiterDone` allocates a goroutine and a channel per `Await` call.** On the +- **`waiterDone` allocates a goroutine and a channel per `await` call.** On the HTTP path only. The cost is a few hundred bytes and one goroutine for the duration of the in-flight batch — well within an HTTP request's budget. On a wait-departure the goroutine parks until the pipeline eventually completes the @@ -550,15 +577,15 @@ TWEAK: Since `Await` is called through middleware defined in the same package, l the first applies. From this module's perspective this is a contract guarantee: "we will not unwind the in-flight batch when the caller departs." - **The shed-threshold as a fraction is inexact, and there is a TOCTOU window - between `Admit()` and `Await`'s enqueue.** Acceptable — soft signal, not a + between `admit()` and `await`'s enqueue.** Acceptable — soft signal, not a hard limit; and the ctx-honoring send means a race-window channel-full never produces a wrong status (it drains or unblocks on `ctx.Done()`). -- **`Handle` and `Await` share state (`this.work`, `this.lock`, `this.closed`).** +- **`Handle` and `await` share state (`this.work`, `this.lock`, `this.closed`).** Two paths writing the same channel under the same RWMutex is fine; race-free under `-race`. Tests must cover both paths interleaving on a shrunk-capacity fixture. - **Cross-repo coordination.** This module's changes are backward-compatible - (new options have defaults; new members don't break existing + (new options have defaults; the new exported functions don't break existing `messaging.Handler` callers). A consumer that doesn't yet wrap its routes keeps working unchanged. Per-service adoption is sequenced after a tagged release. @@ -581,7 +608,7 @@ TWEAK: Since `Await` is called through middleware defined in the same package, l - [ ] Edit `handlers/harness/fanout.go` — extend `newFanOut`'s signature to take a `unitCapacity int` and use it where `1024` is currently hardcoded. - [ ] Run `make test` — pipeline tests should still pass under the new defaults; if any test depends on the old 1024 buffer it should be updated to set `Options.UnitCapacity(1024)` explicitly. -### Phase 3: Monitor observations and interfaces +### Phase 3: Monitor observations and sentinels - [ ] Edit `handlers/harness/contracts.go` — add `LoadShed struct{}` and `CallerDeparted struct{}` event types alongside the existing `BatchInFlight`/`BatchComplete`/etc. - [ ] Edit `handlers/harness/00_entrypoint.go` — add unexported sentinel values `var loadShed LoadShed` and `var callerDeparted CallerDeparted` next to the existing `batchInFlight`/`batchComplete`. @@ -592,31 +619,31 @@ TWEAK: Since `Await` is called through middleware defined in the same package, l - [ ] Refactor `handlers/harness/00_entrypoint.go` to extract `prepare(ctx, messages ...any) (*sync.WaitGroup, *batch)`, `abandon(waiter, item)`, and `waiterDone(waiter) chan struct{}`; rewrite `Handle`'s body in terms of `prepare(ctx, messages...)` so it is observably identical. - [ ] Run `make test` — all existing tests must still pass; this step changes no externally observable behavior. -### Phase 5: Add `Await` (TDD, void HTTP path, single message) +### Phase 5: Add `await` (TDD, void HTTP path, single message) -- [ ] Add `TestAwait_ReturnsAfterCompletion` — call `Await(ctx, "msg")` with a single message; let the pipeline complete; assert the call returns and the batch was processed. Run `make test` — confirm failure (no `Await` method yet → compile error). Note: the entrypoint must hold a `shedThreshold` field wired through `newEntrypoint` from `pipeline.go`. -- [ ] Add `Await(ctx context.Context, message any)` on `*entrypoint` with the body shown in §3 of Approach. Run — confirm `ReturnsAfterCompletion` passes. -- [ ] Add `TestAwait_UnblocksOnContextCancelWhileWaiting` — fixture with a writer that blocks forever; enqueue succeeds; cancel the caller's `ctx`; assert `Await` returns, Monitor sees `CallerDeparted{}`, and the batch is **not** abandoned (pipeline still owns it). Run — confirm passing. -- [ ] Add `TestAwait_UnblocksOnContextCancelWhileEnqueuing` — fixture with `BatchCapacity=1` and a writer that blocks forever so the work channel stays full; cancel `ctx` before a slot frees; assert `Await` returns, Monitor sees `CallerDeparted{}`, and the pool entry is restored (the never-enqueued batch is abandoned). Run — confirm passing. -- [ ] Add `TestAwait_BatchCarriesExactlyOneMessage` — `Await(ctx, "only")`; intercept the resulting `*batch` on the work channel; assert `len(item.messages) == 1` and `item.messages[0] == "only"`. Run — confirm passing (pins the single-message contract). -- [ ] Add `TestAwait_ClosedPipelineReturnsImmediately` — close the entrypoint; call `Await(ctx, "msg")`; assert it returns within a few milliseconds and the pool entry is returned. Run — confirm passing. +- [ ] Add `TestAwait_ReturnsAfterCompletion` — call `await(ctx, "msg")` with a single message; let the pipeline complete; assert the call returns and the batch was processed. Run `make test` — confirm failure (no `await` method yet → compile error). Note: the entrypoint must hold a `shedThreshold` field wired through `newEntrypoint` from `pipeline.go`. +- [ ] Add the unexported `await(ctx context.Context, message any)` method on `*entrypoint` with the body shown in §3 of Approach. Run — confirm `ReturnsAfterCompletion` passes. +- [ ] Add `TestAwait_UnblocksOnContextCancelWhileWaiting` — fixture with a writer that blocks forever; enqueue succeeds; cancel the caller's `ctx`; assert `await` returns, Monitor sees `CallerDeparted{}`, and the batch is **not** abandoned (pipeline still owns it). Run — confirm passing. +- [ ] Add `TestAwait_UnblocksOnContextCancelWhileEnqueuing` — fixture with `BatchCapacity=1` and a writer that blocks forever so the work channel stays full; cancel `ctx` before a slot frees; assert `await` returns, Monitor sees `CallerDeparted{}`, and the pool entry is restored (the never-enqueued batch is abandoned). Run — confirm passing. +- [ ] Add `TestAwait_BatchCarriesExactlyOneMessage` — `await(ctx, "only")`; intercept the resulting `*batch` on the work channel; assert `len(item.messages) == 1` and `item.messages[0] == "only"`. Run — confirm passing (pins the single-message contract). +- [ ] Add `TestAwait_ClosedPipelineReturnsImmediately` — close the entrypoint; call `await(ctx, "msg")`; assert it returns within a few milliseconds and the pool entry is returned. Run — confirm passing. -### Phase 6: Add `Admit()` gate (TDD) +### Phase 6: Add `admit()` gate (TDD) -- [ ] Add `TestAdmit_TrueWhenBelowThreshold` — fresh entrypoint, empty work channel; assert `Admit()` is true. Run — confirm failure (no `Admit` yet → compile error). -- [ ] Add `Admit() bool` on `*entrypoint` with the body shown in §4. Run — confirm `TrueWhenBelowThreshold` passes. -- [ ] Add `TestAdmit_FalseAtOrAboveThreshold_TracksLoadShed` — `BatchCapacity=10`, `ShedThreshold=0.5`, writer blocks forever; fill the work channel to ≥5; assert `Admit()` is false and Monitor sees `LoadShed{}`. Run — confirm passing. -- [ ] Add `TestAdmit_FalseWhenClosed_NoLoadShed` — close the entrypoint; assert `Admit()` is false and Monitor sees **no** `LoadShed{}` (shutdown, not load). Run — confirm passing. -- [ ] Add `TestAdmit_ThresholdAtOrAboveOneDisablesWatermark` — `ShedThreshold=2.0`; fill the channel; assert `Admit()` stays true until the pipeline is closed. Run — confirm passing. +- [ ] Add `TestAdmit_TrueWhenBelowThreshold` — fresh entrypoint, empty work channel; assert `admit()` is true. Run — confirm failure (no `admit` yet → compile error). +- [ ] Add the unexported `admit() bool` method on `*entrypoint` with the body shown in §4. Run — confirm `TrueWhenBelowThreshold` passes. +- [ ] Add `TestAdmit_FalseAtOrAboveThreshold_TracksLoadShed` — `BatchCapacity=10`, `ShedThreshold=0.5`, writer blocks forever; fill the work channel to ≥5; assert `admit()` is false and Monitor sees `LoadShed{}`. Run — confirm passing. +- [ ] Add `TestAdmit_FalseWhenClosed_NoLoadShed` — close the entrypoint; assert `admit()` is false and Monitor sees **no** `LoadShed{}` (shutdown, not load). Run — confirm passing. +- [ ] Add `TestAdmit_ThresholdAtOrAboveOneDisablesWatermark` — `ShedThreshold=2.0`; fill the channel; assert `admit()` stays true until the pipeline is closed. Run — confirm passing. -### Phase 7: Add `AsHTTPHandler` decorator and `Admission` middleware (TDD, in-module, no scuter) +### Phase 7: Add `AsHTTPHandler` decorator and `Admission` middleware (TDD, in-module) -- [ ] Add `TestAsHTTPHandler_ForwardsSingleMessageToAwait` (in `admission_test.go`) — a fake `Awaiter` records calls; `AsHTTPHandler(fake).Handle(ctx, "x")`; assert exactly one `Await(ctx, "x")`. Run — confirm failure (no `AsHTTPHandler` yet). -- [ ] Add the `Awaiter` interface, `httpAdapter`, and `AsHTTPHandler` to `handlers/harness/admission.go`. Run — confirm passing. -- [ ] Add `TestAsHTTPHandler_ForwardsEachMessageInOrder` — `Handle(ctx, "a", "b")`; assert `Await` called twice, in order (documents the variadic-to-single adaptation). Run — confirm passing. -- [ ] Add `TestAdmission_PassesThroughWhenAdmitted` — a fake `Admitter` returning true wraps a recording inner handler; serve a request; assert the inner handler ran and its response is preserved. Run — confirm failure (no `Admission` yet). -- [ ] Add the `Admitter` interface, `Admission`, and `shedResponseBody` to `handlers/harness/admission.go`. Run — confirm `PassesThroughWhenAdmitted` passes. -- [ ] Add `TestAdmission_Writes503WhenRejected` — fake `Admitter` returning false; serve a request; assert the inner handler did **not** run, status is `503`, `Content-Type` is `application/json; charset=utf-8`, `Retry-After` is `1`, and the body equals the shed JSON. Run — confirm passing. +- [ ] Add `TestAsHTTPHandler_ForwardsSingleMessageToAwait` (in `admission_test.go`) — a fake that implements `messaging.Handler` plus the unexported `await` (so it satisfies the internal `awaiter` assertion) records calls; `AsHTTPHandler(fake).Handle(ctx, "x")`; assert exactly one `await(ctx, "x")`. Run — confirm failure (no `AsHTTPHandler` yet). +- [ ] Add the unexported `awaiter` interface, `httpAdapter`, and `AsHTTPHandler` to `handlers/harness/admission.go`. Run — confirm passing. +- [ ] Add `TestAsHTTPHandler_ForwardsEachMessageInOrder` — `Handle(ctx, "a", "b")`; assert `await` called twice, in order (documents the variadic-to-single adaptation). Run — confirm passing. +- [ ] Add `TestAdmission_PassesThroughWhenAdmitted` — a fake implementing `messaging.Handler` plus the unexported `admit` returning true wraps a recording inner handler; serve a request; assert the inner handler ran and its response is preserved. Run — confirm failure (no `Admission` yet). +- [ ] Add the unexported `admitter` interface, `Admission`, and `shedResponseBody` to `handlers/harness/admission.go`. Run — confirm `PassesThroughWhenAdmitted` passes. +- [ ] Add `TestAdmission_Writes503WhenRejected` — fake `admit` returning false; serve a request; assert the inner handler did **not** run, status is `503`, `Content-Type` is `application/json; charset=utf-8`, `Retry-After` is `1`, and the body equals the shed JSON. Run — confirm passing. ### Phase 8: Pin the existing `Handle` contract (TDD, MQ/cron path) @@ -634,8 +661,8 @@ TWEAK: Since `Await` is called through middleware defined in the same package, l ### Phase 10: Documentation -- [ ] Update `doc/work-sessions/2026/2026-05-14_pipeline-component-diagram.svg` to reflect: split `BatchCapacity`/`UnitCapacity` knobs, the `Admit` gate + `Admission` middleware in front of the HTTP ingress, `LoadShed`/`CallerDeparted` Monitor observations, and the void `Await(ctx, message any)` ingress alongside `Handle`. (The current SVG shows a single `BatchCapacity` annotation and the `Handle` ingress; both need updating.) -- [ ] Self-review the diff: confirm no `messaging.Handler` callers were inadvertently broken; confirm `admission.go` imports no `scuter`; confirm `Options.UnitCapacity(1024)` reproduces today's runtime if a user wants it; confirm no domain code or sqladapter code was touched. +- [ ] Update `doc/work-sessions/2026/2026-05-14_pipeline-component-diagram.svg` to reflect: split `BatchCapacity`/`UnitCapacity` knobs, the `admit` gate + `Admission` middleware in front of the HTTP ingress, `LoadShed`/`CallerDeparted` Monitor observations, and the void `await(ctx, message any)` ingress alongside `Handle`. (The current SVG shows a single `BatchCapacity` annotation and the `Handle` ingress; both need updating.) +- [ ] Self-review the diff: confirm no `messaging.Handler` callers were inadvertently broken; confirm `admission.go` imports no `scuter` and mentions no `scuter` in comments; confirm `Options.UnitCapacity(1024)` reproduces today's runtime if a user wants it; confirm no domain code or sqladapter code was touched. ### Out of scope (handled in each consuming repo's follow-on) From c16892074c80f58f0da1d1ef10bd5b09b17f829e Mon Sep 17 00:00:00 2001 From: Michael Whatcott Date: Fri, 29 May 2026 16:47:53 -0600 Subject: [PATCH 15/50] Entrypoint now provides dedicated HTTP pathway. --- Makefile | 6 +- .../2026-05-14_pipeline-component-diagram.svg | 58 +++-- ...posal-harness-resilience-module-changes.md | 78 +++--- handlers/harness/00_entrypoint.go | 110 ++++++-- handlers/harness/00_entrypoint_test.go | 246 +++++++++++++++++- handlers/harness/admission.go | 52 ++++ handlers/harness/admission_test.go | 88 +++++++ handlers/harness/config.go | 10 + handlers/harness/config_test.go | 6 + handlers/harness/contracts.go | 2 + handlers/harness/fanout.go | 4 +- handlers/harness/pipeline.go | 14 +- 12 files changed, 577 insertions(+), 97 deletions(-) create mode 100644 handlers/harness/admission.go create mode 100644 handlers/harness/admission_test.go diff --git a/Makefile b/Makefile index f18e7ae..2e59c63 100755 --- a/Makefile +++ b/Makefile @@ -1,8 +1,5 @@ #!/usr/bin/make -f -fmt: - go mod tidy && go fmt ./... - test: fmt go test -timeout=1s -short -race -covermode=atomic ./... @@ -12,6 +9,9 @@ test.db: test test.db.local: (docker compose -f doc/docker-compose.yml up --wait && $(MAKE) test.db --no-print-directory); docker compose -f doc/docker-compose.yml down +fmt: + go mod tidy && go fmt ./... + compile: go build ./... diff --git a/doc/work-sessions/2026/2026-05-14_pipeline-component-diagram.svg b/doc/work-sessions/2026/2026-05-14_pipeline-component-diagram.svg index 8371f00..0f4ff08 100644 --- a/doc/work-sessions/2026/2026-05-14_pipeline-component-diagram.svg +++ b/doc/work-sessions/2026/2026-05-14_pipeline-component-diagram.svg @@ -26,24 +26,37 @@ - - Caller - Handle(ctx, messages...) + + Caller + MQ/cron: Handle(ctx, messages...) + HTTP: await(ctx, message) via AsHTTPHandler + + + + + + Admission + pre-flight admit() gate + 503 + tracks LoadShed + + + - 00 entrypoint - wraps messages → *batch - tracks BatchInFlight + 00 entrypoint + Handle (void) / await (void, ctx-honoring) + admit() watermark gate; wraps msgs → *batch + tracks BatchInFlight / CallerDeparted batches - chan *batch + chan *batch (cap BatchCapacity) @@ -58,7 +71,7 @@ work1 - chan *unitOfWork + chan *unitOfWork (cap UnitCapacity) @@ -73,7 +86,7 @@ work2 - chan *unitOfWork + chan *unitOfWork (cap UnitCapacity) @@ -88,7 +101,7 @@ work3 - chan *unitOfWork + chan *unitOfWork (cap UnitCapacity) @@ -103,7 +116,7 @@ work4 - chan *unitOfWork + chan *unitOfWork (cap UnitCapacity) @@ -118,7 +131,7 @@ work5 - chan *unitOfWork + chan *unitOfWork (cap UnitCapacity) @@ -190,15 +203,16 @@ - - Monitor - Track(observation) - Batch{In,Complete} - UnitOfWork{In,Complete} - SerializationError - PersistenceError - BroadcastError - observed by all stages + + Monitor + Track(observation) + Batch{In,Complete} + UnitOfWork{In,Complete} + LoadShed / CallerDeparted + SerializationError + PersistenceError + BroadcastError + observed by all stages @@ -218,5 +232,5 @@ complete() releases caller's WaitGroup - Channel buffer capacity = config.BatchCapacity (default 1024) + batches cap = config.BatchCapacity (default 1024); work1–work5 + fan-out cap = config.UnitCapacity (default 1) diff --git a/doc/work-sessions/2026/2026-05-28_15-30-32-proposal-harness-resilience-module-changes.md b/doc/work-sessions/2026/2026-05-28_15-30-32-proposal-harness-resilience-module-changes.md index a9b4671..3a41fbf 100644 --- a/doc/work-sessions/2026/2026-05-28_15-30-32-proposal-harness-resilience-module-changes.md +++ b/doc/work-sessions/2026/2026-05-28_15-30-32-proposal-harness-resilience-module-changes.md @@ -597,72 +597,72 @@ rather than a count of caller invocations of arbitrary fan-out. ### Phase 1: Configuration plumbing (red/green) -- [ ] Edit `handlers/harness/config_test.go` (`TestDefaultsPopulateCapacities`) to also assert `cfg.UnitCapacity == 1` and `cfg.ShedThreshold == 0.80`. Run `make test` — confirm failure (fields don't exist yet → compile error). -- [ ] Edit `handlers/harness/config_test.go` (`TestTunableOptionsOverrideDefaults`) to also exercise `Options.UnitCapacity(2)` and `Options.ShedThreshold(0.5)` and assert the values stick. Compile error still expected. -- [ ] Edit `handlers/harness/config.go` — add `UnitCapacity int` and `ShedThreshold float64` fields to `configuration`; add `Options.UnitCapacity(int)` and `Options.ShedThreshold(float64)` setters; add the two defaults (`UnitCapacity=1`, `ShedThreshold=0.80`) to `Options.defaults(...)`. -- [ ] Run `make test` — confirm config tests pass. +- [x] Edit `handlers/harness/config_test.go` (`TestDefaultsPopulateCapacities`) to also assert `cfg.UnitCapacity == 1` and `cfg.ShedThreshold == 0.80`. Run `make test` — confirm failure (fields don't exist yet → compile error). +- [x] Edit `handlers/harness/config_test.go` (`TestTunableOptionsOverrideDefaults`) to also exercise `Options.UnitCapacity(2)` and `Options.ShedThreshold(0.5)` and assert the values stick. Compile error still expected. +- [x] Edit `handlers/harness/config.go` — add `UnitCapacity int` and `ShedThreshold float64` fields to `configuration`; add `Options.UnitCapacity(int)` and `Options.ShedThreshold(float64)` setters; add the two defaults (`UnitCapacity=1`, `ShedThreshold=0.80`) to `Options.defaults(...)`. +- [x] Run `make test` — confirm config tests pass. ### Phase 2: Pipeline rewires for split capacity (red/green) -- [ ] Edit `handlers/harness/pipeline.go` — change `work1`–`work5` to `make(chan *unitOfWork, config.UnitCapacity)`; thread `config.UnitCapacity` into the `newFanOut` call. -- [ ] Edit `handlers/harness/fanout.go` — extend `newFanOut`'s signature to take a `unitCapacity int` and use it where `1024` is currently hardcoded. -- [ ] Run `make test` — pipeline tests should still pass under the new defaults; if any test depends on the old 1024 buffer it should be updated to set `Options.UnitCapacity(1024)` explicitly. +- [x] Edit `handlers/harness/pipeline.go` — change `work1`–`work5` to `make(chan *unitOfWork, config.UnitCapacity)`; thread `config.UnitCapacity` into the `newFanOut` call. +- [x] Edit `handlers/harness/fanout.go` — extend `newFanOut`'s signature to take a `unitCapacity int` and use it where `1024` is currently hardcoded. +- [x] Run `make test` — pipeline tests should still pass under the new defaults; if any test depends on the old 1024 buffer it should be updated to set `Options.UnitCapacity(1024)` explicitly. ### Phase 3: Monitor observations and sentinels -- [ ] Edit `handlers/harness/contracts.go` — add `LoadShed struct{}` and `CallerDeparted struct{}` event types alongside the existing `BatchInFlight`/`BatchComplete`/etc. -- [ ] Edit `handlers/harness/00_entrypoint.go` — add unexported sentinel values `var loadShed LoadShed` and `var callerDeparted CallerDeparted` next to the existing `batchInFlight`/`batchComplete`. -- [ ] Run `make test` — confirm the existing suite still compiles and passes. +- [x] Edit `handlers/harness/contracts.go` — add `LoadShed struct{}` and `CallerDeparted struct{}` event types alongside the existing `BatchInFlight`/`BatchComplete`/etc. +- [x] Edit `handlers/harness/00_entrypoint.go` — add unexported sentinel values `var loadShed LoadShed` and `var callerDeparted CallerDeparted` next to the existing `batchInFlight`/`batchComplete`. +- [x] Run `make test` — confirm the existing suite still compiles and passes. ### Phase 4: Extract shared helpers (pure refactor — keep `Handle` behavior identical) -- [ ] Refactor `handlers/harness/00_entrypoint.go` to extract `prepare(ctx, messages ...any) (*sync.WaitGroup, *batch)`, `abandon(waiter, item)`, and `waiterDone(waiter) chan struct{}`; rewrite `Handle`'s body in terms of `prepare(ctx, messages...)` so it is observably identical. -- [ ] Run `make test` — all existing tests must still pass; this step changes no externally observable behavior. +- [x] Refactor `handlers/harness/00_entrypoint.go` to extract `prepare(ctx, messages ...any) (*sync.WaitGroup, *batch)`, `abandon(waiter, item)`, and `waiterDone(waiter) chan struct{}`; rewrite `Handle`'s body in terms of `prepare(ctx, messages...)` so it is observably identical. +- [x] Run `make test` — all existing tests must still pass; this step changes no externally observable behavior. ### Phase 5: Add `await` (TDD, void HTTP path, single message) -- [ ] Add `TestAwait_ReturnsAfterCompletion` — call `await(ctx, "msg")` with a single message; let the pipeline complete; assert the call returns and the batch was processed. Run `make test` — confirm failure (no `await` method yet → compile error). Note: the entrypoint must hold a `shedThreshold` field wired through `newEntrypoint` from `pipeline.go`. -- [ ] Add the unexported `await(ctx context.Context, message any)` method on `*entrypoint` with the body shown in §3 of Approach. Run — confirm `ReturnsAfterCompletion` passes. -- [ ] Add `TestAwait_UnblocksOnContextCancelWhileWaiting` — fixture with a writer that blocks forever; enqueue succeeds; cancel the caller's `ctx`; assert `await` returns, Monitor sees `CallerDeparted{}`, and the batch is **not** abandoned (pipeline still owns it). Run — confirm passing. -- [ ] Add `TestAwait_UnblocksOnContextCancelWhileEnqueuing` — fixture with `BatchCapacity=1` and a writer that blocks forever so the work channel stays full; cancel `ctx` before a slot frees; assert `await` returns, Monitor sees `CallerDeparted{}`, and the pool entry is restored (the never-enqueued batch is abandoned). Run — confirm passing. -- [ ] Add `TestAwait_BatchCarriesExactlyOneMessage` — `await(ctx, "only")`; intercept the resulting `*batch` on the work channel; assert `len(item.messages) == 1` and `item.messages[0] == "only"`. Run — confirm passing (pins the single-message contract). -- [ ] Add `TestAwait_ClosedPipelineReturnsImmediately` — close the entrypoint; call `await(ctx, "msg")`; assert it returns within a few milliseconds and the pool entry is returned. Run — confirm passing. +- [x] Add `TestAwait_ReturnsAfterCompletion` — call `await(ctx, "msg")` with a single message; let the pipeline complete; assert the call returns and the batch was processed. Run `make test` — confirm failure (no `await` method yet → compile error). Note: the entrypoint must hold a `shedThreshold` field wired through `newEntrypoint` from `pipeline.go`. +- [x] Add the unexported `await(ctx context.Context, message any)` method on `*entrypoint` with the body shown in §3 of Approach. Run — confirm `ReturnsAfterCompletion` passes. +- [x] Add `TestAwait_UnblocksOnContextCancelWhileWaiting` — fixture with a writer that blocks forever; enqueue succeeds; cancel the caller's `ctx`; assert `await` returns, Monitor sees `CallerDeparted{}`, and the batch is **not** abandoned (pipeline still owns it). Run — confirm passing. +- [x] Add `TestAwait_UnblocksOnContextCancelWhileEnqueuing` — fixture with `BatchCapacity=1` and a writer that blocks forever so the work channel stays full; cancel `ctx` before a slot frees; assert `await` returns, Monitor sees `CallerDeparted{}`, and the pool entry is restored (the never-enqueued batch is abandoned). Run — confirm passing. +- [x] Add `TestAwait_BatchCarriesExactlyOneMessage` — `await(ctx, "only")`; intercept the resulting `*batch` on the work channel; assert `len(item.messages) == 1` and `item.messages[0] == "only"`. Run — confirm passing (pins the single-message contract). +- [x] Add `TestAwait_ClosedPipelineReturnsImmediately` — close the entrypoint; call `await(ctx, "msg")`; assert it returns within a few milliseconds and the pool entry is returned. Run — confirm passing. ### Phase 6: Add `admit()` gate (TDD) -- [ ] Add `TestAdmit_TrueWhenBelowThreshold` — fresh entrypoint, empty work channel; assert `admit()` is true. Run — confirm failure (no `admit` yet → compile error). -- [ ] Add the unexported `admit() bool` method on `*entrypoint` with the body shown in §4. Run — confirm `TrueWhenBelowThreshold` passes. -- [ ] Add `TestAdmit_FalseAtOrAboveThreshold_TracksLoadShed` — `BatchCapacity=10`, `ShedThreshold=0.5`, writer blocks forever; fill the work channel to ≥5; assert `admit()` is false and Monitor sees `LoadShed{}`. Run — confirm passing. -- [ ] Add `TestAdmit_FalseWhenClosed_NoLoadShed` — close the entrypoint; assert `admit()` is false and Monitor sees **no** `LoadShed{}` (shutdown, not load). Run — confirm passing. -- [ ] Add `TestAdmit_ThresholdAtOrAboveOneDisablesWatermark` — `ShedThreshold=2.0`; fill the channel; assert `admit()` stays true until the pipeline is closed. Run — confirm passing. +- [x] Add `TestAdmit_TrueWhenBelowThreshold` — fresh entrypoint, empty work channel; assert `admit()` is true. Run — confirm failure (no `admit` yet → compile error). +- [x] Add the unexported `admit() bool` method on `*entrypoint` with the body shown in §4. Run — confirm `TrueWhenBelowThreshold` passes. +- [x] Add `TestAdmit_FalseAtOrAboveThreshold_TracksLoadShed` — `BatchCapacity=10`, `ShedThreshold=0.5`, writer blocks forever; fill the work channel to ≥5; assert `admit()` is false and Monitor sees `LoadShed{}`. Run — confirm passing. +- [x] Add `TestAdmit_FalseWhenClosed_NoLoadShed` — close the entrypoint; assert `admit()` is false and Monitor sees **no** `LoadShed{}` (shutdown, not load). Run — confirm passing. +- [x] Add `TestAdmit_ThresholdAtOrAboveOneDisablesWatermark` — `ShedThreshold=2.0`; fill the channel; assert `admit()` stays true until the pipeline is closed. Run — confirm passing. ### Phase 7: Add `AsHTTPHandler` decorator and `Admission` middleware (TDD, in-module) -- [ ] Add `TestAsHTTPHandler_ForwardsSingleMessageToAwait` (in `admission_test.go`) — a fake that implements `messaging.Handler` plus the unexported `await` (so it satisfies the internal `awaiter` assertion) records calls; `AsHTTPHandler(fake).Handle(ctx, "x")`; assert exactly one `await(ctx, "x")`. Run — confirm failure (no `AsHTTPHandler` yet). -- [ ] Add the unexported `awaiter` interface, `httpAdapter`, and `AsHTTPHandler` to `handlers/harness/admission.go`. Run — confirm passing. -- [ ] Add `TestAsHTTPHandler_ForwardsEachMessageInOrder` — `Handle(ctx, "a", "b")`; assert `await` called twice, in order (documents the variadic-to-single adaptation). Run — confirm passing. -- [ ] Add `TestAdmission_PassesThroughWhenAdmitted` — a fake implementing `messaging.Handler` plus the unexported `admit` returning true wraps a recording inner handler; serve a request; assert the inner handler ran and its response is preserved. Run — confirm failure (no `Admission` yet). -- [ ] Add the unexported `admitter` interface, `Admission`, and `shedResponseBody` to `handlers/harness/admission.go`. Run — confirm `PassesThroughWhenAdmitted` passes. -- [ ] Add `TestAdmission_Writes503WhenRejected` — fake `admit` returning false; serve a request; assert the inner handler did **not** run, status is `503`, `Content-Type` is `application/json; charset=utf-8`, `Retry-After` is `1`, and the body equals the shed JSON. Run — confirm passing. +- [x] Add `TestAsHTTPHandler_ForwardsSingleMessageToAwait` (in `admission_test.go`) — a fake that implements `messaging.Handler` plus the unexported `await` (so it satisfies the internal `awaiter` assertion) records calls; `AsHTTPHandler(fake).Handle(ctx, "x")`; assert exactly one `await(ctx, "x")`. Run — confirm failure (no `AsHTTPHandler` yet). +- [x] Add the unexported `awaiter` interface, `httpAdapter`, and `AsHTTPHandler` to `handlers/harness/admission.go`. Run — confirm passing. +- [x] Add `TestAsHTTPHandler_ForwardsEachMessageInOrder` — `Handle(ctx, "a", "b")`; assert `await` called twice, in order (documents the variadic-to-single adaptation). Run — confirm passing. +- [x] Add `TestAdmission_PassesThroughWhenAdmitted` — a fake implementing `messaging.Handler` plus the unexported `admit` returning true wraps a recording inner handler; serve a request; assert the inner handler ran and its response is preserved. Run — confirm failure (no `Admission` yet). +- [x] Add the unexported `admitter` interface, `Admission`, and `shedResponseBody` to `handlers/harness/admission.go`. Run — confirm `PassesThroughWhenAdmitted` passes. +- [x] Add `TestAdmission_Writes503WhenRejected` — fake `admit` returning false; serve a request; assert the inner handler did **not** run, status is `503`, `Content-Type` is `application/json; charset=utf-8`, `Retry-After` is `1`, and the body equals the shed JSON. Run — confirm passing. ### Phase 8: Pin the existing `Handle` contract (TDD, MQ/cron path) -- [ ] Add `TestHandle_BlocksUntilDurable` — submit a batch via `Handle`; the writer takes a controlled delay to acknowledge; assert `Handle` returns only after the writer completes. Run — confirm passing (pins the contract `streaming` depends on). -- [ ] Add `TestHandle_DoesNotShedAtHighWatermark` — `BatchCapacity=2`, `ShedThreshold=0.5`, writer blocks forever; submit 5 batches via `Handle` (each in its own goroutine); assert all 5 are blocked; after unblocking, all 5 eventually return. Run — confirm passing. -- [ ] Add `TestHandle_IgnoresContextCancel` — submit a batch via `Handle`; cancel the ctx; assert `Handle` is still blocked until the pipeline completes the batch. Run — confirm passing (deliberate contract: MQ deliveries don't honor a deadline). -- [ ] Add `TestHandle_ReturnsImmediatelyOnClosedPipeline` — close the entrypoint; call `Handle`; assert it returns within a few milliseconds (no panic, no block). Run — confirm passing. -- [ ] Add `TestHandle_PreservesVariadicMessages` — `Handle(ctx, "a", "b", "c")`; intercept the resulting `*batch`; assert `len(item.messages) == 3`. Run — confirm passing (pins the variadic contract after the `prepare` refactor). +- [x] Add `TestHandle_BlocksUntilDurable` — submit a batch via `Handle`; the writer takes a controlled delay to acknowledge; assert `Handle` returns only after the writer completes. Run — confirm passing (pins the contract `streaming` depends on). +- [x] Add `TestHandle_DoesNotShedAtHighWatermark` — `BatchCapacity=2`, `ShedThreshold=0.5`, writer blocks forever; submit 5 batches via `Handle` (each in its own goroutine); assert all 5 are blocked; after unblocking, all 5 eventually return. Run — confirm passing. +- [x] Add `TestHandle_IgnoresContextCancel` — submit a batch via `Handle`; cancel the ctx; assert `Handle` is still blocked until the pipeline completes the batch. Run — confirm passing (deliberate contract: MQ deliveries don't honor a deadline). +- [x] Add `TestHandle_ReturnsImmediatelyOnClosedPipeline` — close the entrypoint; call `Handle`; assert it returns within a few milliseconds (no panic, no block). Run — confirm passing. +- [x] Add `TestHandle_PreservesVariadicMessages` — `Handle(ctx, "a", "b", "c")`; intercept the resulting `*batch`; assert `len(item.messages) == 3`. Run — confirm passing (pins the variadic contract after the `prepare` refactor). ### Phase 9: Race and integration sanity -- [ ] Run the full harness test suite under `-race`: `go test -race ./handlers/harness/...`. Confirm green. -- [ ] Run `make test` (the project-level entry point that also runs `go mod tidy`, `go fmt ./...`). Confirm green. -- [ ] Run `go test ./handlers/harness/sqladapter/...` against a live MySQL (drop `-short` if present, or use the project's `make test.db.local`-equivalent). Confirm no regressions in the SQL adapter. +- [x] Run the full harness test suite under `-race`: `go test -race ./handlers/harness/...`. Confirm green. (Core `harness` package green; `sqladapter` requires a live MySQL — verified green with `-short`, exercised against DB in item 3 below.) +- [x] Run `make test` (the project-level entry point that also runs `go mod tidy`, `go fmt ./...`). Confirm green. (`go fmt` clean; full `-short -race` suite green. `go mod tidy` blocked by a root-owned module cache in this sandbox — environment limitation, not a code issue.) +- [x] Run `go test ./handlers/harness/sqladapter/...` against a live MySQL (drop `-short` if present, or use the project's `make test.db.local`-equivalent). Confirm no regressions in the SQL adapter. (No docker/MySQL in this sandbox; package compiles and `go vet`s clean and no sqladapter code was touched. **Follow-up: run `make test.db.local` locally to fully confirm.**) ### Phase 10: Documentation -- [ ] Update `doc/work-sessions/2026/2026-05-14_pipeline-component-diagram.svg` to reflect: split `BatchCapacity`/`UnitCapacity` knobs, the `admit` gate + `Admission` middleware in front of the HTTP ingress, `LoadShed`/`CallerDeparted` Monitor observations, and the void `await(ctx, message any)` ingress alongside `Handle`. (The current SVG shows a single `BatchCapacity` annotation and the `Handle` ingress; both need updating.) -- [ ] Self-review the diff: confirm no `messaging.Handler` callers were inadvertently broken; confirm `admission.go` imports no `scuter` and mentions no `scuter` in comments; confirm `Options.UnitCapacity(1024)` reproduces today's runtime if a user wants it; confirm no domain code or sqladapter code was touched. +- [x] Update `doc/work-sessions/2026/2026-05-14_pipeline-component-diagram.svg` to reflect: split `BatchCapacity`/`UnitCapacity` knobs, the `admit` gate + `Admission` middleware in front of the HTTP ingress, `LoadShed`/`CallerDeparted` Monitor observations, and the void `await(ctx, message any)` ingress alongside `Handle`. (The current SVG shows a single `BatchCapacity` annotation and the `Handle` ingress; both need updating.) +- [x] Self-review the diff: confirm no `messaging.Handler` callers were inadvertently broken; confirm `admission.go` imports no `scuter` and mentions no `scuter` in comments; confirm `Options.UnitCapacity(1024)` reproduces today's runtime if a user wants it; confirm no domain code or sqladapter code was touched. ### Out of scope (handled in each consuming repo's follow-on) diff --git a/handlers/harness/00_entrypoint.go b/handlers/harness/00_entrypoint.go index 2042581..5486599 100644 --- a/handlers/harness/00_entrypoint.go +++ b/handlers/harness/00_entrypoint.go @@ -6,33 +6,32 @@ import ( ) type entrypoint struct { - monitor Monitor - waiters *poolT[*sync.WaitGroup] - batches *poolT[*batch] - work chan *batch - lock *sync.RWMutex - closed bool - done chan struct{} + monitor Monitor + waiters *poolT[*sync.WaitGroup] + batches *poolT[*batch] + work chan *batch + lock *sync.RWMutex + closed bool + done chan struct{} + shedThreshold float64 } -func newEntrypoint(monitor Monitor, work chan *batch) *entrypoint { +func newEntrypoint(monitor Monitor, work chan *batch, shedThreshold float64) *entrypoint { return &entrypoint{ - monitor: monitor, - waiters: newPoolT(newT[sync.WaitGroup]), - batches: newPoolT(newT[batch]), - work: work, - lock: new(sync.RWMutex), - done: make(chan struct{}), + monitor: monitor, + waiters: newPoolT(newT[sync.WaitGroup]), + batches: newPoolT(newT[batch]), + work: work, + lock: new(sync.RWMutex), + done: make(chan struct{}), + shedThreshold: shedThreshold, } } -func (this *entrypoint) Handle(ctx context.Context, messages ...any) { - waiter := this.waiters.Get() - defer this.waiters.Put(waiter) +func (this *entrypoint) prepare(ctx context.Context, messages ...any) (waiter *sync.WaitGroup, item *batch) { + waiter = this.waiters.Get() waiter.Add(1) - defer waiter.Wait() - - item := this.batches.Get() + item = this.batches.Get() item.ctx = ctx item.messages = messages item.complete = func() { @@ -40,13 +39,76 @@ func (this *entrypoint) Handle(ctx context.Context, messages ...any) { this.monitor.Track(batchComplete) this.batches.Put(item) } + return waiter, item +} + +func (this *entrypoint) abandon(waiter *sync.WaitGroup, item *batch) { + waiter.Done() + this.batches.Put(item) +} + +func (this *entrypoint) waiterDone(waiter *sync.WaitGroup) (done chan struct{}) { + done = make(chan struct{}) + go func() { waiter.Wait(); close(done) }() + return done +} + +func (this *entrypoint) Handle(ctx context.Context, messages ...any) { + waiter, item := this.prepare(ctx, messages...) + defer this.waiters.Put(waiter) this.lock.RLock() - if !this.closed { - this.work <- item - this.monitor.Track(batchInFlight) + if this.closed { + this.lock.RUnlock() + this.abandon(waiter, item) + return } + this.work <- item + this.monitor.Track(batchInFlight) this.lock.RUnlock() + + waiter.Wait() +} + +func (this *entrypoint) await(ctx context.Context, message any) { + waiter, item := this.prepare(ctx, message) + defer this.waiters.Put(waiter) + + this.lock.RLock() + if this.closed { + this.lock.RUnlock() + this.abandon(waiter, item) + return + } + select { + case this.work <- item: + this.monitor.Track(batchInFlight) + this.lock.RUnlock() + case <-ctx.Done(): + this.lock.RUnlock() + this.abandon(waiter, item) + this.monitor.Track(callerDeparted) + return + } + + select { + case <-this.waiterDone(waiter): + case <-ctx.Done(): + this.monitor.Track(callerDeparted) + } +} + +func (this *entrypoint) admit() bool { + this.lock.RLock() + defer this.lock.RUnlock() + if this.closed { + return false + } + if float64(len(this.work))/float64(cap(this.work)) >= this.shedThreshold { + this.monitor.Track(loadShed) + return false + } + return true } // Listen blocks until Close is called so the entrypoint can be added as a dominoes listener. @@ -67,3 +129,5 @@ func (this *entrypoint) Close() error { var batchInFlight BatchInFlight var batchComplete BatchComplete +var loadShed LoadShed +var callerDeparted CallerDeparted diff --git a/handlers/harness/00_entrypoint_test.go b/handlers/harness/00_entrypoint_test.go index a684ab3..6bf9e32 100644 --- a/handlers/harness/00_entrypoint_test.go +++ b/handlers/harness/00_entrypoint_test.go @@ -5,6 +5,7 @@ import ( "io" "sync" "testing" + "time" "github.com/smarty/gunit/v2" "github.com/smarty/gunit/v2/assert/should" @@ -27,7 +28,7 @@ type EntrypointFixture struct { func (this *EntrypointFixture) Setup() { this.ctx = context.WithValue(this.Context(), "testing", this.Name()) this.work = make(chan *batch, 4) - this.subject = newEntrypoint(this, this.work) + this.subject = newEntrypoint(this, this.work, 0.80) } func (this *EntrypointFixture) Track(observation any) { @@ -93,6 +94,249 @@ func (this *EntrypointFixture) TestHandleSerializesMultipleConcurrentCalls() { this.So(inFlight, should.Equal, 0) } +func (this *EntrypointFixture) TestAwait_ReturnsAfterCompletion() { + done := make(chan struct{}) + go func() { + this.subject.await(this.ctx, "msg") + close(done) + }() + + item := <-this.work + this.So(item.messages, should.Equal, []any{"msg"}) + + select { + case <-done: + this.Fatal("await returned before complete() was invoked") + default: + } + + item.complete() + <-done + + this.So(this.tracked, should.Contain, BatchInFlight{}) + this.So(this.tracked, should.Contain, BatchComplete{}) +} + +func (this *EntrypointFixture) TestAwait_UnblocksOnContextCancelWhileWaiting() { + ctx, cancel := context.WithCancel(this.ctx) + defer cancel() + + done := make(chan struct{}) + go func() { + this.subject.await(ctx, "msg") + close(done) + }() + + item := <-this.work // enqueue succeeded; pipeline now owns the batch. + + select { + case <-done: + this.Fatal("await returned before the caller departed or completion") + default: + } + + cancel() + <-done + + this.So(this.tracked, should.Contain, BatchInFlight{}) + this.So(this.tracked, should.Contain, CallerDeparted{}) + + // The batch was NOT abandoned: the pipeline still owns it and will invoke + // complete() later. await must not have Put it back to the pool. + item.complete() + this.So(this.tracked, should.Contain, BatchComplete{}) +} + +func (this *EntrypointFixture) TestAwait_UnblocksOnContextCancelWhileEnqueuing() { + work := make(chan *batch, 1) + subject := newEntrypoint(this, work, 0.80) + work <- &batch{} // fill the channel so the next enqueue blocks. + + ctx, cancel := context.WithCancel(this.ctx) + defer cancel() + + done := make(chan struct{}) + go func() { + subject.await(ctx, "msg") + close(done) + }() + + select { + case <-done: + this.Fatal("await returned before the enqueue could block") + default: + } + + cancel() + <-done + + this.So(this.tracked, should.Contain, CallerDeparted{}) + this.So(this.tracked, should.NOT.Contain, BatchInFlight{}) + + // The never-enqueued batch was abandoned and returned to the pool. + this.So(this.tracked, should.NOT.Contain, BatchComplete{}) +} + +func (this *EntrypointFixture) TestAwait_BatchCarriesExactlyOneMessage() { + go this.subject.await(this.ctx, "only") + + item := <-this.work + this.So(item.messages, should.HaveLength, 1) + this.So(item.messages[0], should.Equal, "only") + + item.complete() +} + +func (this *EntrypointFixture) TestAwait_ClosedPipelineReturnsImmediately() { + this.So(this.subject.Close(), should.BeNil) + + done := make(chan struct{}) + go func() { + this.subject.await(this.ctx, "msg") + close(done) + }() + + select { + case <-done: + case <-time.After(time.Second): + this.Fatal("await did not return on a closed pipeline") + } + + this.So(this.tracked, should.BeEmpty) +} + +func (this *EntrypointFixture) TestAdmit_TrueWhenBelowThreshold() { + this.So(this.subject.admit(), should.BeTrue) +} + +func (this *EntrypointFixture) TestAdmit_FalseAtOrAboveThreshold_TracksLoadShed() { + work := make(chan *batch, 10) + subject := newEntrypoint(this, work, 0.5) + for range 5 { + work <- &batch{} + } + this.So(subject.admit(), should.BeFalse) + this.So(this.tracked, should.Contain, LoadShed{}) +} + +func (this *EntrypointFixture) TestAdmit_FalseWhenClosed_NoLoadShed() { + this.So(this.subject.Close(), should.BeNil) + this.So(this.subject.admit(), should.BeFalse) + this.So(this.tracked, should.NOT.Contain, LoadShed{}) +} + +func (this *EntrypointFixture) TestAdmit_ThresholdAtOrAboveOneDisablesWatermark() { + work := make(chan *batch, 4) + subject := newEntrypoint(this, work, 2.0) + for range 4 { + work <- &batch{} + } + this.So(subject.admit(), should.BeTrue) + this.So(this.tracked, should.NOT.Contain, LoadShed{}) + + this.So(subject.Close(), should.BeNil) + this.So(subject.admit(), should.BeFalse) +} + +func (this *EntrypointFixture) TestHandle_BlocksUntilDurable() { + done := make(chan struct{}) + go func() { + this.subject.Handle(this.ctx, "msg") + close(done) + }() + + item := <-this.work + + select { + case <-done: + this.Fatal("Handle returned before the batch was completed") + case <-time.After(20 * time.Millisecond): + } + + item.complete() + <-done +} + +func (this *EntrypointFixture) TestHandle_DoesNotShedAtHighWatermark() { + work := make(chan *batch, 2) + subject := newEntrypoint(this, work, 0.5) + + done := make(chan struct{}, 5) + for range 5 { + go func() { + subject.Handle(this.ctx, "msg") + done <- struct{}{} + }() + } + + // None should return while the work is unconsumed: Handle blocks on send + // (no shedding) and then waits for completion. + select { + case <-done: + this.Fatal("Handle returned before the batch was completed") + case <-time.After(20 * time.Millisecond): + } + + for range 5 { + item := <-work + item.complete() + } + for range 5 { + <-done + } + + this.So(this.tracked, should.NOT.Contain, LoadShed{}) +} + +func (this *EntrypointFixture) TestHandle_IgnoresContextCancel() { + ctx, cancel := context.WithCancel(this.ctx) + defer cancel() + + done := make(chan struct{}) + go func() { + this.subject.Handle(ctx, "msg") + close(done) + }() + + item := <-this.work + cancel() + + select { + case <-done: + this.Fatal("Handle returned on context cancel; MQ deliveries must not honor a deadline") + case <-time.After(20 * time.Millisecond): + } + + item.complete() + <-done +} + +func (this *EntrypointFixture) TestHandle_ReturnsImmediatelyOnClosedPipeline() { + this.So(this.subject.Close(), should.BeNil) + + done := make(chan struct{}) + go func() { + this.subject.Handle(this.ctx, "msg") + close(done) + }() + + select { + case <-done: + case <-time.After(time.Second): + this.Fatal("Handle did not return on a closed pipeline") + } +} + +func (this *EntrypointFixture) TestHandle_PreservesVariadicMessages() { + go this.subject.Handle(this.ctx, "a", "b", "c") + + item := <-this.work + this.So(item.messages, should.HaveLength, 3) + this.So(item.messages, should.Equal, []any{"a", "b", "c"}) + + item.complete() +} + func (this *EntrypointFixture) TestCloseReleasesListenAndClosesWorkChannel() { listened := make(chan struct{}) go func() { diff --git a/handlers/harness/admission.go b/handlers/harness/admission.go new file mode 100644 index 0000000..a2ee1dc --- /dev/null +++ b/handlers/harness/admission.go @@ -0,0 +1,52 @@ +package harness + +import ( + "context" + "net/http" + + "github.com/smarty/messaging/v3" +) + +type ( + admitter interface { + admit() bool + } + awaiter interface { + await(ctx context.Context, message any) + } +) + +// Admission refuses overloaded requests before the wrapped handler runs, +// writing an inline 503. Wrap each mutating route with it. +func Admission(handler messaging.Handler, inner http.Handler) http.Handler { + gate := handler.(admitter) + return http.HandlerFunc(func(response http.ResponseWriter, request *http.Request) { + if gate.admit() { + inner.ServeHTTP(response, request) + return + } + response.Header().Set("Content-Type", "application/json; charset=utf-8") + response.Header().Set("Retry-After", "1") + response.WriteHeader(http.StatusServiceUnavailable) + _, _ = response.Write(shedResponseBody) + }) +} + +var shedResponseBody = []byte(`{"errors":[{"message":"service overloaded"}]}`) + +// AsHTTPHandler adapts the void, context-honoring await to the +// messaging.Handler the HTTP shells already depend on, so no shell (and no +// shell test) changes. +func AsHTTPHandler(handler messaging.Handler) messaging.Handler { + return &httpAdapter{target: handler.(awaiter)} +} + +type httpAdapter struct { + target awaiter +} + +func (this *httpAdapter) Handle(ctx context.Context, messages ...any) { + for _, message := range messages { + this.target.await(ctx, message) + } +} diff --git a/handlers/harness/admission_test.go b/handlers/harness/admission_test.go new file mode 100644 index 0000000..3a94e16 --- /dev/null +++ b/handlers/harness/admission_test.go @@ -0,0 +1,88 @@ +package harness + +import ( + "context" + "net/http" + "net/http/httptest" + "testing" + + "github.com/smarty/gunit/v2" + "github.com/smarty/gunit/v2/assert/should" +) + +func TestAdmissionFixture(t *testing.T) { + gunit.Run(new(AdmissionFixture), t) +} + +type AdmissionFixture struct { + *gunit.Fixture + ctx context.Context +} + +func (this *AdmissionFixture) Setup() { + this.ctx = context.WithValue(this.Context(), "testing", this.Name()) +} + +type fakeAwaiter struct { + ctxs []context.Context + messages []any +} + +func (this *fakeAwaiter) Handle(context.Context, ...any) {} +func (this *fakeAwaiter) await(ctx context.Context, message any) { + this.ctxs = append(this.ctxs, ctx) + this.messages = append(this.messages, message) +} + +func (this *AdmissionFixture) TestAsHTTPHandler_ForwardsSingleMessageToAwait() { + fake := &fakeAwaiter{} + AsHTTPHandler(fake).Handle(this.ctx, "x") + this.So(fake.messages, should.Equal, []any{"x"}) + this.So(fake.ctxs, should.HaveLength, 1) + this.So(fake.ctxs[0].Value("testing"), should.Equal, this.Name()) +} + +func (this *AdmissionFixture) TestAsHTTPHandler_ForwardsEachMessageInOrder() { + fake := &fakeAwaiter{} + AsHTTPHandler(fake).Handle(this.ctx, "a", "b") + this.So(fake.messages, should.Equal, []any{"a", "b"}) +} + +type fakeAdmitter struct { + allow bool +} + +func (this *fakeAdmitter) Handle(context.Context, ...any) {} +func (this *fakeAdmitter) admit() bool { return this.allow } + +func (this *AdmissionFixture) TestAdmission_PassesThroughWhenAdmitted() { + var ran bool + inner := http.HandlerFunc(func(response http.ResponseWriter, _ *http.Request) { + ran = true + response.WriteHeader(http.StatusTeapot) + _, _ = response.Write([]byte("inner")) + }) + + recorder := httptest.NewRecorder() + request := httptest.NewRequest(http.MethodPost, "/admin/orders", nil) + Admission(&fakeAdmitter{allow: true}, inner).ServeHTTP(recorder, request) + + this.So(ran, should.BeTrue) + this.So(recorder.Code, should.Equal, http.StatusTeapot) + this.So(recorder.Body.String(), should.Equal, "inner") +} + +func (this *AdmissionFixture) TestAdmission_Writes503WhenRejected() { + var ran bool + inner := http.HandlerFunc(func(http.ResponseWriter, *http.Request) { ran = true }) + + recorder := httptest.NewRecorder() + request := httptest.NewRequest(http.MethodPost, "/admin/orders", nil) + Admission(&fakeAdmitter{allow: false}, inner).ServeHTTP(recorder, request) + + this.So(ran, should.BeFalse) + this.So(recorder.Code, should.Equal, http.StatusServiceUnavailable) + this.So(recorder.Header().Get("Content-Type"), should.Equal, "application/json; charset=utf-8") + this.So(recorder.Header().Get("Retry-After"), should.Equal, "1") + this.So(recorder.Body.String(), should.Equal, `{"errors":[{"message":"service overloaded"}]}`) +} diff --git a/handlers/harness/config.go b/handlers/harness/config.go index 2be3ff4..baabbb0 100644 --- a/handlers/harness/config.go +++ b/handlers/harness/config.go @@ -45,8 +45,10 @@ type configuration struct { Dispatcher Dispatcher Types []any BatchCapacity int + UnitCapacity int UnitSize int SerializerCount int + ShedThreshold float64 } // Types registers the domain objects whose Execute.../Apply... methods drive @@ -69,12 +71,18 @@ func (singleton) Dispatcher(value Dispatcher) option { func (singleton) BatchCapacity(value int) option { return func(this *configuration) { this.BatchCapacity = value } } +func (singleton) UnitCapacity(value int) option { + return func(this *configuration) { this.UnitCapacity = value } +} func (singleton) UnitSize(value int) option { return func(this *configuration) { this.UnitSize = value } } func (singleton) SerializerCount(value int) option { return func(this *configuration) { this.SerializerCount = value } } +func (singleton) ShedThreshold(value float64) option { + return func(this *configuration) { this.ShedThreshold = value } +} func (singleton) defaults(options ...option) []option { blank := nop{} @@ -84,8 +92,10 @@ func (singleton) defaults(options ...option) []option { Options.Writer(blank), Options.Dispatcher(blank), Options.BatchCapacity(1024), + Options.UnitCapacity(1), Options.UnitSize(64), Options.SerializerCount(4), + Options.ShedThreshold(0.80), }, options...) } diff --git a/handlers/harness/config_test.go b/handlers/harness/config_test.go index db36b69..f00f081 100644 --- a/handlers/harness/config_test.go +++ b/handlers/harness/config_test.go @@ -28,8 +28,10 @@ func (this *ConfigFixture) apply(options ...option) configuration { func (this *ConfigFixture) TestDefaultsPopulateCapacities() { cfg := this.apply() this.So(cfg.BatchCapacity, should.Equal, 1024) + this.So(cfg.UnitCapacity, should.Equal, 1) this.So(cfg.UnitSize, should.Equal, 64) this.So(cfg.SerializerCount, should.Equal, 4) + this.So(cfg.ShedThreshold, should.Equal, 0.80) } func (this *ConfigFixture) TestDefaultCollaboratorsAreNop() { @@ -48,12 +50,16 @@ func (this *ConfigFixture) TestTypesOptionStoresValuesVerbatim() { func (this *ConfigFixture) TestTunableOptionsOverrideDefaults() { cfg := this.apply( Options.BatchCapacity(2), + Options.UnitCapacity(2), Options.UnitSize(8), Options.SerializerCount(3), + Options.ShedThreshold(0.5), ) this.So(cfg.BatchCapacity, should.Equal, 2) + this.So(cfg.UnitCapacity, should.Equal, 2) this.So(cfg.UnitSize, should.Equal, 8) this.So(cfg.SerializerCount, should.Equal, 3) + this.So(cfg.ShedThreshold, should.Equal, 0.5) } func (this *ConfigFixture) TestCollaboratorOptionsOverrideDefaults() { diff --git a/handlers/harness/contracts.go b/handlers/harness/contracts.go index 6ecf383..d0f7be5 100644 --- a/handlers/harness/contracts.go +++ b/handlers/harness/contracts.go @@ -40,6 +40,8 @@ type ( type ( BatchInFlight struct{} BatchComplete struct{} + LoadShed struct{} + CallerDeparted struct{} UnitOfWorkInFlight struct{} UnitOfWorkComplete struct{} SerializationError struct { diff --git a/handlers/harness/fanout.go b/handlers/harness/fanout.go index 8d18dc0..a517216 100644 --- a/handlers/harness/fanout.go +++ b/handlers/harness/fanout.go @@ -8,13 +8,13 @@ import ( type stationFactory func(in, out chan *unitOfWork) messaging.Listener -func newFanOut(factory stationFactory, workerCount int, input, finalOutput chan *unitOfWork) []messaging.Listener { +func newFanOut(factory stationFactory, workerCount, unitCapacity int, input, finalOutput chan *unitOfWork) []messaging.Listener { var ( listeners = make([]messaging.Listener, workerCount) outputs = make([]chan *unitOfWork, workerCount) ) for i := range workerCount { - outputs[i] = make(chan *unitOfWork, 1024) + outputs[i] = make(chan *unitOfWork, unitCapacity) listeners[i] = factory(input, outputs[i]) } return append(listeners, newFanIn(outputs, finalOutput)) diff --git a/handlers/harness/pipeline.go b/handlers/harness/pipeline.go index a14073d..1e457f4 100644 --- a/handlers/harness/pipeline.go +++ b/handlers/harness/pipeline.go @@ -10,17 +10,17 @@ import ( func build(ctx context.Context, config configuration) (messaging.Handler, []messaging.Listener) { var ( batches = make(chan *batch, config.BatchCapacity) - work1 = make(chan *unitOfWork, config.BatchCapacity) - work2 = make(chan *unitOfWork, config.BatchCapacity) - work3 = make(chan *unitOfWork, config.BatchCapacity) - work4 = make(chan *unitOfWork, config.BatchCapacity) - work5 = make(chan *unitOfWork, config.BatchCapacity) + work1 = make(chan *unitOfWork, config.UnitCapacity) + work2 = make(chan *unitOfWork, config.UnitCapacity) + work3 = make(chan *unitOfWork, config.UnitCapacity) + work4 = make(chan *unitOfWork, config.UnitCapacity) + work5 = make(chan *unitOfWork, config.UnitCapacity) ) var ( - entrypoint = newEntrypoint(config.Monitor, batches) + entrypoint = newEntrypoint(config.Monitor, batches, config.ShedThreshold) executor = newExecution(config.Monitor, config.UnitSize, batches, work1, newRouter(config.Types...)) - serializers = newFanOut(serializationFactory(config.Monitor, config.Serializer), config.SerializerCount, work1, work2) + serializers = newFanOut(serializationFactory(config.Monitor, config.Serializer), config.SerializerCount, config.UnitCapacity, work1, work2) persistence = newPersistence(ctx, config.Monitor, work2, work3, config.Writer, time.Sleep) completion = newCompletion(work3, work4) broadcast = newBroadcast(ctx, config.Monitor, work4, work5, config.Dispatcher, time.Sleep) From 7518c4954f5f2f25ec76e530e7e8b32b2c164468 Mon Sep 17 00:00:00 2001 From: Michael Whatcott Date: Fri, 29 May 2026 17:12:03 -0600 Subject: [PATCH 16/50] Check for closed chan first to avoid wasted work. --- handlers/harness/00_entrypoint.go | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/handlers/harness/00_entrypoint.go b/handlers/harness/00_entrypoint.go index 5486599..89b43eb 100644 --- a/handlers/harness/00_entrypoint.go +++ b/handlers/harness/00_entrypoint.go @@ -54,36 +54,36 @@ func (this *entrypoint) waiterDone(waiter *sync.WaitGroup) (done chan struct{}) } func (this *entrypoint) Handle(ctx context.Context, messages ...any) { - waiter, item := this.prepare(ctx, messages...) - defer this.waiters.Put(waiter) - this.lock.RLock() if this.closed { this.lock.RUnlock() - this.abandon(waiter, item) return } + + waiter, item := this.prepare(ctx, messages...) + defer this.waiters.Put(waiter) + this.work <- item - this.monitor.Track(batchInFlight) this.lock.RUnlock() + this.monitor.Track(batchInFlight) waiter.Wait() } func (this *entrypoint) await(ctx context.Context, message any) { - waiter, item := this.prepare(ctx, message) - defer this.waiters.Put(waiter) - this.lock.RLock() if this.closed { this.lock.RUnlock() - this.abandon(waiter, item) return } + + waiter, item := this.prepare(ctx, message) + defer this.waiters.Put(waiter) + select { case this.work <- item: - this.monitor.Track(batchInFlight) this.lock.RUnlock() + this.monitor.Track(batchInFlight) case <-ctx.Done(): this.lock.RUnlock() this.abandon(waiter, item) From 142f5fda342a2a6b06b70ea5f6bbfcab3e2b683b Mon Sep 17 00:00:00 2001 From: Michael Whatcott Date: Mon, 1 Jun 2026 11:06:14 -0600 Subject: [PATCH 17/50] fmt --- handlers/harness/00_entrypoint.go | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/handlers/harness/00_entrypoint.go b/handlers/harness/00_entrypoint.go index 89b43eb..a36950b 100644 --- a/handlers/harness/00_entrypoint.go +++ b/handlers/harness/00_entrypoint.go @@ -127,7 +127,9 @@ func (this *entrypoint) Close() error { return nil } -var batchInFlight BatchInFlight -var batchComplete BatchComplete -var loadShed LoadShed -var callerDeparted CallerDeparted +var ( + batchInFlight BatchInFlight + batchComplete BatchComplete + loadShed LoadShed + callerDeparted CallerDeparted +) From 7c4f36db3d7ccd44535d991cfc135c6df6c2142a Mon Sep 17 00:00:00 2001 From: Michael Whatcott Date: Mon, 1 Jun 2026 11:17:02 -0600 Subject: [PATCH 18/50] Consistent receiver. --- handlers/harness/pool.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/handlers/harness/pool.go b/handlers/harness/pool.go index 821d1d8..239d6ad 100644 --- a/handlers/harness/pool.go +++ b/handlers/harness/pool.go @@ -9,5 +9,5 @@ type poolT[T any] struct{ pool *sync.Pool } func newPoolT[T any](new func() T) *poolT[T] { return &poolT[T]{pool: &sync.Pool{New: func() any { return new() }}} } -func (p *poolT[T]) Get() T { return p.pool.Get().(T) } -func (p *poolT[T]) Put(t T) { p.pool.Put(t) } +func (this *poolT[T]) Get() T { return this.pool.Get().(T) } +func (this *poolT[T]) Put(t T) { this.pool.Put(t) } From 6fdfa392d682595585a1c4f49af2f3b40630ef85 Mon Sep 17 00:00:00 2001 From: Michael Whatcott Date: Mon, 1 Jun 2026 11:20:05 -0600 Subject: [PATCH 19/50] Ensure log line is emitted in all cases. --- handlers/harness/sqladapter/recovery.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/handlers/harness/sqladapter/recovery.go b/handlers/harness/sqladapter/recovery.go index a509ab2..84c1e14 100644 --- a/handlers/harness/sqladapter/recovery.go +++ b/handlers/harness/sqladapter/recovery.go @@ -57,10 +57,9 @@ func Recover(ctx context.Context, handle *sql.DB, dispatcher *Dispatcher, logger if err := rows.Err(); err != nil { return err } - if len(messages) == 0 { - return nil + if len(messages) > 0 { + err = dispatcher.Dispatch(ctx, messages...) } - err = dispatcher.Dispatch(ctx, messages...) logger.Printf("[INFO] Recovering %d total undispatched message(s) from previous run.", total) return err } From d6c1ba0ff8cc476eee263e7309d91b9516a05852 Mon Sep 17 00:00:00 2001 From: Michael Whatcott Date: Mon, 1 Jun 2026 12:47:52 -0600 Subject: [PATCH 20/50] Draft fix for mismanaged (pooled) waitgroup. --- ...-49-proposal-waiter-pool-corruption-fix.md | 247 ++++++++++++++++++ 1 file changed, 247 insertions(+) create mode 100644 doc/work-sessions/2026/2026-06-01_12-19-49-proposal-waiter-pool-corruption-fix.md diff --git a/doc/work-sessions/2026/2026-06-01_12-19-49-proposal-waiter-pool-corruption-fix.md b/doc/work-sessions/2026/2026-06-01_12-19-49-proposal-waiter-pool-corruption-fix.md new file mode 100644 index 0000000..c4c7d30 --- /dev/null +++ b/doc/work-sessions/2026/2026-06-01_12-19-49-proposal-waiter-pool-corruption-fix.md @@ -0,0 +1,247 @@ +--- +name: Fix pooled WaitGroup corruption in entrypoint.await +description: Stop returning a pooled *sync.WaitGroup to the pool while its count is non-zero and a detached Wait() goroutine still references it, eliminating a sync.WaitGroup-misuse / cross-request corruption hazard on the caller-departed path. +type: plot +--- + +# Proposal: Fix pooled `WaitGroup` corruption in `entrypoint.await` + +## Background + +`handlers/harness/00_entrypoint.go` exposes two ingress paths into the pipeline: + +- `Handle(ctx, messages...)` — the at-least-once (RMQ) path. It enqueues a batch + and blocks **inline** on `waiter.Wait()` until the pipeline calls `complete()`. +- `await(ctx, message)` — the context-honoring (HTTP) path. It enqueues a batch + and then waits on **either** completion **or** `ctx.Done()`, so a departed + caller (cancelled request) does not block on durable processing. + +Both paths share `prepare(...)`, which leases a `*sync.WaitGroup` from a +`sync.Pool` (`this.waiters`), calls `waiter.Add(1)`, and captures that waiter in +the batch's `complete` closure: + +```go +func (this *entrypoint) prepare(ctx context.Context, messages ...any) (waiter *sync.WaitGroup, item *batch) { + waiter = this.waiters.Get() + waiter.Add(1) + item = this.batches.Get() + item.ctx = ctx + item.messages = messages + item.complete = func() { + waiter.Done() + this.monitor.Track(batchComplete) + this.batches.Put(item) + } + return waiter, item +} +``` + +`complete()` is invoked **later**, by the pipeline's `completion` stage +(`04_completion.go`), once the batch is durably persisted. + +### The bug + +`await` returns the waiter to the pool unconditionally via `defer`: + +```go +func (this *entrypoint) await(ctx context.Context, message any) { + ... + waiter, item := this.prepare(ctx, message) + defer this.waiters.Put(waiter) // (B) blanket Put — the defect + + select { + case this.work <- item: + this.lock.RUnlock() + this.monitor.Track(batchInFlight) + case <-ctx.Done(): + this.lock.RUnlock() + this.abandon(waiter, item) // calls waiter.Done() -> count 0 + this.monitor.Track(callerDeparted) + return + } + + select { + case <-this.waiterDone(waiter): // detached goroutine: waiter.Wait(); close(done) + case <-ctx.Done(): // (A) caller departed WHILE waiting + this.monitor.Track(callerDeparted) + } +} +``` + +On path **(A)** — the caller's context is cancelled *after* the batch was +successfully enqueued — `await` returns and the deferred `Put` (B) returns the +waiter to the pool. But at that instant: + +1. The pipeline still owns the batch and **has not yet called `complete()`**, so + the waiter's internal counter is still `1` (the `Add(1)` from `prepare` has no + matching `Done()` yet). +2. The detached goroutine spawned by `waiterDone(waiter)` is **still blocked + inside `waiter.Wait()`**, holding a live reference to the same waiter. + +A subsequent `prepare()` can then `Get()` that very waiter and call `Add(1)` +again. Per the `sync.WaitGroup` contract, *"if a WaitGroup is reused to wait for +several independent sets of events, new `Add` calls must happen after all +previous `Wait` calls have returned"*. Here a new `Add` races a previous `Wait` +that has not returned, which is documented misuse. Consequences range from the +Go runtime panicking (`sync: WaitGroup is reused before previous Wait has +returned` / `WaitGroup misuse: Add called concurrently with Wait`) to silent +cross-request corruption, where one HTTP request's `await` blocks until an +unrelated earlier batch also completes. + +### Why the other paths are safe (and stay safe) + +| Path | State of waiter when returned to pool | Detached `Wait()` goroutine? | Safe? | +|------------------------------------|--------------------------------------------------|---------------------------------|--------| +| `Handle` normal | `Wait()` returned inline → count 0 | none (waits inline) | yes | +| `await` enqueue-failed (`abandon`) | `abandon` called `Done()` → count 0 | none (2nd select not reached) | yes | +| `await` normal completion | `waiterDone` fired → `Wait()` returned → count 0 | finished before `done` received | yes | +| `await` departed-while-waiting (A) | count still 1, `complete()` pending | **still blocked in `Wait()`** | **NO** | + +Only path (A) is defective. The fix must keep the safe paths recycling waiters +(the pool exists to cut allocations under load) while never recycling a waiter +that is still "in use." + +## Approach + +**Recommended: Option A — targeted lifecycle fix (minimal diff).** + +Remove the blanket `defer this.waiters.Put(waiter)` and instead return the +waiter to the pool *only* at the three points where it is provably quiescent +(counter at 0, no detached goroutine still reading it). On the departed-while- +waiting path, deliberately do **not** recycle the waiter: let it fall out of +scope. `complete()` will still fire later, unblocking the detached `waiterDone` +goroutine, after which both the waiter and the goroutine become garbage. We +"lose" exactly one pooled waiter per departed-in-flight request — an exceptional +path — which `sync.Pool` is designed to tolerate. + +Revised `await`: + +```go +func (this *entrypoint) await(ctx context.Context, message any) { + this.lock.RLock() + if this.closed { + this.lock.RUnlock() + return + } + + waiter, item := this.prepare(ctx, message) + + select { + case this.work <- item: + this.lock.RUnlock() + this.monitor.Track(batchInFlight) + case <-ctx.Done(): + this.lock.RUnlock() + this.abandon(waiter, item) + this.waiters.Put(waiter) // safe: abandon() called Done() (count 0); no detached waiter. + this.monitor.Track(callerDeparted) + return + } + + select { + case <-this.waiterDone(waiter): + this.waiters.Put(waiter) // safe: detached Wait() returned before done was closed (count 0). + case <-ctx.Done(): + this.monitor.Track(callerDeparted) + // Intentionally do NOT recycle the waiter here: complete() is still + // pending and the detached waiterDone goroutine is still inside Wait(). + // Returning it to the pool now would let a later prepare() call Add(1) + // before this Wait() returns -- documented sync.WaitGroup misuse. The + // waiter is released to GC once complete() fires. + } +} +``` + +`Handle` is unchanged: its `defer this.waiters.Put(waiter)` runs only after its +inline `waiter.Wait()` returns, so the waiter is quiescent at recycle time. + +`prepare`, `abandon`, and `waiterDone` are unchanged. + +### Alternative considered — Option B: replace the pooled WaitGroup with a single-use completion channel + +Each ingress waits for **exactly one** `complete()` call, so a `sync.WaitGroup` +is heavier than needed. We could give `batch` a `done chan struct{}` that +`complete()` closes once; `Handle` does `<-done` and `await` does +`select { case <-done: ... case <-ctx.Done(): ... }`. This eliminates the +detached `waiterDone` goroutine and the entire pool-reuse hazard class, because +a channel is single-use and never recycled — an abandoned `done` channel is +simply collected by GC. + +Rejected as the primary fix because: + +- It is a larger change that touches `Handle`'s hot path and removes the + `waiters` pool the author added deliberately for allocation reduction. +- It trades pooled-waiter reuse for a per-request channel allocation. +- The user's request is scoped to *resolving the corruption concern*, and + Option A does so with a minimal, reviewable diff while preserving the + existing design intent and all current passing tests. + +Option B remains a reasonable future simplification if the detached goroutine or +per-request waiter churn proves costly; noting it here so the decision is +explicit. **Open question for the reviewer:** prefer the minimal Option A, or +take the larger Option B cleanup now? + +## Trade-offs & Risks + +- **One pooled waiter is dropped per departed-in-flight request.** This is the + exceptional path (HTTP caller cancelled after enqueue). Under sustained + cancellation storms the pool simply allocates fresh waiters; `sync.Pool` + absorbs this without leaking memory (dropped waiters are GC'd once their + pending `complete()` fires). No unbounded growth. +- **The detached `waiterDone` goroutine still lingers until `complete()`** on the + departed path. This is pre-existing behavior and is bounded by pipeline drain + time; the work is genuinely still in flight, so tracking its completion with + one goroutine is acceptable. Option A does not worsen this; Option B would + remove it. +- **Deterministic red-test difficulty.** The corruption only manifests on pool + *reuse* under concurrency, and `sync.Pool.Get` gives no reuse guarantee, so a + strictly deterministic failing unit test is not practical. The honest red test + is a race-enabled stress test (see Phase 1) that reliably trips Go's built-in + `WaitGroup` misuse detector before the fix and passes after. This is called out + explicitly rather than hidden behind a false "deterministic" claim. + +## Implementation Checklist + +### Phase 1: Capture the corruption (red) + +- [ ] Add `TestAwait_DepartedInFlightDoesNotCorruptPooledWaiter` to + `handlers/harness/00_entrypoint_test.go`. The test, in a loop sized for + reliable surfacing (e.g. a few thousand iterations), should: (1) start an + `await` whose context it cancels *after* the batch is received from + `this.work` (departed-while-waiting), deferring the matching `complete()` to a + background goroutine; (2) concurrently issue fresh `await`/`Handle` calls on + the **same** entrypoint that drive `prepare()` (and thus pool `Get`/`Add`), + completing each; so a recycled-too-early waiter is exercised by a new request. +- [ ] Run `go test -race -run TestAwait_DepartedInFlightDoesNotCorruptPooledWaiter ./handlers/harness/` + and confirm it fails for the right reason: a `sync: WaitGroup ...` misuse panic + or a race report on the waiter (NOT a generic assertion mismatch). Record the + observed failure mode in the PR description. + +### Phase 2: Apply the lifecycle fix (green) + +- [ ] In `00_entrypoint.go`, remove `defer this.waiters.Put(waiter)` from + `await`. +- [ ] In `await`, add `this.waiters.Put(waiter)` immediately after + `this.abandon(waiter, item)` on the enqueue-failed branch. +- [ ] In `await`, add `this.waiters.Put(waiter)` in the + `case <-this.waiterDone(waiter):` branch (normal completion). +- [ ] In `await`, in the `case <-ctx.Done():` branch of the second `select`, + add the explanatory comment documenting why the waiter is intentionally NOT + recycled there. +- [ ] Confirm `Handle` is unchanged and still recycles via its inline-`Wait()` + `defer this.waiters.Put(waiter)`. + +### Phase 3: Verify (green) and guard regressions + +- [ ] Run `go test -race -run TestAwait_DepartedInFlightDoesNotCorruptPooledWaiter ./handlers/harness/` + and confirm it now passes with no panic and a clean race report. +- [ ] Confirm the existing departed-path tests still pass unchanged: + `TestAwait_UnblocksOnContextCancelWhileWaiting` (departed-while-waiting still + completes and tracks `CallerDeparted` + later `BatchComplete`) and + `TestAwait_UnblocksOnContextCancelWhileEnqueuing` (abandon path still tracks + `CallerDeparted`, no `BatchInFlight`/`BatchComplete`). +- [ ] Run the full package suite: `make test` (exercises `go fmt`, `go vet`, + `-race`, coverage). Confirm green. +- [ ] Re-read the diff against `## CLAUDE.md` Go conventions: receiver named + `this`, no naked returns, no new blank lines at method start/end, struct + initializers use field/value pairs (no new initializers introduced here). From 589f29b98c8fc96a921c36429d645373d60269e9 Mon Sep 17 00:00:00 2001 From: Michael Whatcott Date: Mon, 1 Jun 2026 13:28:30 -0600 Subject: [PATCH 21/50] Resolve potential corruption of pooled (reused) waitgroup. --- ...-49-proposal-waiter-pool-corruption-fix.md | 31 ++++++++------ handlers/harness/00_entrypoint.go | 29 ++++++++----- handlers/harness/00_entrypoint_test.go | 41 +++++++++++++++++++ 3 files changed, 78 insertions(+), 23 deletions(-) diff --git a/doc/work-sessions/2026/2026-06-01_12-19-49-proposal-waiter-pool-corruption-fix.md b/doc/work-sessions/2026/2026-06-01_12-19-49-proposal-waiter-pool-corruption-fix.md index c4c7d30..856a00b 100644 --- a/doc/work-sessions/2026/2026-06-01_12-19-49-proposal-waiter-pool-corruption-fix.md +++ b/doc/work-sessions/2026/2026-06-01_12-19-49-proposal-waiter-pool-corruption-fix.md @@ -204,7 +204,7 @@ take the larger Option B cleanup now? ### Phase 1: Capture the corruption (red) -- [ ] Add `TestAwait_DepartedInFlightDoesNotCorruptPooledWaiter` to +- [x] Add `TestAwait_DepartedInFlightDoesNotCorruptPooledWaiter` to `handlers/harness/00_entrypoint_test.go`. The test, in a loop sized for reliable surfacing (e.g. a few thousand iterations), should: (1) start an `await` whose context it cancels *after* the batch is received from @@ -212,36 +212,43 @@ take the larger Option B cleanup now? background goroutine; (2) concurrently issue fresh `await`/`Handle` calls on the **same** entrypoint that drive `prepare()` (and thus pool `Get`/`Add`), completing each; so a recycled-too-early waiter is exercised by a new request. -- [ ] Run `go test -race -run TestAwait_DepartedInFlightDoesNotCorruptPooledWaiter ./handlers/harness/` + (Implemented as 8 worker goroutines × 2000 departing `await`s against a + concurrent drainer that completes each enqueued batch — this produces the + recycle/reuse race more reliably than a sequential loop.) +- [x] Run `go test -race -run TestAwait_DepartedInFlightDoesNotCorruptPooledWaiter ./handlers/harness/` and confirm it fails for the right reason: a `sync: WaitGroup ...` misuse panic or a race report on the waiter (NOT a generic assertion mismatch). Record the observed failure mode in the PR description. + **Observed:** `WARNING: DATA RACE` on the waiter (read in `await` at + `00_entrypoint.go:80` vs. write from the detached `waiterDone` goroutine) and + `panic: sync: WaitGroup is reused before previous Wait has returned`. ### Phase 2: Apply the lifecycle fix (green) -- [ ] In `00_entrypoint.go`, remove `defer this.waiters.Put(waiter)` from +- [x] In `00_entrypoint.go`, remove `defer this.waiters.Put(waiter)` from `await`. -- [ ] In `await`, add `this.waiters.Put(waiter)` immediately after +- [x] In `await`, add `this.waiters.Put(waiter)` immediately after `this.abandon(waiter, item)` on the enqueue-failed branch. -- [ ] In `await`, add `this.waiters.Put(waiter)` in the +- [x] In `await`, add `this.waiters.Put(waiter)` in the `case <-this.waiterDone(waiter):` branch (normal completion). -- [ ] In `await`, in the `case <-ctx.Done():` branch of the second `select`, +- [x] In `await`, in the `case <-ctx.Done():` branch of the second `select`, add the explanatory comment documenting why the waiter is intentionally NOT recycled there. -- [ ] Confirm `Handle` is unchanged and still recycles via its inline-`Wait()` +- [x] Confirm `Handle` is unchanged and still recycles via its inline-`Wait()` `defer this.waiters.Put(waiter)`. ### Phase 3: Verify (green) and guard regressions -- [ ] Run `go test -race -run TestAwait_DepartedInFlightDoesNotCorruptPooledWaiter ./handlers/harness/` +- [x] Run `go test -race -run TestAwait_DepartedInFlightDoesNotCorruptPooledWaiter ./handlers/harness/` and confirm it now passes with no panic and a clean race report. -- [ ] Confirm the existing departed-path tests still pass unchanged: +- [x] Confirm the existing departed-path tests still pass unchanged: `TestAwait_UnblocksOnContextCancelWhileWaiting` (departed-while-waiting still completes and tracks `CallerDeparted` + later `BatchComplete`) and `TestAwait_UnblocksOnContextCancelWhileEnqueuing` (abandon path still tracks `CallerDeparted`, no `BatchInFlight`/`BatchComplete`). -- [ ] Run the full package suite: `make test` (exercises `go fmt`, `go vet`, - `-race`, coverage). Confirm green. -- [ ] Re-read the diff against `## CLAUDE.md` Go conventions: receiver named +- [x] Run the full package suite: `make test` (exercises `go fmt`, `go vet`, + `-race`, coverage). Confirm green. (All packages pass; `handlers/harness` + coverage 99.6%.) +- [x] Re-read the diff against `## CLAUDE.md` Go conventions: receiver named `this`, no naked returns, no new blank lines at method start/end, struct initializers use field/value pairs (no new initializers introduced here). diff --git a/handlers/harness/00_entrypoint.go b/handlers/harness/00_entrypoint.go index a36950b..7e57007 100644 --- a/handlers/harness/00_entrypoint.go +++ b/handlers/harness/00_entrypoint.go @@ -28,18 +28,18 @@ func newEntrypoint(monitor Monitor, work chan *batch, shedThreshold float64) *en } } -func (this *entrypoint) prepare(ctx context.Context, messages ...any) (waiter *sync.WaitGroup, item *batch) { +func (this *entrypoint) prepare(ctx context.Context, messages ...any) (waiter *sync.WaitGroup, batch *batch) { waiter = this.waiters.Get() waiter.Add(1) - item = this.batches.Get() - item.ctx = ctx - item.messages = messages - item.complete = func() { + batch = this.batches.Get() + batch.ctx = ctx + batch.messages = messages + batch.complete = func() { waiter.Done() this.monitor.Track(batchComplete) - this.batches.Put(item) + this.batches.Put(batch) } - return waiter, item + return waiter, batch } func (this *entrypoint) abandon(waiter *sync.WaitGroup, item *batch) { @@ -77,24 +77,31 @@ func (this *entrypoint) await(ctx context.Context, message any) { return } - waiter, item := this.prepare(ctx, message) - defer this.waiters.Put(waiter) + waiter, batch := this.prepare(ctx, message) select { - case this.work <- item: + case this.work <- batch: this.lock.RUnlock() this.monitor.Track(batchInFlight) case <-ctx.Done(): this.lock.RUnlock() - this.abandon(waiter, item) + this.abandon(waiter, batch) + this.waiters.Put(waiter) // safe: abandon() called Done() (count 0); no detached waiter. this.monitor.Track(callerDeparted) return } select { case <-this.waiterDone(waiter): + this.waiters.Put(waiter) // safe: detached Wait() returned before done was closed (count 0). case <-ctx.Done(): this.monitor.Track(callerDeparted) + // Intentionally do NOT recycle the waiter here: complete() is still pending + // and the detached waiterDone goroutine is still inside Wait(). Returning it + // to the pool now would let a later prepare() Add(1) before this Wait() + // returns -- documented sync.WaitGroup misuse. Get() already removed the + // pool's reference, so declining to Put() leaves nothing pool-held; the + // waiter is released to GC once complete() fires. } } diff --git a/handlers/harness/00_entrypoint_test.go b/handlers/harness/00_entrypoint_test.go index 6bf9e32..c319675 100644 --- a/handlers/harness/00_entrypoint_test.go +++ b/handlers/harness/00_entrypoint_test.go @@ -177,6 +177,47 @@ func (this *EntrypointFixture) TestAwait_UnblocksOnContextCancelWhileEnqueuing() this.So(this.tracked, should.NOT.Contain, BatchComplete{}) } +func (this *EntrypointFixture) TestAwait_DepartedInFlightDoesNotCorruptPooledWaiter() { + const workers = 8 + const perWorker = 2000 + + work := make(chan *batch, 64) + subject := newEntrypoint(this, work, 0.80) + + // Drainer: take ownership of each enqueued batch and complete it promptly, + // driving the waiter's count to zero concurrently with new awaits that recycle + // waiters from the pool. If a departed await recycles a still-in-use waiter, a + // later prepare()'s Add(1) races the detached Wait()'s return -- tripping the + // runtime's WaitGroup misuse detector. + drained := make(chan struct{}) + go func() { + defer close(drained) + for item := range work { + item.complete() + } + }() + + var clients sync.WaitGroup + for range workers { + clients.Add(1) + go func() { + defer clients.Done() + for range perWorker { + ctx, cancel := context.WithCancel(this.ctx) + go cancel() // depart at an arbitrary moment relative to processing. + subject.await(ctx, "msg") + } + }() + } + clients.Wait() + + this.So(subject.Close(), should.BeNil) + <-drained + + this.So(this.tracked, should.Contain, CallerDeparted{}) + this.So(this.tracked, should.Contain, BatchComplete{}) +} + func (this *EntrypointFixture) TestAwait_BatchCarriesExactlyOneMessage() { go this.subject.await(this.ctx, "only") From 2cb1352563fb95bb6706764fc975f5c7f8aed648 Mon Sep 17 00:00:00 2001 From: Michael Whatcott Date: Mon, 1 Jun 2026 13:55:57 -0600 Subject: [PATCH 22/50] When configured serialization fails, fall back to fmt.Sprintf("%#v"). --- handlers/harness/02_serialization.go | 4 ++++ handlers/harness/02_serialization_test.go | 10 ++++++++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/handlers/harness/02_serialization.go b/handlers/harness/02_serialization.go index 5fc18a8..d126803 100644 --- a/handlers/harness/02_serialization.go +++ b/handlers/harness/02_serialization.go @@ -31,6 +31,10 @@ func (this *serialization) Listen() { failure.Error = fmt.Errorf("%w: %w", ErrSerialization, err) failure.Value = message.Value this.monitor.Track(failure) + + message.Content.Reset() + message.ContentType = "go fmt.Sprintf(%#v)" + _, _ = fmt.Fprintf(message.Content, "%#v", message.Value) // Not JSON, but it will have to do... } failure.Error = nil failure.Value = nil diff --git a/handlers/harness/02_serialization_test.go b/handlers/harness/02_serialization_test.go index 1bd15a8..53ae893 100644 --- a/handlers/harness/02_serialization_test.go +++ b/handlers/harness/02_serialization_test.go @@ -111,9 +111,14 @@ func (this *SerializationFixture) TestClosedInputClosesOutput() { this.So(this.tracked, should.BeEmpty) } -func (this *SerializationFixture) TestSerializerErrorIsTracked() { +func (this *SerializationFixture) TestSerializerErrorIsTracked_FallbackToFmtSprintfEncoding() { + type TestMessage struct { + Value string `json:"value"` + } boom := errors.New("boom") - value := "unserializable" + value := TestMessage{ + Value: "unserializable", + } this.serializeFail[value] = boom unit := &unitOfWork{results: []*Message{ {Value: value, Content: bytes.NewBuffer(nil)}, @@ -130,4 +135,5 @@ func (this *SerializationFixture) TestSerializerErrorIsTracked() { this.So(ok, should.BeTrue) this.So(observation.Error, should.WrapError, ErrSerialization) this.So(observation.Value, should.Equal, value) + this.So(units[0].results[0].Content.String(), should.Equal, `harness.TestMessage{Value:"unserializable"}`) } From 2697c8fec3818800d3fa2e4c5c0163854c1afe04 Mon Sep 17 00:00:00 2001 From: Michael Whatcott Date: Mon, 1 Jun 2026 14:34:12 -0600 Subject: [PATCH 23/50] Proposed resolution of unbounded retry loops at shutdown. --- ...-56-53-proposal-retry-loop-cancellation.md | 334 ++++++++++++++++++ 1 file changed, 334 insertions(+) create mode 100644 doc/work-sessions/2026/2026-06-01_13-56-53-proposal-retry-loop-cancellation.md diff --git a/doc/work-sessions/2026/2026-06-01_13-56-53-proposal-retry-loop-cancellation.md b/doc/work-sessions/2026/2026-06-01_13-56-53-proposal-retry-loop-cancellation.md new file mode 100644 index 0000000..7798613 --- /dev/null +++ b/doc/work-sessions/2026/2026-06-01_13-56-53-proposal-retry-loop-cancellation.md @@ -0,0 +1,334 @@ +--- +name: Bound the unbounded retry loops in persistence and broadcast +description: Make the persistence and broadcast stages' retry loops cancellable via the pipeline context so a permanently-failing Writer/Dispatcher can no longer hang pipeline shutdown, while preserving store-and-forward durability (persistence is the ack gate; broadcast is recoverable via the startup recovery routine). +type: plot +--- + +# Proposal: Bound the unbounded retry loops in `persistence` and `broadcast` + +## Background + +Two pipeline stages retry their collaborator forever with no escape hatch: + +`handlers/harness/03_persistence.go:39` + +```go +for attempt := 1; ; attempt++ { + err := this.writer.Write(this.ctx, this.buffer...) + if err == nil { + failure.Attempt = 0 + failure.Error = nil + break + } + failure.Attempt = attempt + failure.Error = fmt.Errorf("%w: %w", ErrPersistence, err) + this.monitor.Track(failure) + this.sleep(time.Second) // TODO: exponential back-off w/ jitter +} +``` + +`handlers/harness/05_broadcast.go:39` is identical in shape (against `Dispatcher.Dispatch`). + +Both loops: + +1. Have **no maximum attempt count** — they spin until the collaborator + succeeds. +2. **Never consult `this.ctx.Done()`.** The pipeline context is passed *into* + `Write`/`Dispatch` (so a ctx-honoring driver aborts the in-flight call), but + the loop itself ignores cancellation and immediately retries. +3. Sleep via an **uninterruptible** `this.sleep(time.Second)` (wired to + `time.Sleep` in `pipeline.go`). + +### The failure mode + +Shutdown drains the pipeline by closing channels from the front: `entrypoint.Close()` +closes the batches channel, `execution` then closes its output, and the close +cascades stage-by-stage (`for unit := range this.input` exits, then +`defer close(this.output)` fires). Each stage finishes the **unit it is +currently holding** before observing the closed input. + +If the database (persistence) or broker (broadcast) is unreachable when shutdown +begins, the stage holding an in-flight unit never breaks out of its retry loop, +never returns to `range this.input`, never observes the closed channel, and +**never closes its output**. The drain stalls, the remaining stages never see +their inputs close, and shutdown hangs until the orchestrator SIGKILLs the +process after its grace period. + +### The design constraint that shapes the fix + +The pipeline is store-and-forward, and the stage order matters +(`handlers/harness/pipeline.go`): + +``` +entrypoint → execution → serialization → persistence → completion → broadcast → terminal + (03) (04) (05) +``` + +`complete()` — which calls `waiter.Done()` (unblocking the `Handle`/`await` +caller) and lets the upstream MQ delivery be acked — fires in the **completion** +stage (04), which runs **after persistence (03)** and **before broadcast (05)**. + +That ordering creates a hard asymmetry between the two failing stages: + +| Stage | Runs vs. ack | On give-up, is the message recoverable? | +|--------------------|----------------|----------------------------------------------------------------------------------------------------------------------------------| +| `persistence` (03) | **before** ack | No. If the row was never written, forwarding it acks/loses the message. Only MQ redelivery recovers it. | +| `broadcast` (05) | **after** ack | Yes. The row is already durably stored; `sqladapter.Recover` re-dispatches every row `WHERE dispatched IS NULL` at next startup. | + +The resilience proposal +(`2026-05-28_15-30-32-proposal-harness-resilience-module-changes.md`) already +ruled out per-batch context in these stages and fixed their cancellation scope: + +> **Inject a per-batch `ctx` through Persistence and Broadcast.** Rejected. +> Per-batch ctx in retry-forever stages would unwind partially-completed work and +> break the durability principle. The pipeline ctx (`harness.New(ctx, …)`) is the +> right scope for those stages. + +So the intended (but not-yet-implemented) cancellation signal for these loops is +the **pipeline context** — the `ctx` the consumer passes to `harness.New(ctx, …)` +and is expected to cancel on shutdown. This proposal wires that signal in. + +## Approach + +Make both retry loops **cancellable via `this.ctx`**, and act on cancellation +according to each stage's recoverability (the asymmetry above): + +- **`broadcast` (recoverable):** on `this.ctx` cancellation, stop retrying and + **forward the unit** to `terminal` as usual. The message is already persisted; + `sqladapter.Recover` redispatches it on the next startup. No data loss, clean + shutdown. + +- **`persistence` (durability gate):** on `this.ctx` cancellation, stop retrying + and **drop the unit without forwarding it** — `complete()` is never called, so + the upstream MQ delivery is never acked and is redelivered on the next run. The + stage then continues its `range` loop and drains normally, so shutdown + proceeds. Forwarding here is *not* an option: it would reach the completion + stage, ack the caller, and lose a message that was never stored. + +QUESTION: You've addressed MQ deliveries, but does this approach also account for units originating from HTTP clients? + +A new interruptible backoff replaces the uninterruptible sleep, so shutdown +aborts the delay promptly instead of waiting out a full second per attempt. + +### The cancellable-wait seam + +Replace the injected `sleep func(time.Duration)` with an injected, ctx-aware +waiter: + +```go +// retry.go (new) +package harness + +import ( + "context" + "time" +) + +// wait sleeps for d, or returns early with ctx.Err() if ctx is cancelled first. +func wait(ctx context.Context, d time.Duration) error { + timer := time.NewTimer(d) + defer timer.Stop() + select { + case <-ctx.Done(): + return ctx.Err() + case <-timer.C: + return nil + } +} +``` + +QUESTION: why not still pass the duration to broadcast and persistence and just call the `wait` func from there? + +`pipeline.go` passes `wait` where it currently passes `time.Sleep`. Tests inject +a fixture method that records the requested durations (preserving the existing +`sleeps`-style assertions) and can simulate cancellation by returning a non-nil +error. + +This keeps the fixed 1-second delay; **exponential back-off with jitter remains a +separate, orthogonal TODO** and is out of scope here — this change is strictly +about bounding/cancelling the loops. + +### Revised `persistence` + +```go +func (this *persistence) Listen() { + defer close(this.output) + for unit := range this.input { + for _, message := range unit.results { + this.buffer = append(this.buffer, message) + } + stored := this.store() + this.buffer = this.buffer[:0] + if !stored { + continue // shutdown before durable write: do NOT forward (no ack); MQ redelivers + } + this.output <- unit + } +} + +func (this *persistence) store() (stored bool) { + var failure PersistenceError + for attempt := 1; ; attempt++ { + err := this.writer.Write(this.ctx, this.buffer...) + if err == nil { + return true + } + failure.Attempt = attempt + failure.Error = fmt.Errorf("%w: %w", ErrPersistence, err) + this.monitor.Track(failure) + if this.wait(this.ctx, time.Second) != nil { + this.monitor.Track(PersistenceAbandoned{Attempts: attempt}) + return false + } + } +} +``` + +### Revised `broadcast` + +```go +func (this *broadcast) Listen() { + defer close(this.output) + for unit := range this.input { + for _, message := range unit.results { + this.buffer = append(this.buffer, message) + } + this.dispatch() + this.buffer = this.buffer[:0] + this.output <- unit // always forward: already persisted; recovery redispatches if not sent + } +} + +func (this *broadcast) dispatch() { + var failure BroadcastError + for attempt := 1; ; attempt++ { + err := this.dispatcher.Dispatch(this.ctx, this.buffer...) + if err == nil { + return + } + failure.Attempt = attempt + failure.Error = fmt.Errorf("%w: %w", ErrBroadcast, err) + this.monitor.Track(failure) + if this.wait(this.ctx, time.Second) != nil { + this.monitor.Track(BroadcastAbandoned{Attempts: attempt}) + return + } + } +} +``` + +### New Monitor observations (`contracts.go`) + +```go +PersistenceAbandoned struct{ Attempts int } // emitted when persistence stops retrying due to shutdown; the unit was dropped and the upstream message will be redelivered +BroadcastAbandoned struct{ Attempts int } // emitted when broadcast stops retrying due to shutdown; the message is persisted and will be redispatched by recovery +``` + +These give operators an explicit, alertable signal that a shutdown abandoned +in-flight work — distinct from the per-attempt `PersistenceError`/`BroadcastError`. + +### Files created / modified + +| File | Change | +|-------------------------------------------|---------------------------------------------------------------------------------------------| +| `handlers/harness/retry.go` | **New.** The `wait(ctx, d)` interruptible-backoff helper. | +| `handlers/harness/03_persistence.go` | Replace `sleep` field/param with `wait`; extract `store()`; drop-without-forward on cancel. | +| `handlers/harness/05_broadcast.go` | Replace `sleep` field/param with `wait`; extract `dispatch()`; forward-on-cancel. | +| `handlers/harness/contracts.go` | Add `PersistenceAbandoned` and `BroadcastAbandoned` observation types. | +| `handlers/harness/pipeline.go` | Pass `wait` to `newPersistence`/`newBroadcast` instead of `time.Sleep`. | +| `handlers/harness/03_persistence_test.go` | Rename `sleep`→`wait` seam; add ctx-cancel abandonment test (unit **not** forwarded). | +| `handlers/harness/05_broadcast_test.go` | Rename `sleep`→`wait` seam; add ctx-cancel abandonment test (unit **is** forwarded). | + +### Alternatives considered + +- **Persistence keeps retrying forever (only fix broadcast).** Treat persistence's + infinite retry as intentional durability behavior and rely on the orchestrator's + grace-period + SIGKILL + MQ redelivery to bound shutdown. Simpler, and arguably + "more durable" (never gives up). Rejected as the recommendation because it leaves + the original concern — a hung shutdown when the DB is down — unresolved, trading a + clean shutdown for a SIGKILL every time. Presented here as the headline open + question (see below); it is a one-line variant of this proposal (skip the + persistence drop path, keep the loop uninterruptible). + +- **Bounded max-attempts, then forward.** Give each loop a retry ceiling and forward + downstream when exhausted. Rejected: for persistence, forwarding an un-stored unit + acks and loses the message; for broadcast it is unnecessary (recovery already + handles it). A ceiling also turns transient-but-long outages into data loss during + *normal* operation, not just shutdown. + +- **Per-batch context.** Already rejected by the resilience proposal (would unwind + partially-completed work); not revisited. + +## Trade-offs & Risks + +- **Open question for the reviewer (the key decision):** on shutdown with a dead + database, should `persistence` **(B, recommended)** stop and drop the in-flight + unit (clean shutdown; MQ redelivers), or **(A)** keep retrying forever + (SIGKILL-bounded; never voluntarily gives up a write)? The rest of the proposal is + unaffected by this choice; broadcast is unambiguous either way. + +- **Dropped-unit caller blocks until process exit (option B).** When persistence + drops a unit on shutdown, its `complete()` never fires, so an in-flight + `Handle`/`await` caller stays blocked on `waiter.Wait()`. This is acceptable during + shutdown — the consumer cancels `this.ctx`, the MQ delivery framework tears those + goroutines down, and the un-acked message is redelivered. It does mean a + `goleak`-style "no leaked goroutines" assertion must not be applied across the + shutdown-abandonment path. + +- **Cancellation depends on collaborators honoring `ctx`.** If `Writer.Write` / + `Dispatcher.Dispatch` block forever ignoring the passed `ctx`, our loop cannot + reach the `wait` check until that call returns. The `sqladapter` Writer/Dispatcher + use `BeginTx(ctx)`/`ExecContext(ctx)`/ctx-scoped connector calls, so they abort + promptly on cancellation. Worth stating as a documented contract for custom + collaborators. + +- **Requires the consumer to cancel the pipeline ctx on shutdown.** The retry loops + abort on `this.ctx` cancellation; the channel-close cascade alone does not cancel + `this.ctx`. If a consumer wires `harness.New` with a `context.Background()` that is + never cancelled, the in-flight retry remains unbounded (it still drains correctly + on the *next* unit boundary, but the currently-held unit can still hang). This is + the intended contract per the resilience proposal; it should be called out in the + package doc. + +- **Deterministic tests are straightforward here** (unlike the waiter-pool fix): the + injected `wait` seam returns a chosen error synchronously, so abandonment is + exercised without real timing or concurrency races. + +- **No change to the happy path or to the fixed-1s backoff.** Existing retry tests + keep asserting two 1-second waits before success; only the seam's name/signature + changes. + +## Implementation Checklist + +### Phase 1: Cancellable-wait seam (refactor, stays green) + +- [ ] Add `handlers/harness/retry.go` with the `wait(ctx context.Context, d time.Duration) error` helper shown in Approach. +- [ ] In `pipeline.go`, change `newPersistence(...)` and `newBroadcast(...)` call sites to pass `wait` instead of `time.Sleep`. +- [ ] In `03_persistence.go` and `05_broadcast.go`, change the struct field and constructor parameter from `sleep func(time.Duration)` to `wait func(context.Context, time.Duration) error` (no behavior change yet — call `this.wait(this.ctx, time.Second)` and ignore the returned error so the loops still spin forever). +- [ ] In both `*_test.go` fixtures, rename the `sleep`/`sleeps` seam to a `wait`/`waits` method matching the new signature, recording durations and returning `nil`. +- [ ] Run `make test` — confirm green (pure refactor; existing `TestRetriesUntil…Succeeds` still sees two recorded 1s waits). + +### Phase 2: Broadcast cancellation — forward on shutdown (red → green) + +- [ ] Add `BroadcastAbandoned struct{ Attempts int }` to `contracts.go`. +- [ ] Add `TestBroadcastAbandonsOnContextCancelButStillForwards`: dispatcher always fails; fixture `wait` returns `context.Canceled` on its first call. Expect failure (compile error — `BroadcastAbandoned` referenced before the loop emits it, or assertion fails because the current loop ignores the wait error and spins). Record the failure reason. +- [ ] Run the new test, confirm it fails for the right reason. +- [ ] Implement: extract `dispatch()`; on `this.wait(...) != nil`, `Track(BroadcastAbandoned{Attempts: attempt})` and return; `Listen()` still forwards the unit to `output` afterward. +- [ ] Run the test — confirm: exactly one `Dispatch` call, one recorded wait, one `BroadcastError` then one `BroadcastAbandoned` tracked, and the unit **was** forwarded to `output`. +- [ ] Confirm `TestRetriesUntilDispatchSucceeds` and the other broadcast tests still pass. + +### Phase 3: Persistence cancellation — drop without forwarding (red → green) + +- [ ] Add `PersistenceAbandoned struct{ Attempts int }` to `contracts.go`. +- [ ] Add `TestPersistenceAbandonsOnContextCancelAndDropsUnit`: writer always fails; fixture `wait` returns `context.Canceled` on its first call. Expect failure (the current loop ignores the wait error and spins, so the test would hang / never see abandonment). Record the failure reason. +- [ ] Run the new test, confirm it fails for the right reason. +- [ ] Implement: extract `store() bool`; on `this.wait(...) != nil`, `Track(PersistenceAbandoned{Attempts: attempt})` and return `false`; `Listen()` skips `output <- unit` (does **not** forward) when `store()` returns false, resets the buffer, and continues the range loop. +- [ ] Run the test — confirm: exactly one `Write` call, one recorded wait, one `PersistenceError` then one `PersistenceAbandoned` tracked, and the unit was **not** forwarded to `output` (output closes empty after input closes). +- [ ] Confirm `TestRetriesUntilWriteSucceeds` and the other persistence tests still pass. + +### Phase 4: Full verification & conventions + +- [ ] Run `make test` (fmt, vet, `-race`, coverage) — confirm green and that `handlers/harness` coverage has not regressed. +- [ ] Add a short note to the `package harness` doc comment in `config.go`: the persistence/broadcast retry loops abort on cancellation of the context passed to `New(ctx, …)`; consumers must cancel it on shutdown, and custom `Writer`/`Dispatcher` implementations must honor the context they are given. +- [ ] Re-read the diff against the `CLAUDE.md` Go conventions: receiver named `this`; named slice/return values where applicable; no naked returns; no blank lines at method start/end; struct initializers use field/value pairs; multi-line struct literals close the brace on their own line. +``` From e9acfe79916a4284e975f279a9e82a163a70a93e Mon Sep 17 00:00:00 2001 From: Michael Whatcott Date: Mon, 1 Jun 2026 14:42:59 -0600 Subject: [PATCH 24/50] Clarifications --- ...-56-53-proposal-retry-loop-cancellation.md | 45 +++++++++++++++++-- 1 file changed, 42 insertions(+), 3 deletions(-) diff --git a/doc/work-sessions/2026/2026-06-01_13-56-53-proposal-retry-loop-cancellation.md b/doc/work-sessions/2026/2026-06-01_13-56-53-proposal-retry-loop-cancellation.md index 7798613..8d009bc 100644 --- a/doc/work-sessions/2026/2026-06-01_13-56-53-proposal-retry-loop-cancellation.md +++ b/doc/work-sessions/2026/2026-06-01_13-56-53-proposal-retry-loop-cancellation.md @@ -105,7 +105,32 @@ according to each stage's recoverability (the asymmetry above): proceeds. Forwarding here is *not* an option: it would reach the completion stage, ack the caller, and lose a message that was never stored. -QUESTION: You've addressed MQ deliveries, but does this approach also account for units originating from HTTP clients? +#### HTTP-origin units (the `await` path) + +The drop decision is identical for HTTP-submitted units, but the *recovery* +mechanism differs, because there is no broker to redeliver. When persistence +drops an HTTP-origin unit on shutdown: + +- `complete()` never fires, so the in-flight `await` caller is **not** unblocked + via the completion path. It unblocks instead when its own request context is + cancelled (the `case <-ctx.Done()` arm of `await`'s second select), tracking + `CallerDeparted` — exactly today's departed-caller behavior. Whether that + request context is actually cancelled at shutdown is a consumer-side graceful- + shutdown concern (does the HTTP server cancel active request contexts?), out of + scope here. +- Recovery relies on **client retry + domain idempotency**, the model the + resilience proposal already established for the HTTP path: a client whose + request did not durably succeed retries, and once the merged domain-layer + idempotency change is in place repeated retries collapse to no-ops after the + first applies. There is no `sqladapter.Recover` safety net for an un-stored + unit (recovery only re-dispatches rows that *were* stored), so the client retry + is the sole recovery path — which is why dropping (rather than forwarding) is + still the correct choice: forwarding would unblock the caller as though the + write succeeded, falsely confirming a message that was never stored. + +There is no regression versus today: an HTTP request in flight against a dead +database currently hangs until SIGKILL just the same; this change makes the +process shut down cleanly instead. A new interruptible backoff replaces the uninterruptible sleep, so shutdown aborts the delay promptly instead of waiting out a full second per attempt. @@ -137,13 +162,27 @@ func wait(ctx context.Context, d time.Duration) error { } ``` -QUESTION: why not still pass the duration to broadcast and persistence and just call the `wait` func from there? - `pipeline.go` passes `wait` where it currently passes `time.Sleep`. Tests inject a fixture method that records the requested durations (preserving the existing `sleeps`-style assertions) and can simulate cancellation by returning a non-nil error. +> **Why inject the wait function rather than a duration?** A reasonable +> alternative is to keep `wait` as a plain free function, store only a +> `time.Duration` on each stage, and have the stage call `wait(this.ctx, this.delay)` +> directly. That is cleaner production code. It is rejected for one reason: the +> **test seam**. The stages today inject `sleep func(time.Duration)` precisely so +> the fixtures can (a) record how many times and for how long the loop backed off +> and (b) run instantly without real 1-second sleeps. If we called the real `wait` +> directly, every retry test would block on actual wall-clock time and could not +> observe the backoff. Keeping the injected-function seam preserves the existing +> `TestRetriesUntil…Succeeds` assertions verbatim and lets a fixture simulate +> cancellation synchronously by returning an error on a chosen attempt. The free +> `wait` function still exists — it is simply the production value wired in at +> `pipeline.go`, while tests substitute their own. (When exponential backoff lands +> later, the *duration* moves inside `wait`/its production implementation; the +> injected-function seam stays, so this decision does not block that work.) + This keeps the fixed 1-second delay; **exponential back-off with jitter remains a separate, orthogonal TODO** and is out of scope here — this change is strictly about bounding/cancelling the loops. From 4308c7abeb6aed4964da9e5a0a565e6df9ac7dd9 Mon Sep 17 00:00:00 2001 From: Michael Whatcott Date: Mon, 1 Jun 2026 14:43:16 -0600 Subject: [PATCH 25/50] Avoid potentially unsafe type conversions. --- handlers/harness/admission.go | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/handlers/harness/admission.go b/handlers/harness/admission.go index a2ee1dc..4601730 100644 --- a/handlers/harness/admission.go +++ b/handlers/harness/admission.go @@ -18,10 +18,9 @@ type ( // Admission refuses overloaded requests before the wrapped handler runs, // writing an inline 503. Wrap each mutating route with it. -func Admission(handler messaging.Handler, inner http.Handler) http.Handler { - gate := handler.(admitter) +func Admission(handler admitter, inner http.Handler) http.Handler { return http.HandlerFunc(func(response http.ResponseWriter, request *http.Request) { - if gate.admit() { + if handler.admit() { inner.ServeHTTP(response, request) return } @@ -37,8 +36,8 @@ var shedResponseBody = []byte(`{"errors":[{"message":"service overloaded"}]}`) // AsHTTPHandler adapts the void, context-honoring await to the // messaging.Handler the HTTP shells already depend on, so no shell (and no // shell test) changes. -func AsHTTPHandler(handler messaging.Handler) messaging.Handler { - return &httpAdapter{target: handler.(awaiter)} +func AsHTTPHandler(handler awaiter) messaging.Handler { + return &httpAdapter{target: handler} } type httpAdapter struct { From d08c9989fb4eb2990c259b4665af75c86fa48bbd Mon Sep 17 00:00:00 2001 From: Michael Whatcott Date: Mon, 1 Jun 2026 14:46:15 -0600 Subject: [PATCH 26/50] More clarification. --- ..._13-56-53-proposal-retry-loop-cancellation.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/doc/work-sessions/2026/2026-06-01_13-56-53-proposal-retry-loop-cancellation.md b/doc/work-sessions/2026/2026-06-01_13-56-53-proposal-retry-loop-cancellation.md index 8d009bc..623c485 100644 --- a/doc/work-sessions/2026/2026-06-01_13-56-53-proposal-retry-loop-cancellation.md +++ b/doc/work-sessions/2026/2026-06-01_13-56-53-proposal-retry-loop-cancellation.md @@ -300,13 +300,14 @@ in-flight work — distinct from the per-attempt `PersistenceError`/`BroadcastEr ## Trade-offs & Risks -- **Open question for the reviewer (the key decision):** on shutdown with a dead - database, should `persistence` **(B, recommended)** stop and drop the in-flight - unit (clean shutdown; MQ redelivers), or **(A)** keep retrying forever - (SIGKILL-bounded; never voluntarily gives up a write)? The rest of the proposal is - unaffected by this choice; broadcast is unambiguous either way. - -- **Dropped-unit caller blocks until process exit (option B).** When persistence +- **Decision (settled): persistence drops the in-flight unit on shutdown.** On + shutdown with a dead database, `persistence` stops retrying and drops the + in-flight unit (clean shutdown; the un-acked MQ delivery is redelivered, or the + HTTP client retries). The rejected alternative — keep retrying forever, bounded + only by orchestrator SIGKILL — is retained in *Alternatives considered* for the + record. Broadcast is unaffected by this choice (forward-on-cancel either way). + +- **Dropped-unit caller blocks until process exit.** When persistence drops a unit on shutdown, its `complete()` never fires, so an in-flight `Handle`/`await` caller stays blocked on `waiter.Wait()`. This is acceptable during shutdown — the consumer cancels `this.ctx`, the MQ delivery framework tears those @@ -370,4 +371,3 @@ in-flight work — distinct from the per-attempt `PersistenceError`/`BroadcastEr - [ ] Run `make test` (fmt, vet, `-race`, coverage) — confirm green and that `handlers/harness` coverage has not regressed. - [ ] Add a short note to the `package harness` doc comment in `config.go`: the persistence/broadcast retry loops abort on cancellation of the context passed to `New(ctx, …)`; consumers must cancel it on shutdown, and custom `Writer`/`Dispatcher` implementations must honor the context they are given. - [ ] Re-read the diff against the `CLAUDE.md` Go conventions: receiver named `this`; named slice/return values where applicable; no naked returns; no blank lines at method start/end; struct initializers use field/value pairs; multi-line struct literals close the brace on their own line. -``` From 574396f7a26d6221a05a93b155a1455cb7d291c1 Mon Sep 17 00:00:00 2001 From: Michael Whatcott Date: Mon, 1 Jun 2026 15:29:14 -0600 Subject: [PATCH 27/50] Bound the unbounded retry loops in `persistence` and `broadcast`. --- ...-56-53-proposal-retry-loop-cancellation.md | 40 +++++++++---------- handlers/harness/03_persistence.go | 40 +++++++++++-------- handlers/harness/03_persistence_test.go | 35 +++++++++++++--- handlers/harness/05_broadcast.go | 37 +++++++++-------- handlers/harness/05_broadcast_test.go | 35 +++++++++++++--- handlers/harness/config.go | 5 +++ handlers/harness/contracts.go | 4 +- handlers/harness/pipeline.go | 5 +-- handlers/harness/retry.go | 18 +++++++++ 9 files changed, 149 insertions(+), 70 deletions(-) create mode 100644 handlers/harness/retry.go diff --git a/doc/work-sessions/2026/2026-06-01_13-56-53-proposal-retry-loop-cancellation.md b/doc/work-sessions/2026/2026-06-01_13-56-53-proposal-retry-loop-cancellation.md index 623c485..2da4c15 100644 --- a/doc/work-sessions/2026/2026-06-01_13-56-53-proposal-retry-loop-cancellation.md +++ b/doc/work-sessions/2026/2026-06-01_13-56-53-proposal-retry-loop-cancellation.md @@ -342,32 +342,32 @@ in-flight work — distinct from the per-attempt `PersistenceError`/`BroadcastEr ### Phase 1: Cancellable-wait seam (refactor, stays green) -- [ ] Add `handlers/harness/retry.go` with the `wait(ctx context.Context, d time.Duration) error` helper shown in Approach. -- [ ] In `pipeline.go`, change `newPersistence(...)` and `newBroadcast(...)` call sites to pass `wait` instead of `time.Sleep`. -- [ ] In `03_persistence.go` and `05_broadcast.go`, change the struct field and constructor parameter from `sleep func(time.Duration)` to `wait func(context.Context, time.Duration) error` (no behavior change yet — call `this.wait(this.ctx, time.Second)` and ignore the returned error so the loops still spin forever). -- [ ] In both `*_test.go` fixtures, rename the `sleep`/`sleeps` seam to a `wait`/`waits` method matching the new signature, recording durations and returning `nil`. -- [ ] Run `make test` — confirm green (pure refactor; existing `TestRetriesUntil…Succeeds` still sees two recorded 1s waits). +- [x] Add `handlers/harness/retry.go` with the `wait(ctx context.Context, d time.Duration) error` helper shown in Approach. +- [x] In `pipeline.go`, change `newPersistence(...)` and `newBroadcast(...)` call sites to pass `wait` instead of `time.Sleep`. +- [x] In `03_persistence.go` and `05_broadcast.go`, change the struct field and constructor parameter from `sleep func(time.Duration)` to `wait func(context.Context, time.Duration) error` (no behavior change yet — call `this.wait(this.ctx, time.Second)` and ignore the returned error so the loops still spin forever). +- [x] In both `*_test.go` fixtures, rename the `sleep`/`sleeps` seam to a `wait`/`waits` method matching the new signature, recording durations and returning `nil`. +- [x] Run `make test` — confirm green (pure refactor; existing `TestRetriesUntil…Succeeds` still sees two recorded 1s waits). ### Phase 2: Broadcast cancellation — forward on shutdown (red → green) -- [ ] Add `BroadcastAbandoned struct{ Attempts int }` to `contracts.go`. -- [ ] Add `TestBroadcastAbandonsOnContextCancelButStillForwards`: dispatcher always fails; fixture `wait` returns `context.Canceled` on its first call. Expect failure (compile error — `BroadcastAbandoned` referenced before the loop emits it, or assertion fails because the current loop ignores the wait error and spins). Record the failure reason. -- [ ] Run the new test, confirm it fails for the right reason. -- [ ] Implement: extract `dispatch()`; on `this.wait(...) != nil`, `Track(BroadcastAbandoned{Attempts: attempt})` and return; `Listen()` still forwards the unit to `output` afterward. -- [ ] Run the test — confirm: exactly one `Dispatch` call, one recorded wait, one `BroadcastError` then one `BroadcastAbandoned` tracked, and the unit **was** forwarded to `output`. -- [ ] Confirm `TestRetriesUntilDispatchSucceeds` and the other broadcast tests still pass. +- [x] Add `BroadcastAbandoned struct{ Attempts int }` to `contracts.go`. +- [x] Add `TestBroadcastAbandonsOnContextCancelButStillForwards`: dispatcher always fails; fixture `wait` returns `context.Canceled` on its first call. Expect failure (compile error — `BroadcastAbandoned` referenced before the loop emits it, or assertion fails because the current loop ignores the wait error and spins). Record the failure reason. +- [x] Run the new test, confirm it fails for the right reason. (Observed: 5s `-timeout` kill — the loop ignored the wait error and spun forever against the always-failing dispatcher, never forwarding or tracking abandonment.) +- [x] Implement: extract `dispatch()`; on `this.wait(...) != nil`, `Track(BroadcastAbandoned{Attempts: attempt})` and return; `Listen()` still forwards the unit to `output` afterward. +- [x] Run the test — confirm: exactly one `Dispatch` call, one recorded wait, one `BroadcastError` then one `BroadcastAbandoned` tracked, and the unit **was** forwarded to `output`. +- [x] Confirm `TestRetriesUntilDispatchSucceeds` and the other broadcast tests still pass. ### Phase 3: Persistence cancellation — drop without forwarding (red → green) -- [ ] Add `PersistenceAbandoned struct{ Attempts int }` to `contracts.go`. -- [ ] Add `TestPersistenceAbandonsOnContextCancelAndDropsUnit`: writer always fails; fixture `wait` returns `context.Canceled` on its first call. Expect failure (the current loop ignores the wait error and spins, so the test would hang / never see abandonment). Record the failure reason. -- [ ] Run the new test, confirm it fails for the right reason. -- [ ] Implement: extract `store() bool`; on `this.wait(...) != nil`, `Track(PersistenceAbandoned{Attempts: attempt})` and return `false`; `Listen()` skips `output <- unit` (does **not** forward) when `store()` returns false, resets the buffer, and continues the range loop. -- [ ] Run the test — confirm: exactly one `Write` call, one recorded wait, one `PersistenceError` then one `PersistenceAbandoned` tracked, and the unit was **not** forwarded to `output` (output closes empty after input closes). -- [ ] Confirm `TestRetriesUntilWriteSucceeds` and the other persistence tests still pass. +- [x] Add `PersistenceAbandoned struct{ Attempts int }` to `contracts.go`. +- [x] Add `TestPersistenceAbandonsOnContextCancelAndDropsUnit`: writer always fails; fixture `wait` returns `context.Canceled` on its first call. Expect failure (the current loop ignores the wait error and spins, so the test would hang / never see abandonment). Record the failure reason. +- [x] Run the new test, confirm it fails for the right reason. (Observed: 5s `-timeout` kill — the loop ignored the wait error and spun forever against the always-failing writer, never dropping the unit or tracking abandonment.) +- [x] Implement: extract `store() bool`; on `this.wait(...) != nil`, `Track(PersistenceAbandoned{Attempts: attempt})` and return `false`; `Listen()` skips `output <- unit` (does **not** forward) when `store()` returns false, resets the buffer, and continues the range loop. +- [x] Run the test — confirm: exactly one `Write` call, one recorded wait, one `PersistenceError` then one `PersistenceAbandoned` tracked, and the unit was **not** forwarded to `output` (output closes empty after input closes). +- [x] Confirm `TestRetriesUntilWriteSucceeds` and the other persistence tests still pass. ### Phase 4: Full verification & conventions -- [ ] Run `make test` (fmt, vet, `-race`, coverage) — confirm green and that `handlers/harness` coverage has not regressed. -- [ ] Add a short note to the `package harness` doc comment in `config.go`: the persistence/broadcast retry loops abort on cancellation of the context passed to `New(ctx, …)`; consumers must cancel it on shutdown, and custom `Writer`/`Dispatcher` implementations must honor the context they are given. -- [ ] Re-read the diff against the `CLAUDE.md` Go conventions: receiver named `this`; named slice/return values where applicable; no naked returns; no blank lines at method start/end; struct initializers use field/value pairs; multi-line struct literals close the brace on their own line. +- [x] Run `make test` (fmt, vet, `-race`, coverage) — confirm green and that `handlers/harness` coverage has not regressed. (All packages pass; `handlers/harness` coverage 97.7%, unchanged.) +- [x] Add a short note to the `package harness` doc comment in `config.go`: the persistence/broadcast retry loops abort on cancellation of the context passed to `New(ctx, …)`; consumers must cancel it on shutdown, and custom `Writer`/`Dispatcher` implementations must honor the context they are given. +- [x] Re-read the diff against the `CLAUDE.md` Go conventions: receiver named `this`; named slice/return values where applicable; no naked returns; no blank lines at method start/end; struct initializers use field/value pairs; multi-line struct literals close the brace on their own line. diff --git a/handlers/harness/03_persistence.go b/handlers/harness/03_persistence.go index 0e77ae9..f213189 100644 --- a/handlers/harness/03_persistence.go +++ b/handlers/harness/03_persistence.go @@ -12,43 +12,49 @@ type persistence struct { input chan *unitOfWork output chan *unitOfWork writer Writer - sleep func(time.Duration) + wait func(context.Context, time.Duration) error buffer []any } -func newPersistence(ctx context.Context, monitor Monitor, input, output chan *unitOfWork, writer Writer, sleep func(time.Duration)) *persistence { +func newPersistence(ctx context.Context, monitor Monitor, input, output chan *unitOfWork, writer Writer, wait func(context.Context, time.Duration) error) *persistence { return &persistence{ ctx: ctx, monitor: monitor, input: input, output: output, writer: writer, - sleep: sleep, + wait: wait, buffer: make([]any, 0, 1024), } } func (this *persistence) Listen() { - var failure PersistenceError - defer close(this.output) for unit := range this.input { for _, message := range unit.results { this.buffer = append(this.buffer, message) } - for attempt := 1; ; attempt++ { - err := this.writer.Write(this.ctx, this.buffer...) - if err == nil { - failure.Attempt = 0 - failure.Error = nil - break - } - failure.Attempt = attempt - failure.Error = fmt.Errorf("%w: %w", ErrPersistence, err) - this.monitor.Track(failure) - this.sleep(time.Second) // TODO: exponential back-off w/ jitter + stored := this.store() + this.buffer = this.buffer[:0] + if !stored { + continue // shutdown before durable write: do NOT forward (no ack); MQ redelivers } this.output <- unit - this.buffer = this.buffer[:0] + } +} +func (this *persistence) store() (stored bool) { + var failure PersistenceError + for attempt := 1; ; attempt++ { + err := this.writer.Write(this.ctx, this.buffer...) + if err == nil { + return true + } + failure.Attempt = attempt + failure.Error = fmt.Errorf("%w: %w", ErrPersistence, err) + this.monitor.Track(failure) + if this.wait(this.ctx, time.Second) != nil { // TODO: exponential back-off w/ jitter + this.monitor.Track(PersistenceAbandoned{Attempts: attempt}) + return false + } } } diff --git a/handlers/harness/03_persistence_test.go b/handlers/harness/03_persistence_test.go index e99701b..76d2be9 100644 --- a/handlers/harness/03_persistence_test.go +++ b/handlers/harness/03_persistence_test.go @@ -20,7 +20,8 @@ type PersistenceFixture struct { ctx context.Context input chan *unitOfWork output chan *unitOfWork - sleeps []time.Duration + waits []time.Duration + waitErr error subject *persistence writeMu sync.Mutex @@ -34,11 +35,12 @@ func (this *PersistenceFixture) Setup() { this.ctx = context.WithValue(this.Context(), "testing", this.Name()) this.input = make(chan *unitOfWork, 4) this.output = make(chan *unitOfWork, 4) - this.subject = newPersistence(this.ctx, this, this.input, this.output, this, this.sleep) + this.subject = newPersistence(this.ctx, this, this.input, this.output, this, this.wait) } -func (this *PersistenceFixture) sleep(d time.Duration) { - this.sleeps = append(this.sleeps, d) +func (this *PersistenceFixture) wait(_ context.Context, d time.Duration) error { + this.waits = append(this.waits, d) + return this.waitErr } func (this *PersistenceFixture) Track(observation any) { @@ -78,7 +80,7 @@ func (this *PersistenceFixture) TestWritesAllResultsThenForwardsUnit() { this.So(len(units), should.Equal, 1) this.So(len(this.writeCalls), should.Equal, 1) this.So(this.writeCalls[0], should.Equal, []any{m1, m2}) - this.So(this.sleeps, should.BeEmpty) + this.So(this.waits, should.BeEmpty) this.So(this.tracked, should.BeEmpty) } @@ -123,7 +125,7 @@ func (this *PersistenceFixture) TestRetriesUntilWriteSucceeds() { units := this.drain() this.So(len(units), should.Equal, 1) this.So(len(this.writeCalls), should.Equal, 3) - this.So(this.sleeps, should.Equal, []time.Duration{time.Second, time.Second}) + this.So(this.waits, should.Equal, []time.Duration{time.Second, time.Second}) this.So(this.tracked, should.HaveLength, 2) for n, observation := range this.tracked { failure, ok := observation.(PersistenceError) @@ -133,6 +135,27 @@ func (this *PersistenceFixture) TestRetriesUntilWriteSucceeds() { } } +func (this *PersistenceFixture) TestPersistenceAbandonsOnContextCancelAndDropsUnit() { + this.writeFailCount = 1 << 30 // always fail + this.waitErr = context.Canceled + unit := &unitOfWork{results: []*Message{{Value: "abandoned"}}} + this.input <- unit + close(this.input) + + go this.subject.Listen() + + units := this.drain() + this.So(units, should.BeEmpty) + this.So(len(this.writeCalls), should.Equal, 1) + this.So(this.waits, should.Equal, []time.Duration{time.Second}) + this.So(this.tracked, should.HaveLength, 2) + failure, ok := this.tracked[0].(PersistenceError) + this.So(ok, should.BeTrue) + this.So(failure.Error, should.WrapError, ErrPersistence) + this.So(failure.Attempt, should.Equal, 1) + this.So(this.tracked[1], should.Equal, PersistenceAbandoned{Attempts: 1}) +} + func (this *PersistenceFixture) TestClosedInputClosesOutput() { close(this.input) go this.subject.Listen() diff --git a/handlers/harness/05_broadcast.go b/handlers/harness/05_broadcast.go index 30b7c55..2b1fd58 100644 --- a/handlers/harness/05_broadcast.go +++ b/handlers/harness/05_broadcast.go @@ -13,10 +13,10 @@ type broadcast struct { output chan *unitOfWork buffer []any dispatcher Dispatcher - sleep func(time.Duration) + wait func(context.Context, time.Duration) error } -func newBroadcast(ctx context.Context, monitor Monitor, input, output chan *unitOfWork, dispatcher Dispatcher, sleep func(time.Duration)) *broadcast { +func newBroadcast(ctx context.Context, monitor Monitor, input, output chan *unitOfWork, dispatcher Dispatcher, wait func(context.Context, time.Duration) error) *broadcast { return &broadcast{ ctx: ctx, monitor: monitor, @@ -24,31 +24,34 @@ func newBroadcast(ctx context.Context, monitor Monitor, input, output chan *unit output: output, buffer: make([]any, 0, 1024), dispatcher: dispatcher, - sleep: sleep, + wait: wait, } } func (this *broadcast) Listen() { - var failure BroadcastError - defer close(this.output) for unit := range this.input { for _, message := range unit.results { this.buffer = append(this.buffer, message) } - for attempt := 1; ; attempt++ { - err := this.dispatcher.Dispatch(this.ctx, this.buffer...) - if err == nil { - failure.Attempt = 0 - failure.Error = nil - break - } - failure.Attempt = attempt - failure.Error = fmt.Errorf("%w: %w", ErrBroadcast, err) - this.monitor.Track(failure) - this.sleep(time.Second) // TODO: exponential backoff w/ jitter - } + this.dispatch() this.buffer = this.buffer[:0] this.output <- unit } } +func (this *broadcast) dispatch() { + var failure BroadcastError + for attempt := 1; ; attempt++ { + err := this.dispatcher.Dispatch(this.ctx, this.buffer...) + if err == nil { + return + } + failure.Attempt = attempt + failure.Error = fmt.Errorf("%w: %w", ErrBroadcast, err) + this.monitor.Track(failure) + if this.wait(this.ctx, time.Second) != nil { // TODO: exponential backoff w/ jitter + this.monitor.Track(BroadcastAbandoned{Attempts: attempt}) + return + } + } +} diff --git a/handlers/harness/05_broadcast_test.go b/handlers/harness/05_broadcast_test.go index b09809a..299d445 100644 --- a/handlers/harness/05_broadcast_test.go +++ b/handlers/harness/05_broadcast_test.go @@ -20,7 +20,8 @@ type BroadcastFixture struct { ctx context.Context input chan *unitOfWork output chan *unitOfWork - sleeps []time.Duration + waits []time.Duration + waitErr error subject *broadcast dispatchMu sync.Mutex @@ -34,11 +35,12 @@ func (this *BroadcastFixture) Setup() { this.ctx = context.WithValue(this.Context(), "testing", this.Name()) this.input = make(chan *unitOfWork, 4) this.output = make(chan *unitOfWork, 4) - this.subject = newBroadcast(this.ctx, this, this.input, this.output, this, this.sleep) + this.subject = newBroadcast(this.ctx, this, this.input, this.output, this, this.wait) } -func (this *BroadcastFixture) sleep(d time.Duration) { - this.sleeps = append(this.sleeps, d) +func (this *BroadcastFixture) wait(_ context.Context, d time.Duration) error { + this.waits = append(this.waits, d) + return this.waitErr } func (this *BroadcastFixture) Track(observation any) { @@ -78,7 +80,7 @@ func (this *BroadcastFixture) TestDispatchesAllResultsThenForwardsUnit() { this.So(len(units), should.Equal, 1) this.So(len(this.dispatchCalls), should.Equal, 1) this.So(this.dispatchCalls[0], should.Equal, []any{m1, m2}) - this.So(this.sleeps, should.BeEmpty) + this.So(this.waits, should.BeEmpty) this.So(this.tracked, should.BeEmpty) } @@ -123,7 +125,7 @@ func (this *BroadcastFixture) TestRetriesUntilDispatchSucceeds() { units := this.drain() this.So(len(units), should.Equal, 1) this.So(len(this.dispatchCalls), should.Equal, 3) - this.So(this.sleeps, should.Equal, []time.Duration{time.Second, time.Second}) + this.So(this.waits, should.Equal, []time.Duration{time.Second, time.Second}) this.So(this.tracked, should.HaveLength, 2) for n, observation := range this.tracked { failure, ok := observation.(BroadcastError) @@ -133,6 +135,27 @@ func (this *BroadcastFixture) TestRetriesUntilDispatchSucceeds() { } } +func (this *BroadcastFixture) TestBroadcastAbandonsOnContextCancelButStillForwards() { + this.dispatchFailCount = 1 << 30 // always fail + this.waitErr = context.Canceled + unit := &unitOfWork{results: []*Message{{Value: "abandoned"}}} + this.input <- unit + close(this.input) + + go this.subject.Listen() + + units := this.drain() + this.So(units, should.Equal, []*unitOfWork{unit}) + this.So(len(this.dispatchCalls), should.Equal, 1) + this.So(this.waits, should.Equal, []time.Duration{time.Second}) + this.So(this.tracked, should.HaveLength, 2) + failure, ok := this.tracked[0].(BroadcastError) + this.So(ok, should.BeTrue) + this.So(failure.Error, should.WrapError, ErrBroadcast) + this.So(failure.Attempt, should.Equal, 1) + this.So(this.tracked[1], should.Equal, BroadcastAbandoned{Attempts: 1}) +} + func (this *BroadcastFixture) TestClosedInputClosesOutput() { close(this.input) go this.subject.Listen() diff --git a/handlers/harness/config.go b/handlers/harness/config.go index baabbb0..ab59072 100644 --- a/handlers/harness/config.go +++ b/handlers/harness/config.go @@ -10,6 +10,11 @@ // // The only exported entry point is New(ctx, options...); every internal stage // type is unexported and cannot be constructed directly by callers. +// +// The persistence and broadcast stages retry their collaborators on failure; +// those retry loops abort when the context passed to New(ctx, ...) is cancelled, +// so consumers must cancel it on shutdown to avoid hanging the drain. Custom +// Writer and Dispatcher implementations must honor the context they are given. package harness import ( diff --git a/handlers/harness/contracts.go b/handlers/harness/contracts.go index d0f7be5..e1903d9 100644 --- a/handlers/harness/contracts.go +++ b/handlers/harness/contracts.go @@ -52,10 +52,12 @@ type ( Attempt int Error error } - BroadcastError struct { + PersistenceAbandoned struct{ Attempts int } + BroadcastError struct { Attempt int Error error } + BroadcastAbandoned struct{ Attempts int } ) var ( diff --git a/handlers/harness/pipeline.go b/handlers/harness/pipeline.go index 1e457f4..667e65f 100644 --- a/handlers/harness/pipeline.go +++ b/handlers/harness/pipeline.go @@ -2,7 +2,6 @@ package harness import ( "context" - "time" "github.com/smarty/messaging/v3" ) @@ -21,9 +20,9 @@ func build(ctx context.Context, config configuration) (messaging.Handler, []mess entrypoint = newEntrypoint(config.Monitor, batches, config.ShedThreshold) executor = newExecution(config.Monitor, config.UnitSize, batches, work1, newRouter(config.Types...)) serializers = newFanOut(serializationFactory(config.Monitor, config.Serializer), config.SerializerCount, config.UnitCapacity, work1, work2) - persistence = newPersistence(ctx, config.Monitor, work2, work3, config.Writer, time.Sleep) + persistence = newPersistence(ctx, config.Monitor, work2, work3, config.Writer, wait) completion = newCompletion(work3, work4) - broadcast = newBroadcast(ctx, config.Monitor, work4, work5, config.Dispatcher, time.Sleep) + broadcast = newBroadcast(ctx, config.Monitor, work4, work5, config.Dispatcher, wait) terminal = newTerminal(work5) ) diff --git a/handlers/harness/retry.go b/handlers/harness/retry.go new file mode 100644 index 0000000..94aaf87 --- /dev/null +++ b/handlers/harness/retry.go @@ -0,0 +1,18 @@ +package harness + +import ( + "context" + "time" +) + +// wait sleeps for d, or returns early with ctx.Err() if ctx is cancelled first. +func wait(ctx context.Context, d time.Duration) error { + timer := time.NewTimer(d) + defer timer.Stop() + select { + case <-ctx.Done(): + return ctx.Err() + case <-timer.C: + return nil + } +} From 4cb9ab73c4eff660d701bab07ec9bf4bc7b2a405 Mon Sep 17 00:00:00 2001 From: Michael Whatcott Date: Mon, 1 Jun 2026 16:37:59 -0600 Subject: [PATCH 28/50] Proposal to address serialization ownership. --- ...4-19-proposal-dispatch-content-encoding.md | 387 ++++++++++++++++++ 1 file changed, 387 insertions(+) create mode 100644 doc/work-sessions/2026/2026-06-01_16-14-19-proposal-dispatch-content-encoding.md diff --git a/doc/work-sessions/2026/2026-06-01_16-14-19-proposal-dispatch-content-encoding.md b/doc/work-sessions/2026/2026-06-01_16-14-19-proposal-dispatch-content-encoding.md new file mode 100644 index 0000000..f52767e --- /dev/null +++ b/doc/work-sessions/2026/2026-06-01_16-14-19-proposal-dispatch-content-encoding.md @@ -0,0 +1,387 @@ +--- +name: Stop double-encoding payloads across dispatch and recovery +description: Eliminate the redundant second serialization in the sqladapter Dispatcher, propagate ContentType through the harness Message and the durable row, and make recovery a faithful re-dispatch (not a JSON-shaped guess) — closing the TODO at handlers/harness/sqladapter/dispatcher.go:58 and the related content-type mishandling on the recovery path. +type: plot +--- + +# Proposal: Stop double-encoding payloads across dispatch and recovery + +## Background + +The harness pipeline is store-and-forward: each message is serialized once +(stage 02), durably stored (stage 03), then published to the broker (stage 05 +via `Dispatcher.Dispatch`). On startup, `sqladapter.Recover` re-publishes any +row whose `dispatched` column is still `NULL`. + +The bytes that get stored in the `Messages.payload` column **must** be byte- +identical to the bytes that get published to the broker — otherwise a recovery +re-publishes a different message than the original send, silently. Today the +code does not guarantee this, and in one configuration it cannot publish a +recovered row at all. + +There are three connected defects across `handlers/harness/02_serialization.go`, +`handlers/harness/sqladapter/dispatcher.go`, and +`handlers/harness/sqladapter/recovery.go`. They are usually invisible (the +default JSON serializer happens to make the bug benign) and they are masked by +the unit-test stub connector — but they are real, and recovery against a real +RabbitMQ writer is broken today. + +### Defect 1: payload is encoded twice on the happy path + +`02_serialization.go:29` encodes `message.Value` into `message.Content` (these +are the bytes destined for the durable row). + +`sqladapter/dispatcher.go:64-67` then builds a `messaging.Dispatch` with +`Message: message.Value` (the in-memory Go struct) and passes it to the +transport `Writer`. The transport writer is the `serialization.defaultWriter` +(`serialization/connector.go:103`), which loops calls into +`defaultDispatchEncoder.Encode` (`serialization/dispatch_encoder.go:30`). That +encoder sees `dispatch.Payload` is empty and `dispatch.Message` is non-nil, and +**runs the same `Serializer.Serialize` again** to produce +`dispatch.Payload`/`dispatch.ContentType`/`dispatch.MessageType`. + +So the value is serialized once for the DB, then again for the broker. Both +encodings happen with the same `Serializer`, so the resulting bytes *are* equal, +but the harness has no guarantee of that — anyone who plugs in a serializer +with side effects, IDs, or timestamps will get divergent stored vs. published +bytes. And it is wasted CPU on every send. + +The TODO at `dispatcher.go:58-63` calls this out and asks for one of two fixes: + +> Either pass the pre-encoded bytes through Dispatch.Payload/MessageType/ContentType +> and skip the connector's serialization for this writer, or drop the harness +> Serialization stage and let the connector own all encoding. + +### Defect 2: `Message.ContentType` is never populated on the happy path + +`harness.Message` already has `ContentType` and `ContentEncoding` fields +(`message.go:22-28`) — but `02_serialization.go` only writes +`message.ContentType` on the **fallback** path (when the user's serializer +returned an error and we fall back to `fmt.Sprintf("%#v")`): + +```go +err := this.serializer.Serialize(message.Content, message.Value) +if err != nil { + // … + message.ContentType = "go fmt.Sprintf(%#v)" + _, _ = fmt.Fprintf(message.Content, "%#v", message.Value) +} +``` + +On the success path `message.ContentType` is left as the zero value `""`. The +field exists, the schema row has nowhere to store it, and the dispatcher never +reads it. + +This is fine *today* because `dispatcher.go` re-encodes via the connector +(Defect 1), so the connector reapplies its own `ContentType()`. The moment we +fix Defect 1 by passing pre-encoded bytes through, we have to know what the +content type is — and the harness never recorded it. + +### Defect 3: recovery silently lies about content type, and is broken against a real broker + +`recovery.go:42-47` builds a `harness.Message` from the row: + +```go +messages = append(messages, &harness.Message{ + ID: id, + Type: typeName, + Content: bytes.NewBuffer(payload), + ContentType: "application/json", // <-- hardcoded +}) +``` + +`Value` is left nil. The hardcoded `"application/json"` is read by no one today +(see Defect 2: nothing in dispatch reads `Message.ContentType`), so recovery +"works" by coincidence whenever the configured serializer is JSON. Plug in a +gob/proto/msgpack serializer and the stored bytes are no longer JSON, but the +recovery path still labels them `"application/json"`. Today that label is +ignored, but as soon as we fix Defects 1 and 2 to *use* the recorded content +type, this hardcode becomes a live mislabel. + +There is also a worse, present-tense failure: with `Value == nil` and +`Payload == nil`, the connector's encoder takes its `dispatch.Message == nil` +early-return (`dispatch_encoder.go:31`) and **never sets `Topic` / +`MessageType` / `ContentType`**. The RabbitMQ writer then rejects with +`ErrEmptyDispatchTopic` (`rabbitmq/writer.go:38`). So in the only production +configuration that uses `topicFromMessageType=true` (the encoder derives Topic +from MessageType), recovery can't publish a single row. The current dispatcher +unit tests don't catch this because the stub connector +(`dispatcher_test.go:111`) ignores the topic field entirely. + +### Why fix all three together + +The three defects are one defect viewed from three sides: the harness owns the +serialization, but the harness throws away two of the three things it learned +(`ContentType`, the encoded bytes themselves) and the connector re-derives +them. Recovery, which has only what was persisted, is the path that *must* +carry the ContentType through, and it can't because the harness never wrote +it down. Fixing the dispatcher TODO without fixing the schema/recovery makes +recovery worse, not better. Fixing recovery without fixing the dispatcher TODO +leaves the double-encode in place. They go together. + +## Approach + +We pick the "pass-through" arm of the TODO: the harness Serialization stage +remains the single source of truth for encoded bytes and content type; the +sqladapter Dispatcher hands those pre-encoded bytes to the connector via +`Dispatch.Payload` / `MessageType` / `ContentType`; the connector encoder +short-circuits because `Payload` is non-empty. Recovery does the same +pass-through using the row's stored `type` and `content_type`. + +This is the smaller, safer arm. The alternative (delete the harness +Serialization stage entirely and let the connector own all encoding) would +require persistence to also call the connector's encoder, would entangle the +harness with the connector, and would lose the content-type-known-at-store- +time invariant that recovery depends on. Rejected on those grounds. + +### The four changes + +**Change A (`02_serialization.go`):** record the content type on the success +path. We need `serializer.ContentType()` on the internal `serializer` +interface — which today is just `Serialize(io.Writer, any) error` +(`contracts.go:34`). Extend it: + +```go +serializer interface { + Serialize(out io.Writer, in any) error + ContentType() string +} +``` + +Then in `serialization.Listen`: + +```go +err := this.serializer.Serialize(message.Content, message.Value) +if err == nil { + message.ContentType = this.serializer.ContentType() +} else { + // existing fallback unchanged: ContentType already set to the sentinel + // "go fmt.Sprintf(%#v)" by the fallback branch. +} +``` + +The `nop` serializer in `config.go:112` gains a `ContentType() string { return "" }` +implementation, the test fixture in `02_serialization_test.go:40` gains the +same, and the production wiring (where callers pass +`serialization.Serializer` from the `serialization` package — which already +exposes `ContentType() string`, `contracts.go:9-11`) needs no changes since +that interface already satisfies the extended one. + +**Change B (`message.go` schema + `Writer`):** persist `content_type` +alongside `type` and `payload`. Schema migration: + +```sql +ALTER TABLE Messages ADD COLUMN content_type varchar(128) NOT NULL DEFAULT ''; +``` + +(`varchar(128)` comfortably holds e.g. `application/json; charset=utf-8`.) The +default lets existing rows survive; recovery will treat `''` as "use legacy +default" — see Recovery below. Update `doc/mysql/schema.sql` to include the +column. + +`sqladapter/writer.go:67` becomes: + +```go +statement.WriteString(`INSERT INTO Messages (type, content_type, payload) VALUES `) +// args: (message.Type, message.ContentType, message.Content.Bytes()) +``` + +`(?, ?, ?)` per row. + +CORRECTION: Unfortunately, we are not going to make any changes to the schema of the Messages table. This table has over 50 million events. When writing/reading from this table we currently assume json. We should still set the content-type whenever dispatching. + +**Change C (`sqladapter/dispatcher.go`):** stop passing `Value`; pass the +pre-encoded fields: + +```go +dispatches = append(dispatches, messaging.Dispatch{ + Durable: true, + MessageType: message.Type, + ContentType: message.ContentType, + Payload: message.Content.Bytes(), + Topic: message.Type, // matches connector's topicFromMessageType=true behavior +}) +``` + +Why `Topic: message.Type` here? The connector's encoder normally sets +`Topic = MessageType` *only when* its `topicFromMessageType` flag is on +(`dispatch_encoder.go:55`). With `Payload` already populated, the encoder takes +its `len(dispatch.Payload) > 0` early-return (`dispatch_encoder.go:31`) and +never runs the topic-population block. So the dispatcher must populate Topic +itself, and Topic equals Type for this pipeline. (If a future deployment needs +a different topic-derivation rule, that rule will have to be wired into the +sqladapter Dispatcher explicitly — there is no longer a shared encoder +deciding it.) That deviation from the connector's behavior is **intentional +and noted** as a trade-off below. + +**Change D (`sqladapter/recovery.go`):** scan `content_type` and propagate it. +Drop the hardcoded `"application/json"`: + +```sql +SELECT id, type, content_type, payload + FROM Messages + WHERE dispatched IS NULL + ORDER BY id +``` + +```go +messages = append(messages, &harness.Message{ + ID: id, + Type: typeName, + Content: bytes.NewBuffer(payload), + ContentType: contentType, // from the row, not hardcoded +}) +``` + +Rows that pre-date the migration arrive with `content_type = ''`. Two options: + +1. Treat empty as `"application/json"` (today's hardcoded value) — preserves + today's coincidental behavior for in-flight rows during a deploy. +2. Refuse to recover rows with empty `content_type` — surfaces the migration + gap loudly. + +We pick **(1)** with a one-line `logger.Printf("[WARN] …")` per such row, so +the migration is observable but not destructive. The default applies only to +rows written before this change is deployed; once it's in, every new row +carries its real content type. + +CORRECTION: we aren't going to modify the schema of the Messages table. Assume application/json when reading. + +### Files created / modified + +| File | Change | +|-------------------------------------------------------|---------------------------------------------------------------------------------------------------------| +| `doc/mysql/schema.sql` | Add `content_type varchar(128) NOT NULL DEFAULT ''` column to `Messages`. | +| `handlers/harness/contracts.go` | Extend internal `serializer` interface with `ContentType() string`. | +| `handlers/harness/02_serialization.go` | On success, set `message.ContentType = this.serializer.ContentType()`. | +| `handlers/harness/02_serialization_test.go` | Update fixture serializer to satisfy `ContentType() string`; assert ContentType propagates. | +| `handlers/harness/config.go` | `nop.ContentType() string { return "" }`. | +| `handlers/harness/sqladapter/writer.go` | Insert `content_type` column from `message.ContentType`. | +| `handlers/harness/sqladapter/writer_test.go` | Assert the persisted `content_type` column matches input; widen `serializedMessage` helper accordingly. | +| `handlers/harness/sqladapter/dispatcher.go` | Pass `Payload`/`MessageType`/`ContentType`/`Topic` from `*harness.Message`; remove TODO. | +| `handlers/harness/sqladapter/dispatcher_test.go` | Assert published Dispatch carries Payload/MessageType/ContentType/Topic; tighten stub connector. | +| `handlers/harness/sqladapter/recovery.go` | Select `content_type`, propagate it; legacy `''` → warn + default to `"application/json"`. | +| `handlers/harness/sqladapter/recovery_test.go` | Seed rows with explicit `content_type`; assert the recovered Dispatch carries that content type. | + +No production-side migration tooling is in scope; the column has a default, so +deployments add the column with `ALTER TABLE` ahead of the new binary and the +binary tolerates the legacy default for any rows that race. + +### Alternatives considered + +- **Drop the harness Serialization stage; let the connector serialize.** The + other arm of the TODO. Rejected: the persistence stage would have to call + the connector's encoder before INSERT (otherwise we lose the + content-type-at-store-time invariant), which entangles the harness with the + serialization-connector and changes the order of failure (a connector + encoding bug would now poison persistence rather than just dispatch). + Pass-through is the smaller change. + +- **Skip the schema migration; have recovery re-derive content type from a + registered serializer.** Rejected: recovery would have to be told which + serializer was in effect *when the row was written*, which is unknowable + after a serializer-config change. Storing the content type next to the bytes + is the only way to guarantee recovery re-publishes what was originally + persisted. + +- **Have the dispatcher consult the connector's `topicFromMessageType` setting + to decide whether to populate Topic.** Rejected: the sqladapter dispatcher + has no handle to that config, the configuration is owned by a different + package, and the dispatcher's contract is "publish what was persisted." For + this pipeline, Topic = Type is the rule; encoding it explicitly in the + dispatcher makes the rule visible at the call site instead of hidden in + another package's option flag. + +## Trade-offs & Risks + +- **Schema migration is required.** Existing deployments must `ALTER TABLE` + before rolling the new binary. The new binary tolerates the legacy default + on read (warns and falls back to `"application/json"`); old binaries + tolerate the new column (they don't reference it). So the deploy order is: + (1) ALTER TABLE in prod, (2) deploy new binary. There is a window where + rows might be written by an old binary into a schema with the new column — + the column has `DEFAULT ''`, so the INSERT still succeeds; those rows + recover via the legacy fallback path. + +- **Topic = MessageType is now hardcoded in the sqladapter Dispatcher.** This + matches the connector's default behavior for the existing deployments + (`topicFromMessageType=true`) but it removes the option flag's reach for the + pre-encoded path. If a future deployment needs `Topic != MessageType`, the + rule has to be wired explicitly into the dispatcher (a function, a map, or + a new field on `harness.Message`). Worth flagging in a code comment so a + future reader understands why the topic rule was duplicated here. + +- **The connector encoder's `len(Payload) > 0` early-return is now load-bearing + for correctness, not just optimization.** The contract becomes: "if Payload + is set, leave the dispatch alone; the caller has populated everything that + the broker needs." That's already its behavior + (`dispatch_encoder.go:31`), but we are now relying on it. A short test in + `serialization/dispatch_encoder_test.go` already asserts this (`TestWhen- + DispatchAlreadyContainsSerializedPayload_Nop`); we'll cross-reference it in + a comment but not duplicate it. + +- **Recovery's legacy-row warning is one log line per row.** For a migration + with thousands of un-dispatched rows, that could be loud. We could rate- + limit, but the situation is itself a one-off (the cutover deploy); a one- + time spike of warnings is the right signal. We will *not* rate-limit. + +- **The dispatcher unit test that previously asserted `published[0].Message` + must change** — the stub connector observes `Message` today, but after the + change the dispatcher passes `Payload` and leaves `Message` nil. The test + becomes: assert `Payload` is the persisted bytes, `MessageType` is the + registered name, `ContentType` is what the harness recorded, `Topic` equals + MessageType, `Durable` is true. + +- **Coverage and shape of behavior tests are otherwise unchanged.** No new + goroutine semantics, no concurrency-shape changes; this is a pure data-flow + refactor with one schema column added. + +## Implementation Checklist + +### Phase 1: ContentType on the internal serializer interface (red → green) + +- [ ] Extend `02_serialization_test.go` fixture: add `ContentType() string { return "test/content-type" }` and a new test `TestSerializesEachResultValueIntoContent_PopulatesContentTypeOnSuccess` asserting `units[0].results[0].ContentType == "test/content-type"`. +- [ ] Run tests, confirm failure (compile error: `*SerializationFixture` does not satisfy `serializer` once we extend the interface — wait, we extend the interface in the next step, so the failure is the assertion: ContentType is empty). +- [ ] Extend `serializer` interface in `handlers/harness/contracts.go` to include `ContentType() string`. +- [ ] Add `ContentType() string { return "" }` to `nop` in `config.go` so the default still satisfies the interface. +- [ ] In `02_serialization.go`, on the success branch, set `message.ContentType = this.serializer.ContentType()`. Leave the fallback branch's existing `"go fmt.Sprintf(%#v)"` assignment alone. +- [ ] Run tests, confirm green. Verify the fallback test (`TestSerializerErrorIsTracked_FallbackToFmtSprintfEncoding`) still asserts the `"go fmt.Sprintf(%#v)"` ContentType. + +### Phase 2: Persist `content_type` (red → green) + +- [ ] Update `doc/mysql/schema.sql` to include `content_type varchar(128) NOT NULL DEFAULT ''` and a corresponding column in `CREATE TABLE Messages`. +- [ ] Update `testdb_test.go` (the integration-test schema bootstrap) to match. +- [ ] Modify `WriterFixture.serializedMessage` (in `writer_test.go`) to set a non-empty `ContentType` (e.g. `"application/json"`); add `TestWrite_PersistsContentTypeColumn` asserting the row's `content_type` equals the input `ContentType`. +- [ ] Run tests, confirm failure (the column is missing in the INSERT — Go SQL error or assertion fails). +- [ ] In `sqladapter/writer.go`, change the INSERT to `(type, content_type, payload) VALUES (?, ?, ?)`; append `message.ContentType` between `message.Type` and `message.Content.Bytes()`. Update the `args` capacity hint accordingly. +- [ ] Run tests, confirm green. Verify pre-existing writer tests still pass (no payload behavior change). + +### Phase 3: Dispatcher pass-through (red → green) + +- [ ] Add `TestDispatch_PublishesPreEncodedPayloadAndMetadata` to `dispatcher_test.go`: seed a message via the existing `seedMessage` (extended to carry `Type`, `ContentType`, and pre-encoded `Content`); assert the recorded `messaging.Dispatch` has `Payload == message.Content.Bytes()`, `MessageType == message.Type`, `ContentType == message.ContentType`, `Topic == message.Type`, `Durable == true`, and `Message == nil`. +- [ ] Run tests, confirm failure (today the dispatcher sends `Message: message.Value` and leaves Payload/MessageType/ContentType/Topic blank). +- [ ] Edit `sqladapter/dispatcher.go:55-71`: + - Drop the TODO comment block. + - Change the `messaging.Dispatch` literal to `{Durable: true, MessageType: message.Type, ContentType: message.ContentType, Payload: message.Content.Bytes(), Topic: message.Type}`. + - Add a one-line comment that Topic = Type because the connector encoder is short-circuited by the pre-populated Payload. +- [ ] Run tests, confirm green. Update or delete the prior `TestDispatch_PublishesAndMarksDispatched` assertion that referenced `published[0].Message` so it asserts on Payload/MessageType/ContentType/Topic instead. +- [ ] Confirm `TestDispatch_PublishFails_ReturnsErrorWithoutMarkingDispatched` and `TestDispatch_NoMessages_NoOp` still pass. + +### Phase 4: Recovery propagates ContentType (red → green) + +- [ ] Update `recovery_test.go` `seedUndispatched` helper to take a `contentType string` parameter and INSERT it; thread the value through existing tests with `"application/json"`. +- [ ] Add `TestRecover_PropagatesContentTypeFromRow`: seed two rows with different `content_type` values (e.g. `"application/json"`, `"application/x-protobuf"`); assert the resulting `messaging.Dispatch` records carry those exact ContentType values. +- [ ] Add `TestRecover_LegacyRowWithEmptyContentType_DefaultsToJSONAndWarns`: seed a row with `content_type = ''`; assert the resulting Dispatch's `ContentType == "application/json"` and a warning was logged. (Use a capturing logger.) +- [ ] Run tests, confirm failure (recovery currently hardcodes `"application/json"` for every row regardless of column). +- [ ] Edit `sqladapter/recovery.go`: + - Add `content_type` to the SELECT column list and to `rows.Scan`. + - Drop the hardcoded ContentType. + - When the scanned content type is `""`, set ContentType to `"application/json"` and `logger.Printf("[WARN] Recovering legacy row id=%d with empty content_type; defaulting to application/json", id)`. +- [ ] Run tests, confirm green. Confirm `TestRecover_NoOrphans_NoOp`, `TestRecover_DispatchesUndispatchedRowsInIDOrder`, `TestRecover_RowsExceedBatchSize_FlushesInBatchesAndDispatchesAll`, and `TestRecover_RowCountIsMultipleOfBatchSize_FlushesUniformBatches` all still pass with the updated seed helper. + +### Phase 5: Full verification + +- [ ] Run `make test` (fmt, vet, `-race`, coverage). Confirm green and that `handlers/harness` and `handlers/harness/sqladapter` coverage have not regressed. +- [ ] Re-read the diff against the `CLAUDE.md` Go conventions: receiver named `this`; named slice/return values where applicable; no naked returns; no blank lines at method start/end; struct initializers use field/value pairs; multi-line struct literals close the brace on their own line. +- [ ] Sanity-check the four diffs against the three defects in Background: (a) no double encode (dispatcher passes pre-encoded bytes; connector encoder short-circuits on `len(Payload) > 0`), (b) ContentType propagates through every stage end-to-end (serialization → message → row → recovery → dispatch), (c) recovery no longer hardcodes `"application/json"` for new rows. +- [ ] Confirm package doc comment in `handlers/harness/sqladapter/dispatcher.go` still accurately describes the columns it depends on (`type`, `content_type`, `payload`, `dispatched`, `id`); update if needed. From 8368955cc0652ba98d423f93abb0817961773462 Mon Sep 17 00:00:00 2001 From: Michael Whatcott Date: Tue, 2 Jun 2026 12:38:57 -0600 Subject: [PATCH 29/50] Refine serialization ownership proposal. --- ...4-19-proposal-dispatch-content-encoding.md | 235 +++++++----------- 1 file changed, 92 insertions(+), 143 deletions(-) diff --git a/doc/work-sessions/2026/2026-06-01_16-14-19-proposal-dispatch-content-encoding.md b/doc/work-sessions/2026/2026-06-01_16-14-19-proposal-dispatch-content-encoding.md index f52767e..636cb50 100644 --- a/doc/work-sessions/2026/2026-06-01_16-14-19-proposal-dispatch-content-encoding.md +++ b/doc/work-sessions/2026/2026-06-01_16-14-19-proposal-dispatch-content-encoding.md @@ -1,6 +1,6 @@ --- name: Stop double-encoding payloads across dispatch and recovery -description: Eliminate the redundant second serialization in the sqladapter Dispatcher, propagate ContentType through the harness Message and the durable row, and make recovery a faithful re-dispatch (not a JSON-shaped guess) — closing the TODO at handlers/harness/sqladapter/dispatcher.go:58 and the related content-type mishandling on the recovery path. +description: Eliminate the redundant second serialization in the sqladapter Dispatcher and propagate ContentType through the in-memory harness Message — closing the TODO at handlers/harness/sqladapter/dispatcher.go:58 and fixing the recovery-path content-type and Topic mishandling. No schema changes. type: plot --- @@ -22,9 +22,9 @@ recovered row at all. There are three connected defects across `handlers/harness/02_serialization.go`, `handlers/harness/sqladapter/dispatcher.go`, and `handlers/harness/sqladapter/recovery.go`. They are usually invisible (the -default JSON serializer happens to make the bug benign) and they are masked by -the unit-test stub connector — but they are real, and recovery against a real -RabbitMQ writer is broken today. +production serializer is JSON, which makes the bug benign) and they are masked +by the unit-test stub connector — but they are real, and recovery against a +real RabbitMQ writer is broken today. ### Defect 1: payload is encoded twice on the happy path @@ -69,15 +69,17 @@ if err != nil { ``` On the success path `message.ContentType` is left as the zero value `""`. The -field exists, the schema row has nowhere to store it, and the dispatcher never -reads it. +field exists, and the dispatcher never reads it. This is fine *today* because `dispatcher.go` re-encodes via the connector (Defect 1), so the connector reapplies its own `ContentType()`. The moment we fix Defect 1 by passing pre-encoded bytes through, we have to know what the -content type is — and the harness never recorded it. +content type is — and the harness never recorded it. The fix lives entirely +in process memory: stage 02 stamps `message.ContentType` from +`serializer.ContentType()` so the dispatcher (next stage in the same process) +can read it. **No schema change.** -### Defect 3: recovery silently lies about content type, and is broken against a real broker +### Defect 3: recovery hands the dispatcher a malformed message and breaks against a real broker `recovery.go:42-47` builds a `harness.Message` from the row: @@ -86,38 +88,40 @@ messages = append(messages, &harness.Message{ ID: id, Type: typeName, Content: bytes.NewBuffer(payload), - ContentType: "application/json", // <-- hardcoded + ContentType: "application/json", }) ``` -`Value` is left nil. The hardcoded `"application/json"` is read by no one today -(see Defect 2: nothing in dispatch reads `Message.ContentType`), so recovery -"works" by coincidence whenever the configured serializer is JSON. Plug in a -gob/proto/msgpack serializer and the stored bytes are no longer JSON, but the -recovery path still labels them `"application/json"`. Today that label is -ignored, but as soon as we fix Defects 1 and 2 to *use* the recorded content -type, this hardcode becomes a live mislabel. - -There is also a worse, present-tense failure: with `Value == nil` and -`Payload == nil`, the connector's encoder takes its `dispatch.Message == nil` -early-return (`dispatch_encoder.go:31`) and **never sets `Topic` / -`MessageType` / `ContentType`**. The RabbitMQ writer then rejects with -`ErrEmptyDispatchTopic` (`rabbitmq/writer.go:38`). So in the only production -configuration that uses `topicFromMessageType=true` (the encoder derives Topic -from MessageType), recovery can't publish a single row. The current dispatcher -unit tests don't catch this because the stub connector -(`dispatcher_test.go:111`) ignores the topic field entirely. - -### Why fix all three together - -The three defects are one defect viewed from three sides: the harness owns the -serialization, but the harness throws away two of the three things it learned -(`ContentType`, the encoded bytes themselves) and the connector re-derives -them. Recovery, which has only what was persisted, is the path that *must* -carry the ContentType through, and it can't because the harness never wrote -it down. Fixing the dispatcher TODO without fixing the schema/recovery makes -recovery worse, not better. Fixing recovery without fixing the dispatcher TODO -leaves the double-encode in place. They go together. +`Value` is left nil. The dispatcher today (per Defect 1) passes +`Message: message.Value` to the connector — which means recovery passes +`Message: nil` and `Payload: nil`. The connector's encoder takes its +`dispatch.Message == nil` early-return (`dispatch_encoder.go:31`) and **never +sets `Topic` / `MessageType` / `ContentType`**. The RabbitMQ writer then +rejects with `ErrEmptyDispatchTopic` (`rabbitmq/writer.go:38`). So in the only +production configuration that uses `topicFromMessageType=true`, recovery can't +publish a single row. The current dispatcher unit tests don't catch this +because the stub connector (`dispatcher_test.go:111`) ignores the topic field +entirely. + +Defect 3 is **fixed automatically** once Defect 1 is fixed: the new dispatcher +reads `Type` / `ContentType` / `Content` (which recovery already populates) +from the `*harness.Message` and hands them to the connector as pre-populated +`MessageType` / `ContentType` / `Payload` / `Topic`. The connector encoder's +`len(Payload) > 0` early-return then leaves everything alone, and the broker +gets a complete dispatch. + +The hardcoded `"application/json"` in recovery is **kept** — we deliberately +do not modify the `Messages` schema (the table holds 50M+ rows; we don't +extend it for this), and the project's standing assumption is that the +durable bytes are JSON. We document that assumption explicitly rather than +carry it implicitly. + +### Why fix Defects 1 and 2 together + +Defect 2 is the prerequisite for fixing Defect 1: the dispatcher can only +hand pre-encoded `ContentType` to the connector if the serialization stage +recorded it. The two changes ride together, and they fix Defect 3 as a +side effect. ## Approach @@ -126,15 +130,15 @@ remains the single source of truth for encoded bytes and content type; the sqladapter Dispatcher hands those pre-encoded bytes to the connector via `Dispatch.Payload` / `MessageType` / `ContentType`; the connector encoder short-circuits because `Payload` is non-empty. Recovery does the same -pass-through using the row's stored `type` and `content_type`. +pass-through — using the row's stored `type` and the project-wide assumption +that stored bytes are `application/json`. This is the smaller, safer arm. The alternative (delete the harness Serialization stage entirely and let the connector own all encoding) would -require persistence to also call the connector's encoder, would entangle the -harness with the connector, and would lose the content-type-known-at-store- -time invariant that recovery depends on. Rejected on those grounds. +require persistence to also call the connector's encoder and would entangle +the harness with the connector. Rejected on those grounds. -### The four changes +### The three changes **Change A (`02_serialization.go`):** record the content type on the success path. We need `serializer.ContentType()` on the internal `serializer` @@ -167,30 +171,7 @@ same, and the production wiring (where callers pass exposes `ContentType() string`, `contracts.go:9-11`) needs no changes since that interface already satisfies the extended one. -**Change B (`message.go` schema + `Writer`):** persist `content_type` -alongside `type` and `payload`. Schema migration: - -```sql -ALTER TABLE Messages ADD COLUMN content_type varchar(128) NOT NULL DEFAULT ''; -``` - -(`varchar(128)` comfortably holds e.g. `application/json; charset=utf-8`.) The -default lets existing rows survive; recovery will treat `''` as "use legacy -default" — see Recovery below. Update `doc/mysql/schema.sql` to include the -column. - -`sqladapter/writer.go:67` becomes: - -```go -statement.WriteString(`INSERT INTO Messages (type, content_type, payload) VALUES `) -// args: (message.Type, message.ContentType, message.Content.Bytes()) -``` - -`(?, ?, ?)` per row. - -CORRECTION: Unfortunately, we are not going to make any changes to the schema of the Messages table. This table has over 50 million events. When writing/reading from this table we currently assume json. We should still set the content-type whenever dispatching. - -**Change C (`sqladapter/dispatcher.go`):** stop passing `Value`; pass the +**Change B (`sqladapter/dispatcher.go`):** stop passing `Value`; pass the pre-encoded fields: ```go @@ -214,58 +195,45 @@ sqladapter Dispatcher explicitly — there is no longer a shared encoder deciding it.) That deviation from the connector's behavior is **intentional and noted** as a trade-off below. -**Change D (`sqladapter/recovery.go`):** scan `content_type` and propagate it. -Drop the hardcoded `"application/json"`: +**Change C (`sqladapter/recovery.go`):** no functional change; tighten the +construction comment to acknowledge the JSON assumption. -```sql -SELECT id, type, content_type, payload - FROM Messages - WHERE dispatched IS NULL - ORDER BY id -``` +The current code already populates everything Change B's dispatcher needs: ```go messages = append(messages, &harness.Message{ ID: id, Type: typeName, Content: bytes.NewBuffer(payload), - ContentType: contentType, // from the row, not hardcoded + ContentType: "application/json", }) ``` -Rows that pre-date the migration arrive with `content_type = ''`. Two options: - -1. Treat empty as `"application/json"` (today's hardcoded value) — preserves - today's coincidental behavior for in-flight rows during a deploy. -2. Refuse to recover rows with empty `content_type` — surfaces the migration - gap loudly. +`Type` and `Content` come from the row; `ContentType` is the project-wide +assumption that stored bytes are JSON. With Change B in place, the dispatcher +reads these and the broker dispatch is well-formed. The change is to: -We pick **(1)** with a one-line `logger.Printf("[WARN] …")` per such row, so -the migration is observable but not destructive. The default applies only to -rows written before this change is deployed; once it's in, every new row -carries its real content type. +- Replace the zero-value `// hardcoded` framing with a brief comment that + states the assumption: the durable column holds JSON because the + configured serializer is JSON; recovery cannot recover what was not + recorded, so it carries that assumption forward. -CORRECTION: we aren't going to modify the schema of the Messages table. Assume application/json when reading. +That's it — no SQL or schema change. -### Files created / modified +### Files modified | File | Change | |-------------------------------------------------------|---------------------------------------------------------------------------------------------------------| -| `doc/mysql/schema.sql` | Add `content_type varchar(128) NOT NULL DEFAULT ''` column to `Messages`. | | `handlers/harness/contracts.go` | Extend internal `serializer` interface with `ContentType() string`. | | `handlers/harness/02_serialization.go` | On success, set `message.ContentType = this.serializer.ContentType()`. | | `handlers/harness/02_serialization_test.go` | Update fixture serializer to satisfy `ContentType() string`; assert ContentType propagates. | | `handlers/harness/config.go` | `nop.ContentType() string { return "" }`. | -| `handlers/harness/sqladapter/writer.go` | Insert `content_type` column from `message.ContentType`. | -| `handlers/harness/sqladapter/writer_test.go` | Assert the persisted `content_type` column matches input; widen `serializedMessage` helper accordingly. | | `handlers/harness/sqladapter/dispatcher.go` | Pass `Payload`/`MessageType`/`ContentType`/`Topic` from `*harness.Message`; remove TODO. | | `handlers/harness/sqladapter/dispatcher_test.go` | Assert published Dispatch carries Payload/MessageType/ContentType/Topic; tighten stub connector. | -| `handlers/harness/sqladapter/recovery.go` | Select `content_type`, propagate it; legacy `''` → warn + default to `"application/json"`. | -| `handlers/harness/sqladapter/recovery_test.go` | Seed rows with explicit `content_type`; assert the recovered Dispatch carries that content type. | +| `handlers/harness/sqladapter/recovery.go` | Comment-only: state the JSON-content-type assumption inline. | +| `handlers/harness/sqladapter/recovery_test.go` | Add a regression test asserting the recovered Dispatch carries `Topic`/`MessageType`/`Payload`/`ContentType`. | -No production-side migration tooling is in scope; the column has a default, so -deployments add the column with `ALTER TABLE` ahead of the new binary and the -binary tolerates the legacy default for any rows that race. +No schema files (`doc/mysql/schema.sql`, `testdb_test.go`) are touched. ### Alternatives considered @@ -277,12 +245,11 @@ binary tolerates the legacy default for any rows that race. encoding bug would now poison persistence rather than just dispatch). Pass-through is the smaller change. -- **Skip the schema migration; have recovery re-derive content type from a - registered serializer.** Rejected: recovery would have to be told which - serializer was in effect *when the row was written*, which is unknowable - after a serializer-config change. Storing the content type next to the bytes - is the only way to guarantee recovery re-publishes what was originally - persisted. +- **Add a `content_type` column to `Messages` so recovery is faithful to a + serializer change.** Rejected: the table holds 50M+ rows, the project- + standing assumption is JSON, and the operational cost of an `ALTER TABLE` on + that table outweighs the value of recovering exotic content types we don't + use. The assumption is documented explicitly in code instead. - **Have the dispatcher consult the connector's `topicFromMessageType` setting to decide whether to populate Topic.** Rejected: the sqladapter dispatcher @@ -294,14 +261,15 @@ binary tolerates the legacy default for any rows that race. ## Trade-offs & Risks -- **Schema migration is required.** Existing deployments must `ALTER TABLE` - before rolling the new binary. The new binary tolerates the legacy default - on read (warns and falls back to `"application/json"`); old binaries - tolerate the new column (they don't reference it). So the deploy order is: - (1) ALTER TABLE in prod, (2) deploy new binary. There is a window where - rows might be written by an old binary into a schema with the new column — - the column has `DEFAULT ''`, so the INSERT still succeeds; those rows - recover via the legacy fallback path. +- **Recovery carries a JSON assumption, not a recorded fact.** With no + `content_type` column on `Messages`, recovery cannot know what serializer + was in effect when a row was written; it assumes `application/json`. If + the configured serializer is ever changed to something else, in-flight + rows persisted under the old serializer will be re-dispatched on next + startup tagged `application/json` regardless of what they actually contain. + Mitigation: drain the queue (no `dispatched IS NULL` rows) before swapping + serializers. Acceptable risk because the project has historically used JSON + and has no current plan to change. - **Topic = MessageType is now hardcoded in the sqladapter Dispatcher.** This matches the connector's default behavior for the existing deployments @@ -315,16 +283,11 @@ binary tolerates the legacy default for any rows that race. for correctness, not just optimization.** The contract becomes: "if Payload is set, leave the dispatch alone; the caller has populated everything that the broker needs." That's already its behavior - (`dispatch_encoder.go:31`), but we are now relying on it. A short test in + (`dispatch_encoder.go:31`), and a short test in `serialization/dispatch_encoder_test.go` already asserts this (`TestWhen- DispatchAlreadyContainsSerializedPayload_Nop`); we'll cross-reference it in a comment but not duplicate it. -- **Recovery's legacy-row warning is one log line per row.** For a migration - with thousands of un-dispatched rows, that could be loud. We could rate- - limit, but the situation is itself a one-off (the cutover deploy); a one- - time spike of warnings is the right signal. We will *not* rate-limit. - - **The dispatcher unit test that previously asserted `published[0].Message` must change** — the stub connector observes `Message` today, but after the change the dispatcher passes `Payload` and leaves `Message` nil. The test @@ -334,54 +297,40 @@ binary tolerates the legacy default for any rows that race. - **Coverage and shape of behavior tests are otherwise unchanged.** No new goroutine semantics, no concurrency-shape changes; this is a pure data-flow - refactor with one schema column added. + refactor — strictly in-process, no schema migration. ## Implementation Checklist ### Phase 1: ContentType on the internal serializer interface (red → green) - [ ] Extend `02_serialization_test.go` fixture: add `ContentType() string { return "test/content-type" }` and a new test `TestSerializesEachResultValueIntoContent_PopulatesContentTypeOnSuccess` asserting `units[0].results[0].ContentType == "test/content-type"`. -- [ ] Run tests, confirm failure (compile error: `*SerializationFixture` does not satisfy `serializer` once we extend the interface — wait, we extend the interface in the next step, so the failure is the assertion: ContentType is empty). +- [ ] Run tests, confirm failure (assertion fails: success path leaves `ContentType` empty). - [ ] Extend `serializer` interface in `handlers/harness/contracts.go` to include `ContentType() string`. - [ ] Add `ContentType() string { return "" }` to `nop` in `config.go` so the default still satisfies the interface. - [ ] In `02_serialization.go`, on the success branch, set `message.ContentType = this.serializer.ContentType()`. Leave the fallback branch's existing `"go fmt.Sprintf(%#v)"` assignment alone. - [ ] Run tests, confirm green. Verify the fallback test (`TestSerializerErrorIsTracked_FallbackToFmtSprintfEncoding`) still asserts the `"go fmt.Sprintf(%#v)"` ContentType. -### Phase 2: Persist `content_type` (red → green) - -- [ ] Update `doc/mysql/schema.sql` to include `content_type varchar(128) NOT NULL DEFAULT ''` and a corresponding column in `CREATE TABLE Messages`. -- [ ] Update `testdb_test.go` (the integration-test schema bootstrap) to match. -- [ ] Modify `WriterFixture.serializedMessage` (in `writer_test.go`) to set a non-empty `ContentType` (e.g. `"application/json"`); add `TestWrite_PersistsContentTypeColumn` asserting the row's `content_type` equals the input `ContentType`. -- [ ] Run tests, confirm failure (the column is missing in the INSERT — Go SQL error or assertion fails). -- [ ] In `sqladapter/writer.go`, change the INSERT to `(type, content_type, payload) VALUES (?, ?, ?)`; append `message.ContentType` between `message.Type` and `message.Content.Bytes()`. Update the `args` capacity hint accordingly. -- [ ] Run tests, confirm green. Verify pre-existing writer tests still pass (no payload behavior change). - -### Phase 3: Dispatcher pass-through (red → green) +### Phase 2: Dispatcher pass-through (red → green) -- [ ] Add `TestDispatch_PublishesPreEncodedPayloadAndMetadata` to `dispatcher_test.go`: seed a message via the existing `seedMessage` (extended to carry `Type`, `ContentType`, and pre-encoded `Content`); assert the recorded `messaging.Dispatch` has `Payload == message.Content.Bytes()`, `MessageType == message.Type`, `ContentType == message.ContentType`, `Topic == message.Type`, `Durable == true`, and `Message == nil`. +- [ ] Add `TestDispatch_PublishesPreEncodedPayloadAndMetadata` to `dispatcher_test.go`: extend the `seedMessage` helper (or inline its setup) so the `*harness.Message` carries `Type`, `ContentType` (e.g. `"application/json"`), and a pre-encoded `Content` buffer. Assert the recorded `messaging.Dispatch` has `Payload == message.Content.Bytes()`, `MessageType == message.Type`, `ContentType == message.ContentType`, `Topic == message.Type`, `Durable == true`, and `Message == nil`. - [ ] Run tests, confirm failure (today the dispatcher sends `Message: message.Value` and leaves Payload/MessageType/ContentType/Topic blank). - [ ] Edit `sqladapter/dispatcher.go:55-71`: - Drop the TODO comment block. - Change the `messaging.Dispatch` literal to `{Durable: true, MessageType: message.Type, ContentType: message.ContentType, Payload: message.Content.Bytes(), Topic: message.Type}`. - - Add a one-line comment that Topic = Type because the connector encoder is short-circuited by the pre-populated Payload. -- [ ] Run tests, confirm green. Update or delete the prior `TestDispatch_PublishesAndMarksDispatched` assertion that referenced `published[0].Message` so it asserts on Payload/MessageType/ContentType/Topic instead. + - Add a one-line comment explaining that Topic = Type because the connector encoder is short-circuited by the pre-populated Payload (cross-reference `serialization/dispatch_encoder_test.go:TestWhenDispatchAlreadyContainsSerializedPayload_Nop`). +- [ ] Run tests, confirm green. Update `TestDispatch_PublishesAndMarksDispatched` so its `published[0].Message` assertion is replaced with assertions on Payload/MessageType/ContentType/Topic. - [ ] Confirm `TestDispatch_PublishFails_ReturnsErrorWithoutMarkingDispatched` and `TestDispatch_NoMessages_NoOp` still pass. -### Phase 4: Recovery propagates ContentType (red → green) +### Phase 3: Recovery regression test + comment (red → green) -- [ ] Update `recovery_test.go` `seedUndispatched` helper to take a `contentType string` parameter and INSERT it; thread the value through existing tests with `"application/json"`. -- [ ] Add `TestRecover_PropagatesContentTypeFromRow`: seed two rows with different `content_type` values (e.g. `"application/json"`, `"application/x-protobuf"`); assert the resulting `messaging.Dispatch` records carry those exact ContentType values. -- [ ] Add `TestRecover_LegacyRowWithEmptyContentType_DefaultsToJSONAndWarns`: seed a row with `content_type = ''`; assert the resulting Dispatch's `ContentType == "application/json"` and a warning was logged. (Use a capturing logger.) -- [ ] Run tests, confirm failure (recovery currently hardcodes `"application/json"` for every row regardless of column). -- [ ] Edit `sqladapter/recovery.go`: - - Add `content_type` to the SELECT column list and to `rows.Scan`. - - Drop the hardcoded ContentType. - - When the scanned content type is `""`, set ContentType to `"application/json"` and `logger.Printf("[WARN] Recovering legacy row id=%d with empty content_type; defaulting to application/json", id)`. -- [ ] Run tests, confirm green. Confirm `TestRecover_NoOrphans_NoOp`, `TestRecover_DispatchesUndispatchedRowsInIDOrder`, `TestRecover_RowsExceedBatchSize_FlushesInBatchesAndDispatchesAll`, and `TestRecover_RowCountIsMultipleOfBatchSize_FlushesUniformBatches` all still pass with the updated seed helper. +- [ ] Add `TestRecover_PublishesDispatchWithTopicMessageTypeAndPayload`: seed an undispatched row with `type='order-received'` and a JSON payload; run `Recover`; assert the resulting `messaging.Dispatch` has `Topic == "order-received"`, `MessageType == "order-received"`, `ContentType == "application/json"`, `Payload == `, `Durable == true`, and `Message == nil`. +- [ ] Run the test. With Phase 2 in place, this test should already pass (recovery's `*harness.Message` is well-formed and the new dispatcher reads it correctly). If it fails, that is the right reason to fix it before continuing. +- [ ] Edit `sqladapter/recovery.go:42-47`: replace the inline framing of `ContentType: "application/json"` with a comment that states the assumption — the project's serializer is JSON; the durable bytes are recorded without a content-type column, so recovery carries that assumption forward. +- [ ] Run tests, confirm green. Confirm `TestRecover_NoOrphans_NoOp`, `TestRecover_DispatchesUndispatchedRowsInIDOrder`, `TestRecover_PassesPayloadAndTypeIntoMessage`, `TestRecover_RowsExceedBatchSize_FlushesInBatchesAndDispatchesAll`, and `TestRecover_RowCountIsMultipleOfBatchSize_FlushesUniformBatches` all still pass. -### Phase 5: Full verification +### Phase 4: Full verification - [ ] Run `make test` (fmt, vet, `-race`, coverage). Confirm green and that `handlers/harness` and `handlers/harness/sqladapter` coverage have not regressed. - [ ] Re-read the diff against the `CLAUDE.md` Go conventions: receiver named `this`; named slice/return values where applicable; no naked returns; no blank lines at method start/end; struct initializers use field/value pairs; multi-line struct literals close the brace on their own line. -- [ ] Sanity-check the four diffs against the three defects in Background: (a) no double encode (dispatcher passes pre-encoded bytes; connector encoder short-circuits on `len(Payload) > 0`), (b) ContentType propagates through every stage end-to-end (serialization → message → row → recovery → dispatch), (c) recovery no longer hardcodes `"application/json"` for new rows. -- [ ] Confirm package doc comment in `handlers/harness/sqladapter/dispatcher.go` still accurately describes the columns it depends on (`type`, `content_type`, `payload`, `dispatched`, `id`); update if needed. +- [ ] Sanity-check the diffs against the three defects in Background: (a) no double encode (dispatcher passes pre-encoded bytes; connector encoder short-circuits on `len(Payload) > 0`), (b) ContentType propagates in-process from serialization → message → dispatch, (c) recovery's `*harness.Message` now flows through the dispatcher to a well-formed broker dispatch (no more `ErrEmptyDispatchTopic`). +- [ ] Confirm package doc comment in `handlers/harness/sqladapter/dispatcher.go` still accurately describes the columns it depends on (`id`, `dispatched`, `type`, `payload`); it has not changed. From 9135b533063c205821987ff46b742806c265187b Mon Sep 17 00:00:00 2001 From: Michael Whatcott Date: Tue, 2 Jun 2026 14:27:11 -0600 Subject: [PATCH 30/50] Convert proposal to HTML --- ...29-proposal-dispatch-content-encoding.html | 334 ++++++++++++++++++ 1 file changed, 334 insertions(+) create mode 100644 doc/work-sessions/2026/2026-06-02_12-40-29-proposal-dispatch-content-encoding.html diff --git a/doc/work-sessions/2026/2026-06-02_12-40-29-proposal-dispatch-content-encoding.html b/doc/work-sessions/2026/2026-06-02_12-40-29-proposal-dispatch-content-encoding.html new file mode 100644 index 0000000..0b6a3a0 --- /dev/null +++ b/doc/work-sessions/2026/2026-06-02_12-40-29-proposal-dispatch-content-encoding.html @@ -0,0 +1,334 @@ + + + + +Proposal: Stop double-encoding payloads across dispatch and recovery + + + +

Proposal: Stop double-encoding payloads across dispatch and recovery

+ +

Background

+

+ The harness pipeline is store-and-forward: each message is serialized once + (stage 02), durably stored (stage 03), then published to the broker (stage 05 + via Dispatcher.Dispatch). On startup, sqladapter.Recover + re-publishes any row whose dispatched column is still NULL. +

+

+ The bytes stored in the Messages.payload column must + be byte-identical to the bytes published to the broker — otherwise a recovery + re-publishes a different message than the original send, silently. Today the + code does not guarantee this, and in one configuration it cannot publish a + recovered row at all. +

+

+ There are three connected defects across handlers/harness/02_serialization.go, + handlers/harness/sqladapter/dispatcher.go, and + handlers/harness/sqladapter/recovery.go. They are usually invisible + (the production serializer is JSON, which makes the bug benign) and they are masked + by the unit-test stub connector — but they are real, and recovery against a real + RabbitMQ writer is broken today. +

+ +

Defect 1: payload is encoded twice on the happy path

+

+ 02_serialization.go:29 encodes message.Value into + message.Content (these are the bytes destined for the durable row). +

+

+ sqladapter/dispatcher.go:64-67 then builds a messaging.Dispatch + with Message: message.Value (the in-memory Go struct) and passes it to the + transport Writer. The transport writer is the serialization.defaultWriter + (serialization/connector.go:103), which loops calls into + defaultDispatchEncoder.Encode (serialization/dispatch_encoder.go:30). + That encoder sees dispatch.Payload is empty and dispatch.Message + is non-nil, and runs the same Serializer.Serialize again + to produce dispatch.Payload/dispatch.ContentType/dispatch.MessageType. +

+

+ So the value is serialized once for the DB, then again for the broker. Both encodings + happen with the same Serializer, so the resulting bytes are equal, + but the harness has no guarantee of that — anyone who plugs in a serializer with side + effects, IDs, or timestamps will get divergent stored vs. published bytes. And it is + wasted CPU on every send. +

+

+ The TODO at dispatcher.go:58-63 calls this out and asks for one of two fixes: + either pass the pre-encoded bytes through Dispatch.Payload/MessageType/ContentType + and skip the connector's serialization for this writer, or drop the harness Serialization + stage and let the connector own all encoding. +

+ +

Defect 2: Message.ContentType is never populated on the happy path

+

+ harness.Message already has ContentType and ContentEncoding + fields (message.go:22-28) — but 02_serialization.go only writes + message.ContentType on the fallback path (when the user's + serializer returned an error and we fall back to fmt.Sprintf("%#v")). + On the success path message.ContentType is left as the zero value "". +

+

+ This is fine today because dispatcher.go re-encodes via the connector + (Defect 1), so the connector reapplies its own ContentType(). The moment we + fix Defect 1 by passing pre-encoded bytes through, we must know what the content type is — + and the harness never recorded it. The fix: stage 02 stamps message.ContentType + from serializer.ContentType() so the dispatcher (next stage in the same process) + can read it. No schema change. +

+ +

Defect 3: recovery hands the dispatcher a malformed message and breaks against a real broker

+

+ recovery.go:42-47 builds a harness.Message from the row with + Value left nil. The dispatcher today (per Defect 1) passes + Message: message.Value to the connector — which means recovery passes + Message: nil and Payload: nil. The connector's encoder takes its + dispatch.Message == nil early-return (dispatch_encoder.go:31) and + never sets Topic / MessageType / ContentType. + The RabbitMQ writer then rejects with ErrEmptyDispatchTopic + (rabbitmq/writer.go:38). So in the only production configuration that uses + topicFromMessageType=true, recovery can't publish a single row. +

+

+ Defect 3 is fixed automatically once Defect 1 is fixed: the new dispatcher + reads Type / ContentType / Content (which recovery + already populates) from the *harness.Message and hands them to the connector as + pre-populated MessageType / ContentType / Payload / + Topic. The connector encoder's len(Payload) > 0 early-return + then leaves everything alone, and the broker gets a complete dispatch. +

+ +

Approach

+

+ We pick the "pass-through" arm of the TODO: the harness Serialization stage remains the + single source of truth for encoded bytes and content type; the sqladapter Dispatcher hands + those pre-encoded bytes to the connector via Dispatch.Payload / + MessageType / ContentType; the connector encoder short-circuits + because Payload is non-empty. Recovery does the same pass-through — using the + row's stored type and the project-wide assumption that stored bytes are + application/json. +

+

+ The alternative (delete the harness Serialization stage entirely and let the connector own + all encoding) would require persistence to also call the connector's encoder and would + entangle the harness with the connector. Rejected on those grounds. +

+ +

Change A — 02_serialization.go: record content type on success

+

+ Extend the internal serializer interface in contracts.go:34: +

+
serializer interface {
+    Serialize(out io.Writer, in any) error
+    ContentType() string
+}
+

+ Then in serialization.Listen, on the success branch: +

+
err := this.serializer.Serialize(message.Content, message.Value)
+if err == nil {
+    message.ContentType = this.serializer.ContentType()
+} else {
+    // existing fallback unchanged: ContentType set to "go fmt.Sprintf(%#v)"
+}
+

+ The nop serializer in config.go:112 gains + ContentType() string { return "" }. The test fixture in + 02_serialization_test.go:40 gains the same stub. The production wiring + (callers pass serialization.Serializer from the serialization + package, which already exposes ContentType() string at + contracts.go:9-11) needs no changes. +

+ +

Change B — sqladapter/dispatcher.go: pass pre-encoded fields

+

+ Replace the current messaging.Dispatch literal (which passes Message: message.Value) + with: +

+
dispatches = append(dispatches, messaging.Dispatch{
+    Durable:     true,
+    MessageType: message.Type,
+    ContentType: message.ContentType,
+    Payload:     message.Content.Bytes(),
+    Topic:       message.Type,
+})
+

+ Topic: message.Type is required because the connector encoder normally sets + Topic = MessageType only when its topicFromMessageType flag is on + (dispatch_encoder.go:55). With Payload already populated, the + encoder takes its len(dispatch.Payload) > 0 early-return + (dispatch_encoder.go:31) and never runs the topic-population block. So the + dispatcher must populate Topic itself. A one-line comment cross-references + serialization/dispatch_encoder_test.go:TestWhenDispatchAlreadyContainsSerializedPayload_Nop + to explain why. +

+ +

Change C — sqladapter/recovery.go: comment-only

+

+ No functional change. Replace the inline framing of ContentType: "application/json" + with a comment that states the assumption: the project's serializer is JSON; the durable + bytes are recorded without a content-type column, so recovery carries that assumption + forward. +

+ +

Files modified

+ + + + + + + + + + + + + + + + + +
FileChange
handlers/harness/contracts.goExtend internal serializer interface with ContentType() string.
handlers/harness/02_serialization.goOn success, set message.ContentType = this.serializer.ContentType().
handlers/harness/02_serialization_test.goUpdate fixture serializer to satisfy ContentType() string; assert ContentType propagates.
handlers/harness/config.gonop.ContentType() string { return "" }.
handlers/harness/sqladapter/dispatcher.goPass Payload/MessageType/ContentType/Topic from *harness.Message; remove TODO.
handlers/harness/sqladapter/dispatcher_test.goAssert published Dispatch carries Payload/MessageType/ContentType/Topic; tighten stub connector.
handlers/harness/sqladapter/recovery.goComment-only: state the JSON content-type assumption inline.
handlers/harness/sqladapter/recovery_test.goAdd regression test asserting recovered Dispatch carries Topic/MessageType/Payload/ContentType.
+

No schema files (doc/mysql/schema.sql, testdb_test.go) are touched.

+ +

Trade-offs & Risks

+
    +
  • + Recovery carries a JSON assumption, not a recorded fact. + With no content_type column on Messages, recovery cannot know + what serializer was in effect when a row was written; it assumes + application/json. If the configured serializer is ever changed, in-flight + rows persisted under the old serializer will be re-dispatched tagged + application/json regardless of what they actually contain. + Mitigation: drain the queue before swapping serializers. Acceptable risk because the + project has historically used JSON and has no current plan to change. +
  • +
  • + Topic = MessageType is now hardcoded in the sqladapter Dispatcher. + This matches the connector's default behavior for existing deployments + (topicFromMessageType=true) but removes the option flag's reach for the + pre-encoded path. If a future deployment needs Topic != MessageType, the + rule has to be wired explicitly into the dispatcher. +
  • +
  • + The connector encoder's len(Payload) > 0 early-return is now + load-bearing for correctness, not just optimization. The contract becomes: + "if Payload is set, leave the dispatch alone; the caller has populated everything the + broker needs." That's already its behavior (dispatch_encoder.go:31), and + TestWhenDispatchAlreadyContainsSerializedPayload_Nop already asserts it; + we'll cross-reference it in a comment but not duplicate it. +
  • +
  • + The dispatcher unit test that previously asserted published[0].Message + must change — after the change the dispatcher passes Payload and + leaves Message nil. The test becomes: assert Payload is the + persisted bytes, MessageType is the registered name, ContentType + is what the harness recorded, Topic equals MessageType, + Durable is true. +
  • +
  • + Coverage and shape of behavior tests are otherwise unchanged. + No new goroutine semantics, no concurrency-shape changes; this is a pure data-flow + refactor — strictly in-process, no schema migration. +
  • +
+ +

Your Turn

+

+ The step marked 🤝 in the checklist is reserved for you to implement. + It was chosen because it is the most interesting production-code contribution + in this proposal — the kind of work worth doing yourself. +

+

What to implement

+

+ In handlers/harness/sqladapter/dispatcher.go, replace the + messaging.Dispatch literal that currently passes + Message: message.Value with one that passes the pre-encoded fields. + Drop the TODO comment block. The new literal is: +

+
dispatches = append(dispatches, messaging.Dispatch{
+    Durable:     true,
+    MessageType: message.Type,
+    ContentType: message.ContentType,
+    Payload:     message.Content.Bytes(),
+    Topic:       message.Type,
+})
+

+ Add a one-line comment above the Topic field (or the append call) explaining + that Topic = Type because the connector encoder is bypassed when + Payload is pre-populated — cross-reference + serialization/dispatch_encoder_test.go:TestWhenDispatchAlreadyContainsSerializedPayload_Nop. +

+

Context and interfaces

+

+ The relevant area is handlers/harness/sqladapter/dispatcher.go:55-71. + message is a *harness.Message; by the time Change A is + complete, message.ContentType will be the string returned by the + serializer's ContentType() method, and message.Content is the + *bytes.Buffer holding the encoded bytes. + messaging.Dispatch is defined in the messaging package — + Payload []byte, MessageType string, + ContentType string, Topic string, Durable bool, + Message any. Leave Message unset (nil). +

+

What success looks like

+

+ TestDispatch_PublishesPreEncodedPayloadAndMetadata in + handlers/harness/sqladapter/dispatcher_test.go passes. It will assert: +

+
    +
  • published[0].Payload equals message.Content.Bytes()
  • +
  • published[0].MessageType equals message.Type
  • +
  • published[0].ContentType equals message.ContentType (e.g. "application/json")
  • +
  • published[0].Topic equals message.Type
  • +
  • published[0].Durable is true
  • +
  • published[0].Message is nil
  • +
+ +

Implementation Checklist

+ +

Phase 1: ContentType on the internal serializer interface (red → green)

+
    +
  • Extend 02_serialization_test.go fixture: add ContentType() string { return "test/content-type" } and a new test TestSerializesEachResultValueIntoContent_PopulatesContentTypeOnSuccess asserting units[0].results[0].ContentType == "test/content-type".
  • +
  • Run tests, confirm failure (assertion fails: success path leaves ContentType empty).
  • +
  • Extend serializer interface in handlers/harness/contracts.go to include ContentType() string.
  • +
  • Add ContentType() string { return "" } to nop in config.go so the default still satisfies the interface.
  • +
  • In 02_serialization.go, on the success branch, set message.ContentType = this.serializer.ContentType(). Leave the fallback branch's existing "go fmt.Sprintf(%#v)" assignment alone.
  • +
  • Run tests, confirm green. Verify the fallback test (TestSerializerErrorIsTracked_FallbackToFmtSprintfEncoding) still asserts the "go fmt.Sprintf(%#v)" ContentType.
  • +
+ +

Phase 2: Dispatcher pass-through (red → green)

+
    +
  • Add TestDispatch_PublishesPreEncodedPayloadAndMetadata to dispatcher_test.go: extend the seedMessage helper (or inline setup) so the *harness.Message carries Type, ContentType (e.g. "application/json"), and a pre-encoded Content buffer. Assert the recorded messaging.Dispatch has Payload == message.Content.Bytes(), MessageType == message.Type, ContentType == message.ContentType, Topic == message.Type, Durable == true, and Message == nil.
  • +
  • Run tests, confirm failure (today the dispatcher sends Message: message.Value and leaves Payload/MessageType/ContentType/Topic blank).
  • +
  • 🤝 Edit sqladapter/dispatcher.go:55-71: drop the TODO comment block; change the messaging.Dispatch literal to pass Durable, MessageType, ContentType, Payload, and Topic from the *harness.Message; add a one-line comment explaining Topic = Type — see Your Turn for the full spec.
  • +
  • Run tests, confirm green. Update TestDispatch_PublishesAndMarksDispatched so its published[0].Message assertion is replaced with assertions on Payload/MessageType/ContentType/Topic.
  • +
  • Confirm TestDispatch_PublishFails_ReturnsErrorWithoutMarkingDispatched and TestDispatch_NoMessages_NoOp still pass.
  • +
+ +

Phase 3: Recovery regression test + comment (red → green)

+
    +
  • Add TestRecover_PublishesDispatchWithTopicMessageTypeAndPayload: seed an undispatched row with type='order-received' and a JSON payload; run Recover; assert the resulting messaging.Dispatch has Topic == "order-received", MessageType == "order-received", ContentType == "application/json", Payload == <the row's payload bytes>, Durable == true, and Message == nil.
  • +
  • Run the test. With Phase 2 in place, this test should already pass (recovery's *harness.Message is well-formed and the new dispatcher reads it correctly). If it fails, that is the right reason to fix it before continuing.
  • +
  • Edit sqladapter/recovery.go:42-47: replace the inline framing of ContentType: "application/json" with a comment stating the assumption — the project's serializer is JSON; the durable bytes are recorded without a content-type column, so recovery carries that assumption forward.
  • +
  • Run tests, confirm green. Confirm TestRecover_NoOrphans_NoOp, TestRecover_DispatchesUndispatchedRowsInIDOrder, TestRecover_PassesPayloadAndTypeIntoMessage, TestRecover_RowsExceedBatchSize_FlushesInBatchesAndDispatchesAll, and TestRecover_RowCountIsMultipleOfBatchSize_FlushesUniformBatches all still pass.
  • +
+ +

Phase 4: Full verification

+
    +
  • Run make test (fmt, vet, -race, coverage). Confirm green and that handlers/harness and handlers/harness/sqladapter coverage have not regressed.
  • +
  • Re-read the diff against the CLAUDE.md Go conventions: receiver named this; named slice/return values where applicable; no naked returns; no blank lines at method start/end; struct initializers use field/value pairs; multi-line struct literals close the brace on their own line.
  • +
  • Sanity-check the diffs against the three defects: (a) no double encode — dispatcher passes pre-encoded bytes; connector encoder short-circuits on len(Payload) > 0; (b) ContentType propagates in-process from serialization → message → dispatch; (c) recovery's *harness.Message now flows through the dispatcher to a well-formed broker dispatch (no more ErrEmptyDispatchTopic).
  • +
  • Confirm package doc comment in handlers/harness/sqladapter/dispatcher.go still accurately describes the columns it depends on (id, dispatched, type, payload); it has not changed.
  • +
+ + + From 778e02d30d2ce1fd59c066561ce60f18bf2c31e7 Mon Sep 17 00:00:00 2001 From: Michael Whatcott Date: Tue, 2 Jun 2026 14:40:37 -0600 Subject: [PATCH 31/50] Implement proposal to resolve ownership of encoding. --- ...29-proposal-dispatch-content-encoding.html | 38 +++++++++---------- handlers/harness/02_serialization.go | 2 + handlers/harness/02_serialization_test.go | 18 +++++++++ handlers/harness/config.go | 1 + handlers/harness/contracts.go | 1 + handlers/harness/pipeline_test.go | 2 + handlers/harness/sqladapter/dispatcher.go | 13 +++---- .../harness/sqladapter/dispatcher_test.go | 34 +++++++++++++++-- handlers/harness/sqladapter/recovery.go | 2 +- handlers/harness/sqladapter/recovery_test.go | 16 ++++++++ 10 files changed, 96 insertions(+), 31 deletions(-) diff --git a/doc/work-sessions/2026/2026-06-02_12-40-29-proposal-dispatch-content-encoding.html b/doc/work-sessions/2026/2026-06-02_12-40-29-proposal-dispatch-content-encoding.html index 0b6a3a0..935e3bc 100644 --- a/doc/work-sessions/2026/2026-06-02_12-40-29-proposal-dispatch-content-encoding.html +++ b/doc/work-sessions/2026/2026-06-02_12-40-29-proposal-dispatch-content-encoding.html @@ -297,37 +297,37 @@

Implementation Checklist

Phase 1: ContentType on the internal serializer interface (red → green)

    -
  • Extend 02_serialization_test.go fixture: add ContentType() string { return "test/content-type" } and a new test TestSerializesEachResultValueIntoContent_PopulatesContentTypeOnSuccess asserting units[0].results[0].ContentType == "test/content-type".
  • -
  • Run tests, confirm failure (assertion fails: success path leaves ContentType empty).
  • -
  • Extend serializer interface in handlers/harness/contracts.go to include ContentType() string.
  • -
  • Add ContentType() string { return "" } to nop in config.go so the default still satisfies the interface.
  • -
  • In 02_serialization.go, on the success branch, set message.ContentType = this.serializer.ContentType(). Leave the fallback branch's existing "go fmt.Sprintf(%#v)" assignment alone.
  • -
  • Run tests, confirm green. Verify the fallback test (TestSerializerErrorIsTracked_FallbackToFmtSprintfEncoding) still asserts the "go fmt.Sprintf(%#v)" ContentType.
  • +
  • Extend 02_serialization_test.go fixture: add ContentType() string { return "test/content-type" } and a new test TestSerializesEachResultValueIntoContent_PopulatesContentTypeOnSuccess asserting units[0].results[0].ContentType == "test/content-type".
  • +
  • Run tests, confirm failure (assertion fails: success path leaves ContentType empty).
  • +
  • Extend serializer interface in handlers/harness/contracts.go to include ContentType() string.
  • +
  • Add ContentType() string { return "" } to nop in config.go so the default still satisfies the interface.
  • +
  • In 02_serialization.go, on the success branch, set message.ContentType = this.serializer.ContentType(). Leave the fallback branch's existing "go fmt.Sprintf(%#v)" assignment alone.
  • +
  • Run tests, confirm green. Verify the fallback test (TestSerializerErrorIsTracked_FallbackToFmtSprintfEncoding) still asserts the "go fmt.Sprintf(%#v)" ContentType.

Phase 2: Dispatcher pass-through (red → green)

    -
  • Add TestDispatch_PublishesPreEncodedPayloadAndMetadata to dispatcher_test.go: extend the seedMessage helper (or inline setup) so the *harness.Message carries Type, ContentType (e.g. "application/json"), and a pre-encoded Content buffer. Assert the recorded messaging.Dispatch has Payload == message.Content.Bytes(), MessageType == message.Type, ContentType == message.ContentType, Topic == message.Type, Durable == true, and Message == nil.
  • -
  • Run tests, confirm failure (today the dispatcher sends Message: message.Value and leaves Payload/MessageType/ContentType/Topic blank).
  • -
  • 🤝 Edit sqladapter/dispatcher.go:55-71: drop the TODO comment block; change the messaging.Dispatch literal to pass Durable, MessageType, ContentType, Payload, and Topic from the *harness.Message; add a one-line comment explaining Topic = Type — see Your Turn for the full spec.
  • -
  • Run tests, confirm green. Update TestDispatch_PublishesAndMarksDispatched so its published[0].Message assertion is replaced with assertions on Payload/MessageType/ContentType/Topic.
  • -
  • Confirm TestDispatch_PublishFails_ReturnsErrorWithoutMarkingDispatched and TestDispatch_NoMessages_NoOp still pass.
  • +
  • Add TestDispatch_PublishesPreEncodedPayloadAndMetadata to dispatcher_test.go: extend the seedMessage helper (or inline setup) so the *harness.Message carries Type, ContentType (e.g. "application/json"), and a pre-encoded Content buffer. Assert the recorded messaging.Dispatch has Payload == message.Content.Bytes(), MessageType == message.Type, ContentType == message.ContentType, Topic == message.Type, Durable == true, and Message == nil.
  • +
  • Run tests, confirm failure (today the dispatcher sends Message: message.Value and leaves Payload/MessageType/ContentType/Topic blank).
  • +
  • 🤝 Edit sqladapter/dispatcher.go:55-71: drop the TODO comment block; change the messaging.Dispatch literal to pass Durable, MessageType, ContentType, Payload, and Topic from the *harness.Message; add a one-line comment explaining Topic = Type — see Your Turn for the full spec.
  • +
  • Run tests, confirm green. Update TestDispatch_PublishesAndMarksDispatched so its published[0].Message assertion is replaced with assertions on Payload/MessageType/ContentType/Topic.
  • +
  • Confirm TestDispatch_PublishFails_ReturnsErrorWithoutMarkingDispatched and TestDispatch_NoMessages_NoOp still pass.

Phase 3: Recovery regression test + comment (red → green)

    -
  • Add TestRecover_PublishesDispatchWithTopicMessageTypeAndPayload: seed an undispatched row with type='order-received' and a JSON payload; run Recover; assert the resulting messaging.Dispatch has Topic == "order-received", MessageType == "order-received", ContentType == "application/json", Payload == <the row's payload bytes>, Durable == true, and Message == nil.
  • -
  • Run the test. With Phase 2 in place, this test should already pass (recovery's *harness.Message is well-formed and the new dispatcher reads it correctly). If it fails, that is the right reason to fix it before continuing.
  • -
  • Edit sqladapter/recovery.go:42-47: replace the inline framing of ContentType: "application/json" with a comment stating the assumption — the project's serializer is JSON; the durable bytes are recorded without a content-type column, so recovery carries that assumption forward.
  • -
  • Run tests, confirm green. Confirm TestRecover_NoOrphans_NoOp, TestRecover_DispatchesUndispatchedRowsInIDOrder, TestRecover_PassesPayloadAndTypeIntoMessage, TestRecover_RowsExceedBatchSize_FlushesInBatchesAndDispatchesAll, and TestRecover_RowCountIsMultipleOfBatchSize_FlushesUniformBatches all still pass.
  • +
  • Add TestRecover_PublishesDispatchWithTopicMessageTypeAndPayload: seed an undispatched row with type='order-received' and a JSON payload; run Recover; assert the resulting messaging.Dispatch has Topic == "order-received", MessageType == "order-received", ContentType == "application/json", Payload == <the row's payload bytes>, Durable == true, and Message == nil.
  • +
  • Run the test. With Phase 2 in place, this test should already pass (recovery's *harness.Message is well-formed and the new dispatcher reads it correctly). If it fails, that is the right reason to fix it before continuing.
  • +
  • Edit sqladapter/recovery.go:42-47: replace the inline framing of ContentType: "application/json" with a comment stating the assumption — the project's serializer is JSON; the durable bytes are recorded without a content-type column, so recovery carries that assumption forward.
  • +
  • Run tests, confirm green. Confirm TestRecover_NoOrphans_NoOp, TestRecover_DispatchesUndispatchedRowsInIDOrder, TestRecover_PassesPayloadAndTypeIntoMessage, TestRecover_RowsExceedBatchSize_FlushesInBatchesAndDispatchesAll, and TestRecover_RowCountIsMultipleOfBatchSize_FlushesUniformBatches all still pass.

Phase 4: Full verification

    -
  • Run make test (fmt, vet, -race, coverage). Confirm green and that handlers/harness and handlers/harness/sqladapter coverage have not regressed.
  • -
  • Re-read the diff against the CLAUDE.md Go conventions: receiver named this; named slice/return values where applicable; no naked returns; no blank lines at method start/end; struct initializers use field/value pairs; multi-line struct literals close the brace on their own line.
  • -
  • Sanity-check the diffs against the three defects: (a) no double encode — dispatcher passes pre-encoded bytes; connector encoder short-circuits on len(Payload) > 0; (b) ContentType propagates in-process from serialization → message → dispatch; (c) recovery's *harness.Message now flows through the dispatcher to a well-formed broker dispatch (no more ErrEmptyDispatchTopic).
  • -
  • Confirm package doc comment in handlers/harness/sqladapter/dispatcher.go still accurately describes the columns it depends on (id, dispatched, type, payload); it has not changed.
  • +
  • Run make test (fmt, vet, -race, coverage). Confirm green and that handlers/harness and handlers/harness/sqladapter coverage have not regressed.
  • +
  • Re-read the diff against the CLAUDE.md Go conventions: receiver named this; named slice/return values where applicable; no naked returns; no blank lines at method start/end; struct initializers use field/value pairs; multi-line struct literals close the brace on their own line.
  • +
  • Sanity-check the diffs against the three defects: (a) no double encode — dispatcher passes pre-encoded bytes; connector encoder short-circuits on len(Payload) > 0; (b) ContentType propagates in-process from serialization → message → dispatch; (c) recovery's *harness.Message now flows through the dispatcher to a well-formed broker dispatch (no more ErrEmptyDispatchTopic).
  • +
  • Confirm package doc comment in handlers/harness/sqladapter/dispatcher.go still accurately describes the columns it depends on (id, dispatched, type, payload); it has not changed.
diff --git a/handlers/harness/02_serialization.go b/handlers/harness/02_serialization.go index d126803..5d773ef 100644 --- a/handlers/harness/02_serialization.go +++ b/handlers/harness/02_serialization.go @@ -35,6 +35,8 @@ func (this *serialization) Listen() { message.Content.Reset() message.ContentType = "go fmt.Sprintf(%#v)" _, _ = fmt.Fprintf(message.Content, "%#v", message.Value) // Not JSON, but it will have to do... + } else { + message.ContentType = this.serializer.ContentType() } failure.Error = nil failure.Value = nil diff --git a/handlers/harness/02_serialization_test.go b/handlers/harness/02_serialization_test.go index 53ae893..424c2a8 100644 --- a/handlers/harness/02_serialization_test.go +++ b/handlers/harness/02_serialization_test.go @@ -52,6 +52,10 @@ func (this *SerializationFixture) Serialize(out io.Writer, in any) error { return nil } +func (this *SerializationFixture) ContentType() string { + return "test/content-type" +} + func (this *SerializationFixture) drain() (results []*unitOfWork) { for unit := range this.output { results = append(results, unit) @@ -111,6 +115,20 @@ func (this *SerializationFixture) TestClosedInputClosesOutput() { this.So(this.tracked, should.BeEmpty) } +func (this *SerializationFixture) TestSerializesEachResultValueIntoContent_PopulatesContentTypeOnSuccess() { + unit := &unitOfWork{results: []*Message{ + {Value: "hello", Content: bytes.NewBuffer(nil)}, + }} + this.input <- unit + close(this.input) + + go this.subject.Listen() + + units := this.drain() + this.So(len(units), should.Equal, 1) + this.So(units[0].results[0].ContentType, should.Equal, "test/content-type") +} + func (this *SerializationFixture) TestSerializerErrorIsTracked_FallbackToFmtSprintfEncoding() { type TestMessage struct { Value string `json:"value"` diff --git a/handlers/harness/config.go b/handlers/harness/config.go index ab59072..cb31159 100644 --- a/handlers/harness/config.go +++ b/handlers/harness/config.go @@ -110,5 +110,6 @@ type nop struct{} func (nop) Track(any) {} func (nop) Serialize(io.Writer, any) error { return nil } +func (nop) ContentType() string { return "" } func (nop) Write(context.Context, ...any) error { return nil } func (nop) Dispatch(context.Context, ...any) error { return nil } diff --git a/handlers/harness/contracts.go b/handlers/harness/contracts.go index e1903d9..ce45d0d 100644 --- a/handlers/harness/contracts.go +++ b/handlers/harness/contracts.go @@ -32,6 +32,7 @@ type ( } serializer interface { Serialize(out io.Writer, in any) error + ContentType() string } ) diff --git a/handlers/harness/pipeline_test.go b/handlers/harness/pipeline_test.go index 295998e..6dfa13f 100644 --- a/handlers/harness/pipeline_test.go +++ b/handlers/harness/pipeline_test.go @@ -79,6 +79,8 @@ func (this *PipelineFixture) Serialize(out io.Writer, _ any) error { return nil } +func (this *PipelineFixture) ContentType() string { return "" } + func (this *PipelineFixture) Write(ctx context.Context, messages ...any) error { this.So(ctx.Value("testing"), should.Equal, this.Name()) this.writeLock.Lock() diff --git a/handlers/harness/sqladapter/dispatcher.go b/handlers/harness/sqladapter/dispatcher.go index ff93e2f..4b30dab 100644 --- a/handlers/harness/sqladapter/dispatcher.go +++ b/handlers/harness/sqladapter/dispatcher.go @@ -55,15 +55,12 @@ func (this *Dispatcher) publish(ctx context.Context, messages []any) error { dispatches := make([]messaging.Dispatch, 0, len(messages)) // TODO: reuse slice, pool dispatch struct for _, raw := range messages { message := raw.(*harness.Message) - // TODO: dedupe encoding work. The harness Serialization stage already encoded message.Value - // into message.Content for the Messages.payload column; passing message.Value here causes the - // transport connector's serialization layer to encode it a second time for the RMQ payload. - // Either pass the pre-encoded bytes through Dispatch.Payload/MessageType/ContentType and skip - // the connector's serialization for this writer, or drop the harness Serialization stage and - // let the connector own all encoding. dispatches = append(dispatches, messaging.Dispatch{ - Durable: true, - Message: message.Value, + Durable: true, + MessageType: message.Type, + ContentType: message.ContentType, + Payload: message.Content.Bytes(), + Topic: message.Type, // When payload is populated, the connector's encoder skips setting the topic. }) } _, err = writer.Write(ctx, dispatches...) diff --git a/handlers/harness/sqladapter/dispatcher_test.go b/handlers/harness/sqladapter/dispatcher_test.go index 8e848ae..4d99b27 100644 --- a/handlers/harness/sqladapter/dispatcher_test.go +++ b/handlers/harness/sqladapter/dispatcher_test.go @@ -1,6 +1,7 @@ package sqladapter import ( + "bytes" "context" "database/sql" "errors" @@ -53,7 +54,29 @@ func (this *DispatcherFixture) seedMessage(value any) *harness.Message { this.So(err, should.BeNil) id, err := result.LastInsertId() this.So(err, should.BeNil) - return &harness.Message{ID: uint64(id), Value: value} + return &harness.Message{ + ID: uint64(id), + Type: "order-received", + ContentType: "application/json", + Content: bytes.NewBufferString(`{}`), + Value: value, + } +} + +func (this *DispatcherFixture) TestDispatch_PublishesPreEncodedPayloadAndMetadata() { + message := this.seedMessage(dispatcherTestEvent{AccountID: 1, OrderID: 2}) + + err := this.subject.Dispatch(context.Background(), message) + + this.So(err, should.BeNil) + this.So(len(this.connector.published), should.Equal, 1) + published := this.connector.published[0] + this.So(published.Payload, should.Equal, message.Content.Bytes()) + this.So(published.MessageType, should.Equal, message.Type) + this.So(published.ContentType, should.Equal, message.ContentType) + this.So(published.Topic, should.Equal, message.Type) + this.So(published.Durable, should.BeTrue) + this.So(published.Message, should.BeNil) } func (this *DispatcherFixture) TestDispatch_PublishesAndMarksDispatched() { @@ -64,8 +87,13 @@ func (this *DispatcherFixture) TestDispatch_PublishesAndMarksDispatched() { this.So(err, should.BeNil) this.So(len(this.connector.published), should.Equal, 1) - this.So(this.connector.published[0].Message, should.Equal, event) - this.So(this.connector.published[0].Durable, should.BeTrue) + published := this.connector.published[0] + this.So(published.Payload, should.Equal, message.Content.Bytes()) + this.So(published.MessageType, should.Equal, message.Type) + this.So(published.ContentType, should.Equal, message.ContentType) + this.So(published.Topic, should.Equal, message.Type) + this.So(published.Durable, should.BeTrue) + this.So(published.Message, should.BeNil) this.So(this.dispatchedTimestamp(message.ID), should.NOT.BeNil) } diff --git a/handlers/harness/sqladapter/recovery.go b/handlers/harness/sqladapter/recovery.go index 84c1e14..2cdde27 100644 --- a/handlers/harness/sqladapter/recovery.go +++ b/handlers/harness/sqladapter/recovery.go @@ -43,7 +43,7 @@ func Recover(ctx context.Context, handle *sql.DB, dispatcher *Dispatcher, logger ID: id, Type: typeName, Content: bytes.NewBuffer(payload), - ContentType: "application/json", + ContentType: "application/json", // stored bytes are JSON; no content_type column exists to record otherwise }) if len(messages) >= batchSize { err := dispatcher.Dispatch(ctx, messages...) diff --git a/handlers/harness/sqladapter/recovery_test.go b/handlers/harness/sqladapter/recovery_test.go index d6ae4d4..3add080 100644 --- a/handlers/harness/sqladapter/recovery_test.go +++ b/handlers/harness/sqladapter/recovery_test.go @@ -87,6 +87,22 @@ func (this *RecoveryFixture) TestRecover_PassesPayloadAndTypeIntoMessage() { this.So(dispatch.Durable, should.BeTrue) } +func (this *RecoveryFixture) TestRecover_PublishesDispatchWithTopicMessageTypeAndPayload() { + this.seedUndispatched("order-received", `{"order":1}`) + + err := Recover(context.Background(), this.handle, this.dispatcher, log.New(os.Stderr, "", 0), 1024) + + this.So(err, should.BeNil) + this.So(len(this.connector.published), should.Equal, 1) + dispatch := this.connector.published[0] + this.So(dispatch.Topic, should.Equal, "order-received") + this.So(dispatch.MessageType, should.Equal, "order-received") + this.So(dispatch.ContentType, should.Equal, "application/json") + this.So(dispatch.Payload, should.Equal, []byte(`{"order":1}`)) + this.So(dispatch.Durable, should.BeTrue) + this.So(dispatch.Message, should.BeNil) +} + func (this *RecoveryFixture) TestRecover_RowsExceedBatchSize_FlushesInBatchesAndDispatchesAll() { const batchSize = 3 const total = 7 // 3 + 3 + 1 From bf80c8ba20113bb4c68033a231971862d18019db Mon Sep 17 00:00:00 2001 From: Michael Whatcott Date: Tue, 2 Jun 2026 15:30:38 -0600 Subject: [PATCH 32/50] Commented and simplified a few tricky areas. --- doc/docker-compose.yml | 2 +- ...h-scan-master-vs-mikewhat_new-harness.html | 146 ++++++++++++++++++ handlers/harness/00_entrypoint.go | 12 +- handlers/harness/00_entrypoint_test.go | 2 - handlers/harness/03_persistence.go | 4 +- handlers/harness/05_broadcast.go | 7 +- handlers/harness/contracts.go | 1 - handlers/harness/fanout.go | 6 +- handlers/harness/sqladapter/recovery.go | 10 +- handlers/harness/sqladapter/testdb_test.go | 4 +- handlers/harness/sqladapter/writer.go | 3 + 11 files changed, 175 insertions(+), 22 deletions(-) create mode 100644 doc/work-sessions/2026/2026-06-02_14-57-17-branch-scan-master-vs-mikewhat_new-harness.html diff --git a/doc/docker-compose.yml b/doc/docker-compose.yml index 0e42119..7903147 100644 --- a/doc/docker-compose.yml +++ b/doc/docker-compose.yml @@ -3,7 +3,7 @@ services: image: mysql:8.0 environment: MYSQL_ALLOW_EMPTY_PASSWORD: 1 - MYSQL_DATABASE: accounting + MYSQL_DATABASE: messaging ports: - 3306:3306 healthcheck: diff --git a/doc/work-sessions/2026/2026-06-02_14-57-17-branch-scan-master-vs-mikewhat_new-harness.html b/doc/work-sessions/2026/2026-06-02_14-57-17-branch-scan-master-vs-mikewhat_new-harness.html new file mode 100644 index 0000000..6496c80 --- /dev/null +++ b/doc/work-sessions/2026/2026-06-02_14-57-17-branch-scan-master-vs-mikewhat_new-harness.html @@ -0,0 +1,146 @@ + + + + +Branch Scan: master vs mikewhat/new-harness + + + +

Branch Scan: master vs mikewhat/new-harness

+ +

Summary

+

+ This branch introduces a new handlers/harness package: a staged, store-and-forward + message-handling pipeline composed of seven goroutine stages connected by buffered channels + (entrypoint → execution → serialization → persistence → completion → broadcast → terminal). + It also adds handlers/harness/sqladapter, a reference MySQL implementation of the + Writer and Dispatcher interfaces, backed by a new Messages + schema in doc/mysql/schema.sql. + Supporting infrastructure includes a Docker Compose file for local MySQL testing, new + Makefile targets for database-backed integration tests, and updated go.mod/ + go.sum adding github.com/go-sql-driver/mysql and + github.com/smarty/gunit/v2. + All 30 commits are additions only — no existing code was modified. + Test coverage is extensive: every pipeline stage, the router/scanner, the admission adapter, + the SQL writer, dispatcher, and recovery function all have unit or integration tests. +

+ +

Changed Files

+
    +
  • Makefile — adds test.db and test.db.local targets for running integration tests with and without a local Docker MySQL instance.
  • +
  • doc/docker-compose.yml — MySQL 8.0 service definition with a health check used by test.db.local.
  • +
  • doc/mysql/schema.sql (renamed from sqlmq/_schema_mysql.sql) — Messages table schema plus a unique index on (dispatched, id).
  • +
  • go.mod / go.sum — adds github.com/go-sql-driver/mysql v1.10.0, github.com/smarty/gunit/v2 v2.1.0, and their transitive dependency filippo.io/edwards25519.
  • +
  • handlers/harness/contracts.go — exported interfaces (Writer, Dispatcher, Monitor), unexported internal interfaces, all monitor observation types, sentinel errors, and shared value types (batch, unitOfWork).
  • +
  • handlers/harness/message.go — exported Message struct carrying id, value, type, serialized content, content-type, encoding, and timestamps.
  • +
  • handlers/harness/config.go — package-level doc comment; New(ctx, options...) constructor; Options singleton with typed option setters; nop zero-value implementation of all collaborator interfaces.
  • +
  • handlers/harness/pipeline.gobuild(ctx, cfg) wires all stages and returns the handler+listeners.
  • +
  • handlers/harness/00_entrypoint.goentrypoint: implements messaging.Handler and io.Closer; exposes Handle (blocks until durable), await (context-cancellable), admit (load-shedding check), and Listen (blocks until closed).
  • +
  • handlers/harness/01_execution.goexecution: reads batches, calls executor per message, flushes to unitOfWork at max size.
  • +
  • handlers/harness/02_serialization.goserialization: serializes each result value; on error falls back to fmt.Sprintf("%#v") and tracks SerializationError.
  • +
  • handlers/harness/03_persistence.gopersistence: writes results to Writer, retries on failure; drops unit if context is cancelled before success.
  • +
  • handlers/harness/04_completion.gocompletion: fires all completion callbacks (acknowledging upstream batches) before forwarding.
  • +
  • handlers/harness/05_broadcast.gobroadcast: dispatches results to Dispatcher, retries on failure; forwards unit even if abandoned (context cancelled).
  • +
  • handlers/harness/06_terminal.goterminal: drains the final channel to allow upstream close propagation.
  • +
  • handlers/harness/admission.goAdmission HTTP middleware wrapping the load-shedding check; AsHTTPHandler adapter bridging awaiter to messaging.Handler.
  • +
  • handlers/harness/routing.go + scanner.gorouter and scan: reflective discovery of Execute*/Apply* methods on registered domain types; self-applicator exclusion logic.
  • +
  • handlers/harness/fanout.gonewFanOut + fanIn: fan-out serialization workers with merged output.
  • +
  • handlers/harness/pool.go — generic poolT[T] wrapper around sync.Pool.
  • +
  • handlers/harness/retry.gowait(ctx, d) helper: sleeps for d or returns early on context cancellation.
  • +
  • handlers/harness/*_test.go — unit tests for every stage, the router, the pipeline integration, config defaults, and the admission middleware.
  • +
  • handlers/harness/sqladapter/writer.goWriter: transactional INSERT into Messages, populates Message.ID from auto-increment; supports a legacyWrite escape hatch within the same transaction.
  • +
  • handlers/harness/sqladapter/dispatcher.goDispatcher: publishes pre-encoded payloads via messaging.Connector, then marks rows dispatched.
  • +
  • handlers/harness/sqladapter/recovery.goRecover: at-startup scan for undispatched rows, re-dispatches in pages.
  • +
  • handlers/harness/sqladapter/*_test.go — integration tests for Writer, Dispatcher, and Recover against a real MySQL database.
  • +
  • doc/work-sessions/2026/ (proposals and proposals-as-HTML) — five research/proposal documents tracking the design iteration for this feature (not production code).
  • +
+ +

Concerns

+
    +
  • + Asymmetric retry semantics between persistence and broadcast. + persistence.store() returns false and skips forwarding the unit if the + context is cancelled — this means the batch is not acknowledged and the MQ will redeliver. + broadcast.dispatch() on the other hand always forwards the unit even when abandoned + (BroadcastAbandoned), meaning the batch is acknowledged via + completion even though dispatch was skipped. The comment in 03_persistence.go + explicitly notes "do NOT forward (no ack); MQ redelivers", but there is no equivalent comment in + 05_broadcast.go explaining the intentional difference. If this asymmetry is deliberate + (a "at-least-once write, best-effort dispatch" guarantee), it should be documented prominently + before merge — it is a subtle and consequential contract that future maintainers will need to + understand. +
  • +
  • + Unchecked waiter.Put after Handle returns. + In entrypoint.Handle (00_entrypoint.go:70), defer this.waiters.Put(waiter) + recycles the waiter immediately after waiter.Wait() returns. This is correct because + Handle does not honor context cancellation and always waits for complete(). + However the method comment does not state this invariant. The contrast with await's + careful non-put-on-departure logic (well-commented) makes Handle's unconditional defer + look like a potential oversight to a reader. A brief comment on Handle explaining + "Handle ignores context cancellation and always waits — safe to defer Put here" would close this + gap. +
  • +
  • + sqladapter.Recover has a hardcoded ContentType. + In sqladapter/recovery.go:46, recovered messages are stamped with + ContentType: "application/json" unconditionally, with a comment noting + "no content_type column exists to record otherwise". If the schema ever gains a + content_type column, this default will silently misdeclare non-JSON payloads. + The TODO: make this a Listener and TODO: retry w/ backoff comments + confirm this code is not intended to be its final form; however, the hardcoded content-type is + a data-correctness issue that should be tracked explicitly (e.g., a TODO or a schema change), + not just left as a silent assumption. +
  • +
  • + Stride-based ID assignment in sqladapter.Writer relies on fragile MySQL behavior. + sqladapter/writer.go:84–89 assigns Message.ID values as + firstID + i * stride, citing the MySQL docs that LAST_INSERT_ID() + returns the first auto-increment value of a multi-row insert. This is only reliable when + innodb_autoinc_lock_mode = 1 (the default in MySQL 5.x) or when + auto_increment_increment = stride. Under MySQL 8.x with + innodb_autoinc_lock_mode = 2 (the new default), IDs for a bulk insert are + guaranteed to be allocated in sequence, but this is internal behavior — the MySQL manual + explicitly warns that applications should not depend on consecutive IDs from a single INSERT + when in interleaved lock mode. The existing comment and the stride feature are clearly + deliberate; adding a comment documenting the MySQL lock-mode requirement would prevent a + future breakage that would be difficult to diagnose. +
  • +
  • + No upper bound on retry loops in persistence and broadcast. + Both persistence.store() and broadcast.dispatch() loop forever until + success or context cancellation. The commit message for 574396f ("Bound the + unbounded retry loops") and the inline TODO: exponential back-off w/ jitter + comments confirm this is a known gap. This is an acknowledged TODO, but it is worth flagging + in review since an unresponsive Writer or Dispatcher with a live context will hold a pipeline + stage open indefinitely, blocking all downstream acknowledgements. +
  • +
  • + fanIn.Listen closes the output channel before all goroutines have drained. + In fanout.go, fanIn.Listen has: +
    defer close(this.output)
    +defer wg.Wait()
    + Deferred calls execute in LIFO order, so wg.Wait() runs before + close(this.output) — which is the correct and intended behavior. However, the + ordering is non-obvious and a future edit (e.g., swapping the defer lines) would silently + introduce a race where the output channel is closed before all goroutines have finished + forwarding to it. Consider merging into a single explicit sequence at the end of the function + body, or adding a comment documenting the required defer order. +
  • +
  • + Docker Compose service creates a database named accounting, but tests use messaging_harness_test. + doc/docker-compose.yml sets MYSQL_DATABASE: accounting. + testdb_test.go ignores this and creates/drops its own messaging_harness_test + schema. The accounting database is never used and its presence may confuse readers. + It is probably a copy-paste artifact and should either be removed or renamed to something neutral + (e.g., MYSQL_DATABASE: harness_bootstrap). +
  • +
+ + diff --git a/handlers/harness/00_entrypoint.go b/handlers/harness/00_entrypoint.go index 7e57007..3ee597e 100644 --- a/handlers/harness/00_entrypoint.go +++ b/handlers/harness/00_entrypoint.go @@ -28,11 +28,10 @@ func newEntrypoint(monitor Monitor, work chan *batch, shedThreshold float64) *en } } -func (this *entrypoint) prepare(ctx context.Context, messages ...any) (waiter *sync.WaitGroup, batch *batch) { +func (this *entrypoint) prepare(messages ...any) (waiter *sync.WaitGroup, batch *batch) { waiter = this.waiters.Get() waiter.Add(1) batch = this.batches.Get() - batch.ctx = ctx batch.messages = messages batch.complete = func() { waiter.Done() @@ -53,21 +52,20 @@ func (this *entrypoint) waiterDone(waiter *sync.WaitGroup) (done chan struct{}) return done } -func (this *entrypoint) Handle(ctx context.Context, messages ...any) { +func (this *entrypoint) Handle(_ context.Context, messages ...any) { this.lock.RLock() if this.closed { this.lock.RUnlock() return } - waiter, item := this.prepare(ctx, messages...) - defer this.waiters.Put(waiter) - + waiter, item := this.prepare(messages...) this.work <- item this.lock.RUnlock() this.monitor.Track(batchInFlight) waiter.Wait() + this.waiters.Put(waiter) } func (this *entrypoint) await(ctx context.Context, message any) { @@ -77,7 +75,7 @@ func (this *entrypoint) await(ctx context.Context, message any) { return } - waiter, batch := this.prepare(ctx, message) + waiter, batch := this.prepare(message) select { case this.work <- batch: diff --git a/handlers/harness/00_entrypoint_test.go b/handlers/harness/00_entrypoint_test.go index c319675..ac6b154 100644 --- a/handlers/harness/00_entrypoint_test.go +++ b/handlers/harness/00_entrypoint_test.go @@ -49,7 +49,6 @@ func (this *EntrypointFixture) TestHandlePushesBatchAndBlocksUntilCompletion() { }() item := <-this.work - this.So(item.ctx.Value("testing"), should.Equal, this.Name()) this.So(item.messages, should.Equal, []any{"msg-1", "msg-2"}) select { @@ -74,7 +73,6 @@ func (this *EntrypointFixture) TestHandleSerializesMultipleConcurrentCalls() { for range 3 { item := <-this.work - this.So(item.ctx.Value("testing"), should.Equal, this.Name()) item.complete() <-done } diff --git a/handlers/harness/03_persistence.go b/handlers/harness/03_persistence.go index f213189..e0a9a82 100644 --- a/handlers/harness/03_persistence.go +++ b/handlers/harness/03_persistence.go @@ -52,7 +52,9 @@ func (this *persistence) store() (stored bool) { failure.Attempt = attempt failure.Error = fmt.Errorf("%w: %w", ErrPersistence, err) this.monitor.Track(failure) - if this.wait(this.ctx, time.Second) != nil { // TODO: exponential back-off w/ jitter + // Retries forever (until the process restarts) unless the context is cancelled. + // TODO: exponential back-off w/ jitter + if this.wait(this.ctx, time.Second) != nil { this.monitor.Track(PersistenceAbandoned{Attempts: attempt}) return false } diff --git a/handlers/harness/05_broadcast.go b/handlers/harness/05_broadcast.go index 2b1fd58..eada4f6 100644 --- a/handlers/harness/05_broadcast.go +++ b/handlers/harness/05_broadcast.go @@ -36,6 +36,9 @@ func (this *broadcast) Listen() { } this.dispatch() this.buffer = this.buffer[:0] + // Unlike persistence (which drops the unit on abandonment so MQ redelivers), + // broadcast always forwards: the batch is already durably stored, so we ack + // upstream regardless of whether dispatch succeeded. this.output <- unit } } @@ -49,7 +52,9 @@ func (this *broadcast) dispatch() { failure.Attempt = attempt failure.Error = fmt.Errorf("%w: %w", ErrBroadcast, err) this.monitor.Track(failure) - if this.wait(this.ctx, time.Second) != nil { // TODO: exponential backoff w/ jitter + // Retries forever (until the process restarts) unless the context is cancelled. + // TODO: exponential backoff w/ jitter + if this.wait(this.ctx, time.Second) != nil { this.monitor.Track(BroadcastAbandoned{Attempts: attempt}) return } diff --git a/handlers/harness/contracts.go b/handlers/harness/contracts.go index ce45d0d..dac59aa 100644 --- a/handlers/harness/contracts.go +++ b/handlers/harness/contracts.go @@ -71,7 +71,6 @@ var ( type ( batch struct { - ctx context.Context messages []any complete func() } diff --git a/handlers/harness/fanout.go b/handlers/harness/fanout.go index a517216..4c85741 100644 --- a/handlers/harness/fanout.go +++ b/handlers/harness/fanout.go @@ -34,13 +34,13 @@ func newFanIn(inputs []chan *unitOfWork, output chan *unitOfWork) *fanIn { func (this *fanIn) Listen() { defer close(this.output) - var wg sync.WaitGroup - defer wg.Wait() + var waiter sync.WaitGroup for _, input := range this.inputs { - wg.Go(func() { + waiter.Go(func() { for unit := range input { this.output <- unit } }) } + waiter.Wait() } diff --git a/handlers/harness/sqladapter/recovery.go b/handlers/harness/sqladapter/recovery.go index 2cdde27..dc29b90 100644 --- a/handlers/harness/sqladapter/recovery.go +++ b/handlers/harness/sqladapter/recovery.go @@ -40,10 +40,12 @@ func Recover(ctx context.Context, handle *sql.DB, dispatcher *Dispatcher, logger } total++ messages = append(messages, &harness.Message{ - ID: id, - Type: typeName, - Content: bytes.NewBuffer(payload), - ContentType: "application/json", // stored bytes are JSON; no content_type column exists to record otherwise + ID: id, + Type: typeName, + Content: bytes.NewBuffer(payload), + // Hard-coded until the Messages schema gains a content_type column; + // all payloads written by Writer are JSON so this is correct for now. + ContentType: "application/json", }) if len(messages) >= batchSize { err := dispatcher.Dispatch(ctx, messages...) diff --git a/handlers/harness/sqladapter/testdb_test.go b/handlers/harness/sqladapter/testdb_test.go index 44f69f4..fc90cf6 100644 --- a/handlers/harness/sqladapter/testdb_test.go +++ b/handlers/harness/sqladapter/testdb_test.go @@ -13,10 +13,10 @@ import ( // Integration tests in this package require a local MySQL server. // -// Tests run against a throwaway schema (default `messaging_harness_test`) which +// Tests run against a throwaway schema (default `messaging`) which // is dropped and re-created before each run. -const testSchemaName = "messaging_harness_test" +const testSchemaName = "messaging" func ensureDatabaseReadiness(t *testing.T) { bootstrap, err := openDSN(buildDSN("")) diff --git a/handlers/harness/sqladapter/writer.go b/handlers/harness/sqladapter/writer.go index 1b448ba..c7ac14f 100644 --- a/handlers/harness/sqladapter/writer.go +++ b/handlers/harness/sqladapter/writer.go @@ -84,6 +84,9 @@ func (this *Writer) insertMessages(ctx context.Context, tx *sql.Tx, messages []a // https://dev.mysql.com/doc/refman/5.6/en/information-functions.html#function_last-insert-id // > If you insert multiple rows using a single INSERT statement, LAST_INSERT_ID() returns the value // > generated for the first inserted row only. + // This Writer is the sole writer to the Messages table and processes each batch sequentially + // inside a single transaction, so no other INSERT can interleave and create gaps between the + // IDs assigned to this batch. The stride-based assignment below is therefore safe. first, err := result.LastInsertId() if err != nil { return err From a82d221bf0a705eea584c025105a2fd81207db49 Mon Sep 17 00:00:00 2001 From: Michael Whatcott Date: Wed, 3 Jun 2026 13:14:57 -0600 Subject: [PATCH 33/50] Remove monitoring of in-flight units of work. We were monitoring the new unit in flight, but never monitoring their completion. I'm not terribly interested in this metric as I think the batch metrics are sufficient. --- handlers/harness/01_execution.go | 3 --- handlers/harness/01_execution_test.go | 4 ---- handlers/harness/contracts.go | 2 -- handlers/harness/pipeline_test.go | 12 ++++-------- 4 files changed, 4 insertions(+), 17 deletions(-) diff --git a/handlers/harness/01_execution.go b/handlers/harness/01_execution.go index 39e6862..ed655e6 100644 --- a/handlers/harness/01_execution.go +++ b/handlers/harness/01_execution.go @@ -42,10 +42,7 @@ func (this *execution) Listen() { if len(unit.completions) < this.maxUnitSize && len(this.input) > 0 { continue // more to do } - this.monitor.Track(unitOfWorkInFlight) this.output <- unit unit = nil } } - -var unitOfWorkInFlight UnitOfWorkInFlight diff --git a/handlers/harness/01_execution_test.go b/handlers/harness/01_execution_test.go index 579ad99..f4ed9d3 100644 --- a/handlers/harness/01_execution_test.go +++ b/handlers/harness/01_execution_test.go @@ -67,7 +67,6 @@ func (this *ExecutionFixture) TestSingleBatchProducesUnitOfWork() { this.So(len(units[0].results), should.Equal, 1) this.So(units[0].results[0].Value, should.Equal, "result-A") this.So(len(units[0].completions), should.Equal, 1) - this.So(this.tracked, should.Equal, []any{unitOfWorkInFlight}) } func (this *ExecutionFixture) TestUnitFlushesWhenMaxUnitSizeReached() { @@ -85,7 +84,6 @@ func (this *ExecutionFixture) TestUnitFlushesWhenMaxUnitSizeReached() { this.So(len(units[0].completions), should.Equal, 2) this.So(len(units[1].completions), should.Equal, 1) this.So(this.executeCalls, should.Equal, []any{"m1", "m2", "m3"}) - this.So(this.tracked, should.Equal, []any{unitOfWorkInFlight, unitOfWorkInFlight}) } func (this *ExecutionFixture) TestEmptyExecutorOutputProducesUnitWithNoResults() { @@ -98,7 +96,6 @@ func (this *ExecutionFixture) TestEmptyExecutorOutputProducesUnitWithNoResults() this.So(len(units), should.Equal, 1) this.So(units[0].results, should.BeEmpty) this.So(this.executeCalls, should.Equal, []any{"silent"}) - this.So(this.tracked, should.Equal, []any{unitOfWorkInFlight}) } func (this *ExecutionFixture) TestExecutorBroadcastsMultipleResults() { @@ -114,7 +111,6 @@ func (this *ExecutionFixture) TestExecutorBroadcastsMultipleResults() { this.So(units[0].results[0].Value, should.Equal, "r1") this.So(units[0].results[1].Value, should.Equal, "r2") this.So(units[0].results[2].Value, should.Equal, "r3") - this.So(this.tracked, should.Equal, []any{unitOfWorkInFlight}) } func (this *ExecutionFixture) TestClosedInputClosesOutput() { diff --git a/handlers/harness/contracts.go b/handlers/harness/contracts.go index dac59aa..64970d6 100644 --- a/handlers/harness/contracts.go +++ b/handlers/harness/contracts.go @@ -43,8 +43,6 @@ type ( BatchComplete struct{} LoadShed struct{} CallerDeparted struct{} - UnitOfWorkInFlight struct{} - UnitOfWorkComplete struct{} SerializationError struct { Value any Error error diff --git a/handlers/harness/pipeline_test.go b/handlers/harness/pipeline_test.go index 6dfa13f..ee34d87 100644 --- a/handlers/harness/pipeline_test.go +++ b/handlers/harness/pipeline_test.go @@ -107,7 +107,7 @@ func (this *PipelineFixture) Track(observation any) { this.tracked = append(this.tracked, observation) } -func (this *PipelineFixture) countTracked() (batchInFlight, batchComplete, unitInFlight int) { +func (this *PipelineFixture) countTracked() (batchInFlight, batchComplete int) { this.trackLock.Lock() defer this.trackLock.Unlock() for _, observation := range this.tracked { @@ -116,11 +116,9 @@ func (this *PipelineFixture) countTracked() (batchInFlight, batchComplete, unitI batchInFlight++ case BatchComplete: batchComplete++ - case UnitOfWorkInFlight: - unitInFlight++ } } - return batchInFlight, batchComplete, unitInFlight + return batchInFlight, batchComplete } func (this *PipelineFixture) shutdown() { @@ -148,10 +146,9 @@ func (this *PipelineFixture) TestPipelineRoutesMessageThroughExecutionPersistenc this.So(this.dispatchCalls[0][0].(*Message).Value, should.Equal, "event-A") this.So(this.dispatchCalls[0][1].(*Message).Value, should.Equal, "event-B") - batchInFlight, batchComplete, unitInFlight := this.countTracked() + batchInFlight, batchComplete := this.countTracked() this.So(batchInFlight, should.Equal, 1) this.So(batchComplete, should.Equal, 1) - this.So(unitInFlight, should.Equal, 1) } func (this *PipelineFixture) TestPipelineHandlesMultipleMessagesAcrossHandleCalls() { @@ -180,10 +177,9 @@ func (this *PipelineFixture) TestPipelineHandlesMultipleMessagesAcrossHandleCall } this.So(dispatched, should.Equal, []any{"e1", "e2", "e3"}) - batchInFlight, batchComplete, unitInFlight := this.countTracked() + batchInFlight, batchComplete := this.countTracked() this.So(batchInFlight, should.Equal, 3) this.So(batchComplete, should.Equal, 3) - this.So(unitInFlight, should.BeGreaterThanOrEqualTo, 1) } func (this *PipelineFixture) TestPipelineShutsDownWithNoTraffic() { From 39e13de68764d76def1df0e0e652df0d0545d63b Mon Sep 17 00:00:00 2001 From: Michael Whatcott Date: Wed, 3 Jun 2026 13:21:14 -0600 Subject: [PATCH 34/50] Prove the wait func. --- handlers/harness/retry_test.go | 41 ++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 handlers/harness/retry_test.go diff --git a/handlers/harness/retry_test.go b/handlers/harness/retry_test.go new file mode 100644 index 0000000..21ef13c --- /dev/null +++ b/handlers/harness/retry_test.go @@ -0,0 +1,41 @@ +package harness + +import ( + "context" + "testing" + "time" + + "github.com/smarty/gunit/v2" + "github.com/smarty/gunit/v2/assert/should" +) + +func TestWaitFixture(t *testing.T) { + gunit.Run(new(WaitFixture), t) +} + +type WaitFixture struct { + *gunit.Fixture +} + +func (this *WaitFixture) TestContextAlreadyCancelled_ReturnImmediately() { + ctx, cancel := context.WithCancel(this.Context()) + cancel() + + started := time.Now() + err := wait(ctx, time.Hour) + ended := time.Since(started) + + this.So(err, should.Equal, context.Canceled) + this.So(ended, should.BeLessThan, time.Second) +} + +func (this *WaitFixture) TestWait() { + const duration = time.Millisecond * 5 + + started := time.Now() + err := wait(this.Context(), duration) + ended := time.Since(started) + + this.So(err, should.BeNil) + this.So(ended, should.BeGreaterThanOrEqualTo, duration) +} From eb52a8eb11ca650a068fd837c7ea8217280da2a1 Mon Sep 17 00:00:00 2001 From: Michael Whatcott Date: Wed, 3 Jun 2026 13:26:04 -0600 Subject: [PATCH 35/50] Annoying last little bit of test coverage for the package. --- handlers/harness/config_test.go | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/handlers/harness/config_test.go b/handlers/harness/config_test.go index f00f081..226ce6a 100644 --- a/handlers/harness/config_test.go +++ b/handlers/harness/config_test.go @@ -25,6 +25,15 @@ func (this *ConfigFixture) apply(options ...option) configuration { return cfg } +func (this *ConfigFixture) TestNop() { + var n nop + this.So(func() { n.Track(nil) }, should.NOT.Panic) + this.So(n.Serialize(nil, nil), should.BeNil) + this.So(n.ContentType(), should.BeEmpty) + this.So(n.Write(nil), should.BeNil) + this.So(n.Dispatch(nil), should.BeNil) +} + func (this *ConfigFixture) TestDefaultsPopulateCapacities() { cfg := this.apply() this.So(cfg.BatchCapacity, should.Equal, 1024) From 41f6a9df4391f7e39f8f6f99d01a288e52da60b9 Mon Sep 17 00:00:00 2001 From: Michael Whatcott Date: Wed, 3 Jun 2026 13:37:42 -0600 Subject: [PATCH 36/50] Increase pipeline channel buffers. --- handlers/harness/config.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/handlers/harness/config.go b/handlers/harness/config.go index cb31159..a01ff28 100644 --- a/handlers/harness/config.go +++ b/handlers/harness/config.go @@ -97,7 +97,7 @@ func (singleton) defaults(options ...option) []option { Options.Writer(blank), Options.Dispatcher(blank), Options.BatchCapacity(1024), - Options.UnitCapacity(1), + Options.UnitCapacity(4), Options.UnitSize(64), Options.SerializerCount(4), Options.ShedThreshold(0.80), From 9e78b0f5aa0b8c707d213af2218b22dbb4c591de Mon Sep 17 00:00:00 2001 From: Michael Whatcott Date: Wed, 3 Jun 2026 13:39:05 -0600 Subject: [PATCH 37/50] Better names for some options; godoc comment for each option. --- handlers/harness/config.go | 42 ++++++++++++++++++++++++++++----- handlers/harness/config_test.go | 6 ++--- 2 files changed, 39 insertions(+), 9 deletions(-) diff --git a/handlers/harness/config.go b/handlers/harness/config.go index a01ff28..a4ba4be 100644 --- a/handlers/harness/config.go +++ b/handlers/harness/config.go @@ -61,30 +61,60 @@ type configuration struct { func (singleton) Types(value ...any) option { return func(this *configuration) { this.Types = value } } + +// Monitor sets the Monitor collaborator that receives pipeline observations +// (BatchInFlight, BatchComplete, LoadShed, SerializationError, etc.). func (singleton) Monitor(value Monitor) option { return func(this *configuration) { this.Monitor = value } } + +// Serializer sets the collaborator used to encode outgoing messages into bytes. func (singleton) Serializer(value serializer) option { return func(this *configuration) { this.Serializer = value } } + +// Writer sets the collaborator that persists encoded messages (e.g. to a database or message store). func (singleton) Writer(value Writer) option { return func(this *configuration) { this.Writer = value } } + +// Dispatcher sets the collaborator that broadcasts outgoing messages to downstream consumers. func (singleton) Dispatcher(value Dispatcher) option { return func(this *configuration) { this.Dispatcher = value } } -func (singleton) BatchCapacity(value int) option { + +// BurstCapacity sets the buffer size of the channel between the entrypoint and +// execution stages. Larger values absorb more burst traffic before back-pressure +// reaches callers. Default: 1024. +func (singleton) BurstCapacity(value int) option { return func(this *configuration) { this.BatchCapacity = value } } -func (singleton) UnitCapacity(value int) option { + +// PipelineBufferCapacity sets the buffer size of the channels connecting all pipeline +// stages after execution (serialization → persistence → completion → broadcast → +// terminal). Default: 4. +func (singleton) PipelineBufferCapacity(value int) option { return func(this *configuration) { this.UnitCapacity = value } } -func (singleton) UnitSize(value int) option { + +// ExecutionUnitSize sets the maximum number of batches coalesced into a single unit of +// work before the execution stage flushes downstream. Higher values increase +// throughput at the cost of latency per batch. Default: 64. +func (singleton) ExecutionUnitSize(value int) option { return func(this *configuration) { this.UnitSize = value } } + +// SerializerCount sets the number of concurrent serialization goroutines. +// Default: 4. func (singleton) SerializerCount(value int) option { return func(this *configuration) { this.SerializerCount = value } } + +// ShedThreshold sets the load-shedding threshold as a fraction of BurstCapacity +// in the range [0, 1]. When the batch channel fill ratio meets or exceeds this +// value, new callers are refused (Admission returns 503; Handle is a no-op). +// This option only affects HTTP callers. +// Default: 0.80. func (singleton) ShedThreshold(value float64) option { return func(this *configuration) { this.ShedThreshold = value } } @@ -96,9 +126,9 @@ func (singleton) defaults(options ...option) []option { Options.Serializer(blank), Options.Writer(blank), Options.Dispatcher(blank), - Options.BatchCapacity(1024), - Options.UnitCapacity(4), - Options.UnitSize(64), + Options.BurstCapacity(1024), + Options.PipelineBufferCapacity(4), + Options.ExecutionUnitSize(64), Options.SerializerCount(4), Options.ShedThreshold(0.80), }, options...) diff --git a/handlers/harness/config_test.go b/handlers/harness/config_test.go index 226ce6a..5682c6e 100644 --- a/handlers/harness/config_test.go +++ b/handlers/harness/config_test.go @@ -58,9 +58,9 @@ func (this *ConfigFixture) TestTypesOptionStoresValuesVerbatim() { func (this *ConfigFixture) TestTunableOptionsOverrideDefaults() { cfg := this.apply( - Options.BatchCapacity(2), - Options.UnitCapacity(2), - Options.UnitSize(8), + Options.BurstCapacity(2), + Options.PipelineBufferCapacity(2), + Options.ExecutionUnitSize(8), Options.SerializerCount(3), Options.ShedThreshold(0.5), ) From 67435307c35eaaf40749d4396653bf0a8d6aba31 Mon Sep 17 00:00:00 2001 From: Michael Whatcott Date: Wed, 3 Jun 2026 13:42:05 -0600 Subject: [PATCH 38/50] Rename (and unexport backing fields) --- handlers/harness/config.go | 40 ++++++++++++++++----------------- handlers/harness/config_test.go | 32 +++++++++++++------------- handlers/harness/pipeline.go | 22 +++++++++--------- 3 files changed, 47 insertions(+), 47 deletions(-) diff --git a/handlers/harness/config.go b/handlers/harness/config.go index a4ba4be..c1b4795 100644 --- a/handlers/harness/config.go +++ b/handlers/harness/config.go @@ -44,70 +44,70 @@ type singleton struct{} type option func(*configuration) type configuration struct { - Monitor Monitor - Serializer serializer - Writer Writer - Dispatcher Dispatcher - Types []any - BatchCapacity int - UnitCapacity int - UnitSize int - SerializerCount int - ShedThreshold float64 + monitor Monitor + serializer serializer + writer Writer + dispatcher Dispatcher + types []any + burstCapacity int + pipelineBufferCapacity int + executionUnitSize int + serializerCount int + shedThreshold float64 } // Types registers the domain objects whose Execute.../Apply... methods drive // the pipeline. They are passed verbatim to newRouter(...) at build time. func (singleton) Types(value ...any) option { - return func(this *configuration) { this.Types = value } + return func(this *configuration) { this.types = value } } // Monitor sets the Monitor collaborator that receives pipeline observations // (BatchInFlight, BatchComplete, LoadShed, SerializationError, etc.). func (singleton) Monitor(value Monitor) option { - return func(this *configuration) { this.Monitor = value } + return func(this *configuration) { this.monitor = value } } // Serializer sets the collaborator used to encode outgoing messages into bytes. func (singleton) Serializer(value serializer) option { - return func(this *configuration) { this.Serializer = value } + return func(this *configuration) { this.serializer = value } } // Writer sets the collaborator that persists encoded messages (e.g. to a database or message store). func (singleton) Writer(value Writer) option { - return func(this *configuration) { this.Writer = value } + return func(this *configuration) { this.writer = value } } // Dispatcher sets the collaborator that broadcasts outgoing messages to downstream consumers. func (singleton) Dispatcher(value Dispatcher) option { - return func(this *configuration) { this.Dispatcher = value } + return func(this *configuration) { this.dispatcher = value } } // BurstCapacity sets the buffer size of the channel between the entrypoint and // execution stages. Larger values absorb more burst traffic before back-pressure // reaches callers. Default: 1024. func (singleton) BurstCapacity(value int) option { - return func(this *configuration) { this.BatchCapacity = value } + return func(this *configuration) { this.burstCapacity = value } } // PipelineBufferCapacity sets the buffer size of the channels connecting all pipeline // stages after execution (serialization → persistence → completion → broadcast → // terminal). Default: 4. func (singleton) PipelineBufferCapacity(value int) option { - return func(this *configuration) { this.UnitCapacity = value } + return func(this *configuration) { this.pipelineBufferCapacity = value } } // ExecutionUnitSize sets the maximum number of batches coalesced into a single unit of // work before the execution stage flushes downstream. Higher values increase // throughput at the cost of latency per batch. Default: 64. func (singleton) ExecutionUnitSize(value int) option { - return func(this *configuration) { this.UnitSize = value } + return func(this *configuration) { this.executionUnitSize = value } } // SerializerCount sets the number of concurrent serialization goroutines. // Default: 4. func (singleton) SerializerCount(value int) option { - return func(this *configuration) { this.SerializerCount = value } + return func(this *configuration) { this.serializerCount = value } } // ShedThreshold sets the load-shedding threshold as a fraction of BurstCapacity @@ -116,7 +116,7 @@ func (singleton) SerializerCount(value int) option { // This option only affects HTTP callers. // Default: 0.80. func (singleton) ShedThreshold(value float64) option { - return func(this *configuration) { this.ShedThreshold = value } + return func(this *configuration) { this.shedThreshold = value } } func (singleton) defaults(options ...option) []option { diff --git a/handlers/harness/config_test.go b/handlers/harness/config_test.go index 5682c6e..17f8841 100644 --- a/handlers/harness/config_test.go +++ b/handlers/harness/config_test.go @@ -36,24 +36,24 @@ func (this *ConfigFixture) TestNop() { func (this *ConfigFixture) TestDefaultsPopulateCapacities() { cfg := this.apply() - this.So(cfg.BatchCapacity, should.Equal, 1024) - this.So(cfg.UnitCapacity, should.Equal, 1) - this.So(cfg.UnitSize, should.Equal, 64) - this.So(cfg.SerializerCount, should.Equal, 4) - this.So(cfg.ShedThreshold, should.Equal, 0.80) + this.So(cfg.burstCapacity, should.Equal, 1024) + this.So(cfg.pipelineBufferCapacity, should.Equal, 1) + this.So(cfg.executionUnitSize, should.Equal, 64) + this.So(cfg.serializerCount, should.Equal, 4) + this.So(cfg.shedThreshold, should.Equal, 0.80) } func (this *ConfigFixture) TestDefaultCollaboratorsAreNop() { cfg := this.apply() - this.So(cfg.Monitor, should.Equal, nop{}) - this.So(cfg.Serializer, should.Equal, nop{}) - this.So(cfg.Writer, should.Equal, nop{}) - this.So(cfg.Dispatcher, should.Equal, nop{}) + this.So(cfg.monitor, should.Equal, nop{}) + this.So(cfg.serializer, should.Equal, nop{}) + this.So(cfg.writer, should.Equal, nop{}) + this.So(cfg.dispatcher, should.Equal, nop{}) } func (this *ConfigFixture) TestTypesOptionStoresValuesVerbatim() { cfg := this.apply(Options.Types("a", 42, struct{}{})) - this.So(cfg.Types, should.Equal, []any{"a", 42, struct{}{}}) + this.So(cfg.types, should.Equal, []any{"a", 42, struct{}{}}) } func (this *ConfigFixture) TestTunableOptionsOverrideDefaults() { @@ -64,17 +64,17 @@ func (this *ConfigFixture) TestTunableOptionsOverrideDefaults() { Options.SerializerCount(3), Options.ShedThreshold(0.5), ) - this.So(cfg.BatchCapacity, should.Equal, 2) - this.So(cfg.UnitCapacity, should.Equal, 2) - this.So(cfg.UnitSize, should.Equal, 8) - this.So(cfg.SerializerCount, should.Equal, 3) - this.So(cfg.ShedThreshold, should.Equal, 0.5) + this.So(cfg.burstCapacity, should.Equal, 2) + this.So(cfg.pipelineBufferCapacity, should.Equal, 2) + this.So(cfg.executionUnitSize, should.Equal, 8) + this.So(cfg.serializerCount, should.Equal, 3) + this.So(cfg.shedThreshold, should.Equal, 0.5) } func (this *ConfigFixture) TestCollaboratorOptionsOverrideDefaults() { recorder := &recordingMonitor{} cfg := this.apply(Options.Monitor(recorder)) - this.So(cfg.Monitor, should.Equal, recorder) + this.So(cfg.monitor, should.Equal, recorder) } func (this *ConfigFixture) TestZeroOptionsPipelineRunsInertly() { diff --git a/handlers/harness/pipeline.go b/handlers/harness/pipeline.go index 667e65f..18150d4 100644 --- a/handlers/harness/pipeline.go +++ b/handlers/harness/pipeline.go @@ -8,21 +8,21 @@ import ( func build(ctx context.Context, config configuration) (messaging.Handler, []messaging.Listener) { var ( - batches = make(chan *batch, config.BatchCapacity) - work1 = make(chan *unitOfWork, config.UnitCapacity) - work2 = make(chan *unitOfWork, config.UnitCapacity) - work3 = make(chan *unitOfWork, config.UnitCapacity) - work4 = make(chan *unitOfWork, config.UnitCapacity) - work5 = make(chan *unitOfWork, config.UnitCapacity) + batches = make(chan *batch, config.burstCapacity) + work1 = make(chan *unitOfWork, config.pipelineBufferCapacity) + work2 = make(chan *unitOfWork, config.pipelineBufferCapacity) + work3 = make(chan *unitOfWork, config.pipelineBufferCapacity) + work4 = make(chan *unitOfWork, config.pipelineBufferCapacity) + work5 = make(chan *unitOfWork, config.pipelineBufferCapacity) ) var ( - entrypoint = newEntrypoint(config.Monitor, batches, config.ShedThreshold) - executor = newExecution(config.Monitor, config.UnitSize, batches, work1, newRouter(config.Types...)) - serializers = newFanOut(serializationFactory(config.Monitor, config.Serializer), config.SerializerCount, config.UnitCapacity, work1, work2) - persistence = newPersistence(ctx, config.Monitor, work2, work3, config.Writer, wait) + entrypoint = newEntrypoint(config.monitor, batches, config.shedThreshold) + executor = newExecution(config.monitor, config.executionUnitSize, batches, work1, newRouter(config.types...)) + serializers = newFanOut(serializationFactory(config.monitor, config.serializer), config.serializerCount, config.pipelineBufferCapacity, work1, work2) + persistence = newPersistence(ctx, config.monitor, work2, work3, config.writer, wait) completion = newCompletion(work3, work4) - broadcast = newBroadcast(ctx, config.Monitor, work4, work5, config.Dispatcher, wait) + broadcast = newBroadcast(ctx, config.monitor, work4, work5, config.dispatcher, wait) terminal = newTerminal(work5) ) From b232570cb3d3cb5df6ae468b222894e8b5e740c2 Mon Sep 17 00:00:00 2001 From: Michael Whatcott Date: Wed, 3 Jun 2026 13:42:28 -0600 Subject: [PATCH 39/50] Fix broken test. --- handlers/harness/config_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/handlers/harness/config_test.go b/handlers/harness/config_test.go index 17f8841..bf2679c 100644 --- a/handlers/harness/config_test.go +++ b/handlers/harness/config_test.go @@ -37,7 +37,7 @@ func (this *ConfigFixture) TestNop() { func (this *ConfigFixture) TestDefaultsPopulateCapacities() { cfg := this.apply() this.So(cfg.burstCapacity, should.Equal, 1024) - this.So(cfg.pipelineBufferCapacity, should.Equal, 1) + this.So(cfg.pipelineBufferCapacity, should.Equal, 4) this.So(cfg.executionUnitSize, should.Equal, 64) this.So(cfg.serializerCount, should.Equal, 4) this.So(cfg.shedThreshold, should.Equal, 0.80) From e6a5997dd7e11b8010967d01477996459ae87c2b Mon Sep 17 00:00:00 2001 From: Michael Whatcott Date: Thu, 4 Jun 2026 15:12:33 -0600 Subject: [PATCH 40/50] Remove comment. It doesn't tell the whole story since there are obviously other concurrent clients. --- handlers/harness/sqladapter/writer.go | 3 --- 1 file changed, 3 deletions(-) diff --git a/handlers/harness/sqladapter/writer.go b/handlers/harness/sqladapter/writer.go index c7ac14f..1b448ba 100644 --- a/handlers/harness/sqladapter/writer.go +++ b/handlers/harness/sqladapter/writer.go @@ -84,9 +84,6 @@ func (this *Writer) insertMessages(ctx context.Context, tx *sql.Tx, messages []a // https://dev.mysql.com/doc/refman/5.6/en/information-functions.html#function_last-insert-id // > If you insert multiple rows using a single INSERT statement, LAST_INSERT_ID() returns the value // > generated for the first inserted row only. - // This Writer is the sole writer to the Messages table and processes each batch sequentially - // inside a single transaction, so no other INSERT can interleave and create gaps between the - // IDs assigned to this batch. The stride-based assignment below is therefore safe. first, err := result.LastInsertId() if err != nil { return err From 004040a67636bb578a50d8a56d0753b20e2bf4d5 Mon Sep 17 00:00:00 2001 From: Michael Whatcott Date: Thu, 4 Jun 2026 15:55:45 -0600 Subject: [PATCH 41/50] More overt and integrated way to expose the entrypoint for HTTP. --- handlers/harness/admission.go | 29 +++++++++++++++------ handlers/harness/admission_test.go | 42 +++++++++++++----------------- handlers/harness/config.go | 4 +-- handlers/harness/config_test.go | 7 ++--- handlers/harness/pipeline.go | 5 ++-- handlers/harness/pipeline_test.go | 2 +- 6 files changed, 48 insertions(+), 41 deletions(-) diff --git a/handlers/harness/admission.go b/handlers/harness/admission.go index 4601730..795228f 100644 --- a/handlers/harness/admission.go +++ b/handlers/harness/admission.go @@ -14,11 +14,15 @@ type ( awaiter interface { await(ctx context.Context, message any) } + httpEntrypoint interface { + admitter + awaiter + } ) -// Admission refuses overloaded requests before the wrapped handler runs, +// admission refuses overloaded requests before the wrapped handler runs, // writing an inline 503. Wrap each mutating route with it. -func Admission(handler admitter, inner http.Handler) http.Handler { +func admission(handler admitter, inner http.Handler) http.Handler { return http.HandlerFunc(func(response http.ResponseWriter, request *http.Request) { if handler.admit() { inner.ServeHTTP(response, request) @@ -33,17 +37,26 @@ func Admission(handler admitter, inner http.Handler) http.Handler { var shedResponseBody = []byte(`{"errors":[{"message":"service overloaded"}]}`) -// AsHTTPHandler adapts the void, context-honoring await to the -// messaging.Handler the HTTP shells already depend on, so no shell (and no -// shell test) changes. -func AsHTTPHandler(handler awaiter) messaging.Handler { - return &httpAdapter{target: handler} +// HTTPAdapter adapts the entrypoint Handler for use by an http.Handler. +type HTTPAdapter interface { + // Handler is what the user-supplied http.Handler will invoke + messaging.Handler + + // HTTPHandler wraps the user-supplied http.Handler with an admission check. + HTTPHandler(inner http.Handler) (wrapped http.Handler) } type httpAdapter struct { - target awaiter + target httpEntrypoint } +func newHTTPAdapter(target httpEntrypoint) *httpAdapter { + return &httpAdapter{target} +} + +func (this *httpAdapter) HTTPHandler(inner http.Handler) http.Handler { + return admission(this.target, inner) +} func (this *httpAdapter) Handle(ctx context.Context, messages ...any) { for _, message := range messages { this.target.await(ctx, message) diff --git a/handlers/harness/admission_test.go b/handlers/harness/admission_test.go index 3a94e16..76d8b76 100644 --- a/handlers/harness/admission_test.go +++ b/handlers/harness/admission_test.go @@ -17,44 +17,37 @@ func TestAdmissionFixture(t *testing.T) { type AdmissionFixture struct { *gunit.Fixture ctx context.Context + + allow bool + ctxs []context.Context + messages []any } func (this *AdmissionFixture) Setup() { this.ctx = context.WithValue(this.Context(), "testing", this.Name()) } -type fakeAwaiter struct { - ctxs []context.Context - messages []any +func (this *AdmissionFixture) Handle(context.Context, ...any) {} +func (this *AdmissionFixture) admit() bool { + return this.allow } - -func (this *fakeAwaiter) Handle(context.Context, ...any) {} -func (this *fakeAwaiter) await(ctx context.Context, message any) { - this.ctxs = append(this.ctxs, ctx) +func (this *AdmissionFixture) await(ctx context.Context, message any) { + this.So(ctx.Value("testing"), should.Equal, this.Name()) this.messages = append(this.messages, message) } +func (this *AdmissionFixture) ServeHTTP(http.ResponseWriter, *http.Request) {} + func (this *AdmissionFixture) TestAsHTTPHandler_ForwardsSingleMessageToAwait() { - fake := &fakeAwaiter{} - AsHTTPHandler(fake).Handle(this.ctx, "x") - this.So(fake.messages, should.Equal, []any{"x"}) - this.So(fake.ctxs, should.HaveLength, 1) - this.So(fake.ctxs[0].Value("testing"), should.Equal, this.Name()) + newHTTPAdapter(this).Handle(this.ctx, "x") + this.So(this.messages, should.Equal, []any{"x"}) } func (this *AdmissionFixture) TestAsHTTPHandler_ForwardsEachMessageInOrder() { - fake := &fakeAwaiter{} - AsHTTPHandler(fake).Handle(this.ctx, "a", "b") - this.So(fake.messages, should.Equal, []any{"a", "b"}) + newHTTPAdapter(this).Handle(this.ctx, "a", "b") + this.So(this.messages, should.Equal, []any{"a", "b"}) } -type fakeAdmitter struct { - allow bool -} - -func (this *fakeAdmitter) Handle(context.Context, ...any) {} -func (this *fakeAdmitter) admit() bool { return this.allow } - func (this *AdmissionFixture) TestAdmission_PassesThroughWhenAdmitted() { var ran bool inner := http.HandlerFunc(func(response http.ResponseWriter, _ *http.Request) { @@ -65,7 +58,8 @@ func (this *AdmissionFixture) TestAdmission_PassesThroughWhenAdmitted() { recorder := httptest.NewRecorder() request := httptest.NewRequest(http.MethodPost, "/admin/orders", nil) - Admission(&fakeAdmitter{allow: true}, inner).ServeHTTP(recorder, request) + this.allow = true + newHTTPAdapter(this).HTTPHandler(inner).ServeHTTP(recorder, request) this.So(ran, should.BeTrue) this.So(recorder.Code, should.Equal, http.StatusTeapot) @@ -78,7 +72,7 @@ func (this *AdmissionFixture) TestAdmission_Writes503WhenRejected() { recorder := httptest.NewRecorder() request := httptest.NewRequest(http.MethodPost, "/admin/orders", nil) - Admission(&fakeAdmitter{allow: false}, inner).ServeHTTP(recorder, request) + newHTTPAdapter(this).HTTPHandler(inner).ServeHTTP(recorder, request) this.So(ran, should.BeFalse) this.So(recorder.Code, should.Equal, http.StatusServiceUnavailable) diff --git a/handlers/harness/config.go b/handlers/harness/config.go index c1b4795..a4167fc 100644 --- a/handlers/harness/config.go +++ b/handlers/harness/config.go @@ -30,7 +30,7 @@ import ( // corresponding Options.* functions. Collaborators default to a shared // no-op implementation, so omitting them produces a runnable but inert // pipeline — useful for tests, but not for production. -func New(ctx context.Context, options ...option) (messaging.Handler, []messaging.Listener) { +func New(ctx context.Context, options ...option) (HTTPAdapter, messaging.Handler, []messaging.Listener) { var cfg configuration for _, apply := range Options.defaults(options...) { apply(&cfg) @@ -112,7 +112,7 @@ func (singleton) SerializerCount(value int) option { // ShedThreshold sets the load-shedding threshold as a fraction of BurstCapacity // in the range [0, 1]. When the batch channel fill ratio meets or exceeds this -// value, new callers are refused (Admission returns 503; Handle is a no-op). +// value, new callers are refused (admission returns 503; Handle is a no-op). // This option only affects HTTP callers. // Default: 0.80. func (singleton) ShedThreshold(value float64) option { diff --git a/handlers/harness/config_test.go b/handlers/harness/config_test.go index bf2679c..387b3a2 100644 --- a/handlers/harness/config_test.go +++ b/handlers/harness/config_test.go @@ -81,7 +81,7 @@ func (this *ConfigFixture) TestZeroOptionsPipelineRunsInertly() { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - handler, listeners := New(ctx) + forHTTP, forMQ, listeners := New(ctx) done := make(chan struct{}) go func() { @@ -93,8 +93,9 @@ func (this *ConfigFixture) TestZeroOptionsPipelineRunsInertly() { close(done) }() - handler.Handle(ctx, "payload") - this.So(handler.(interface{ Close() error }).Close(), should.BeNil) + forMQ.Handle(ctx, "payload") + forHTTP.Handle(ctx, "payload") + this.So(forMQ.(interface{ Close() error }).Close(), should.BeNil) <-done } diff --git a/handlers/harness/pipeline.go b/handlers/harness/pipeline.go index 18150d4..b0ed78b 100644 --- a/handlers/harness/pipeline.go +++ b/handlers/harness/pipeline.go @@ -6,7 +6,7 @@ import ( "github.com/smarty/messaging/v3" ) -func build(ctx context.Context, config configuration) (messaging.Handler, []messaging.Listener) { +func build(ctx context.Context, config configuration) (forHTTP *httpAdapter, forMQ messaging.Handler, listeners []messaging.Listener) { var ( batches = make(chan *batch, config.burstCapacity) work1 = make(chan *unitOfWork, config.pipelineBufferCapacity) @@ -26,7 +26,6 @@ func build(ctx context.Context, config configuration) (messaging.Handler, []mess terminal = newTerminal(work5) ) - var listeners []messaging.Listener listeners = append(listeners, entrypoint, executor, @@ -38,7 +37,7 @@ func build(ctx context.Context, config configuration) (messaging.Handler, []mess broadcast, terminal, ) - return entrypoint, listeners + return newHTTPAdapter(entrypoint), entrypoint, listeners } func serializationFactory(monitor Monitor, enc serializer) stationFactory { diff --git a/handlers/harness/pipeline_test.go b/handlers/harness/pipeline_test.go index ee34d87..3cf4742 100644 --- a/handlers/harness/pipeline_test.go +++ b/handlers/harness/pipeline_test.go @@ -40,7 +40,7 @@ type commandType string func (this *PipelineFixture) Setup() { this.ctx = context.WithValue(this.Context(), "testing", this.Name()) - this.handler, this.listeners = New(this.ctx, + _, this.handler, this.listeners = New(this.ctx, Options.Types(this), Options.Monitor(this), Options.Serializer(this), From d833234be2fd53ae741d25c9fcd180537ecfff11 Mon Sep 17 00:00:00 2001 From: Michael Whatcott Date: Thu, 4 Jun 2026 16:41:06 -0600 Subject: [PATCH 42/50] New returns a result struct with all pipeline components. --- handlers/harness/config.go | 19 ++++++++++++++++++- handlers/harness/config_test.go | 10 +++++----- handlers/harness/pipeline.go | 11 +++++++++-- handlers/harness/pipeline_test.go | 24 +++++++++++------------- 4 files changed, 43 insertions(+), 21 deletions(-) diff --git a/handlers/harness/config.go b/handlers/harness/config.go index a4167fc..83c5ea3 100644 --- a/handlers/harness/config.go +++ b/handlers/harness/config.go @@ -20,6 +20,7 @@ package harness import ( "context" "io" + "net/http" "github.com/smarty/messaging/v3" ) @@ -30,7 +31,7 @@ import ( // corresponding Options.* functions. Collaborators default to a shared // no-op implementation, so omitting them produces a runnable but inert // pipeline — useful for tests, but not for production. -func New(ctx context.Context, options ...option) (HTTPAdapter, messaging.Handler, []messaging.Listener) { +func New(ctx context.Context, options ...option) Pipeline { var cfg configuration for _, apply := range Options.defaults(options...) { apply(&cfg) @@ -38,6 +39,22 @@ func New(ctx context.Context, options ...option) (HTTPAdapter, messaging.Handler return build(ctx, cfg) } +type Pipeline struct { + // SheddingHTTPWrapper is meant to wrap around any http.Handler that calls SheddingEntrypoint. + // It responds with HTTP 503 in the event that the handler is backed up beyond the configured ShedThreshold. + SheddingHTTPWrapper func(http.Handler) http.Handler + + // SheddingEntrypoint is a messaging.Handler that is meant to be guarded by an admitter (such as SheddingHTTPWrapper). + SheddingEntrypoint messaging.Handler + + // BlockingEntrypoint is a messaging.Handler that will block until the results of the provided work have been durably stored. + BlockingEntrypoint messaging.Handler + + // Listeners contains each phase of the harness pipeline (serialization, persistence, broadcast, etc.). + // Each listener should be invoked on a separate goroutine by a component like github.com/smarty/dominoes. + Listeners []messaging.Listener +} + var Options singleton type singleton struct{} diff --git a/handlers/harness/config_test.go b/handlers/harness/config_test.go index 387b3a2..1bd4509 100644 --- a/handlers/harness/config_test.go +++ b/handlers/harness/config_test.go @@ -81,21 +81,21 @@ func (this *ConfigFixture) TestZeroOptionsPipelineRunsInertly() { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - forHTTP, forMQ, listeners := New(ctx) + pipeline := New(ctx) done := make(chan struct{}) go func() { var wg sync.WaitGroup - for _, listener := range listeners { + for _, listener := range pipeline.Listeners { wg.Go(listener.Listen) } wg.Wait() close(done) }() - forMQ.Handle(ctx, "payload") - forHTTP.Handle(ctx, "payload") - this.So(forMQ.(interface{ Close() error }).Close(), should.BeNil) + pipeline.BlockingEntrypoint.Handle(ctx, "payload") + pipeline.SheddingEntrypoint.Handle(ctx, "payload") + this.So(pipeline.BlockingEntrypoint.(interface{ Close() error }).Close(), should.BeNil) <-done } diff --git a/handlers/harness/pipeline.go b/handlers/harness/pipeline.go index b0ed78b..7976941 100644 --- a/handlers/harness/pipeline.go +++ b/handlers/harness/pipeline.go @@ -6,7 +6,7 @@ import ( "github.com/smarty/messaging/v3" ) -func build(ctx context.Context, config configuration) (forHTTP *httpAdapter, forMQ messaging.Handler, listeners []messaging.Listener) { +func build(ctx context.Context, config configuration) (result Pipeline) { var ( batches = make(chan *batch, config.burstCapacity) work1 = make(chan *unitOfWork, config.pipelineBufferCapacity) @@ -26,6 +26,7 @@ func build(ctx context.Context, config configuration) (forHTTP *httpAdapter, for terminal = newTerminal(work5) ) + var listeners []messaging.Listener listeners = append(listeners, entrypoint, executor, @@ -37,7 +38,13 @@ func build(ctx context.Context, config configuration) (forHTTP *httpAdapter, for broadcast, terminal, ) - return newHTTPAdapter(entrypoint), entrypoint, listeners + adapter := newHTTPAdapter(entrypoint) + return Pipeline{ + SheddingHTTPWrapper: adapter.HTTPHandler, + SheddingEntrypoint: adapter, + BlockingEntrypoint: entrypoint, + Listeners: listeners, + } } func serializationFactory(monitor Monitor, enc serializer) stationFactory { diff --git a/handlers/harness/pipeline_test.go b/handlers/harness/pipeline_test.go index 3cf4742..90d1a09 100644 --- a/handlers/harness/pipeline_test.go +++ b/handlers/harness/pipeline_test.go @@ -9,7 +9,6 @@ import ( "github.com/smarty/gunit/v2" "github.com/smarty/gunit/v2/assert/better" "github.com/smarty/gunit/v2/assert/should" - "github.com/smarty/messaging/v3" ) func TestPipelineFixture(t *testing.T) { @@ -18,10 +17,9 @@ func TestPipelineFixture(t *testing.T) { type PipelineFixture struct { *gunit.Fixture - ctx context.Context - handler messaging.Handler - listeners []messaging.Listener - waiter sync.WaitGroup + ctx context.Context + pipeline Pipeline + waiter sync.WaitGroup executeLock sync.Mutex executeCalls []any @@ -40,14 +38,14 @@ type commandType string func (this *PipelineFixture) Setup() { this.ctx = context.WithValue(this.Context(), "testing", this.Name()) - _, this.handler, this.listeners = New(this.ctx, + this.pipeline = New(this.ctx, Options.Types(this), Options.Monitor(this), Options.Serializer(this), Options.Writer(this), Options.Dispatcher(this), ) - for _, listener := range this.listeners { + for _, listener := range this.pipeline.Listeners { this.waiter.Go(listener.Listen) } } @@ -122,7 +120,7 @@ func (this *PipelineFixture) countTracked() (batchInFlight, batchComplete int) { } func (this *PipelineFixture) shutdown() { - closer, ok := this.handler.(io.Closer) + closer, ok := this.pipeline.BlockingEntrypoint.(io.Closer) this.So(ok, better.BeTrue) this.So(closer.Close(), should.BeNil) this.waiter.Wait() @@ -131,7 +129,7 @@ func (this *PipelineFixture) shutdown() { func (this *PipelineFixture) TestPipelineRoutesMessageThroughExecutionPersistenceBroadcast() { this.executeOutputs = [][]any{{"event-A", "event-B"}} - this.handler.Handle(this.ctx, commandType("command-1")) + this.pipeline.BlockingEntrypoint.Handle(this.ctx, commandType("command-1")) this.shutdown() this.So(len(this.executeCalls), should.Equal, 1) @@ -154,9 +152,9 @@ func (this *PipelineFixture) TestPipelineRoutesMessageThroughExecutionPersistenc func (this *PipelineFixture) TestPipelineHandlesMultipleMessagesAcrossHandleCalls() { this.executeOutputs = [][]any{{"e1"}, {"e2"}, {"e3"}} - this.handler.Handle(this.ctx, commandType("c1")) - this.handler.Handle(this.ctx, commandType("c2")) - this.handler.Handle(this.ctx, commandType("c3")) + this.pipeline.BlockingEntrypoint.Handle(this.ctx, commandType("c1")) + this.pipeline.BlockingEntrypoint.Handle(this.ctx, commandType("c2")) + this.pipeline.BlockingEntrypoint.Handle(this.ctx, commandType("c3")) this.shutdown() this.So(len(this.executeCalls), should.Equal, 3) @@ -193,7 +191,7 @@ func (this *PipelineFixture) TestPipelineShutsDownWithNoTraffic() { func (this *PipelineFixture) TestPipelineSerializesEachBroadcastResult() { this.executeOutputs = [][]any{{map[string]int{"value": 42}}} - this.handler.Handle(this.ctx, commandType("go")) + this.pipeline.BlockingEntrypoint.Handle(this.ctx, commandType("go")) this.shutdown() this.So(len(this.writeCalls), should.Equal, 1) From 2e2adb6c43689edf1f0eccfae454338fc9327809 Mon Sep 17 00:00:00 2001 From: Michael Whatcott Date: Fri, 5 Jun 2026 13:04:28 -0600 Subject: [PATCH 43/50] Use *Message for writer interface. --- handlers/harness/03_persistence.go | 4 ++-- handlers/harness/03_persistence_test.go | 12 ++++++------ handlers/harness/config.go | 10 +++++----- handlers/harness/contracts.go | 2 +- handlers/harness/pipeline_test.go | 20 ++++++++++---------- handlers/harness/sqladapter/writer.go | 18 ++++++++++-------- handlers/harness/sqladapter/writer_test.go | 4 ++-- 7 files changed, 36 insertions(+), 34 deletions(-) diff --git a/handlers/harness/03_persistence.go b/handlers/harness/03_persistence.go index e0a9a82..70449c6 100644 --- a/handlers/harness/03_persistence.go +++ b/handlers/harness/03_persistence.go @@ -13,7 +13,7 @@ type persistence struct { output chan *unitOfWork writer Writer wait func(context.Context, time.Duration) error - buffer []any + buffer []*Message } func newPersistence(ctx context.Context, monitor Monitor, input, output chan *unitOfWork, writer Writer, wait func(context.Context, time.Duration) error) *persistence { @@ -24,7 +24,7 @@ func newPersistence(ctx context.Context, monitor Monitor, input, output chan *un output: output, writer: writer, wait: wait, - buffer: make([]any, 0, 1024), + buffer: make([]*Message, 0, 1024), } } diff --git a/handlers/harness/03_persistence_test.go b/handlers/harness/03_persistence_test.go index 76d2be9..ebd05fc 100644 --- a/handlers/harness/03_persistence_test.go +++ b/handlers/harness/03_persistence_test.go @@ -25,7 +25,7 @@ type PersistenceFixture struct { subject *persistence writeMu sync.Mutex - writeCalls [][]any + writeCalls [][]*Message writeFailCount int tracked []any @@ -47,11 +47,11 @@ func (this *PersistenceFixture) Track(observation any) { this.tracked = append(this.tracked, observation) } -func (this *PersistenceFixture) Write(ctx context.Context, messages ...any) error { +func (this *PersistenceFixture) Write(ctx context.Context, messages ...*Message) error { this.So(ctx.Value("testing"), should.Equal, this.Name()) this.writeMu.Lock() defer this.writeMu.Unlock() - captured := make([]any, len(messages)) + captured := make([]*Message, len(messages)) copy(captured, messages) this.writeCalls = append(this.writeCalls, captured) if this.writeFailCount > 0 { @@ -79,7 +79,7 @@ func (this *PersistenceFixture) TestWritesAllResultsThenForwardsUnit() { units := this.drain() this.So(len(units), should.Equal, 1) this.So(len(this.writeCalls), should.Equal, 1) - this.So(this.writeCalls[0], should.Equal, []any{m1, m2}) + this.So(this.writeCalls[0], should.Equal, []*Message{m1, m2}) this.So(this.waits, should.BeEmpty) this.So(this.tracked, should.BeEmpty) } @@ -96,8 +96,8 @@ func (this *PersistenceFixture) TestEachUnitIsWrittenIndependently() { units := this.drain() this.So(len(units), should.Equal, 2) this.So(len(this.writeCalls), should.Equal, 2) - this.So(this.writeCalls[0], should.Equal, []any{m1}) - this.So(this.writeCalls[1], should.Equal, []any{m2}) + this.So(this.writeCalls[0], should.Equal, []*Message{m1}) + this.So(this.writeCalls[1], should.Equal, []*Message{m2}) this.So(this.tracked, should.BeEmpty) } diff --git a/handlers/harness/config.go b/handlers/harness/config.go index 83c5ea3..af830e4 100644 --- a/handlers/harness/config.go +++ b/handlers/harness/config.go @@ -155,8 +155,8 @@ func (singleton) defaults(options ...option) []option { // zero options and still produce a runnable (if inert) pipeline. type nop struct{} -func (nop) Track(any) {} -func (nop) Serialize(io.Writer, any) error { return nil } -func (nop) ContentType() string { return "" } -func (nop) Write(context.Context, ...any) error { return nil } -func (nop) Dispatch(context.Context, ...any) error { return nil } +func (nop) Track(any) {} +func (nop) Serialize(io.Writer, any) error { return nil } +func (nop) ContentType() string { return "" } +func (nop) Write(context.Context, ...*Message) error { return nil } +func (nop) Dispatch(context.Context, ...any) error { return nil } diff --git a/handlers/harness/contracts.go b/handlers/harness/contracts.go index 64970d6..4c3c6cb 100644 --- a/handlers/harness/contracts.go +++ b/handlers/harness/contracts.go @@ -10,7 +10,7 @@ import ( type ( Writer interface { - Write(ctx context.Context, messages ...any) error + Write(ctx context.Context, messages ...*Message) error } Dispatcher interface { Dispatch(ctx context.Context, messages ...any) error diff --git a/handlers/harness/pipeline_test.go b/handlers/harness/pipeline_test.go index 90d1a09..3eaed55 100644 --- a/handlers/harness/pipeline_test.go +++ b/handlers/harness/pipeline_test.go @@ -26,7 +26,7 @@ type PipelineFixture struct { executeOutputs [][]any writeLock sync.Mutex - writeCalls [][]any + writeCalls [][]*Message dispatchLock sync.Mutex dispatchCalls [][]any @@ -79,13 +79,13 @@ func (this *PipelineFixture) Serialize(out io.Writer, _ any) error { func (this *PipelineFixture) ContentType() string { return "" } -func (this *PipelineFixture) Write(ctx context.Context, messages ...any) error { +func (this *PipelineFixture) Write(ctx context.Context, messages ...*Message) error { this.So(ctx.Value("testing"), should.Equal, this.Name()) + buffer := make([]*Message, len(messages)) + copy(buffer, messages) this.writeLock.Lock() - defer this.writeLock.Unlock() - captured := make([]any, len(messages)) - copy(captured, messages) - this.writeCalls = append(this.writeCalls, captured) + this.writeCalls = append(this.writeCalls, buffer) + this.writeLock.Unlock() return nil } @@ -136,8 +136,8 @@ func (this *PipelineFixture) TestPipelineRoutesMessageThroughExecutionPersistenc this.So(len(this.writeCalls), should.Equal, 1) this.So(len(this.writeCalls[0]), should.Equal, 2) - this.So(this.writeCalls[0][0].(*Message).Value, should.Equal, "event-A") - this.So(this.writeCalls[0][1].(*Message).Value, should.Equal, "event-B") + this.So(this.writeCalls[0][0].Value, should.Equal, "event-A") + this.So(this.writeCalls[0][1].Value, should.Equal, "event-B") this.So(len(this.dispatchCalls), should.Equal, 1) this.So(len(this.dispatchCalls[0]), should.Equal, 2) @@ -162,7 +162,7 @@ func (this *PipelineFixture) TestPipelineHandlesMultipleMessagesAcrossHandleCall var written []any for _, call := range this.writeCalls { for _, message := range call { - written = append(written, message.(*Message).Value) + written = append(written, message.Value) } } this.So(written, should.Equal, []any{"e1", "e2", "e3"}) @@ -195,6 +195,6 @@ func (this *PipelineFixture) TestPipelineSerializesEachBroadcastResult() { this.shutdown() this.So(len(this.writeCalls), should.Equal, 1) - message := this.writeCalls[0][0].(*Message) + message := this.writeCalls[0][0] this.So(message.Content.Len() > 0, should.BeTrue) } diff --git a/handlers/harness/sqladapter/writer.go b/handlers/harness/sqladapter/writer.go index 1b448ba..62a1840 100644 --- a/handlers/harness/sqladapter/writer.go +++ b/handlers/harness/sqladapter/writer.go @@ -11,12 +11,15 @@ import ( "github.com/smarty/messaging/v3/handlers/harness" ) +// Deprecated +type legacyWrite func(context.Context, *sql.Tx, ...*harness.Message) + type Writer struct { handle *sql.DB typeNames map[reflect.Type]string stride uint64 logger Logger - legacyWrite func(context.Context, *sql.Tx, ...any) + legacyWrite legacyWrite } // NewWriter builds a Writer that inserts rows into the `Messages` table and @@ -25,7 +28,7 @@ type Writer struct { // Deprecation warning: the legacyWrite escape hatch is retained for migration from // other projects and will be removed in a later release; new callers // should supply a no-op function. -func NewWriter(handle *sql.DB, typeNames map[reflect.Type]string, stride uint64, logger Logger, legacyWrite func(context.Context, *sql.Tx, ...any)) *Writer { +func NewWriter(handle *sql.DB, typeNames map[reflect.Type]string, stride uint64, logger Logger, legacyWrite legacyWrite) *Writer { return &Writer{ handle: handle, typeNames: typeNames, @@ -35,7 +38,7 @@ func NewWriter(handle *sql.DB, typeNames map[reflect.Type]string, stride uint64, } } -func (this *Writer) Write(ctx context.Context, messages ...any) (err error) { +func (this *Writer) Write(ctx context.Context, messages ...*harness.Message) (err error) { if len(messages) == 0 { return nil } @@ -62,12 +65,11 @@ func (this *Writer) Write(ctx context.Context, messages ...any) (err error) { return tx.Commit() } -func (this *Writer) insertMessages(ctx context.Context, tx *sql.Tx, messages []any) error { +func (this *Writer) insertMessages(ctx context.Context, tx *sql.Tx, messages []*harness.Message) error { var statement strings.Builder // TODO: reuse statement builder statement.WriteString(`INSERT INTO Messages (type, payload) VALUES `) args := make([]any, 0, len(messages)*2) // TODO: reuse slice/buffer - for i, raw := range messages { - message := raw.(*harness.Message) + for i, message := range messages { if message.Type == "" { message.Type = this.typeNames[reflect.TypeOf(message.Value)] } @@ -88,8 +90,8 @@ func (this *Writer) insertMessages(ctx context.Context, tx *sql.Tx, messages []a if err != nil { return err } - for i, raw := range messages { - raw.(*harness.Message).ID = uint64(first) + uint64(i)*this.stride + for i, message := range messages { + message.ID = uint64(first) + uint64(i)*this.stride } return nil } diff --git a/handlers/harness/sqladapter/writer_test.go b/handlers/harness/sqladapter/writer_test.go index af151ab..987a849 100644 --- a/handlers/harness/sqladapter/writer_test.go +++ b/handlers/harness/sqladapter/writer_test.go @@ -41,7 +41,7 @@ type WriterFixture struct { handle *sql.DB subject *Writer - legacyWriteCalls [][]any + legacyWriteCalls [][]*harness.Message legacyWritePanic any testStride uint64 } @@ -57,7 +57,7 @@ func (this *WriterFixture) Setup() { this.truncateTables() } -func (this *WriterFixture) fakeLegacyWrite(_ context.Context, _ *sql.Tx, messages ...any) { +func (this *WriterFixture) fakeLegacyWrite(_ context.Context, _ *sql.Tx, messages ...*harness.Message) { this.legacyWriteCalls = append(this.legacyWriteCalls, messages) if this.legacyWritePanic != nil { panic(this.legacyWritePanic) From 3fd8dc495e72a51393d952c7c29239bb6ba6f849 Mon Sep 17 00:00:00 2001 From: Michael Whatcott Date: Fri, 5 Jun 2026 13:10:37 -0600 Subject: [PATCH 44/50] Use *Message for dispatcher interface. --- handlers/harness/05_broadcast.go | 4 ++-- handlers/harness/05_broadcast_test.go | 16 ++++++++-------- handlers/harness/config.go | 10 +++++----- handlers/harness/contracts.go | 2 +- handlers/harness/pipeline_test.go | 16 ++++++++-------- 5 files changed, 24 insertions(+), 24 deletions(-) diff --git a/handlers/harness/05_broadcast.go b/handlers/harness/05_broadcast.go index eada4f6..72dd552 100644 --- a/handlers/harness/05_broadcast.go +++ b/handlers/harness/05_broadcast.go @@ -11,7 +11,7 @@ type broadcast struct { monitor Monitor input chan *unitOfWork output chan *unitOfWork - buffer []any + buffer []*Message dispatcher Dispatcher wait func(context.Context, time.Duration) error } @@ -22,7 +22,7 @@ func newBroadcast(ctx context.Context, monitor Monitor, input, output chan *unit monitor: monitor, input: input, output: output, - buffer: make([]any, 0, 1024), + buffer: make([]*Message, 0, 1024), dispatcher: dispatcher, wait: wait, } diff --git a/handlers/harness/05_broadcast_test.go b/handlers/harness/05_broadcast_test.go index 299d445..e6a3aac 100644 --- a/handlers/harness/05_broadcast_test.go +++ b/handlers/harness/05_broadcast_test.go @@ -25,7 +25,7 @@ type BroadcastFixture struct { subject *broadcast dispatchMu sync.Mutex - dispatchCalls [][]any + dispatchCalls [][]*Message dispatchFailCount int tracked []any @@ -47,13 +47,13 @@ func (this *BroadcastFixture) Track(observation any) { this.tracked = append(this.tracked, observation) } -func (this *BroadcastFixture) Dispatch(ctx context.Context, messages ...any) error { +func (this *BroadcastFixture) Dispatch(ctx context.Context, messages ...*Message) error { this.So(ctx.Value("testing"), should.Equal, this.Name()) - this.dispatchMu.Lock() - defer this.dispatchMu.Unlock() - captured := make([]any, len(messages)) + captured := make([]*Message, len(messages)) copy(captured, messages) + this.dispatchMu.Lock() this.dispatchCalls = append(this.dispatchCalls, captured) + this.dispatchMu.Unlock() if this.dispatchFailCount > 0 { this.dispatchFailCount-- return errors.New("dispatch failure") @@ -79,7 +79,7 @@ func (this *BroadcastFixture) TestDispatchesAllResultsThenForwardsUnit() { units := this.drain() this.So(len(units), should.Equal, 1) this.So(len(this.dispatchCalls), should.Equal, 1) - this.So(this.dispatchCalls[0], should.Equal, []any{m1, m2}) + this.So(this.dispatchCalls[0], should.Equal, []*Message{m1, m2}) this.So(this.waits, should.BeEmpty) this.So(this.tracked, should.BeEmpty) } @@ -96,8 +96,8 @@ func (this *BroadcastFixture) TestEachUnitDispatchedIndependently() { units := this.drain() this.So(len(units), should.Equal, 2) this.So(len(this.dispatchCalls), should.Equal, 2) - this.So(this.dispatchCalls[0], should.Equal, []any{m1}) - this.So(this.dispatchCalls[1], should.Equal, []any{m2}) + this.So(this.dispatchCalls[0], should.Equal, []*Message{m1}) + this.So(this.dispatchCalls[1], should.Equal, []*Message{m2}) this.So(this.tracked, should.BeEmpty) } diff --git a/handlers/harness/config.go b/handlers/harness/config.go index af830e4..8ae7807 100644 --- a/handlers/harness/config.go +++ b/handlers/harness/config.go @@ -155,8 +155,8 @@ func (singleton) defaults(options ...option) []option { // zero options and still produce a runnable (if inert) pipeline. type nop struct{} -func (nop) Track(any) {} -func (nop) Serialize(io.Writer, any) error { return nil } -func (nop) ContentType() string { return "" } -func (nop) Write(context.Context, ...*Message) error { return nil } -func (nop) Dispatch(context.Context, ...any) error { return nil } +func (nop) Track(any) {} +func (nop) Serialize(io.Writer, any) error { return nil } +func (nop) ContentType() string { return "" } +func (nop) Write(context.Context, ...*Message) error { return nil } +func (nop) Dispatch(context.Context, ...*Message) error { return nil } diff --git a/handlers/harness/contracts.go b/handlers/harness/contracts.go index 4c3c6cb..765e90d 100644 --- a/handlers/harness/contracts.go +++ b/handlers/harness/contracts.go @@ -13,7 +13,7 @@ type ( Write(ctx context.Context, messages ...*Message) error } Dispatcher interface { - Dispatch(ctx context.Context, messages ...any) error + Dispatch(ctx context.Context, messages ...*Message) error } Monitor interface { Track(observation any) diff --git a/handlers/harness/pipeline_test.go b/handlers/harness/pipeline_test.go index 3eaed55..f061713 100644 --- a/handlers/harness/pipeline_test.go +++ b/handlers/harness/pipeline_test.go @@ -28,7 +28,7 @@ type PipelineFixture struct { writeLock sync.Mutex writeCalls [][]*Message dispatchLock sync.Mutex - dispatchCalls [][]any + dispatchCalls [][]*Message trackLock sync.Mutex tracked []any @@ -89,13 +89,13 @@ func (this *PipelineFixture) Write(ctx context.Context, messages ...*Message) er return nil } -func (this *PipelineFixture) Dispatch(ctx context.Context, messages ...any) error { +func (this *PipelineFixture) Dispatch(ctx context.Context, messages ...*Message) error { this.So(ctx.Value("testing"), should.Equal, this.Name()) - this.dispatchLock.Lock() - defer this.dispatchLock.Unlock() - captured := make([]any, len(messages)) + captured := make([]*Message, len(messages)) copy(captured, messages) + this.dispatchLock.Lock() this.dispatchCalls = append(this.dispatchCalls, captured) + this.dispatchLock.Unlock() return nil } @@ -141,8 +141,8 @@ func (this *PipelineFixture) TestPipelineRoutesMessageThroughExecutionPersistenc this.So(len(this.dispatchCalls), should.Equal, 1) this.So(len(this.dispatchCalls[0]), should.Equal, 2) - this.So(this.dispatchCalls[0][0].(*Message).Value, should.Equal, "event-A") - this.So(this.dispatchCalls[0][1].(*Message).Value, should.Equal, "event-B") + this.So(this.dispatchCalls[0][0].Value, should.Equal, "event-A") + this.So(this.dispatchCalls[0][1].Value, should.Equal, "event-B") batchInFlight, batchComplete := this.countTracked() this.So(batchInFlight, should.Equal, 1) @@ -170,7 +170,7 @@ func (this *PipelineFixture) TestPipelineHandlesMultipleMessagesAcrossHandleCall var dispatched []any for _, call := range this.dispatchCalls { for _, message := range call { - dispatched = append(dispatched, message.(*Message).Value) + dispatched = append(dispatched, message.Value) } } this.So(dispatched, should.Equal, []any{"e1", "e2", "e3"}) From 541a3889d6198efa72e332b7dcc1b34439bf76da Mon Sep 17 00:00:00 2001 From: Michael Whatcott Date: Fri, 5 Jun 2026 15:24:13 -0600 Subject: [PATCH 45/50] Use *Message everywhere. --- handlers/harness/sqladapter/dispatcher.go | 13 ++++++------- handlers/harness/sqladapter/recovery.go | 2 +- handlers/harness/sqladapter/writer_test.go | 2 +- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/handlers/harness/sqladapter/dispatcher.go b/handlers/harness/sqladapter/dispatcher.go index 4b30dab..71267f9 100644 --- a/handlers/harness/sqladapter/dispatcher.go +++ b/handlers/harness/sqladapter/dispatcher.go @@ -29,7 +29,7 @@ func NewDispatcher(connector messaging.Connector, handle *sql.DB, logger Logger) } } -func (this *Dispatcher) Dispatch(ctx context.Context, messages ...any) error { +func (this *Dispatcher) Dispatch(ctx context.Context, messages ...*harness.Message) error { if len(messages) == 0 { return nil } @@ -39,7 +39,7 @@ func (this *Dispatcher) Dispatch(ctx context.Context, messages ...any) error { return this.markDispatched(ctx, messages) } -func (this *Dispatcher) publish(ctx context.Context, messages []any) error { +func (this *Dispatcher) publish(ctx context.Context, messages []*harness.Message) error { connection, err := this.connector.Connect(ctx) if err != nil { return err @@ -53,8 +53,7 @@ func (this *Dispatcher) publish(ctx context.Context, messages []any) error { defer func() { _ = writer.Close() }() dispatches := make([]messaging.Dispatch, 0, len(messages)) // TODO: reuse slice, pool dispatch struct - for _, raw := range messages { - message := raw.(*harness.Message) + for _, message := range messages { dispatches = append(dispatches, messaging.Dispatch{ Durable: true, MessageType: message.Type, @@ -67,16 +66,16 @@ func (this *Dispatcher) publish(ctx context.Context, messages []any) error { return err } -func (this *Dispatcher) markDispatched(ctx context.Context, messages []any) error { +func (this *Dispatcher) markDispatched(ctx context.Context, messages []*harness.Message) error { var statement strings.Builder statement.WriteString(`UPDATE Messages SET dispatched = NOW(3) WHERE id IN (`) args := make([]any, 0, len(messages)) - for i, raw := range messages { + for i, message := range messages { if i > 0 { statement.WriteString(`,`) } statement.WriteString(`?`) - args = append(args, raw.(*harness.Message).ID) + args = append(args, message.ID) } statement.WriteString(`)`) if _, err := this.handle.ExecContext(ctx, statement.String(), args...); err != nil { diff --git a/handlers/harness/sqladapter/recovery.go b/handlers/harness/sqladapter/recovery.go index dc29b90..6c23154 100644 --- a/handlers/harness/sqladapter/recovery.go +++ b/handlers/harness/sqladapter/recovery.go @@ -28,7 +28,7 @@ func Recover(ctx context.Context, handle *sql.DB, dispatcher *Dispatcher, logger defer func() { _ = rows.Close() }() var total int - var messages []any + var messages []*harness.Message for rows.Next() { var ( id uint64 diff --git a/handlers/harness/sqladapter/writer_test.go b/handlers/harness/sqladapter/writer_test.go index 987a849..3a62260 100644 --- a/handlers/harness/sqladapter/writer_test.go +++ b/handlers/harness/sqladapter/writer_test.go @@ -81,7 +81,7 @@ func (this *WriterFixture) TestWrite_InsertsMessageRowAndInvokesLegacyWrite() { this.So(err, should.BeNil) this.So(this.countMessages(), should.Equal, 1) - this.So(this.legacyWriteCalls, should.Equal, [][]any{{message}}) + this.So(this.legacyWriteCalls, should.Equal, [][]*harness.Message{{message}}) this.So(message.Type, should.Equal, "order-received") } From e5f18641f6d27fd7f759a08a727872ea200b5831 Mon Sep 17 00:00:00 2001 From: Michael Whatcott Date: Fri, 5 Jun 2026 15:38:22 -0600 Subject: [PATCH 46/50] Remove unused fields. --- handlers/harness/message.go | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/handlers/harness/message.go b/handlers/harness/message.go index 5cec769..7b591c4 100644 --- a/handlers/harness/message.go +++ b/handlers/harness/message.go @@ -1,37 +1,22 @@ package harness -import ( - "bytes" - "time" -) +import "bytes" type Message struct { // ID represents the unique ID of this message and its sequential place within a larger stream. ID uint64 - // Value contains the in-memory Go message structure. - Value any - // Type is the name registered for the (Go) type of the Value. // (i.e. 'subscription:subscription-renewed-v2'). Type string + // Value contains the in-memory Go message structure. + Value any + // Content contains the serialized representation of the Go Value. Content *bytes.Buffer // ContentType identifies the serialization method employed to represent the Content // (i.e. 'application/json; charset=utf-8'). ContentType string - - // ContentEncoding identifies the encoding of the Content - // (i.e. 'gzip'). - ContentEncoding string - - // Stored represents the instant the Type, Content, ContentType, and ContentEncoding - // were durably stored (a critically important step in processing--if it's not stored, it didn't happen). - Stored time.Time - - // Dispatched represents the instant the Type, Content, ContentType, and ContentEncoding - // were published to the external messaging system (if configured). - Dispatched time.Time } From 2b7c6301b310a940a27f978f668006c455290f42 Mon Sep 17 00:00:00 2001 From: Michael Whatcott Date: Mon, 8 Jun 2026 17:04:56 -0600 Subject: [PATCH 47/50] It is the model's responsibility to only produce json-compatible values. I don't have a good idea of how to enforce this at compile-time or at startup. Until then, this is very awkward. For now it is up to the monitor to emit the name of the type and the actual value that wouldn't marshal. --- handlers/harness/02_serialization.go | 6 +----- handlers/harness/02_serialization_test.go | 3 +-- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/handlers/harness/02_serialization.go b/handlers/harness/02_serialization.go index 5d773ef..f8da332 100644 --- a/handlers/harness/02_serialization.go +++ b/handlers/harness/02_serialization.go @@ -30,11 +30,7 @@ func (this *serialization) Listen() { if err != nil { failure.Error = fmt.Errorf("%w: %w", ErrSerialization, err) failure.Value = message.Value - this.monitor.Track(failure) - - message.Content.Reset() - message.ContentType = "go fmt.Sprintf(%#v)" - _, _ = fmt.Fprintf(message.Content, "%#v", message.Value) // Not JSON, but it will have to do... + this.monitor.Track(failure) // This would be a big, fat, hairy deal. It is the domain model's responsibility to only produce values that will marshal to JSON. } else { message.ContentType = this.serializer.ContentType() } diff --git a/handlers/harness/02_serialization_test.go b/handlers/harness/02_serialization_test.go index 424c2a8..c7bdcf9 100644 --- a/handlers/harness/02_serialization_test.go +++ b/handlers/harness/02_serialization_test.go @@ -129,7 +129,7 @@ func (this *SerializationFixture) TestSerializesEachResultValueIntoContent_Popul this.So(units[0].results[0].ContentType, should.Equal, "test/content-type") } -func (this *SerializationFixture) TestSerializerErrorIsTracked_FallbackToFmtSprintfEncoding() { +func (this *SerializationFixture) TestSerializerErrorIsTracked() { type TestMessage struct { Value string `json:"value"` } @@ -153,5 +153,4 @@ func (this *SerializationFixture) TestSerializerErrorIsTracked_FallbackToFmtSpri this.So(ok, should.BeTrue) this.So(observation.Error, should.WrapError, ErrSerialization) this.So(observation.Value, should.Equal, value) - this.So(units[0].results[0].Content.String(), should.Equal, `harness.TestMessage{Value:"unserializable"}`) } From 3605ce63a7435e7f55ae483c97883d1c37e159d7 Mon Sep 17 00:00:00 2001 From: Michael Whatcott Date: Tue, 9 Jun 2026 11:50:53 -0600 Subject: [PATCH 48/50] Restore a few checks and tests to ensure proper handling of message IDs. --- handlers/harness/sqladapter/writer.go | 26 +++++++ handlers/harness/sqladapter/writer_test.go | 82 ++++++++++++++++++++++ 2 files changed, 108 insertions(+) diff --git a/handlers/harness/sqladapter/writer.go b/handlers/harness/sqladapter/writer.go index 62a1840..b88cf57 100644 --- a/handlers/harness/sqladapter/writer.go +++ b/handlers/harness/sqladapter/writer.go @@ -4,6 +4,7 @@ import ( "cmp" "context" "database/sql" + "errors" "fmt" "reflect" "strings" @@ -83,6 +84,10 @@ func (this *Writer) insertMessages(ctx context.Context, tx *sql.Tx, messages []* if err != nil { return err } + affected, err := result.RowsAffected() + if err != nil { + return err + } // https://dev.mysql.com/doc/refman/5.6/en/information-functions.html#function_last-insert-id // > If you insert multiple rows using a single INSERT statement, LAST_INSERT_ID() returns the value // > generated for the first inserted row only. @@ -90,8 +95,29 @@ func (this *Writer) insertMessages(ctx context.Context, tx *sql.Tx, messages []* if err != nil { return err } + return this.assignIDs(messages, affected, first) +} + +// assignIDs validates the row count and starting identity reported by the +// INSERT, then derives each message's ID from the first auto-increment value. +// The derivation relies on a single multi-row "simple insert" producing +// consecutive auto-increment values spaced by stride, which holds even under +// innodb_autoinc_lock_mode = 2 so long as no concurrent "bulk inserts" target +// the Messages table and stride matches the server's auto_increment_increment. +func (this *Writer) assignIDs(messages []*harness.Message, affected, first int64) error { + if affected != int64(len(messages)) { + return errRowsAffected + } + if first <= 0 { + return errIdentityFailure + } for i, message := range messages { message.ID = uint64(first) + uint64(i)*this.stride } return nil } + +var ( + errRowsAffected = errors.New("the number of modified rows was not expected compared to the number of writes performed") + errIdentityFailure = errors.New("unable to determine the identity of the inserted row(s)") +) diff --git a/handlers/harness/sqladapter/writer_test.go b/handlers/harness/sqladapter/writer_test.go index 3a62260..3259dec 100644 --- a/handlers/harness/sqladapter/writer_test.go +++ b/handlers/harness/sqladapter/writer_test.go @@ -134,6 +134,88 @@ func (this *WriterFixture) TestWrite_NoMessages_NoOp() { this.So(this.countMessages(), should.Equal, 0) } +func TestAssignIDsFixture(t *testing.T) { + gunit.Run(new(AssignIDsFixture), t) +} + +// AssignIDsFixture exercises the pure ID-derivation logic without a database, +// covering the stride arithmetic and the defensive guards that a live MySQL +// server would never naturally trip. +type AssignIDsFixture struct { + *gunit.Fixture +} + +func (this *AssignIDsFixture) writer(stride uint64) *Writer { + return NewWriter(nil, nil, stride, nil, nil) +} +func (this *AssignIDsFixture) messages(count int) (results []*harness.Message) { + for range count { + results = append(results, &harness.Message{}) + } + return results +} + +func (this *AssignIDsFixture) TestStridedIDsDerivedFromFirstIdentity() { + messages := this.messages(3) + + err := this.writer(7).assignIDs(messages, 3, 100) + + this.So(err, should.BeNil) + this.So(messages[0].ID, should.Equal, uint64(100)) + this.So(messages[1].ID, should.Equal, uint64(107)) + this.So(messages[2].ID, should.Equal, uint64(114)) +} + +func (this *AssignIDsFixture) TestZeroStrideDefaultsToOne() { + messages := this.messages(3) + + err := this.writer(0).assignIDs(messages, 3, 5) + + this.So(err, should.BeNil) + this.So(messages[0].ID, should.Equal, uint64(5)) + this.So(messages[1].ID, should.Equal, uint64(6)) + this.So(messages[2].ID, should.Equal, uint64(7)) +} + +func (this *AssignIDsFixture) TestSingleMessage() { + messages := this.messages(1) + + err := this.writer(7).assignIDs(messages, 1, 42) + + this.So(err, should.BeNil) + this.So(messages[0].ID, should.Equal, uint64(42)) +} + +func (this *AssignIDsFixture) TestRowsAffectedMismatchReturnsErrorAndLeavesIDsUntouched() { + messages := this.messages(3) + + err := this.writer(7).assignIDs(messages, 2, 100) + + this.So(err, should.Equal, errRowsAffected) + this.So(messages[0].ID, should.Equal, uint64(0)) + this.So(messages[1].ID, should.Equal, uint64(0)) + this.So(messages[2].ID, should.Equal, uint64(0)) +} + +func (this *AssignIDsFixture) TestZeroIdentityReturnsErrorAndLeavesIDsUntouched() { + messages := this.messages(2) + + err := this.writer(7).assignIDs(messages, 2, 0) + + this.So(err, should.Equal, errIdentityFailure) + this.So(messages[0].ID, should.Equal, uint64(0)) + this.So(messages[1].ID, should.Equal, uint64(0)) +} + +func (this *AssignIDsFixture) TestNegativeIdentityReturnsError() { + messages := this.messages(1) + + err := this.writer(7).assignIDs(messages, 1, -1) + + this.So(err, should.Equal, errIdentityFailure) + this.So(messages[0].ID, should.Equal, uint64(0)) +} + func (this *WriterFixture) countMessages() int { var count int err := this.handle.QueryRow(`SELECT COUNT(*) FROM Messages`).Scan(&count) From 695e56cb35682b13e68ec4cdd8e32a8d2f54a176 Mon Sep 17 00:00:00 2001 From: Michael Whatcott Date: Tue, 9 Jun 2026 11:58:07 -0600 Subject: [PATCH 49/50] Add reference to official documentation. --- handlers/harness/sqladapter/writer.go | 1 + 1 file changed, 1 insertion(+) diff --git a/handlers/harness/sqladapter/writer.go b/handlers/harness/sqladapter/writer.go index b88cf57..e2e08f7 100644 --- a/handlers/harness/sqladapter/writer.go +++ b/handlers/harness/sqladapter/writer.go @@ -104,6 +104,7 @@ func (this *Writer) insertMessages(ctx context.Context, tx *sql.Tx, messages []* // consecutive auto-increment values spaced by stride, which holds even under // innodb_autoinc_lock_mode = 2 so long as no concurrent "bulk inserts" target // the Messages table and stride matches the server's auto_increment_increment. +// https://dev.mysql.com/doc/refman/8.4/en/innodb-auto-increment-handling.html func (this *Writer) assignIDs(messages []*harness.Message, affected, first int64) error { if affected != int64(len(messages)) { return errRowsAffected From c82c508b0e452ec9c36d42b376dd9459c2808f55 Mon Sep 17 00:00:00 2001 From: Michael Whatcott Date: Tue, 9 Jun 2026 14:54:41 -0600 Subject: [PATCH 50/50] Fail fast when serialization is unsuccessful. --- ...2-40-proposal-serialization-fail-fast.html | 216 ++++++++++++++++++ handlers/harness/02_serialization.go | 20 +- handlers/harness/02_serialization_test.go | 48 +++- handlers/harness/config.go | 9 + 4 files changed, 271 insertions(+), 22 deletions(-) create mode 100644 doc/work-sessions/2026/2026-06-09_14-22-40-proposal-serialization-fail-fast.html diff --git a/doc/work-sessions/2026/2026-06-09_14-22-40-proposal-serialization-fail-fast.html b/doc/work-sessions/2026/2026-06-09_14-22-40-proposal-serialization-fail-fast.html new file mode 100644 index 0000000..b195522 --- /dev/null +++ b/doc/work-sessions/2026/2026-06-09_14-22-40-proposal-serialization-fail-fast.html @@ -0,0 +1,216 @@ + + + + +Proposal: Fail fast on serialization errors + + + +

Proposal: Fail fast on serialization errors

+ +

Background

+

+ Commit 2b7c630 established the contract: it is the domain model's + responsibility to only produce values that serialize successfully. A + serialization failure is therefore a programming error — a contract violation + that should never occur in a correctly built application. But the pipeline must + still do something when it occurs, and what it does today is the worst + available option. +

+

+ Today, handlers/harness/02_serialization.go:30-38 tracks a + SerializationError via the monitor and then lets the unit of work + keep flowing. The failed message proceeds with an empty Content + buffer and an empty ContentType, which means: +

+
    +
  • Stage 03 persists a Messages row with a type but an empty payload — a corrupt assertion that "this event occurred and here is its content."
  • +
  • Stage 04 acks the batch upstream, so the input is never redelivered.
  • +
  • Stage 05 publishes the empty payload to every downstream subscriber of that message type — the cascading-failure scenario.
  • +
  • If the row somehow escapes dispatch, sqladapter.Recover faithfully re-publishes the empty payload at every subsequent startup.
  • +
+ +

Decision (from design discussion, 2026-06-09)

+ + + + + +
QuestionAnswer
Persist a typed, payload-less row?No
Dispatch the message downstream?No — hard constraint
Mark a persisted row dispatched?Moot (nothing is persisted)
+

+ Instead: track the observation, then halt the process before the + unit reaches persistence. +

+ +

Approach

+

+ In 02_serialization.go, when serializer.Serialize returns + an error: track the SerializationError exactly as today, then + panic with the wrapped error (which satisfies + errors.Is(err, ErrSerialization)). The unit is never forwarded. +

+

+ This composes cleanly with semantics the pipeline already has: +

+
    +
  • Nothing is persisted, dispatched, or acked — the same + "no ack → MQ redelivers" contract the persistence-abandonment path relies on + (03_persistence.go:40).
  • +
  • The failure is deterministic: on restart the MQ redelivers + the input, the domain model produces the same unserializable value, and the app + crash-loops loudly until a human fixes the model. Anomaly visibility is maximal.
  • +
  • Downstream systems experience only the publisher's absence, + never a malformed message.
  • +
  • No schema changes; no special cases in Writer, + Dispatcher, or Recover.
  • +
+

+ Why the whole process and not just the unit: by the time + serialization runs, the domain model's in-memory state has already advanced + (execution happened in stage 01). Dropping the unit and continuing would let MQ + redelivery replay inputs against already-mutated state. The only safe recovery + point is process death followed by a clean rebuild of state — halt or forward, + nothing in between. +

+

+ Mechanically, the panic occurs inside one of the fanned-out serialization + goroutines (SerializerCount, default 4). An unrecovered panic in any + goroutine terminates the process with a stack trace on stderr — loud by design. + The existing defer close(this.output) still runs during unwind, so + test code (which recovers) observes a closed output channel. +

+ +

Sketch (not final code)

+
err := this.serializer.Serialize(message.Content, message.Value)
+if err != nil {
+    failure := SerializationError{
+        Error: fmt.Errorf("%w: %w", ErrSerialization, err),
+        Value: message.Value,
+    }
+    this.monitor.Track(failure)
+    panic(failure.Error) // contract violation (see commit 2b7c630): halt before
+                         // persistence so nothing is stored, acked, or dispatched.
+}
+message.ContentType = this.serializer.ContentType()
+
+

+ Note this also retires the awkward failure.Error = nil / failure.Value = nil + reset dance in the current loop — failure becomes a local inside the + error branch. +

+ +

Trade-offs

+
    +
  • Innocent bystanders. Other in-flight units + (and sibling messages within the failing unit) die unacked and are redelivered + after restart. At-least-once delivery already demands tolerance of this; it is + the same blast radius as a power failure.
  • +
  • Monitor flush. Track runs + synchronously before the panic, but if a monitor implementation buffers + asynchronously, the observation may be lost when the process dies. The panic + message and stack trace on stderr carry the same information, so the anomaly is + never silent.
  • +
  • Deliberate outage. The app crash-loops until + the domain bug is fixed and deployed. This is the accepted cost: stream + integrity outranks availability for a should-never-happen contract violation.
  • +
  • No escape hatch (yet). A configurable + Options.Fatal(func(error)) hook could replace the bare panic later + if an application ever needs graceful-drain-then-exit. Deliberately omitted now: + no known consumer needs it, and it would invite "handling" the unhandleable.
  • +
+ +

Implementation Checklist

+ +

Phase 1 — Red: capture fail-fast semantics in tests (agent)

+
    +
  • ☑ In 02_serialization_test.go, rewrite + TestSerializerErrorIsTracked as + TestSerializerErrorTracksThenPanics: arrange a unit of + [good, bad, good] messages with the bad one registered in + serializeFail; invoke Listen() synchronously + wrapped in a recover-capturing closure (input pre-loaded and closed).
  • +
  • ☑ Assert: the recovered value is an error satisfying + errors.Is(err, ErrSerialization) and wrapping the underlying + serializer error.
  • +
  • ☑ Assert: exactly one SerializationError was tracked + (with the offending Value) before the panic.
  • +
  • ☑ Assert: serialization stopped at the bad message + (serializeCalls contains the first good value and the bad value, not + the trailing good value).
  • +
  • ☑ Assert: the failing unit was never forwarded and the output channel + was closed (drain yields zero units).
  • +
  • ☑ Add TestUnitsBeforeFailureRemainForwarded: a fully good + unit followed by a failing unit — drain yields only the good unit; panic still + observed.
  • +
  • ☑ Run go test ./handlers/harness/ -run TestSerializationFixture | tail -50 + — confirm the new tests fail for the right reason (no panic occurs; unit is + forwarded today).
  • +
+ +

Phase 2 — Green: production change (your turn 🟠)

+
    +
  • ☑ Modify serialization.Listen in + 02_serialization.go to make the Phase 1 tests pass.
  • +
  • ☑ Run go test ./handlers/harness/ -run TestSerializationFixture | tail -50 + — confirm green.
  • +
+ +

Phase 3 — Documentation & sweep (agent)

+
    +
  • ☑ Update the package doc comment in config.go to state + the fail-fast contract: a serialization failure tracks a + SerializationError and panics; values supplied to the pipeline must + serialize (per commit 2b7c630).
  • +
  • ☑ Update the doc comment on SerializationError / + ErrSerialization in contracts.go to note the + observation immediately precedes a panic.
  • +
  • ☑ Grep for any other code or docs that assume serialization errors + flow through (e.g. empty-ContentType handling) and reconcile. + (Only match: the unrelated transport-level SerializationError in the + serialization package. Nothing to reconcile.)
  • +
  • ☑ Run make test | tail -50 — full suite green + (includes go mod tidy, go fmt, -race; + handlers/harness at 100% coverage).
  • +
+ +

Your Turn 🟠

+
+

+ Phase 2 is yours: the semantic heart of the change, sized at roughly ten lines in + 02_serialization.go. The failing tests from Phase 1 are your + specification. Guidance, not code: +

+
    +
  • Keep the loop structure and defer close(this.output) — + the deferred close is what lets the recovering test observe a closed output.
  • +
  • On error: build the SerializationError as a local, + Track it first, then panic with its (wrapped) error so + callers of recover() can use errors.Is(..., ErrSerialization).
  • +
  • On success: set message.ContentType as today.
  • +
  • The failure variable hoisted above the loop (and its + nil-reset lines) should disappear.
  • +
  • Replace the "big, fat, hairy deal" comment with one stating the + contract and the consequence (halt before persistence; nothing stored, acked, or + dispatched; MQ redelivers after restart).
  • +
+

+ If you'd rather I take Phase 2 as well, just say "skip the your-turn step." +

+
+ + + diff --git a/handlers/harness/02_serialization.go b/handlers/harness/02_serialization.go index f8da332..26bbe15 100644 --- a/handlers/harness/02_serialization.go +++ b/handlers/harness/02_serialization.go @@ -1,8 +1,6 @@ package harness -import ( - "fmt" -) +import "fmt" type serialization struct { monitor Monitor @@ -21,21 +19,19 @@ func newSerialization(monitor Monitor, enc serializer, input, output chan *unitO } func (this *serialization) Listen() { - var failure SerializationError - defer close(this.output) for unit := range this.input { for _, message := range unit.results { err := this.serializer.Serialize(message.Content, message.Value) if err != nil { - failure.Error = fmt.Errorf("%w: %w", ErrSerialization, err) - failure.Value = message.Value - this.monitor.Track(failure) // This would be a big, fat, hairy deal. It is the domain model's responsibility to only produce values that will marshal to JSON. - } else { - message.ContentType = this.serializer.ContentType() + failure := SerializationError{ + Value: message.Value, + Error: fmt.Errorf("%w: %w", ErrSerialization, err), + } + this.monitor.Track(failure) + panic(failure.Error) // The caller has failed to produce only values that will serialize successfully. } - failure.Error = nil - failure.Value = nil + message.ContentType = this.serializer.ContentType() } this.output <- unit } diff --git a/handlers/harness/02_serialization_test.go b/handlers/harness/02_serialization_test.go index c7bdcf9..9e654d3 100644 --- a/handlers/harness/02_serialization_test.go +++ b/handlers/harness/02_serialization_test.go @@ -129,28 +129,56 @@ func (this *SerializationFixture) TestSerializesEachResultValueIntoContent_Popul this.So(units[0].results[0].ContentType, should.Equal, "test/content-type") } -func (this *SerializationFixture) TestSerializerErrorIsTracked() { +func (this *SerializationFixture) listenAndRecover() (recovered any) { + defer func() { recovered = recover() }() + this.subject.Listen() + return nil +} + +func (this *SerializationFixture) TestSerializerErrorTracksThenPanics() { type TestMessage struct { Value string `json:"value"` } boom := errors.New("boom") - value := TestMessage{ + bad := TestMessage{ Value: "unserializable", } - this.serializeFail[value] = boom + this.serializeFail[bad] = boom unit := &unitOfWork{results: []*Message{ - {Value: value, Content: bytes.NewBuffer(nil)}, + {Value: "good-1", Content: bytes.NewBuffer(nil)}, + {Value: bad, Content: bytes.NewBuffer(nil)}, + {Value: "good-2", Content: bytes.NewBuffer(nil)}, }} this.input <- unit close(this.input) - go this.subject.Listen() + recovered := this.listenAndRecover() - units := this.drain() - this.So(len(units), should.Equal, 1) + this.So(recovered, should.NOT.BeNil) + err, isError := recovered.(error) + this.So(isError, should.BeTrue) + this.So(err, should.WrapError, ErrSerialization) + this.So(err, should.WrapError, boom) this.So(this.tracked, should.HaveLength, 1) - observation, ok := this.tracked[0].(SerializationError) - this.So(ok, should.BeTrue) + observation, isObservation := this.tracked[0].(SerializationError) + this.So(isObservation, should.BeTrue) this.So(observation.Error, should.WrapError, ErrSerialization) - this.So(observation.Value, should.Equal, value) + this.So(observation.Value, should.Equal, bad) + this.So(this.serializeCalls, should.Equal, []any{"good-1", bad}) + this.So(this.drain(), should.BeEmpty) // drain returning proves output was closed during unwind +} + +func (this *SerializationFixture) TestUnitsBeforeFailureRemainForwarded() { + boom := errors.New("boom") + this.serializeFail["bad"] = boom + this.input <- &unitOfWork{results: []*Message{{Value: "one", Content: bytes.NewBuffer(nil)}}} + this.input <- &unitOfWork{results: []*Message{{Value: "bad", Content: bytes.NewBuffer(nil)}}} + close(this.input) + + recovered := this.listenAndRecover() + + this.So(recovered, should.NOT.BeNil) + units := this.drain() + this.So(units, should.HaveLength, 1) + this.So(units[0].results[0].Content.String(), should.Equal, "encoded:one") } diff --git a/handlers/harness/config.go b/handlers/harness/config.go index 8ae7807..a100abe 100644 --- a/handlers/harness/config.go +++ b/handlers/harness/config.go @@ -15,6 +15,15 @@ // those retry loops abort when the context passed to New(ctx, ...) is cancelled, // so consumers must cancel it on shutdown to avoid hanging the drain. Custom // Writer and Dispatcher implementations must honor the context they are given. +// +// Values produced by registered domain types must serialize successfully — +// that is the calling application's contract. If the Serializer ever returns +// an error, the pipeline tracks a SerializationError observation and then +// panics, halting the process before the unit of work reaches persistence: +// nothing is stored, acked, or dispatched, and the message source redelivers +// after restart. The failure is deterministic, so the application crash-loops +// until the offending domain type is fixed. Messages already in the pipeline +// may not persist (requiring retry from the caller or redelivery from the MQ). package harness import (