diff --git a/Makefile b/Makefile index 9b27578..2e59c63 100755 --- a/Makefile +++ b/Makefile @@ -3,6 +3,12 @@ test: fmt go test -timeout=1s -short -race -covermode=atomic ./... +test.db: test + go test -timeout=30s -race -covermode=atomic github.com/smarty/messaging/v3/handlers/harness/sqladapter + +test.db.local: + (docker compose -f doc/docker-compose.yml up --wait && $(MAKE) test.db --no-print-directory); docker compose -f doc/docker-compose.yml down + fmt: go mod tidy && go fmt ./... diff --git a/doc/docker-compose.yml b/doc/docker-compose.yml new file mode 100644 index 0000000..7903147 --- /dev/null +++ b/doc/docker-compose.yml @@ -0,0 +1,13 @@ +services: + mysql: + image: mysql:8.0 + environment: + MYSQL_ALLOW_EMPTY_PASSWORD: 1 + MYSQL_DATABASE: messaging + ports: + - 3306:3306 + healthcheck: + test: "mysql -uroot -e 'select 1'" + interval: 2s + timeout: 20s + retries: 10 diff --git a/sqlmq/_schema_mysql.sql b/doc/mysql/schema.sql similarity index 100% rename from sqlmq/_schema_mysql.sql rename to doc/mysql/schema.sql diff --git a/doc/work-sessions/2026/2026-05-07_13-06-13-proposal-import-harness.md b/doc/work-sessions/2026/2026-05-07_13-06-13-proposal-import-harness.md new file mode 100644 index 0000000..2869fda --- /dev/null +++ b/doc/work-sessions/2026/2026-05-07_13-06-13-proposal-import-harness.md @@ -0,0 +1,291 @@ +--- +name: Import harness + sqladapter into messaging/v3 +description: Proposal to lift the infra/harness pipeline into github.com/smarty/messaging/v3/handlers/harness and relocate the SQL-bound dispatcher/writer/recovery as a sqladapter subpackage. +type: plot +--- + +# Proposal: Import `infra/harness` into `messaging/v3/handlers/harness` + +## Background + +A second project (working copy under `-context/domain-transformation-phase-9-chunk-C/code/infra`) has grown a staged, pipeline-based message-handling "harness" that we now want to promote into the shared `github.com/smarty/messaging/v3` module so it can be reused. The source split is: + +- `infra/harness/*` — a generic, store-and-forward pipeline built from goroutine stages connected by buffered channels. Stages: `Entrypoint → Execution → Serialization (fan-out) → Persistence → Completion → Broadcast → Terminal`. Supporting code: `fanout.go`, `pool.go`, `routing.go`, `scanner.go`. +- `infra/*` — supporting types and a **reference implementation** of the `Writer` / `Dispatcher` interfaces plus a `Recover` function, all coupled to the same MySQL `Messages` table that `sqlmq/_schema_mysql.sql` already defines (`id`, `dispatched`, `type`, `payload`). + +The target module (`github.com/smarty/messaging/v3`) already has a `handlers/` namespace (`multi`, `retry`, `sqltx`, `transactional`) and reusable pipeline/streaming infrastructure (`streaming/`, `batch/`). The harness belongs there as another composable handler pattern. + +Decisions locked in during /plot and /replot: + +1. **Scope:** bring in both the generic harness and the SQL-backed reference implementation, but **invert the nesting**: harness at `handlers/harness`, SQL reference impl at `handlers/harness/sqladapter`. +2. **Contracts:** reuse `messaging.Handler` and `messaging.Listener` from the root `contracts.go` instead of re-declaring them inside the harness package. +3. **Serialization:** the pipeline holds an unexported `serializer` collaborator with a `Serialize(out io.Writer, in any) error` signature (close to `encoding/json/v2`'s `MarshalWrite`); the caller supplies one. The `goexperiment.jsonv2` build tag is dropped. **No serializer implementations ship with this module** — callers own encoding. +4. **Wire-up:** `New(...)` uses the module's functional-options convention (`Options singleton` pattern — see `handlers/retry/config.go`, `batch/config.go`, `handlers/transactional/config.go`). +5. **Test library:** bring in `github.com/smarty/gunit/v2` alongside the existing v1 dep. Both major versions coexist until a later, separate module-wide upgrade to v2. + +## Approach + +### 1. Destination layout + +``` +github.com/smarty/messaging/v3/ +├── contracts.go (unchanged; we'll reuse Handler, Listener, ListenCloser) +├── handlers/ +│ ├── ...existing sub-packages... +│ └── harness/ (NEW — generic pipeline; only New + Options are exported) +│ ├── contracts.go (Writer, Dispatcher, Monitor — exported collaborator types; executor, applicator, serializer — unexported internals; event structs + error sentinels) +│ ├── config.go (New(...) + Options singleton + option func + nop) +│ ├── message.go (Message value type — lifted from infra/message.go) +│ ├── pipeline.go (unexported build(ctx, cfg) wiring) +│ ├── 00_entrypoint.go / _test.go (unexported entrypoint type + newEntrypoint) +│ ├── 01_execution.go / _test.go (unexported execution + newExecution) +│ ├── 02_serialization.go / _test.go (unexported serialization + newSerialization; no build tag) +│ ├── 03_persistence.go / _test.go (unexported persistence + newPersistence) +│ ├── 04_completion.go / _test.go (unexported completion + newCompletion) +│ ├── 05_broadcast.go / _test.go (unexported broadcast + newBroadcast) +│ ├── 06_terminal.go / _test.go (unexported terminal + newTerminal) +│ ├── fanout.go (unexported fanIn + newFanIn + newFanOut) +│ ├── pool.go +│ ├── routing.go / routing_test.go (unexported router + newRouter) +│ ├── scanner.go +│ └── pipeline_test.go +│ └── sqladapter/ (NEW — SQL reference impl) +│ ├── dispatcher.go / _test.go +│ ├── writer.go / _test.go +│ ├── recovery.go / _test.go +│ └── contracts.go (Logger interface) +``` + +### 2. Contract changes + +The current `infra/harness/contracts.go` declares: + +```go +type Handler interface{ Handle(context.Context, ...any) } +type Listener interface{ Listen() } +``` + +These are structurally identical to the ones in `messaging/contracts.go`, so we delete the local versions and have `harness.New(...)` return `messaging.Handler` + `[]messaging.Listener`. The `entrypoint`'s `Close() error` + `Listen()` also already satisfies `messaging.ListenCloser`. + +The package's collaborator interfaces split into two groups: + +- **Exported** (caller supplies a real implementation via `Options.*`): `Writer`, `Dispatcher`, `Monitor`. These are the external boundaries of the pipeline. +- **Unexported** (internal to the package): `executor`, `applicator`, `serializer`, plus every pipeline-stage struct (`entrypoint`, `execution`, `serialization`, `persistence`, `completion`, `broadcast`, `terminal`), the `router`/`newRouter`, and the `fanIn`. The only way into the package is `New(...)` + `Options.*`. `Executor` and `Applicator` get unexported because callers never instantiate them directly — the pipeline discovers them reflectively via `router` from the domain types supplied through `Options.Types(...)`. + +The pluggable encoder lives as an unexported interface: + +```go +// serializer encodes a single message's value onto the supplied writer. The +// signature intentionally mirrors encoding/json/v2's MarshalWrite. Implementations +// must be safe for concurrent use — the pipeline runs multiple serialization workers. +type serializer interface { + Serialize(out io.Writer, in any) error +} +``` + +The `serialization` stage stops importing any encoder directly; its constructor takes a `serializer`: + +```go +func newSerialization(monitor Monitor, enc serializer, input, output chan *unitOfWork) *serialization +``` + +**No serializer implementations ship with this module.** Callers provide their own (for example, a thin wrapper around `encoding/json/v2`'s `MarshalWrite`, `encoding/json`'s `NewEncoder(w).Encode(v)`, protobuf, etc.). Keeping the type unexported forces callers to supply the collaborator through the functional option (see §8) rather than constructing a stage directly. + +### 3. Events and monitor + +The `Monitor` interface and its event types (`BatchInFlight`, `BatchComplete`, `UnitOfWorkInFlight`, `UnitOfWorkComplete`, `SerializationError`, `PersistenceError`, `BroadcastError`) move from `infra/contracts.go` to `handlers/harness/contracts.go` — all exported so callers can type-switch in their `Monitor.Track(any)` implementation. The error sentinels (`ErrSerialization`, `ErrPersistence`, `ErrBroadcast` — renamed from `ErrJSONSerialization` since we're format-agnostic) move with them. `Logger` is no longer needed inside `harness` itself (only `sqladapter` uses it); it moves into `sqladapter/contracts.go`. + +### 4. `Message` type placement + +`infra.Message` becomes `harness.Message` at `handlers/harness/message.go`. Its fields (`ID`, `Value`, `Type`, `Content`, `ContentType`, `ContentEncoding`, `Stored`, `Dispatched`) stay intact. The SQL adapter references `harness.Message` rather than duplicating. + +### 5. SQL adapter (`handlers/harness/sqladapter`) + +Direct port of `infra/dispatcher.go`, `infra/writer.go`, `infra/recovery.go`, with these edits: + +- `package infra` → `package sqladapter`. +- Import path `root/code/infra` (implied reference) drops; cross-package uses switch to `github.com/smarty/messaging/v3/handlers/harness`. +- `messaging.Connector` / `messaging.Dispatch` references resolve against the **same** module now (no path change needed — they already import `github.com/smarty/messaging/v3`). +- Package doc comment labels it as a reference implementation that targets the `Messages` table defined by `sqlmq/_schema_mysql.sql` in this same module (columns `id`, `dispatched`, `type`, `payload`). That schema coupling is not new — `sqlmq/dispatch_store.go` already writes to the same table. The adapter is simply a second reader/writer of the same schema. +- Preserve the existing `TODO` comments (double-encoding note in `dispatcher.go`, pagination note in `recovery.go`) — those are known issues, not something this import should silently fix. +- Keep `legacyWrite func(context.Context, *sql.Tx, ...any)` escape hatch on `Writer`, but document it as deprecated in the package comment. + +### 6. Test infrastructure + +The source tests use **gunit v2** (`github.com/smarty/gunit/v2`) along with `assert/better` + `assert/should`. The target module currently pins **gunit v1** (`github.com/smarty/gunit v1.6.0`). + +**Plan:** add `github.com/smarty/gunit/v2` to `go.mod` and port the tests with their v2 imports intact. The module will temporarily depend on both major versions. A later, separate initiative will migrate the rest of the module's tests from v1 to v2, at which point v1 can be dropped — that migration is out of scope for this import. + +If any imported test uses assertions that don't exist in v2 (or have different semantics), adapt as minimally as possible; do not rewrite assertions that already work. + +### 7. `go.mod` updates + +- Add `github.com/smarty/gunit/v2` (latest compatible version). Both v1 and v2 will be present until the future module-wide migration. +- `go.uber.org/goleak` is in source `go.sum` — confirm during import whether any ported test actually uses it; add only if needed. +- No other new dependencies. `io` and (if caller demonstrations are ever added in docs) stdlib-only. + +### 8. Functional-options wire-up + +The existing `New(ctx, monitor, executor, writer, dispatcher)` signature is replaced with the module's functional-options pattern. **Only `ctx` is positional**; every collaborator — including the domain types that drive routing — flows through options. The source `Executor`/`Applicator` interfaces become unexported (`executor`, `applicator`); the reflective `router` discovers real implementations from the domain types supplied via `Options.Types(...)` and the pipeline never asks the caller for an `Executor` directly. Following the convention in `handlers/retry/config.go` and `batch/config.go`: + +```go +// handlers/harness/config.go +package harness + +import ( + "context" + "io" + + "github.com/smarty/messaging/v3" +) + +func New(ctx context.Context, options ...option) (messaging.Handler, []messaging.Listener) { + var cfg configuration + for _, apply := range Options.defaults(options...) { + apply(&cfg) + } + return build(ctx, cfg) // internal wiring lives in pipeline.go +} + +var Options singleton + +type singleton struct{} +type option func(*configuration) + +type configuration struct { + Monitor Monitor + Serializer serializer + Writer Writer + Dispatcher Dispatcher + Types []any // domain types (handlers/applicators); passed to newRouter + BatchCapacity int // channel buffer, default 1024 + UnitSize int // max completions per unit, default 64 + SerializerCount int // fan-out worker count, default 4 +} + +// Types registers the domain objects whose Execute.../Apply... methods drive +// the pipeline. They are passed verbatim to newRouter(...) at build time. +func (singleton) Types(value ...any) option { return func(c *configuration) { c.Types = value } } +func (singleton) Monitor(value Monitor) option { return func(c *configuration) { c.Monitor = value } } +func (singleton) Serializer(value serializer) option { return func(c *configuration) { c.Serializer = value } } +func (singleton) Writer(value Writer) option { return func(c *configuration) { c.Writer = value } } +func (singleton) Dispatcher(value Dispatcher) option { return func(c *configuration) { c.Dispatcher = value } } +func (singleton) BatchCapacity(value int) option { return func(c *configuration) { c.BatchCapacity = value } } +func (singleton) UnitSize(value int) option { return func(c *configuration) { c.UnitSize = value } } +func (singleton) SerializerCount(value int) option { return func(c *configuration) { c.SerializerCount = value } } + +func (singleton) defaults(options ...option) []option { + var blank = nop{} + return append([]option{ + Options.Monitor(blank), + Options.Serializer(blank), + Options.Writer(blank), + Options.Dispatcher(blank), + Options.BatchCapacity(1024), + Options.UnitSize(64), + Options.SerializerCount(4), + }, options...) +} + +// nop satisfies every collaborator interface so New(...) can be called with +// zero options and still produce a runnable (if inert) pipeline. Callers +// override whichever collaborators they care about via Options.*. +type nop struct{} + +func (nop) Track(any) {} +func (nop) Serialize(io.Writer, any) error { return nil } +func (nop) Write(context.Context, ...any) error { return nil } +func (nop) Dispatch(context.Context, ...any) error { return nil } +func (nop) Execute(any, func(...any)) {} +``` + +Design notes: + +- `ctx` stays positional because the pipeline's lifetime is tied to it (see Trade-offs); everything else is an option. +- `Options.Types(...)` is the single entry point for registering domain behavior. `build(ctx, cfg)` constructs the internal router via `newRouter(cfg.Types...)` and passes it into the `execution` stage as its `executor` collaborator. Callers never hold a reference to the router. +- `Monitor`, `Serializer`, `Writer`, and `Dispatcher` all default to a shared `nop{}` implementation. A caller can invoke `New(ctx)` with zero options and get a runnable, inert pipeline; real deployments override whichever collaborators they care about via `Options.*`. This matches the defaulting style in `handlers/retry` and `handlers/transactional` (where `Logger` and `Monitor` default to nop). +- Buffer/worker tunables, previously hard-coded, are now options with the existing values as defaults. +- `build(ctx, cfg)` internally performs the current `New(...)`'s channel wiring; it lives in `pipeline.go` and stays unexported. +- Every pipeline-stage struct and its constructor is unexported (`entrypoint`/`newEntrypoint`, `execution`/`newExecution`, `serialization`/`newSerialization`, `persistence`/`newPersistence`, `completion`/`newCompletion`, `broadcast`/`newBroadcast`, `terminal`/`newTerminal`, `fanIn`/`newFanIn`/`newFanOut`, `router`/`newRouter`). The only way into the package is `New(...)` + `Options.*`. + +### 9. Alternatives considered + +- **Flat layout, no sqladapter split.** Rejected: conflates a generic pipeline with a schema-coupled reference impl; callers targeting a different schema would be forced to copy-paste rather than compose. +- **Top-level `/harness` package (sibling to `handlers/`).** Rejected: the user prefers nesting under `handlers/` where similar composable handler patterns live. +- **Absorb `Message`, `Monitor`, events into the root `messaging` package.** Rejected: these are harness-specific concepts (the root `Delivery`/`Dispatch` types are the message abstraction for the rest of the module); promoting them would blur the boundary. +- **Keep `jsonv2` build tag.** Rejected in favor of pluggable serialization. A caller who wants jsonv2 writes a short adapter; the pipeline stays encoder-agnostic. +- **Ship a JSON serializer helper subpackage.** Rejected per /replot: this module deliberately provides no serializer implementations. Callers own encoding. +- **Positional-argument constructor.** Rejected in favor of functional options for consistency with `retry`, `batch`, and `transactional`. +- **Expose stage types / `Executor` / `Router`.** Rejected: callers have no reason to construct a single stage or wire their own router. Keeping everything except `New`, `Options`, `Message`, the collaborator interfaces, and the event/error types unexported narrows the supported surface and lets us refactor internals without breaking users. +- **Positional `executor` argument.** Rejected: the source's `Executor` interface is really just "anything whose `Execute...(msg, broadcast)` methods the reflective router can see." Exposing a positional `Executor` parameter implied that callers build their own; `Options.Types(...)` more honestly describes the actual input (a list of domain objects) and lets the router stay private. + +## Trade-offs & Risks + +- **Context handling.** `Persistence` and `Broadcast` both capture the `ctx` supplied to `New(...)` and use it for every downstream Write/Dispatch call — so the pipeline's lifetime is tied to a single root context. This matches the source but is worth confirming explicitly; if the v3 conventions expect per-handle contexts to flow through, the stage signatures need adjustment. **Open question** for the user — proposal currently preserves source behavior. +- **Retry-forever semantics.** Persistence and Broadcast retry indefinitely with a 1-second sleep (and TODOs for exponential backoff). Imported as-is. `handlers/retry` already exists in the target module and uses more sophisticated logic; over time we may want to delegate to it, but not in this import. +- **Channel capacity defaults.** Back-pressure into the entrypoint's `Handle` (which holds a read lock while sending) can block callers under heavy load. Now tunable via `Options.BatchCapacity`, but the default of 1024 should be documented with this characteristic. +- **`reflect`-based routing.** The internal router's `Execute` mutates a shared `exclusions` slice without locking — fine in the current pipeline (routing runs inside `execution` which a single goroutine calls per message). Because the router is now unexported and constructed once per `New(...)` call, the sharing concern goes away. Add a doc comment on the internal type anyway so future contributors don't regress this invariant. +- **Dual gunit majors.** Temporarily carrying both `github.com/smarty/gunit` and `github.com/smarty/gunit/v2` increases `go.sum` noise and means new tests in *this* import use v2 while older tests elsewhere in the module still use v1. The divergence is explicit and scoped; the follow-up module-wide upgrade resolves it. +- **Silent-by-default collaborators.** Because `Serializer`, `Writer`, and `Dispatcher` all default to a `nop{}` that returns `nil`, forgetting to wire a real implementation produces a pipeline that silently drops work rather than surfacing an error. Likewise, forgetting `Options.Types(...)` produces a pipeline whose router has no registered handlers. Callers must remember to set these. Mitigate by documenting this in the package doc comment and the `New(...)` godoc, and by making the `Options.*` entries the obvious next step in any example. + +## Implementation Checklist + +### Phase 1: Scaffolding and contracts + +- [x] Create directory `handlers/harness/` and `handlers/harness/sqladapter/`. +- [x] Add `github.com/smarty/gunit/v2` to `go.mod`; run `go mod tidy`. (Temporarily removed by tidy with no importers; will return automatically once Phase 2 tests are ported.) +- [x] Write `handlers/harness/contracts.go` with the **exported** surface (`Writer`, `Dispatcher`, `Monitor`, event structs `BatchInFlight`, `BatchComplete`, `UnitOfWorkInFlight`, `UnitOfWorkComplete`, `SerializationError`, `PersistenceError`, `BroadcastError`, sentinels `ErrSerialization`, `ErrPersistence`, `ErrBroadcast`) plus the **unexported** internal interfaces (`executor`, `applicator`, `serializer`) and value types (`batch`, `unitOfWork`). +- [x] Write `handlers/harness/message.go` — copy `Message` struct with doc comments intact. +- [x] Write `handlers/harness/pool.go` — copy verbatim (no external deps). +- [x] Write `handlers/harness/scanner.go` — copy verbatim (signatures adjusted to use unexported `executor`/`applicator`). +- [x] Write `handlers/harness/fanout.go` — `fanIn` / `newFanIn` / `newFanOut` stay unexported; `stationFactory` now returns `messaging.Listener` and uses unexported `unitOfWork`. +- [x] Run `make compile` — confirm the package compiles (no tests yet). + +### Phase 2: Port stages bottom-up, TDD each one + +Work stage-by-stage from the terminal stage (simplest) upward, since downstream stages have no dependencies on upstream stages. All tests use `gunit/v2` imports as-is from source. **All stage types and constructors are renamed to unexported forms** (`Terminal` → `terminal`, `NewTerminal` → `newTerminal`, etc.); since the tests live in the same package, `_test.go` files can see them. + +- [x] Copy `06_terminal_test.go` from source; rename referenced types to lowercase. Run the terminal test — expect **failure** (`terminal` type doesn't exist yet in this package). +- [x] Port `06_terminal.go` as `terminal` / `newTerminal`. Run tests; confirm passing. +- [x] Copy `04_completion_test.go`; lowercase the type references. Run — expect failure. +- [x] Port `04_completion.go` as `completion` / `newCompletion`. Run tests; confirm passing. +- [x] Copy `05_broadcast_test.go`; lowercase the type references. Run — expect failure. +- [x] Port `05_broadcast.go` as `broadcast` / `newBroadcast`. Run; confirm passing. +- [x] Copy `03_persistence_test.go`; lowercase the type references. Run — expect failure. +- [x] Port `03_persistence.go` as `persistence` / `newPersistence`. Run; confirm passing. +- [x] **Rewrite** `02_serialization_test.go` to use a fake `serializer` (not jsonv2). Test should cover: success path writes to `message.Content`; serializer error is reported via monitor as `SerializationError` with `ErrSerialization` wrapped. Run — expect failure. +- [x] Port `02_serialization.go` as `serialization` / `newSerialization` with the new `Serialize(io.Writer, any) error` signature; drop the `//go:build goexperiment.jsonv2` tag. Run; confirm passing. +- [x] Copy `01_execution_test.go`; lowercase the type references. Run — expect failure. +- [x] Port `01_execution.go` as `execution` / `newExecution`. Run; confirm passing. +- [x] Copy `00_entrypoint_test.go`; lowercase the type references. Confirm it exercises `Close` + `Listen` semantics. Run — expect failure. +- [x] Port `00_entrypoint.go` as `entrypoint` / `newEntrypoint`. Run; confirm passing. + +### Phase 3: Routing, pipeline, and functional-options config + +- [x] Copy `routing_test.go`; lowercase references (`Router` → `router`, `NewRouter` → `newRouter`). Run — expect failure. +- [x] Port `routing.go` as `router` / `newRouter` with unexported `executor` / `applicator` interfaces; update `scanner.go`'s signature to match. Run; confirm passing. +- [x] Write `handlers/harness/pipeline.go` as the unexported `build(ctx, cfg)` function; it constructs all the channels, calls `newRouter(cfg.Types...)`, wires every stage, and returns `messaging.Handler` + `[]messaging.Listener`. +- [x] Write `handlers/harness/config.go` with `New(ctx, options...)` (no positional executor), `Options singleton`, `option` type, `configuration` struct (including `Types []any`), and per-option setters per §8, including `Options.Types(...)`. +- [x] Write a `config_test.go` that asserts defaults (`BatchCapacity=1024`, `UnitSize=64`, `SerializerCount=4`, and that `Monitor`, `Serializer`, `Writer`, and `Dispatcher` all default to the shared `nop{}` and behave inertly when the pipeline runs with no options supplied). Also assert that `Options.Types(...)` populates `configuration.Types` verbatim. +- [x] Adapt the source `pipeline_test.go` to call the new functional-options `New(...)`: the fixture registers itself via `Options.Types(this)` (it implements the `Execute...` method), and supplies fake `Writer`/`Dispatcher`/`serializer`/`Monitor` via `Options.*`. Run — expect failure if anything is still misaligned, then make green. +- [x] Run the full harness test suite with `-race`; confirm no goroutine leaks and all tests pass. + +### Phase 4: SQL adapter + +- [x] Copy `infra/dispatcher_test.go` to `handlers/harness/sqladapter/dispatcher_test.go`. Rewrite imports (`package infra` → `sqladapter`, `infra.Message` → `harness.Message`; gunit imports stay v2). Also added `testdb_test.go` with local `openTestDatabase`/`ensureDatabaseReadiness` helpers (pointing at `sqlmq/_schema_mysql.sql`) since the source's `db-connector` dep is out of scope for this module. Run — expect failure. +- [x] Port `dispatcher.go` to `sqladapter/dispatcher.go`. Add package-level doc comment labeling it as a reference implementation targeting the `Messages` table defined by `sqlmq/_schema_mysql.sql`. Preserve existing TODOs. Run; confirm passing. +- [x] Copy `writer_test.go` → `sqladapter/writer_test.go`, rewrite imports. Replaced the external `billing` registry types and `openTestDatabase` dep with local test-only `orderReceived`/`orderApproved` structs and shared helpers from `testdb_test.go`. Run — expect failure. +- [x] Port `writer.go`. Preserve `legacyWrite` escape hatch with a deprecation note in the godoc. Run; confirm passing. +- [x] Copy `recovery_test.go` → `sqladapter/recovery_test.go`, rewrite imports. Run — expect failure. +- [x] Port `recovery.go`. Preserve existing TODOs about pagination and Listener conversion. Run; confirm passing. +- [x] Add `sqladapter/contracts.go` with the `Logger` interface (moved from `infra/contracts.go`). (Added earlier alongside dispatcher.go so it would compile.) + +### Phase 5: Module hygiene + +- [x] Run `make test` — full module test suite with `-race -covermode=atomic`. Confirm green. (All packages pass; harness 99.5%, sqladapter 0% under `-short` since its integration tests require a live MySQL — they pass end-to-end against a local DB when run without `-short`.) +- [x] Run `go mod tidy`. Confirms two new direct deps: `github.com/smarty/gunit/v2` (planned) and `github.com/go-sql-driver/mysql` (test-only driver added during Phase 4 when we chose not to bring in `db-connector` — imported via `testdb_test.go`'s `_` alias, so it participates only in test builds). One new indirect: `filippo.io/edwards25519` (transitively required by the MySQL driver). +- [x] Inspect `go.sum` diff — only `filippo.io/edwards25519` and gunit/v2 additions, both explained by the items above. +- [x] Grep the new packages for references to `root/code/infra` — confirm zero. +- [x] Grep the new packages for any `goexperiment.jsonv2` build tags — confirm zero. +- [x] Add short package-level doc comments (`// Package harness provides a staged, store-and-forward message-handling pipeline...` etc.) on `harness` and `sqladapter`. (`harness` on config.go, `sqladapter` on dispatcher.go.) +- [x] Self-review diff for any stray `package infra` / wrong package declarations, unused imports, and leftover `TODO: pool ...` comments that should stay vs. be addressed now (keep them — they're load-bearing signals for future work). \ No newline at end of file diff --git a/doc/work-sessions/2026/2026-05-14_pipeline-component-diagram.svg b/doc/work-sessions/2026/2026-05-14_pipeline-component-diagram.svg new file mode 100644 index 0000000..0f4ff08 --- /dev/null +++ b/doc/work-sessions/2026/2026-05-14_pipeline-component-diagram.svg @@ -0,0 +1,236 @@ + + + + + + + + + + + + + + + + + + + + Caller + MQ/cron: Handle(ctx, messages...) + HTTP: await(ctx, message) via AsHTTPHandler + + + + + + Admission + pre-flight admit() gate + 503 + tracks LoadShed + + + + + + + + + 00 entrypoint + Handle (void) / await (void, ctx-honoring) + admit() watermark gate; wraps msgs → *batch + tracks BatchInFlight / CallerDeparted + + + + + + batches + chan *batch (cap BatchCapacity) + + + + + + 01 execution + router.Execute(msg, broadcast) + accumulates *Message → *unitOfWork + + + + + + work1 + chan *unitOfWork (cap UnitCapacity) + + + + + + 02 serialization + Serializer.Serialize(out, value) + fan-out workers + fanIn (not shown) + + + + + + work2 + chan *unitOfWork (cap UnitCapacity) + + + + + + 03 persistence + Writer.Write(ctx, msgs...) + retry forever w/ sleep + + + + + + work3 + chan *unitOfWork (cap UnitCapacity) + + + + + + 04 completion + invokes per-batch complete() + tracks BatchComplete + + + + + + work4 + chan *unitOfWork (cap UnitCapacity) + + + + + + 05 broadcast + Dispatcher.Dispatch(ctx, msgs...) + retry forever w/ sleep + + + + + + work5 + chan *unitOfWork (cap UnitCapacity) + + + + + + 06 terminal + drains the final channel + + + + + + + + + + + + + + + + + + + + + + + + + router + Execute*/Apply* methods + discovered from Options.Types + + + + + + Serializer + caller-supplied + Serialize(io.Writer, any) error + + + + + + Writer + caller-supplied + Write(ctx, ...any) error + + + + + + Dispatcher + caller-supplied + Dispatch(ctx, ...any) error + + + + + + + + + + + + + + + Monitor + Track(observation) + Batch{In,Complete} + UnitOfWork{In,Complete} + LoadShed / CallerDeparted + SerializationError + PersistenceError + BroadcastError + observed by all stages + + + + + + + + + + + + + + + complete() releases caller's WaitGroup + + + batches cap = config.BatchCapacity (default 1024); work1–work5 + fan-out cap = config.UnitCapacity (default 1) + diff --git a/doc/work-sessions/2026/2026-05-28_15-30-32-proposal-harness-resilience-module-changes.md b/doc/work-sessions/2026/2026-05-28_15-30-32-proposal-harness-resilience-module-changes.md new file mode 100644 index 0000000..3a41fbf --- /dev/null +++ b/doc/work-sessions/2026/2026-05-28_15-30-32-proposal-harness-resilience-module-changes.md @@ -0,0 +1,669 @@ +--- +name: Harness pipeline resilience (module-local changes) +description: Implement the messaging/v3 harness side of the cross-repo "harness resilience and idempotency" proposal — a void context-honoring HTTP entrypoint (unexported await) alongside today's Handle, a pre-flight admission gate (unexported admit) plus an in-module net/http shedding middleware, a thin AsHTTPHandler decorator so HTTP shells need no changes, and split channel-buffer sizing (BatchCapacity vs UnitCapacity). Excludes per-service route wireup and post-deploy observation, which belong to the consuming repos. +type: plot +--- + +# Proposal: Harness Pipeline Resilience — Module-Local Changes + +## Background + +The cross-repo proposal at +`billing-context/.../2026-05-15_23-02-32-proposal-harness-resilience-and-idempotency.md` +describes three operational changes layered on top of Chunk C of the incremental +domain transformation. The bulk of those changes live entirely inside this +module (`github.com/smarty/messaging/v3`); the only per-consumer work that +remains is the mechanical wrapping of each mutating route, which lives in the +~10 `*-context` services and is out of scope here. + +This proposal scopes the work to just the parts that land in *this* repo: + +- All edits under `handlers/harness/` (entrypoint, contracts, config, pipeline, + fanout, the new admission middleware + decorator, and their tests). +- A documentation update to `doc/work-sessions/2026/2026-05-14_pipeline-component-diagram.svg` + to reflect the split capacity knobs, the admission gate, and the new monitor + observations. + +The per-service route wireup (calling `AsHTTPHandler` and `Admission` from each +service's routes table), integration tests, and post-deploy observation steps +from the parent proposal happen in each consuming repo *after* this repo's +changes merge and a tagged version of `messaging/v3` is published. They are +explicitly **out of scope** for this proposal. + +### Why the changes are needed (recap) + +Two operational concerns surfaced after Chunks A and B of the incremental +domain transformation merged: + +1. **HTTP requests stack up indefinitely during a database/RabbitMQ outage.** + `entrypoint.Handle` blocks on a per-call `sync.WaitGroup` until the + Completion stage fires; the caller's `context.Context` is captured into + `*batch` but never observed. Even when the HTTP client's deadline passes or + the load balancer cancels the request, the goroutine stays parked. +2. **The pipeline's six channels are all sized to one knob (`BatchCapacity`, + default 1024).** During an outage that lets tens of thousands of in-memory + domain mutations sit between Apply and durable storage. The in-memory + `Domain` drifts far ahead of what was ever stored, surfacing as "we said yes + to the client, then forgot" on restart. + +### Why a *void* HTTP entrypoint (the central design constraint) + +An earlier draft gave the HTTP entrypoint the signature +`HandleResult(ctx, message any) HandleOutcome`, returning an enum the caller +mapped to `503`/`504`. That return value would force **every** mutating HTTP +route to branch on the outcome — and to add the corresponding test cases to +maintain coverage. Across the ~10 `*-context` services there are **70+ +mutating routes**; replicating outcome-mapping logic (and its tests) at each +one is exactly the kind of accumulated, duplicated branching we want to avoid. + +Two observations dissolve the need for a return value: + +- **Shedding is a *pre-flight* decision, not a *result*.** It can be decided + before the route handler ever runs. A small `admit() bool` predicate plus a + single HTTP middleware (written once, in this module) writes the `503` and + short-circuits the route. The route handler — and its tests — never see it. +- **A departed caller does not need a status override.** When the caller's + `ctx` fires mid-flight, the goroutine simply unblocks (fixing the leak in + concern #1) and the route writes its normal response to a client that is + already gone. No `504`, no buffering of the response, no extra machinery. The + in-flight batch keeps processing and durably stores regardless. + +That leaves the HTTP entrypoint **void**, identical in spirit to today's +`Handle`. The harness-side fix has these independently-mergeable pieces: + +1. **A void, context-honoring HTTP entrypoint** — an unexported + `await(ctx, message any)` alongside the existing + `Handle(ctx, messages ...any)`. `await` honors `ctx.Done()`, processes + exactly one message, emits a `CallerDeparted` observation when the caller + leaves, and returns nothing. +2. **Pre-flight admission** — an unexported `admit() bool` predicate on the + entrypoint (high-watermark check against the `batches` channel, used only by + the in-package middleware) and an in-module `Admission` HTTP middleware that + writes a `503` (inline, raw `net/http`) when the gate refuses. A thin + `AsHTTPHandler` decorator adapts `await` to the `messaging.Handler` interface + the HTTP shells already depend on, so neither the shells nor their tests + change. +3. **Split channel-buffer sizing** — `BatchCapacity` continues to size the + caller-side `batches` channel; a new `UnitCapacity` (default 1) sizes + `work1`–`work5` and the per-worker fan-out outputs. + +The companion `AdjustOrder` domain idempotency change (separate proposal, +already merged in the consuming repos) makes it safe for `await` to return +early when the caller's `ctx` fires: the in-flight batch keeps processing and +durably stores, and a client retry collapses to a no-op once the original batch +has persisted. + +## Approach + +### Decision summary + +Three methods on `*entrypoint` (two of them unexported), plus two thin +module-local adapters that are the *only* new exported surface: + +| Method | Visibility | Caller | Honors `ctx.Done()` | Sheds | Return | Arity | +|-------------------------------------|------------|----------------------|---------------------|---------|--------|-------------| +| `Handle(ctx, messages ...any)` | exported | MQ, cron | No | No | none | variadic | +| `await(ctx, message any)` | in-package | HTTP (via adapter) | Yes | No | none | exactly one | +| `admit() bool` | in-package | HTTP middleware | n/a | Decides | `bool` | n/a | + +| Adapter (this module, exported) | Role | +|------------------------------------------------------------|--------------------------------------------------------------------------------------------| +| `AsHTTPHandler(messaging.Handler) messaging.Handler` | Wraps the handler's `await` so HTTP shells keep depending on `messaging.Handler` (zero shell change) | +| `Admission(messaging.Handler, http.Handler) http.Handler` | Pre-flight gate; writes inline `503` when `admit()` is false | + +`await`, `admit`, and the `awaiter`/`admitter` interfaces the adapters assert +against are all **unexported**. Because both the middleware and the decorator +live in this package, the consumer never names them — `AsHTTPHandler(handler)` +and `Admission(handler, shell)` are the entire integration vocabulary, and both +take/return the standard `messaging.Handler`/`http.Handler` types. + +`await` takes a single `message any` (not variadic) because every HTTP route in +every consuming service invokes the domain with exactly one command per +request. Constraining the signature here: + +- Eliminates the empty-slice / multi-message edge cases on the HTTP path. +- Tightens the worst-case in-memory work-in-progress bound: combined with + `UnitCapacity=1`, each in-flight HTTP request contributes exactly one input + message to a batch. The `batches` channel capacity now corresponds directly + to a count of HTTP commands enqueued rather than a count of caller + invocations of arbitrary size. +- Surfaces the asymmetry plainly — MQ/cron may legitimately deliver multiple + events per call (broker batch deliveries); HTTP does not. + +Two new monitor observations: + +- `LoadShed{}` — emitted by `admit()` when it refuses on the high-watermark + check. (Refusal because the pipeline is `closed` is shutdown, not load, and + emits nothing.) +- `CallerDeparted{}` — emitted by `await` when the caller's `ctx` fired before + completion (whether during enqueue or during the wait). + +Two new configuration options with defaults: + +- `Options.UnitCapacity(int)` — default `1`. Sizes `work1`–`work5` and the + per-worker fan-out outputs. +- `Options.ShedThreshold(float64)` — default `0.80`. Fraction of the `batches` + channel capacity at or past which `admit()` refuses. + +### Detailed design + +#### 1. Shared helpers (pure refactor of today's `Handle`) + +Today's `00_entrypoint.go:29` has a single `Handle` whose body inlines waiter +acquisition, batch allocation, completion-callback wiring, the +admission-under-RWMutex sequence, and the wait. The split extracts three +private helpers shared by `Handle` and `await`. `prepare` keeps its variadic +`...any` shape so `Handle` passes its argument through verbatim; `await` calls +`prepare(ctx, message)` with its single message, which Go promotes to a +one-element slice at the call site. + +```go +// prepare acquires a waiter, allocates a *batch from the pool, and wires up +// the completion callback. Returns the items the caller will need. +func (this *entrypoint) prepare(ctx context.Context, messages ...any) (waiter *sync.WaitGroup, item *batch) { + waiter = this.waiters.Get() + waiter.Add(1) + item = this.batches.Get() + item.ctx = ctx + item.messages = messages + item.complete = func() { + waiter.Done() + this.monitor.Track(batchComplete) + this.batches.Put(item) + } + return waiter, item +} + +// abandon releases waiter and pool entry when the item was never enqueued +// (i.e. complete() will never fire). +func (this *entrypoint) abandon(waiter *sync.WaitGroup, item *batch) { + waiter.Done() + this.batches.Put(item) +} + +// waiterDone wraps waiter.Wait() in a chan struct{} so it's select-able. +func (this *entrypoint) waiterDone(waiter *sync.WaitGroup) (done chan struct{}) { + done = make(chan struct{}) + go func() { waiter.Wait(); close(done) }() + return done +} +``` + +#### 2. Path A — `Handle` (MQ and cron): preserves today's contract verbatim + +```go +func (this *entrypoint) Handle(ctx context.Context, messages ...any) { + waiter, item := this.prepare(ctx, messages...) + defer this.waiters.Put(waiter) + + this.lock.RLock() + if this.closed { + this.lock.RUnlock() + this.abandon(waiter, item) + return + } + this.work <- item + this.monitor.Track(batchInFlight) + this.lock.RUnlock() + + waiter.Wait() +} +``` + +Properties: +- **No `ctx.Done()` honoring.** MQ deliveries don't carry a client deadline; + cron has its own scheduler-level guard. Returning early would cause + `streaming` to ack work the pipeline never finished. +- **No load-shed.** Sending to `this.work` is a blocking channel send. + Back-pressure propagates to the broker via prefetch limits and unacked + counts. +- **The only "shed" condition is pipeline shutdown** (`this.closed`) — exactly + today's behavior. + +#### 3. Path B — `await` (HTTP): void, context-honoring, single message + +```go +func (this *entrypoint) await(ctx context.Context, message any) { + waiter, item := this.prepare(ctx, message) + defer this.waiters.Put(waiter) + + this.lock.RLock() + if this.closed { + this.lock.RUnlock() + this.abandon(waiter, item) + return + } + select { + case this.work <- item: + this.monitor.Track(batchInFlight) + this.lock.RUnlock() + case <-ctx.Done(): + this.lock.RUnlock() + this.abandon(waiter, item) + this.monitor.Track(callerDeparted) + return + } + + select { + case <-this.waiterDone(waiter): + case <-ctx.Done(): + this.monitor.Track(callerDeparted) + } +} +``` + +Properties: +- **Void.** No outcome to return — shedding is handled pre-flight by the + middleware (see §4), and a departed caller just unblocks. +- **Single message per call.** +- **Honors `ctx.Done()` in both the enqueue and the wait.** This is what fixes + the indefinite-stacking concern: an HTTP request whose client deadline passes + (or whose load balancer cancels) unblocks promptly instead of parking. +- **No hard-full backstop.** The ctx-honoring send already bounds the enqueue + wait, so there is no need for a non-blocking `default` arm — and dropping it + avoids a silent post-admit shed that would leave `command.Result` zero and + cause the shell to emit a wrong status (e.g. 404). `admit()` is the watermark + gate; the ctx-honoring send is the backstop. + +**Pool-entry lifecycle:** +- Success / wait-departed path: the batch was enqueued, so the pipeline owns it + and will invoke `item.complete()` (which `Put`s it). `await` must **not** + `Put` — the pool would receive the same item twice. (On wait-departure the + pipeline still completes the batch; only the HTTP goroutine returns early.) +- Enqueue-departed and closed paths: the item was never enqueued, so + `complete()` will never fire; `abandon(waiter, item)` does the cleanup. + +**Note — why `complete()` owns the `Put`, not the entrypoint.** A tempting +simplification is to have `complete()` only release the waiter and let the +entrypoint `Put` the batch once its own wait returns. That works for `Handle` +(which always waits for completion) but is *incorrect* for `await`: on the +wait-departed path `await` returns on `ctx.Done()` **before** completion, so an +entrypoint-side `Put` would never run and the pooled `*batch` would leak (worse, +the pipeline's later `complete()` would mutate an item the entrypoint believed +it had reclaimed). Completion-owned `Put` is precisely what lets a single +cleanup rule cover both "caller waited" and "caller departed but the pipeline +finished later." The entrypoint only `Put`s — via `abandon` — on the paths +where the batch was *never* enqueued and `complete()` will therefore never fire. +So the current split isn't just cleaner; it's the only correct allocation of +the `Put`. + +**Critically, the batch is not abandoned by the pipeline when the caller +departs.** When `ctx` fires after enqueue, the in-flight batch keeps +processing; `complete()` still fires; persistence still happens. Only the HTTP +caller's goroutine returns early. + +#### 4. Pre-flight admission: `admit()` + `Admission` middleware + `AsHTTPHandler` + +The high-watermark check lives in a side-effect-light predicate on the +entrypoint: + +```go +func (this *entrypoint) admit() bool { + this.lock.RLock() + defer this.lock.RUnlock() + if this.closed { + return false + } + if float64(len(this.work))/float64(cap(this.work)) >= this.shedThreshold { + this.monitor.Track(loadShed) + return false + } + return true +} +``` + +`Options.ShedThreshold(value float64)` exposes the threshold; default `0.80`. +Setting it ≥ `1.0` disables high-watermark shedding (only the closed-pipeline +refusal remains). + +The middleware and the decorator live in this module (new file +`handlers/harness/admission.go`) and depend only on the standard library and +`messaging/v3`. The `awaiter`/`admitter` interfaces are unexported; the adapters +accept the standard `messaging.Handler` returned by `New(...)` and assert it to +those interfaces internally (a failed assertion is a wireup-time programming +error and panics fast). The `503` response is flushed inline with raw +`net/http`: + +```go +type ( + admitter interface { + admit() bool + } + awaiter interface { + await(ctx context.Context, message any) + } +) + +// Admission refuses overloaded requests before the wrapped handler runs, +// writing an inline 503. Wrap each mutating route with it. +func Admission(handler messaging.Handler, inner http.Handler) http.Handler { + gate := handler.(admitter) + return http.HandlerFunc(func(response http.ResponseWriter, request *http.Request) { + if gate.admit() { + inner.ServeHTTP(response, request) + return + } + response.Header().Set("Content-Type", "application/json; charset=utf-8") + response.Header().Set("Retry-After", "1") + response.WriteHeader(http.StatusServiceUnavailable) + _, _ = response.Write(shedResponseBody) + }) +} + +// AsHTTPHandler adapts the void, context-honoring await to the +// messaging.Handler the HTTP shells already depend on, so no shell (and no +// shell test) changes. +func AsHTTPHandler(handler messaging.Handler) messaging.Handler { + return &httpAdapter{target: handler.(awaiter)} +} + +type httpAdapter struct { + target awaiter +} + +func (this *httpAdapter) Handle(ctx context.Context, messages ...any) { + for _, message := range messages { + this.target.await(ctx, message) + } +} + +var shedResponseBody = []byte(`{"errors":[{"message":"service overloaded"}]}`) +``` + +**Why this keeps the route handlers (and their tests) untouched.** Each +consuming service's HTTP shells already build a command, call +`handler.Handle(ctx, command)` on a `messaging.Handler`, and map the *mutated +command's* result field (`command.Result`) to its response — they read no +return value from `Handle` today. Two seams preserve that exactly: + +- `AsHTTPHandler(handler)` is substituted for the raw handler when the write + shells are constructed, so `Handle` now routes through `await` (ctx-honoring, + single-message) without the shell knowing. +- `Admission(handler, shell)` wraps each mutating route in the routes table, so + the `503` is decided before the shell runs. + +Illustrative consumer wireup (out of scope, shown for context only): + +```go +handler, listeners := harness.New(ctx, opts...) +httpHandler := harness.AsHTTPHandler(handler) +// ... +{"POST /admin/orders", harness.Admission(handler, NewAdminApproveOrderShell(httpHandler))}, +{"PUT /admin/accounts/:account/orders/:order/adjustments", harness.Admission(handler, NewAdminAdjustOrderShell(httpHandler))}, +``` + +The consumer performs no type assertions of its own — it passes the +`messaging.Handler` from `New(...)` straight into both adapters. Each of the 70+ +mutating routes across the ~10 services becomes a mechanical one-line wrap — no +per-route outcome logic, no per-route tests. The `503`/departed behavior is +tested **once**, here, against the middleware and the decorator. + +**Race note.** `admit()`'s `len(chan)/cap(chan)` snapshot races with concurrent +producers/consumers, and there is a TOCTOU window between `admit()` returning +true and `await`'s enqueue. Both are acceptable: the threshold is a soft signal, +and the ctx-honoring send means that even if the channel fills in the race +window, the request either drains normally or unblocks on `ctx.Done()` — it +never produces a wrong status the way a silent post-admit shed would. + +#### 5. Split channel buffer sizing + +Today (`pipeline.go:11-18`): + +```go +batches = make(chan *batch, config.BatchCapacity) +work1 = make(chan *unitOfWork, config.BatchCapacity) +// ... work2..work5 same +``` + +Proposed: + +```go +batches = make(chan *batch, config.BatchCapacity) +work1 = make(chan *unitOfWork, config.UnitCapacity) +work2 = make(chan *unitOfWork, config.UnitCapacity) +work3 = make(chan *unitOfWork, config.UnitCapacity) +work4 = make(chan *unitOfWork, config.UnitCapacity) +work5 = make(chan *unitOfWork, config.UnitCapacity) +``` + +And in `fanout.go:17`, the per-worker output channels (currently hardcoded to +1024) become `make(chan *unitOfWork, unitCapacity)` where `unitCapacity` is +threaded through `newFanOut`'s signature (lower-blast-radius option preferred +at implementation time). + +`UnitCapacity` defaults to 1. Tunable via `Options.UnitCapacity(value int)`. +Setting it equal to `BatchCapacity` reproduces today's behavior. + +**Why default 1, not 0?** Fully unbuffered channels turn every stage handoff +into a synchronization barrier — stage N can't begin unit N+1 until stage N+1 +has received unit N. Buffer-1 lets stage N finish unit N+1 *while* stage N+1 +is processing unit N. Pipelining benefit saturates at depth ~1 since each +stage runs in a single goroutine (except serialization, which has its own +fan-out concurrency). + +**Bound on in-memory drift during an outage** — with `UnitCapacity=1` and 5 +channels post-domain, plus the in-flight unit at each stage, the worst case is +~10 units' worth of unpersisted mutations. At `UnitSize=64` that's ~640 +batches' worth of broadcast results downstream of Execution. + +The single-message `await` signature *also* tightens the upstream side: each +HTTP-admitted batch on the `batches` channel now carries exactly one input +message, so `BatchCapacity` becomes a direct count of in-flight HTTP commands +rather than a count of caller invocations of arbitrary fan-out. + +### Non-goals + +- **Rewriting the pipeline.** The structure (Entrypoint → Execution → + Serialization → Persistence → Completion → Broadcast → Terminal) is + preserved verbatim. +- **Changing `messaging.Handler`.** `Handle(ctx, messages ...any)` keeps its + exact existing contract. `await` is a new (unexported) method, not a + replacement, and intentionally has a different signature (single message, + void). +- **Returning a status outcome from the HTTP path.** Explicitly rejected — see + Background and Alternatives. Shedding is decided pre-flight; departed callers + just unblock. +- **Buffering the HTTP response to override status post-hoc.** Not done — a + departed caller is already gone, so the shell's normal write to a dead + connection is harmless, and no `504` is emitted. +- **Per-service route wireup.** Calling `AsHTTPHandler` / `Admission` from each + service's routes table happens in the consuming repos, after a tagged + release. Out of scope here. +- **Domain-layer changes.** The companion `AdjustOrder` idempotency change is + in the consuming repos and is assumed merged before any consumer relies on a + departed-caller batch persisting. Nothing in this proposal touches domain + code. +- **`harness/sqladapter` changes.** No code changes; just a regression check + via `go test`. + +### Files modified (this repo only) + +| Path | Action | Purpose | +|--------------------------------------------------------------------|--------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `handlers/harness/00_entrypoint.go` | Modify | Extract `prepare`/`abandon`/`waiterDone`; keep `Handle` behavior identical; add unexported `await(ctx, message any)` and `admit() bool`; add `shedThreshold` field; add `loadShed`/`callerDeparted` sentinels | +| `handlers/harness/admission.go` | Add | Unexported `admitter`/`awaiter` interfaces; `Admission(messaging.Handler, http.Handler) http.Handler` (inline `503`); `AsHTTPHandler(messaging.Handler) messaging.Handler` decorator; `shedResponseBody` | +| `handlers/harness/contracts.go` | Modify | New `LoadShed` and `CallerDeparted` event types alongside the existing `BatchInFlight`/`BatchComplete`/etc. | +| `handlers/harness/config.go` | Modify | `Options.UnitCapacity(int)`, `Options.ShedThreshold(float64)`; defaults 1, 0.80 | +| `handlers/harness/pipeline.go` | Modify | Use `UnitCapacity` for `work1`–`work5`; pass it into `newFanOut`; thread `ShedThreshold` into `newEntrypoint` | +| `handlers/harness/fanout.go` | Modify | Accept `unitCapacity` and use it for the per-worker output channels instead of the hardcoded 1024 | +| `handlers/harness/00_entrypoint_test.go` | Modify | New tests for `await` (void, ctx-honoring, single message, pool lifecycle) and `admit` (watermark/closed/threshold), plus pinning tests for `Handle` | +| `handlers/harness/admission_test.go` | Add | Tests for `Admission` (pass-through when admitted; inline `503` body/headers when refused) and `AsHTTPHandler` (forwards to `await`) | +| `handlers/harness/config_test.go` | Modify | Assert defaults for `UnitCapacity`, `ShedThreshold`; assert override setters | +| `handlers/harness/pipeline_test.go` | Modify | Adjust assertions if any depend on default channel sizes (none expected to break — defaults preserve external observable behavior) | +| `doc/work-sessions/2026/2026-05-14_pipeline-component-diagram.svg` | Modify | Reflect split `BatchCapacity`/`UnitCapacity` knobs, the `admit` gate + `Admission` middleware, `LoadShed`/`CallerDeparted` observations, and the void `await` ingress | + +### Alternatives considered + +- **Return a `HandleOutcome` enum from the HTTP path and map it per-route.** + Rejected — this is the motivating problem. Across ~10 `*-context` services + and 70+ mutating routes, each route would replicate `503`/`504` branching and + the tests to cover it. Centralizing the decision in a pre-flight gate + + middleware keeps the route handlers void and tested once. +- **Buffer the HTTP `ResponseWriter` in middleware to override status after the + shell runs (the "Option A" / `504` path).** Rejected — the consuming shells + flush the response *inside* the handler, so a post-hoc override requires + wrapping the writer in a buffering shim in this module. The only payoff would + be a precise `504`, but a departed caller's connection is already gone, so the + shell's normal write is harmless and no override is needed. More machinery, no + real benefit. +- **Panic on shed + recover in middleware.** Rejected — shedding is a + *high-frequency* condition precisely during an outage, so this would panic on + a large fraction of requests and blur the "panic = bug = 500 + page someone" + semantics that recovery middleware exists to serve. The parent proposal + rejected panic-on-shed for the MQ path for the same noise reasons. +- **Put the shedding middleware in each consuming repo.** Rejected — 10 + services would duplicate the middleware and its tests. It lives once, here. +- **Export `await`/`admit` (or their interfaces) for the consumer to call.** + Rejected — both are only ever invoked by the in-package `Admission` + middleware and `AsHTTPHandler` decorator, so keeping them unexported shrinks + the public surface to two functions and prevents consumers from reaching past + the adapters. The adapters take/return standard `messaging.Handler` and assert + to the unexported interfaces internally. +- **Keep a hard-full `default` backstop inside `await`.** Rejected — without a + return value a silent post-admit shed would leave `command.Result` zero and + the shell would emit a wrong status (e.g. 404). The ctx-honoring send bounds + the enqueue wait without it; `admit()` is the watermark gate. +- **Single shared method branching on caller type via a `ctx` value or + `Options.Source`.** Rejected — `streaming` acks unconditionally on clean + `Handle` return, so an MQ-side shed-then-return would silently drop messages. + The two paths require fundamentally different behavior. Separate methods make + the contract visible at every wiring site. +- **Inject a per-batch `ctx` through Persistence and Broadcast.** Rejected. + Per-batch ctx in retry-forever stages would unwind partially-completed work + and break the durability principle. The pipeline ctx (`harness.New(ctx, …)`) + is the right scope for those stages. +- **Keep `BatchCapacity` sizing all channels uniformly.** Rejected — preserves + the in-memory drift problem during outages. +- **Add nack/error return to `messaging.Handler.Handle` itself.** Rejected as + out-of-scope — would touch every existing `messaging.Handler` implementation + across all consumers. + +## Trade-offs & Risks + +- **The only new exported surface is two functions — `AsHTTPHandler` and + `Admission`.** Both take and return the standard `messaging.Handler` / + `http.Handler` types. The `await`/`admit` methods and the `awaiter`/`admitter` + interfaces they assert against are unexported, so the consumer never names + them; `AsHTTPHandler(New(...))` and `Admission(New(...), shell)` are the whole + integration vocabulary. The adapters type-assert the supplied handler to the + unexported interfaces internally and panic at wireup if handed something other + than the harness entrypoint — a deliberate fail-fast on misconfiguration. +- **This module now imports `net/http`** (in `admission.go`). Minor — it is a + standard-library dependency, isolated to the admission file. If desired at + implementation time, the middleware and decorator can move to a sibling + subpackage (e.g. `handlers/harness/admission`) to keep `net/http` out of the + core pipeline package; the unexported `awaiter`/`admitter` interfaces would + then need to be exported (or the adapters constructed inside `harness` and + re-exported). Lower-churn option (single package, unexported interfaces) is + preferred unless review objects. +- **Single-message HTTP signature is a hard constraint.** A hypothetical future + HTTP route needing to submit multiple commands atomically would not fit. + Acceptable today — every existing HTTP route invokes the domain with exactly + one command — and reversible later (a sibling `awaitBatch` could be added + without breaking existing call sites). +- **`waiterDone` allocates a goroutine and a channel per `await` call.** On the + HTTP path only. The cost is a few hundred bytes and one goroutine for the + duration of the in-flight batch — well within an HTTP request's budget. On a + wait-departure the goroutine parks until the pipeline eventually completes the + batch, then exits. +- **`UnitCapacity=1` reduces normal-throughput headroom slightly.** Pipelining + is preserved (depth-1 buffer between stages) but bursty workloads that + previously absorbed into deep buffers now apply backpressure earlier. + Mitigation: configurable; set `Options.UnitCapacity(1024)` to reproduce + today's characteristic. +- **Caller-departed batches keep doing work the caller no longer cares about.** + Intentional — matches the durability principle. Combined with the merged + domain-layer idempotency change, repeated retries collapse to no-ops after + the first applies. From this module's perspective this is a contract + guarantee: "we will not unwind the in-flight batch when the caller departs." +- **The shed-threshold as a fraction is inexact, and there is a TOCTOU window + between `admit()` and `await`'s enqueue.** Acceptable — soft signal, not a + hard limit; and the ctx-honoring send means a race-window channel-full never + produces a wrong status (it drains or unblocks on `ctx.Done()`). +- **`Handle` and `await` share state (`this.work`, `this.lock`, `this.closed`).** + Two paths writing the same channel under the same RWMutex is fine; race-free + under `-race`. Tests must cover both paths interleaving on a shrunk-capacity + fixture. +- **Cross-repo coordination.** This module's changes are backward-compatible + (new options have defaults; the new exported functions don't break existing + `messaging.Handler` callers). A consumer that doesn't yet wrap its routes + keeps working unchanged. Per-service adoption is sequenced after a tagged + release. +- **Diagram drift.** The pipeline diagram is the canonical visual reference; if + the SVG isn't updated alongside the code, reviewers will form a stale mental + model. Mitigation: diagram update is in the checklist. + +## Implementation Checklist + +### Phase 1: Configuration plumbing (red/green) + +- [x] Edit `handlers/harness/config_test.go` (`TestDefaultsPopulateCapacities`) to also assert `cfg.UnitCapacity == 1` and `cfg.ShedThreshold == 0.80`. Run `make test` — confirm failure (fields don't exist yet → compile error). +- [x] Edit `handlers/harness/config_test.go` (`TestTunableOptionsOverrideDefaults`) to also exercise `Options.UnitCapacity(2)` and `Options.ShedThreshold(0.5)` and assert the values stick. Compile error still expected. +- [x] Edit `handlers/harness/config.go` — add `UnitCapacity int` and `ShedThreshold float64` fields to `configuration`; add `Options.UnitCapacity(int)` and `Options.ShedThreshold(float64)` setters; add the two defaults (`UnitCapacity=1`, `ShedThreshold=0.80`) to `Options.defaults(...)`. +- [x] Run `make test` — confirm config tests pass. + +### Phase 2: Pipeline rewires for split capacity (red/green) + +- [x] Edit `handlers/harness/pipeline.go` — change `work1`–`work5` to `make(chan *unitOfWork, config.UnitCapacity)`; thread `config.UnitCapacity` into the `newFanOut` call. +- [x] Edit `handlers/harness/fanout.go` — extend `newFanOut`'s signature to take a `unitCapacity int` and use it where `1024` is currently hardcoded. +- [x] Run `make test` — pipeline tests should still pass under the new defaults; if any test depends on the old 1024 buffer it should be updated to set `Options.UnitCapacity(1024)` explicitly. + +### Phase 3: Monitor observations and sentinels + +- [x] Edit `handlers/harness/contracts.go` — add `LoadShed struct{}` and `CallerDeparted struct{}` event types alongside the existing `BatchInFlight`/`BatchComplete`/etc. +- [x] Edit `handlers/harness/00_entrypoint.go` — add unexported sentinel values `var loadShed LoadShed` and `var callerDeparted CallerDeparted` next to the existing `batchInFlight`/`batchComplete`. +- [x] Run `make test` — confirm the existing suite still compiles and passes. + +### Phase 4: Extract shared helpers (pure refactor — keep `Handle` behavior identical) + +- [x] Refactor `handlers/harness/00_entrypoint.go` to extract `prepare(ctx, messages ...any) (*sync.WaitGroup, *batch)`, `abandon(waiter, item)`, and `waiterDone(waiter) chan struct{}`; rewrite `Handle`'s body in terms of `prepare(ctx, messages...)` so it is observably identical. +- [x] Run `make test` — all existing tests must still pass; this step changes no externally observable behavior. + +### Phase 5: Add `await` (TDD, void HTTP path, single message) + +- [x] Add `TestAwait_ReturnsAfterCompletion` — call `await(ctx, "msg")` with a single message; let the pipeline complete; assert the call returns and the batch was processed. Run `make test` — confirm failure (no `await` method yet → compile error). Note: the entrypoint must hold a `shedThreshold` field wired through `newEntrypoint` from `pipeline.go`. +- [x] Add the unexported `await(ctx context.Context, message any)` method on `*entrypoint` with the body shown in §3 of Approach. Run — confirm `ReturnsAfterCompletion` passes. +- [x] Add `TestAwait_UnblocksOnContextCancelWhileWaiting` — fixture with a writer that blocks forever; enqueue succeeds; cancel the caller's `ctx`; assert `await` returns, Monitor sees `CallerDeparted{}`, and the batch is **not** abandoned (pipeline still owns it). Run — confirm passing. +- [x] Add `TestAwait_UnblocksOnContextCancelWhileEnqueuing` — fixture with `BatchCapacity=1` and a writer that blocks forever so the work channel stays full; cancel `ctx` before a slot frees; assert `await` returns, Monitor sees `CallerDeparted{}`, and the pool entry is restored (the never-enqueued batch is abandoned). Run — confirm passing. +- [x] Add `TestAwait_BatchCarriesExactlyOneMessage` — `await(ctx, "only")`; intercept the resulting `*batch` on the work channel; assert `len(item.messages) == 1` and `item.messages[0] == "only"`. Run — confirm passing (pins the single-message contract). +- [x] Add `TestAwait_ClosedPipelineReturnsImmediately` — close the entrypoint; call `await(ctx, "msg")`; assert it returns within a few milliseconds and the pool entry is returned. Run — confirm passing. + +### Phase 6: Add `admit()` gate (TDD) + +- [x] Add `TestAdmit_TrueWhenBelowThreshold` — fresh entrypoint, empty work channel; assert `admit()` is true. Run — confirm failure (no `admit` yet → compile error). +- [x] Add the unexported `admit() bool` method on `*entrypoint` with the body shown in §4. Run — confirm `TrueWhenBelowThreshold` passes. +- [x] Add `TestAdmit_FalseAtOrAboveThreshold_TracksLoadShed` — `BatchCapacity=10`, `ShedThreshold=0.5`, writer blocks forever; fill the work channel to ≥5; assert `admit()` is false and Monitor sees `LoadShed{}`. Run — confirm passing. +- [x] Add `TestAdmit_FalseWhenClosed_NoLoadShed` — close the entrypoint; assert `admit()` is false and Monitor sees **no** `LoadShed{}` (shutdown, not load). Run — confirm passing. +- [x] Add `TestAdmit_ThresholdAtOrAboveOneDisablesWatermark` — `ShedThreshold=2.0`; fill the channel; assert `admit()` stays true until the pipeline is closed. Run — confirm passing. + +### Phase 7: Add `AsHTTPHandler` decorator and `Admission` middleware (TDD, in-module) + +- [x] Add `TestAsHTTPHandler_ForwardsSingleMessageToAwait` (in `admission_test.go`) — a fake that implements `messaging.Handler` plus the unexported `await` (so it satisfies the internal `awaiter` assertion) records calls; `AsHTTPHandler(fake).Handle(ctx, "x")`; assert exactly one `await(ctx, "x")`. Run — confirm failure (no `AsHTTPHandler` yet). +- [x] Add the unexported `awaiter` interface, `httpAdapter`, and `AsHTTPHandler` to `handlers/harness/admission.go`. Run — confirm passing. +- [x] Add `TestAsHTTPHandler_ForwardsEachMessageInOrder` — `Handle(ctx, "a", "b")`; assert `await` called twice, in order (documents the variadic-to-single adaptation). Run — confirm passing. +- [x] Add `TestAdmission_PassesThroughWhenAdmitted` — a fake implementing `messaging.Handler` plus the unexported `admit` returning true wraps a recording inner handler; serve a request; assert the inner handler ran and its response is preserved. Run — confirm failure (no `Admission` yet). +- [x] Add the unexported `admitter` interface, `Admission`, and `shedResponseBody` to `handlers/harness/admission.go`. Run — confirm `PassesThroughWhenAdmitted` passes. +- [x] Add `TestAdmission_Writes503WhenRejected` — fake `admit` returning false; serve a request; assert the inner handler did **not** run, status is `503`, `Content-Type` is `application/json; charset=utf-8`, `Retry-After` is `1`, and the body equals the shed JSON. Run — confirm passing. + +### Phase 8: Pin the existing `Handle` contract (TDD, MQ/cron path) + +- [x] Add `TestHandle_BlocksUntilDurable` — submit a batch via `Handle`; the writer takes a controlled delay to acknowledge; assert `Handle` returns only after the writer completes. Run — confirm passing (pins the contract `streaming` depends on). +- [x] Add `TestHandle_DoesNotShedAtHighWatermark` — `BatchCapacity=2`, `ShedThreshold=0.5`, writer blocks forever; submit 5 batches via `Handle` (each in its own goroutine); assert all 5 are blocked; after unblocking, all 5 eventually return. Run — confirm passing. +- [x] Add `TestHandle_IgnoresContextCancel` — submit a batch via `Handle`; cancel the ctx; assert `Handle` is still blocked until the pipeline completes the batch. Run — confirm passing (deliberate contract: MQ deliveries don't honor a deadline). +- [x] Add `TestHandle_ReturnsImmediatelyOnClosedPipeline` — close the entrypoint; call `Handle`; assert it returns within a few milliseconds (no panic, no block). Run — confirm passing. +- [x] Add `TestHandle_PreservesVariadicMessages` — `Handle(ctx, "a", "b", "c")`; intercept the resulting `*batch`; assert `len(item.messages) == 3`. Run — confirm passing (pins the variadic contract after the `prepare` refactor). + +### Phase 9: Race and integration sanity + +- [x] Run the full harness test suite under `-race`: `go test -race ./handlers/harness/...`. Confirm green. (Core `harness` package green; `sqladapter` requires a live MySQL — verified green with `-short`, exercised against DB in item 3 below.) +- [x] Run `make test` (the project-level entry point that also runs `go mod tidy`, `go fmt ./...`). Confirm green. (`go fmt` clean; full `-short -race` suite green. `go mod tidy` blocked by a root-owned module cache in this sandbox — environment limitation, not a code issue.) +- [x] Run `go test ./handlers/harness/sqladapter/...` against a live MySQL (drop `-short` if present, or use the project's `make test.db.local`-equivalent). Confirm no regressions in the SQL adapter. (No docker/MySQL in this sandbox; package compiles and `go vet`s clean and no sqladapter code was touched. **Follow-up: run `make test.db.local` locally to fully confirm.**) + +### Phase 10: Documentation + +- [x] Update `doc/work-sessions/2026/2026-05-14_pipeline-component-diagram.svg` to reflect: split `BatchCapacity`/`UnitCapacity` knobs, the `admit` gate + `Admission` middleware in front of the HTTP ingress, `LoadShed`/`CallerDeparted` Monitor observations, and the void `await(ctx, message any)` ingress alongside `Handle`. (The current SVG shows a single `BatchCapacity` annotation and the `Handle` ingress; both need updating.) +- [x] Self-review the diff: confirm no `messaging.Handler` callers were inadvertently broken; confirm `admission.go` imports no `scuter` and mentions no `scuter` in comments; confirm `Options.UnitCapacity(1024)` reproduces today's runtime if a user wants it; confirm no domain code or sqladapter code was touched. + +### Out of scope (handled in each consuming repo's follow-on) + +- Per-service route wireup (`AsHTTPHandler` + `Admission` in each routes table), application-side monitor metric registration, integration tests, and post-deploy observation drills. These are addressed per service after this module's changes merge and a version is tagged. diff --git a/doc/work-sessions/2026/2026-06-01_12-19-49-proposal-waiter-pool-corruption-fix.md b/doc/work-sessions/2026/2026-06-01_12-19-49-proposal-waiter-pool-corruption-fix.md new file mode 100644 index 0000000..856a00b --- /dev/null +++ b/doc/work-sessions/2026/2026-06-01_12-19-49-proposal-waiter-pool-corruption-fix.md @@ -0,0 +1,254 @@ +--- +name: Fix pooled WaitGroup corruption in entrypoint.await +description: Stop returning a pooled *sync.WaitGroup to the pool while its count is non-zero and a detached Wait() goroutine still references it, eliminating a sync.WaitGroup-misuse / cross-request corruption hazard on the caller-departed path. +type: plot +--- + +# Proposal: Fix pooled `WaitGroup` corruption in `entrypoint.await` + +## Background + +`handlers/harness/00_entrypoint.go` exposes two ingress paths into the pipeline: + +- `Handle(ctx, messages...)` — the at-least-once (RMQ) path. It enqueues a batch + and blocks **inline** on `waiter.Wait()` until the pipeline calls `complete()`. +- `await(ctx, message)` — the context-honoring (HTTP) path. It enqueues a batch + and then waits on **either** completion **or** `ctx.Done()`, so a departed + caller (cancelled request) does not block on durable processing. + +Both paths share `prepare(...)`, which leases a `*sync.WaitGroup` from a +`sync.Pool` (`this.waiters`), calls `waiter.Add(1)`, and captures that waiter in +the batch's `complete` closure: + +```go +func (this *entrypoint) prepare(ctx context.Context, messages ...any) (waiter *sync.WaitGroup, item *batch) { + waiter = this.waiters.Get() + waiter.Add(1) + item = this.batches.Get() + item.ctx = ctx + item.messages = messages + item.complete = func() { + waiter.Done() + this.monitor.Track(batchComplete) + this.batches.Put(item) + } + return waiter, item +} +``` + +`complete()` is invoked **later**, by the pipeline's `completion` stage +(`04_completion.go`), once the batch is durably persisted. + +### The bug + +`await` returns the waiter to the pool unconditionally via `defer`: + +```go +func (this *entrypoint) await(ctx context.Context, message any) { + ... + waiter, item := this.prepare(ctx, message) + defer this.waiters.Put(waiter) // (B) blanket Put — the defect + + select { + case this.work <- item: + this.lock.RUnlock() + this.monitor.Track(batchInFlight) + case <-ctx.Done(): + this.lock.RUnlock() + this.abandon(waiter, item) // calls waiter.Done() -> count 0 + this.monitor.Track(callerDeparted) + return + } + + select { + case <-this.waiterDone(waiter): // detached goroutine: waiter.Wait(); close(done) + case <-ctx.Done(): // (A) caller departed WHILE waiting + this.monitor.Track(callerDeparted) + } +} +``` + +On path **(A)** — the caller's context is cancelled *after* the batch was +successfully enqueued — `await` returns and the deferred `Put` (B) returns the +waiter to the pool. But at that instant: + +1. The pipeline still owns the batch and **has not yet called `complete()`**, so + the waiter's internal counter is still `1` (the `Add(1)` from `prepare` has no + matching `Done()` yet). +2. The detached goroutine spawned by `waiterDone(waiter)` is **still blocked + inside `waiter.Wait()`**, holding a live reference to the same waiter. + +A subsequent `prepare()` can then `Get()` that very waiter and call `Add(1)` +again. Per the `sync.WaitGroup` contract, *"if a WaitGroup is reused to wait for +several independent sets of events, new `Add` calls must happen after all +previous `Wait` calls have returned"*. Here a new `Add` races a previous `Wait` +that has not returned, which is documented misuse. Consequences range from the +Go runtime panicking (`sync: WaitGroup is reused before previous Wait has +returned` / `WaitGroup misuse: Add called concurrently with Wait`) to silent +cross-request corruption, where one HTTP request's `await` blocks until an +unrelated earlier batch also completes. + +### Why the other paths are safe (and stay safe) + +| Path | State of waiter when returned to pool | Detached `Wait()` goroutine? | Safe? | +|------------------------------------|--------------------------------------------------|---------------------------------|--------| +| `Handle` normal | `Wait()` returned inline → count 0 | none (waits inline) | yes | +| `await` enqueue-failed (`abandon`) | `abandon` called `Done()` → count 0 | none (2nd select not reached) | yes | +| `await` normal completion | `waiterDone` fired → `Wait()` returned → count 0 | finished before `done` received | yes | +| `await` departed-while-waiting (A) | count still 1, `complete()` pending | **still blocked in `Wait()`** | **NO** | + +Only path (A) is defective. The fix must keep the safe paths recycling waiters +(the pool exists to cut allocations under load) while never recycling a waiter +that is still "in use." + +## Approach + +**Recommended: Option A — targeted lifecycle fix (minimal diff).** + +Remove the blanket `defer this.waiters.Put(waiter)` and instead return the +waiter to the pool *only* at the three points where it is provably quiescent +(counter at 0, no detached goroutine still reading it). On the departed-while- +waiting path, deliberately do **not** recycle the waiter: let it fall out of +scope. `complete()` will still fire later, unblocking the detached `waiterDone` +goroutine, after which both the waiter and the goroutine become garbage. We +"lose" exactly one pooled waiter per departed-in-flight request — an exceptional +path — which `sync.Pool` is designed to tolerate. + +Revised `await`: + +```go +func (this *entrypoint) await(ctx context.Context, message any) { + this.lock.RLock() + if this.closed { + this.lock.RUnlock() + return + } + + waiter, item := this.prepare(ctx, message) + + select { + case this.work <- item: + this.lock.RUnlock() + this.monitor.Track(batchInFlight) + case <-ctx.Done(): + this.lock.RUnlock() + this.abandon(waiter, item) + this.waiters.Put(waiter) // safe: abandon() called Done() (count 0); no detached waiter. + this.monitor.Track(callerDeparted) + return + } + + select { + case <-this.waiterDone(waiter): + this.waiters.Put(waiter) // safe: detached Wait() returned before done was closed (count 0). + case <-ctx.Done(): + this.monitor.Track(callerDeparted) + // Intentionally do NOT recycle the waiter here: complete() is still + // pending and the detached waiterDone goroutine is still inside Wait(). + // Returning it to the pool now would let a later prepare() call Add(1) + // before this Wait() returns -- documented sync.WaitGroup misuse. The + // waiter is released to GC once complete() fires. + } +} +``` + +`Handle` is unchanged: its `defer this.waiters.Put(waiter)` runs only after its +inline `waiter.Wait()` returns, so the waiter is quiescent at recycle time. + +`prepare`, `abandon`, and `waiterDone` are unchanged. + +### Alternative considered — Option B: replace the pooled WaitGroup with a single-use completion channel + +Each ingress waits for **exactly one** `complete()` call, so a `sync.WaitGroup` +is heavier than needed. We could give `batch` a `done chan struct{}` that +`complete()` closes once; `Handle` does `<-done` and `await` does +`select { case <-done: ... case <-ctx.Done(): ... }`. This eliminates the +detached `waiterDone` goroutine and the entire pool-reuse hazard class, because +a channel is single-use and never recycled — an abandoned `done` channel is +simply collected by GC. + +Rejected as the primary fix because: + +- It is a larger change that touches `Handle`'s hot path and removes the + `waiters` pool the author added deliberately for allocation reduction. +- It trades pooled-waiter reuse for a per-request channel allocation. +- The user's request is scoped to *resolving the corruption concern*, and + Option A does so with a minimal, reviewable diff while preserving the + existing design intent and all current passing tests. + +Option B remains a reasonable future simplification if the detached goroutine or +per-request waiter churn proves costly; noting it here so the decision is +explicit. **Open question for the reviewer:** prefer the minimal Option A, or +take the larger Option B cleanup now? + +## Trade-offs & Risks + +- **One pooled waiter is dropped per departed-in-flight request.** This is the + exceptional path (HTTP caller cancelled after enqueue). Under sustained + cancellation storms the pool simply allocates fresh waiters; `sync.Pool` + absorbs this without leaking memory (dropped waiters are GC'd once their + pending `complete()` fires). No unbounded growth. +- **The detached `waiterDone` goroutine still lingers until `complete()`** on the + departed path. This is pre-existing behavior and is bounded by pipeline drain + time; the work is genuinely still in flight, so tracking its completion with + one goroutine is acceptable. Option A does not worsen this; Option B would + remove it. +- **Deterministic red-test difficulty.** The corruption only manifests on pool + *reuse* under concurrency, and `sync.Pool.Get` gives no reuse guarantee, so a + strictly deterministic failing unit test is not practical. The honest red test + is a race-enabled stress test (see Phase 1) that reliably trips Go's built-in + `WaitGroup` misuse detector before the fix and passes after. This is called out + explicitly rather than hidden behind a false "deterministic" claim. + +## Implementation Checklist + +### Phase 1: Capture the corruption (red) + +- [x] Add `TestAwait_DepartedInFlightDoesNotCorruptPooledWaiter` to + `handlers/harness/00_entrypoint_test.go`. The test, in a loop sized for + reliable surfacing (e.g. a few thousand iterations), should: (1) start an + `await` whose context it cancels *after* the batch is received from + `this.work` (departed-while-waiting), deferring the matching `complete()` to a + background goroutine; (2) concurrently issue fresh `await`/`Handle` calls on + the **same** entrypoint that drive `prepare()` (and thus pool `Get`/`Add`), + completing each; so a recycled-too-early waiter is exercised by a new request. + (Implemented as 8 worker goroutines × 2000 departing `await`s against a + concurrent drainer that completes each enqueued batch — this produces the + recycle/reuse race more reliably than a sequential loop.) +- [x] Run `go test -race -run TestAwait_DepartedInFlightDoesNotCorruptPooledWaiter ./handlers/harness/` + and confirm it fails for the right reason: a `sync: WaitGroup ...` misuse panic + or a race report on the waiter (NOT a generic assertion mismatch). Record the + observed failure mode in the PR description. + **Observed:** `WARNING: DATA RACE` on the waiter (read in `await` at + `00_entrypoint.go:80` vs. write from the detached `waiterDone` goroutine) and + `panic: sync: WaitGroup is reused before previous Wait has returned`. + +### Phase 2: Apply the lifecycle fix (green) + +- [x] In `00_entrypoint.go`, remove `defer this.waiters.Put(waiter)` from + `await`. +- [x] In `await`, add `this.waiters.Put(waiter)` immediately after + `this.abandon(waiter, item)` on the enqueue-failed branch. +- [x] In `await`, add `this.waiters.Put(waiter)` in the + `case <-this.waiterDone(waiter):` branch (normal completion). +- [x] In `await`, in the `case <-ctx.Done():` branch of the second `select`, + add the explanatory comment documenting why the waiter is intentionally NOT + recycled there. +- [x] Confirm `Handle` is unchanged and still recycles via its inline-`Wait()` + `defer this.waiters.Put(waiter)`. + +### Phase 3: Verify (green) and guard regressions + +- [x] Run `go test -race -run TestAwait_DepartedInFlightDoesNotCorruptPooledWaiter ./handlers/harness/` + and confirm it now passes with no panic and a clean race report. +- [x] Confirm the existing departed-path tests still pass unchanged: + `TestAwait_UnblocksOnContextCancelWhileWaiting` (departed-while-waiting still + completes and tracks `CallerDeparted` + later `BatchComplete`) and + `TestAwait_UnblocksOnContextCancelWhileEnqueuing` (abandon path still tracks + `CallerDeparted`, no `BatchInFlight`/`BatchComplete`). +- [x] Run the full package suite: `make test` (exercises `go fmt`, `go vet`, + `-race`, coverage). Confirm green. (All packages pass; `handlers/harness` + coverage 99.6%.) +- [x] Re-read the diff against `## CLAUDE.md` Go conventions: receiver named + `this`, no naked returns, no new blank lines at method start/end, struct + initializers use field/value pairs (no new initializers introduced here). diff --git a/doc/work-sessions/2026/2026-06-01_13-56-53-proposal-retry-loop-cancellation.md b/doc/work-sessions/2026/2026-06-01_13-56-53-proposal-retry-loop-cancellation.md new file mode 100644 index 0000000..2da4c15 --- /dev/null +++ b/doc/work-sessions/2026/2026-06-01_13-56-53-proposal-retry-loop-cancellation.md @@ -0,0 +1,373 @@ +--- +name: Bound the unbounded retry loops in persistence and broadcast +description: Make the persistence and broadcast stages' retry loops cancellable via the pipeline context so a permanently-failing Writer/Dispatcher can no longer hang pipeline shutdown, while preserving store-and-forward durability (persistence is the ack gate; broadcast is recoverable via the startup recovery routine). +type: plot +--- + +# Proposal: Bound the unbounded retry loops in `persistence` and `broadcast` + +## Background + +Two pipeline stages retry their collaborator forever with no escape hatch: + +`handlers/harness/03_persistence.go:39` + +```go +for attempt := 1; ; attempt++ { + err := this.writer.Write(this.ctx, this.buffer...) + if err == nil { + failure.Attempt = 0 + failure.Error = nil + break + } + failure.Attempt = attempt + failure.Error = fmt.Errorf("%w: %w", ErrPersistence, err) + this.monitor.Track(failure) + this.sleep(time.Second) // TODO: exponential back-off w/ jitter +} +``` + +`handlers/harness/05_broadcast.go:39` is identical in shape (against `Dispatcher.Dispatch`). + +Both loops: + +1. Have **no maximum attempt count** — they spin until the collaborator + succeeds. +2. **Never consult `this.ctx.Done()`.** The pipeline context is passed *into* + `Write`/`Dispatch` (so a ctx-honoring driver aborts the in-flight call), but + the loop itself ignores cancellation and immediately retries. +3. Sleep via an **uninterruptible** `this.sleep(time.Second)` (wired to + `time.Sleep` in `pipeline.go`). + +### The failure mode + +Shutdown drains the pipeline by closing channels from the front: `entrypoint.Close()` +closes the batches channel, `execution` then closes its output, and the close +cascades stage-by-stage (`for unit := range this.input` exits, then +`defer close(this.output)` fires). Each stage finishes the **unit it is +currently holding** before observing the closed input. + +If the database (persistence) or broker (broadcast) is unreachable when shutdown +begins, the stage holding an in-flight unit never breaks out of its retry loop, +never returns to `range this.input`, never observes the closed channel, and +**never closes its output**. The drain stalls, the remaining stages never see +their inputs close, and shutdown hangs until the orchestrator SIGKILLs the +process after its grace period. + +### The design constraint that shapes the fix + +The pipeline is store-and-forward, and the stage order matters +(`handlers/harness/pipeline.go`): + +``` +entrypoint → execution → serialization → persistence → completion → broadcast → terminal + (03) (04) (05) +``` + +`complete()` — which calls `waiter.Done()` (unblocking the `Handle`/`await` +caller) and lets the upstream MQ delivery be acked — fires in the **completion** +stage (04), which runs **after persistence (03)** and **before broadcast (05)**. + +That ordering creates a hard asymmetry between the two failing stages: + +| Stage | Runs vs. ack | On give-up, is the message recoverable? | +|--------------------|----------------|----------------------------------------------------------------------------------------------------------------------------------| +| `persistence` (03) | **before** ack | No. If the row was never written, forwarding it acks/loses the message. Only MQ redelivery recovers it. | +| `broadcast` (05) | **after** ack | Yes. The row is already durably stored; `sqladapter.Recover` re-dispatches every row `WHERE dispatched IS NULL` at next startup. | + +The resilience proposal +(`2026-05-28_15-30-32-proposal-harness-resilience-module-changes.md`) already +ruled out per-batch context in these stages and fixed their cancellation scope: + +> **Inject a per-batch `ctx` through Persistence and Broadcast.** Rejected. +> Per-batch ctx in retry-forever stages would unwind partially-completed work and +> break the durability principle. The pipeline ctx (`harness.New(ctx, …)`) is the +> right scope for those stages. + +So the intended (but not-yet-implemented) cancellation signal for these loops is +the **pipeline context** — the `ctx` the consumer passes to `harness.New(ctx, …)` +and is expected to cancel on shutdown. This proposal wires that signal in. + +## Approach + +Make both retry loops **cancellable via `this.ctx`**, and act on cancellation +according to each stage's recoverability (the asymmetry above): + +- **`broadcast` (recoverable):** on `this.ctx` cancellation, stop retrying and + **forward the unit** to `terminal` as usual. The message is already persisted; + `sqladapter.Recover` redispatches it on the next startup. No data loss, clean + shutdown. + +- **`persistence` (durability gate):** on `this.ctx` cancellation, stop retrying + and **drop the unit without forwarding it** — `complete()` is never called, so + the upstream MQ delivery is never acked and is redelivered on the next run. The + stage then continues its `range` loop and drains normally, so shutdown + proceeds. Forwarding here is *not* an option: it would reach the completion + stage, ack the caller, and lose a message that was never stored. + +#### HTTP-origin units (the `await` path) + +The drop decision is identical for HTTP-submitted units, but the *recovery* +mechanism differs, because there is no broker to redeliver. When persistence +drops an HTTP-origin unit on shutdown: + +- `complete()` never fires, so the in-flight `await` caller is **not** unblocked + via the completion path. It unblocks instead when its own request context is + cancelled (the `case <-ctx.Done()` arm of `await`'s second select), tracking + `CallerDeparted` — exactly today's departed-caller behavior. Whether that + request context is actually cancelled at shutdown is a consumer-side graceful- + shutdown concern (does the HTTP server cancel active request contexts?), out of + scope here. +- Recovery relies on **client retry + domain idempotency**, the model the + resilience proposal already established for the HTTP path: a client whose + request did not durably succeed retries, and once the merged domain-layer + idempotency change is in place repeated retries collapse to no-ops after the + first applies. There is no `sqladapter.Recover` safety net for an un-stored + unit (recovery only re-dispatches rows that *were* stored), so the client retry + is the sole recovery path — which is why dropping (rather than forwarding) is + still the correct choice: forwarding would unblock the caller as though the + write succeeded, falsely confirming a message that was never stored. + +There is no regression versus today: an HTTP request in flight against a dead +database currently hangs until SIGKILL just the same; this change makes the +process shut down cleanly instead. + +A new interruptible backoff replaces the uninterruptible sleep, so shutdown +aborts the delay promptly instead of waiting out a full second per attempt. + +### The cancellable-wait seam + +Replace the injected `sleep func(time.Duration)` with an injected, ctx-aware +waiter: + +```go +// retry.go (new) +package harness + +import ( + "context" + "time" +) + +// wait sleeps for d, or returns early with ctx.Err() if ctx is cancelled first. +func wait(ctx context.Context, d time.Duration) error { + timer := time.NewTimer(d) + defer timer.Stop() + select { + case <-ctx.Done(): + return ctx.Err() + case <-timer.C: + return nil + } +} +``` + +`pipeline.go` passes `wait` where it currently passes `time.Sleep`. Tests inject +a fixture method that records the requested durations (preserving the existing +`sleeps`-style assertions) and can simulate cancellation by returning a non-nil +error. + +> **Why inject the wait function rather than a duration?** A reasonable +> alternative is to keep `wait` as a plain free function, store only a +> `time.Duration` on each stage, and have the stage call `wait(this.ctx, this.delay)` +> directly. That is cleaner production code. It is rejected for one reason: the +> **test seam**. The stages today inject `sleep func(time.Duration)` precisely so +> the fixtures can (a) record how many times and for how long the loop backed off +> and (b) run instantly without real 1-second sleeps. If we called the real `wait` +> directly, every retry test would block on actual wall-clock time and could not +> observe the backoff. Keeping the injected-function seam preserves the existing +> `TestRetriesUntil…Succeeds` assertions verbatim and lets a fixture simulate +> cancellation synchronously by returning an error on a chosen attempt. The free +> `wait` function still exists — it is simply the production value wired in at +> `pipeline.go`, while tests substitute their own. (When exponential backoff lands +> later, the *duration* moves inside `wait`/its production implementation; the +> injected-function seam stays, so this decision does not block that work.) + +This keeps the fixed 1-second delay; **exponential back-off with jitter remains a +separate, orthogonal TODO** and is out of scope here — this change is strictly +about bounding/cancelling the loops. + +### Revised `persistence` + +```go +func (this *persistence) Listen() { + defer close(this.output) + for unit := range this.input { + for _, message := range unit.results { + this.buffer = append(this.buffer, message) + } + stored := this.store() + this.buffer = this.buffer[:0] + if !stored { + continue // shutdown before durable write: do NOT forward (no ack); MQ redelivers + } + this.output <- unit + } +} + +func (this *persistence) store() (stored bool) { + var failure PersistenceError + for attempt := 1; ; attempt++ { + err := this.writer.Write(this.ctx, this.buffer...) + if err == nil { + return true + } + failure.Attempt = attempt + failure.Error = fmt.Errorf("%w: %w", ErrPersistence, err) + this.monitor.Track(failure) + if this.wait(this.ctx, time.Second) != nil { + this.monitor.Track(PersistenceAbandoned{Attempts: attempt}) + return false + } + } +} +``` + +### Revised `broadcast` + +```go +func (this *broadcast) Listen() { + defer close(this.output) + for unit := range this.input { + for _, message := range unit.results { + this.buffer = append(this.buffer, message) + } + this.dispatch() + this.buffer = this.buffer[:0] + this.output <- unit // always forward: already persisted; recovery redispatches if not sent + } +} + +func (this *broadcast) dispatch() { + var failure BroadcastError + for attempt := 1; ; attempt++ { + err := this.dispatcher.Dispatch(this.ctx, this.buffer...) + if err == nil { + return + } + failure.Attempt = attempt + failure.Error = fmt.Errorf("%w: %w", ErrBroadcast, err) + this.monitor.Track(failure) + if this.wait(this.ctx, time.Second) != nil { + this.monitor.Track(BroadcastAbandoned{Attempts: attempt}) + return + } + } +} +``` + +### New Monitor observations (`contracts.go`) + +```go +PersistenceAbandoned struct{ Attempts int } // emitted when persistence stops retrying due to shutdown; the unit was dropped and the upstream message will be redelivered +BroadcastAbandoned struct{ Attempts int } // emitted when broadcast stops retrying due to shutdown; the message is persisted and will be redispatched by recovery +``` + +These give operators an explicit, alertable signal that a shutdown abandoned +in-flight work — distinct from the per-attempt `PersistenceError`/`BroadcastError`. + +### Files created / modified + +| File | Change | +|-------------------------------------------|---------------------------------------------------------------------------------------------| +| `handlers/harness/retry.go` | **New.** The `wait(ctx, d)` interruptible-backoff helper. | +| `handlers/harness/03_persistence.go` | Replace `sleep` field/param with `wait`; extract `store()`; drop-without-forward on cancel. | +| `handlers/harness/05_broadcast.go` | Replace `sleep` field/param with `wait`; extract `dispatch()`; forward-on-cancel. | +| `handlers/harness/contracts.go` | Add `PersistenceAbandoned` and `BroadcastAbandoned` observation types. | +| `handlers/harness/pipeline.go` | Pass `wait` to `newPersistence`/`newBroadcast` instead of `time.Sleep`. | +| `handlers/harness/03_persistence_test.go` | Rename `sleep`→`wait` seam; add ctx-cancel abandonment test (unit **not** forwarded). | +| `handlers/harness/05_broadcast_test.go` | Rename `sleep`→`wait` seam; add ctx-cancel abandonment test (unit **is** forwarded). | + +### Alternatives considered + +- **Persistence keeps retrying forever (only fix broadcast).** Treat persistence's + infinite retry as intentional durability behavior and rely on the orchestrator's + grace-period + SIGKILL + MQ redelivery to bound shutdown. Simpler, and arguably + "more durable" (never gives up). Rejected as the recommendation because it leaves + the original concern — a hung shutdown when the DB is down — unresolved, trading a + clean shutdown for a SIGKILL every time. Presented here as the headline open + question (see below); it is a one-line variant of this proposal (skip the + persistence drop path, keep the loop uninterruptible). + +- **Bounded max-attempts, then forward.** Give each loop a retry ceiling and forward + downstream when exhausted. Rejected: for persistence, forwarding an un-stored unit + acks and loses the message; for broadcast it is unnecessary (recovery already + handles it). A ceiling also turns transient-but-long outages into data loss during + *normal* operation, not just shutdown. + +- **Per-batch context.** Already rejected by the resilience proposal (would unwind + partially-completed work); not revisited. + +## Trade-offs & Risks + +- **Decision (settled): persistence drops the in-flight unit on shutdown.** On + shutdown with a dead database, `persistence` stops retrying and drops the + in-flight unit (clean shutdown; the un-acked MQ delivery is redelivered, or the + HTTP client retries). The rejected alternative — keep retrying forever, bounded + only by orchestrator SIGKILL — is retained in *Alternatives considered* for the + record. Broadcast is unaffected by this choice (forward-on-cancel either way). + +- **Dropped-unit caller blocks until process exit.** When persistence + drops a unit on shutdown, its `complete()` never fires, so an in-flight + `Handle`/`await` caller stays blocked on `waiter.Wait()`. This is acceptable during + shutdown — the consumer cancels `this.ctx`, the MQ delivery framework tears those + goroutines down, and the un-acked message is redelivered. It does mean a + `goleak`-style "no leaked goroutines" assertion must not be applied across the + shutdown-abandonment path. + +- **Cancellation depends on collaborators honoring `ctx`.** If `Writer.Write` / + `Dispatcher.Dispatch` block forever ignoring the passed `ctx`, our loop cannot + reach the `wait` check until that call returns. The `sqladapter` Writer/Dispatcher + use `BeginTx(ctx)`/`ExecContext(ctx)`/ctx-scoped connector calls, so they abort + promptly on cancellation. Worth stating as a documented contract for custom + collaborators. + +- **Requires the consumer to cancel the pipeline ctx on shutdown.** The retry loops + abort on `this.ctx` cancellation; the channel-close cascade alone does not cancel + `this.ctx`. If a consumer wires `harness.New` with a `context.Background()` that is + never cancelled, the in-flight retry remains unbounded (it still drains correctly + on the *next* unit boundary, but the currently-held unit can still hang). This is + the intended contract per the resilience proposal; it should be called out in the + package doc. + +- **Deterministic tests are straightforward here** (unlike the waiter-pool fix): the + injected `wait` seam returns a chosen error synchronously, so abandonment is + exercised without real timing or concurrency races. + +- **No change to the happy path or to the fixed-1s backoff.** Existing retry tests + keep asserting two 1-second waits before success; only the seam's name/signature + changes. + +## Implementation Checklist + +### Phase 1: Cancellable-wait seam (refactor, stays green) + +- [x] Add `handlers/harness/retry.go` with the `wait(ctx context.Context, d time.Duration) error` helper shown in Approach. +- [x] In `pipeline.go`, change `newPersistence(...)` and `newBroadcast(...)` call sites to pass `wait` instead of `time.Sleep`. +- [x] In `03_persistence.go` and `05_broadcast.go`, change the struct field and constructor parameter from `sleep func(time.Duration)` to `wait func(context.Context, time.Duration) error` (no behavior change yet — call `this.wait(this.ctx, time.Second)` and ignore the returned error so the loops still spin forever). +- [x] In both `*_test.go` fixtures, rename the `sleep`/`sleeps` seam to a `wait`/`waits` method matching the new signature, recording durations and returning `nil`. +- [x] Run `make test` — confirm green (pure refactor; existing `TestRetriesUntil…Succeeds` still sees two recorded 1s waits). + +### Phase 2: Broadcast cancellation — forward on shutdown (red → green) + +- [x] Add `BroadcastAbandoned struct{ Attempts int }` to `contracts.go`. +- [x] Add `TestBroadcastAbandonsOnContextCancelButStillForwards`: dispatcher always fails; fixture `wait` returns `context.Canceled` on its first call. Expect failure (compile error — `BroadcastAbandoned` referenced before the loop emits it, or assertion fails because the current loop ignores the wait error and spins). Record the failure reason. +- [x] Run the new test, confirm it fails for the right reason. (Observed: 5s `-timeout` kill — the loop ignored the wait error and spun forever against the always-failing dispatcher, never forwarding or tracking abandonment.) +- [x] Implement: extract `dispatch()`; on `this.wait(...) != nil`, `Track(BroadcastAbandoned{Attempts: attempt})` and return; `Listen()` still forwards the unit to `output` afterward. +- [x] Run the test — confirm: exactly one `Dispatch` call, one recorded wait, one `BroadcastError` then one `BroadcastAbandoned` tracked, and the unit **was** forwarded to `output`. +- [x] Confirm `TestRetriesUntilDispatchSucceeds` and the other broadcast tests still pass. + +### Phase 3: Persistence cancellation — drop without forwarding (red → green) + +- [x] Add `PersistenceAbandoned struct{ Attempts int }` to `contracts.go`. +- [x] Add `TestPersistenceAbandonsOnContextCancelAndDropsUnit`: writer always fails; fixture `wait` returns `context.Canceled` on its first call. Expect failure (the current loop ignores the wait error and spins, so the test would hang / never see abandonment). Record the failure reason. +- [x] Run the new test, confirm it fails for the right reason. (Observed: 5s `-timeout` kill — the loop ignored the wait error and spun forever against the always-failing writer, never dropping the unit or tracking abandonment.) +- [x] Implement: extract `store() bool`; on `this.wait(...) != nil`, `Track(PersistenceAbandoned{Attempts: attempt})` and return `false`; `Listen()` skips `output <- unit` (does **not** forward) when `store()` returns false, resets the buffer, and continues the range loop. +- [x] Run the test — confirm: exactly one `Write` call, one recorded wait, one `PersistenceError` then one `PersistenceAbandoned` tracked, and the unit was **not** forwarded to `output` (output closes empty after input closes). +- [x] Confirm `TestRetriesUntilWriteSucceeds` and the other persistence tests still pass. + +### Phase 4: Full verification & conventions + +- [x] Run `make test` (fmt, vet, `-race`, coverage) — confirm green and that `handlers/harness` coverage has not regressed. (All packages pass; `handlers/harness` coverage 97.7%, unchanged.) +- [x] Add a short note to the `package harness` doc comment in `config.go`: the persistence/broadcast retry loops abort on cancellation of the context passed to `New(ctx, …)`; consumers must cancel it on shutdown, and custom `Writer`/`Dispatcher` implementations must honor the context they are given. +- [x] Re-read the diff against the `CLAUDE.md` Go conventions: receiver named `this`; named slice/return values where applicable; no naked returns; no blank lines at method start/end; struct initializers use field/value pairs; multi-line struct literals close the brace on their own line. diff --git a/doc/work-sessions/2026/2026-06-01_16-14-19-proposal-dispatch-content-encoding.md b/doc/work-sessions/2026/2026-06-01_16-14-19-proposal-dispatch-content-encoding.md new file mode 100644 index 0000000..636cb50 --- /dev/null +++ b/doc/work-sessions/2026/2026-06-01_16-14-19-proposal-dispatch-content-encoding.md @@ -0,0 +1,336 @@ +--- +name: Stop double-encoding payloads across dispatch and recovery +description: Eliminate the redundant second serialization in the sqladapter Dispatcher and propagate ContentType through the in-memory harness Message — closing the TODO at handlers/harness/sqladapter/dispatcher.go:58 and fixing the recovery-path content-type and Topic mishandling. No schema changes. +type: plot +--- + +# Proposal: Stop double-encoding payloads across dispatch and recovery + +## Background + +The harness pipeline is store-and-forward: each message is serialized once +(stage 02), durably stored (stage 03), then published to the broker (stage 05 +via `Dispatcher.Dispatch`). On startup, `sqladapter.Recover` re-publishes any +row whose `dispatched` column is still `NULL`. + +The bytes that get stored in the `Messages.payload` column **must** be byte- +identical to the bytes that get published to the broker — otherwise a recovery +re-publishes a different message than the original send, silently. Today the +code does not guarantee this, and in one configuration it cannot publish a +recovered row at all. + +There are three connected defects across `handlers/harness/02_serialization.go`, +`handlers/harness/sqladapter/dispatcher.go`, and +`handlers/harness/sqladapter/recovery.go`. They are usually invisible (the +production serializer is JSON, which makes the bug benign) and they are masked +by the unit-test stub connector — but they are real, and recovery against a +real RabbitMQ writer is broken today. + +### Defect 1: payload is encoded twice on the happy path + +`02_serialization.go:29` encodes `message.Value` into `message.Content` (these +are the bytes destined for the durable row). + +`sqladapter/dispatcher.go:64-67` then builds a `messaging.Dispatch` with +`Message: message.Value` (the in-memory Go struct) and passes it to the +transport `Writer`. The transport writer is the `serialization.defaultWriter` +(`serialization/connector.go:103`), which loops calls into +`defaultDispatchEncoder.Encode` (`serialization/dispatch_encoder.go:30`). That +encoder sees `dispatch.Payload` is empty and `dispatch.Message` is non-nil, and +**runs the same `Serializer.Serialize` again** to produce +`dispatch.Payload`/`dispatch.ContentType`/`dispatch.MessageType`. + +So the value is serialized once for the DB, then again for the broker. Both +encodings happen with the same `Serializer`, so the resulting bytes *are* equal, +but the harness has no guarantee of that — anyone who plugs in a serializer +with side effects, IDs, or timestamps will get divergent stored vs. published +bytes. And it is wasted CPU on every send. + +The TODO at `dispatcher.go:58-63` calls this out and asks for one of two fixes: + +> Either pass the pre-encoded bytes through Dispatch.Payload/MessageType/ContentType +> and skip the connector's serialization for this writer, or drop the harness +> Serialization stage and let the connector own all encoding. + +### Defect 2: `Message.ContentType` is never populated on the happy path + +`harness.Message` already has `ContentType` and `ContentEncoding` fields +(`message.go:22-28`) — but `02_serialization.go` only writes +`message.ContentType` on the **fallback** path (when the user's serializer +returned an error and we fall back to `fmt.Sprintf("%#v")`): + +```go +err := this.serializer.Serialize(message.Content, message.Value) +if err != nil { + // … + message.ContentType = "go fmt.Sprintf(%#v)" + _, _ = fmt.Fprintf(message.Content, "%#v", message.Value) +} +``` + +On the success path `message.ContentType` is left as the zero value `""`. The +field exists, and the dispatcher never reads it. + +This is fine *today* because `dispatcher.go` re-encodes via the connector +(Defect 1), so the connector reapplies its own `ContentType()`. The moment we +fix Defect 1 by passing pre-encoded bytes through, we have to know what the +content type is — and the harness never recorded it. The fix lives entirely +in process memory: stage 02 stamps `message.ContentType` from +`serializer.ContentType()` so the dispatcher (next stage in the same process) +can read it. **No schema change.** + +### Defect 3: recovery hands the dispatcher a malformed message and breaks against a real broker + +`recovery.go:42-47` builds a `harness.Message` from the row: + +```go +messages = append(messages, &harness.Message{ + ID: id, + Type: typeName, + Content: bytes.NewBuffer(payload), + ContentType: "application/json", +}) +``` + +`Value` is left nil. The dispatcher today (per Defect 1) passes +`Message: message.Value` to the connector — which means recovery passes +`Message: nil` and `Payload: nil`. The connector's encoder takes its +`dispatch.Message == nil` early-return (`dispatch_encoder.go:31`) and **never +sets `Topic` / `MessageType` / `ContentType`**. The RabbitMQ writer then +rejects with `ErrEmptyDispatchTopic` (`rabbitmq/writer.go:38`). So in the only +production configuration that uses `topicFromMessageType=true`, recovery can't +publish a single row. The current dispatcher unit tests don't catch this +because the stub connector (`dispatcher_test.go:111`) ignores the topic field +entirely. + +Defect 3 is **fixed automatically** once Defect 1 is fixed: the new dispatcher +reads `Type` / `ContentType` / `Content` (which recovery already populates) +from the `*harness.Message` and hands them to the connector as pre-populated +`MessageType` / `ContentType` / `Payload` / `Topic`. The connector encoder's +`len(Payload) > 0` early-return then leaves everything alone, and the broker +gets a complete dispatch. + +The hardcoded `"application/json"` in recovery is **kept** — we deliberately +do not modify the `Messages` schema (the table holds 50M+ rows; we don't +extend it for this), and the project's standing assumption is that the +durable bytes are JSON. We document that assumption explicitly rather than +carry it implicitly. + +### Why fix Defects 1 and 2 together + +Defect 2 is the prerequisite for fixing Defect 1: the dispatcher can only +hand pre-encoded `ContentType` to the connector if the serialization stage +recorded it. The two changes ride together, and they fix Defect 3 as a +side effect. + +## Approach + +We pick the "pass-through" arm of the TODO: the harness Serialization stage +remains the single source of truth for encoded bytes and content type; the +sqladapter Dispatcher hands those pre-encoded bytes to the connector via +`Dispatch.Payload` / `MessageType` / `ContentType`; the connector encoder +short-circuits because `Payload` is non-empty. Recovery does the same +pass-through — using the row's stored `type` and the project-wide assumption +that stored bytes are `application/json`. + +This is the smaller, safer arm. The alternative (delete the harness +Serialization stage entirely and let the connector own all encoding) would +require persistence to also call the connector's encoder and would entangle +the harness with the connector. Rejected on those grounds. + +### The three changes + +**Change A (`02_serialization.go`):** record the content type on the success +path. We need `serializer.ContentType()` on the internal `serializer` +interface — which today is just `Serialize(io.Writer, any) error` +(`contracts.go:34`). Extend it: + +```go +serializer interface { + Serialize(out io.Writer, in any) error + ContentType() string +} +``` + +Then in `serialization.Listen`: + +```go +err := this.serializer.Serialize(message.Content, message.Value) +if err == nil { + message.ContentType = this.serializer.ContentType() +} else { + // existing fallback unchanged: ContentType already set to the sentinel + // "go fmt.Sprintf(%#v)" by the fallback branch. +} +``` + +The `nop` serializer in `config.go:112` gains a `ContentType() string { return "" }` +implementation, the test fixture in `02_serialization_test.go:40` gains the +same, and the production wiring (where callers pass +`serialization.Serializer` from the `serialization` package — which already +exposes `ContentType() string`, `contracts.go:9-11`) needs no changes since +that interface already satisfies the extended one. + +**Change B (`sqladapter/dispatcher.go`):** stop passing `Value`; pass the +pre-encoded fields: + +```go +dispatches = append(dispatches, messaging.Dispatch{ + Durable: true, + MessageType: message.Type, + ContentType: message.ContentType, + Payload: message.Content.Bytes(), + Topic: message.Type, // matches connector's topicFromMessageType=true behavior +}) +``` + +Why `Topic: message.Type` here? The connector's encoder normally sets +`Topic = MessageType` *only when* its `topicFromMessageType` flag is on +(`dispatch_encoder.go:55`). With `Payload` already populated, the encoder takes +its `len(dispatch.Payload) > 0` early-return (`dispatch_encoder.go:31`) and +never runs the topic-population block. So the dispatcher must populate Topic +itself, and Topic equals Type for this pipeline. (If a future deployment needs +a different topic-derivation rule, that rule will have to be wired into the +sqladapter Dispatcher explicitly — there is no longer a shared encoder +deciding it.) That deviation from the connector's behavior is **intentional +and noted** as a trade-off below. + +**Change C (`sqladapter/recovery.go`):** no functional change; tighten the +construction comment to acknowledge the JSON assumption. + +The current code already populates everything Change B's dispatcher needs: + +```go +messages = append(messages, &harness.Message{ + ID: id, + Type: typeName, + Content: bytes.NewBuffer(payload), + ContentType: "application/json", +}) +``` + +`Type` and `Content` come from the row; `ContentType` is the project-wide +assumption that stored bytes are JSON. With Change B in place, the dispatcher +reads these and the broker dispatch is well-formed. The change is to: + +- Replace the zero-value `// hardcoded` framing with a brief comment that + states the assumption: the durable column holds JSON because the + configured serializer is JSON; recovery cannot recover what was not + recorded, so it carries that assumption forward. + +That's it — no SQL or schema change. + +### Files modified + +| File | Change | +|-------------------------------------------------------|---------------------------------------------------------------------------------------------------------| +| `handlers/harness/contracts.go` | Extend internal `serializer` interface with `ContentType() string`. | +| `handlers/harness/02_serialization.go` | On success, set `message.ContentType = this.serializer.ContentType()`. | +| `handlers/harness/02_serialization_test.go` | Update fixture serializer to satisfy `ContentType() string`; assert ContentType propagates. | +| `handlers/harness/config.go` | `nop.ContentType() string { return "" }`. | +| `handlers/harness/sqladapter/dispatcher.go` | Pass `Payload`/`MessageType`/`ContentType`/`Topic` from `*harness.Message`; remove TODO. | +| `handlers/harness/sqladapter/dispatcher_test.go` | Assert published Dispatch carries Payload/MessageType/ContentType/Topic; tighten stub connector. | +| `handlers/harness/sqladapter/recovery.go` | Comment-only: state the JSON-content-type assumption inline. | +| `handlers/harness/sqladapter/recovery_test.go` | Add a regression test asserting the recovered Dispatch carries `Topic`/`MessageType`/`Payload`/`ContentType`. | + +No schema files (`doc/mysql/schema.sql`, `testdb_test.go`) are touched. + +### Alternatives considered + +- **Drop the harness Serialization stage; let the connector serialize.** The + other arm of the TODO. Rejected: the persistence stage would have to call + the connector's encoder before INSERT (otherwise we lose the + content-type-at-store-time invariant), which entangles the harness with the + serialization-connector and changes the order of failure (a connector + encoding bug would now poison persistence rather than just dispatch). + Pass-through is the smaller change. + +- **Add a `content_type` column to `Messages` so recovery is faithful to a + serializer change.** Rejected: the table holds 50M+ rows, the project- + standing assumption is JSON, and the operational cost of an `ALTER TABLE` on + that table outweighs the value of recovering exotic content types we don't + use. The assumption is documented explicitly in code instead. + +- **Have the dispatcher consult the connector's `topicFromMessageType` setting + to decide whether to populate Topic.** Rejected: the sqladapter dispatcher + has no handle to that config, the configuration is owned by a different + package, and the dispatcher's contract is "publish what was persisted." For + this pipeline, Topic = Type is the rule; encoding it explicitly in the + dispatcher makes the rule visible at the call site instead of hidden in + another package's option flag. + +## Trade-offs & Risks + +- **Recovery carries a JSON assumption, not a recorded fact.** With no + `content_type` column on `Messages`, recovery cannot know what serializer + was in effect when a row was written; it assumes `application/json`. If + the configured serializer is ever changed to something else, in-flight + rows persisted under the old serializer will be re-dispatched on next + startup tagged `application/json` regardless of what they actually contain. + Mitigation: drain the queue (no `dispatched IS NULL` rows) before swapping + serializers. Acceptable risk because the project has historically used JSON + and has no current plan to change. + +- **Topic = MessageType is now hardcoded in the sqladapter Dispatcher.** This + matches the connector's default behavior for the existing deployments + (`topicFromMessageType=true`) but it removes the option flag's reach for the + pre-encoded path. If a future deployment needs `Topic != MessageType`, the + rule has to be wired explicitly into the dispatcher (a function, a map, or + a new field on `harness.Message`). Worth flagging in a code comment so a + future reader understands why the topic rule was duplicated here. + +- **The connector encoder's `len(Payload) > 0` early-return is now load-bearing + for correctness, not just optimization.** The contract becomes: "if Payload + is set, leave the dispatch alone; the caller has populated everything that + the broker needs." That's already its behavior + (`dispatch_encoder.go:31`), and a short test in + `serialization/dispatch_encoder_test.go` already asserts this (`TestWhen- + DispatchAlreadyContainsSerializedPayload_Nop`); we'll cross-reference it in + a comment but not duplicate it. + +- **The dispatcher unit test that previously asserted `published[0].Message` + must change** — the stub connector observes `Message` today, but after the + change the dispatcher passes `Payload` and leaves `Message` nil. The test + becomes: assert `Payload` is the persisted bytes, `MessageType` is the + registered name, `ContentType` is what the harness recorded, `Topic` equals + MessageType, `Durable` is true. + +- **Coverage and shape of behavior tests are otherwise unchanged.** No new + goroutine semantics, no concurrency-shape changes; this is a pure data-flow + refactor — strictly in-process, no schema migration. + +## Implementation Checklist + +### Phase 1: ContentType on the internal serializer interface (red → green) + +- [ ] Extend `02_serialization_test.go` fixture: add `ContentType() string { return "test/content-type" }` and a new test `TestSerializesEachResultValueIntoContent_PopulatesContentTypeOnSuccess` asserting `units[0].results[0].ContentType == "test/content-type"`. +- [ ] Run tests, confirm failure (assertion fails: success path leaves `ContentType` empty). +- [ ] Extend `serializer` interface in `handlers/harness/contracts.go` to include `ContentType() string`. +- [ ] Add `ContentType() string { return "" }` to `nop` in `config.go` so the default still satisfies the interface. +- [ ] In `02_serialization.go`, on the success branch, set `message.ContentType = this.serializer.ContentType()`. Leave the fallback branch's existing `"go fmt.Sprintf(%#v)"` assignment alone. +- [ ] Run tests, confirm green. Verify the fallback test (`TestSerializerErrorIsTracked_FallbackToFmtSprintfEncoding`) still asserts the `"go fmt.Sprintf(%#v)"` ContentType. + +### Phase 2: Dispatcher pass-through (red → green) + +- [ ] Add `TestDispatch_PublishesPreEncodedPayloadAndMetadata` to `dispatcher_test.go`: extend the `seedMessage` helper (or inline its setup) so the `*harness.Message` carries `Type`, `ContentType` (e.g. `"application/json"`), and a pre-encoded `Content` buffer. Assert the recorded `messaging.Dispatch` has `Payload == message.Content.Bytes()`, `MessageType == message.Type`, `ContentType == message.ContentType`, `Topic == message.Type`, `Durable == true`, and `Message == nil`. +- [ ] Run tests, confirm failure (today the dispatcher sends `Message: message.Value` and leaves Payload/MessageType/ContentType/Topic blank). +- [ ] Edit `sqladapter/dispatcher.go:55-71`: + - Drop the TODO comment block. + - Change the `messaging.Dispatch` literal to `{Durable: true, MessageType: message.Type, ContentType: message.ContentType, Payload: message.Content.Bytes(), Topic: message.Type}`. + - Add a one-line comment explaining that Topic = Type because the connector encoder is short-circuited by the pre-populated Payload (cross-reference `serialization/dispatch_encoder_test.go:TestWhenDispatchAlreadyContainsSerializedPayload_Nop`). +- [ ] Run tests, confirm green. Update `TestDispatch_PublishesAndMarksDispatched` so its `published[0].Message` assertion is replaced with assertions on Payload/MessageType/ContentType/Topic. +- [ ] Confirm `TestDispatch_PublishFails_ReturnsErrorWithoutMarkingDispatched` and `TestDispatch_NoMessages_NoOp` still pass. + +### Phase 3: Recovery regression test + comment (red → green) + +- [ ] Add `TestRecover_PublishesDispatchWithTopicMessageTypeAndPayload`: seed an undispatched row with `type='order-received'` and a JSON payload; run `Recover`; assert the resulting `messaging.Dispatch` has `Topic == "order-received"`, `MessageType == "order-received"`, `ContentType == "application/json"`, `Payload == `, `Durable == true`, and `Message == nil`. +- [ ] Run the test. With Phase 2 in place, this test should already pass (recovery's `*harness.Message` is well-formed and the new dispatcher reads it correctly). If it fails, that is the right reason to fix it before continuing. +- [ ] Edit `sqladapter/recovery.go:42-47`: replace the inline framing of `ContentType: "application/json"` with a comment that states the assumption — the project's serializer is JSON; the durable bytes are recorded without a content-type column, so recovery carries that assumption forward. +- [ ] Run tests, confirm green. Confirm `TestRecover_NoOrphans_NoOp`, `TestRecover_DispatchesUndispatchedRowsInIDOrder`, `TestRecover_PassesPayloadAndTypeIntoMessage`, `TestRecover_RowsExceedBatchSize_FlushesInBatchesAndDispatchesAll`, and `TestRecover_RowCountIsMultipleOfBatchSize_FlushesUniformBatches` all still pass. + +### Phase 4: Full verification + +- [ ] Run `make test` (fmt, vet, `-race`, coverage). Confirm green and that `handlers/harness` and `handlers/harness/sqladapter` coverage have not regressed. +- [ ] Re-read the diff against the `CLAUDE.md` Go conventions: receiver named `this`; named slice/return values where applicable; no naked returns; no blank lines at method start/end; struct initializers use field/value pairs; multi-line struct literals close the brace on their own line. +- [ ] Sanity-check the diffs against the three defects in Background: (a) no double encode (dispatcher passes pre-encoded bytes; connector encoder short-circuits on `len(Payload) > 0`), (b) ContentType propagates in-process from serialization → message → dispatch, (c) recovery's `*harness.Message` now flows through the dispatcher to a well-formed broker dispatch (no more `ErrEmptyDispatchTopic`). +- [ ] Confirm package doc comment in `handlers/harness/sqladapter/dispatcher.go` still accurately describes the columns it depends on (`id`, `dispatched`, `type`, `payload`); it has not changed. diff --git a/doc/work-sessions/2026/2026-06-02_12-40-29-proposal-dispatch-content-encoding.html b/doc/work-sessions/2026/2026-06-02_12-40-29-proposal-dispatch-content-encoding.html new file mode 100644 index 0000000..935e3bc --- /dev/null +++ b/doc/work-sessions/2026/2026-06-02_12-40-29-proposal-dispatch-content-encoding.html @@ -0,0 +1,334 @@ + + + + +Proposal: Stop double-encoding payloads across dispatch and recovery + + + +

Proposal: Stop double-encoding payloads across dispatch and recovery

+ +

Background

+

+ The harness pipeline is store-and-forward: each message is serialized once + (stage 02), durably stored (stage 03), then published to the broker (stage 05 + via Dispatcher.Dispatch). On startup, sqladapter.Recover + re-publishes any row whose dispatched column is still NULL. +

+

+ The bytes stored in the Messages.payload column must + be byte-identical to the bytes published to the broker — otherwise a recovery + re-publishes a different message than the original send, silently. Today the + code does not guarantee this, and in one configuration it cannot publish a + recovered row at all. +

+

+ There are three connected defects across handlers/harness/02_serialization.go, + handlers/harness/sqladapter/dispatcher.go, and + handlers/harness/sqladapter/recovery.go. They are usually invisible + (the production serializer is JSON, which makes the bug benign) and they are masked + by the unit-test stub connector — but they are real, and recovery against a real + RabbitMQ writer is broken today. +

+ +

Defect 1: payload is encoded twice on the happy path

+

+ 02_serialization.go:29 encodes message.Value into + message.Content (these are the bytes destined for the durable row). +

+

+ sqladapter/dispatcher.go:64-67 then builds a messaging.Dispatch + with Message: message.Value (the in-memory Go struct) and passes it to the + transport Writer. The transport writer is the serialization.defaultWriter + (serialization/connector.go:103), which loops calls into + defaultDispatchEncoder.Encode (serialization/dispatch_encoder.go:30). + That encoder sees dispatch.Payload is empty and dispatch.Message + is non-nil, and runs the same Serializer.Serialize again + to produce dispatch.Payload/dispatch.ContentType/dispatch.MessageType. +

+

+ So the value is serialized once for the DB, then again for the broker. Both encodings + happen with the same Serializer, so the resulting bytes are equal, + but the harness has no guarantee of that — anyone who plugs in a serializer with side + effects, IDs, or timestamps will get divergent stored vs. published bytes. And it is + wasted CPU on every send. +

+

+ The TODO at dispatcher.go:58-63 calls this out and asks for one of two fixes: + either pass the pre-encoded bytes through Dispatch.Payload/MessageType/ContentType + and skip the connector's serialization for this writer, or drop the harness Serialization + stage and let the connector own all encoding. +

+ +

Defect 2: Message.ContentType is never populated on the happy path

+

+ harness.Message already has ContentType and ContentEncoding + fields (message.go:22-28) — but 02_serialization.go only writes + message.ContentType on the fallback path (when the user's + serializer returned an error and we fall back to fmt.Sprintf("%#v")). + On the success path message.ContentType is left as the zero value "". +

+

+ This is fine today because dispatcher.go re-encodes via the connector + (Defect 1), so the connector reapplies its own ContentType(). The moment we + fix Defect 1 by passing pre-encoded bytes through, we must know what the content type is — + and the harness never recorded it. The fix: stage 02 stamps message.ContentType + from serializer.ContentType() so the dispatcher (next stage in the same process) + can read it. No schema change. +

+ +

Defect 3: recovery hands the dispatcher a malformed message and breaks against a real broker

+

+ recovery.go:42-47 builds a harness.Message from the row with + Value left nil. The dispatcher today (per Defect 1) passes + Message: message.Value to the connector — which means recovery passes + Message: nil and Payload: nil. The connector's encoder takes its + dispatch.Message == nil early-return (dispatch_encoder.go:31) and + never sets Topic / MessageType / ContentType. + The RabbitMQ writer then rejects with ErrEmptyDispatchTopic + (rabbitmq/writer.go:38). So in the only production configuration that uses + topicFromMessageType=true, recovery can't publish a single row. +

+

+ Defect 3 is fixed automatically once Defect 1 is fixed: the new dispatcher + reads Type / ContentType / Content (which recovery + already populates) from the *harness.Message and hands them to the connector as + pre-populated MessageType / ContentType / Payload / + Topic. The connector encoder's len(Payload) > 0 early-return + then leaves everything alone, and the broker gets a complete dispatch. +

+ +

Approach

+

+ We pick the "pass-through" arm of the TODO: the harness Serialization stage remains the + single source of truth for encoded bytes and content type; the sqladapter Dispatcher hands + those pre-encoded bytes to the connector via Dispatch.Payload / + MessageType / ContentType; the connector encoder short-circuits + because Payload is non-empty. Recovery does the same pass-through — using the + row's stored type and the project-wide assumption that stored bytes are + application/json. +

+

+ The alternative (delete the harness Serialization stage entirely and let the connector own + all encoding) would require persistence to also call the connector's encoder and would + entangle the harness with the connector. Rejected on those grounds. +

+ +

Change A — 02_serialization.go: record content type on success

+

+ Extend the internal serializer interface in contracts.go:34: +

+
serializer interface {
+    Serialize(out io.Writer, in any) error
+    ContentType() string
+}
+

+ Then in serialization.Listen, on the success branch: +

+
err := this.serializer.Serialize(message.Content, message.Value)
+if err == nil {
+    message.ContentType = this.serializer.ContentType()
+} else {
+    // existing fallback unchanged: ContentType set to "go fmt.Sprintf(%#v)"
+}
+

+ The nop serializer in config.go:112 gains + ContentType() string { return "" }. The test fixture in + 02_serialization_test.go:40 gains the same stub. The production wiring + (callers pass serialization.Serializer from the serialization + package, which already exposes ContentType() string at + contracts.go:9-11) needs no changes. +

+ +

Change B — sqladapter/dispatcher.go: pass pre-encoded fields

+

+ Replace the current messaging.Dispatch literal (which passes Message: message.Value) + with: +

+
dispatches = append(dispatches, messaging.Dispatch{
+    Durable:     true,
+    MessageType: message.Type,
+    ContentType: message.ContentType,
+    Payload:     message.Content.Bytes(),
+    Topic:       message.Type,
+})
+

+ Topic: message.Type is required because the connector encoder normally sets + Topic = MessageType only when its topicFromMessageType flag is on + (dispatch_encoder.go:55). With Payload already populated, the + encoder takes its len(dispatch.Payload) > 0 early-return + (dispatch_encoder.go:31) and never runs the topic-population block. So the + dispatcher must populate Topic itself. A one-line comment cross-references + serialization/dispatch_encoder_test.go:TestWhenDispatchAlreadyContainsSerializedPayload_Nop + to explain why. +

+ +

Change C — sqladapter/recovery.go: comment-only

+

+ No functional change. Replace the inline framing of ContentType: "application/json" + with a comment that states the assumption: the project's serializer is JSON; the durable + bytes are recorded without a content-type column, so recovery carries that assumption + forward. +

+ +

Files modified

+ + + + + + + + + + + + + + + + + +
FileChange
handlers/harness/contracts.goExtend internal serializer interface with ContentType() string.
handlers/harness/02_serialization.goOn success, set message.ContentType = this.serializer.ContentType().
handlers/harness/02_serialization_test.goUpdate fixture serializer to satisfy ContentType() string; assert ContentType propagates.
handlers/harness/config.gonop.ContentType() string { return "" }.
handlers/harness/sqladapter/dispatcher.goPass Payload/MessageType/ContentType/Topic from *harness.Message; remove TODO.
handlers/harness/sqladapter/dispatcher_test.goAssert published Dispatch carries Payload/MessageType/ContentType/Topic; tighten stub connector.
handlers/harness/sqladapter/recovery.goComment-only: state the JSON content-type assumption inline.
handlers/harness/sqladapter/recovery_test.goAdd regression test asserting recovered Dispatch carries Topic/MessageType/Payload/ContentType.
+

No schema files (doc/mysql/schema.sql, testdb_test.go) are touched.

+ +

Trade-offs & Risks

+ + +

Your Turn

+

+ The step marked 🤝 in the checklist is reserved for you to implement. + It was chosen because it is the most interesting production-code contribution + in this proposal — the kind of work worth doing yourself. +

+

What to implement

+

+ In handlers/harness/sqladapter/dispatcher.go, replace the + messaging.Dispatch literal that currently passes + Message: message.Value with one that passes the pre-encoded fields. + Drop the TODO comment block. The new literal is: +

+
dispatches = append(dispatches, messaging.Dispatch{
+    Durable:     true,
+    MessageType: message.Type,
+    ContentType: message.ContentType,
+    Payload:     message.Content.Bytes(),
+    Topic:       message.Type,
+})
+

+ Add a one-line comment above the Topic field (or the append call) explaining + that Topic = Type because the connector encoder is bypassed when + Payload is pre-populated — cross-reference + serialization/dispatch_encoder_test.go:TestWhenDispatchAlreadyContainsSerializedPayload_Nop. +

+

Context and interfaces

+

+ The relevant area is handlers/harness/sqladapter/dispatcher.go:55-71. + message is a *harness.Message; by the time Change A is + complete, message.ContentType will be the string returned by the + serializer's ContentType() method, and message.Content is the + *bytes.Buffer holding the encoded bytes. + messaging.Dispatch is defined in the messaging package — + Payload []byte, MessageType string, + ContentType string, Topic string, Durable bool, + Message any. Leave Message unset (nil). +

+

What success looks like

+

+ TestDispatch_PublishesPreEncodedPayloadAndMetadata in + handlers/harness/sqladapter/dispatcher_test.go passes. It will assert: +

+ + +

Implementation Checklist

+ +

Phase 1: ContentType on the internal serializer interface (red → green)

+ + +

Phase 2: Dispatcher pass-through (red → green)

+ + +

Phase 3: Recovery regression test + comment (red → green)

+ + +

Phase 4: Full verification

+ + + + diff --git a/doc/work-sessions/2026/2026-06-02_14-57-17-branch-scan-master-vs-mikewhat_new-harness.html b/doc/work-sessions/2026/2026-06-02_14-57-17-branch-scan-master-vs-mikewhat_new-harness.html new file mode 100644 index 0000000..6496c80 --- /dev/null +++ b/doc/work-sessions/2026/2026-06-02_14-57-17-branch-scan-master-vs-mikewhat_new-harness.html @@ -0,0 +1,146 @@ + + + + +Branch Scan: master vs mikewhat/new-harness + + + +

Branch Scan: master vs mikewhat/new-harness

+ +

Summary

+

+ This branch introduces a new handlers/harness package: a staged, store-and-forward + message-handling pipeline composed of seven goroutine stages connected by buffered channels + (entrypoint → execution → serialization → persistence → completion → broadcast → terminal). + It also adds handlers/harness/sqladapter, a reference MySQL implementation of the + Writer and Dispatcher interfaces, backed by a new Messages + schema in doc/mysql/schema.sql. + Supporting infrastructure includes a Docker Compose file for local MySQL testing, new + Makefile targets for database-backed integration tests, and updated go.mod/ + go.sum adding github.com/go-sql-driver/mysql and + github.com/smarty/gunit/v2. + All 30 commits are additions only — no existing code was modified. + Test coverage is extensive: every pipeline stage, the router/scanner, the admission adapter, + the SQL writer, dispatcher, and recovery function all have unit or integration tests. +

+ +

Changed Files

+ + +

Concerns

+ + + diff --git a/doc/work-sessions/2026/2026-06-09_14-22-40-proposal-serialization-fail-fast.html b/doc/work-sessions/2026/2026-06-09_14-22-40-proposal-serialization-fail-fast.html new file mode 100644 index 0000000..b195522 --- /dev/null +++ b/doc/work-sessions/2026/2026-06-09_14-22-40-proposal-serialization-fail-fast.html @@ -0,0 +1,216 @@ + + + + +Proposal: Fail fast on serialization errors + + + +

Proposal: Fail fast on serialization errors

+ +

Background

+

+ Commit 2b7c630 established the contract: it is the domain model's + responsibility to only produce values that serialize successfully. A + serialization failure is therefore a programming error — a contract violation + that should never occur in a correctly built application. But the pipeline must + still do something when it occurs, and what it does today is the worst + available option. +

+

+ Today, handlers/harness/02_serialization.go:30-38 tracks a + SerializationError via the monitor and then lets the unit of work + keep flowing. The failed message proceeds with an empty Content + buffer and an empty ContentType, which means: +

+ + +

Decision (from design discussion, 2026-06-09)

+ + + + + +
QuestionAnswer
Persist a typed, payload-less row?No
Dispatch the message downstream?No — hard constraint
Mark a persisted row dispatched?Moot (nothing is persisted)
+

+ Instead: track the observation, then halt the process before the + unit reaches persistence. +

+ +

Approach

+

+ In 02_serialization.go, when serializer.Serialize returns + an error: track the SerializationError exactly as today, then + panic with the wrapped error (which satisfies + errors.Is(err, ErrSerialization)). The unit is never forwarded. +

+

+ This composes cleanly with semantics the pipeline already has: +

+ +

+ Why the whole process and not just the unit: by the time + serialization runs, the domain model's in-memory state has already advanced + (execution happened in stage 01). Dropping the unit and continuing would let MQ + redelivery replay inputs against already-mutated state. The only safe recovery + point is process death followed by a clean rebuild of state — halt or forward, + nothing in between. +

+

+ Mechanically, the panic occurs inside one of the fanned-out serialization + goroutines (SerializerCount, default 4). An unrecovered panic in any + goroutine terminates the process with a stack trace on stderr — loud by design. + The existing defer close(this.output) still runs during unwind, so + test code (which recovers) observes a closed output channel. +

+ +

Sketch (not final code)

+
err := this.serializer.Serialize(message.Content, message.Value)
+if err != nil {
+    failure := SerializationError{
+        Error: fmt.Errorf("%w: %w", ErrSerialization, err),
+        Value: message.Value,
+    }
+    this.monitor.Track(failure)
+    panic(failure.Error) // contract violation (see commit 2b7c630): halt before
+                         // persistence so nothing is stored, acked, or dispatched.
+}
+message.ContentType = this.serializer.ContentType()
+
+

+ Note this also retires the awkward failure.Error = nil / failure.Value = nil + reset dance in the current loop — failure becomes a local inside the + error branch. +

+ +

Trade-offs

+ + +

Implementation Checklist

+ +

Phase 1 — Red: capture fail-fast semantics in tests (agent)

+ + +

Phase 2 — Green: production change (your turn 🟠)

+ + +

Phase 3 — Documentation & sweep (agent)

+ + +

Your Turn 🟠

+
+

+ Phase 2 is yours: the semantic heart of the change, sized at roughly ten lines in + 02_serialization.go. The failing tests from Phase 1 are your + specification. Guidance, not code: +

+ +

+ If you'd rather I take Phase 2 as well, just say "skip the your-turn step." +

+
+ + + diff --git a/go.mod b/go.mod index c5db3f0..ee048a7 100644 --- a/go.mod +++ b/go.mod @@ -3,6 +3,10 @@ module github.com/smarty/messaging/v3 go 1.25 require ( + github.com/go-sql-driver/mysql v1.10.0 github.com/rabbitmq/amqp091-go v1.10.0 github.com/smarty/gunit v1.6.0 + github.com/smarty/gunit/v2 v2.1.0 ) + +require filippo.io/edwards25519 v1.2.0 // indirect diff --git a/go.sum b/go.sum index efb676e..80bb6e0 100644 --- a/go.sum +++ b/go.sum @@ -1,6 +1,12 @@ +filippo.io/edwards25519 v1.2.0 h1:crnVqOiS4jqYleHd9vaKZ+HKtHfllngJIiOpNpoJsjo= +filippo.io/edwards25519 v1.2.0/go.mod h1:xzAOLCNug/yB62zG1bQ8uziwrIqIuxhctzJT18Q77mc= +github.com/go-sql-driver/mysql v1.10.0 h1:Q+1LV8DkHJvSYAdR83XzuhDaTykuDx0l6fkXxoWCWfw= +github.com/go-sql-driver/mysql v1.10.0/go.mod h1:M+cqaI7+xxXGG9swrdeUIoPG3Y3KCkF0pZej+SK+nWk= github.com/rabbitmq/amqp091-go v1.10.0 h1:STpn5XsHlHGcecLmMFCtg7mqq0RnD+zFr4uzukfVhBw= github.com/rabbitmq/amqp091-go v1.10.0/go.mod h1:Hy4jKW5kQART1u+JkDTF9YYOQUHXqMuhrgxOEeS7G4o= github.com/smarty/gunit v1.6.0 h1:27yDmXz5ydI6bYN0A1ltJvtekRY6H3bQJZz0ifJIeVY= github.com/smarty/gunit v1.6.0/go.mod h1:4kEWyZ1xFTEwkEfCpjmIRejP9CHn2Q9F4NP6SmAR+fg= +github.com/smarty/gunit/v2 v2.1.0 h1:orbuo+Y7i50cI0oBYlpo1+crelNTX68PW41IdemT8Xs= +github.com/smarty/gunit/v2 v2.1.0/go.mod h1:8zgLMlQPLDFO5SVz7TrnUS1ADFiHHMChB8zPIQ4R4Ys= go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= diff --git a/handlers/harness/00_entrypoint.go b/handlers/harness/00_entrypoint.go new file mode 100644 index 0000000..3ee597e --- /dev/null +++ b/handlers/harness/00_entrypoint.go @@ -0,0 +1,140 @@ +package harness + +import ( + "context" + "sync" +) + +type entrypoint struct { + monitor Monitor + waiters *poolT[*sync.WaitGroup] + batches *poolT[*batch] + work chan *batch + lock *sync.RWMutex + closed bool + done chan struct{} + shedThreshold float64 +} + +func newEntrypoint(monitor Monitor, work chan *batch, shedThreshold float64) *entrypoint { + return &entrypoint{ + monitor: monitor, + waiters: newPoolT(newT[sync.WaitGroup]), + batches: newPoolT(newT[batch]), + work: work, + lock: new(sync.RWMutex), + done: make(chan struct{}), + shedThreshold: shedThreshold, + } +} + +func (this *entrypoint) prepare(messages ...any) (waiter *sync.WaitGroup, batch *batch) { + waiter = this.waiters.Get() + waiter.Add(1) + batch = this.batches.Get() + batch.messages = messages + batch.complete = func() { + waiter.Done() + this.monitor.Track(batchComplete) + this.batches.Put(batch) + } + return waiter, batch +} + +func (this *entrypoint) abandon(waiter *sync.WaitGroup, item *batch) { + waiter.Done() + this.batches.Put(item) +} + +func (this *entrypoint) waiterDone(waiter *sync.WaitGroup) (done chan struct{}) { + done = make(chan struct{}) + go func() { waiter.Wait(); close(done) }() + return done +} + +func (this *entrypoint) Handle(_ context.Context, messages ...any) { + this.lock.RLock() + if this.closed { + this.lock.RUnlock() + return + } + + waiter, item := this.prepare(messages...) + this.work <- item + this.lock.RUnlock() + this.monitor.Track(batchInFlight) + + waiter.Wait() + this.waiters.Put(waiter) +} + +func (this *entrypoint) await(ctx context.Context, message any) { + this.lock.RLock() + if this.closed { + this.lock.RUnlock() + return + } + + waiter, batch := this.prepare(message) + + select { + case this.work <- batch: + this.lock.RUnlock() + this.monitor.Track(batchInFlight) + case <-ctx.Done(): + this.lock.RUnlock() + this.abandon(waiter, batch) + this.waiters.Put(waiter) // safe: abandon() called Done() (count 0); no detached waiter. + this.monitor.Track(callerDeparted) + return + } + + select { + case <-this.waiterDone(waiter): + this.waiters.Put(waiter) // safe: detached Wait() returned before done was closed (count 0). + case <-ctx.Done(): + this.monitor.Track(callerDeparted) + // Intentionally do NOT recycle the waiter here: complete() is still pending + // and the detached waiterDone goroutine is still inside Wait(). Returning it + // to the pool now would let a later prepare() Add(1) before this Wait() + // returns -- documented sync.WaitGroup misuse. Get() already removed the + // pool's reference, so declining to Put() leaves nothing pool-held; the + // waiter is released to GC once complete() fires. + } +} + +func (this *entrypoint) admit() bool { + this.lock.RLock() + defer this.lock.RUnlock() + if this.closed { + return false + } + if float64(len(this.work))/float64(cap(this.work)) >= this.shedThreshold { + this.monitor.Track(loadShed) + return false + } + return true +} + +// Listen blocks until Close is called so the entrypoint can be added as a dominoes listener. +// This guarantees Close is invoked during shutdown, which closes the work channel and lets the +// downstream pipeline stages drain naturally. +func (this *entrypoint) Listen() { <-this.done } + +func (this *entrypoint) Close() error { + this.lock.Lock() + if !this.closed { + close(this.work) + close(this.done) + this.closed = true + } + this.lock.Unlock() + return nil +} + +var ( + batchInFlight BatchInFlight + batchComplete BatchComplete + loadShed LoadShed + callerDeparted CallerDeparted +) diff --git a/handlers/harness/00_entrypoint_test.go b/handlers/harness/00_entrypoint_test.go new file mode 100644 index 0000000..ac6b154 --- /dev/null +++ b/handlers/harness/00_entrypoint_test.go @@ -0,0 +1,399 @@ +package harness + +import ( + "context" + "io" + "sync" + "testing" + "time" + + "github.com/smarty/gunit/v2" + "github.com/smarty/gunit/v2/assert/should" +) + +func TestEntrypointFixture(t *testing.T) { + gunit.Run(new(EntrypointFixture), t) +} + +type EntrypointFixture struct { + *gunit.Fixture + ctx context.Context + work chan *batch + subject *entrypoint + + trackMu sync.Mutex + tracked []any +} + +func (this *EntrypointFixture) Setup() { + this.ctx = context.WithValue(this.Context(), "testing", this.Name()) + this.work = make(chan *batch, 4) + this.subject = newEntrypoint(this, this.work, 0.80) +} + +func (this *EntrypointFixture) Track(observation any) { + this.trackMu.Lock() + defer this.trackMu.Unlock() + this.tracked = append(this.tracked, observation) +} + +func (this *EntrypointFixture) TestImplementsCloser() { + var _ io.Closer = this.subject +} + +func (this *EntrypointFixture) TestHandlePushesBatchAndBlocksUntilCompletion() { + done := make(chan struct{}) + go func() { + this.subject.Handle(this.ctx, "msg-1", "msg-2") + close(done) + }() + + item := <-this.work + this.So(item.messages, should.Equal, []any{"msg-1", "msg-2"}) + + select { + case <-done: + this.Fatal("Handle returned before complete() was invoked") + default: + } + + item.complete() + <-done + + this.So(this.tracked, should.HaveLength, 2) + this.So(this.tracked, should.Contain, BatchInFlight{}) + this.So(this.tracked, should.Contain, BatchComplete{}) +} + +func (this *EntrypointFixture) TestHandleSerializesMultipleConcurrentCalls() { + done := make(chan struct{}, 3) + go func() { this.subject.Handle(this.ctx, "a"); done <- struct{}{} }() + go func() { this.subject.Handle(this.ctx, "b"); done <- struct{}{} }() + go func() { this.subject.Handle(this.ctx, "c"); done <- struct{}{} }() + + for range 3 { + item := <-this.work + item.complete() + <-done + } + + this.So(this.tracked, should.HaveLength, 6) + var inFlight int + for _, observation := range this.tracked { + switch observation.(type) { + case BatchInFlight: + inFlight++ + case BatchComplete: + inFlight-- + default: + this.Fatal("Unexpected observation:", observation) + } + } + this.So(inFlight, should.Equal, 0) +} + +func (this *EntrypointFixture) TestAwait_ReturnsAfterCompletion() { + done := make(chan struct{}) + go func() { + this.subject.await(this.ctx, "msg") + close(done) + }() + + item := <-this.work + this.So(item.messages, should.Equal, []any{"msg"}) + + select { + case <-done: + this.Fatal("await returned before complete() was invoked") + default: + } + + item.complete() + <-done + + this.So(this.tracked, should.Contain, BatchInFlight{}) + this.So(this.tracked, should.Contain, BatchComplete{}) +} + +func (this *EntrypointFixture) TestAwait_UnblocksOnContextCancelWhileWaiting() { + ctx, cancel := context.WithCancel(this.ctx) + defer cancel() + + done := make(chan struct{}) + go func() { + this.subject.await(ctx, "msg") + close(done) + }() + + item := <-this.work // enqueue succeeded; pipeline now owns the batch. + + select { + case <-done: + this.Fatal("await returned before the caller departed or completion") + default: + } + + cancel() + <-done + + this.So(this.tracked, should.Contain, BatchInFlight{}) + this.So(this.tracked, should.Contain, CallerDeparted{}) + + // The batch was NOT abandoned: the pipeline still owns it and will invoke + // complete() later. await must not have Put it back to the pool. + item.complete() + this.So(this.tracked, should.Contain, BatchComplete{}) +} + +func (this *EntrypointFixture) TestAwait_UnblocksOnContextCancelWhileEnqueuing() { + work := make(chan *batch, 1) + subject := newEntrypoint(this, work, 0.80) + work <- &batch{} // fill the channel so the next enqueue blocks. + + ctx, cancel := context.WithCancel(this.ctx) + defer cancel() + + done := make(chan struct{}) + go func() { + subject.await(ctx, "msg") + close(done) + }() + + select { + case <-done: + this.Fatal("await returned before the enqueue could block") + default: + } + + cancel() + <-done + + this.So(this.tracked, should.Contain, CallerDeparted{}) + this.So(this.tracked, should.NOT.Contain, BatchInFlight{}) + + // The never-enqueued batch was abandoned and returned to the pool. + this.So(this.tracked, should.NOT.Contain, BatchComplete{}) +} + +func (this *EntrypointFixture) TestAwait_DepartedInFlightDoesNotCorruptPooledWaiter() { + const workers = 8 + const perWorker = 2000 + + work := make(chan *batch, 64) + subject := newEntrypoint(this, work, 0.80) + + // Drainer: take ownership of each enqueued batch and complete it promptly, + // driving the waiter's count to zero concurrently with new awaits that recycle + // waiters from the pool. If a departed await recycles a still-in-use waiter, a + // later prepare()'s Add(1) races the detached Wait()'s return -- tripping the + // runtime's WaitGroup misuse detector. + drained := make(chan struct{}) + go func() { + defer close(drained) + for item := range work { + item.complete() + } + }() + + var clients sync.WaitGroup + for range workers { + clients.Add(1) + go func() { + defer clients.Done() + for range perWorker { + ctx, cancel := context.WithCancel(this.ctx) + go cancel() // depart at an arbitrary moment relative to processing. + subject.await(ctx, "msg") + } + }() + } + clients.Wait() + + this.So(subject.Close(), should.BeNil) + <-drained + + this.So(this.tracked, should.Contain, CallerDeparted{}) + this.So(this.tracked, should.Contain, BatchComplete{}) +} + +func (this *EntrypointFixture) TestAwait_BatchCarriesExactlyOneMessage() { + go this.subject.await(this.ctx, "only") + + item := <-this.work + this.So(item.messages, should.HaveLength, 1) + this.So(item.messages[0], should.Equal, "only") + + item.complete() +} + +func (this *EntrypointFixture) TestAwait_ClosedPipelineReturnsImmediately() { + this.So(this.subject.Close(), should.BeNil) + + done := make(chan struct{}) + go func() { + this.subject.await(this.ctx, "msg") + close(done) + }() + + select { + case <-done: + case <-time.After(time.Second): + this.Fatal("await did not return on a closed pipeline") + } + + this.So(this.tracked, should.BeEmpty) +} + +func (this *EntrypointFixture) TestAdmit_TrueWhenBelowThreshold() { + this.So(this.subject.admit(), should.BeTrue) +} + +func (this *EntrypointFixture) TestAdmit_FalseAtOrAboveThreshold_TracksLoadShed() { + work := make(chan *batch, 10) + subject := newEntrypoint(this, work, 0.5) + for range 5 { + work <- &batch{} + } + this.So(subject.admit(), should.BeFalse) + this.So(this.tracked, should.Contain, LoadShed{}) +} + +func (this *EntrypointFixture) TestAdmit_FalseWhenClosed_NoLoadShed() { + this.So(this.subject.Close(), should.BeNil) + this.So(this.subject.admit(), should.BeFalse) + this.So(this.tracked, should.NOT.Contain, LoadShed{}) +} + +func (this *EntrypointFixture) TestAdmit_ThresholdAtOrAboveOneDisablesWatermark() { + work := make(chan *batch, 4) + subject := newEntrypoint(this, work, 2.0) + for range 4 { + work <- &batch{} + } + this.So(subject.admit(), should.BeTrue) + this.So(this.tracked, should.NOT.Contain, LoadShed{}) + + this.So(subject.Close(), should.BeNil) + this.So(subject.admit(), should.BeFalse) +} + +func (this *EntrypointFixture) TestHandle_BlocksUntilDurable() { + done := make(chan struct{}) + go func() { + this.subject.Handle(this.ctx, "msg") + close(done) + }() + + item := <-this.work + + select { + case <-done: + this.Fatal("Handle returned before the batch was completed") + case <-time.After(20 * time.Millisecond): + } + + item.complete() + <-done +} + +func (this *EntrypointFixture) TestHandle_DoesNotShedAtHighWatermark() { + work := make(chan *batch, 2) + subject := newEntrypoint(this, work, 0.5) + + done := make(chan struct{}, 5) + for range 5 { + go func() { + subject.Handle(this.ctx, "msg") + done <- struct{}{} + }() + } + + // None should return while the work is unconsumed: Handle blocks on send + // (no shedding) and then waits for completion. + select { + case <-done: + this.Fatal("Handle returned before the batch was completed") + case <-time.After(20 * time.Millisecond): + } + + for range 5 { + item := <-work + item.complete() + } + for range 5 { + <-done + } + + this.So(this.tracked, should.NOT.Contain, LoadShed{}) +} + +func (this *EntrypointFixture) TestHandle_IgnoresContextCancel() { + ctx, cancel := context.WithCancel(this.ctx) + defer cancel() + + done := make(chan struct{}) + go func() { + this.subject.Handle(ctx, "msg") + close(done) + }() + + item := <-this.work + cancel() + + select { + case <-done: + this.Fatal("Handle returned on context cancel; MQ deliveries must not honor a deadline") + case <-time.After(20 * time.Millisecond): + } + + item.complete() + <-done +} + +func (this *EntrypointFixture) TestHandle_ReturnsImmediatelyOnClosedPipeline() { + this.So(this.subject.Close(), should.BeNil) + + done := make(chan struct{}) + go func() { + this.subject.Handle(this.ctx, "msg") + close(done) + }() + + select { + case <-done: + case <-time.After(time.Second): + this.Fatal("Handle did not return on a closed pipeline") + } +} + +func (this *EntrypointFixture) TestHandle_PreservesVariadicMessages() { + go this.subject.Handle(this.ctx, "a", "b", "c") + + item := <-this.work + this.So(item.messages, should.HaveLength, 3) + this.So(item.messages, should.Equal, []any{"a", "b", "c"}) + + item.complete() +} + +func (this *EntrypointFixture) TestCloseReleasesListenAndClosesWorkChannel() { + listened := make(chan struct{}) + go func() { + this.subject.Listen() + close(listened) + }() + + this.So(this.subject.Close(), should.BeNil) + + <-listened + + _, open := <-this.work + this.So(open, should.BeFalse) + this.So(this.tracked, should.BeEmpty) +} + +func (this *EntrypointFixture) TestCloseIsIdempotent() { + this.So(this.subject.Close(), should.BeNil) + this.So(this.subject.Close(), should.BeNil) + this.So(this.tracked, should.BeEmpty) +} diff --git a/handlers/harness/01_execution.go b/handlers/harness/01_execution.go new file mode 100644 index 0000000..ed655e6 --- /dev/null +++ b/handlers/harness/01_execution.go @@ -0,0 +1,48 @@ +package harness + +import ( + "bytes" +) + +type execution struct { + monitor Monitor + maxUnitSize int + input chan *batch + output chan *unitOfWork + executor executor +} + +func newExecution(monitor Monitor, maxUnitSize int, input chan *batch, output chan *unitOfWork, exec executor) *execution { + return &execution{ + monitor: monitor, + maxUnitSize: maxUnitSize, + input: input, + output: output, + executor: exec, + } +} + +func (this *execution) Listen() { + defer close(this.output) + + var unit *unitOfWork // TODO: pool for *unitOfWork (and this.monitor.Track(UnitOfWorkComplete{}) when putting back) + for item := range this.input { + if unit == nil { + unit = new(unitOfWork) + } + unit.completions = append(unit.completions, item.complete) + for _, message := range item.messages { + this.executor.Execute(message, func(outgoing ...any) { + for _, result := range outgoing { + record := &Message{Value: result, Content: bytes.NewBuffer(nil)} // TODO: pool for *Message + unit.results = append(unit.results, record) + } + }) + } + if len(unit.completions) < this.maxUnitSize && len(this.input) > 0 { + continue // more to do + } + this.output <- unit + unit = nil + } +} diff --git a/handlers/harness/01_execution_test.go b/handlers/harness/01_execution_test.go new file mode 100644 index 0000000..f4ed9d3 --- /dev/null +++ b/handlers/harness/01_execution_test.go @@ -0,0 +1,123 @@ +package harness + +import ( + "sync" + "testing" + + "github.com/smarty/gunit/v2" + "github.com/smarty/gunit/v2/assert/should" +) + +func TestExecutionFixture(t *testing.T) { + gunit.Run(new(ExecutionFixture), t) +} + +type ExecutionFixture struct { + *gunit.Fixture + input chan *batch + output chan *unitOfWork + subject *execution + + executeMu sync.Mutex + executeCalls []any + executeOutputs [][]any + + tracked []any +} + +func (this *ExecutionFixture) Setup() { + this.input = make(chan *batch, 8) + this.output = make(chan *unitOfWork, 8) + this.subject = newExecution(this, 64, this.input, this.output, this) +} + +func (this *ExecutionFixture) Execute(message any, broadcast func(...any)) { + this.executeMu.Lock() + defer this.executeMu.Unlock() + this.executeCalls = append(this.executeCalls, message) + if len(this.executeOutputs) == 0 { + return + } + out := this.executeOutputs[0] + this.executeOutputs = this.executeOutputs[1:] + broadcast(out...) +} + +func (this *ExecutionFixture) Track(observation any) { + this.tracked = append(this.tracked, observation) +} + +func (this *ExecutionFixture) drain() (results []*unitOfWork) { + for unit := range this.output { + results = append(results, unit) + } + return results +} + +func (this *ExecutionFixture) TestSingleBatchProducesUnitOfWork() { + this.executeOutputs = [][]any{{"result-A"}} + this.input <- &batch{messages: []any{"msg-A"}, complete: func() {}} + close(this.input) + + go this.subject.Listen() + + units := this.drain() + this.So(len(units), should.Equal, 1) + this.So(this.executeCalls, should.Equal, []any{"msg-A"}) + this.So(len(units[0].results), should.Equal, 1) + this.So(units[0].results[0].Value, should.Equal, "result-A") + this.So(len(units[0].completions), should.Equal, 1) +} + +func (this *ExecutionFixture) TestUnitFlushesWhenMaxUnitSizeReached() { + this.executeOutputs = [][]any{{"r1"}, {"r2"}, {"r3"}} + this.input <- &batch{messages: []any{"m1"}, complete: func() {}} + this.input <- &batch{messages: []any{"m2"}, complete: func() {}} + this.input <- &batch{messages: []any{"m3"}, complete: func() {}} + close(this.input) + + this.subject = newExecution(this, 2, this.input, this.output, this) + go this.subject.Listen() + + units := this.drain() + this.So(len(units), should.Equal, 2) + this.So(len(units[0].completions), should.Equal, 2) + this.So(len(units[1].completions), should.Equal, 1) + this.So(this.executeCalls, should.Equal, []any{"m1", "m2", "m3"}) +} + +func (this *ExecutionFixture) TestEmptyExecutorOutputProducesUnitWithNoResults() { + this.input <- &batch{messages: []any{"silent"}, complete: func() {}} + close(this.input) + + go this.subject.Listen() + + units := this.drain() + this.So(len(units), should.Equal, 1) + this.So(units[0].results, should.BeEmpty) + this.So(this.executeCalls, should.Equal, []any{"silent"}) +} + +func (this *ExecutionFixture) TestExecutorBroadcastsMultipleResults() { + this.executeOutputs = [][]any{{"r1", "r2", "r3"}} + this.input <- &batch{messages: []any{"msg"}, complete: func() {}} + close(this.input) + + go this.subject.Listen() + + units := this.drain() + this.So(len(units), should.Equal, 1) + this.So(len(units[0].results), should.Equal, 3) + this.So(units[0].results[0].Value, should.Equal, "r1") + this.So(units[0].results[1].Value, should.Equal, "r2") + this.So(units[0].results[2].Value, should.Equal, "r3") +} + +func (this *ExecutionFixture) TestClosedInputClosesOutput() { + close(this.input) + go this.subject.Listen() + + _, open := <-this.output + this.So(open, should.BeFalse) + this.So(this.tracked, should.BeEmpty) +} diff --git a/handlers/harness/02_serialization.go b/handlers/harness/02_serialization.go new file mode 100644 index 0000000..26bbe15 --- /dev/null +++ b/handlers/harness/02_serialization.go @@ -0,0 +1,38 @@ +package harness + +import "fmt" + +type serialization struct { + monitor Monitor + serializer serializer + input chan *unitOfWork + output chan *unitOfWork +} + +func newSerialization(monitor Monitor, enc serializer, input, output chan *unitOfWork) *serialization { + return &serialization{ + monitor: monitor, + serializer: enc, + input: input, + output: output, + } +} + +func (this *serialization) Listen() { + defer close(this.output) + for unit := range this.input { + for _, message := range unit.results { + err := this.serializer.Serialize(message.Content, message.Value) + if err != nil { + failure := SerializationError{ + Value: message.Value, + Error: fmt.Errorf("%w: %w", ErrSerialization, err), + } + this.monitor.Track(failure) + panic(failure.Error) // The caller has failed to produce only values that will serialize successfully. + } + message.ContentType = this.serializer.ContentType() + } + this.output <- unit + } +} diff --git a/handlers/harness/02_serialization_test.go b/handlers/harness/02_serialization_test.go new file mode 100644 index 0000000..9e654d3 --- /dev/null +++ b/handlers/harness/02_serialization_test.go @@ -0,0 +1,184 @@ +package harness + +import ( + "bytes" + "errors" + "io" + "testing" + + "github.com/smarty/gunit/v2" + "github.com/smarty/gunit/v2/assert/should" +) + +func TestSerializationFixture(t *testing.T) { + gunit.Run(new(SerializationFixture), t) +} + +type SerializationFixture struct { + *gunit.Fixture + input chan *unitOfWork + output chan *unitOfWork + subject *serialization + + tracked []any + + serializeCalls []any + serializeFail map[any]error +} + +func (this *SerializationFixture) Setup() { + this.input = make(chan *unitOfWork, 4) + this.output = make(chan *unitOfWork, 4) + this.serializeFail = make(map[any]error) + this.subject = newSerialization(this, this, this.input, this.output) +} + +func (this *SerializationFixture) Track(observation any) { + this.tracked = append(this.tracked, observation) +} + +func (this *SerializationFixture) Serialize(out io.Writer, in any) error { + this.serializeCalls = append(this.serializeCalls, in) + if err, ok := this.serializeFail[in]; ok { + return err + } + _, _ = out.Write([]byte("encoded:")) + switch v := in.(type) { + case string: + _, _ = out.Write([]byte(v)) + default: + _, _ = out.Write([]byte("value")) + } + return nil +} + +func (this *SerializationFixture) ContentType() string { + return "test/content-type" +} + +func (this *SerializationFixture) drain() (results []*unitOfWork) { + for unit := range this.output { + results = append(results, unit) + } + return results +} + +func (this *SerializationFixture) TestSerializesEachResultValueIntoContent() { + unit := &unitOfWork{results: []*Message{ + {Value: "alpha", Content: bytes.NewBuffer(nil)}, + {Value: "beta", Content: bytes.NewBuffer(nil)}, + }} + this.input <- unit + close(this.input) + + go this.subject.Listen() + + units := this.drain() + this.So(len(units), should.Equal, 1) + this.So(units[0].results[0].Content.String(), should.Equal, "encoded:alpha") + this.So(units[0].results[1].Content.String(), should.Equal, "encoded:beta") + this.So(this.serializeCalls, should.Equal, []any{"alpha", "beta"}) + this.So(this.tracked, should.BeEmpty) +} + +func (this *SerializationFixture) TestForwardsUnitsInOrder() { + this.input <- &unitOfWork{results: []*Message{{Value: "one", Content: bytes.NewBuffer(nil)}}} + this.input <- &unitOfWork{results: []*Message{{Value: "two", Content: bytes.NewBuffer(nil)}}} + close(this.input) + + go this.subject.Listen() + + units := this.drain() + this.So(len(units), should.Equal, 2) + this.So(units[0].results[0].Content.String(), should.Equal, "encoded:one") + this.So(units[1].results[0].Content.String(), should.Equal, "encoded:two") +} + +func (this *SerializationFixture) TestEmptyResultsForwardsCleanly() { + this.input <- &unitOfWork{} + close(this.input) + + go this.subject.Listen() + + units := this.drain() + this.So(len(units), should.Equal, 1) + this.So(units[0].results, should.BeEmpty) + this.So(this.tracked, should.BeEmpty) +} + +func (this *SerializationFixture) TestClosedInputClosesOutput() { + close(this.input) + go this.subject.Listen() + + _, open := <-this.output + this.So(open, should.BeFalse) + this.So(this.tracked, should.BeEmpty) +} + +func (this *SerializationFixture) TestSerializesEachResultValueIntoContent_PopulatesContentTypeOnSuccess() { + unit := &unitOfWork{results: []*Message{ + {Value: "hello", Content: bytes.NewBuffer(nil)}, + }} + this.input <- unit + close(this.input) + + go this.subject.Listen() + + units := this.drain() + this.So(len(units), should.Equal, 1) + this.So(units[0].results[0].ContentType, should.Equal, "test/content-type") +} + +func (this *SerializationFixture) listenAndRecover() (recovered any) { + defer func() { recovered = recover() }() + this.subject.Listen() + return nil +} + +func (this *SerializationFixture) TestSerializerErrorTracksThenPanics() { + type TestMessage struct { + Value string `json:"value"` + } + boom := errors.New("boom") + bad := TestMessage{ + Value: "unserializable", + } + this.serializeFail[bad] = boom + unit := &unitOfWork{results: []*Message{ + {Value: "good-1", Content: bytes.NewBuffer(nil)}, + {Value: bad, Content: bytes.NewBuffer(nil)}, + {Value: "good-2", Content: bytes.NewBuffer(nil)}, + }} + this.input <- unit + close(this.input) + + recovered := this.listenAndRecover() + + this.So(recovered, should.NOT.BeNil) + err, isError := recovered.(error) + this.So(isError, should.BeTrue) + this.So(err, should.WrapError, ErrSerialization) + this.So(err, should.WrapError, boom) + this.So(this.tracked, should.HaveLength, 1) + observation, isObservation := this.tracked[0].(SerializationError) + this.So(isObservation, should.BeTrue) + this.So(observation.Error, should.WrapError, ErrSerialization) + this.So(observation.Value, should.Equal, bad) + this.So(this.serializeCalls, should.Equal, []any{"good-1", bad}) + this.So(this.drain(), should.BeEmpty) // drain returning proves output was closed during unwind +} + +func (this *SerializationFixture) TestUnitsBeforeFailureRemainForwarded() { + boom := errors.New("boom") + this.serializeFail["bad"] = boom + this.input <- &unitOfWork{results: []*Message{{Value: "one", Content: bytes.NewBuffer(nil)}}} + this.input <- &unitOfWork{results: []*Message{{Value: "bad", Content: bytes.NewBuffer(nil)}}} + close(this.input) + + recovered := this.listenAndRecover() + + this.So(recovered, should.NOT.BeNil) + units := this.drain() + this.So(units, should.HaveLength, 1) + this.So(units[0].results[0].Content.String(), should.Equal, "encoded:one") +} diff --git a/handlers/harness/03_persistence.go b/handlers/harness/03_persistence.go new file mode 100644 index 0000000..70449c6 --- /dev/null +++ b/handlers/harness/03_persistence.go @@ -0,0 +1,62 @@ +package harness + +import ( + "context" + "fmt" + "time" +) + +type persistence struct { + ctx context.Context + monitor Monitor + input chan *unitOfWork + output chan *unitOfWork + writer Writer + wait func(context.Context, time.Duration) error + buffer []*Message +} + +func newPersistence(ctx context.Context, monitor Monitor, input, output chan *unitOfWork, writer Writer, wait func(context.Context, time.Duration) error) *persistence { + return &persistence{ + ctx: ctx, + monitor: monitor, + input: input, + output: output, + writer: writer, + wait: wait, + buffer: make([]*Message, 0, 1024), + } +} + +func (this *persistence) Listen() { + defer close(this.output) + for unit := range this.input { + for _, message := range unit.results { + this.buffer = append(this.buffer, message) + } + stored := this.store() + this.buffer = this.buffer[:0] + if !stored { + continue // shutdown before durable write: do NOT forward (no ack); MQ redelivers + } + this.output <- unit + } +} +func (this *persistence) store() (stored bool) { + var failure PersistenceError + for attempt := 1; ; attempt++ { + err := this.writer.Write(this.ctx, this.buffer...) + if err == nil { + return true + } + failure.Attempt = attempt + failure.Error = fmt.Errorf("%w: %w", ErrPersistence, err) + this.monitor.Track(failure) + // Retries forever (until the process restarts) unless the context is cancelled. + // TODO: exponential back-off w/ jitter + if this.wait(this.ctx, time.Second) != nil { + this.monitor.Track(PersistenceAbandoned{Attempts: attempt}) + return false + } + } +} diff --git a/handlers/harness/03_persistence_test.go b/handlers/harness/03_persistence_test.go new file mode 100644 index 0000000..ebd05fc --- /dev/null +++ b/handlers/harness/03_persistence_test.go @@ -0,0 +1,166 @@ +package harness + +import ( + "context" + "errors" + "sync" + "testing" + "time" + + "github.com/smarty/gunit/v2" + "github.com/smarty/gunit/v2/assert/should" +) + +func TestPersistenceFixture(t *testing.T) { + gunit.Run(new(PersistenceFixture), t) +} + +type PersistenceFixture struct { + *gunit.Fixture + ctx context.Context + input chan *unitOfWork + output chan *unitOfWork + waits []time.Duration + waitErr error + subject *persistence + + writeMu sync.Mutex + writeCalls [][]*Message + writeFailCount int + + tracked []any +} + +func (this *PersistenceFixture) Setup() { + this.ctx = context.WithValue(this.Context(), "testing", this.Name()) + this.input = make(chan *unitOfWork, 4) + this.output = make(chan *unitOfWork, 4) + this.subject = newPersistence(this.ctx, this, this.input, this.output, this, this.wait) +} + +func (this *PersistenceFixture) wait(_ context.Context, d time.Duration) error { + this.waits = append(this.waits, d) + return this.waitErr +} + +func (this *PersistenceFixture) Track(observation any) { + this.tracked = append(this.tracked, observation) +} + +func (this *PersistenceFixture) Write(ctx context.Context, messages ...*Message) error { + this.So(ctx.Value("testing"), should.Equal, this.Name()) + this.writeMu.Lock() + defer this.writeMu.Unlock() + captured := make([]*Message, len(messages)) + copy(captured, messages) + this.writeCalls = append(this.writeCalls, captured) + if this.writeFailCount > 0 { + this.writeFailCount-- + return errors.New("write failure") + } + return nil +} + +func (this *PersistenceFixture) drain() (results []*unitOfWork) { + for unit := range this.output { + results = append(results, unit) + } + return results +} + +func (this *PersistenceFixture) TestWritesAllResultsThenForwardsUnit() { + m1 := &Message{Value: "a"} + m2 := &Message{Value: "b"} + this.input <- &unitOfWork{results: []*Message{m1, m2}} + close(this.input) + + go this.subject.Listen() + + units := this.drain() + this.So(len(units), should.Equal, 1) + this.So(len(this.writeCalls), should.Equal, 1) + this.So(this.writeCalls[0], should.Equal, []*Message{m1, m2}) + this.So(this.waits, should.BeEmpty) + this.So(this.tracked, should.BeEmpty) +} + +func (this *PersistenceFixture) TestEachUnitIsWrittenIndependently() { + m1 := &Message{Value: "a"} + m2 := &Message{Value: "b"} + this.input <- &unitOfWork{results: []*Message{m1}} + this.input <- &unitOfWork{results: []*Message{m2}} + close(this.input) + + go this.subject.Listen() + + units := this.drain() + this.So(len(units), should.Equal, 2) + this.So(len(this.writeCalls), should.Equal, 2) + this.So(this.writeCalls[0], should.Equal, []*Message{m1}) + this.So(this.writeCalls[1], should.Equal, []*Message{m2}) + this.So(this.tracked, should.BeEmpty) +} + +func (this *PersistenceFixture) TestEmptyResultsTriggersEmptyWrite() { + this.input <- &unitOfWork{} + close(this.input) + + go this.subject.Listen() + + units := this.drain() + this.So(len(units), should.Equal, 1) + this.So(len(this.writeCalls), should.Equal, 1) + this.So(this.writeCalls[0], should.BeEmpty) + this.So(this.tracked, should.BeEmpty) +} + +func (this *PersistenceFixture) TestRetriesUntilWriteSucceeds() { + this.writeFailCount = 2 + m := &Message{Value: "retried"} + this.input <- &unitOfWork{results: []*Message{m}} + close(this.input) + + go this.subject.Listen() + + units := this.drain() + this.So(len(units), should.Equal, 1) + this.So(len(this.writeCalls), should.Equal, 3) + this.So(this.waits, should.Equal, []time.Duration{time.Second, time.Second}) + this.So(this.tracked, should.HaveLength, 2) + for n, observation := range this.tracked { + failure, ok := observation.(PersistenceError) + this.So(ok, should.BeTrue) + this.So(failure.Error, should.WrapError, ErrPersistence) + this.So(failure.Attempt, should.Equal, n+1) + } +} + +func (this *PersistenceFixture) TestPersistenceAbandonsOnContextCancelAndDropsUnit() { + this.writeFailCount = 1 << 30 // always fail + this.waitErr = context.Canceled + unit := &unitOfWork{results: []*Message{{Value: "abandoned"}}} + this.input <- unit + close(this.input) + + go this.subject.Listen() + + units := this.drain() + this.So(units, should.BeEmpty) + this.So(len(this.writeCalls), should.Equal, 1) + this.So(this.waits, should.Equal, []time.Duration{time.Second}) + this.So(this.tracked, should.HaveLength, 2) + failure, ok := this.tracked[0].(PersistenceError) + this.So(ok, should.BeTrue) + this.So(failure.Error, should.WrapError, ErrPersistence) + this.So(failure.Attempt, should.Equal, 1) + this.So(this.tracked[1], should.Equal, PersistenceAbandoned{Attempts: 1}) +} + +func (this *PersistenceFixture) TestClosedInputClosesOutput() { + close(this.input) + go this.subject.Listen() + + _, open := <-this.output + this.So(open, should.BeFalse) + this.So(this.tracked, should.BeEmpty) +} diff --git a/handlers/harness/04_completion.go b/handlers/harness/04_completion.go new file mode 100644 index 0000000..ac6c076 --- /dev/null +++ b/handlers/harness/04_completion.go @@ -0,0 +1,23 @@ +package harness + +type completion struct { + input chan *unitOfWork + output chan *unitOfWork +} + +func newCompletion(input, output chan *unitOfWork) *completion { + return &completion{ + input: input, + output: output, + } +} + +func (this *completion) Listen() { + defer close(this.output) + for unit := range this.input { + for _, complete := range unit.completions { + complete() + } + this.output <- unit + } +} diff --git a/handlers/harness/04_completion_test.go b/handlers/harness/04_completion_test.go new file mode 100644 index 0000000..fd11aba --- /dev/null +++ b/handlers/harness/04_completion_test.go @@ -0,0 +1,79 @@ +package harness + +import ( + "testing" + + "github.com/smarty/gunit/v2" + "github.com/smarty/gunit/v2/assert/should" +) + +func TestCompletionFixture(t *testing.T) { + gunit.Run(new(CompletionFixture), t) +} + +type CompletionFixture struct { + *gunit.Fixture + input chan *unitOfWork + output chan *unitOfWork + subject *completion +} + +func (this *CompletionFixture) Setup() { + this.input = make(chan *unitOfWork, 4) + this.output = make(chan *unitOfWork, 4) + this.subject = newCompletion(this.input, this.output) +} + +func (this *CompletionFixture) drain() (results []*unitOfWork) { + for unit := range this.output { + results = append(results, unit) + } + return results +} + +func (this *CompletionFixture) TestCallsAllCompletionsThenForwards() { + var invocations []string + this.input <- &unitOfWork{completions: []func(){ + func() { invocations = append(invocations, "first") }, + func() { invocations = append(invocations, "second") }, + }} + close(this.input) + + go this.subject.Listen() + + units := this.drain() + this.So(len(units), should.Equal, 1) + this.So(invocations, should.Equal, []string{"first", "second"}) +} + +func (this *CompletionFixture) TestNoCompletionsForwardsCleanly() { + this.input <- &unitOfWork{} + close(this.input) + + go this.subject.Listen() + + units := this.drain() + this.So(len(units), should.Equal, 1) +} + +func (this *CompletionFixture) TestEachUnitFiresIndependently() { + var firstCalled, secondCalled int + this.input <- &unitOfWork{completions: []func(){func() { firstCalled++ }}} + this.input <- &unitOfWork{completions: []func(){func() { secondCalled++ }}} + close(this.input) + + go this.subject.Listen() + + units := this.drain() + this.So(len(units), should.Equal, 2) + this.So(firstCalled, should.Equal, 1) + this.So(secondCalled, should.Equal, 1) +} + +func (this *CompletionFixture) TestClosedInputClosesOutput() { + close(this.input) + go this.subject.Listen() + + _, open := <-this.output + this.So(open, should.BeFalse) +} diff --git a/handlers/harness/05_broadcast.go b/handlers/harness/05_broadcast.go new file mode 100644 index 0000000..72dd552 --- /dev/null +++ b/handlers/harness/05_broadcast.go @@ -0,0 +1,62 @@ +package harness + +import ( + "context" + "fmt" + "time" +) + +type broadcast struct { + ctx context.Context + monitor Monitor + input chan *unitOfWork + output chan *unitOfWork + buffer []*Message + dispatcher Dispatcher + wait func(context.Context, time.Duration) error +} + +func newBroadcast(ctx context.Context, monitor Monitor, input, output chan *unitOfWork, dispatcher Dispatcher, wait func(context.Context, time.Duration) error) *broadcast { + return &broadcast{ + ctx: ctx, + monitor: monitor, + input: input, + output: output, + buffer: make([]*Message, 0, 1024), + dispatcher: dispatcher, + wait: wait, + } +} + +func (this *broadcast) Listen() { + defer close(this.output) + for unit := range this.input { + for _, message := range unit.results { + this.buffer = append(this.buffer, message) + } + this.dispatch() + this.buffer = this.buffer[:0] + // Unlike persistence (which drops the unit on abandonment so MQ redelivers), + // broadcast always forwards: the batch is already durably stored, so we ack + // upstream regardless of whether dispatch succeeded. + this.output <- unit + } +} +func (this *broadcast) dispatch() { + var failure BroadcastError + for attempt := 1; ; attempt++ { + err := this.dispatcher.Dispatch(this.ctx, this.buffer...) + if err == nil { + return + } + failure.Attempt = attempt + failure.Error = fmt.Errorf("%w: %w", ErrBroadcast, err) + this.monitor.Track(failure) + // Retries forever (until the process restarts) unless the context is cancelled. + // TODO: exponential backoff w/ jitter + if this.wait(this.ctx, time.Second) != nil { + this.monitor.Track(BroadcastAbandoned{Attempts: attempt}) + return + } + } +} diff --git a/handlers/harness/05_broadcast_test.go b/handlers/harness/05_broadcast_test.go new file mode 100644 index 0000000..e6a3aac --- /dev/null +++ b/handlers/harness/05_broadcast_test.go @@ -0,0 +1,166 @@ +package harness + +import ( + "context" + "errors" + "sync" + "testing" + "time" + + "github.com/smarty/gunit/v2" + "github.com/smarty/gunit/v2/assert/should" +) + +func TestBroadcastFixture(t *testing.T) { + gunit.Run(new(BroadcastFixture), t) +} + +type BroadcastFixture struct { + *gunit.Fixture + ctx context.Context + input chan *unitOfWork + output chan *unitOfWork + waits []time.Duration + waitErr error + subject *broadcast + + dispatchMu sync.Mutex + dispatchCalls [][]*Message + dispatchFailCount int + + tracked []any +} + +func (this *BroadcastFixture) Setup() { + this.ctx = context.WithValue(this.Context(), "testing", this.Name()) + this.input = make(chan *unitOfWork, 4) + this.output = make(chan *unitOfWork, 4) + this.subject = newBroadcast(this.ctx, this, this.input, this.output, this, this.wait) +} + +func (this *BroadcastFixture) wait(_ context.Context, d time.Duration) error { + this.waits = append(this.waits, d) + return this.waitErr +} + +func (this *BroadcastFixture) Track(observation any) { + this.tracked = append(this.tracked, observation) +} + +func (this *BroadcastFixture) Dispatch(ctx context.Context, messages ...*Message) error { + this.So(ctx.Value("testing"), should.Equal, this.Name()) + captured := make([]*Message, len(messages)) + copy(captured, messages) + this.dispatchMu.Lock() + this.dispatchCalls = append(this.dispatchCalls, captured) + this.dispatchMu.Unlock() + if this.dispatchFailCount > 0 { + this.dispatchFailCount-- + return errors.New("dispatch failure") + } + return nil +} + +func (this *BroadcastFixture) drain() (results []*unitOfWork) { + for unit := range this.output { + results = append(results, unit) + } + return results +} + +func (this *BroadcastFixture) TestDispatchesAllResultsThenForwardsUnit() { + m1 := &Message{Value: "a"} + m2 := &Message{Value: "b"} + this.input <- &unitOfWork{results: []*Message{m1, m2}} + close(this.input) + + go this.subject.Listen() + + units := this.drain() + this.So(len(units), should.Equal, 1) + this.So(len(this.dispatchCalls), should.Equal, 1) + this.So(this.dispatchCalls[0], should.Equal, []*Message{m1, m2}) + this.So(this.waits, should.BeEmpty) + this.So(this.tracked, should.BeEmpty) +} + +func (this *BroadcastFixture) TestEachUnitDispatchedIndependently() { + m1 := &Message{Value: "a"} + m2 := &Message{Value: "b"} + this.input <- &unitOfWork{results: []*Message{m1}} + this.input <- &unitOfWork{results: []*Message{m2}} + close(this.input) + + go this.subject.Listen() + + units := this.drain() + this.So(len(units), should.Equal, 2) + this.So(len(this.dispatchCalls), should.Equal, 2) + this.So(this.dispatchCalls[0], should.Equal, []*Message{m1}) + this.So(this.dispatchCalls[1], should.Equal, []*Message{m2}) + this.So(this.tracked, should.BeEmpty) +} + +func (this *BroadcastFixture) TestEmptyResultsTriggersEmptyDispatch() { + this.input <- &unitOfWork{} + close(this.input) + + go this.subject.Listen() + + units := this.drain() + this.So(len(units), should.Equal, 1) + this.So(len(this.dispatchCalls), should.Equal, 1) + this.So(this.dispatchCalls[0], should.BeEmpty) + this.So(this.tracked, should.BeEmpty) +} + +func (this *BroadcastFixture) TestRetriesUntilDispatchSucceeds() { + this.dispatchFailCount = 2 + m := &Message{Value: "retried"} + this.input <- &unitOfWork{results: []*Message{m}} + close(this.input) + + go this.subject.Listen() + + units := this.drain() + this.So(len(units), should.Equal, 1) + this.So(len(this.dispatchCalls), should.Equal, 3) + this.So(this.waits, should.Equal, []time.Duration{time.Second, time.Second}) + this.So(this.tracked, should.HaveLength, 2) + for n, observation := range this.tracked { + failure, ok := observation.(BroadcastError) + this.So(ok, should.BeTrue) + this.So(failure.Error, should.WrapError, ErrBroadcast) + this.So(failure.Attempt, should.Equal, n+1) + } +} + +func (this *BroadcastFixture) TestBroadcastAbandonsOnContextCancelButStillForwards() { + this.dispatchFailCount = 1 << 30 // always fail + this.waitErr = context.Canceled + unit := &unitOfWork{results: []*Message{{Value: "abandoned"}}} + this.input <- unit + close(this.input) + + go this.subject.Listen() + + units := this.drain() + this.So(units, should.Equal, []*unitOfWork{unit}) + this.So(len(this.dispatchCalls), should.Equal, 1) + this.So(this.waits, should.Equal, []time.Duration{time.Second}) + this.So(this.tracked, should.HaveLength, 2) + failure, ok := this.tracked[0].(BroadcastError) + this.So(ok, should.BeTrue) + this.So(failure.Error, should.WrapError, ErrBroadcast) + this.So(failure.Attempt, should.Equal, 1) + this.So(this.tracked[1], should.Equal, BroadcastAbandoned{Attempts: 1}) +} + +func (this *BroadcastFixture) TestClosedInputClosesOutput() { + close(this.input) + go this.subject.Listen() + + _, open := <-this.output + this.So(open, should.BeFalse) + this.So(this.tracked, should.BeEmpty) +} diff --git a/handlers/harness/06_terminal.go b/handlers/harness/06_terminal.go new file mode 100644 index 0000000..3a09ec4 --- /dev/null +++ b/handlers/harness/06_terminal.go @@ -0,0 +1,14 @@ +package harness + +type terminal struct { + input chan *unitOfWork +} + +func newTerminal(input chan *unitOfWork) *terminal { + return &terminal{input: input} +} + +func (this *terminal) Listen() { + for range this.input { + } +} diff --git a/handlers/harness/06_terminal_test.go b/handlers/harness/06_terminal_test.go new file mode 100644 index 0000000..c53cf16 --- /dev/null +++ b/handlers/harness/06_terminal_test.go @@ -0,0 +1,51 @@ +package harness + +import ( + "testing" + + "github.com/smarty/gunit/v2" + "github.com/smarty/gunit/v2/assert/should" +) + +func TestTerminalFixture(t *testing.T) { + gunit.Run(new(TerminalFixture), t) +} + +type TerminalFixture struct { + *gunit.Fixture + input chan *unitOfWork + subject *terminal +} + +func (this *TerminalFixture) Setup() { + this.input = make(chan *unitOfWork, 4) + this.subject = newTerminal(this.input) +} + +func (this *TerminalFixture) TestDrainsInputUntilClosed() { + this.input <- &unitOfWork{} + this.input <- &unitOfWork{} + this.input <- &unitOfWork{} + close(this.input) + + done := make(chan struct{}) + go func() { + this.subject.Listen() + close(done) + }() + + <-done + this.So(len(this.input), should.Equal, 0) +} + +func (this *TerminalFixture) TestEmptyClosedInputReturnsImmediately() { + close(this.input) + + done := make(chan struct{}) + go func() { + this.subject.Listen() + close(done) + }() + + <-done +} diff --git a/handlers/harness/admission.go b/handlers/harness/admission.go new file mode 100644 index 0000000..795228f --- /dev/null +++ b/handlers/harness/admission.go @@ -0,0 +1,64 @@ +package harness + +import ( + "context" + "net/http" + + "github.com/smarty/messaging/v3" +) + +type ( + admitter interface { + admit() bool + } + awaiter interface { + await(ctx context.Context, message any) + } + httpEntrypoint interface { + admitter + awaiter + } +) + +// admission refuses overloaded requests before the wrapped handler runs, +// writing an inline 503. Wrap each mutating route with it. +func admission(handler admitter, inner http.Handler) http.Handler { + return http.HandlerFunc(func(response http.ResponseWriter, request *http.Request) { + if handler.admit() { + inner.ServeHTTP(response, request) + return + } + response.Header().Set("Content-Type", "application/json; charset=utf-8") + response.Header().Set("Retry-After", "1") + response.WriteHeader(http.StatusServiceUnavailable) + _, _ = response.Write(shedResponseBody) + }) +} + +var shedResponseBody = []byte(`{"errors":[{"message":"service overloaded"}]}`) + +// HTTPAdapter adapts the entrypoint Handler for use by an http.Handler. +type HTTPAdapter interface { + // Handler is what the user-supplied http.Handler will invoke + messaging.Handler + + // HTTPHandler wraps the user-supplied http.Handler with an admission check. + HTTPHandler(inner http.Handler) (wrapped http.Handler) +} + +type httpAdapter struct { + target httpEntrypoint +} + +func newHTTPAdapter(target httpEntrypoint) *httpAdapter { + return &httpAdapter{target} +} + +func (this *httpAdapter) HTTPHandler(inner http.Handler) http.Handler { + return admission(this.target, inner) +} +func (this *httpAdapter) Handle(ctx context.Context, messages ...any) { + for _, message := range messages { + this.target.await(ctx, message) + } +} diff --git a/handlers/harness/admission_test.go b/handlers/harness/admission_test.go new file mode 100644 index 0000000..76d8b76 --- /dev/null +++ b/handlers/harness/admission_test.go @@ -0,0 +1,82 @@ +package harness + +import ( + "context" + "net/http" + "net/http/httptest" + "testing" + + "github.com/smarty/gunit/v2" + "github.com/smarty/gunit/v2/assert/should" +) + +func TestAdmissionFixture(t *testing.T) { + gunit.Run(new(AdmissionFixture), t) +} + +type AdmissionFixture struct { + *gunit.Fixture + ctx context.Context + + allow bool + ctxs []context.Context + messages []any +} + +func (this *AdmissionFixture) Setup() { + this.ctx = context.WithValue(this.Context(), "testing", this.Name()) +} + +func (this *AdmissionFixture) Handle(context.Context, ...any) {} +func (this *AdmissionFixture) admit() bool { + return this.allow +} +func (this *AdmissionFixture) await(ctx context.Context, message any) { + this.So(ctx.Value("testing"), should.Equal, this.Name()) + this.messages = append(this.messages, message) +} + +func (this *AdmissionFixture) ServeHTTP(http.ResponseWriter, *http.Request) {} + +func (this *AdmissionFixture) TestAsHTTPHandler_ForwardsSingleMessageToAwait() { + newHTTPAdapter(this).Handle(this.ctx, "x") + this.So(this.messages, should.Equal, []any{"x"}) +} + +func (this *AdmissionFixture) TestAsHTTPHandler_ForwardsEachMessageInOrder() { + newHTTPAdapter(this).Handle(this.ctx, "a", "b") + this.So(this.messages, should.Equal, []any{"a", "b"}) +} + +func (this *AdmissionFixture) TestAdmission_PassesThroughWhenAdmitted() { + var ran bool + inner := http.HandlerFunc(func(response http.ResponseWriter, _ *http.Request) { + ran = true + response.WriteHeader(http.StatusTeapot) + _, _ = response.Write([]byte("inner")) + }) + + recorder := httptest.NewRecorder() + request := httptest.NewRequest(http.MethodPost, "/admin/orders", nil) + this.allow = true + newHTTPAdapter(this).HTTPHandler(inner).ServeHTTP(recorder, request) + + this.So(ran, should.BeTrue) + this.So(recorder.Code, should.Equal, http.StatusTeapot) + this.So(recorder.Body.String(), should.Equal, "inner") +} + +func (this *AdmissionFixture) TestAdmission_Writes503WhenRejected() { + var ran bool + inner := http.HandlerFunc(func(http.ResponseWriter, *http.Request) { ran = true }) + + recorder := httptest.NewRecorder() + request := httptest.NewRequest(http.MethodPost, "/admin/orders", nil) + newHTTPAdapter(this).HTTPHandler(inner).ServeHTTP(recorder, request) + + this.So(ran, should.BeFalse) + this.So(recorder.Code, should.Equal, http.StatusServiceUnavailable) + this.So(recorder.Header().Get("Content-Type"), should.Equal, "application/json; charset=utf-8") + this.So(recorder.Header().Get("Retry-After"), should.Equal, "1") + this.So(recorder.Body.String(), should.Equal, `{"errors":[{"message":"service overloaded"}]}`) +} diff --git a/handlers/harness/config.go b/handlers/harness/config.go new file mode 100644 index 0000000..a100abe --- /dev/null +++ b/handlers/harness/config.go @@ -0,0 +1,171 @@ +// Package harness provides a staged, store-and-forward message-handling +// pipeline composed of goroutine stages (entrypoint, execution, serialization, +// persistence, completion, broadcast, terminal) connected by buffered channels. +// +// Callers register domain objects whose Execute.../Apply... methods drive the +// pipeline via Options.Types(...), and supply collaborators (Writer, Dispatcher, +// Serializer, Monitor) via the corresponding Options.*. All collaborators +// default to a no-op implementation, so omitting them produces a runnable but +// inert pipeline — useful for tests, but not for production. +// +// The only exported entry point is New(ctx, options...); every internal stage +// type is unexported and cannot be constructed directly by callers. +// +// The persistence and broadcast stages retry their collaborators on failure; +// those retry loops abort when the context passed to New(ctx, ...) is cancelled, +// so consumers must cancel it on shutdown to avoid hanging the drain. Custom +// Writer and Dispatcher implementations must honor the context they are given. +// +// Values produced by registered domain types must serialize successfully — +// that is the calling application's contract. If the Serializer ever returns +// an error, the pipeline tracks a SerializationError observation and then +// panics, halting the process before the unit of work reaches persistence: +// nothing is stored, acked, or dispatched, and the message source redelivers +// after restart. The failure is deterministic, so the application crash-loops +// until the offending domain type is fixed. Messages already in the pipeline +// may not persist (requiring retry from the caller or redelivery from the MQ). +package harness + +import ( + "context" + "io" + "net/http" + + "github.com/smarty/messaging/v3" +) + +// New constructs a staged, store-and-forward message-handling pipeline. +// Register domain types (handlers/observers) via Options.Types, and wire +// real Writer, Dispatcher, Serializer, and Monitor collaborators via the +// corresponding Options.* functions. Collaborators default to a shared +// no-op implementation, so omitting them produces a runnable but inert +// pipeline — useful for tests, but not for production. +func New(ctx context.Context, options ...option) Pipeline { + var cfg configuration + for _, apply := range Options.defaults(options...) { + apply(&cfg) + } + return build(ctx, cfg) +} + +type Pipeline struct { + // SheddingHTTPWrapper is meant to wrap around any http.Handler that calls SheddingEntrypoint. + // It responds with HTTP 503 in the event that the handler is backed up beyond the configured ShedThreshold. + SheddingHTTPWrapper func(http.Handler) http.Handler + + // SheddingEntrypoint is a messaging.Handler that is meant to be guarded by an admitter (such as SheddingHTTPWrapper). + SheddingEntrypoint messaging.Handler + + // BlockingEntrypoint is a messaging.Handler that will block until the results of the provided work have been durably stored. + BlockingEntrypoint messaging.Handler + + // Listeners contains each phase of the harness pipeline (serialization, persistence, broadcast, etc.). + // Each listener should be invoked on a separate goroutine by a component like github.com/smarty/dominoes. + Listeners []messaging.Listener +} + +var Options singleton + +type singleton struct{} +type option func(*configuration) + +type configuration struct { + monitor Monitor + serializer serializer + writer Writer + dispatcher Dispatcher + types []any + burstCapacity int + pipelineBufferCapacity int + executionUnitSize int + serializerCount int + shedThreshold float64 +} + +// Types registers the domain objects whose Execute.../Apply... methods drive +// the pipeline. They are passed verbatim to newRouter(...) at build time. +func (singleton) Types(value ...any) option { + return func(this *configuration) { this.types = value } +} + +// Monitor sets the Monitor collaborator that receives pipeline observations +// (BatchInFlight, BatchComplete, LoadShed, SerializationError, etc.). +func (singleton) Monitor(value Monitor) option { + return func(this *configuration) { this.monitor = value } +} + +// Serializer sets the collaborator used to encode outgoing messages into bytes. +func (singleton) Serializer(value serializer) option { + return func(this *configuration) { this.serializer = value } +} + +// Writer sets the collaborator that persists encoded messages (e.g. to a database or message store). +func (singleton) Writer(value Writer) option { + return func(this *configuration) { this.writer = value } +} + +// Dispatcher sets the collaborator that broadcasts outgoing messages to downstream consumers. +func (singleton) Dispatcher(value Dispatcher) option { + return func(this *configuration) { this.dispatcher = value } +} + +// BurstCapacity sets the buffer size of the channel between the entrypoint and +// execution stages. Larger values absorb more burst traffic before back-pressure +// reaches callers. Default: 1024. +func (singleton) BurstCapacity(value int) option { + return func(this *configuration) { this.burstCapacity = value } +} + +// PipelineBufferCapacity sets the buffer size of the channels connecting all pipeline +// stages after execution (serialization → persistence → completion → broadcast → +// terminal). Default: 4. +func (singleton) PipelineBufferCapacity(value int) option { + return func(this *configuration) { this.pipelineBufferCapacity = value } +} + +// ExecutionUnitSize sets the maximum number of batches coalesced into a single unit of +// work before the execution stage flushes downstream. Higher values increase +// throughput at the cost of latency per batch. Default: 64. +func (singleton) ExecutionUnitSize(value int) option { + return func(this *configuration) { this.executionUnitSize = value } +} + +// SerializerCount sets the number of concurrent serialization goroutines. +// Default: 4. +func (singleton) SerializerCount(value int) option { + return func(this *configuration) { this.serializerCount = value } +} + +// ShedThreshold sets the load-shedding threshold as a fraction of BurstCapacity +// in the range [0, 1]. When the batch channel fill ratio meets or exceeds this +// value, new callers are refused (admission returns 503; Handle is a no-op). +// This option only affects HTTP callers. +// Default: 0.80. +func (singleton) ShedThreshold(value float64) option { + return func(this *configuration) { this.shedThreshold = value } +} + +func (singleton) defaults(options ...option) []option { + blank := nop{} + return append([]option{ + Options.Monitor(blank), + Options.Serializer(blank), + Options.Writer(blank), + Options.Dispatcher(blank), + Options.BurstCapacity(1024), + Options.PipelineBufferCapacity(4), + Options.ExecutionUnitSize(64), + Options.SerializerCount(4), + Options.ShedThreshold(0.80), + }, options...) +} + +// nop satisfies every collaborator interface so New(...) can be called with +// zero options and still produce a runnable (if inert) pipeline. +type nop struct{} + +func (nop) Track(any) {} +func (nop) Serialize(io.Writer, any) error { return nil } +func (nop) ContentType() string { return "" } +func (nop) Write(context.Context, ...*Message) error { return nil } +func (nop) Dispatch(context.Context, ...*Message) error { return nil } diff --git a/handlers/harness/config_test.go b/handlers/harness/config_test.go new file mode 100644 index 0000000..1bd4509 --- /dev/null +++ b/handlers/harness/config_test.go @@ -0,0 +1,106 @@ +package harness + +import ( + "context" + "sync" + "testing" + + "github.com/smarty/gunit/v2" + "github.com/smarty/gunit/v2/assert/should" +) + +func TestConfigFixture(t *testing.T) { + gunit.Run(new(ConfigFixture), t) +} + +type ConfigFixture struct { + *gunit.Fixture +} + +func (this *ConfigFixture) apply(options ...option) configuration { + var cfg configuration + for _, item := range Options.defaults(options...) { + item(&cfg) + } + return cfg +} + +func (this *ConfigFixture) TestNop() { + var n nop + this.So(func() { n.Track(nil) }, should.NOT.Panic) + this.So(n.Serialize(nil, nil), should.BeNil) + this.So(n.ContentType(), should.BeEmpty) + this.So(n.Write(nil), should.BeNil) + this.So(n.Dispatch(nil), should.BeNil) +} + +func (this *ConfigFixture) TestDefaultsPopulateCapacities() { + cfg := this.apply() + this.So(cfg.burstCapacity, should.Equal, 1024) + this.So(cfg.pipelineBufferCapacity, should.Equal, 4) + this.So(cfg.executionUnitSize, should.Equal, 64) + this.So(cfg.serializerCount, should.Equal, 4) + this.So(cfg.shedThreshold, should.Equal, 0.80) +} + +func (this *ConfigFixture) TestDefaultCollaboratorsAreNop() { + cfg := this.apply() + this.So(cfg.monitor, should.Equal, nop{}) + this.So(cfg.serializer, should.Equal, nop{}) + this.So(cfg.writer, should.Equal, nop{}) + this.So(cfg.dispatcher, should.Equal, nop{}) +} + +func (this *ConfigFixture) TestTypesOptionStoresValuesVerbatim() { + cfg := this.apply(Options.Types("a", 42, struct{}{})) + this.So(cfg.types, should.Equal, []any{"a", 42, struct{}{}}) +} + +func (this *ConfigFixture) TestTunableOptionsOverrideDefaults() { + cfg := this.apply( + Options.BurstCapacity(2), + Options.PipelineBufferCapacity(2), + Options.ExecutionUnitSize(8), + Options.SerializerCount(3), + Options.ShedThreshold(0.5), + ) + this.So(cfg.burstCapacity, should.Equal, 2) + this.So(cfg.pipelineBufferCapacity, should.Equal, 2) + this.So(cfg.executionUnitSize, should.Equal, 8) + this.So(cfg.serializerCount, should.Equal, 3) + this.So(cfg.shedThreshold, should.Equal, 0.5) +} + +func (this *ConfigFixture) TestCollaboratorOptionsOverrideDefaults() { + recorder := &recordingMonitor{} + cfg := this.apply(Options.Monitor(recorder)) + this.So(cfg.monitor, should.Equal, recorder) +} + +func (this *ConfigFixture) TestZeroOptionsPipelineRunsInertly() { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + pipeline := New(ctx) + + done := make(chan struct{}) + go func() { + var wg sync.WaitGroup + for _, listener := range pipeline.Listeners { + wg.Go(listener.Listen) + } + wg.Wait() + close(done) + }() + + pipeline.BlockingEntrypoint.Handle(ctx, "payload") + pipeline.SheddingEntrypoint.Handle(ctx, "payload") + this.So(pipeline.BlockingEntrypoint.(interface{ Close() error }).Close(), should.BeNil) + <-done +} + +type recordingMonitor struct{ observations []any } + +func (this *recordingMonitor) Track(observation any) { + this.observations = append(this.observations, observation) +} diff --git a/handlers/harness/contracts.go b/handlers/harness/contracts.go new file mode 100644 index 0000000..765e90d --- /dev/null +++ b/handlers/harness/contracts.go @@ -0,0 +1,79 @@ +package harness + +import ( + "context" + "errors" + "io" +) + +// Exported collaborator interfaces — callers supply real implementations via Options.*. + +type ( + Writer interface { + Write(ctx context.Context, messages ...*Message) error + } + Dispatcher interface { + Dispatch(ctx context.Context, messages ...*Message) error + } + Monitor interface { + Track(observation any) + } +) + +// Unexported internal interfaces — discovered reflectively from domain types +// supplied via Options.Types(...). + +type ( + executor interface { + Execute(message any, broadcast func(...any)) + } + applicator interface { + Apply(message any) + } + serializer interface { + Serialize(out io.Writer, in any) error + ContentType() string + } +) + +// Monitor observations emitted by the pipeline. + +type ( + BatchInFlight struct{} + BatchComplete struct{} + LoadShed struct{} + CallerDeparted struct{} + SerializationError struct { + Value any + Error error + } + PersistenceError struct { + Attempt int + Error error + } + PersistenceAbandoned struct{ Attempts int } + BroadcastError struct { + Attempt int + Error error + } + BroadcastAbandoned struct{ Attempts int } +) + +var ( + ErrSerialization = errors.New("serialization error") + ErrPersistence = errors.New("persistence error") + ErrBroadcast = errors.New("broadcast error") +) + +// Unexported value types shared across pipeline stages. + +type ( + batch struct { + messages []any + complete func() + } + unitOfWork struct { + results []*Message + completions []func() + } +) diff --git a/handlers/harness/fanout.go b/handlers/harness/fanout.go new file mode 100644 index 0000000..4c85741 --- /dev/null +++ b/handlers/harness/fanout.go @@ -0,0 +1,46 @@ +package harness + +import ( + "sync" + + "github.com/smarty/messaging/v3" +) + +type stationFactory func(in, out chan *unitOfWork) messaging.Listener + +func newFanOut(factory stationFactory, workerCount, unitCapacity int, input, finalOutput chan *unitOfWork) []messaging.Listener { + var ( + listeners = make([]messaging.Listener, workerCount) + outputs = make([]chan *unitOfWork, workerCount) + ) + for i := range workerCount { + outputs[i] = make(chan *unitOfWork, unitCapacity) + listeners[i] = factory(input, outputs[i]) + } + return append(listeners, newFanIn(outputs, finalOutput)) +} + +type fanIn struct { + inputs []chan *unitOfWork + output chan *unitOfWork +} + +func newFanIn(inputs []chan *unitOfWork, output chan *unitOfWork) *fanIn { + return &fanIn{ + inputs: inputs, + output: output, + } +} + +func (this *fanIn) Listen() { + defer close(this.output) + var waiter sync.WaitGroup + for _, input := range this.inputs { + waiter.Go(func() { + for unit := range input { + this.output <- unit + } + }) + } + waiter.Wait() +} diff --git a/handlers/harness/message.go b/handlers/harness/message.go new file mode 100644 index 0000000..7b591c4 --- /dev/null +++ b/handlers/harness/message.go @@ -0,0 +1,22 @@ +package harness + +import "bytes" + +type Message struct { + // ID represents the unique ID of this message and its sequential place within a larger stream. + ID uint64 + + // Type is the name registered for the (Go) type of the Value. + // (i.e. 'subscription:subscription-renewed-v2'). + Type string + + // Value contains the in-memory Go message structure. + Value any + + // Content contains the serialized representation of the Go Value. + Content *bytes.Buffer + + // ContentType identifies the serialization method employed to represent the Content + // (i.e. 'application/json; charset=utf-8'). + ContentType string +} diff --git a/handlers/harness/pipeline.go b/handlers/harness/pipeline.go new file mode 100644 index 0000000..7976941 --- /dev/null +++ b/handlers/harness/pipeline.go @@ -0,0 +1,54 @@ +package harness + +import ( + "context" + + "github.com/smarty/messaging/v3" +) + +func build(ctx context.Context, config configuration) (result Pipeline) { + var ( + batches = make(chan *batch, config.burstCapacity) + work1 = make(chan *unitOfWork, config.pipelineBufferCapacity) + work2 = make(chan *unitOfWork, config.pipelineBufferCapacity) + work3 = make(chan *unitOfWork, config.pipelineBufferCapacity) + work4 = make(chan *unitOfWork, config.pipelineBufferCapacity) + work5 = make(chan *unitOfWork, config.pipelineBufferCapacity) + ) + + var ( + entrypoint = newEntrypoint(config.monitor, batches, config.shedThreshold) + executor = newExecution(config.monitor, config.executionUnitSize, batches, work1, newRouter(config.types...)) + serializers = newFanOut(serializationFactory(config.monitor, config.serializer), config.serializerCount, config.pipelineBufferCapacity, work1, work2) + persistence = newPersistence(ctx, config.monitor, work2, work3, config.writer, wait) + completion = newCompletion(work3, work4) + broadcast = newBroadcast(ctx, config.monitor, work4, work5, config.dispatcher, wait) + terminal = newTerminal(work5) + ) + + var listeners []messaging.Listener + listeners = append(listeners, + entrypoint, + executor, + ) + listeners = append(listeners, serializers...) + listeners = append(listeners, + persistence, + completion, + broadcast, + terminal, + ) + adapter := newHTTPAdapter(entrypoint) + return Pipeline{ + SheddingHTTPWrapper: adapter.HTTPHandler, + SheddingEntrypoint: adapter, + BlockingEntrypoint: entrypoint, + Listeners: listeners, + } +} + +func serializationFactory(monitor Monitor, enc serializer) stationFactory { + return func(in, out chan *unitOfWork) messaging.Listener { + return newSerialization(monitor, enc, in, out) + } +} diff --git a/handlers/harness/pipeline_test.go b/handlers/harness/pipeline_test.go new file mode 100644 index 0000000..f061713 --- /dev/null +++ b/handlers/harness/pipeline_test.go @@ -0,0 +1,200 @@ +package harness + +import ( + "context" + "io" + "sync" + "testing" + + "github.com/smarty/gunit/v2" + "github.com/smarty/gunit/v2/assert/better" + "github.com/smarty/gunit/v2/assert/should" +) + +func TestPipelineFixture(t *testing.T) { + gunit.Run(new(PipelineFixture), t) +} + +type PipelineFixture struct { + *gunit.Fixture + ctx context.Context + pipeline Pipeline + waiter sync.WaitGroup + + executeLock sync.Mutex + executeCalls []any + executeOutputs [][]any + + writeLock sync.Mutex + writeCalls [][]*Message + dispatchLock sync.Mutex + dispatchCalls [][]*Message + + trackLock sync.Mutex + tracked []any +} + +type commandType string + +func (this *PipelineFixture) Setup() { + this.ctx = context.WithValue(this.Context(), "testing", this.Name()) + this.pipeline = New(this.ctx, + Options.Types(this), + Options.Monitor(this), + Options.Serializer(this), + Options.Writer(this), + Options.Dispatcher(this), + ) + for _, listener := range this.pipeline.Listeners { + this.waiter.Go(listener.Listen) + } +} + +// ExecuteCommand is picked up by scan() as the Execute-prefixed method driving +// the pipeline: every Handle(ctx, msg) call dispatches into this method. +func (this *PipelineFixture) ExecuteCommand(_ commandType, broadcast func(...any)) { + this.executeLock.Lock() + defer this.executeLock.Unlock() + if len(this.executeOutputs) == 0 { + return + } + out := this.executeOutputs[0] + this.executeOutputs = this.executeOutputs[1:] + broadcast(out...) +} + +// Execute wraps the fixture so it also satisfies the package-private executor +// interface directly — which is what scan() will pick up. We count calls here. +func (this *PipelineFixture) Execute(message any, broadcast func(...any)) { + this.executeLock.Lock() + this.executeCalls = append(this.executeCalls, message) + this.executeLock.Unlock() + this.ExecuteCommand(commandType(""), broadcast) +} + +func (this *PipelineFixture) Serialize(out io.Writer, _ any) error { + _, _ = out.Write([]byte("encoded")) + return nil +} + +func (this *PipelineFixture) ContentType() string { return "" } + +func (this *PipelineFixture) Write(ctx context.Context, messages ...*Message) error { + this.So(ctx.Value("testing"), should.Equal, this.Name()) + buffer := make([]*Message, len(messages)) + copy(buffer, messages) + this.writeLock.Lock() + this.writeCalls = append(this.writeCalls, buffer) + this.writeLock.Unlock() + return nil +} + +func (this *PipelineFixture) Dispatch(ctx context.Context, messages ...*Message) error { + this.So(ctx.Value("testing"), should.Equal, this.Name()) + captured := make([]*Message, len(messages)) + copy(captured, messages) + this.dispatchLock.Lock() + this.dispatchCalls = append(this.dispatchCalls, captured) + this.dispatchLock.Unlock() + return nil +} + +func (this *PipelineFixture) Track(observation any) { + this.trackLock.Lock() + defer this.trackLock.Unlock() + this.tracked = append(this.tracked, observation) +} + +func (this *PipelineFixture) countTracked() (batchInFlight, batchComplete int) { + this.trackLock.Lock() + defer this.trackLock.Unlock() + for _, observation := range this.tracked { + switch observation.(type) { + case BatchInFlight: + batchInFlight++ + case BatchComplete: + batchComplete++ + } + } + return batchInFlight, batchComplete +} + +func (this *PipelineFixture) shutdown() { + closer, ok := this.pipeline.BlockingEntrypoint.(io.Closer) + this.So(ok, better.BeTrue) + this.So(closer.Close(), should.BeNil) + this.waiter.Wait() +} + +func (this *PipelineFixture) TestPipelineRoutesMessageThroughExecutionPersistenceBroadcast() { + this.executeOutputs = [][]any{{"event-A", "event-B"}} + + this.pipeline.BlockingEntrypoint.Handle(this.ctx, commandType("command-1")) + this.shutdown() + + this.So(len(this.executeCalls), should.Equal, 1) + + this.So(len(this.writeCalls), should.Equal, 1) + this.So(len(this.writeCalls[0]), should.Equal, 2) + this.So(this.writeCalls[0][0].Value, should.Equal, "event-A") + this.So(this.writeCalls[0][1].Value, should.Equal, "event-B") + + this.So(len(this.dispatchCalls), should.Equal, 1) + this.So(len(this.dispatchCalls[0]), should.Equal, 2) + this.So(this.dispatchCalls[0][0].Value, should.Equal, "event-A") + this.So(this.dispatchCalls[0][1].Value, should.Equal, "event-B") + + batchInFlight, batchComplete := this.countTracked() + this.So(batchInFlight, should.Equal, 1) + this.So(batchComplete, should.Equal, 1) +} + +func (this *PipelineFixture) TestPipelineHandlesMultipleMessagesAcrossHandleCalls() { + this.executeOutputs = [][]any{{"e1"}, {"e2"}, {"e3"}} + + this.pipeline.BlockingEntrypoint.Handle(this.ctx, commandType("c1")) + this.pipeline.BlockingEntrypoint.Handle(this.ctx, commandType("c2")) + this.pipeline.BlockingEntrypoint.Handle(this.ctx, commandType("c3")) + this.shutdown() + + this.So(len(this.executeCalls), should.Equal, 3) + + var written []any + for _, call := range this.writeCalls { + for _, message := range call { + written = append(written, message.Value) + } + } + this.So(written, should.Equal, []any{"e1", "e2", "e3"}) + + var dispatched []any + for _, call := range this.dispatchCalls { + for _, message := range call { + dispatched = append(dispatched, message.Value) + } + } + this.So(dispatched, should.Equal, []any{"e1", "e2", "e3"}) + + batchInFlight, batchComplete := this.countTracked() + this.So(batchInFlight, should.Equal, 3) + this.So(batchComplete, should.Equal, 3) +} + +func (this *PipelineFixture) TestPipelineShutsDownWithNoTraffic() { + this.shutdown() + this.So(len(this.executeCalls), should.Equal, 0) + this.So(len(this.writeCalls), should.Equal, 0) + this.So(len(this.dispatchCalls), should.Equal, 0) + this.So(this.tracked, should.BeEmpty) +} + +func (this *PipelineFixture) TestPipelineSerializesEachBroadcastResult() { + this.executeOutputs = [][]any{{map[string]int{"value": 42}}} + + this.pipeline.BlockingEntrypoint.Handle(this.ctx, commandType("go")) + this.shutdown() + + this.So(len(this.writeCalls), should.Equal, 1) + message := this.writeCalls[0][0] + this.So(message.Content.Len() > 0, should.BeTrue) +} diff --git a/handlers/harness/pool.go b/handlers/harness/pool.go new file mode 100644 index 0000000..239d6ad --- /dev/null +++ b/handlers/harness/pool.go @@ -0,0 +1,13 @@ +package harness + +import "sync" + +func newT[T any]() *T { return new(T) } + +type poolT[T any] struct{ pool *sync.Pool } + +func newPoolT[T any](new func() T) *poolT[T] { + return &poolT[T]{pool: &sync.Pool{New: func() any { return new() }}} +} +func (this *poolT[T]) Get() T { return this.pool.Get().(T) } +func (this *poolT[T]) Put(t T) { this.pool.Put(t) } diff --git a/handlers/harness/retry.go b/handlers/harness/retry.go new file mode 100644 index 0000000..94aaf87 --- /dev/null +++ b/handlers/harness/retry.go @@ -0,0 +1,18 @@ +package harness + +import ( + "context" + "time" +) + +// wait sleeps for d, or returns early with ctx.Err() if ctx is cancelled first. +func wait(ctx context.Context, d time.Duration) error { + timer := time.NewTimer(d) + defer timer.Stop() + select { + case <-ctx.Done(): + return ctx.Err() + case <-timer.C: + return nil + } +} diff --git a/handlers/harness/retry_test.go b/handlers/harness/retry_test.go new file mode 100644 index 0000000..21ef13c --- /dev/null +++ b/handlers/harness/retry_test.go @@ -0,0 +1,41 @@ +package harness + +import ( + "context" + "testing" + "time" + + "github.com/smarty/gunit/v2" + "github.com/smarty/gunit/v2/assert/should" +) + +func TestWaitFixture(t *testing.T) { + gunit.Run(new(WaitFixture), t) +} + +type WaitFixture struct { + *gunit.Fixture +} + +func (this *WaitFixture) TestContextAlreadyCancelled_ReturnImmediately() { + ctx, cancel := context.WithCancel(this.Context()) + cancel() + + started := time.Now() + err := wait(ctx, time.Hour) + ended := time.Since(started) + + this.So(err, should.Equal, context.Canceled) + this.So(ended, should.BeLessThan, time.Second) +} + +func (this *WaitFixture) TestWait() { + const duration = time.Millisecond * 5 + + started := time.Now() + err := wait(this.Context(), duration) + ended := time.Since(started) + + this.So(err, should.BeNil) + this.So(ended, should.BeGreaterThanOrEqualTo, duration) +} diff --git a/handlers/harness/routing.go b/handlers/harness/routing.go new file mode 100644 index 0000000..02b24ae --- /dev/null +++ b/handlers/harness/routing.go @@ -0,0 +1,65 @@ +package harness + +import "reflect" + +// router dispatches messages to Execute*/Apply* methods discovered reflectively +// from the domain types registered via Options.Types(...). It is unexported +// because callers never hold one directly — build(ctx, cfg) constructs the sole +// instance per pipeline and feeds it into the execution stage as its executor +// collaborator. The router is not safe for concurrent use; the pipeline only +// calls Execute from within a single execution goroutine. +type router struct { + executors map[reflect.Type][]executor + applicators map[reflect.Type][]applicator + exclusions []exclusion +} +type exclusion struct { + message any + self applicator +} + +func newRouter(types ...any) *router { + executors, applicators := scan(types...) + return &router{ + executors: executors, + applicators: applicators, + } +} + +func (this *router) Apply(message any) { + for _, a := range this.applicators[reflect.TypeOf(message)] { + a.Apply(message) + } +} + +func (this *router) Execute(message any, broadcast func(...any)) { + for _, e := range this.executors[reflect.TypeOf(message)] { + e.Execute(message, func(results ...any) { + for _, result := range results { + self := this.selfApplicator(result, e) + if self != nil { + self.Apply(result) + } + this.exclusions = append(this.exclusions, exclusion{message: result, self: self}) + } + broadcast(results...) + }) + } + for _, e := range this.exclusions { + for _, a := range this.applicators[reflect.TypeOf(e.message)] { + if a == e.self { + continue + } + a.Apply(e.message) + } + } + this.exclusions = this.exclusions[:0] +} +func (this *router) selfApplicator(result any, e executor) applicator { + for _, a := range this.applicators[reflect.TypeOf(result)] { + if any(a) == any(e) { + return a + } + } + return nil +} diff --git a/handlers/harness/routing_test.go b/handlers/harness/routing_test.go new file mode 100644 index 0000000..ce74d2c --- /dev/null +++ b/handlers/harness/routing_test.go @@ -0,0 +1,208 @@ +package harness + +import ( + "testing" + + "github.com/smarty/gunit/v2" + "github.com/smarty/gunit/v2/assert/should" +) + +func TestRouterFixture(t *testing.T) { + gunit.Run(new(RouterFixture), t) +} + +type RouterFixture struct { + *gunit.Fixture + orchestrator *fakeOrchestrator + observer *fakeObserver + router *router +} + +func (this *RouterFixture) Setup() { + this.orchestrator = new(fakeOrchestrator) + this.observer = new(fakeObserver) + this.router = newRouter(this.orchestrator, this.observer, new(fakeBystander)) +} + +func (this *RouterFixture) TestExecuteRoutesToRegisteredExecutorAndForwardsBroadcast() { + var broadcast []any + this.router.Execute(messageA{value: "yo"}, func(out ...any) { + broadcast = append(broadcast, out...) + }) + + this.So(this.orchestrator.executed, should.Equal, []any{messageA{value: "yo"}}) + this.So(broadcast, should.Equal, []any{resultA{value: "yo"}}) +} + +func (this *RouterFixture) TestExecuteAppliesResultToSelfApplicator() { + this.router.Execute(messageA{value: "yo"}, func(...any) {}) + + this.So(this.orchestrator.applied, should.Equal, []any{resultA{value: "yo"}}) +} + +func (this *RouterFixture) TestExecuteAppliesResultToOtherApplicators() { + this.router.Execute(messageA{value: "yo"}, func(...any) {}) + + this.So(this.observer.applied, should.Equal, []any{resultA{value: "yo"}}) +} + +func (this *RouterFixture) TestExecuteAppliesEachResultExactlyOncePerApplicator() { + this.router.Execute(messageA{value: "yo"}, func(...any) {}) + + this.So(len(this.orchestrator.applied), should.Equal, 1) + this.So(len(this.observer.applied), should.Equal, 1) +} + +func (this *RouterFixture) TestExecuteUnknownTypeIsNoOp() { + var broadcast []any + this.router.Execute(unknownMsg{}, func(out ...any) { + broadcast = append(broadcast, out...) + }) + + this.So(broadcast, should.BeEmpty) + this.So(len(this.orchestrator.executed), should.Equal, 0) +} + +func (this *RouterFixture) TestExecuteIgnoresMethodsWithBadSignatures() { + var broadcast []any + this.router.Execute(bogusExecMsg{}, func(out ...any) { + broadcast = append(broadcast, out...) + }) + + this.So(broadcast, should.BeEmpty) + this.So(len(this.orchestrator.executed), should.Equal, 0) +} + +func (this *RouterFixture) TestApplyRoutesToAllRegisteredApplicators() { + this.router.Apply(resultA{value: "yay"}) + + this.So(this.orchestrator.applied, should.Equal, []any{resultA{value: "yay"}}) + this.So(this.observer.applied, should.Equal, []any{resultA{value: "yay"}}) +} + +func (this *RouterFixture) TestApplyIgnoresMethodsWithBadSignatures() { + this.router.Apply(bogusApplyMsg{}) + + this.So(len(this.orchestrator.applied), should.Equal, 0) + this.So(len(this.observer.applied), should.Equal, 0) +} + +func (this *RouterFixture) TestApplyUnknownTypeIsNoOp() { + this.router.Apply(unknownMsg{}) + + this.So(len(this.orchestrator.applied), should.Equal, 0) +} + +func (this *RouterFixture) TestExecuteResetsExclusionsBetweenInvocations() { + this.router.Execute(messageA{value: "first"}, func(...any) {}) + this.router.Execute(messageA{value: "second"}, func(...any) {}) + + this.So(this.orchestrator.applied, should.Equal, []any{ + resultA{value: "first"}, resultA{value: "second"}, + }) + this.So(this.observer.applied, should.Equal, []any{ + resultA{value: "first"}, resultA{value: "second"}, + }) +} + +// TestExecuteResultWithNoSelfApplicator covers the case where an executor produces +// a result whose type it does not apply itself, exercising selfApplicator's nil-return. +func (this *RouterFixture) TestExecuteResultWithNoSelfApplicator() { + var broadcast []any + this.router.Execute(messageB{value: "ping"}, func(out ...any) { + broadcast = append(broadcast, out...) + }) + + this.So(broadcast, should.Equal, []any{resultB{value: "ping"}}) + this.So(this.observer.appliedB, should.Equal, []any{resultB{value: "ping"}}) +} + +type ( + messageA struct{ value string } + resultA struct{ value string } + messageB struct{ value string } + resultB struct{ value string } + unknownMsg struct{} + bogusExecMsg struct{} + bogusApplyMsg struct{} +) + +// fakeOrchestrator satisfies both executor and applicator; its specific Execute*/Apply* +// methods exercise every branch of scan() — both the matching cases and the cases that +// must be filtered out. +type fakeOrchestrator struct { + executed []any + applied []any +} + +func (this *fakeOrchestrator) Execute(message any, broadcast func(...any)) { + switch typed := message.(type) { + case messageA: + this.ExecuteMessageA(typed, broadcast) + case messageB: + this.ExecuteMessageB(typed, broadcast) + } +} +func (this *fakeOrchestrator) Apply(message any) { + switch typed := message.(type) { + case resultA: + this.ApplyResultA(typed) + } +} +func (this *fakeOrchestrator) ExecuteMessageA(msg messageA, broadcast func(...any)) { + this.executed = append(this.executed, msg) + broadcast(resultA{value: msg.value}) +} +func (this *fakeOrchestrator) ExecuteMessageB(msg messageB, broadcast func(...any)) { + this.executed = append(this.executed, msg) + broadcast(resultB{value: msg.value}) +} +func (this *fakeOrchestrator) ApplyResultA(msg resultA) { + this.applied = append(this.applied, msg) +} + +func (this *fakeOrchestrator) Execute2() { + panic("scan() must skip methods with no message argument") +} +func (this *fakeOrchestrator) ExecuteBadReturn(_ bogusExecMsg, _ func(...any)) error { + panic("scan() must skip methods with non-zero return values") +} +func (this *fakeOrchestrator) ExecuteShortArgs(_ bogusExecMsg) { + panic("scan() must skip Execute methods that lack the broadcast func") +} +func (this *fakeOrchestrator) ExecuteBadLast(_ bogusExecMsg, _ string) { + panic("scan() must skip Execute methods whose last arg is not func(...any)") +} +func (this *fakeOrchestrator) ApplyBadReturn(_ bogusApplyMsg) error { + panic("scan() must skip Apply methods with non-zero return values") +} +func (this *fakeOrchestrator) ApplyExtraArg(_ bogusApplyMsg, _ string) { + panic("scan() must skip Apply methods with more than one argument") +} +func (this *fakeOrchestrator) Bogus(_ messageA) { + panic("scan() must skip methods that lack the Execute/Apply prefix") +} + +type fakeObserver struct { + applied []any + appliedB []any +} + +func (this *fakeObserver) Apply(message any) { + switch typed := message.(type) { + case resultA: + this.ApplyResultA(typed) + case resultB: + this.ApplyResultB(typed) + } +} +func (this *fakeObserver) ApplyResultA(msg resultA) { + this.applied = append(this.applied, msg) +} +func (this *fakeObserver) ApplyResultB(msg resultB) { + this.appliedB = append(this.appliedB, msg) +} + +type fakeBystander struct{} + +func (this *fakeBystander) Hello() {} diff --git a/handlers/harness/scanner.go b/handlers/harness/scanner.go new file mode 100644 index 0000000..91ff9ae --- /dev/null +++ b/handlers/harness/scanner.go @@ -0,0 +1,48 @@ +package harness + +import ( + "reflect" + "strings" +) + +func scan(types ...any) (executors map[reflect.Type][]executor, applicators map[reflect.Type][]applicator) { + executors = make(map[reflect.Type][]executor) + applicators = make(map[reflect.Type][]applicator) + for _, v := range types { + e, isExecutor := v.(executor) + a, isApplicator := v.(applicator) + if !isExecutor && !isApplicator { + continue + } + t := reflect.TypeOf(v) + for x := range t.NumMethod() { + method := t.Method(x) + argCount := method.Type.NumIn() + if argCount <= 1 { + continue + } + if method.Type.NumOut() > 0 { + continue + } + if isExecutor && strings.HasPrefix(method.Name, "Execute") && len(method.Name) > len("Execute") { + if argCount != 3 { + continue + } + lastArg := method.Type.In(argCount - 1) + if lastArg != reflect.TypeOf(func(...any) {}) { + continue + } + handledType := method.Type.In(1) + executors[handledType] = append(executors[handledType], e) + } + if isApplicator && strings.HasPrefix(method.Name, "Apply") && len(method.Name) > len("Apply") { + if argCount != 2 { + continue + } + handledType := method.Type.In(1) + applicators[handledType] = append(applicators[handledType], a) + } + } + } + return executors, applicators +} diff --git a/handlers/harness/sqladapter/contracts.go b/handlers/harness/sqladapter/contracts.go new file mode 100644 index 0000000..fd1af2b --- /dev/null +++ b/handlers/harness/sqladapter/contracts.go @@ -0,0 +1,5 @@ +package sqladapter + +type Logger interface { + Printf(format string, args ...any) +} diff --git a/handlers/harness/sqladapter/dispatcher.go b/handlers/harness/sqladapter/dispatcher.go new file mode 100644 index 0000000..71267f9 --- /dev/null +++ b/handlers/harness/sqladapter/dispatcher.go @@ -0,0 +1,85 @@ +// Package sqladapter provides a reference implementation of the +// handlers/harness Writer and Dispatcher interfaces, bound to the `Messages` +// MySQL table defined by doc/mysql/schema.sql in this module (columns +// `id`, `dispatched`, `type`, `payload`). Callers running a different schema +// should copy and adapt these types. +package sqladapter + +import ( + "context" + "database/sql" + "fmt" + "strings" + + "github.com/smarty/messaging/v3" + "github.com/smarty/messaging/v3/handlers/harness" +) + +type Dispatcher struct { + connector messaging.Connector + handle *sql.DB + logger Logger +} + +func NewDispatcher(connector messaging.Connector, handle *sql.DB, logger Logger) *Dispatcher { + return &Dispatcher{ + connector: connector, + handle: handle, + logger: logger, + } +} + +func (this *Dispatcher) Dispatch(ctx context.Context, messages ...*harness.Message) error { + if len(messages) == 0 { + return nil + } + if err := this.publish(ctx, messages); err != nil { + return err + } + return this.markDispatched(ctx, messages) +} + +func (this *Dispatcher) publish(ctx context.Context, messages []*harness.Message) error { + connection, err := this.connector.Connect(ctx) + if err != nil { + return err + } + defer func() { _ = connection.Close() }() + + writer, err := connection.Writer(ctx) + if err != nil { + return err + } + defer func() { _ = writer.Close() }() + + dispatches := make([]messaging.Dispatch, 0, len(messages)) // TODO: reuse slice, pool dispatch struct + for _, message := range messages { + dispatches = append(dispatches, messaging.Dispatch{ + Durable: true, + MessageType: message.Type, + ContentType: message.ContentType, + Payload: message.Content.Bytes(), + Topic: message.Type, // When payload is populated, the connector's encoder skips setting the topic. + }) + } + _, err = writer.Write(ctx, dispatches...) + return err +} + +func (this *Dispatcher) markDispatched(ctx context.Context, messages []*harness.Message) error { + var statement strings.Builder + statement.WriteString(`UPDATE Messages SET dispatched = NOW(3) WHERE id IN (`) + args := make([]any, 0, len(messages)) + for i, message := range messages { + if i > 0 { + statement.WriteString(`,`) + } + statement.WriteString(`?`) + args = append(args, message.ID) + } + statement.WriteString(`)`) + if _, err := this.handle.ExecContext(ctx, statement.String(), args...); err != nil { + return fmt.Errorf("mark dispatched: %w", err) + } + return nil +} diff --git a/handlers/harness/sqladapter/dispatcher_test.go b/handlers/harness/sqladapter/dispatcher_test.go new file mode 100644 index 0000000..4d99b27 --- /dev/null +++ b/handlers/harness/sqladapter/dispatcher_test.go @@ -0,0 +1,167 @@ +package sqladapter + +import ( + "bytes" + "context" + "database/sql" + "errors" + "log" + "os" + "testing" + + "github.com/smarty/gunit/v2" + "github.com/smarty/gunit/v2/assert/should" + "github.com/smarty/messaging/v3" + "github.com/smarty/messaging/v3/handlers/harness" +) + +type dispatcherTestEvent struct { + AccountID uint64 + OrderID uint64 +} + +func TestDispatcherFixture(t *testing.T) { + if testing.Short() { + t.Skip("Skipping long-running database tests.") + } + ensureDatabaseReadiness(t) + gunit.Run(new(DispatcherFixture), t, gunit.Options.IntegrationTests()) +} + +type DispatcherFixture struct { + *gunit.Fixture + handle *sql.DB + connector *stubConnector + subject *Dispatcher +} + +func (this *DispatcherFixture) Setup() { + handle, err := openTestDatabase() + this.So(err, should.BeNil) + this.handle = handle + _, err = handle.Exec(`TRUNCATE TABLE Messages;`) + this.So(err, should.BeNil) + this.connector = newStubConnector() + this.subject = NewDispatcher(this.connector, handle, log.New(os.Stderr, "", 0)) +} + +func (this *DispatcherFixture) Teardown() { + _ = this.handle.Close() +} + +func (this *DispatcherFixture) seedMessage(value any) *harness.Message { + result, err := this.handle.Exec(`INSERT INTO Messages (type, payload) VALUES ('order-received', '{}')`) + this.So(err, should.BeNil) + id, err := result.LastInsertId() + this.So(err, should.BeNil) + return &harness.Message{ + ID: uint64(id), + Type: "order-received", + ContentType: "application/json", + Content: bytes.NewBufferString(`{}`), + Value: value, + } +} + +func (this *DispatcherFixture) TestDispatch_PublishesPreEncodedPayloadAndMetadata() { + message := this.seedMessage(dispatcherTestEvent{AccountID: 1, OrderID: 2}) + + err := this.subject.Dispatch(context.Background(), message) + + this.So(err, should.BeNil) + this.So(len(this.connector.published), should.Equal, 1) + published := this.connector.published[0] + this.So(published.Payload, should.Equal, message.Content.Bytes()) + this.So(published.MessageType, should.Equal, message.Type) + this.So(published.ContentType, should.Equal, message.ContentType) + this.So(published.Topic, should.Equal, message.Type) + this.So(published.Durable, should.BeTrue) + this.So(published.Message, should.BeNil) +} + +func (this *DispatcherFixture) TestDispatch_PublishesAndMarksDispatched() { + event := dispatcherTestEvent{AccountID: 1, OrderID: 2} + message := this.seedMessage(event) + + err := this.subject.Dispatch(context.Background(), message) + + this.So(err, should.BeNil) + this.So(len(this.connector.published), should.Equal, 1) + published := this.connector.published[0] + this.So(published.Payload, should.Equal, message.Content.Bytes()) + this.So(published.MessageType, should.Equal, message.Type) + this.So(published.ContentType, should.Equal, message.ContentType) + this.So(published.Topic, should.Equal, message.Type) + this.So(published.Durable, should.BeTrue) + this.So(published.Message, should.BeNil) + this.So(this.dispatchedTimestamp(message.ID), should.NOT.BeNil) +} + +func (this *DispatcherFixture) TestDispatch_PublishFails_ReturnsErrorWithoutMarkingDispatched() { + event := dispatcherTestEvent{AccountID: 1, OrderID: 2} + message := this.seedMessage(event) + this.connector.writeErr = errors.New("rmq down") + + err := this.subject.Dispatch(context.Background(), message) + + this.So(err, should.NOT.BeNil) + this.So(this.dispatchedTimestamp(message.ID), should.BeNil) +} + +func (this *DispatcherFixture) TestDispatch_NoMessages_NoOp() { + err := this.subject.Dispatch(context.Background()) + this.So(err, should.BeNil) + this.So(len(this.connector.published), should.Equal, 0) +} + +func (this *DispatcherFixture) dispatchedTimestamp(id uint64) *string { + var dispatched sql.NullString + err := this.handle.QueryRow(`SELECT dispatched FROM Messages WHERE id = ?`, id).Scan(&dispatched) + this.So(err, should.BeNil) + if dispatched.Valid { + s := dispatched.String + return &s + } + return nil +} + +// stubConnector mimics a transport connector. + +type stubConnector struct { + published []messaging.Dispatch + writeBatches []int + writeErr error +} + +func newStubConnector() *stubConnector { + return &stubConnector{} +} +func (this *stubConnector) Connect(ctx context.Context) (messaging.Connection, error) { + return &stubConnection{parent: this}, nil +} +func (this *stubConnector) Close() error { return nil } + +type stubConnection struct{ parent *stubConnector } + +func (this *stubConnection) Reader(ctx context.Context) (messaging.Reader, error) { + return nil, errors.New("not implemented") +} +func (this *stubConnection) Writer(ctx context.Context) (messaging.Writer, error) { + return &stubWriter{parent: this.parent}, nil +} +func (this *stubConnection) CommitWriter(ctx context.Context) (messaging.CommitWriter, error) { + return nil, errors.New("not implemented") +} +func (this *stubConnection) Close() error { return nil } + +type stubWriter struct{ parent *stubConnector } + +func (this *stubWriter) Write(ctx context.Context, dispatches ...messaging.Dispatch) (int, error) { + if this.parent.writeErr != nil { + return 0, this.parent.writeErr + } + this.parent.writeBatches = append(this.parent.writeBatches, len(dispatches)) + this.parent.published = append(this.parent.published, dispatches...) + return len(dispatches), nil +} +func (this *stubWriter) Close() error { return nil } diff --git a/handlers/harness/sqladapter/recovery.go b/handlers/harness/sqladapter/recovery.go new file mode 100644 index 0000000..6c23154 --- /dev/null +++ b/handlers/harness/sqladapter/recovery.go @@ -0,0 +1,67 @@ +package sqladapter + +import ( + "bytes" + "context" + "database/sql" + + "github.com/smarty/messaging/v3/handlers/harness" +) + +// Recover scans Messages WHERE dispatched IS NULL at startup, wraps each row +// as a *harness.Message, and feeds them through the Dispatcher (publish + mark dispatched) +// in pages of batchSize. Intended to run synchronously during initialization, before +// the harness pipeline starts. +// +// TODO: make this a Listener +// TODO: retry w/ backoff +func Recover(ctx context.Context, handle *sql.DB, dispatcher *Dispatcher, logger Logger, batchSize int) error { + logger.Printf("[INFO] Recovering undispatched message(s) from previous run...") + rows, err := handle.QueryContext(ctx, ` + SELECT id, type, payload + FROM Messages + WHERE dispatched IS NULL + ORDER BY id`) + if err != nil { + return err + } + defer func() { _ = rows.Close() }() + + var total int + var messages []*harness.Message + for rows.Next() { + var ( + id uint64 + typeName string + payload []byte + ) + if err := rows.Scan(&id, &typeName, &payload); err != nil { + return err + } + total++ + messages = append(messages, &harness.Message{ + ID: id, + Type: typeName, + Content: bytes.NewBuffer(payload), + // Hard-coded until the Messages schema gains a content_type column; + // all payloads written by Writer are JSON so this is correct for now. + ContentType: "application/json", + }) + if len(messages) >= batchSize { + err := dispatcher.Dispatch(ctx, messages...) + if err != nil { + return err + } + clear(messages) + messages = messages[:0] + } + } + if err := rows.Err(); err != nil { + return err + } + if len(messages) > 0 { + err = dispatcher.Dispatch(ctx, messages...) + } + logger.Printf("[INFO] Recovering %d total undispatched message(s) from previous run.", total) + return err +} diff --git a/handlers/harness/sqladapter/recovery_test.go b/handlers/harness/sqladapter/recovery_test.go new file mode 100644 index 0000000..3add080 --- /dev/null +++ b/handlers/harness/sqladapter/recovery_test.go @@ -0,0 +1,145 @@ +package sqladapter + +import ( + "context" + "database/sql" + "log" + "os" + "testing" + + "github.com/smarty/gunit/v2" + "github.com/smarty/gunit/v2/assert/should" +) + +func TestRecoveryFixture(t *testing.T) { + if testing.Short() { + t.Skip("Skipping long-running database tests.") + } + ensureDatabaseReadiness(t) + gunit.Run(new(RecoveryFixture), t, gunit.Options.IntegrationTests()) +} + +type RecoveryFixture struct { + *gunit.Fixture + handle *sql.DB + connector *stubConnector + dispatcher *Dispatcher +} + +func (this *RecoveryFixture) Setup() { + handle, err := openTestDatabase() + this.So(err, should.BeNil) + this.handle = handle + _, err = handle.Exec(`TRUNCATE TABLE Messages;`) + this.So(err, should.BeNil) + this.connector = newStubConnector() + this.dispatcher = NewDispatcher(this.connector, handle, log.New(os.Stderr, "", 0)) +} + +func (this *RecoveryFixture) Teardown() { + _ = this.handle.Close() +} + +func (this *RecoveryFixture) seedUndispatched(typeName string, payload string) uint64 { + result, err := this.handle.Exec(`INSERT INTO Messages (type, payload) VALUES (?, ?)`, typeName, payload) + this.So(err, should.BeNil) + id, err := result.LastInsertId() + this.So(err, should.BeNil) + return uint64(id) +} + +func (this *RecoveryFixture) seedDispatched(typeName string, payload string) uint64 { + result, err := this.handle.Exec(`INSERT INTO Messages (type, payload, dispatched) VALUES (?, ?, NOW(3))`, typeName, payload) + this.So(err, should.BeNil) + id, err := result.LastInsertId() + this.So(err, should.BeNil) + return uint64(id) +} + +func (this *RecoveryFixture) TestRecover_NoOrphans_NoOp() { + err := Recover(context.Background(), this.handle, this.dispatcher, log.New(os.Stderr, "", 0), 1024) + + this.So(err, should.BeNil) + this.So(len(this.connector.published), should.Equal, 0) +} + +func (this *RecoveryFixture) TestRecover_DispatchesUndispatchedRowsInIDOrder() { + id1 := this.seedUndispatched("order-received", `{"order":1}`) + id2 := this.seedUndispatched("order-approved", `{"order":2}`) + _ = this.seedDispatched("order-received", `{"order":3}`) // already dispatched, must be skipped + + err := Recover(context.Background(), this.handle, this.dispatcher, log.New(os.Stderr, "", 0), 1024) + + this.So(err, should.BeNil) + this.So(len(this.connector.published), should.Equal, 2) + this.So(this.dispatchedTimestamp(id1), should.NOT.BeNil) + this.So(this.dispatchedTimestamp(id2), should.NOT.BeNil) +} + +func (this *RecoveryFixture) TestRecover_PassesPayloadAndTypeIntoMessage() { + this.seedUndispatched("order-received", `{"order":1}`) + + err := Recover(context.Background(), this.handle, this.dispatcher, log.New(os.Stderr, "", 0), 1024) + + this.So(err, should.BeNil) + this.So(len(this.connector.published), should.Equal, 1) + dispatch := this.connector.published[0] + this.So(dispatch.Durable, should.BeTrue) +} + +func (this *RecoveryFixture) TestRecover_PublishesDispatchWithTopicMessageTypeAndPayload() { + this.seedUndispatched("order-received", `{"order":1}`) + + err := Recover(context.Background(), this.handle, this.dispatcher, log.New(os.Stderr, "", 0), 1024) + + this.So(err, should.BeNil) + this.So(len(this.connector.published), should.Equal, 1) + dispatch := this.connector.published[0] + this.So(dispatch.Topic, should.Equal, "order-received") + this.So(dispatch.MessageType, should.Equal, "order-received") + this.So(dispatch.ContentType, should.Equal, "application/json") + this.So(dispatch.Payload, should.Equal, []byte(`{"order":1}`)) + this.So(dispatch.Durable, should.BeTrue) + this.So(dispatch.Message, should.BeNil) +} + +func (this *RecoveryFixture) TestRecover_RowsExceedBatchSize_FlushesInBatchesAndDispatchesAll() { + const batchSize = 3 + const total = 7 // 3 + 3 + 1 + ids := make([]uint64, 0, total) + for i := 0; i < total; i++ { + ids = append(ids, this.seedUndispatched("order-received", `{}`)) + } + + err := Recover(context.Background(), this.handle, this.dispatcher, log.New(os.Stderr, "", 0), batchSize) + + this.So(err, should.BeNil) + this.So(len(this.connector.published), should.Equal, total) + for _, id := range ids { + this.So(this.dispatchedTimestamp(id), should.NOT.BeNil) + } +} + +func (this *RecoveryFixture) TestRecover_RowCountIsMultipleOfBatchSize_FlushesUniformBatches() { + const batchSize = 3 + const total = 6 // exactly 2 batches of 3 + for i := 0; i < total; i++ { + this.seedUndispatched("order-received", `{}`) + } + + err := Recover(context.Background(), this.handle, this.dispatcher, log.New(os.Stderr, "", 0), batchSize) + + this.So(err, should.BeNil) + this.So(this.connector.writeBatches, should.Equal, []int{3, 3}) +} + +func (this *RecoveryFixture) dispatchedTimestamp(id uint64) *string { + var dispatched sql.NullString + err := this.handle.QueryRow(`SELECT dispatched FROM Messages WHERE id = ?`, id).Scan(&dispatched) + this.So(err, should.BeNil) + if dispatched.Valid { + s := dispatched.String + return &s + } + return nil +} diff --git a/handlers/harness/sqladapter/testdb_test.go b/handlers/harness/sqladapter/testdb_test.go new file mode 100644 index 0000000..fc90cf6 --- /dev/null +++ b/handlers/harness/sqladapter/testdb_test.go @@ -0,0 +1,73 @@ +package sqladapter + +import ( + "database/sql" + "fmt" + "os" + "path/filepath" + "runtime" + "testing" + + _ "github.com/go-sql-driver/mysql" +) + +// Integration tests in this package require a local MySQL server. +// +// Tests run against a throwaway schema (default `messaging`) which +// is dropped and re-created before each run. + +const testSchemaName = "messaging" + +func ensureDatabaseReadiness(t *testing.T) { + bootstrap, err := openDSN(buildDSN("")) + if err != nil { + t.Fatal("Database not available (is mysql running?):", err) + } + defer func() { _ = bootstrap.Close() }() + if err := setupSchema(bootstrap); err != nil { + t.Fatal("Schema did not set up properly:", err) + } +} + +func openTestDatabase() (*sql.DB, error) { + return openDSN(buildDSN(testSchemaName)) +} + +func openDSN(dsn string) (*sql.DB, error) { + db, err := sql.Open("mysql", dsn) + if err != nil { + return nil, err + } + if err := db.Ping(); err != nil { + _ = db.Close() + return nil, err + } + return db, nil +} + +func buildDSN(schema string) string { + return fmt.Sprintf("%s:%s@tcp(%s:%s)/%s?multiStatements=true&parseTime=true", "root", "", "127.0.0.1", "3306", schema) +} + +func setupSchema(db *sql.DB) error { + statement := fmt.Sprintf("DROP SCHEMA IF EXISTS %s; CREATE SCHEMA %s; USE %s;", + testSchemaName, testSchemaName, testSchemaName) + if _, err := db.Exec(statement); err != nil { + return err + } + content, err := os.ReadFile(findSchemaFile()) + if err != nil { + return err + } + if _, err := db.Exec(string(content)); err != nil { + return err + } + return nil +} + +// findSchemaFile locates doc/mysql/schema.sql relative to this test file, +// since tests run from the package directory regardless of CWD. +func findSchemaFile() string { + _, thisFile, _, _ := runtime.Caller(0) + return filepath.Join(filepath.Dir(thisFile), "..", "..", "..", "doc", "mysql", "schema.sql") +} diff --git a/handlers/harness/sqladapter/writer.go b/handlers/harness/sqladapter/writer.go new file mode 100644 index 0000000..e2e08f7 --- /dev/null +++ b/handlers/harness/sqladapter/writer.go @@ -0,0 +1,124 @@ +package sqladapter + +import ( + "cmp" + "context" + "database/sql" + "errors" + "fmt" + "reflect" + "strings" + + "github.com/smarty/messaging/v3/handlers/harness" +) + +// Deprecated +type legacyWrite func(context.Context, *sql.Tx, ...*harness.Message) + +type Writer struct { + handle *sql.DB + typeNames map[reflect.Type]string + stride uint64 + logger Logger + legacyWrite legacyWrite +} + +// NewWriter builds a Writer that inserts rows into the `Messages` table and +// invokes the supplied legacyWrite function inside the same transaction. +// +// Deprecation warning: the legacyWrite escape hatch is retained for migration from +// other projects and will be removed in a later release; new callers +// should supply a no-op function. +func NewWriter(handle *sql.DB, typeNames map[reflect.Type]string, stride uint64, logger Logger, legacyWrite legacyWrite) *Writer { + return &Writer{ + handle: handle, + typeNames: typeNames, + stride: cmp.Or(stride, 1), + logger: logger, + legacyWrite: legacyWrite, + } +} + +func (this *Writer) Write(ctx context.Context, messages ...*harness.Message) (err error) { + if len(messages) == 0 { + return nil + } + + tx, err := this.handle.BeginTx(ctx, nil) + if err != nil { + return err + } + defer func() { + if recovered := recover(); recovered != nil { + _ = tx.Rollback() + err = fmt.Errorf("panic during write: %v", recovered) + } else if err != nil { + _ = tx.Rollback() + } + }() + + if err := this.insertMessages(ctx, tx, messages); err != nil { + return err + } + + this.legacyWrite(ctx, tx, messages...) + + return tx.Commit() +} + +func (this *Writer) insertMessages(ctx context.Context, tx *sql.Tx, messages []*harness.Message) error { + var statement strings.Builder // TODO: reuse statement builder + statement.WriteString(`INSERT INTO Messages (type, payload) VALUES `) + args := make([]any, 0, len(messages)*2) // TODO: reuse slice/buffer + for i, message := range messages { + if message.Type == "" { + message.Type = this.typeNames[reflect.TypeOf(message.Value)] + } + if i > 0 { + statement.WriteString(`,`) + } + statement.WriteString(`(?, ?)`) + args = append(args, message.Type, message.Content.Bytes()) + } + result, err := tx.ExecContext(ctx, statement.String(), args...) + if err != nil { + return err + } + affected, err := result.RowsAffected() + if err != nil { + return err + } + // https://dev.mysql.com/doc/refman/5.6/en/information-functions.html#function_last-insert-id + // > If you insert multiple rows using a single INSERT statement, LAST_INSERT_ID() returns the value + // > generated for the first inserted row only. + first, err := result.LastInsertId() + if err != nil { + return err + } + return this.assignIDs(messages, affected, first) +} + +// assignIDs validates the row count and starting identity reported by the +// INSERT, then derives each message's ID from the first auto-increment value. +// The derivation relies on a single multi-row "simple insert" producing +// consecutive auto-increment values spaced by stride, which holds even under +// innodb_autoinc_lock_mode = 2 so long as no concurrent "bulk inserts" target +// the Messages table and stride matches the server's auto_increment_increment. +// https://dev.mysql.com/doc/refman/8.4/en/innodb-auto-increment-handling.html +func (this *Writer) assignIDs(messages []*harness.Message, affected, first int64) error { + if affected != int64(len(messages)) { + return errRowsAffected + } + if first <= 0 { + return errIdentityFailure + } + for i, message := range messages { + message.ID = uint64(first) + uint64(i)*this.stride + } + return nil +} + +var ( + errRowsAffected = errors.New("the number of modified rows was not expected compared to the number of writes performed") + errIdentityFailure = errors.New("unable to determine the identity of the inserted row(s)") +) diff --git a/handlers/harness/sqladapter/writer_test.go b/handlers/harness/sqladapter/writer_test.go new file mode 100644 index 0000000..3259dec --- /dev/null +++ b/handlers/harness/sqladapter/writer_test.go @@ -0,0 +1,247 @@ +package sqladapter + +import ( + "bytes" + "context" + "database/sql" + "encoding/json" + "reflect" + "testing" + "time" + + "github.com/smarty/gunit/v2" + "github.com/smarty/gunit/v2/assert/should" + "github.com/smarty/messaging/v3/handlers/harness" +) + +// Local test-only message types mirror the shape the source used. +type ( + orderReceived struct { + AccountID uint64 + OrderID uint64 + Timestamp time.Time + } + orderApproved struct { + AccountID uint64 + OrderID uint64 + Timestamp time.Time + } +) + +func TestWriterFixture(t *testing.T) { + if testing.Short() { + t.Skip("Skipping long-running database tests.") + } + ensureDatabaseReadiness(t) + gunit.Run(new(WriterFixture), t, gunit.Options.IntegrationTests()) +} + +type WriterFixture struct { + *gunit.Fixture + handle *sql.DB + subject *Writer + + legacyWriteCalls [][]*harness.Message + legacyWritePanic any + testStride uint64 +} + +func (this *WriterFixture) Setup() { + handle, err := openTestDatabase() + this.So(err, should.BeNil) + this.handle = handle + this.legacyWriteCalls = nil + this.legacyWritePanic = nil + this.testStride = 7 + this.subject = NewWriter(handle, testTypeNames(), this.testStride, this, this.fakeLegacyWrite) + this.truncateTables() +} + +func (this *WriterFixture) fakeLegacyWrite(_ context.Context, _ *sql.Tx, messages ...*harness.Message) { + this.legacyWriteCalls = append(this.legacyWriteCalls, messages) + if this.legacyWritePanic != nil { + panic(this.legacyWritePanic) + } +} + +func (this *WriterFixture) Teardown() { + _ = this.handle.Close() +} + +func (this *WriterFixture) truncateTables() { + _, err := this.handle.Exec(`TRUNCATE TABLE Messages;`) + this.So(err, should.BeNil) +} + +func (this *WriterFixture) TestWrite_InsertsMessageRowAndInvokesLegacyWrite() { + event := orderReceived{AccountID: 1, OrderID: 2, Timestamp: time.Now().UTC().Round(time.Second)} + message := serializedMessage(event, "") + + err := this.subject.Write(context.Background(), message) + + this.So(err, should.BeNil) + this.So(this.countMessages(), should.Equal, 1) + this.So(this.legacyWriteCalls, should.Equal, [][]*harness.Message{{message}}) + this.So(message.Type, should.Equal, "order-received") +} + +func (this *WriterFixture) TestWrite_ResolvesMessageTypeFromRegistry() { + event := orderApproved{AccountID: 1, OrderID: 2, Timestamp: time.Now().UTC()} + message := serializedMessage(event, "") + + err := this.subject.Write(context.Background(), message) + + this.So(err, should.BeNil) + this.So(this.firstMessageType(), should.Equal, "order-approved") +} + +func (this *WriterFixture) TestWrite_LegacyWritePanic_RollsBackInsertedMessage() { + this.legacyWritePanic = "boom" + event := orderReceived{AccountID: 1, OrderID: 2, Timestamp: time.Now().UTC()} + + err := this.subject.Write(context.Background(), serializedMessage(event, "")) + + this.So(err, should.NOT.BeNil) + this.So(this.countMessages(), should.Equal, 0) +} + +func (this *WriterFixture) TestWrite_PreservesPreSetMessageType() { + event := orderReceived{AccountID: 1, OrderID: 2, Timestamp: time.Now().UTC()} + message := serializedMessage(event, "explicit.override") + + err := this.subject.Write(context.Background(), message) + + this.So(err, should.BeNil) + this.So(this.firstMessageType(), should.Equal, "explicit.override") +} + +func (this *WriterFixture) TestWrite_PopulatesMessageIDFromAutoincrement() { + event1 := orderReceived{AccountID: 1, OrderID: 2, Timestamp: time.Now().UTC()} + event2 := orderReceived{AccountID: 1, OrderID: 3, Timestamp: time.Now().UTC()} + m1 := serializedMessage(event1, "") + m2 := serializedMessage(event2, "") + + err := this.subject.Write(context.Background(), m1, m2) + + this.So(err, should.BeNil) + this.So(m1.ID, should.NOT.Equal, uint64(0)) + this.So(m2.ID, should.Equal, m1.ID+this.testStride) +} + +func (this *WriterFixture) TestWrite_NoMessages_NoOp() { + err := this.subject.Write(context.Background()) + this.So(err, should.BeNil) + this.So(this.countMessages(), should.Equal, 0) +} + +func TestAssignIDsFixture(t *testing.T) { + gunit.Run(new(AssignIDsFixture), t) +} + +// AssignIDsFixture exercises the pure ID-derivation logic without a database, +// covering the stride arithmetic and the defensive guards that a live MySQL +// server would never naturally trip. +type AssignIDsFixture struct { + *gunit.Fixture +} + +func (this *AssignIDsFixture) writer(stride uint64) *Writer { + return NewWriter(nil, nil, stride, nil, nil) +} +func (this *AssignIDsFixture) messages(count int) (results []*harness.Message) { + for range count { + results = append(results, &harness.Message{}) + } + return results +} + +func (this *AssignIDsFixture) TestStridedIDsDerivedFromFirstIdentity() { + messages := this.messages(3) + + err := this.writer(7).assignIDs(messages, 3, 100) + + this.So(err, should.BeNil) + this.So(messages[0].ID, should.Equal, uint64(100)) + this.So(messages[1].ID, should.Equal, uint64(107)) + this.So(messages[2].ID, should.Equal, uint64(114)) +} + +func (this *AssignIDsFixture) TestZeroStrideDefaultsToOne() { + messages := this.messages(3) + + err := this.writer(0).assignIDs(messages, 3, 5) + + this.So(err, should.BeNil) + this.So(messages[0].ID, should.Equal, uint64(5)) + this.So(messages[1].ID, should.Equal, uint64(6)) + this.So(messages[2].ID, should.Equal, uint64(7)) +} + +func (this *AssignIDsFixture) TestSingleMessage() { + messages := this.messages(1) + + err := this.writer(7).assignIDs(messages, 1, 42) + + this.So(err, should.BeNil) + this.So(messages[0].ID, should.Equal, uint64(42)) +} + +func (this *AssignIDsFixture) TestRowsAffectedMismatchReturnsErrorAndLeavesIDsUntouched() { + messages := this.messages(3) + + err := this.writer(7).assignIDs(messages, 2, 100) + + this.So(err, should.Equal, errRowsAffected) + this.So(messages[0].ID, should.Equal, uint64(0)) + this.So(messages[1].ID, should.Equal, uint64(0)) + this.So(messages[2].ID, should.Equal, uint64(0)) +} + +func (this *AssignIDsFixture) TestZeroIdentityReturnsErrorAndLeavesIDsUntouched() { + messages := this.messages(2) + + err := this.writer(7).assignIDs(messages, 2, 0) + + this.So(err, should.Equal, errIdentityFailure) + this.So(messages[0].ID, should.Equal, uint64(0)) + this.So(messages[1].ID, should.Equal, uint64(0)) +} + +func (this *AssignIDsFixture) TestNegativeIdentityReturnsError() { + messages := this.messages(1) + + err := this.writer(7).assignIDs(messages, 1, -1) + + this.So(err, should.Equal, errIdentityFailure) + this.So(messages[0].ID, should.Equal, uint64(0)) +} + +func (this *WriterFixture) countMessages() int { + var count int + err := this.handle.QueryRow(`SELECT COUNT(*) FROM Messages`).Scan(&count) + this.So(err, should.BeNil) + return count +} +func (this *WriterFixture) firstMessageType() string { + var t string + err := this.handle.QueryRow(`SELECT type FROM Messages ORDER BY id LIMIT 1`).Scan(&t) + this.So(err, should.BeNil) + return t +} + +func testTypeNames() map[reflect.Type]string { + return map[reflect.Type]string{ + reflect.TypeOf(orderReceived{}): "order-received", + reflect.TypeOf(orderApproved{}): "order-approved", + } +} + +func serializedMessage(value any, typeOverride string) *harness.Message { + payload, _ := json.Marshal(value) + return &harness.Message{ + Value: value, + Type: typeOverride, + Content: bytes.NewBuffer(payload), + ContentType: "application/json", + } +}