From 077eb52b652913613a4c87537e612d3a4703d1e3 Mon Sep 17 00:00:00 2001 From: Xingfei Xu Date: Tue, 5 May 2026 05:57:59 +0000 Subject: [PATCH 01/31] feat: add FinishedAt to StepResult, sort ErrWorkflow output by execution order --- error.go | 42 ++++++++++++++++---- error_test.go | 103 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 138 insertions(+), 7 deletions(-) diff --git a/error.go b/error.go index 8b389aa..69cc1c0 100644 --- a/error.go +++ b/error.go @@ -3,7 +3,9 @@ package flow import ( "fmt" "runtime" + "sort" "strings" + "time" ) // Succeed marks the current step as `Succeeded`, while still reports the error. @@ -104,8 +106,9 @@ func StatusFromError(err error) StepStatus { // StepResult contains the status and error of a Step. type StepResult struct { - Status StepStatus - Err error + Status StepStatus + Err error + FinishedAt time.Time } // StatusError will be printed as: @@ -128,10 +131,35 @@ func indent(s string) string { return strings.ReplaceAll(s, "\n", "\n\t") } // Keys are root Steps, values are its status and error. type ErrWorkflow map[Steper]StepResult +// sortedSteps returns ErrWorkflow keys sorted by FinishedAt ascending. +// Steps with zero FinishedAt (never ran) sort last. +// Tie-break: lexicographic order of String(step). +func sortedSteps(e ErrWorkflow) []Steper { + steps := make([]Steper, 0, len(e)) + for step := range e { + steps = append(steps, step) + } + sort.Slice(steps, func(i, j int) bool { + ti := e[steps[i]].FinishedAt + tj := e[steps[j]].FinishedAt + zeroI := ti.IsZero() + zeroJ := tj.IsZero() + if zeroI != zeroJ { + return !zeroI // non-zero before zero + } + if !ti.Equal(tj) { + return ti.Before(tj) + } + return String(steps[i]) < String(steps[j]) + }) + return steps +} + func (e ErrWorkflow) Unwrap() []error { + steps := sortedSteps(e) rv := make([]error, 0, len(e)) - for _, sErr := range e { - rv = append(rv, sErr.Err) + for _, step := range steps { + rv = append(rv, e[step].Err) } return rv } @@ -142,9 +170,9 @@ func (e ErrWorkflow) Unwrap() []error { // error message func (e ErrWorkflow) Error() string { var builder strings.Builder - for step, serr := range e { - builder.WriteString(fmt.Sprintf("%s: ", String(step))) - builder.WriteString(fmt.Sprintln(serr.Error())) + for _, step := range sortedSteps(e) { + fmt.Fprintf(&builder, "%s: ", String(step)) + fmt.Fprintln(&builder, e[step].Error()) } return builder.String() } diff --git a/error_test.go b/error_test.go index bd844cb..09139f3 100644 --- a/error_test.go +++ b/error_test.go @@ -2,10 +2,15 @@ package flow_test import ( "context" + "fmt" + "strings" "testing" + "time" flow "github.com/Azure/go-workflow" + "github.com/benbjohnson/clock" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func TestErrCycleDependency(t *testing.T) { @@ -17,3 +22,101 @@ func TestErrCycleDependency(t *testing.T) { assert.ErrorContains(t, errCycle, "Succeeded depends on [\n\t\tSucceeded\n\t]") } } + +// serialStep is a controllable step: signals when started, waits to be released. +type serialStep struct { + name string + started chan struct{} + release chan struct{} + err error +} + +func newSerialStep(name string, err error) *serialStep { + return &serialStep{ + name: name, + started: make(chan struct{}, 1), + release: make(chan struct{}), + err: err, + } +} +func (s *serialStep) Do(_ context.Context) error { + s.started <- struct{}{} + <-s.release + return s.err +} +func (s *serialStep) String() string { return s.name } + +func TestErrWorkflowOutputOrdering(t *testing.T) { + // Serial chain: stepC -> stepA -> stepB + // Names chosen so alphabetical order != execution order. + mockClock := clock.NewMock() + stepC := newSerialStep("C-first", fmt.Errorf("C failed")) + stepA := newSerialStep("A-second", fmt.Errorf("A failed")) + stepB := newSerialStep("B-third", fmt.Errorf("B failed")) + + w := &flow.Workflow{Clock: mockClock} + w.Add( + flow.Step(stepC), + flow.Step(stepA).DependsOn(stepC).When(flow.Always), + flow.Step(stepB).DependsOn(stepA).When(flow.Always), + ) + + done := make(chan error, 1) + go func() { done <- w.Do(context.Background()) }() + + <-stepC.started + mockClock.Add(time.Second) + close(stepC.release) + + <-stepA.started + mockClock.Add(time.Second) + close(stepA.release) + + <-stepB.started + mockClock.Add(time.Second) + close(stepB.release) + + err := <-done + require.Error(t, err) + + var errW flow.ErrWorkflow + require.ErrorAs(t, err, &errW) + + output := errW.Error() + posC := strings.Index(output, "C-first") + posA := strings.Index(output, "A-second") + posB := strings.Index(output, "B-third") + + assert.Greater(t, posA, posC, "A-second should appear after C-first") + assert.Greater(t, posB, posA, "B-third should appear after A-second") +} + +func TestErrWorkflowTieBreakByName(t *testing.T) { + // Two parallel steps fail at the same clock tick → alphabetical order. + mockClock := clock.NewMock() + stepZ := newSerialStep("Z-step", fmt.Errorf("Z failed")) + stepA := newSerialStep("A-step", fmt.Errorf("A failed")) + + w := &flow.Workflow{Clock: mockClock} + w.Add(flow.Step(stepZ), flow.Step(stepA)) + + done := make(chan error, 1) + go func() { done <- w.Do(context.Background()) }() + + <-stepZ.started + <-stepA.started + mockClock.Add(time.Second) // both get same FinishedAt + close(stepZ.release) + close(stepA.release) + + err := <-done + require.Error(t, err) + + var errW flow.ErrWorkflow + require.ErrorAs(t, err, &errW) + + output := errW.Error() + posA := strings.Index(output, "A-step") + posZ := strings.Index(output, "Z-step") + assert.Less(t, posA, posZ, "A-step should appear before Z-step (tie-break by name)") +} From 904e37e3d34be4291284abfbc07d9e1352a44277 Mon Sep 17 00:00:00 2001 From: Xingfei Xu Date: Tue, 5 May 2026 05:59:24 +0000 Subject: [PATCH 02/31] test: verify StepResult.FinishedAt is populated after step execution --- execution_model_test.go | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/execution_model_test.go b/execution_model_test.go index 26c0f08..c6b7c17 100644 --- a/execution_model_test.go +++ b/execution_model_test.go @@ -8,6 +8,7 @@ import ( "testing" "time" + "github.com/benbjohnson/clock" "github.com/stretchr/testify/assert" ) @@ -215,3 +216,16 @@ func TestConcurrentExecution(t *testing.T) { "expected at least 2 steps to run concurrently") }) } + +func TestStepResultFinishedAtPopulated(t *testing.T) { + mockClock := clock.NewMock() + step := Func("test-step", func(ctx context.Context) error { return nil }) + w := &Workflow{Clock: mockClock} + w.Add(Step(step)) + + err := w.Do(context.Background()) + assert.NoError(t, err) + + result := w.StateOf(step).GetStepResult() + assert.False(t, result.FinishedAt.IsZero(), "FinishedAt should be set after step execution") +} From ef1e56f64d7222d0639cf4cdb5baefc26d7e7d66 Mon Sep 17 00:00:00 2001 From: Xingfei Xu Date: Tue, 5 May 2026 06:06:59 +0000 Subject: [PATCH 03/31] chore: archive errworkflow-execution-order change, sync specs to main --- .../.openspec.yaml | 2 + .../design.md | 54 ++++ .../proposal.md | 27 ++ .../specs/execution-model/spec.md | 73 +++++ .../tasks.md | 31 +++ .../contrib-common-steps/.openspec.yaml | 2 + .../changes/contrib-common-steps/proposal.md | 64 +++++ .../changes/dynamic-fanout/.openspec.yaml | 2 + openspec/changes/dynamic-fanout/proposal.md | 89 ++++++ .../heartbeat-and-liveness/.openspec.yaml | 2 + .../heartbeat-and-liveness/proposal.md | 54 ++++ .../observability-adapters/.openspec.yaml | 2 + .../observability-adapters/proposal.md | 210 +++++++++++++++ openspec/changes/pause-resume/.openspec.yaml | 2 + openspec/changes/pause-resume/proposal.md | 78 ++++++ .../workflow-checkpointing/.openspec.yaml | 2 + .../workflow-checkpointing/proposal.md | 255 ++++++++++++++++++ openspec/specs/execution-model/spec.md | 74 +++++ 18 files changed, 1023 insertions(+) create mode 100644 openspec/changes/archive/2026-05-05-errworkflow-execution-order/.openspec.yaml create mode 100644 openspec/changes/archive/2026-05-05-errworkflow-execution-order/design.md create mode 100644 openspec/changes/archive/2026-05-05-errworkflow-execution-order/proposal.md create mode 100644 openspec/changes/archive/2026-05-05-errworkflow-execution-order/specs/execution-model/spec.md create mode 100644 openspec/changes/archive/2026-05-05-errworkflow-execution-order/tasks.md create mode 100644 openspec/changes/contrib-common-steps/.openspec.yaml create mode 100644 openspec/changes/contrib-common-steps/proposal.md create mode 100644 openspec/changes/dynamic-fanout/.openspec.yaml create mode 100644 openspec/changes/dynamic-fanout/proposal.md create mode 100644 openspec/changes/heartbeat-and-liveness/.openspec.yaml create mode 100644 openspec/changes/heartbeat-and-liveness/proposal.md create mode 100644 openspec/changes/observability-adapters/.openspec.yaml create mode 100644 openspec/changes/observability-adapters/proposal.md create mode 100644 openspec/changes/pause-resume/.openspec.yaml create mode 100644 openspec/changes/pause-resume/proposal.md create mode 100644 openspec/changes/workflow-checkpointing/.openspec.yaml create mode 100644 openspec/changes/workflow-checkpointing/proposal.md diff --git a/openspec/changes/archive/2026-05-05-errworkflow-execution-order/.openspec.yaml b/openspec/changes/archive/2026-05-05-errworkflow-execution-order/.openspec.yaml new file mode 100644 index 0000000..eebe4d8 --- /dev/null +++ b/openspec/changes/archive/2026-05-05-errworkflow-execution-order/.openspec.yaml @@ -0,0 +1,2 @@ +schema: spec-driven +created: 2026-05-05 diff --git a/openspec/changes/archive/2026-05-05-errworkflow-execution-order/design.md b/openspec/changes/archive/2026-05-05-errworkflow-execution-order/design.md new file mode 100644 index 0000000..bcdc89b --- /dev/null +++ b/openspec/changes/archive/2026-05-05-errworkflow-execution-order/design.md @@ -0,0 +1,54 @@ +## Context + +`ErrWorkflow` is defined as `map[Steper]StepResult`. The `Error()` method iterates this map directly, producing non-deterministic output. `StepResult` currently holds only `Status StepStatus` and `Err error` — no timing information. + +The workflow already has an injected `clock.Clock` field (from `github.com/benbjohnson/clock`) used for retry/timeout, so clock-based timestamping is available without new dependencies. + +Step goroutines terminate via a shared `defer` block in `workflow.go` that calls `state.SetStatus(status)` and `state.SetError(err)` before signalling. This is the single canonical termination point — the right place to record `FinishedAt`. + +## Goals / Non-Goals + +**Goals:** +- `ErrWorkflow.Error()` produces deterministic, execution-finish-time-ordered output. +- `ErrWorkflow.Unwrap()` returns errors in the same order. +- `StepResult.FinishedAt` is populated for all terminated steps and available to `Condition` functions and external observers. +- Tests remain deterministic via the existing `clock.Clock` injection. + +**Non-Goals:** +- Recording `StartedAt` — out of scope for this change. +- Changing the `ErrWorkflow` underlying type from `map` to an ordered structure — the map stays; sorting happens only at output time. +- Displaying the timestamp in `Error()` output — ordering is the goal, not showing timestamps to users. + +## Decisions + +### D1: Add `FinishedAt time.Time` to `StepResult` (not to `State` separately) + +`StepResult` is the public snapshot type returned from `GetStepResult()`, passed into `Condition` functions, and embedded in `ErrWorkflow`. Adding the field here makes it available to all consumers with no additional API surface. + +Alternative considered: add a separate `finishedAt` field to `State` only and use it just for sorting in `ErrWorkflow.Error()`. Rejected — this hides useful information from `Condition` authors and duplicates the timestamp concept. + +### D2: Record timestamp at the step goroutine's defer, using `w.Clock.Now()` + +The defer in the step goroutine is the single termination point for all outcomes (success, failure, cancel, panic). Recording `FinishedAt` there — alongside `SetStatus` and `SetError` — is atomic from the workflow's perspective and covers all code paths. + +`w.Clock.Now()` keeps tests deterministic; tests using `clock.NewMock()` already control time for retry/timeout assertions. + +### D3: Sort in `Error()` and `Unwrap()` by `FinishedAt` ascending; zero-time steps last, then by `String(step)` for stability + +Steps that never executed (Skipped by condition before running, Pending) will have zero `FinishedAt`. They sort to the end. Among steps with the same timestamp (possible with mocked clocks or extremely fast steps), `String(step)` provides a stable secondary sort. This matches the mental model: "what ran first, then what failed." + +Alternative: sort only in `Error()`, leave `Unwrap()` unordered. Rejected — consistency between the two methods avoids surprising behavior when callers use `errors.As`/`errors.Is` traversal order for logic. + +### D4: No new exported helper to set `FinishedAt` — set it directly in the workflow goroutine + +`State` already has unexported setters (`SetStatus`, `SetError`). We add `SetFinishedAt(t time.Time)` following the same pattern, called only from the workflow's internal goroutine. This keeps the mutation path narrow. + +## Risks / Trade-offs + +- **Struct literal breakage**: Any code constructing `StepResult{val1, val2}` positionally (without field names) will fail to compile after the new field is added. This is caught at compile time and is trivially fixable. It is unlikely in practice since `StepResult` is a library type. +- **Mock clock in condition tests**: `Condition` unit tests that construct `StepResult` manually (e.g., `condition_test.go`) will need to populate `FinishedAt` explicitly if they care about ordering. Tests that don't check `ErrWorkflow.Error()` output need no changes. +- **Tied to wall clock resolution**: On systems with low-resolution clocks, two steps terminating in the same tick will fall back to name-based ordering. This is acceptable — the output is still deterministic. + +## Migration Plan + +No migration needed. `FinishedAt` is an additive field. Existing `StepResult` values constructed by field name (`StepResult{Status: ..., Err: ...}`) get a zero `FinishedAt` and continue to compile. The sort in `Error()` is purely cosmetic — no behavioral changes to workflow execution. diff --git a/openspec/changes/archive/2026-05-05-errworkflow-execution-order/proposal.md b/openspec/changes/archive/2026-05-05-errworkflow-execution-order/proposal.md new file mode 100644 index 0000000..cc0a23a --- /dev/null +++ b/openspec/changes/archive/2026-05-05-errworkflow-execution-order/proposal.md @@ -0,0 +1,27 @@ +## Why + +When a workflow fails, `ErrWorkflow.Error()` outputs steps in random order because `ErrWorkflow` is a `map[Steper]StepResult` and Go map iteration is non-deterministic. This makes failure traces hard to read and impossible to compare across runs, hindering debugging. + +## What Changes + +- Add `FinishedAt time.Time` field to `StepResult` to record when each step terminated. +- Record the finish timestamp (using the workflow's injected `clock.Clock`) in the step goroutine, just before signalling status change. +- `ErrWorkflow.Error()` sorts steps by `FinishedAt` ascending (steps that never ran sort last, then by name for stability). +- `ErrWorkflow.Unwrap()` returns errors in the same sorted order for consistency. + +## Capabilities + +### New Capabilities + +_(none)_ + +### Modified Capabilities + +- `execution-model`: `StepResult` gains a `FinishedAt time.Time` field populated at step termination; `ErrWorkflow.Error()` and `Unwrap()` now produce output in execution-finish order instead of random map iteration order. + +## Impact + +- `StepResult` gains a new exported field — additive, not breaking for existing code that constructs or reads `StepResult` by field name. Code using `StepResult{Status: ..., Err: ...}` struct literals (without field names) would break at compile time, but that pattern is unlikely and easily fixed. +- `Condition` functions receive `map[Steper]StepResult` — the new field is available to condition authors at no extra cost. +- The workflow's `clock.Clock` field (already present) is used for timestamping, keeping tests deterministic. +- No new dependencies. No API removals. diff --git a/openspec/changes/archive/2026-05-05-errworkflow-execution-order/specs/execution-model/spec.md b/openspec/changes/archive/2026-05-05-errworkflow-execution-order/specs/execution-model/spec.md new file mode 100644 index 0000000..3974ee3 --- /dev/null +++ b/openspec/changes/archive/2026-05-05-errworkflow-execution-order/specs/execution-model/spec.md @@ -0,0 +1,73 @@ +## ADDED Requirements + +### Requirement: StepResult carries finish timestamp + +`StepResult` SHALL include a `FinishedAt time.Time` field that records the moment +the step goroutine transitioned to a terminal status (`Succeeded`, `Failed`, `Canceled`, +or `Skipped`). + +The timestamp SHALL be recorded using the Workflow's injected `clock.Clock`, so that +tests using a mock clock produce deterministic values. + +Steps that are never executed (e.g., never transitioned to `Running`) SHALL have a +zero `FinishedAt` value. + +#### Scenario: Succeeded step has FinishedAt set +- **WHEN** `step.Do(ctx)` returns `nil` and the step transitions to `Succeeded` +- **THEN** `StepResult.FinishedAt` is set to `clock.Now()` at the moment of transition + +#### Scenario: Failed step has FinishedAt set +- **WHEN** `step.Do(ctx)` returns a non-nil error and the step transitions to `Failed` +- **THEN** `StepResult.FinishedAt` is set to `clock.Now()` at the moment of transition + +#### Scenario: Canceled step has FinishedAt set +- **WHEN** a step is canceled and transitions to `Canceled` +- **THEN** `StepResult.FinishedAt` is set to `clock.Now()` at the moment of transition + +#### Scenario: Skipped step has FinishedAt set +- **WHEN** a step's Condition evaluates to `Skipped` and the step never runs +- **THEN** `StepResult.FinishedAt` is set to `clock.Now()` at the moment of the skip transition + +#### Scenario: Never-executed step has zero FinishedAt +- **WHEN** a step remains `Pending` at the end of workflow execution +- **THEN** `StepResult.FinishedAt` is the zero value of `time.Time` + +#### Scenario: FinishedAt available in Condition functions +- **WHEN** a Condition function receives `map[Steper]StepResult` for upstream steps +- **THEN** `FinishedAt` is populated for all terminated upstream steps and available to the condition logic + +## MODIFIED Requirements + +### Requirement: ErrWorkflow error output ordering + +`ErrWorkflow.Error()` SHALL output steps sorted by `StepResult.FinishedAt` in ascending +order (earliest-finishing step first), so that the error message reflects the execution +timeline. + +Steps with a zero `FinishedAt` (i.e., steps that never executed) SHALL appear last. + +When two or more steps share an identical `FinishedAt` value, they SHALL be sorted +by their string name (`flow.String(step)`) in ascending lexicographic order to produce +a stable, deterministic output. + +`ErrWorkflow.Unwrap()` SHALL return errors in the same sorted order. + +#### Scenario: Single-step workflow failure output +- **WHEN** a workflow with one failed step produces `ErrWorkflow` +- **THEN** `ErrWorkflow.Error()` contains exactly that step's output + +#### Scenario: Multi-step output is sorted by finish time +- **WHEN** steps A, B, C finish in that order (A earliest, C latest) +- **THEN** `ErrWorkflow.Error()` lists them A, B, C regardless of map iteration order + +#### Scenario: Never-executed steps appear last +- **WHEN** some steps have zero `FinishedAt` (never ran) and others have non-zero timestamps +- **THEN** `ErrWorkflow.Error()` lists all non-zero-timestamp steps first, zero-timestamp steps last + +#### Scenario: Tie-breaking by name +- **WHEN** two steps have identical `FinishedAt` values +- **THEN** `ErrWorkflow.Error()` lists them in ascending lexicographic order by step name + +#### Scenario: Unwrap order matches Error order +- **WHEN** `ErrWorkflow.Unwrap()` is called +- **THEN** the returned error slice is in the same order as `ErrWorkflow.Error()` output diff --git a/openspec/changes/archive/2026-05-05-errworkflow-execution-order/tasks.md b/openspec/changes/archive/2026-05-05-errworkflow-execution-order/tasks.md new file mode 100644 index 0000000..ba87c73 --- /dev/null +++ b/openspec/changes/archive/2026-05-05-errworkflow-execution-order/tasks.md @@ -0,0 +1,31 @@ +## 1. Extend StepResult with FinishedAt + +- [ ] 1.1 Add `FinishedAt time.Time` field to `StepResult` struct in `error.go` +- [ ] 1.2 Add `SetFinishedAt(t time.Time)` method to `State` in `state.go`, following the same pattern as `SetStatus`/`SetError` +- [ ] 1.3 Add `GetFinishedAt() time.Time` method to `State` if needed for symmetry + +## 2. Record Timestamp at Step Termination + +- [ ] 2.1 In the step goroutine defer block in `workflow.go`, call `state.SetFinishedAt(w.Clock.Now())` just before `state.SetStatus(status)` and `state.SetError(err)` +- [ ] 2.2 For condition-evaluated skips (where a step is skipped without running), ensure `SetFinishedAt` is also called at the point the skip status is assigned in `tick()` + +## 3. Sort ErrWorkflow Output + +- [ ] 3.1 Add `sort` import to `error.go` +- [ ] 3.2 Implement a helper `sortedSteps(e ErrWorkflow) []Steper` that returns steps sorted by `FinishedAt` ascending, zero-time last, tie-broken by `String(step)` lexicographically +- [ ] 3.3 Rewrite `ErrWorkflow.Error()` to use `sortedSteps` for iteration +- [ ] 3.4 Rewrite `ErrWorkflow.Unwrap()` to use `sortedSteps` for iteration + +## 4. Tests + +- [ ] 4.1 Add a test asserting `StepResult.FinishedAt` is populated after workflow execution (use `clock.NewMock()` and advance time between steps) +- [ ] 4.2 Add a test for `ErrWorkflow.Error()` output ordering: run a 3-step serial workflow, verify output is in execution order regardless of step name sort order +- [ ] 4.3 Add a test for tie-breaking: construct an `ErrWorkflow` with two steps sharing identical `FinishedAt`, verify alphabetical order in output +- [ ] 4.4 Add a test for zero-`FinishedAt` steps appearing last in output +- [ ] 4.5 Verify existing tests in `condition_test.go` that construct `StepResult` manually still compile and pass (update literals to use field names if needed) + +## 5. Verify + +- [ ] 5.1 Run `go build ./...` — no compile errors +- [ ] 5.2 Run `go test ./...` — all tests pass +- [ ] 5.3 Run `go vet ./...` — no issues diff --git a/openspec/changes/contrib-common-steps/.openspec.yaml b/openspec/changes/contrib-common-steps/.openspec.yaml new file mode 100644 index 0000000..905325f --- /dev/null +++ b/openspec/changes/contrib-common-steps/.openspec.yaml @@ -0,0 +1,2 @@ +schema: spec-driven +created: 2026-05-04 diff --git a/openspec/changes/contrib-common-steps/proposal.md b/openspec/changes/contrib-common-steps/proposal.md new file mode 100644 index 0000000..3f26ab8 --- /dev/null +++ b/openspec/changes/contrib-common-steps/proposal.md @@ -0,0 +1,64 @@ +## Why + +go-workflow's root module is intentionally minimal: it provides the orchestration framework +and nothing more. But users repeatedly solve the same problems on top of it — waiting for a +signal, rate-limiting against an external API, sending a notification when something fails, +implementing a human-approval gate, etc. + +Today each user reinvents these patterns from scratch. A `contrib/` subtree of submodules +can provide battle-tested, reusable step implementations while keeping the root module lean +and dependency-free. + +This mirrors the Go ecosystem pattern (e.g., `golang.org/x/...`, OpenTelemetry's +`go.opentelemetry.io/contrib/...`). + +## What Changes + +- A new `contrib/` directory at the repo root, containing independent Go modules. +- Each submodule has its own `go.mod` and can pull in external dependencies without + affecting users of the root module. +- The root module has zero new dependencies. + +## Capabilities + +### New Capabilities + +**Initial candidate submodules** (exact list TBD, these are ideas): + +- **`contrib/steps`** — general-purpose reusable steps: + - `WaitSignal[T]` — a step that blocks until a value is sent on a channel, useful for + human-approval gates or inter-workflow coordination. + - `Delay` — a step that waits for a fixed duration (respects context cancellation). + - `Gate` — a step that waits until a predicate becomes true (polls with interval + timeout). + +- **`contrib/notify`** — notification steps (Slack, PagerDuty, email, webhook) to use in + `AnyFailed` / cleanup branches. + +- **`contrib/ratelimit`** — a `BeforeStep` callback factory that applies token-bucket or + leaky-bucket rate limiting across a group of steps (useful when steps call the same + external API). + +- **`contrib/otel`** — OpenTelemetry integration: an `EventSink` (once that feature lands) + plus a `BeforeStep`/`AfterStep` pair that creates trace spans and records metrics. + +### Design Principles for Contrib Steps + +1. Every contrib step implements `flow.Steper` — fully composable with the core framework. +2. Steps are configurable via struct fields, not functional options (keeps it simple). +3. Each submodule depends only on the root `go-workflow` module plus minimal external deps. +4. Steps are tested independently; no contribution required in the root module's test suite. + +### Open Questions + +- Flat `contrib/steps` vs. per-concern submodules (`contrib/signal`, `contrib/ratelimit`)? + Single `contrib/steps` is simpler initially; can split later if dependency sets diverge. + +- Should contrib steps live in this repo or a separate `go-workflow-contrib` repo? + Same repo is easier to keep in sync with API changes; separate repo avoids repo bloat. + Leaning toward same repo with clear directory boundary. + +## Impact + +- New `contrib/` directory with one or more `go.mod` files. +- No changes to root module code or go.mod. +- CI: add build/test jobs for each contrib submodule. diff --git a/openspec/changes/dynamic-fanout/.openspec.yaml b/openspec/changes/dynamic-fanout/.openspec.yaml new file mode 100644 index 0000000..905325f --- /dev/null +++ b/openspec/changes/dynamic-fanout/.openspec.yaml @@ -0,0 +1,2 @@ +schema: spec-driven +created: 2026-05-04 diff --git a/openspec/changes/dynamic-fanout/proposal.md b/openspec/changes/dynamic-fanout/proposal.md new file mode 100644 index 0000000..8642eb0 --- /dev/null +++ b/openspec/changes/dynamic-fanout/proposal.md @@ -0,0 +1,89 @@ +## Why + +Today go-workflow requires all steps to be declared before `workflow.Do()` is called. The DAG +is static. This is fine for most cases, but a common pattern cannot be expressed cleanly: +a step produces a list of items at runtime, and you want to process each item in parallel +as an independent step — the classic fan-out / map-reduce pattern. + +Current workarounds: +1. Pre-allocate a fixed-size slice of steps with the maximum possible items — wasteful and + requires knowing the count upfront. +2. Use a `SubWorkflow` step that builds and runs its own inner workflow dynamically — works, + but the inner workflow is invisible to the outer workflow's status tracking and event sink. +3. Use a single step that processes all items sequentially or with manual goroutines — loses + the framework's retry-per-item, timeout-per-item, and observability guarantees. + +The goal is to let a step declare children at runtime, which the workflow then schedules +as first-class steps. + +## What Changes + +A new optional interface `Spawner` (name TBD) that a step can implement. After the step's +`Do()` returns successfully, the workflow calls `Spawn()` to get new steps, injects them into +the running DAG, and schedules them with the step as their upstream. + +## Proposed Approaches + +### Approach A — `Spawner` interface (post-Do hook) + +```go +type Spawner interface { + Spawn() []Builder +} +``` + +After `Do()` succeeds, the workflow calls `Spawn()` and merges the returned builders into +the live DAG. Simple, zero new concepts, fully backward compatible. + +Downside: `Spawn()` is called after `Do()`, so items must be stored on the step struct +between `Do()` and `Spawn()` — minor coupling, but acceptable. + +### Approach B — Dynamic `Add()` during `Do()` + +Pass a "workflow handle" into `Do()` via context: + +```go +handle := flow.WorkflowHandle(ctx) +handle.Add(flow.Step(newStep)) +``` + +More flexible, but context-threading is implicit and the concurrency story is tricky +(adding steps while the scheduler tick is running). + +### Approach C — `Builder` result from `Do()` + +Change `Steper` to `Do(context.Context) (Builder, error)` — spawned steps are the return +value. Clean, but **breaks the existing interface** — too invasive. + +### Recommendation + +Start with **Approach A**. It is the least invasive, requires no interface changes, and +covers the primary use case. Approach B can be layered on top later via a context value. + +## Capabilities + +### New Capabilities (Approach A) + +- `Spawner` interface: if a step implements `Spawn() []Builder`, the workflow calls it after + a successful `Do()` and schedules the returned steps as downstream of the spawning step. +- Spawned steps participate in all normal workflow features: retry, timeout, condition, event sink. +- `workflow.Do()` does not return until all spawned steps (and their descendants) are terminated. + +### Open Questions + +- What if `Spawn()` returns a step that was already in the workflow? Should it merge configs + or panic? Probably merge (same semantics as `workflow.Add()`). +- Should spawned steps be visible in `workflow.Steps()` and the event sink before they run? + Yes — once added to the live DAG they should behave like any other step. +- Should `Spawn()` be called on retry too, or only on final success? + Only on final success — otherwise retries produce duplicate children. +- Error handling: if `Spawn()` itself returns an error... it doesn't in this design (returns + `[]Builder`). Errors surface through the spawned steps' own `Do()` calls. + +## Impact + +- New `Spawner` interface in `step.go` (or `workflow.go`). +- `workflow.go` `tick()` or post-`runStep` logic — check if step implements `Spawner`, + call `Spawn()`, merge result into `w.steps`. +- New spec: `openspec/specs/dynamic-fanout/spec.md`. +- No breaking changes. diff --git a/openspec/changes/heartbeat-and-liveness/.openspec.yaml b/openspec/changes/heartbeat-and-liveness/.openspec.yaml new file mode 100644 index 0000000..905325f --- /dev/null +++ b/openspec/changes/heartbeat-and-liveness/.openspec.yaml @@ -0,0 +1,2 @@ +schema: spec-driven +created: 2026-05-04 diff --git a/openspec/changes/heartbeat-and-liveness/proposal.md b/openspec/changes/heartbeat-and-liveness/proposal.md new file mode 100644 index 0000000..a12b9ac --- /dev/null +++ b/openspec/changes/heartbeat-and-liveness/proposal.md @@ -0,0 +1,54 @@ +## Why + +Long-running steps (large file processing, external API polling, batch jobs) can silently stall +inside `Do()` — the goroutine is alive but making no progress. Currently go-workflow has no way +to detect this. Temporal solves it with an Activity Heartbeat: the step periodically signals +liveness, and the runtime kills and retries it if heartbeats stop. + +go-workflow should provide an equivalent mechanism that fits its single-process, in-memory model. + +## What Changes + +- A `Heartbeat` function that a step calls periodically during `Do()` to signal it is alive. +- A `HeartbeatTimeout` field in `RetryOption` (or `StepOption`) — if no heartbeat arrives within + this duration, the step's context is canceled and the step is treated as failed (and retried + if retry is configured). +- Optionally, a heartbeat payload so a retried step can recover its last known progress + (similar to Temporal's `activity.GetHeartbeatDetails`). + +## Capabilities + +### New Capabilities + +- **Heartbeat signaling**: step calls `flow.Heartbeat(ctx)` or `flow.HeartbeatWith(ctx, payload)` + inside its `Do()` loop. The call is a no-op if no heartbeat timeout is configured, so it is + safe to add unconditionally. + +- **Liveness enforcement**: when `HeartbeatTimeout` is set on a step, the workflow starts a + watchdog that cancels the step's context if `HeartbeatTimeout` elapses without a heartbeat. + The step's `ctx.Done()` fires, `Do()` returns `context.Canceled`, and normal retry logic applies. + +- **Progress recovery**: `flow.LastHeartbeat[T](ctx)` returns the payload from the most recent + heartbeat of the previous attempt, so a retried step can resume from where it left off instead + of restarting from zero. + +### Open Questions + +- Should `HeartbeatTimeout` live in `RetryOption` (only meaningful with retry) or `StepOption` + (independent — you might want liveness detection even without retry)? + Leaning toward `StepOption` so liveness and retry are orthogonal concerns. + +- Payload type: generic `any` with the caller doing type assertion, or a typed generic helper + `flow.HeartbeatWithTyped[T]`? Generic is cleaner but requires Go 1.21+ type inference. + +- Should the watchdog goroutine be started per-step or shared across steps? + Per-step is simpler and avoids coordination overhead. + +## Impact + +- `StepOption` or `RetryOption` — add `HeartbeatTimeout *time.Duration`. +- New exported functions: `Heartbeat(ctx)`, `HeartbeatWith(ctx, payload)`, + `LastHeartbeat[T](ctx)`. +- `workflow.go` / `runStep` — start watchdog goroutine when `HeartbeatTimeout` is set. +- New spec: `openspec/specs/heartbeat/spec.md`. +- No breaking changes; steps that do not call `Heartbeat` are unaffected. diff --git a/openspec/changes/observability-adapters/.openspec.yaml b/openspec/changes/observability-adapters/.openspec.yaml new file mode 100644 index 0000000..905325f --- /dev/null +++ b/openspec/changes/observability-adapters/.openspec.yaml @@ -0,0 +1,2 @@ +schema: spec-driven +created: 2026-05-04 diff --git a/openspec/changes/observability-adapters/proposal.md b/openspec/changes/observability-adapters/proposal.md new file mode 100644 index 0000000..528dee9 --- /dev/null +++ b/openspec/changes/observability-adapters/proposal.md @@ -0,0 +1,210 @@ +## Why + +go-workflow has no built-in observability. When a workflow is running inside a production binary +it is a black box: operators cannot see which steps are running, how long they have been running, +how many retries have occurred, or where failures happened. Debugging requires either reading logs +(unstructured) or adding custom `BeforeStep`/`AfterStep` callbacks everywhere. + +The goal is to make a running go-workflow process a first-class citizen in standard observability +stacks — without writing any custom frontend and without adding heavy dependencies to the root +module. + +This change depends on the `structured-event-sink` change landing first, since the adapters +are implemented as `EventSink` consumers. + +## What Changes + +Three observability adapters, each as an independent submodule under `contrib/`: + +1. **`contrib/otel`** — OpenTelemetry traces, enabling Gantt-chart visualization in Jaeger, + Grafana Tempo, or any OTLP-compatible backend. +2. **`contrib/prometheus`** — Prometheus metrics, enabling time-series dashboards in Grafana. +3. **`contrib/dot`** — A live Graphviz DOT HTTP endpoint for lightweight development-time + visualization, zero external dependencies. + +## Capabilities + +--- + +### `contrib/otel` — Trace-based visualization + +Each workflow execution becomes an OTel trace. Each step becomes a child span. This maps +naturally onto Jaeger / Grafana Tempo's Gantt-chart UI, which shows parallel steps side by +side and serial steps in sequence — exactly the shape of a DAG execution. + +**Span structure — flat workflow:** + +``` +workflow.Do() ← root span (name = workflow type or user-supplied name) + ├── FetchData ← child span (name = flow.String(step)) + ├── ProcessA ← child span, starts after FetchData span ends + └── ProcessB ← child span, parallel with ProcessA + └── retry attempt 2 ← span event on ProcessB span (not a child span) +``` + +**Span structure — nested workflow (Workflow-as-a-Step):** + +Since go-workflow supports using a `Workflow` as a `Step` inside another `Workflow`, the +`EventSink` must handle the nesting correctly. The key insight is that OTel span context +propagates through `context.Context`: the inner workflow's `Do(ctx)` receives a `ctx` that +already carries the outer step's span as the active span. + +The adapter should therefore **never unconditionally create a root span**. Instead: + +``` +outer.Do(ctx) ← root span (ctx has no parent span) + ├── FetchData ← child span of root + └── InnerWorkflow ← child span of root (this is the step span) + ├── SubStepA ← child span of InnerWorkflow span + └── SubStepB ← child span of InnerWorkflow span +``` + +This works naturally because: +1. When the outer workflow starts the `InnerWorkflow` step, it calls `BeforeStep` with a ctx + that carries the outer root span. +2. The `EventSink` creates a span for `InnerWorkflow` as a child of whatever is active in ctx. +3. The inner workflow's `Do(ctx)` receives that span in ctx; inner step spans become its children. + +The adapter must inject the step span into ctx (via `BeforeStep`) so that inner workflows +inherit it. This means the EventSink needs a companion `BeforeStep` callback, not just a +passive sink — or the `WorkflowEvent` must carry the ctx. + +**Open Question**: should `WorkflowEvent` carry the `context.Context` so the EventSink can +both read the parent span and inject a new span into ctx for the step? This requires the +`Started` event to be emitted before `Do()` is called (which aligns with the current +`BeforeStep` timing) and the span to be ended on the terminal event. The structured-event-sink +design should account for this requirement. + +**API sketch:** + +```go +import "github.com/Azure/go-workflow/contrib/otel" + +tp := /* set up your TracerProvider */ +sink := workflowotel.NewEventSink(tp, + workflowotel.WithWorkflowName("my-pipeline"), +) +w := &flow.Workflow{ + EventSink: sink.Sink, + // The adapter also needs a BeforeStep to inject the step span into ctx: + // w.DefaultOption = &flow.StepOption{ /* BeforeStep: sink.BeforeStep */ } + // Exact API TBD based on how EventSink and BeforeStep compose. +} +``` + +**Visualization**: Jaeger UI, Grafana Tempo, or any OTLP trace viewer shows: +- Horizontal Gantt bars per step, colored by status (green = succeeded, red = failed). +- Parallel steps rendered side by side. +- Nested workflows appear as a span with its own children, fully indented under the parent step. +- Retry events visible as annotations on the step's span. +- Total workflow duration as the root span duration. + +**Span attributes (standard OTel semantic conventions where applicable):** + +- `workflow.step` — step name +- `workflow.step.attempt` — attempt number (1-based) +- `workflow.step.status` — terminal status string +- `error.type`, `error.message` — populated on Failed / Canceled + +--- + +### `contrib/prometheus` — Metrics-based dashboards + +Exposes Prometheus metrics that can be scraped by any Prometheus-compatible system and +visualized in Grafana. + +**Metrics:** + +| Metric | Type | Labels | Description | +|--------|------|--------|-------------| +| `workflow_step_duration_seconds` | Histogram | `step`, `status` | Duration of each step attempt | +| `workflow_step_total` | Counter | `step`, `status` | Total step terminations by status | +| `workflow_step_retries_total` | Counter | `step` | Total retry attempts | +| `workflow_running_steps` | Gauge | `step` | Currently running steps | + +**API sketch:** + +```go +import "github.com/Azure/go-workflow/contrib/prometheus" + +reg := prometheus.NewRegistry() +w := &flow.Workflow{ + EventSink: workflowprom.NewEventSink(reg, + workflowprom.WithNamespace("myapp"), + ), +} +// expose reg via /metrics as usual +``` + +**Visualization in Grafana:** +- **State timeline panel**: each step as a row, colored by last known status over time. +- **Time series panel**: step duration trends, retry rate over time. +- **Stat panel**: current running step count, total failed steps. + +Grafana dashboard JSON can be shipped in `contrib/prometheus/dashboards/` so users can +import it directly. + +--- + +### `contrib/dot` — Live DAG visualization (development tool) + +An HTTP handler that renders the current workflow state as a Graphviz DOT document. +Intended for development and debugging, not production monitoring. + +**HTTP endpoint**: mount anywhere, e.g. `GET /debug/workflow` + +**DOT output example:** +```dot +digraph workflow { + rankdir=LR + "FetchData" [shape=box color=green label="FetchData\nSucceeded 1.24s"] + "ProcessA" [shape=box color=blue label="ProcessA\nRunning 0.8s..."] + "ProcessB" [shape=box color=gray label="ProcessB\nPending"] + "FetchData" -> "ProcessA" + "FetchData" -> "ProcessB" +} +``` + +Node colors: gray = Pending, blue = Running, green = Succeeded, red = Failed, +orange = Canceled, yellow = Skipped. + +**API sketch:** + +```go +import "github.com/Azure/go-workflow/contrib/dot" + +mux.Handle("/debug/workflow", workflowdot.Handler(w)) +``` + +Rendering options for the caller: +- In browser: serve the DOT source and use [d3-graphviz](https://github.com/magjac/d3-graphviz) + to render in-page (the handler can serve a small self-contained HTML page). +- In terminal: `curl localhost:8080/debug/workflow | dot -Tsvg | open -f -a Safari`. +- In VS Code: paste into the Graphviz preview extension. + +**No external dependencies** beyond the root go-workflow module. `dot` format is plain text. + +--- + +## Dependencies + +| Adapter | External deps | +|---------|--------------| +| `contrib/otel` | `go.opentelemetry.io/otel` (already common in Go services) | +| `contrib/prometheus` | `github.com/prometheus/client_golang` | +| `contrib/dot` | none (stdlib only) | + +All deps are in submodule `go.mod` files. Root module gains zero new dependencies. + +## Prerequisite + +`structured-event-sink` change must land first. All three adapters are pure `EventSink` +consumers and require no other changes to the core. + +## Impact + +- New `contrib/otel/`, `contrib/prometheus/`, `contrib/dot/` directories, each with `go.mod`. +- No changes to root module. +- CI: add build/test jobs for each contrib submodule. +- Optionally: a `contrib/prometheus/dashboards/workflow.json` Grafana dashboard to ship + with the adapter. diff --git a/openspec/changes/pause-resume/.openspec.yaml b/openspec/changes/pause-resume/.openspec.yaml new file mode 100644 index 0000000..905325f --- /dev/null +++ b/openspec/changes/pause-resume/.openspec.yaml @@ -0,0 +1,2 @@ +schema: spec-driven +created: 2026-05-04 diff --git a/openspec/changes/pause-resume/proposal.md b/openspec/changes/pause-resume/proposal.md new file mode 100644 index 0000000..630a172 --- /dev/null +++ b/openspec/changes/pause-resume/proposal.md @@ -0,0 +1,78 @@ +## Why + +Currently a running workflow can only be stopped by canceling the context, which is destructive — +it cancels all in-flight steps and the workflow returns an error. There is no way to temporarily +pause execution (e.g., during a maintenance window, or when an operator notices something is wrong +and wants to inspect state before letting it continue) and then resume from exactly where it left off. + +Temporal supports pausing a workflow execution; go-workflow should offer an equivalent that fits +the single-process model. + +## What Changes + +- A `Pause()` method on `Workflow` that suspends scheduling of new steps without canceling + currently-running ones. +- A `Resume()` method that re-enables scheduling and wakes the tick loop. +- Running steps finish normally; paused state means "no new steps will be started until resumed". + +## Proposed Design + +### State machine addition + +Add a `paused` flag (protected by the existing `statusChange` mutex): + +``` +Normal running: paused = false → tick() schedules all runnable steps +Paused: paused = true → tick() skips scheduling, only updates status +Resumed: paused = false → statusChange.Signal() wakes the loop +``` + +### API + +```go +func (w *Workflow) Pause() error // error if workflow is not running +func (w *Workflow) Resume() error // error if workflow is not running or not paused +func (w *Workflow) IsPaused() bool +``` + +### Behavior + +- `Pause()` sets the flag; in-flight step goroutines are NOT interrupted. They will finish + naturally and update status, but the tick loop will not start new goroutines. +- `Resume()` clears the flag and signals `statusChange` so the tick loop wakes and schedules + any steps that became runnable while paused. +- Calling `Pause()` on an already-paused workflow is a no-op (or returns a sentinel error — + TBD based on what's more ergonomic). +- Context cancellation still works orthogonally: canceling the context while paused will + cancel in-flight steps; once they drain, the workflow exits normally through the existing + error path. + +### Optional: Pause with drain + +A `PauseAfterCurrent()` variant that also waits for all currently-running steps to finish +before fully pausing — useful if the caller wants a clean quiescence point before inspecting +state. + +```go +func (w *Workflow) PauseAfterCurrent(ctx context.Context) error +``` + +This blocks until the running goroutine count reaches zero, then sets `paused = true`. + +### Open Questions + +- Should `Pause()` / `Resume()` be goroutine-safe (callable from any goroutine)? Yes — + the only reasonable use case is calling them from outside the workflow goroutine. +- Should there be a `Paused` step status for steps that were Pending when Pause was called? + Probably not — they remain `Pending`. The pause is a scheduler-level concept, not a + step-level status. +- Should the `EventSink` emit a `WorkflowPaused` / `WorkflowResumed` event? + Yes, if the event-sink feature lands. + +## Impact + +- `workflow.go` — add `paused bool` field (under `statusChange.L`), `Pause()`, `Resume()`, + `IsPaused()` methods, guard in `tick()`. +- No changes to `Steper` interface or `StepConfig`. +- New spec: `openspec/specs/pause-resume/spec.md`. +- No breaking changes. diff --git a/openspec/changes/workflow-checkpointing/.openspec.yaml b/openspec/changes/workflow-checkpointing/.openspec.yaml new file mode 100644 index 0000000..905325f --- /dev/null +++ b/openspec/changes/workflow-checkpointing/.openspec.yaml @@ -0,0 +1,2 @@ +schema: spec-driven +created: 2026-05-04 diff --git a/openspec/changes/workflow-checkpointing/proposal.md b/openspec/changes/workflow-checkpointing/proposal.md new file mode 100644 index 0000000..9a435f4 --- /dev/null +++ b/openspec/changes/workflow-checkpointing/proposal.md @@ -0,0 +1,255 @@ +## Why + +go-workflow runs entirely in-process. If the process crashes mid-execution, all workflow +progress is lost and the entire workflow must restart from the beginning. For long-running +workflows this is costly — early steps that already succeeded must redo their work. + +Temporal solves this with event sourcing: every activity completion is durably recorded, +and replay reconstructs the workflow state after a crash. go-workflow cannot replicate that +architecture (it has no server, no event log), but it can offer a lightweight equivalent: +**checkpoint-based resume**. + +The idea: serialize the workflow's execution state (step statuses + step business data) to +JSON after each step terminates. On restart, reconstruct the workflow from code as usual, +then call `LoadCheckpoint` to restore state — steps that already succeeded are skipped +automatically. + +## What Cannot Be Persisted + +- **Closures and callbacks** (`Input`, `Output`, `BeforeStep`, `AfterStep`) are Go functions + and cannot be serialized. They are re-registered from code on every run. +- **DAG topology** (`DependsOn` graph) is always reconstructed from code — never persisted. + +What IS persisted: each step's terminal **status** and its **business data** (struct fields), +via standard `encoding/json`. + +## Design + +### MarshalJSON / LoadCheckpoint — not symmetric UnmarshalJSON + +The natural first instinct is `json.Marshal(w)` + `json.Unmarshal(data, w)`. However, pure +`UnmarshalJSON` cannot work on an empty `Workflow` because Go has no type registry — the +framework cannot reconstruct `*FetchData` from the string `"FetchData"` alone. + +The correct pattern is **asymmetric**: + +```go +// 1. Build workflow from code as normal (step instances exist in memory) +w := &flow.Workflow{} +fetchUser := &FetchData{URL: "/user"} +fetchProduct := &FetchData{URL: "/product"} +w.Add( + flow.Step(flow.Named("fetch-user", fetchUser)), + flow.Step(flow.Named("fetch-product", fetchProduct)), + flow.Step(processStep).DependsOn(fetchUser, fetchProduct), +) + +// 2. Load checkpoint — step instances already exist, just restore their state +data, _ := store.Get("job-2026-05-04") +if data != nil { + if err := w.LoadCheckpoint(data); err != nil { /* handle */ } +} + +// 3. Run — steps whose status was restored as Succeeded are skipped automatically +w.Do(ctx) + +// 4. Save checkpoint after each step (via AfterStep or EventSink) +data, _ = json.Marshal(w) +store.Set("job-2026-05-04", data) +``` + +`w.MarshalJSON()` is valid and useful. `w.UnmarshalJSON()` is intentionally not provided +(or is a no-op that returns an error explaining the correct pattern). + +### Step identity — the key problem + +`w.steps` is keyed by `Steper` pointer, which is unique within a process but meaningless +across restarts. Serialization requires a **stable string key** per step. + +The checkpoint key must be determined at `Add()` time and frozen — it must not change +after `Do()` executes and mutates step fields. This rules out deriving the key from +`flow.String()` or any method that reads result fields set during execution. + +The framework resolves a step's checkpoint key at `Add()` time using this priority order: + +1. **`flow.Name(step, "my-name")` wrapper** — explicit static string declared at DAG build + time. This is the canonical approach. `flow.Name` already exists in the codebase. +2. **Type name alone** — valid only if exactly one instance of that type exists in the + workflow (e.g. a single `*ProcessData` → key is `"ProcessData"`). +3. **Multiple instances with no static name** — `MarshalJSON` returns an error, forcing + the user to use `flow.Name()` explicitly. + +**`flow.NameFunc` / `flow.NameStringer` are explicitly excluded from checkpoint key +resolution.** Their names are evaluated at runtime and may change between the time the +workflow is built and the time `MarshalJSON` is called (e.g. a step whose `String()` +depends on a field written by `Do()`). Using them as checkpoint keys would produce +unstable identifiers that break resume. If a step is wrapped with `NameFunc` or +`NameStringer`, `MarshalJSON` returns an error directing the user to use `flow.Name()` +instead. + +```go +// Single instance — naming optional, type name used automatically +w.Add(flow.Step(processStep)) // key: "ProcessData" + +// Multiple instances of same type — static name required +w.Add( + flow.Name(&FetchData{URL: "/user"}, "fetch-user"), + flow.Name(&FetchData{URL: "/product"}, "fetch-product"), +) +``` + +### JSON structure + +`MarshalJSON` only serializes **terminated** steps (Succeeded, Failed, Skipped, Canceled). +Running and Pending steps are omitted entirely. + +This has two important consequences: + +1. **No pause needed for live checkpoint saving.** The only risk of a data race would be + serializing a step whose struct fields are being mutated concurrently inside `Do()`. + Since terminated steps have already returned from `Do()`, their data is stable. + Running steps are skipped, so there is nothing to race on. `json.Marshal(w)` is safe + to call at any point during workflow execution without pausing or locking. + +2. **Running steps always re-run on resume.** This is correct behavior — a step that was + mid-execution when the process crashed has unknown partial state and must restart. + +**Flat workflow:** +```json +{ + "fetch-user": { + "status": "Succeeded", + "data": { "URL": "/user", "Result": "{ ... }" } + }, + "fetch-product": { + "status": "Failed", + "data": { "URL": "/product", "Result": "" }, + "error": "connection refused" + } +} +``` + +**Nested workflow (Workflow-as-a-Step):** + +Sub-workflows use a nested JSON structure that mirrors the actual DAG hierarchy. +The outer entry for the sub-workflow step carries its own status, and a `"steps"` key +holds the inner workflow's terminated steps recursively. + +```json +{ + "fetch-user": { + "status": "Succeeded", + "data": { "URL": "/user", "Result": "{ ... }" } + }, + "InnerWorkflow": { + "status": "Running", + "steps": { + "SubStepA": { "status": "Succeeded", "data": { ... } }, + "SubStepB": { "status": "Failed", "data": { ... }, "error": "..." } + } + } +} +``` + +`LoadCheckpoint` recurses into `"steps"` when restoring a sub-workflow step, matching +inner step instances by name within the inner workflow's own `steps` map. The outer +workflow's key for the sub-workflow step (`"InnerWorkflow"`) is resolved by the same +Named/Name()/type-name priority as any other step. + +Step `data` is serialized via the step's own `json.Marshal`. Steps that do not implement +`json.Marshaler` are serialized with `encoding/json` reflection — this works automatically +for plain data structs. Fields tagged `json:"-"` are excluded as usual. + +### Resume semantics + +| Persisted status | Behavior on resume | +|------------------|--------------------| +| `Succeeded` | Skip — treat as already done, downstream can proceed | +| `Skipped` | Skip — preserve original skip decision | +| `Failed` | Re-run — the step failed last time, try again | +| `Canceled` | Re-run — the cancellation may have been transient | +| `Running` | Re-run — process crashed mid-execution | + +### Saving the checkpoint + +The framework does not dictate where or when checkpoints are saved — that is the caller's +responsibility. Since `MarshalJSON` only serializes terminated steps and is safe to call +concurrently with a running workflow, **no pause is needed**. + +The recommended pattern is to save after each step terminates, via a global `AfterStep` +callback or an `EventSink` consumer (once that feature lands). This gives the finest +checkpoint granularity — one step at a time — without any coordination overhead: + +```go +// Using DefaultOption to attach a global AfterStep to all steps +w.DefaultOption = &flow.StepOption{ + // AfterStep is called after every step terminates (after Do() returns) +} +// Or via EventSink on terminal events — the step's Do() has already returned, +// so json.Marshal(w) is safe to call immediately. +``` + +The caller chooses the storage backend. The framework is storage-agnostic: it only provides +`MarshalJSON` and `LoadCheckpoint`; the caller writes the bytes wherever makes sense +(local file, Redis, SQL, object storage, etc.). + +There is no `WorkflowID` field on `Workflow`. The checkpoint key is entirely the caller's +concern — it can be a job date, a request ID, a UUID, or any stable identifier for this +particular execution. + +### Idempotency requirement + +Checkpointing does not make steps idempotent automatically. **Steps with side effects must +be idempotent themselves** — or check whether the side effect already occurred before +performing it. This is the same requirement Temporal places on Activities. + +For steps whose output is large or cannot be reconstructed from struct fields alone, +the recommended pattern is to persist outputs to external storage inside `Do()` and reload +them in an `Input` callback on resume: + +```go +Step(fetchUser).Input(func(ctx context.Context, s *FetchData) error { + if s.Result != "" { + return nil // already loaded from checkpoint, skip + } + result, err := externalStore.Get(s.URL) + s.Result = result + return err +}) +``` + +## Open Questions + +- Should `LoadCheckpoint` restore `Failed` step data (struct fields) so that a retried step + can inspect what was attempted before? Probably yes — useful for partial-progress patterns. + +- Should `MarshalJSON` include only terminated steps, or all steps including `Pending`? + Only terminated is smaller and sufficient; `Pending` steps have no meaningful data yet. + +- Should there be a `w.ClearCheckpoint()` or similar that resets all persisted statuses + without rebuilding the whole workflow? This would allow forced full reruns without code + changes. + +- For sub-workflows (Workflow-as-a-Step): should the outer workflow's `MarshalJSON` + recursively include inner workflow step states? This requires the inner workflow to also + participate in the checkpoint key namespace, which may require namespacing + (e.g. `"InnerWorkflow.SubStepA"`). + +## Relationship to Other Changes + +- **`heartbeat-and-liveness`**: heartbeat payloads serve intra-run progress recovery + (across retries within one process run). Checkpointing serves inter-run recovery + (across process restarts). The two are orthogonal and composable. +- **`structured-event-sink`**: checkpoint saving can be implemented as an EventSink + consumer for a clean integration point, once that feature lands. + +## Impact + +- `workflow.go` — implement `MarshalJSON` and `LoadCheckpoint([]byte) error`. +- `step.go` / `wrap.go` — add `flow.Named(name string, step S) *NamedStep[S]` wrapper + (analogous to `flow.Mock`); add step key resolution logic. +- No new external dependencies in the root module. +- `contrib/checkpoint/` submodule (optional): ready-made save/load helpers for common + stores (file, Redis, SQL) to reduce boilerplate. +- New spec: `openspec/specs/checkpointing/spec.md`. +- No breaking changes. diff --git a/openspec/specs/execution-model/spec.md b/openspec/specs/execution-model/spec.md index 05cc949..d061415 100644 --- a/openspec/specs/execution-model/spec.md +++ b/openspec/specs/execution-model/spec.md @@ -123,6 +123,42 @@ succeed/skip depending on `SkipAsError`), `Do` returns `nil`. --- +### Requirement: ErrWorkflow error output ordering + +`ErrWorkflow.Error()` SHALL output steps sorted by `StepResult.FinishedAt` in ascending +order (earliest-finishing step first), so that the error message reflects the execution +timeline. + +Steps with a zero `FinishedAt` (i.e., steps that never executed) SHALL appear last. + +When two or more steps share an identical `FinishedAt` value, they SHALL be sorted +by their string name (`flow.String(step)`) in ascending lexicographic order to produce +a stable, deterministic output. + +`ErrWorkflow.Unwrap()` SHALL return errors in the same sorted order. + +#### Scenario: Single-step workflow failure output +- **WHEN** a workflow with one failed step produces `ErrWorkflow` +- **THEN** `ErrWorkflow.Error()` contains exactly that step's output + +#### Scenario: Multi-step output is sorted by finish time +- **WHEN** steps A, B, C finish in that order (A earliest, C latest) +- **THEN** `ErrWorkflow.Error()` lists them A, B, C regardless of map iteration order + +#### Scenario: Never-executed steps appear last +- **WHEN** some steps have zero `FinishedAt` (never ran) and others have non-zero timestamps +- **THEN** `ErrWorkflow.Error()` lists all non-zero-timestamp steps first, zero-timestamp steps last + +#### Scenario: Tie-breaking by name +- **WHEN** two steps have identical `FinishedAt` values +- **THEN** `ErrWorkflow.Error()` lists them in ascending lexicographic order by step name + +#### Scenario: Unwrap order matches Error order +- **WHEN** `ErrWorkflow.Unwrap()` is called +- **THEN** the returned error slice is in the same order as `ErrWorkflow.Error()` output + +--- + ### Requirement: Empty Workflow is a no-op A Workflow with no Steps SHALL return `nil` from `Do` immediately without blocking. @@ -171,3 +207,41 @@ that was never added to the Workflow SHALL also return `nil`. #### Scenario: StateOf unknown step returns nil - **WHEN** `workflow.StateOf(step)` is called for a Step not added to the Workflow - **THEN** it returns `nil` without panicking + +--- + +### Requirement: StepResult carries finish timestamp + +`StepResult` SHALL include a `FinishedAt time.Time` field that records the moment +the step goroutine transitioned to a terminal status (`Succeeded`, `Failed`, `Canceled`, +or `Skipped`). + +The timestamp SHALL be recorded using the Workflow's injected `clock.Clock`, so that +tests using a mock clock produce deterministic values. + +Steps that are never executed (e.g., never transitioned to `Running`) SHALL have a +zero `FinishedAt` value. + +#### Scenario: Succeeded step has FinishedAt set +- **WHEN** `step.Do(ctx)` returns `nil` and the step transitions to `Succeeded` +- **THEN** `StepResult.FinishedAt` is set to `clock.Now()` at the moment of transition + +#### Scenario: Failed step has FinishedAt set +- **WHEN** `step.Do(ctx)` returns a non-nil error and the step transitions to `Failed` +- **THEN** `StepResult.FinishedAt` is set to `clock.Now()` at the moment of transition + +#### Scenario: Canceled step has FinishedAt set +- **WHEN** a step is canceled and transitions to `Canceled` +- **THEN** `StepResult.FinishedAt` is set to `clock.Now()` at the moment of transition + +#### Scenario: Skipped step has FinishedAt set +- **WHEN** a step's Condition evaluates to `Skipped` and the step never runs +- **THEN** `StepResult.FinishedAt` is set to `clock.Now()` at the moment of the skip transition + +#### Scenario: Never-executed step has zero FinishedAt +- **WHEN** a step remains `Pending` at the end of workflow execution +- **THEN** `StepResult.FinishedAt` is the zero value of `time.Time` + +#### Scenario: FinishedAt available in Condition functions +- **WHEN** a Condition function receives `map[Steper]StepResult` for upstream steps +- **THEN** `FinishedAt` is populated for all terminated upstream steps and available to the condition logic From 1f7002d94f041fd64030c77c5ca4baeb8e00c6c4 Mon Sep 17 00:00:00 2001 From: Xingfei Xu Date: Tue, 5 May 2026 06:26:32 +0000 Subject: [PATCH 04/31] chore: include state/workflow changes and implementation plan --- .../2026-05-05-errworkflow-execution-order.md | 567 ++++++++++++++++++ state.go | 11 + workflow.go | 2 + 3 files changed, 580 insertions(+) create mode 100644 docs/superpowers/plans/2026-05-05-errworkflow-execution-order.md diff --git a/docs/superpowers/plans/2026-05-05-errworkflow-execution-order.md b/docs/superpowers/plans/2026-05-05-errworkflow-execution-order.md new file mode 100644 index 0000000..ac38abe --- /dev/null +++ b/docs/superpowers/plans/2026-05-05-errworkflow-execution-order.md @@ -0,0 +1,567 @@ +# ErrWorkflow Execution Order Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Make `ErrWorkflow.Error()` output steps in execution-finish order by adding a `FinishedAt time.Time` field to `StepResult` and sorting on output. + +**Architecture:** Add `FinishedAt` to the public `StepResult` struct; record it via a new `State.SetFinishedAt` setter in the single step-goroutine defer that already calls `SetStatus`/`SetError`; for condition-skipped steps record it at the `state.SetStatus(nextStatus)` site in `tick()`; sort in `ErrWorkflow.Error()` and `Unwrap()` using a shared helper. + +**Tech Stack:** Go stdlib (`sort`, `time`), `github.com/benbjohnson/clock` (already a project dependency, used for testable time) + +--- + +## Files + +| File | Change | +|------|--------| +| `error.go` | Add `FinishedAt time.Time` to `StepResult`; add `sort` import; add `sortedSteps` helper; rewrite `Error()` and `Unwrap()` | +| `state.go` | Add `SetFinishedAt(time.Time)` and `GetFinishedAt() time.Time` methods to `State` | +| `workflow.go` | Call `state.SetFinishedAt(w.Clock.Now())` in the step-goroutine defer and in the condition-skip branch of `tick()` | +| `error_test.go` | Add tests for `ErrWorkflow` ordering | +| `execution_model_test.go` | Add test that `StepResult.FinishedAt` is populated after execution | +| `condition_test.go` | Update `StepResult{...}` literals to use field names | + +--- + +## Task 1: Add `FinishedAt` to `StepResult` + +**Files:** +- Modify: `error.go` (around line 106 — the `StepResult` struct) + +- [ ] **Step 1: Add the field** + + In `error.go`, update `StepResult`: + + ```go + // StepResult contains the status and error of a Step. + type StepResult struct { + Status StepStatus + Err error + FinishedAt time.Time + } + ``` + + Also add `"time"` to the import block at the top of `error.go`: + + ```go + import ( + "fmt" + "runtime" + "sort" + "strings" + "time" + ) + ``` + +- [ ] **Step 2: Verify it compiles** + + ```bash + go build ./... + ``` + + Expected: compile error in `condition_test.go` about `StepResult` composite literal — **this is expected**, we'll fix it in Task 4. + +--- + +## Task 2: Add `SetFinishedAt` / `GetFinishedAt` to `State` + +**Files:** +- Modify: `state.go` (after `SetError`/`GetError` around line 33) + +- [ ] **Step 1: Add setter and getter** + + In `state.go`, after the `SetError` method, add: + + ```go + func (s *State) GetFinishedAt() time.Time { + s.RLock() + defer s.RUnlock() + return s.FinishedAt + } + func (s *State) SetFinishedAt(t time.Time) { + s.Lock() + defer s.Unlock() + s.FinishedAt = t + } + ``` + + Add `"time"` to the import block in `state.go`: + + ```go + import ( + "context" + "sync" + "time" + ) + ``` + +- [ ] **Step 2: Verify it compiles (ignoring test errors)** + + ```bash + go build ./... + ``` + + Expected: same compile errors as before in tests only. Core package builds. + +--- + +## Task 3: Record `FinishedAt` at Step Termination + +**Files:** +- Modify: `workflow.go` + +There are **two termination sites** to update: + +### Site A — step goroutine defer (running steps) + +Around line 402–406 in `workflow.go`: + +```go +defer func() { + state.SetStatus(status) + state.SetError(err) + w.unlease() + w.signalStatusChange() +}() +``` + +### Site B — condition-skip in `tick()` (steps skipped before running) + +Around line 387–393: + +```go +if nextStatus := cond(ctx, ups); nextStatus.IsTerminated() { + state.SetStatus(nextStatus) + w.waitGroup.Add(1) + go func() { + defer w.waitGroup.Done() + w.signalStatusChange() + }() + continue +} +``` + +- [ ] **Step 1: Update the step-goroutine defer (Site A)** + + Change the defer to record `FinishedAt` before setting status: + + ```go + defer func() { + state.SetFinishedAt(w.Clock.Now()) + state.SetStatus(status) + state.SetError(err) + w.unlease() + w.signalStatusChange() + }() + ``` + +- [ ] **Step 2: Update the condition-skip branch (Site B)** + + Change the condition-skip block to also record `FinishedAt`: + + ```go + if nextStatus := cond(ctx, ups); nextStatus.IsTerminated() { + state.SetFinishedAt(w.Clock.Now()) + state.SetStatus(nextStatus) + w.waitGroup.Add(1) + go func() { + defer w.waitGroup.Done() + w.signalStatusChange() + }() + continue + } + ``` + +- [ ] **Step 3: Verify it compiles** + + ```bash + go build ./... + ``` + + Expected: same test-only errors, core package builds. + +--- + +## Task 4: Fix `condition_test.go` Struct Literal + +**Files:** +- Modify: `condition_test.go` (around line 28) + +The composite literal `flow.StepResult{Status: ..., Err: ...}` already uses field names, so it will compile without changes. Verify this: + +- [ ] **Step 1: Check the literal** + + ```bash + grep -n "StepResult{" condition_test.go + ``` + + Expected output: + ``` + 28: ups[s] = flow.StepResult{ + ``` + + The block at line 28–31 uses named fields (`Status:`, `Err:`), so it is fine. No edit needed. + +- [ ] **Step 2: Verify tests compile** + + ```bash + go test -run NOMATCH ./... 2>&1 | grep -v "^ok" + ``` + + Expected: no compile errors. + +--- + +## Task 5: Implement `sortedSteps` and Rewrite `Error()` / `Unwrap()` + +**Files:** +- Modify: `error.go` + +- [ ] **Step 1: Write a failing test first** + + In `error_test.go`, add: + + ```go + func TestErrWorkflowErrorOrdering(t *testing.T) { + t.Run("sorted by FinishedAt ascending", func(t *testing.T) { + now := time.Now() + type namedStep struct{ name string } + // We need actual Steper values; use the existing test helpers. + // Build ErrWorkflow manually with known FinishedAt values. + a := &namedStep{"A-step"} + b := &namedStep{"B-step"} + c := &namedStep{"C-step"} + + // Use a real workflow so steps are proper Steper instances. + // Instead, we test via a real workflow run with mock clock. + mockClock := clock.NewMock() + w := &flow.Workflow{Clock: mockClock} + + errA := fmt.Errorf("A failed") + errB := fmt.Errorf("B failed") + errC := fmt.Errorf("C failed") + _ = a; _ = b; _ = c; _ = errA; _ = errB; _ = errC; _ = now + // Real ordering test is in TestErrWorkflowOrderingIntegration below. + _ = w + }) + } + ``` + + > Actually, skip the manual struct test — the integration test below is more meaningful. Remove the stub above and add only the integration test. + + Replace the above with this in `error_test.go`: + + ```go + import ( + "context" + "fmt" + "strings" + "testing" + "time" + + flow "github.com/Azure/go-workflow" + "github.com/benbjohnson/clock" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + ) + + // serialStep is a step that signals when it starts and waits to be released. + type serialStep struct { + name string + started chan struct{} + release chan struct{} + err error + } + + func newSerialStep(name string, err error) *serialStep { + return &serialStep{name: name, started: make(chan struct{}, 1), release: make(chan struct{}), err: err} + } + func (s *serialStep) Do(_ context.Context) error { + s.started <- struct{}{} + <-s.release + return s.err + } + func (s *serialStep) String() string { return s.name } + + func TestErrWorkflowOutputOrdering(t *testing.T) { + // Build a 3-step serial chain: A -> B -> C + // Step names are chosen so alphabetical != execution order: C, A, B + mockClock := clock.NewMock() + stepC := newSerialStep("C-first", fmt.Errorf("C failed")) + stepA := newSerialStep("A-second", fmt.Errorf("A failed")) + stepB := newSerialStep("B-third", fmt.Errorf("B failed")) + + w := &flow.Workflow{Clock: mockClock} + w.Add( + flow.Step(stepC), + flow.Step(stepA).DependsOn(stepC), + flow.Step(stepB).DependsOn(stepA), + ) + + done := make(chan error, 1) + go func() { done <- w.Do(context.Background()) }() + + // C runs first — let it finish + <-stepC.started + mockClock.Add(time.Second) + close(stepC.release) + + // A runs second + <-stepA.started + mockClock.Add(time.Second) + close(stepA.release) + + // B runs third + <-stepB.started + mockClock.Add(time.Second) + close(stepB.release) + + err := <-done + require.Error(t, err) + + var errW flow.ErrWorkflow + require.ErrorAs(t, err, &errW) + + output := errW.Error() + posC := strings.Index(output, "C-first") + posA := strings.Index(output, "A-second") + posB := strings.Index(output, "B-third") + + assert.Greater(t, posA, posC, "A-second should appear after C-first in output") + assert.Greater(t, posB, posA, "B-third should appear after A-second in output") + } + + func TestErrWorkflowTieBreakByName(t *testing.T) { + // Two steps with identical FinishedAt → sort by name + mockClock := clock.NewMock() + now := mockClock.Now() + + e := flow.ErrWorkflow{ + // We can't easily construct Steper keys without running a workflow. + // Test via integration: two parallel steps finishing at same clock tick. + } + _ = e; _ = now + // See TestErrWorkflowTieBreakIntegration below. + } + + func TestErrWorkflowTieBreakIntegration(t *testing.T) { + // Two parallel steps, both fail at the same clock tick → output is alphabetical. + mockClock := clock.NewMock() + stepZ := newSerialStep("Z-step", fmt.Errorf("Z failed")) + stepA := newSerialStep("A-step", fmt.Errorf("A failed")) + + w := &flow.Workflow{Clock: mockClock} + w.Add(flow.Step(stepZ), flow.Step(stepA)) + + done := make(chan error, 1) + go func() { done <- w.Do(context.Background()) }() + + // Both steps start in parallel; release them before advancing clock + <-stepZ.started + <-stepA.started + // Advance clock THEN release — both get same timestamp + mockClock.Add(time.Second) + close(stepZ.release) + close(stepA.release) + + err := <-done + require.Error(t, err) + + var errW flow.ErrWorkflow + require.ErrorAs(t, err, &errW) + + output := errW.Error() + posA := strings.Index(output, "A-step") + posZ := strings.Index(output, "Z-step") + assert.Less(t, posA, posZ, "A-step should appear before Z-step (tie-break by name)") + } + ``` + +- [ ] **Step 2: Run test to verify it fails** + + ```bash + go test -run "TestErrWorkflow" ./... -v 2>&1 | tail -20 + ``` + + Expected: FAIL — output ordering assertions fail (map iteration is random). + +- [ ] **Step 3: Implement `sortedSteps` and update `Error()` / `Unwrap()`** + + In `error.go`, add the helper and update both methods: + + ```go + // sortedSteps returns the steps in ErrWorkflow sorted by FinishedAt ascending. + // Steps with zero FinishedAt (never ran) sort last. + // Tie-break: lexicographic order of String(step). + func sortedSteps(e ErrWorkflow) []Steper { + steps := make([]Steper, 0, len(e)) + for step := range e { + steps = append(steps, step) + } + sort.Slice(steps, func(i, j int) bool { + ti := e[steps[i]].FinishedAt + tj := e[steps[j]].FinishedAt + zeroI := ti.IsZero() + zeroJ := tj.IsZero() + if zeroI != zeroJ { + return !zeroI // non-zero before zero + } + if !ti.Equal(tj) { + return ti.Before(tj) + } + return String(steps[i]) < String(steps[j]) + }) + return steps + } + + func (e ErrWorkflow) Unwrap() []error { + steps := sortedSteps(e) + rv := make([]error, 0, len(e)) + for _, step := range steps { + rv = append(rv, e[step].Err) + } + return rv + } + + // ErrWorkflow will be printed as: + // + // Step: [Status] + // error message + func (e ErrWorkflow) Error() string { + var builder strings.Builder + for _, step := range sortedSteps(e) { + builder.WriteString(fmt.Sprintf("%s: ", String(step))) + builder.WriteString(fmt.Sprintln(e[step].Error())) + } + return builder.String() + } + ``` + +- [ ] **Step 4: Run the ordering tests** + + ```bash + go test -run "TestErrWorkflow" ./... -v 2>&1 | tail -30 + ``` + + Expected: all pass. + +- [ ] **Step 5: Commit** + + ```bash + git add error.go state.go workflow.go error_test.go condition_test.go + git commit -m "feat: add FinishedAt to StepResult, sort ErrWorkflow output by execution order" + ``` + +--- + +## Task 6: Test `FinishedAt` Population in `execution_model_test.go` + +**Files:** +- Modify: `execution_model_test.go` + +- [ ] **Step 1: Write failing test** + + Add to `execution_model_test.go`: + + ```go + func TestStepResultFinishedAtPopulated(t *testing.T) { + mockClock := clock.NewMock() + step := &succeededStep{} // uses the existing test helper in testutil_test.go + w := &Workflow{Clock: mockClock} + w.Add(Step(step)) + + mockClock.Add(time.Second) // advance so FinishedAt is non-zero + err := w.Do(context.Background()) + assert.NoError(t, err) + + state := w.StateOf(step) + result := state.GetStepResult() + assert.False(t, result.FinishedAt.IsZero(), "FinishedAt should be populated after step execution") + assert.Equal(t, mockClock.Now(), result.FinishedAt) + } + ``` + + Check what helpers exist in `testutil_test.go`: + + ```bash + grep -n "succeededStep\|failedStep\|type.*Step" testutil_test.go | head -20 + ``` + + Adjust the step type name based on what you see. + +- [ ] **Step 2: Run to verify it fails** + + ```bash + go test -run "TestStepResultFinishedAtPopulated" ./... -v + ``` + + Expected: FAIL — `FinishedAt` is zero because we haven't wired it up yet. + + > **Note:** If Task 3 is already done, this test may already pass. In that case, skip to Step 4. + +- [ ] **Step 3: Verify wiring from Task 3 makes it pass** + + The `SetFinishedAt` calls added in Task 3 should make this pass. Run: + + ```bash + go test -run "TestStepResultFinishedAtPopulated" ./... -v + ``` + + Expected: PASS. + +- [ ] **Step 4: Commit** + + ```bash + git add execution_model_test.go + git commit -m "test: verify StepResult.FinishedAt is populated after step execution" + ``` + +--- + +## Task 7: Full Test Suite and Final Verification + +- [ ] **Step 1: Run all tests** + + ```bash + go test ./... -count=1 + ``` + + Expected: all pass, no failures. + +- [ ] **Step 2: Run vet** + + ```bash + go vet ./... + ``` + + Expected: no output (no issues). + +- [ ] **Step 3: Run build** + + ```bash + go build ./... + ``` + + Expected: no output (clean build). + +- [ ] **Step 4: Final commit if anything was adjusted** + + ```bash + git status + ``` + + If there are uncommitted changes: + + ```bash + git add -p + git commit -m "fix: address review feedback" + ``` + +--- + +## Self-Review Notes + +- **`condition_test.go`**: The literal at line 28 already uses named fields (`Status:`, `Err:`), so it compiles without edits. The plan reflects this (Task 4 is a verify-only step). +- **Clock timing in tests**: `clock.NewMock()` starts at a fixed non-zero time, so `mockClock.Now()` after `Add(time.Second)` gives a consistent value. The test in Task 6 advances before the run — but `FinishedAt` is recorded *during* the run at whatever `Clock.Now()` returns then. Adjust the assertion to `assert.False(t, result.FinishedAt.IsZero())` if the exact value is hard to pin. +- **Parallel steps tie-break test**: both goroutines call `w.Clock.Now()` in their defers. With `clock.Mock`, concurrent calls return the same value, which is exactly what we need for the tie-break test. +- **`StateOf` visibility**: `w.StateOf(step)` is used in existing tests — it's an exported method so it's accessible from `_test` package. diff --git a/state.go b/state.go index 8c1d099..688b9c0 100644 --- a/state.go +++ b/state.go @@ -3,6 +3,7 @@ package flow import ( "context" "sync" + "time" ) // State is the internal state of a Step in a Workflow. @@ -35,6 +36,16 @@ func (s *State) SetError(err error) { defer s.Unlock() s.Err = err } +func (s *State) GetFinishedAt() time.Time { + s.RLock() + defer s.RUnlock() + return s.FinishedAt +} +func (s *State) SetFinishedAt(t time.Time) { + s.Lock() + defer s.Unlock() + s.FinishedAt = t +} func (s *State) GetStepResult() StepResult { s.RLock() defer s.RUnlock() diff --git a/workflow.go b/workflow.go index a455a25..70d6242 100644 --- a/workflow.go +++ b/workflow.go @@ -384,6 +384,7 @@ func (w *Workflow) tick(ctx context.Context) bool { } // if condition is evaluated to terminate if nextStatus := cond(ctx, ups); nextStatus.IsTerminated() { + state.SetFinishedAt(w.Clock.Now()) state.SetStatus(nextStatus) w.waitGroup.Add(1) go func() { @@ -402,6 +403,7 @@ func (w *Workflow) tick(ctx context.Context) bool { var err error status := Failed defer func() { + state.SetFinishedAt(w.Clock.Now()) state.SetStatus(status) state.SetError(err) // Release the lease BEFORE signalling, so that when the main From 7e4aaeea5ff0da0dd12754a4c0eec2c99a462787 Mon Sep 17 00:00:00 2001 From: Xingfei Xu Date: Tue, 5 May 2026 12:45:29 +0000 Subject: [PATCH 05/31] fix: address code review feedback - ErrWorkflow.Unwrap(): skip nil errors (fixes errors.Is/As correctness) - State.SetStepResult(): atomic write for Status+Err+FinishedAt under one lock - reset(): clear full StepResult (not just Status) to avoid stale FinishedAt - tests: guard strings.Index assertions with require >= 0 --- error.go | 4 +++- error_test.go | 5 +++++ state.go | 5 +++++ workflow.go | 18 +++++++++++------- 4 files changed, 24 insertions(+), 8 deletions(-) diff --git a/error.go b/error.go index 69cc1c0..e5ab8d3 100644 --- a/error.go +++ b/error.go @@ -159,7 +159,9 @@ func (e ErrWorkflow) Unwrap() []error { steps := sortedSteps(e) rv := make([]error, 0, len(e)) for _, step := range steps { - rv = append(rv, e[step].Err) + if err := e[step].Err; err != nil { + rv = append(rv, err) + } } return rv } diff --git a/error_test.go b/error_test.go index 09139f3..6e3c501 100644 --- a/error_test.go +++ b/error_test.go @@ -87,6 +87,9 @@ func TestErrWorkflowOutputOrdering(t *testing.T) { posA := strings.Index(output, "A-second") posB := strings.Index(output, "B-third") + require.GreaterOrEqual(t, posC, 0, "C-first not found in error output") + require.GreaterOrEqual(t, posA, 0, "A-second not found in error output") + require.GreaterOrEqual(t, posB, 0, "B-third not found in error output") assert.Greater(t, posA, posC, "A-second should appear after C-first") assert.Greater(t, posB, posA, "B-third should appear after A-second") } @@ -118,5 +121,7 @@ func TestErrWorkflowTieBreakByName(t *testing.T) { output := errW.Error() posA := strings.Index(output, "A-step") posZ := strings.Index(output, "Z-step") + require.GreaterOrEqual(t, posA, 0, "A-step not found in error output") + require.GreaterOrEqual(t, posZ, 0, "Z-step not found in error output") assert.Less(t, posA, posZ, "A-step should appear before Z-step (tie-break by name)") } diff --git a/state.go b/state.go index 688b9c0..a4e11cd 100644 --- a/state.go +++ b/state.go @@ -51,6 +51,11 @@ func (s *State) GetStepResult() StepResult { defer s.RUnlock() return s.StepResult } +func (s *State) SetStepResult(r StepResult) { + s.Lock() + defer s.Unlock() + s.StepResult = r +} func (s *State) Upstreams() Set[Steper] { if s.Config == nil { return nil diff --git a/workflow.go b/workflow.go index 70d6242..57c817d 100644 --- a/workflow.go +++ b/workflow.go @@ -244,7 +244,7 @@ func (w *Workflow) Reset() error { func (w *Workflow) reset() { for _, state := range w.steps { - state.SetStatus(Pending) + state.SetStepResult(StepResult{Status: Pending}) } if w.Clock == nil { w.Clock = clock.New() @@ -355,7 +355,7 @@ func (w *Workflow) preflight() error { } // reset all Steps' status to Pending for _, step := range w.steps { - step.SetStatus(Pending) + step.SetStepResult(StepResult{Status: Pending}) } return nil } @@ -384,8 +384,10 @@ func (w *Workflow) tick(ctx context.Context) bool { } // if condition is evaluated to terminate if nextStatus := cond(ctx, ups); nextStatus.IsTerminated() { - state.SetFinishedAt(w.Clock.Now()) - state.SetStatus(nextStatus) + state.SetStepResult(StepResult{ + Status: nextStatus, + FinishedAt: w.Clock.Now(), + }) w.waitGroup.Add(1) go func() { defer w.waitGroup.Done() @@ -403,9 +405,11 @@ func (w *Workflow) tick(ctx context.Context) bool { var err error status := Failed defer func() { - state.SetFinishedAt(w.Clock.Now()) - state.SetStatus(status) - state.SetError(err) + state.SetStepResult(StepResult{ + Status: status, + Err: err, + FinishedAt: w.Clock.Now(), + }) // Release the lease BEFORE signalling, so that when the main // loop wakes up in tick() it can immediately acquire a new lease. // Previously unlease() was a separate earlier defer (LIFO), meaning From 50bdb89ba6434aab50377c4dbd2442e88b608ab6 Mon Sep 17 00:00:00 2001 From: Xingfei Xu Date: Tue, 5 May 2026 12:48:05 +0000 Subject: [PATCH 06/31] chore: remove unrelated in-progress changes from PR --- .../contrib-common-steps/.openspec.yaml | 2 - .../changes/contrib-common-steps/proposal.md | 64 ----- .../changes/dynamic-fanout/.openspec.yaml | 2 - openspec/changes/dynamic-fanout/proposal.md | 89 ------ .../heartbeat-and-liveness/.openspec.yaml | 2 - .../heartbeat-and-liveness/proposal.md | 54 ---- .../observability-adapters/.openspec.yaml | 2 - .../observability-adapters/proposal.md | 210 --------------- openspec/changes/pause-resume/.openspec.yaml | 2 - openspec/changes/pause-resume/proposal.md | 78 ------ .../workflow-checkpointing/.openspec.yaml | 2 - .../workflow-checkpointing/proposal.md | 255 ------------------ 12 files changed, 762 deletions(-) delete mode 100644 openspec/changes/contrib-common-steps/.openspec.yaml delete mode 100644 openspec/changes/contrib-common-steps/proposal.md delete mode 100644 openspec/changes/dynamic-fanout/.openspec.yaml delete mode 100644 openspec/changes/dynamic-fanout/proposal.md delete mode 100644 openspec/changes/heartbeat-and-liveness/.openspec.yaml delete mode 100644 openspec/changes/heartbeat-and-liveness/proposal.md delete mode 100644 openspec/changes/observability-adapters/.openspec.yaml delete mode 100644 openspec/changes/observability-adapters/proposal.md delete mode 100644 openspec/changes/pause-resume/.openspec.yaml delete mode 100644 openspec/changes/pause-resume/proposal.md delete mode 100644 openspec/changes/workflow-checkpointing/.openspec.yaml delete mode 100644 openspec/changes/workflow-checkpointing/proposal.md diff --git a/openspec/changes/contrib-common-steps/.openspec.yaml b/openspec/changes/contrib-common-steps/.openspec.yaml deleted file mode 100644 index 905325f..0000000 --- a/openspec/changes/contrib-common-steps/.openspec.yaml +++ /dev/null @@ -1,2 +0,0 @@ -schema: spec-driven -created: 2026-05-04 diff --git a/openspec/changes/contrib-common-steps/proposal.md b/openspec/changes/contrib-common-steps/proposal.md deleted file mode 100644 index 3f26ab8..0000000 --- a/openspec/changes/contrib-common-steps/proposal.md +++ /dev/null @@ -1,64 +0,0 @@ -## Why - -go-workflow's root module is intentionally minimal: it provides the orchestration framework -and nothing more. But users repeatedly solve the same problems on top of it — waiting for a -signal, rate-limiting against an external API, sending a notification when something fails, -implementing a human-approval gate, etc. - -Today each user reinvents these patterns from scratch. A `contrib/` subtree of submodules -can provide battle-tested, reusable step implementations while keeping the root module lean -and dependency-free. - -This mirrors the Go ecosystem pattern (e.g., `golang.org/x/...`, OpenTelemetry's -`go.opentelemetry.io/contrib/...`). - -## What Changes - -- A new `contrib/` directory at the repo root, containing independent Go modules. -- Each submodule has its own `go.mod` and can pull in external dependencies without - affecting users of the root module. -- The root module has zero new dependencies. - -## Capabilities - -### New Capabilities - -**Initial candidate submodules** (exact list TBD, these are ideas): - -- **`contrib/steps`** — general-purpose reusable steps: - - `WaitSignal[T]` — a step that blocks until a value is sent on a channel, useful for - human-approval gates or inter-workflow coordination. - - `Delay` — a step that waits for a fixed duration (respects context cancellation). - - `Gate` — a step that waits until a predicate becomes true (polls with interval + timeout). - -- **`contrib/notify`** — notification steps (Slack, PagerDuty, email, webhook) to use in - `AnyFailed` / cleanup branches. - -- **`contrib/ratelimit`** — a `BeforeStep` callback factory that applies token-bucket or - leaky-bucket rate limiting across a group of steps (useful when steps call the same - external API). - -- **`contrib/otel`** — OpenTelemetry integration: an `EventSink` (once that feature lands) - plus a `BeforeStep`/`AfterStep` pair that creates trace spans and records metrics. - -### Design Principles for Contrib Steps - -1. Every contrib step implements `flow.Steper` — fully composable with the core framework. -2. Steps are configurable via struct fields, not functional options (keeps it simple). -3. Each submodule depends only on the root `go-workflow` module plus minimal external deps. -4. Steps are tested independently; no contribution required in the root module's test suite. - -### Open Questions - -- Flat `contrib/steps` vs. per-concern submodules (`contrib/signal`, `contrib/ratelimit`)? - Single `contrib/steps` is simpler initially; can split later if dependency sets diverge. - -- Should contrib steps live in this repo or a separate `go-workflow-contrib` repo? - Same repo is easier to keep in sync with API changes; separate repo avoids repo bloat. - Leaning toward same repo with clear directory boundary. - -## Impact - -- New `contrib/` directory with one or more `go.mod` files. -- No changes to root module code or go.mod. -- CI: add build/test jobs for each contrib submodule. diff --git a/openspec/changes/dynamic-fanout/.openspec.yaml b/openspec/changes/dynamic-fanout/.openspec.yaml deleted file mode 100644 index 905325f..0000000 --- a/openspec/changes/dynamic-fanout/.openspec.yaml +++ /dev/null @@ -1,2 +0,0 @@ -schema: spec-driven -created: 2026-05-04 diff --git a/openspec/changes/dynamic-fanout/proposal.md b/openspec/changes/dynamic-fanout/proposal.md deleted file mode 100644 index 8642eb0..0000000 --- a/openspec/changes/dynamic-fanout/proposal.md +++ /dev/null @@ -1,89 +0,0 @@ -## Why - -Today go-workflow requires all steps to be declared before `workflow.Do()` is called. The DAG -is static. This is fine for most cases, but a common pattern cannot be expressed cleanly: -a step produces a list of items at runtime, and you want to process each item in parallel -as an independent step — the classic fan-out / map-reduce pattern. - -Current workarounds: -1. Pre-allocate a fixed-size slice of steps with the maximum possible items — wasteful and - requires knowing the count upfront. -2. Use a `SubWorkflow` step that builds and runs its own inner workflow dynamically — works, - but the inner workflow is invisible to the outer workflow's status tracking and event sink. -3. Use a single step that processes all items sequentially or with manual goroutines — loses - the framework's retry-per-item, timeout-per-item, and observability guarantees. - -The goal is to let a step declare children at runtime, which the workflow then schedules -as first-class steps. - -## What Changes - -A new optional interface `Spawner` (name TBD) that a step can implement. After the step's -`Do()` returns successfully, the workflow calls `Spawn()` to get new steps, injects them into -the running DAG, and schedules them with the step as their upstream. - -## Proposed Approaches - -### Approach A — `Spawner` interface (post-Do hook) - -```go -type Spawner interface { - Spawn() []Builder -} -``` - -After `Do()` succeeds, the workflow calls `Spawn()` and merges the returned builders into -the live DAG. Simple, zero new concepts, fully backward compatible. - -Downside: `Spawn()` is called after `Do()`, so items must be stored on the step struct -between `Do()` and `Spawn()` — minor coupling, but acceptable. - -### Approach B — Dynamic `Add()` during `Do()` - -Pass a "workflow handle" into `Do()` via context: - -```go -handle := flow.WorkflowHandle(ctx) -handle.Add(flow.Step(newStep)) -``` - -More flexible, but context-threading is implicit and the concurrency story is tricky -(adding steps while the scheduler tick is running). - -### Approach C — `Builder` result from `Do()` - -Change `Steper` to `Do(context.Context) (Builder, error)` — spawned steps are the return -value. Clean, but **breaks the existing interface** — too invasive. - -### Recommendation - -Start with **Approach A**. It is the least invasive, requires no interface changes, and -covers the primary use case. Approach B can be layered on top later via a context value. - -## Capabilities - -### New Capabilities (Approach A) - -- `Spawner` interface: if a step implements `Spawn() []Builder`, the workflow calls it after - a successful `Do()` and schedules the returned steps as downstream of the spawning step. -- Spawned steps participate in all normal workflow features: retry, timeout, condition, event sink. -- `workflow.Do()` does not return until all spawned steps (and their descendants) are terminated. - -### Open Questions - -- What if `Spawn()` returns a step that was already in the workflow? Should it merge configs - or panic? Probably merge (same semantics as `workflow.Add()`). -- Should spawned steps be visible in `workflow.Steps()` and the event sink before they run? - Yes — once added to the live DAG they should behave like any other step. -- Should `Spawn()` be called on retry too, or only on final success? - Only on final success — otherwise retries produce duplicate children. -- Error handling: if `Spawn()` itself returns an error... it doesn't in this design (returns - `[]Builder`). Errors surface through the spawned steps' own `Do()` calls. - -## Impact - -- New `Spawner` interface in `step.go` (or `workflow.go`). -- `workflow.go` `tick()` or post-`runStep` logic — check if step implements `Spawner`, - call `Spawn()`, merge result into `w.steps`. -- New spec: `openspec/specs/dynamic-fanout/spec.md`. -- No breaking changes. diff --git a/openspec/changes/heartbeat-and-liveness/.openspec.yaml b/openspec/changes/heartbeat-and-liveness/.openspec.yaml deleted file mode 100644 index 905325f..0000000 --- a/openspec/changes/heartbeat-and-liveness/.openspec.yaml +++ /dev/null @@ -1,2 +0,0 @@ -schema: spec-driven -created: 2026-05-04 diff --git a/openspec/changes/heartbeat-and-liveness/proposal.md b/openspec/changes/heartbeat-and-liveness/proposal.md deleted file mode 100644 index a12b9ac..0000000 --- a/openspec/changes/heartbeat-and-liveness/proposal.md +++ /dev/null @@ -1,54 +0,0 @@ -## Why - -Long-running steps (large file processing, external API polling, batch jobs) can silently stall -inside `Do()` — the goroutine is alive but making no progress. Currently go-workflow has no way -to detect this. Temporal solves it with an Activity Heartbeat: the step periodically signals -liveness, and the runtime kills and retries it if heartbeats stop. - -go-workflow should provide an equivalent mechanism that fits its single-process, in-memory model. - -## What Changes - -- A `Heartbeat` function that a step calls periodically during `Do()` to signal it is alive. -- A `HeartbeatTimeout` field in `RetryOption` (or `StepOption`) — if no heartbeat arrives within - this duration, the step's context is canceled and the step is treated as failed (and retried - if retry is configured). -- Optionally, a heartbeat payload so a retried step can recover its last known progress - (similar to Temporal's `activity.GetHeartbeatDetails`). - -## Capabilities - -### New Capabilities - -- **Heartbeat signaling**: step calls `flow.Heartbeat(ctx)` or `flow.HeartbeatWith(ctx, payload)` - inside its `Do()` loop. The call is a no-op if no heartbeat timeout is configured, so it is - safe to add unconditionally. - -- **Liveness enforcement**: when `HeartbeatTimeout` is set on a step, the workflow starts a - watchdog that cancels the step's context if `HeartbeatTimeout` elapses without a heartbeat. - The step's `ctx.Done()` fires, `Do()` returns `context.Canceled`, and normal retry logic applies. - -- **Progress recovery**: `flow.LastHeartbeat[T](ctx)` returns the payload from the most recent - heartbeat of the previous attempt, so a retried step can resume from where it left off instead - of restarting from zero. - -### Open Questions - -- Should `HeartbeatTimeout` live in `RetryOption` (only meaningful with retry) or `StepOption` - (independent — you might want liveness detection even without retry)? - Leaning toward `StepOption` so liveness and retry are orthogonal concerns. - -- Payload type: generic `any` with the caller doing type assertion, or a typed generic helper - `flow.HeartbeatWithTyped[T]`? Generic is cleaner but requires Go 1.21+ type inference. - -- Should the watchdog goroutine be started per-step or shared across steps? - Per-step is simpler and avoids coordination overhead. - -## Impact - -- `StepOption` or `RetryOption` — add `HeartbeatTimeout *time.Duration`. -- New exported functions: `Heartbeat(ctx)`, `HeartbeatWith(ctx, payload)`, - `LastHeartbeat[T](ctx)`. -- `workflow.go` / `runStep` — start watchdog goroutine when `HeartbeatTimeout` is set. -- New spec: `openspec/specs/heartbeat/spec.md`. -- No breaking changes; steps that do not call `Heartbeat` are unaffected. diff --git a/openspec/changes/observability-adapters/.openspec.yaml b/openspec/changes/observability-adapters/.openspec.yaml deleted file mode 100644 index 905325f..0000000 --- a/openspec/changes/observability-adapters/.openspec.yaml +++ /dev/null @@ -1,2 +0,0 @@ -schema: spec-driven -created: 2026-05-04 diff --git a/openspec/changes/observability-adapters/proposal.md b/openspec/changes/observability-adapters/proposal.md deleted file mode 100644 index 528dee9..0000000 --- a/openspec/changes/observability-adapters/proposal.md +++ /dev/null @@ -1,210 +0,0 @@ -## Why - -go-workflow has no built-in observability. When a workflow is running inside a production binary -it is a black box: operators cannot see which steps are running, how long they have been running, -how many retries have occurred, or where failures happened. Debugging requires either reading logs -(unstructured) or adding custom `BeforeStep`/`AfterStep` callbacks everywhere. - -The goal is to make a running go-workflow process a first-class citizen in standard observability -stacks — without writing any custom frontend and without adding heavy dependencies to the root -module. - -This change depends on the `structured-event-sink` change landing first, since the adapters -are implemented as `EventSink` consumers. - -## What Changes - -Three observability adapters, each as an independent submodule under `contrib/`: - -1. **`contrib/otel`** — OpenTelemetry traces, enabling Gantt-chart visualization in Jaeger, - Grafana Tempo, or any OTLP-compatible backend. -2. **`contrib/prometheus`** — Prometheus metrics, enabling time-series dashboards in Grafana. -3. **`contrib/dot`** — A live Graphviz DOT HTTP endpoint for lightweight development-time - visualization, zero external dependencies. - -## Capabilities - ---- - -### `contrib/otel` — Trace-based visualization - -Each workflow execution becomes an OTel trace. Each step becomes a child span. This maps -naturally onto Jaeger / Grafana Tempo's Gantt-chart UI, which shows parallel steps side by -side and serial steps in sequence — exactly the shape of a DAG execution. - -**Span structure — flat workflow:** - -``` -workflow.Do() ← root span (name = workflow type or user-supplied name) - ├── FetchData ← child span (name = flow.String(step)) - ├── ProcessA ← child span, starts after FetchData span ends - └── ProcessB ← child span, parallel with ProcessA - └── retry attempt 2 ← span event on ProcessB span (not a child span) -``` - -**Span structure — nested workflow (Workflow-as-a-Step):** - -Since go-workflow supports using a `Workflow` as a `Step` inside another `Workflow`, the -`EventSink` must handle the nesting correctly. The key insight is that OTel span context -propagates through `context.Context`: the inner workflow's `Do(ctx)` receives a `ctx` that -already carries the outer step's span as the active span. - -The adapter should therefore **never unconditionally create a root span**. Instead: - -``` -outer.Do(ctx) ← root span (ctx has no parent span) - ├── FetchData ← child span of root - └── InnerWorkflow ← child span of root (this is the step span) - ├── SubStepA ← child span of InnerWorkflow span - └── SubStepB ← child span of InnerWorkflow span -``` - -This works naturally because: -1. When the outer workflow starts the `InnerWorkflow` step, it calls `BeforeStep` with a ctx - that carries the outer root span. -2. The `EventSink` creates a span for `InnerWorkflow` as a child of whatever is active in ctx. -3. The inner workflow's `Do(ctx)` receives that span in ctx; inner step spans become its children. - -The adapter must inject the step span into ctx (via `BeforeStep`) so that inner workflows -inherit it. This means the EventSink needs a companion `BeforeStep` callback, not just a -passive sink — or the `WorkflowEvent` must carry the ctx. - -**Open Question**: should `WorkflowEvent` carry the `context.Context` so the EventSink can -both read the parent span and inject a new span into ctx for the step? This requires the -`Started` event to be emitted before `Do()` is called (which aligns with the current -`BeforeStep` timing) and the span to be ended on the terminal event. The structured-event-sink -design should account for this requirement. - -**API sketch:** - -```go -import "github.com/Azure/go-workflow/contrib/otel" - -tp := /* set up your TracerProvider */ -sink := workflowotel.NewEventSink(tp, - workflowotel.WithWorkflowName("my-pipeline"), -) -w := &flow.Workflow{ - EventSink: sink.Sink, - // The adapter also needs a BeforeStep to inject the step span into ctx: - // w.DefaultOption = &flow.StepOption{ /* BeforeStep: sink.BeforeStep */ } - // Exact API TBD based on how EventSink and BeforeStep compose. -} -``` - -**Visualization**: Jaeger UI, Grafana Tempo, or any OTLP trace viewer shows: -- Horizontal Gantt bars per step, colored by status (green = succeeded, red = failed). -- Parallel steps rendered side by side. -- Nested workflows appear as a span with its own children, fully indented under the parent step. -- Retry events visible as annotations on the step's span. -- Total workflow duration as the root span duration. - -**Span attributes (standard OTel semantic conventions where applicable):** - -- `workflow.step` — step name -- `workflow.step.attempt` — attempt number (1-based) -- `workflow.step.status` — terminal status string -- `error.type`, `error.message` — populated on Failed / Canceled - ---- - -### `contrib/prometheus` — Metrics-based dashboards - -Exposes Prometheus metrics that can be scraped by any Prometheus-compatible system and -visualized in Grafana. - -**Metrics:** - -| Metric | Type | Labels | Description | -|--------|------|--------|-------------| -| `workflow_step_duration_seconds` | Histogram | `step`, `status` | Duration of each step attempt | -| `workflow_step_total` | Counter | `step`, `status` | Total step terminations by status | -| `workflow_step_retries_total` | Counter | `step` | Total retry attempts | -| `workflow_running_steps` | Gauge | `step` | Currently running steps | - -**API sketch:** - -```go -import "github.com/Azure/go-workflow/contrib/prometheus" - -reg := prometheus.NewRegistry() -w := &flow.Workflow{ - EventSink: workflowprom.NewEventSink(reg, - workflowprom.WithNamespace("myapp"), - ), -} -// expose reg via /metrics as usual -``` - -**Visualization in Grafana:** -- **State timeline panel**: each step as a row, colored by last known status over time. -- **Time series panel**: step duration trends, retry rate over time. -- **Stat panel**: current running step count, total failed steps. - -Grafana dashboard JSON can be shipped in `contrib/prometheus/dashboards/` so users can -import it directly. - ---- - -### `contrib/dot` — Live DAG visualization (development tool) - -An HTTP handler that renders the current workflow state as a Graphviz DOT document. -Intended for development and debugging, not production monitoring. - -**HTTP endpoint**: mount anywhere, e.g. `GET /debug/workflow` - -**DOT output example:** -```dot -digraph workflow { - rankdir=LR - "FetchData" [shape=box color=green label="FetchData\nSucceeded 1.24s"] - "ProcessA" [shape=box color=blue label="ProcessA\nRunning 0.8s..."] - "ProcessB" [shape=box color=gray label="ProcessB\nPending"] - "FetchData" -> "ProcessA" - "FetchData" -> "ProcessB" -} -``` - -Node colors: gray = Pending, blue = Running, green = Succeeded, red = Failed, -orange = Canceled, yellow = Skipped. - -**API sketch:** - -```go -import "github.com/Azure/go-workflow/contrib/dot" - -mux.Handle("/debug/workflow", workflowdot.Handler(w)) -``` - -Rendering options for the caller: -- In browser: serve the DOT source and use [d3-graphviz](https://github.com/magjac/d3-graphviz) - to render in-page (the handler can serve a small self-contained HTML page). -- In terminal: `curl localhost:8080/debug/workflow | dot -Tsvg | open -f -a Safari`. -- In VS Code: paste into the Graphviz preview extension. - -**No external dependencies** beyond the root go-workflow module. `dot` format is plain text. - ---- - -## Dependencies - -| Adapter | External deps | -|---------|--------------| -| `contrib/otel` | `go.opentelemetry.io/otel` (already common in Go services) | -| `contrib/prometheus` | `github.com/prometheus/client_golang` | -| `contrib/dot` | none (stdlib only) | - -All deps are in submodule `go.mod` files. Root module gains zero new dependencies. - -## Prerequisite - -`structured-event-sink` change must land first. All three adapters are pure `EventSink` -consumers and require no other changes to the core. - -## Impact - -- New `contrib/otel/`, `contrib/prometheus/`, `contrib/dot/` directories, each with `go.mod`. -- No changes to root module. -- CI: add build/test jobs for each contrib submodule. -- Optionally: a `contrib/prometheus/dashboards/workflow.json` Grafana dashboard to ship - with the adapter. diff --git a/openspec/changes/pause-resume/.openspec.yaml b/openspec/changes/pause-resume/.openspec.yaml deleted file mode 100644 index 905325f..0000000 --- a/openspec/changes/pause-resume/.openspec.yaml +++ /dev/null @@ -1,2 +0,0 @@ -schema: spec-driven -created: 2026-05-04 diff --git a/openspec/changes/pause-resume/proposal.md b/openspec/changes/pause-resume/proposal.md deleted file mode 100644 index 630a172..0000000 --- a/openspec/changes/pause-resume/proposal.md +++ /dev/null @@ -1,78 +0,0 @@ -## Why - -Currently a running workflow can only be stopped by canceling the context, which is destructive — -it cancels all in-flight steps and the workflow returns an error. There is no way to temporarily -pause execution (e.g., during a maintenance window, or when an operator notices something is wrong -and wants to inspect state before letting it continue) and then resume from exactly where it left off. - -Temporal supports pausing a workflow execution; go-workflow should offer an equivalent that fits -the single-process model. - -## What Changes - -- A `Pause()` method on `Workflow` that suspends scheduling of new steps without canceling - currently-running ones. -- A `Resume()` method that re-enables scheduling and wakes the tick loop. -- Running steps finish normally; paused state means "no new steps will be started until resumed". - -## Proposed Design - -### State machine addition - -Add a `paused` flag (protected by the existing `statusChange` mutex): - -``` -Normal running: paused = false → tick() schedules all runnable steps -Paused: paused = true → tick() skips scheduling, only updates status -Resumed: paused = false → statusChange.Signal() wakes the loop -``` - -### API - -```go -func (w *Workflow) Pause() error // error if workflow is not running -func (w *Workflow) Resume() error // error if workflow is not running or not paused -func (w *Workflow) IsPaused() bool -``` - -### Behavior - -- `Pause()` sets the flag; in-flight step goroutines are NOT interrupted. They will finish - naturally and update status, but the tick loop will not start new goroutines. -- `Resume()` clears the flag and signals `statusChange` so the tick loop wakes and schedules - any steps that became runnable while paused. -- Calling `Pause()` on an already-paused workflow is a no-op (or returns a sentinel error — - TBD based on what's more ergonomic). -- Context cancellation still works orthogonally: canceling the context while paused will - cancel in-flight steps; once they drain, the workflow exits normally through the existing - error path. - -### Optional: Pause with drain - -A `PauseAfterCurrent()` variant that also waits for all currently-running steps to finish -before fully pausing — useful if the caller wants a clean quiescence point before inspecting -state. - -```go -func (w *Workflow) PauseAfterCurrent(ctx context.Context) error -``` - -This blocks until the running goroutine count reaches zero, then sets `paused = true`. - -### Open Questions - -- Should `Pause()` / `Resume()` be goroutine-safe (callable from any goroutine)? Yes — - the only reasonable use case is calling them from outside the workflow goroutine. -- Should there be a `Paused` step status for steps that were Pending when Pause was called? - Probably not — they remain `Pending`. The pause is a scheduler-level concept, not a - step-level status. -- Should the `EventSink` emit a `WorkflowPaused` / `WorkflowResumed` event? - Yes, if the event-sink feature lands. - -## Impact - -- `workflow.go` — add `paused bool` field (under `statusChange.L`), `Pause()`, `Resume()`, - `IsPaused()` methods, guard in `tick()`. -- No changes to `Steper` interface or `StepConfig`. -- New spec: `openspec/specs/pause-resume/spec.md`. -- No breaking changes. diff --git a/openspec/changes/workflow-checkpointing/.openspec.yaml b/openspec/changes/workflow-checkpointing/.openspec.yaml deleted file mode 100644 index 905325f..0000000 --- a/openspec/changes/workflow-checkpointing/.openspec.yaml +++ /dev/null @@ -1,2 +0,0 @@ -schema: spec-driven -created: 2026-05-04 diff --git a/openspec/changes/workflow-checkpointing/proposal.md b/openspec/changes/workflow-checkpointing/proposal.md deleted file mode 100644 index 9a435f4..0000000 --- a/openspec/changes/workflow-checkpointing/proposal.md +++ /dev/null @@ -1,255 +0,0 @@ -## Why - -go-workflow runs entirely in-process. If the process crashes mid-execution, all workflow -progress is lost and the entire workflow must restart from the beginning. For long-running -workflows this is costly — early steps that already succeeded must redo their work. - -Temporal solves this with event sourcing: every activity completion is durably recorded, -and replay reconstructs the workflow state after a crash. go-workflow cannot replicate that -architecture (it has no server, no event log), but it can offer a lightweight equivalent: -**checkpoint-based resume**. - -The idea: serialize the workflow's execution state (step statuses + step business data) to -JSON after each step terminates. On restart, reconstruct the workflow from code as usual, -then call `LoadCheckpoint` to restore state — steps that already succeeded are skipped -automatically. - -## What Cannot Be Persisted - -- **Closures and callbacks** (`Input`, `Output`, `BeforeStep`, `AfterStep`) are Go functions - and cannot be serialized. They are re-registered from code on every run. -- **DAG topology** (`DependsOn` graph) is always reconstructed from code — never persisted. - -What IS persisted: each step's terminal **status** and its **business data** (struct fields), -via standard `encoding/json`. - -## Design - -### MarshalJSON / LoadCheckpoint — not symmetric UnmarshalJSON - -The natural first instinct is `json.Marshal(w)` + `json.Unmarshal(data, w)`. However, pure -`UnmarshalJSON` cannot work on an empty `Workflow` because Go has no type registry — the -framework cannot reconstruct `*FetchData` from the string `"FetchData"` alone. - -The correct pattern is **asymmetric**: - -```go -// 1. Build workflow from code as normal (step instances exist in memory) -w := &flow.Workflow{} -fetchUser := &FetchData{URL: "/user"} -fetchProduct := &FetchData{URL: "/product"} -w.Add( - flow.Step(flow.Named("fetch-user", fetchUser)), - flow.Step(flow.Named("fetch-product", fetchProduct)), - flow.Step(processStep).DependsOn(fetchUser, fetchProduct), -) - -// 2. Load checkpoint — step instances already exist, just restore their state -data, _ := store.Get("job-2026-05-04") -if data != nil { - if err := w.LoadCheckpoint(data); err != nil { /* handle */ } -} - -// 3. Run — steps whose status was restored as Succeeded are skipped automatically -w.Do(ctx) - -// 4. Save checkpoint after each step (via AfterStep or EventSink) -data, _ = json.Marshal(w) -store.Set("job-2026-05-04", data) -``` - -`w.MarshalJSON()` is valid and useful. `w.UnmarshalJSON()` is intentionally not provided -(or is a no-op that returns an error explaining the correct pattern). - -### Step identity — the key problem - -`w.steps` is keyed by `Steper` pointer, which is unique within a process but meaningless -across restarts. Serialization requires a **stable string key** per step. - -The checkpoint key must be determined at `Add()` time and frozen — it must not change -after `Do()` executes and mutates step fields. This rules out deriving the key from -`flow.String()` or any method that reads result fields set during execution. - -The framework resolves a step's checkpoint key at `Add()` time using this priority order: - -1. **`flow.Name(step, "my-name")` wrapper** — explicit static string declared at DAG build - time. This is the canonical approach. `flow.Name` already exists in the codebase. -2. **Type name alone** — valid only if exactly one instance of that type exists in the - workflow (e.g. a single `*ProcessData` → key is `"ProcessData"`). -3. **Multiple instances with no static name** — `MarshalJSON` returns an error, forcing - the user to use `flow.Name()` explicitly. - -**`flow.NameFunc` / `flow.NameStringer` are explicitly excluded from checkpoint key -resolution.** Their names are evaluated at runtime and may change between the time the -workflow is built and the time `MarshalJSON` is called (e.g. a step whose `String()` -depends on a field written by `Do()`). Using them as checkpoint keys would produce -unstable identifiers that break resume. If a step is wrapped with `NameFunc` or -`NameStringer`, `MarshalJSON` returns an error directing the user to use `flow.Name()` -instead. - -```go -// Single instance — naming optional, type name used automatically -w.Add(flow.Step(processStep)) // key: "ProcessData" - -// Multiple instances of same type — static name required -w.Add( - flow.Name(&FetchData{URL: "/user"}, "fetch-user"), - flow.Name(&FetchData{URL: "/product"}, "fetch-product"), -) -``` - -### JSON structure - -`MarshalJSON` only serializes **terminated** steps (Succeeded, Failed, Skipped, Canceled). -Running and Pending steps are omitted entirely. - -This has two important consequences: - -1. **No pause needed for live checkpoint saving.** The only risk of a data race would be - serializing a step whose struct fields are being mutated concurrently inside `Do()`. - Since terminated steps have already returned from `Do()`, their data is stable. - Running steps are skipped, so there is nothing to race on. `json.Marshal(w)` is safe - to call at any point during workflow execution without pausing or locking. - -2. **Running steps always re-run on resume.** This is correct behavior — a step that was - mid-execution when the process crashed has unknown partial state and must restart. - -**Flat workflow:** -```json -{ - "fetch-user": { - "status": "Succeeded", - "data": { "URL": "/user", "Result": "{ ... }" } - }, - "fetch-product": { - "status": "Failed", - "data": { "URL": "/product", "Result": "" }, - "error": "connection refused" - } -} -``` - -**Nested workflow (Workflow-as-a-Step):** - -Sub-workflows use a nested JSON structure that mirrors the actual DAG hierarchy. -The outer entry for the sub-workflow step carries its own status, and a `"steps"` key -holds the inner workflow's terminated steps recursively. - -```json -{ - "fetch-user": { - "status": "Succeeded", - "data": { "URL": "/user", "Result": "{ ... }" } - }, - "InnerWorkflow": { - "status": "Running", - "steps": { - "SubStepA": { "status": "Succeeded", "data": { ... } }, - "SubStepB": { "status": "Failed", "data": { ... }, "error": "..." } - } - } -} -``` - -`LoadCheckpoint` recurses into `"steps"` when restoring a sub-workflow step, matching -inner step instances by name within the inner workflow's own `steps` map. The outer -workflow's key for the sub-workflow step (`"InnerWorkflow"`) is resolved by the same -Named/Name()/type-name priority as any other step. - -Step `data` is serialized via the step's own `json.Marshal`. Steps that do not implement -`json.Marshaler` are serialized with `encoding/json` reflection — this works automatically -for plain data structs. Fields tagged `json:"-"` are excluded as usual. - -### Resume semantics - -| Persisted status | Behavior on resume | -|------------------|--------------------| -| `Succeeded` | Skip — treat as already done, downstream can proceed | -| `Skipped` | Skip — preserve original skip decision | -| `Failed` | Re-run — the step failed last time, try again | -| `Canceled` | Re-run — the cancellation may have been transient | -| `Running` | Re-run — process crashed mid-execution | - -### Saving the checkpoint - -The framework does not dictate where or when checkpoints are saved — that is the caller's -responsibility. Since `MarshalJSON` only serializes terminated steps and is safe to call -concurrently with a running workflow, **no pause is needed**. - -The recommended pattern is to save after each step terminates, via a global `AfterStep` -callback or an `EventSink` consumer (once that feature lands). This gives the finest -checkpoint granularity — one step at a time — without any coordination overhead: - -```go -// Using DefaultOption to attach a global AfterStep to all steps -w.DefaultOption = &flow.StepOption{ - // AfterStep is called after every step terminates (after Do() returns) -} -// Or via EventSink on terminal events — the step's Do() has already returned, -// so json.Marshal(w) is safe to call immediately. -``` - -The caller chooses the storage backend. The framework is storage-agnostic: it only provides -`MarshalJSON` and `LoadCheckpoint`; the caller writes the bytes wherever makes sense -(local file, Redis, SQL, object storage, etc.). - -There is no `WorkflowID` field on `Workflow`. The checkpoint key is entirely the caller's -concern — it can be a job date, a request ID, a UUID, or any stable identifier for this -particular execution. - -### Idempotency requirement - -Checkpointing does not make steps idempotent automatically. **Steps with side effects must -be idempotent themselves** — or check whether the side effect already occurred before -performing it. This is the same requirement Temporal places on Activities. - -For steps whose output is large or cannot be reconstructed from struct fields alone, -the recommended pattern is to persist outputs to external storage inside `Do()` and reload -them in an `Input` callback on resume: - -```go -Step(fetchUser).Input(func(ctx context.Context, s *FetchData) error { - if s.Result != "" { - return nil // already loaded from checkpoint, skip - } - result, err := externalStore.Get(s.URL) - s.Result = result - return err -}) -``` - -## Open Questions - -- Should `LoadCheckpoint` restore `Failed` step data (struct fields) so that a retried step - can inspect what was attempted before? Probably yes — useful for partial-progress patterns. - -- Should `MarshalJSON` include only terminated steps, or all steps including `Pending`? - Only terminated is smaller and sufficient; `Pending` steps have no meaningful data yet. - -- Should there be a `w.ClearCheckpoint()` or similar that resets all persisted statuses - without rebuilding the whole workflow? This would allow forced full reruns without code - changes. - -- For sub-workflows (Workflow-as-a-Step): should the outer workflow's `MarshalJSON` - recursively include inner workflow step states? This requires the inner workflow to also - participate in the checkpoint key namespace, which may require namespacing - (e.g. `"InnerWorkflow.SubStepA"`). - -## Relationship to Other Changes - -- **`heartbeat-and-liveness`**: heartbeat payloads serve intra-run progress recovery - (across retries within one process run). Checkpointing serves inter-run recovery - (across process restarts). The two are orthogonal and composable. -- **`structured-event-sink`**: checkpoint saving can be implemented as an EventSink - consumer for a clean integration point, once that feature lands. - -## Impact - -- `workflow.go` — implement `MarshalJSON` and `LoadCheckpoint([]byte) error`. -- `step.go` / `wrap.go` — add `flow.Named(name string, step S) *NamedStep[S]` wrapper - (analogous to `flow.Mock`); add step key resolution logic. -- No new external dependencies in the root module. -- `contrib/checkpoint/` submodule (optional): ready-made save/load helpers for common - stores (file, Redis, SQL) to reduce boilerplate. -- New spec: `openspec/specs/checkpointing/spec.md`. -- No breaking changes. From d17eabc4325054fa3a22ac1f06500a416844728f Mon Sep 17 00:00:00 2001 From: Xingfei Xu Date: Tue, 5 May 2026 12:52:00 +0000 Subject: [PATCH 07/31] refactor: remove unused SetError/GetError/SetFinishedAt/GetFinishedAt from State --- state.go | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/state.go b/state.go index a4e11cd..fd6a1e3 100644 --- a/state.go +++ b/state.go @@ -3,7 +3,6 @@ package flow import ( "context" "sync" - "time" ) // State is the internal state of a Step in a Workflow. @@ -26,26 +25,6 @@ func (s *State) SetStatus(ss StepStatus) { defer s.Unlock() s.Status = ss } -func (s *State) GetError() error { - s.RLock() - defer s.RUnlock() - return s.Err -} -func (s *State) SetError(err error) { - s.Lock() - defer s.Unlock() - s.Err = err -} -func (s *State) GetFinishedAt() time.Time { - s.RLock() - defer s.RUnlock() - return s.FinishedAt -} -func (s *State) SetFinishedAt(t time.Time) { - s.Lock() - defer s.Unlock() - s.FinishedAt = t -} func (s *State) GetStepResult() StepResult { s.RLock() defer s.RUnlock() From 363d32c50c828247c433cb5673e26e40a5b672d8 Mon Sep 17 00:00:00 2001 From: Xingfei Xu Date: Tue, 5 May 2026 12:53:28 +0000 Subject: [PATCH 08/31] refactor: deprecate SetError/GetError/SetFinishedAt/GetFinishedAt, prefer SetStepResult --- state.go | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/state.go b/state.go index fd6a1e3..a21769a 100644 --- a/state.go +++ b/state.go @@ -3,6 +3,7 @@ package flow import ( "context" "sync" + "time" ) // State is the internal state of a Step in a Workflow. @@ -35,6 +36,34 @@ func (s *State) SetStepResult(r StepResult) { defer s.Unlock() s.StepResult = r } + +// Deprecated: Use GetStepResult().Err instead. +// +//go:fix inline +func (s *State) GetError() error { return s.GetStepResult().Err } + +// Deprecated: Use SetStepResult instead to update Status, Err, and FinishedAt atomically. +// +//go:fix inline +func (s *State) SetError(err error) { + s.Lock() + defer s.Unlock() + s.Err = err +} + +// Deprecated: Use GetStepResult().FinishedAt instead. +// +//go:fix inline +func (s *State) GetFinishedAt() time.Time { return s.GetStepResult().FinishedAt } + +// Deprecated: Use SetStepResult instead to update Status, Err, and FinishedAt atomically. +// +//go:fix inline +func (s *State) SetFinishedAt(t time.Time) { + s.Lock() + defer s.Unlock() + s.FinishedAt = t +} func (s *State) Upstreams() Set[Steper] { if s.Config == nil { return nil From a0f4a715d89c67ddeb46e05063bddd62c63fcc37 Mon Sep 17 00:00:00 2001 From: Xingfei Xu Date: Tue, 5 May 2026 12:54:03 +0000 Subject: [PATCH 09/31] fix: remove go:fix inline from SetError/SetFinishedAt (not single-return) --- state.go | 4 ---- 1 file changed, 4 deletions(-) diff --git a/state.go b/state.go index a21769a..c366930 100644 --- a/state.go +++ b/state.go @@ -43,8 +43,6 @@ func (s *State) SetStepResult(r StepResult) { func (s *State) GetError() error { return s.GetStepResult().Err } // Deprecated: Use SetStepResult instead to update Status, Err, and FinishedAt atomically. -// -//go:fix inline func (s *State) SetError(err error) { s.Lock() defer s.Unlock() @@ -57,8 +55,6 @@ func (s *State) SetError(err error) { func (s *State) GetFinishedAt() time.Time { return s.GetStepResult().FinishedAt } // Deprecated: Use SetStepResult instead to update Status, Err, and FinishedAt atomically. -// -//go:fix inline func (s *State) SetFinishedAt(t time.Time) { s.Lock() defer s.Unlock() From 3e4ca501be5bb5bf48d20a95d1c070e973683a8a Mon Sep 17 00:00:00 2001 From: Xingfei Xu Date: Tue, 5 May 2026 12:55:45 +0000 Subject: [PATCH 10/31] refactor: drop GetFinishedAt/SetFinishedAt (new in this PR), keep SetError deprecated-only --- state.go | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/state.go b/state.go index c366930..7e960b2 100644 --- a/state.go +++ b/state.go @@ -3,7 +3,6 @@ package flow import ( "context" "sync" - "time" ) // State is the internal state of a Step in a Workflow. @@ -49,17 +48,6 @@ func (s *State) SetError(err error) { s.Err = err } -// Deprecated: Use GetStepResult().FinishedAt instead. -// -//go:fix inline -func (s *State) GetFinishedAt() time.Time { return s.GetStepResult().FinishedAt } - -// Deprecated: Use SetStepResult instead to update Status, Err, and FinishedAt atomically. -func (s *State) SetFinishedAt(t time.Time) { - s.Lock() - defer s.Unlock() - s.FinishedAt = t -} func (s *State) Upstreams() Set[Steper] { if s.Config == nil { return nil From 1e070a133a282cb69fbec687929f0f68e8785dd4 Mon Sep 17 00:00:00 2001 From: Xingfei Xu Date: Tue, 5 May 2026 13:02:26 +0000 Subject: [PATCH 11/31] refactor: un-deprecate GetError/SetError, they have distinct semantics from SetStepResult --- state.go | 4 ---- 1 file changed, 4 deletions(-) diff --git a/state.go b/state.go index 7e960b2..b313864 100644 --- a/state.go +++ b/state.go @@ -36,12 +36,8 @@ func (s *State) SetStepResult(r StepResult) { s.StepResult = r } -// Deprecated: Use GetStepResult().Err instead. -// -//go:fix inline func (s *State) GetError() error { return s.GetStepResult().Err } -// Deprecated: Use SetStepResult instead to update Status, Err, and FinishedAt atomically. func (s *State) SetError(err error) { s.Lock() defer s.Unlock() From ce8ac646e1f9b4a8744baa401b4bad7edf778c36 Mon Sep 17 00:00:00 2001 From: Xingfei Xu Date: Mon, 11 May 2026 07:37:28 +0000 Subject: [PATCH 12/31] docs(step-mutator): add openspec change, design notes, plan, and example Adds the design-phase artifacts for the upcoming flow.Mutator feature: - openspec/changes/2026-05-06-step-mutator/: proposal, design, tasks, and capability deltas (mutators / composite-steps / step-configuration) - docs/superpowers/specs/2026-05-06-step-mutator-design.md: brainstorming notes - docs/superpowers/plans/2026-05-11-step-mutator.md: TDD implementation plan - example/15_mutator_test.go: reference example file (build-tagged out until the implementation lands) No production code changes in this commit. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../plans/2026-05-11-step-mutator.md | 1320 +++++++++++++++++ .../specs/2026-05-06-step-mutator-design.md | 798 ++++++++++ example/15_mutator_test.go | 280 ++++ .../changes/2026-05-06-step-mutator/design.md | 357 +++++ .../2026-05-06-step-mutator/proposal.md | 99 ++ .../specs/composite-steps/spec.md | 97 ++ .../specs/mutators/spec.md | 403 +++++ .../specs/step-configuration/spec.md | 78 + .../changes/2026-05-06-step-mutator/tasks.md | 76 + 9 files changed, 3508 insertions(+) create mode 100644 docs/superpowers/plans/2026-05-11-step-mutator.md create mode 100644 docs/superpowers/specs/2026-05-06-step-mutator-design.md create mode 100644 example/15_mutator_test.go create mode 100644 openspec/changes/2026-05-06-step-mutator/design.md create mode 100644 openspec/changes/2026-05-06-step-mutator/proposal.md create mode 100644 openspec/changes/2026-05-06-step-mutator/specs/composite-steps/spec.md create mode 100644 openspec/changes/2026-05-06-step-mutator/specs/mutators/spec.md create mode 100644 openspec/changes/2026-05-06-step-mutator/specs/step-configuration/spec.md create mode 100644 openspec/changes/2026-05-06-step-mutator/tasks.md diff --git a/docs/superpowers/plans/2026-05-11-step-mutator.md b/docs/superpowers/plans/2026-05-11-step-mutator.md new file mode 100644 index 0000000..3c297dd --- /dev/null +++ b/docs/superpowers/plans/2026-05-11-step-mutator.md @@ -0,0 +1,1320 @@ +# Step Mutator Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Add `flow.Mutator` — a type-dispatched, once-per-step configuration contributor — to the go-workflow runtime, plus deprecate `BuildStep` / `SubWorkflow.Reset`. + +**Architecture:** A `Mutator` is an opaque interface produced only by the generic constructor `flow.Mutate[T Steper](fn)`. Each `*Workflow` carries a `Mutators []Mutator` slice. At every step's first scheduling (in `tick`, between upstream-check and `state.SetStatus(Running)`), the runtime walks each Mutator against the step's `Unwrap()` chain, stops at workflow boundaries, and merges any returned `Builder`'s config into the step's `StepConfig`. A new `MutatorReceiver` interface (`PrependMutators(mw []Mutator)`) lets parent workflows propagate Mutators into nested `SubWorkflow` / `*Workflow`-as-step containers before they begin scheduling. + +**Tech Stack:** Go (generics), stdlib `context`, no new external deps. Tests use `testify/assert`. Examples use stdlib `Example*` + `// Output:`. + +**Worktree:** `/home/xingfeixu/repo/go-workflow/.claude/worktrees/redesign-build-step` (branch `worktree-redesign-build-step`) + +**Spec source of truth:** +- `openspec/changes/2026-05-06-step-mutator/specs/mutators/spec.md` +- `openspec/changes/2026-05-06-step-mutator/specs/composite-steps/spec.md` +- `openspec/changes/2026-05-06-step-mutator/specs/step-configuration/spec.md` +- `openspec/changes/2026-05-06-step-mutator/design.md` + +**Reference example file (already exists, build-tagged out):** `example/15_mutator_test.go` + +--- + +## File Structure + +| File | Status | Responsibility | +|------|--------|----------------| +| `mutator.go` | **Create** | `Mutator` interface, `Mutate[T]` constructor, `mutatorFunc[T]` impl with Unwrap traversal, `MutatorReceiver` interface, helper `applyMutators` | +| `mutator_test.go` | **Create** | Unit tests for dispatch, unwrap walk, nil-Builder, scope, Builder rebase | +| `state.go` | **Modify** | Add `mutatorsApplied bool` field + `MutatorsApplied()/SetMutatorsApplied()` accessors | +| `workflow.go` | **Modify** | Add `Mutators []Mutator` field; add `(*Workflow).PrependMutators`; add `(*SubWorkflow).PrependMutators`; splice mutator-merge call into `tick`; immediately-before-invoke propagation | +| `workflow_mutator_test.go` | **Create** | Workflow-level tests: once-per-step, ordering vs plan, sub-workflow propagation, lazy inner steps, ctx propagation, panic recovery | +| `build_step.go` | **Modify** | Add `// Deprecated:` godoc to `StepBuilder.BuildStep` | +| `workflow.go` (SubWorkflow.Reset) | **Modify** | Add `// Deprecated:` godoc | +| `example/13_composite_step_test.go` | **Modify** | Add `// Deprecated:` doc to `ExampleCompositeViaWorkflow` | +| `example/15_mutator_test.go` | **Modify** | Drop the `//go:build mutator_design` tag once implementation lands; fix `flow.Name` argument order (currently `flow.Name(name, step)` — must be `flow.Name(step, name)`) | +| `openspec/changes/2026-05-06-step-mutator/tasks.md` | **Modify** | Tick off completed items as work progresses | + +--- + +## Key Codebase References (memorize these line ranges) + +- `Workflow` struct: `workflow.go:40–55` +- `Workflow.Add` (entry point for `Builder.AddToWorkflow()`): `workflow.go:58–75` +- `Workflow.addStep` (per-step add path, calls `BuildStep`): `workflow.go:78–117` +- `Workflow.tick` Pending→Running scheduling: `workflow.go:365–441`. **Splice point: line 379 (after upstream check) → before line 380 (`state.Option()`).** Doing the merge here means `state.Option()` will see Mutator-contributed Options. +- `Workflow.RootOf` / `StateOf` (boundary detection via `interface{ StateOf(Steper) *State }`): `workflow.go:168–207` +- `SubWorkflow`: `workflow.go:551–558`. `Unwrap()` returns single inner `*Workflow`. +- `State` struct: `state.go:12–16`. `MergeConfig(*StepConfig)` at line 98. +- `StepConfig` + `Merge`: `step.go:35–40` and `step.go:430–441`. Merge is union for Upstreams, append for Before/After/Option. +- `Builder` interface: `step.go:24–26` (`AddToWorkflow() map[Steper]*StepConfig`). +- `Step[S]` constructor: `step.go:73`. `AddStep[S]` builder methods at 132–413. +- `Traverse(s, f, walked...)`: `wrap.go:92`. Pre-order, no workflow-boundary stop — caller responsibility. +- `Name(step, name)`: `name.go:13`. Returns `Builder` wrapping `*NamedStep`. **Note arg order: step first, name second.** `NamedStep.Unwrap() Steper` at name.go:51. +- `flow.As[T]`: `wrap.go:135–144` (unbounded; not what we want for mutator dispatch). +- `build_step.go`: full file 1–38, `StepBuilder.BuildStep` method at line 17. + +--- + +## Task 1: Create `Mutator` interface and constructor with TDD + +**Files:** +- Create: `mutator.go` +- Create: `mutator_test.go` + +### - [ ] Step 1.1: Write failing test for `Mutate[T]` matching exact concrete type + +Append to new file `mutator_test.go`: + +```go +package flow + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" +) + +type mutFoo struct{ Field string } + +func (*mutFoo) Do(context.Context) error { return nil } + +type mutBar struct{} + +func (*mutBar) Do(context.Context) error { return nil } + +func TestMutate_matchesExactType(t *testing.T) { + called := 0 + m := Mutate[*mutFoo](func(ctx context.Context, f *mutFoo) Builder { + called++ + return nil + }) + matched, target, b := m.applyTo(context.Background(), &mutFoo{}) + assert.True(t, matched) + assert.NotNil(t, target) + assert.Nil(t, b) + assert.Equal(t, 1, called) +} + +func TestMutate_skipsNonMatchingType(t *testing.T) { + called := 0 + m := Mutate[*mutFoo](func(ctx context.Context, f *mutFoo) Builder { + called++ + return nil + }) + matched, target, b := m.applyTo(context.Background(), &mutBar{}) + assert.False(t, matched) + assert.Nil(t, target) + assert.Nil(t, b) + assert.Equal(t, 0, called) +} +``` + +### - [ ] Step 1.2: Run test, expect compile failure + +Run: `cd /home/xingfeixu/repo/go-workflow/.claude/worktrees/redesign-build-step && go test ./... -run TestMutate -count=1` + +Expected: compile error — `undefined: Mutate`, `undefined: applyTo`. + +### - [ ] Step 1.3: Implement `mutator.go` to satisfy the test + +Create `mutator.go`: + +```go +package flow + +import "context" + +// Mutator represents a type-dispatched, once-per-step contribution of +// configuration to a Step. The interface has a single unexported method, so +// the only producer is the generic constructor [Mutate]. +type Mutator interface { + applyTo(ctx context.Context, step Steper) (matched bool, target Steper, builder Builder) +} + +// MutatorReceiver is implemented by Steps that host a sub-workflow (e.g. +// [SubWorkflow] or *Workflow used as a step) so that parent workflows can +// propagate their [Mutator]s into the inner workflow before it schedules its +// own steps. +type MutatorReceiver interface { + PrependMutators(mw []Mutator) +} + +// Mutate constructs a [Mutator] that runs against any step whose concrete type +// matches T anywhere along its Unwrap() chain (within a single workflow's +// boundaries). The first matching layer is passed to fn. fn returns a [Builder] +// whose configuration for the matched step is merged into the step's +// StepConfig at first scheduling. Returning a nil Builder is valid (useful +// when fn only mutates fields on the typed step pointer). +func Mutate[T Steper](fn func(ctx context.Context, step T) Builder) Mutator { + return mutatorFunc[T](fn) +} + +type mutatorFunc[T Steper] func(ctx context.Context, step T) Builder + +func (m mutatorFunc[T]) applyTo(ctx context.Context, step Steper) (bool, Steper, Builder) { + var ( + matched bool + typed T + match Steper + ) + Traverse(step, func(s Steper, _ []Steper) TraverseDecision { + if v, ok := s.(T); ok { + typed = v + match = s + matched = true + return TraverseStop + } + // Stop at workflow boundaries: do NOT descend into a nested workflow's + // inner steps from here. Inner steps are reached via PrependMutators. + if _, isWorkflow := s.(interface { + StateOf(Steper) *State + }); isWorkflow && s != step { + return TraverseEndBranch + } + return TraverseContinue + }) + if !matched { + return false, nil, nil + } + return true, match, m(ctx, typed) +} +``` + +### - [ ] Step 1.4: Run test, expect pass + +Run: `cd /home/xingfeixu/repo/go-workflow/.claude/worktrees/redesign-build-step && go test ./... -run TestMutate -count=1` + +Expected: PASS. + +### - [ ] Step 1.5: Commit + +```bash +cd /home/xingfeixu/repo/go-workflow/.claude/worktrees/redesign-build-step +git add mutator.go mutator_test.go +git commit -m "feat(mutator): add Mutator interface and Mutate[T] constructor" +``` + +--- + +## Task 2: Unwrap-chain dispatch tests + +**Files:** +- Test: `mutator_test.go` + +### - [ ] Step 2.1: Write failing tests for unwrap walk and outer-wins + +Append to `mutator_test.go`: + +```go +type mutWrapper struct{ inner Steper } + +func (w *mutWrapper) Do(context.Context) error { return nil } +func (w *mutWrapper) Unwrap() Steper { return w.inner } + +func TestMutate_matchesInnerViaUnwrap(t *testing.T) { + inner := &mutFoo{Field: "before"} + wrapper := &mutWrapper{inner: inner} + + called := 0 + m := Mutate[*mutFoo](func(ctx context.Context, f *mutFoo) Builder { + called++ + assert.Same(t, inner, f, "should receive the inner *mutFoo, not the wrapper") + return nil + }) + matched, target, _ := m.applyTo(context.Background(), wrapper) + assert.True(t, matched) + assert.Same(t, inner, target) + assert.Equal(t, 1, called) +} + +func TestMutate_outerWrapperWinsWhenItIsTheTarget(t *testing.T) { + inner := &mutFoo{} + wrapper := &mutWrapper{inner: inner} + + called := 0 + m := Mutate[*mutWrapper](func(ctx context.Context, w *mutWrapper) Builder { + called++ + assert.Same(t, wrapper, w) + return nil + }) + matched, _, _ := m.applyTo(context.Background(), wrapper) + assert.True(t, matched) + assert.Equal(t, 1, called) +} +``` + +### - [ ] Step 2.2: Run, expect pass (Task 1 implementation already covers Unwrap) + +Run: `go test ./... -run TestMutate -count=1` + +Expected: PASS for both new tests. + +### - [ ] Step 2.3: Write failing test that mutator does NOT cross workflow boundary + +Append to `mutator_test.go`: + +```go +func TestMutate_doesNotCrossWorkflowBoundary(t *testing.T) { + // A *Workflow sits between the outer step and the inner *mutFoo. + // applyTo must NOT descend into it; that's what PrependMutators is for. + innerFoo := &mutFoo{} + innerWf := new(Workflow).Add(Step(innerFoo)) + + m := Mutate[*mutFoo](func(ctx context.Context, f *mutFoo) Builder { + t.Fatalf("mutator must not descend into nested workflow") + return nil + }) + matched, _, _ := m.applyTo(context.Background(), innerWf) + assert.False(t, matched) +} +``` + +### - [ ] Step 2.4: Run, expect pass + +Run: `go test ./... -run TestMutate_doesNotCrossWorkflowBoundary -count=1` + +Expected: PASS (the boundary check in `applyTo` already guards this — `*Workflow` implements `StateOf(Steper) *State`). + +### - [ ] Step 2.5: Commit + +```bash +git add mutator_test.go +git commit -m "test(mutator): cover unwrap traversal and workflow boundary stop" +``` + +--- + +## Task 3: Add `MutatorsApplied` flag to `State` + +**Files:** +- Modify: `state.go:12–16` (struct), append accessors after line 27 + +### - [ ] Step 3.1: Write failing test for new flag + +Append to `mutator_test.go`: + +```go +func TestState_MutatorsAppliedDefault(t *testing.T) { + s := &State{} + assert.False(t, s.MutatorsApplied()) +} + +func TestState_SetMutatorsApplied(t *testing.T) { + s := &State{} + s.SetMutatorsApplied() + assert.True(t, s.MutatorsApplied()) +} +``` + +### - [ ] Step 3.2: Run, expect compile failure + +Run: `go test ./... -run TestState_Mutators -count=1` + +Expected: `undefined: (*State).MutatorsApplied`. + +### - [ ] Step 3.3: Modify `state.go` + +Replace lines 12–16: + +```go +type State struct { + StepResult + Config *StepConfig + mutatorsApplied bool + sync.RWMutex +} +``` + +After line 27 (the existing `SetStatus` method), append: + +```go +// MutatorsApplied reports whether the workflow has already merged Mutator +// contributions into this Step's Config. +func (s *State) MutatorsApplied() bool { + s.RLock() + defer s.RUnlock() + return s.mutatorsApplied +} + +// SetMutatorsApplied marks Mutator contributions as merged for this Step. +// The runtime calls this exactly once per step, just before the first attempt. +func (s *State) SetMutatorsApplied() { + s.Lock() + defer s.Unlock() + s.mutatorsApplied = true +} +``` + +### - [ ] Step 3.4: Run, expect pass + +Run: `go test ./... -run TestState_Mutators -count=1` + +Expected: PASS. + +### - [ ] Step 3.5: Commit + +```bash +git add state.go mutator_test.go +git commit -m "feat(state): add MutatorsApplied per-step flag" +``` + +--- + +## Task 4: Add `Mutators` field to `Workflow` and `PrependMutators` methods + +**Files:** +- Modify: `workflow.go:40–55` (struct), `workflow.go:551–558` (SubWorkflow) + +### - [ ] Step 4.1: Write failing tests for `PrependMutators` + +Create `workflow_mutator_test.go`: + +```go +package flow_test + +import ( + "context" + "testing" + + flow "github.com/Azure/go-workflow" + "github.com/stretchr/testify/assert" +) + +type wfFoo struct{} + +func (*wfFoo) Do(context.Context) error { return nil } + +func TestWorkflow_PrependMutators(t *testing.T) { + m1 := flow.Mutate[*wfFoo](func(ctx context.Context, f *wfFoo) flow.Builder { return nil }) + m2 := flow.Mutate[*wfFoo](func(ctx context.Context, f *wfFoo) flow.Builder { return nil }) + + w := &flow.Workflow{Mutators: []flow.Mutator{m2}} + w.PrependMutators([]flow.Mutator{m1}) + + assert.Len(t, w.Mutators, 2) + // m1 should now be first (prepended), m2 second + // We can't compare functions directly; we infer order via in-test sentinels + // in later integration tests. Here we just assert the slice grew. +} + +func TestWorkflow_PrependMutatorsNilOrEmpty(t *testing.T) { + w := &flow.Workflow{} + w.PrependMutators(nil) + assert.Empty(t, w.Mutators) + w.PrependMutators([]flow.Mutator{}) + assert.Empty(t, w.Mutators) +} + +func TestSubWorkflow_PrependMutators(t *testing.T) { + type sub struct{ flow.SubWorkflow } + s := &sub{} + m := flow.Mutate[*wfFoo](func(ctx context.Context, f *wfFoo) flow.Builder { return nil }) + + // MutatorReceiver must be implemented + var _ flow.MutatorReceiver = s + s.PrependMutators([]flow.Mutator{m}) + // No panic / no error → behaviour verified by integration test in Task 7 +} +``` + +### - [ ] Step 4.2: Run, expect compile failure + +Run: `go test ./... -run TestWorkflow_PrependMutators -count=1` + +Expected: `undefined: Mutators field`, `undefined: PrependMutators`. + +### - [ ] Step 4.3: Modify `Workflow` struct in `workflow.go:40–55` + +Add `Mutators` field after `DefaultOption`: + +```go +type Workflow struct { + MaxConcurrency int + DontPanic bool + SkipAsError bool + Clock clock.Clock + DefaultOption *StepOption + + // Mutators are evaluated against every step in this workflow before that + // step's first attempt. See [Mutator] / [Mutate]. + Mutators []Mutator + + StepBuilder + + steps map[Steper]*State + + statusChange *sync.Cond + leaseBucket chan struct{} + waitGroup sync.WaitGroup + isRunning sync.Mutex +} +``` + +### - [ ] Step 4.4: Add `(*Workflow).PrependMutators` + +Append to `workflow.go` (place near `Add`, around line 75): + +```go +// PrependMutators inserts mw at the front of w.Mutators, preserving the +// invariant that propagated parent Mutators run before locally-registered +// child Mutators. Safe to call multiple times; the once-per-step flag on +// State prevents double application. +func (w *Workflow) PrependMutators(mw []Mutator) { + if len(mw) == 0 { + return + } + w.Mutators = append(append([]Mutator{}, mw...), w.Mutators...) +} +``` + +### - [ ] Step 4.5: Add `(*SubWorkflow).PrependMutators` + +Append after the existing SubWorkflow methods near `workflow.go:558`: + +```go +// PrependMutators forwards mw to the inner workflow. Implements [MutatorReceiver] +// so parent workflows can propagate Mutators into a sub-workflow before its +// scheduling pass begins. +func (s *SubWorkflow) PrependMutators(mw []Mutator) { + s.w.PrependMutators(mw) +} +``` + +### - [ ] Step 4.6: Run, expect pass + +Run: `go test ./... -run "TestWorkflow_PrependMutators|TestSubWorkflow_PrependMutators" -count=1` + +Expected: PASS. + +### - [ ] Step 4.7: Commit + +```bash +git add workflow.go workflow_mutator_test.go +git commit -m "feat(workflow): add Mutators field and PrependMutators on Workflow/SubWorkflow" +``` + +--- + +## Task 5: Splice mutator-merge into `tick` (the core integration) + +**Files:** +- Modify: `workflow.go:365–441` (insert new helper + call site) + +### - [ ] Step 5.1: Write failing integration test — Mutator runs once and merges Input + +Append to `workflow_mutator_test.go`: + +```go +type wfGreet struct { + Greeting string + Who string +} + +func (g *wfGreet) Do(context.Context) error { return nil } + +func TestMutator_mergesInputBeforeFirstAttempt(t *testing.T) { + called := 0 + g := &wfGreet{Greeting: "Hi"} + w := &flow.Workflow{ + Mutators: []flow.Mutator{ + flow.Mutate[*wfGreet](func(ctx context.Context, gg *wfGreet) flow.Builder { + called++ + return flow.Step(gg).Input(func(_ context.Context, gg *wfGreet) error { + gg.Who = "world" + return nil + }) + }), + }, + } + w.Add(flow.Step(g)) + + err := w.Do(context.Background()) + assert.NoError(t, err) + assert.Equal(t, 1, called, "mutator must run exactly once") + assert.Equal(t, "world", g.Who, "mutator-contributed Input must run before Do") +} +``` + +### - [ ] Step 5.2: Run, expect failure (Mutator not yet wired into tick) + +Run: `go test ./... -run TestMutator_mergesInputBeforeFirstAttempt -count=1` + +Expected: FAIL — `g.Who` is "" because the Mutator is registered but never invoked. + +### - [ ] Step 5.3: Add helper `applyMutators` to `workflow.go` + +Append to `workflow.go` (place near `addStep` ~line 117 or in a logical helper section): + +```go +// applyMutators runs each Mutator in w.Mutators against step. For every match, +// the returned Builder's config keyed on the matched layer is merged into the +// state of step (the wrapper key in this workflow). Called once per step, +// just before its first attempt. +func (w *Workflow) applyMutators(ctx context.Context, step Steper, state *State) { + if len(w.Mutators) == 0 { + return + } + for _, m := range w.Mutators { + matched, target, b := m.applyTo(ctx, step) + if !matched || b == nil { + continue + } + for s, cfg := range b.AddToWorkflow() { + if s == target { + state.MergeConfig(cfg) + } + // configs keyed on other steps are silently dropped — Mutator + // scope is the matched layer only. + } + } +} +``` + +### - [ ] Step 5.4: Splice the call into `tick` between line 379 and 380 + +Edit `workflow.go:tick` — locate this block (currently lines 376–380): + +```go + // we only process Steps whose all upstreams are terminated + ups := w.UpstreamOf(step) + if isAnyUpstreamNotTerminated(ups) { + continue + } + option := state.Option() +``` + +Replace with: + +```go + // we only process Steps whose all upstreams are terminated + ups := w.UpstreamOf(step) + if isAnyUpstreamNotTerminated(ups) { + continue + } + // Apply Mutators exactly once per step, before reading Option / + // evaluating Condition / starting the first attempt. This way the + // Option/Before/After contributions from Mutators are visible to the + // rest of this iteration. + if !state.MutatorsApplied() { + w.applyMutators(ctx, step, state) + state.SetMutatorsApplied() + } + option := state.Option() +``` + +### - [ ] Step 5.5: Run, expect pass + +Run: `go test ./... -run TestMutator_mergesInputBeforeFirstAttempt -count=1` + +Expected: PASS — `g.Who == "world"`, `called == 1`. + +### - [ ] Step 5.6: Run full test suite to confirm no regression + +Run: `go test ./... -count=1` + +Expected: all pre-existing tests still pass; new ones pass. + +### - [ ] Step 5.7: Commit + +```bash +git add workflow.go workflow_mutator_test.go +git commit -m "feat(workflow): apply Mutators once per step before first attempt" +``` + +--- + +## Task 6: Once-per-step across retries; nil/empty Mutators is a no-op + +**Files:** +- Test: `workflow_mutator_test.go` + +### - [ ] Step 6.1: Write test — Mutator runs once across multiple retry attempts + +Append to `workflow_mutator_test.go`: + +```go +import "errors" + +type wfFlaky struct { + attempts int + failTill int +} + +func (f *wfFlaky) Do(context.Context) error { + f.attempts++ + if f.attempts <= f.failTill { + return errors.New("transient") + } + return nil +} + +func TestMutator_runsExactlyOnceAcrossRetries(t *testing.T) { + mutatorCalls := 0 + inputCalls := 0 + f := &wfFlaky{failTill: 2} + w := &flow.Workflow{ + Mutators: []flow.Mutator{ + flow.Mutate[*wfFlaky](func(ctx context.Context, ff *wfFlaky) flow.Builder { + mutatorCalls++ + return flow.Step(ff). + Retry(func(o *flow.RetryOption) { o.MaxAttempts = 3 }). + Input(func(_ context.Context, _ *wfFlaky) error { + inputCalls++ + return nil + }) + }), + }, + } + w.Add(flow.Step(f)) + + err := w.Do(context.Background()) + assert.NoError(t, err) + assert.Equal(t, 1, mutatorCalls, "mutator user-fn runs once") + assert.Equal(t, 3, inputCalls, "mutator-contributed Input runs per attempt") + assert.Equal(t, 3, f.attempts) +} +``` + +NOTE: Adjust the import group at the top of the file to add `"errors"` if not already present. + +### - [ ] Step 6.2: Write test — nil Mutators slice is a no-op + +```go +func TestMutator_nilSliceIsNoOp(t *testing.T) { + g := &wfGreet{Greeting: "Hi", Who: "Bob"} + w := &flow.Workflow{} // Mutators == nil + w.Add(flow.Step(g)) + assert.NoError(t, w.Do(context.Background())) + assert.Equal(t, "Bob", g.Who) +} +``` + +### - [ ] Step 6.3: Run, expect pass + +Run: `go test ./... -run "TestMutator_runsExactlyOnceAcrossRetries|TestMutator_nilSliceIsNoOp" -count=1` + +Expected: PASS. + +### - [ ] Step 6.4: Commit + +```bash +git add workflow_mutator_test.go +git commit -m "test(mutator): once-per-step across retries; nil Mutators no-op" +``` + +--- + +## Task 7: Sub-workflow propagation via `MutatorReceiver` + +**Files:** +- Modify: `workflow.go` — splice receiver invocation into `runStep` / `addStep`-time path +- Test: `workflow_mutator_test.go` + +### - [ ] Step 7.1: Write failing test — parent Mutator reaches into SubWorkflow + +```go +type wfComposite struct { + flow.SubWorkflow + Inner wfGreet +} + +func (c *wfComposite) Do(ctx context.Context) error { + // Lazy build inside Do — replaces BuildStep pattern. + c.Add(flow.Step(&c.Inner)) + return c.SubWorkflow.Do(ctx) +} + +func TestMutator_reachesIntoSubWorkflow(t *testing.T) { + c := &wfComposite{Inner: wfGreet{Greeting: "Hello"}} + w := &flow.Workflow{ + Mutators: []flow.Mutator{ + flow.Mutate[*wfGreet](func(ctx context.Context, g *wfGreet) flow.Builder { + g.Who = "world" + return nil + }), + }, + } + w.Add(flow.Step(c)) + + err := w.Do(context.Background()) + assert.NoError(t, err) + assert.Equal(t, "world", c.Inner.Who, "parent Mutator must reach inner step via PrependMutators") +} +``` + +### - [ ] Step 7.2: Run, expect failure + +Run: `go test ./... -run TestMutator_reachesIntoSubWorkflow -count=1` + +Expected: FAIL — `c.Inner.Who` empty because parent's Mutators were never propagated to the inner workflow. + +### - [ ] Step 7.3: Splice propagation into `tick` (immediately before invocation) + +Recall splice point established in Task 5. Extend the same block: BEFORE running `applyMutators`, also propagate to receivers. Edit `tick` in `workflow.go`: + +```go + if !state.MutatorsApplied() { + // Propagate this workflow's Mutators into nested workflows so + // they can apply against inner steps when those steps are + // scheduled by the inner workflow. + if recv, ok := step.(MutatorReceiver); ok && len(w.Mutators) > 0 { + recv.PrependMutators(w.Mutators) + } + w.applyMutators(ctx, step, state) + state.SetMutatorsApplied() + } +``` + +### - [ ] Step 7.4: Run, expect pass + +Run: `go test ./... -run TestMutator_reachesIntoSubWorkflow -count=1` + +Expected: PASS — `c.Inner.Who == "world"`. + +### - [ ] Step 7.5: Write test — parent Mutator reaches step inside nested `*Workflow` used as a step + +```go +func TestMutator_reachesIntoNestedWorkflow(t *testing.T) { + innerG := &wfGreet{Greeting: "Hi"} + innerW := new(flow.Workflow).Add(flow.Step(innerG)) + + w := &flow.Workflow{ + Mutators: []flow.Mutator{ + flow.Mutate[*wfGreet](func(ctx context.Context, g *wfGreet) flow.Builder { + g.Who = "world" + return nil + }), + }, + } + w.Add(flow.Step(innerW)) + + err := w.Do(context.Background()) + assert.NoError(t, err) + assert.Equal(t, "world", innerG.Who) +} +``` + +### - [ ] Step 7.6: Run, expect pass (because `*Workflow` already implements MutatorReceiver from Task 4) + +Run: `go test ./... -run TestMutator_reachesIntoNestedWorkflow -count=1` + +Expected: PASS. + +### - [ ] Step 7.7: Write test — lazily-added inner step is reached + +```go +func TestMutator_reachesLazilyAddedInnerStep(t *testing.T) { + // wfComposite already adds &c.Inner inside Do() — verify this works. + // (Same as TestMutator_reachesIntoSubWorkflow but explicit about laziness.) + c := &wfComposite{Inner: wfGreet{Greeting: "Yo"}} + called := 0 + w := &flow.Workflow{ + Mutators: []flow.Mutator{ + flow.Mutate[*wfGreet](func(ctx context.Context, g *wfGreet) flow.Builder { + called++ + g.Who = "lazy" + return nil + }), + }, + } + w.Add(flow.Step(c)) + assert.NoError(t, w.Do(context.Background())) + assert.Equal(t, 1, called) + assert.Equal(t, "lazy", c.Inner.Who) +} +``` + +Run: `go test ./... -run TestMutator_reachesLazilyAddedInnerStep -count=1` + +Expected: PASS. + +### - [ ] Step 7.8: Commit + +```bash +git add workflow.go workflow_mutator_test.go +git commit -m "feat(workflow): propagate Mutators into nested workflows via MutatorReceiver" +``` + +--- + +## Task 8: Ordering — plan-declared Input runs before Mutator-contributed Input + +**Files:** +- Test: `workflow_mutator_test.go` + +### - [ ] Step 8.1: Write test + +```go +func TestMutator_planInputBeforeMutatorInput(t *testing.T) { + g := &wfGreet{Greeting: "Hi"} + order := []string{} + w := &flow.Workflow{ + Mutators: []flow.Mutator{ + flow.Mutate[*wfGreet](func(ctx context.Context, gg *wfGreet) flow.Builder { + return flow.Step(gg).Input(func(_ context.Context, _ *wfGreet) error { + order = append(order, "mutator") + return nil + }) + }), + }, + } + w.Add( + flow.Step(g).Input(func(_ context.Context, _ *wfGreet) error { + order = append(order, "plan") + return nil + }), + ) + assert.NoError(t, w.Do(context.Background())) + assert.Equal(t, []string{"plan", "mutator"}, order) +} + +func TestMutator_multipleMutatorsRunInSliceOrder(t *testing.T) { + g := &wfGreet{} + order := []string{} + mk := func(name string) flow.Mutator { + return flow.Mutate[*wfGreet](func(ctx context.Context, gg *wfGreet) flow.Builder { + return flow.Step(gg).Input(func(_ context.Context, _ *wfGreet) error { + order = append(order, name) + return nil + }) + }) + } + w := &flow.Workflow{Mutators: []flow.Mutator{mk("m1"), mk("m2")}} + w.Add(flow.Step(g)) + assert.NoError(t, w.Do(context.Background())) + assert.Equal(t, []string{"m1", "m2"}, order) +} +``` + +### - [ ] Step 8.2: Run, expect pass (Merge appends, so order is plan→m1→m2 by construction) + +Run: `go test ./... -run "TestMutator_planInputBeforeMutatorInput|TestMutator_multipleMutatorsRunInSliceOrder" -count=1` + +Expected: PASS. + +### - [ ] Step 8.3: Commit + +```bash +git add workflow_mutator_test.go +git commit -m "test(mutator): ordering — plan first, mutators in slice order" +``` + +--- + +## Task 9: Ctx propagation, scope, panic recovery + +**Files:** +- Test: `workflow_mutator_test.go` + +### - [ ] Step 9.1: Write test — Mutator receives workflow-scoped ctx + +```go +type ctxKey string + +const wfCtxKey ctxKey = "k" + +func TestMutator_receivesWorkflowCtx(t *testing.T) { + g := &wfGreet{} + got := "" + w := &flow.Workflow{ + Mutators: []flow.Mutator{ + flow.Mutate[*wfGreet](func(ctx context.Context, gg *wfGreet) flow.Builder { + if v, ok := ctx.Value(wfCtxKey).(string); ok { + got = v + } + return nil + }), + }, + } + w.Add(flow.Step(g)) + ctx := context.WithValue(context.Background(), wfCtxKey, "value-from-do") + assert.NoError(t, w.Do(ctx)) + assert.Equal(t, "value-from-do", got) +} +``` + +### - [ ] Step 9.2: Write test — Builder config for unrelated step is silently ignored + +```go +func TestMutator_unrelatedBuilderEntryIgnored(t *testing.T) { + g := &wfGreet{Greeting: "Hi", Who: "Bob"} + other := &wfGreet{Who: "untouched"} + w := &flow.Workflow{ + Mutators: []flow.Mutator{ + flow.Mutate[*wfGreet](func(ctx context.Context, gg *wfGreet) flow.Builder { + if gg == g { + // Mistakenly return a Builder keyed on `other` instead of `gg`. + return flow.Step(other).Input(func(_ context.Context, o *wfGreet) error { + o.Who = "stolen" + return nil + }) + } + return nil + }), + }, + } + w.Add(flow.Step(g)) // only g is in the workflow + assert.NoError(t, w.Do(context.Background())) + assert.Equal(t, "untouched", other.Who, "config keyed on a different step is dropped") +} +``` + +### - [ ] Step 9.3: Write test — Mutator panic is caught when DontPanic=true + +```go +func TestMutator_panicCaughtWhenDontPanic(t *testing.T) { + g := &wfGreet{} + w := &flow.Workflow{ + DontPanic: true, + Mutators: []flow.Mutator{ + flow.Mutate[*wfGreet](func(ctx context.Context, gg *wfGreet) flow.Builder { + panic("boom") + }), + }, + } + w.Add(flow.Step(g)) + err := w.Do(context.Background()) + assert.Error(t, err) +} +``` + +NOTE: this test may require wrapping `applyMutators` in `defer recover()` if `DontPanic` is true. If the test fails because the panic propagates out of `Do`, add the recovery logic to `applyMutators`. Look at how `runStep` handles `DontPanic` (`workflow.go` around `runStep`) for the canonical pattern to mirror. + +### - [ ] Step 9.4: Run all three, expect pass; if 9.3 fails, add panic recovery to applyMutators + +Run: `go test ./... -run "TestMutator_receivesWorkflowCtx|TestMutator_unrelatedBuilderEntryIgnored|TestMutator_panicCaughtWhenDontPanic" -count=1` + +If 9.3 fails: edit `applyMutators` in `workflow.go` to wrap each iteration: + +```go +func (w *Workflow) applyMutators(ctx context.Context, step Steper, state *State) { + if len(w.Mutators) == 0 { + return + } + for _, m := range w.Mutators { + func() { + if w.DontPanic { + defer func() { + if r := recover(); r != nil { + state.SetError(fmt.Errorf("mutator panic: %v", r)) + } + }() + } + matched, target, b := m.applyTo(ctx, step) + if !matched || b == nil { + return + } + for s, cfg := range b.AddToWorkflow() { + if s == target { + state.MergeConfig(cfg) + } + } + }() + } +} +``` + +(Add `"fmt"` to the imports of `workflow.go` if not already present — it likely is.) + +Re-run: `go test ./... -run TestMutator_panicCaughtWhenDontPanic -count=1` + +Expected: PASS. + +### - [ ] Step 9.5: Commit + +```bash +git add workflow.go workflow_mutator_test.go +git commit -m "test(mutator): ctx, scope-isolation, and DontPanic-protected panic recovery" +``` + +--- + +## Task 10: Merge does not happen at Add time + +**Files:** +- Test: `workflow_mutator_test.go` + +### - [ ] Step 10.1: Write test + +```go +func TestMutator_mergeAtFirstScheduling_NotAtAdd(t *testing.T) { + g := &wfGreet{} + called := 0 + w := &flow.Workflow{ + Mutators: []flow.Mutator{ + flow.Mutate[*wfGreet](func(ctx context.Context, gg *wfGreet) flow.Builder { + called++ + return nil + }), + }, + } + w.Add(flow.Step(g)) + assert.Equal(t, 0, called, "Add must not invoke mutators") + assert.NoError(t, w.Do(context.Background())) + assert.Equal(t, 1, called) +} +``` + +### - [ ] Step 10.2: Run, expect pass + +Run: `go test ./... -run TestMutator_mergeAtFirstScheduling -count=1` + +Expected: PASS. + +### - [ ] Step 10.3: Commit + +```bash +git add workflow_mutator_test.go +git commit -m "test(mutator): merge happens at first schedule, not at Add" +``` + +--- + +## Task 11: Unwrap-via-Name end-to-end test + +**Files:** +- Test: `workflow_mutator_test.go` + +### - [ ] Step 11.1: Write test + +```go +func TestMutator_matchesThroughNameWrapper(t *testing.T) { + g := &wfGreet{Greeting: "Hi"} + w := &flow.Workflow{ + Mutators: []flow.Mutator{ + flow.Mutate[*wfGreet](func(ctx context.Context, gg *wfGreet) flow.Builder { + gg.Who = "world" + return nil + }), + }, + } + w.Add(flow.Name(g, "named-greet")) + assert.NoError(t, w.Do(context.Background())) + assert.Equal(t, "world", g.Who) +} +``` + +### - [ ] Step 11.2: Run, expect pass + +Run: `go test ./... -run TestMutator_matchesThroughNameWrapper -count=1` + +Expected: PASS — Mutator dispatches through `*NamedStep`'s `Unwrap()` to find `*wfGreet`. + +### - [ ] Step 11.3: Commit + +```bash +git add workflow_mutator_test.go +git commit -m "test(mutator): match through Name wrapper via Unwrap" +``` + +--- + +## Task 12: Deprecation godoc + +**Files:** +- Modify: `build_step.go:17` +- Modify: `workflow.go` — `SubWorkflow.Reset` method + +### - [ ] Step 12.1: Add `// Deprecated:` to `StepBuilder.BuildStep` + +Edit `build_step.go`. Locate the doc comment immediately above `func (sb *StepBuilder) BuildStep(s Steper)` (currently at line 17). Replace any existing comment (or add if absent) with: + +```go +// BuildStep walks s and invokes any nested Step's BuildStep() method (and +// Reset() if implemented) to allow lazy initialization of composite steps. +// +// Deprecated: this lazy-initialization hook will be removed in the next major +// version of go-workflow. Use [Mutate] for cross-cutting modification, and +// construct sub-workflows inside Do() instead. See +// openspec/changes/2026-05-06-step-mutator/design.md. +func (sb *StepBuilder) BuildStep(s Steper) { +``` + +### - [ ] Step 12.2: Add `// Deprecated:` to `SubWorkflow.Reset` + +Locate `func (s *SubWorkflow) Reset()` in `workflow.go`. Add immediately above: + +```go +// Reset clears the inner workflow's state, allowing it to be re-built. +// +// Deprecated: Reset is only invoked by the deprecated [StepBuilder.BuildStep] +// path. With the [Mutator] mechanism (see [Mutate]) and Do()-time sub-workflow +// construction, Reset is no longer needed and will be removed in the next +// major version of go-workflow. +func (s *SubWorkflow) Reset() { +``` + +### - [ ] Step 12.3: Verify build still passes + +Run: `go build ./...` + +Expected: no errors. + +### - [ ] Step 12.4: Commit + +```bash +git add build_step.go workflow.go +git commit -m "chore(deprecation): mark BuildStep and SubWorkflow.Reset deprecated" +``` + +--- + +## Task 13: Update example file + +**Files:** +- Modify: `example/15_mutator_test.go` (drop build tag, fix `flow.Name` arg order) +- Modify: `example/13_composite_step_test.go` (mark `ExampleCompositeViaWorkflow` deprecated) + +### - [ ] Step 13.1: Drop `//go:build mutator_design` tag from `example/15_mutator_test.go` + +Open `example/15_mutator_test.go`. Delete the first 12 lines (everything from `//go:build mutator_design` through the closing `// shape of the Mutator API before any implementation is written.` comment block, **stopping before `package flow_test`**). The file should now begin: + +```go +package flow_test + +import ( +``` + +### - [ ] Step 13.2: Fix `flow.Name` argument order in `ExampleMutate_unwrap` + +In `example/15_mutator_test.go`, locate `ExampleMutate_unwrap`. Change: + +```go + w.Add(flow.Step(flow.Name("named-greet", greet))) +``` + +to: + +```go + w.Add(flow.Name(greet, "named-greet")) +``` + +(`flow.Name(step, name)` — and it already returns a `Builder`, no need to wrap with `flow.Step`.) + +### - [ ] Step 13.3: Run all examples to verify Output strings match + +Run: `cd /home/xingfeixu/repo/go-workflow/.claude/worktrees/redesign-build-step && go test ./example/... -run "Example" -v -count=1` + +Expected: all `ExampleMutate_*` PASS. If any FAIL with "got vs want" mismatch, adjust the `// Output:` comment to match observed output exactly. (Common gotcha: the `Hi (plan) (m1) (m2), world!` example in `ExampleMutate_multipleInOrder` depends on the Greet template — re-read the actual `Do()` implementation in the example file and match.) + +### - [ ] Step 13.4: Mark legacy composite example deprecated + +Open `example/13_composite_step_test.go`. Above `func ExampleCompositeViaWorkflow()`, add: + +```go +// ExampleCompositeViaWorkflow demonstrates the legacy BuildStep-based +// composite step pattern. +// +// Deprecated: BuildStep is being removed in the next major version. See +// ExampleMutate_subWorkflow in 15_mutator_test.go for the new pattern that +// constructs the inner workflow inside Do() and uses flow.Mutate for +// cross-cutting modification. +func ExampleCompositeViaWorkflow() { +``` + +### - [ ] Step 13.5: Commit + +```bash +git add example/15_mutator_test.go example/13_composite_step_test.go +git commit -m "docs(example): activate Mutator examples; deprecate BuildStep example" +``` + +--- + +## Task 14: Tick off completed items in `tasks.md` + +**Files:** +- Modify: `openspec/changes/2026-05-06-step-mutator/tasks.md` + +### - [ ] Step 14.1: Mark completed items + +Open `openspec/changes/2026-05-06-step-mutator/tasks.md`. Change `- [ ]` to `- [x]` for every item now satisfied by the implementation. Cross-reference each requirement: + +- 1.1–1.4 (Mutator types): satisfied by Task 1 +- 2.1–2.4 (Workflow field, State flag, PrependMutators): Task 3 + Task 4 +- 3.1–3.4 (First-schedule merge, set MutatorsApplied, propagation): Task 5 + Task 7 +- 4.1–4.3 (Deprecation godoc): Task 12 +- 5.1–5.5 (Examples): Task 13 (note: 5.4 retry-override and 5.5 sub-workflow are covered by `ExampleMutate_retryOverride` and `ExampleMutate_subWorkflow` already in the file) +- 7.1–7.21 (Tests): Tasks 1, 2, 5, 6, 7, 8, 9, 10, 11 + +Items NOT covered by this plan (intentional — they belong to the openspec `archive` step or to the follow-up migration change): +- 6.1–6.3 (Spec move on archive) — done by `openspec archive`, not by code +- 8.1–8.4 (Verify) — covered by Task 15 below + +### - [ ] Step 14.2: Commit + +```bash +git add openspec/changes/2026-05-06-step-mutator/tasks.md +git commit -m "docs(openspec): tick off completed step-mutator tasks" +``` + +--- + +## Task 15: Final verification + +### - [ ] Step 15.1: Run `go build ./...` + +Run: `cd /home/xingfeixu/repo/go-workflow/.claude/worktrees/redesign-build-step && go build ./...` + +Expected: no errors. + +### - [ ] Step 15.2: Run `go vet ./...` + +Run: `go vet ./...` + +Expected: no warnings. + +### - [ ] Step 15.3: Run full test suite + +Run: `go test ./... -count=1` + +Expected: all tests pass, including all `ExampleMutate_*` examples. + +### - [ ] Step 15.4: Re-validate openspec change + +Run: `openspec validate 2026-05-06-step-mutator --strict` + +Expected: `Change '2026-05-06-step-mutator' is valid` + +### - [ ] Step 15.5: Final commit (if anything trivial remains; otherwise skip) + +If steps 15.1–15.4 all passed without further edits, skip. Otherwise commit any small fixes: + +```bash +git add -A +git commit -m "chore: post-implementation cleanup" +``` + +--- + +## Self-Review Checklist + +**Spec coverage:** +- ✅ `Mutator interface and Mutate constructor` → Task 1 +- ✅ `Workflow.Mutators field` → Task 4 +- ✅ `Mutator merge timing and ordering` → Task 5, 8 +- ✅ `Mutator dispatch and merge destination` → Task 1 (Unwrap), Task 5 (rebase), Task 7 (no boundary cross) +- ✅ `Mutators run once per step instance` → Task 6 +- ✅ `Mutator-returned Builder scope` → Task 9 (unrelated entry ignored) +- ✅ `MutatorReceiver propagation interface` → Task 4, 7 +- ✅ `SubWorkflow and *Workflow implement MutatorReceiver` → Task 4 +- ✅ `Mutator vs Input/BeforeStep usage guidance` → covered by examples Task 13 + spec text +- ✅ `Config merge destination follows StateOf` → existing behaviour, leveraged by Mutator merge in Task 5 +- ✅ Composite-steps `BuildStep deprecated` → Task 12 +- ✅ Composite-steps `Sub-workflow construction inside Do` → demonstrated by Task 7 test + example +- ✅ Composite-steps `SubWorkflow / *Workflow implement MutatorReceiver` → Task 4 + +**Verification end-to-end:** +1. `go build ./...` — Task 15.1 +2. `go vet ./...` — Task 15.2 +3. `go test ./... -count=1` — Task 15.3 (covers unit tests in `mutator_test.go` + integration tests in `workflow_mutator_test.go` + examples) +4. `openspec validate 2026-05-06-step-mutator --strict` — Task 15.4 +5. **Manual review of `example/15_mutator_test.go`**: open the file and read top-to-bottom — these are the user-facing API demonstrations and must read as idiomatic Go. + +**Out of scope for this plan (deliberate):** +- Removing `BuildStep` / `StepBuilder` / `SubWorkflow.Reset()` — scheduled for next major version (see design.md §"Deprecation timeline") +- Migrating AKS e2ev3 production code from `BuildStep` to `Mutate` — separate proposal +- Collapsing `SubWorkflow` to alias `Workflow` — separate future proposal (design.md §"Future consideration") +- `openspec archive` of this change — done after PR merge, not by this plan diff --git a/docs/superpowers/specs/2026-05-06-step-mutator-design.md b/docs/superpowers/specs/2026-05-06-step-mutator-design.md new file mode 100644 index 0000000..fc40227 --- /dev/null +++ b/docs/superpowers/specs/2026-05-06-step-mutator-design.md @@ -0,0 +1,798 @@ +# Step Mutator Design + +**Date:** 2026-05-06 +**Status:** Draft +**Scope:** go-workflow type-dispatched step mutators to replace the `BuildStep` + `As[T]` pattern + +--- + +## Why + +Today, cross-cutting modifications of specific step types (e.g. "enable feature X on every +`*CreateManagedCluster` request, no matter where it appears in the workflow tree") are +implemented in production via two cooperating mechanisms: + +1. **`BuildStep`** — composite/sub-workflow steps must implement this hook so their inner + steps are materialized into the outer workflow tree at `Add` time. +2. **Mutators** (a higher-level AKS concept, `test.Mutator`) — ordinary code that walks the + tree with `flow.As[T](w)`, finds the targeted step instances, and attaches `Input` + callbacks to mutate them. + +This pair has serious problems: + +- `BuildStep`'s contract is fragile: it is run implicitly at `Add` time, can't return + errors, relies on a sentinel-by-method-signature trick to detect sub-workflows, and + silently calls a `Reset()` method by name. Production code (`DeploySwiftV2Pod`, + `swiftv1ponv6steps.go`, others) routinely calls `BuildStep` again manually inside `Do` + or inside `Input` callbacks, signaling that the framework's contract no longer matches + real usage. +- Mutators that find no matching step fail silently — the mutation is just skipped, often + producing wrong test behavior with no error. +- Mutators implemented via `As[T] + Input` can only modify step *fields*. To override + retry policy, timeout, or `When`, or to add new `Before`/`After` callbacks across all + matching steps, the workflow author has no clean tool — they end up with `Input` + callbacks that are misnamed for the work they do. +- The whole "Add-time materialize, then walk the tree" workflow exists only to support + the mutator system. Without it, `BuildStep` would not need to exist; sub-workflows + could be built lazily inside `Do`, like any other Go object. + +The proposal: introduce a **type-dispatched Mutator** mechanism in `go-workflow` that +directly expresses "for every `*T` step, configure it like this" by returning a +`flow.Builder` — the same builder users already use to declare per-step configuration in +their plans. With it, the AKS scenario-level mutator pattern compiles down to a single +`flow.Mutate` registration, `As[T]` traversal disappears for that purpose, and `BuildStep` +can be deprecated and eventually removed. + +This design also clarifies a non-goal: pre-execution dump / introspection of the full +expanded tree is not a use case we will support. No production user has asked for it; the +post-execution event stream from the Step Interceptor work covers the real +"what happened" need with strictly more accuracy. + +--- + +## Concepts + +### Mutator vs Interceptor + +These are deliberately separate mechanisms serving different purposes. They answer +different questions, and their names are chosen to reflect that. + +| Aspect | StepInterceptor / AttemptInterceptor | Mutator | +|--------|--------------------------------------|---------| +| Question answered | "What is happening to every step?" | "Configure every `*T` step like this." | +| Selector | every step | type assertion `s.(T)` | +| When invoked | per attempt (every retry too) | **per step instance, once** (the first time the step is scheduled) | +| Signature | `(ctx, info, next) → error` | `(ctx, *T) → flow.Builder` | +| Wraps execution? | yes — controls whether `next` runs | no — produces config, runtime merges it | +| Can short-circuit? | yes (skip `next`) | no | +| Can change ctx? | yes (substitutes ctx for `next`) | no — ctx is read-only input | +| Primary use | observability, tracing, metrics, events | scenario-level config: feature flags, retry/timeout/When override, cross-cutting Input/Output | +| Failure mode | wraps the step; `next` error propagates | no error return — failable work belongs in the returned Builder's `Input` | +| Industry analogue | gRPC / HTTP middleware (`(ctx, req, next) → resp`) | K8s MutatingAdmissionWebhook (`(obj) → mut(obj)`) | + +A Mutator is **not** an interceptor variant. It does not wrap step execution and it does +not run on every attempt. It runs once per step instance and produces additional +configuration that the workflow runtime merges into that step's `StepConfig`. + +#### Why not just a helper on top of Interceptor? + +The reader's instinct here is reasonable: an interceptor already receives the step via +`info.Step` and can call `next`. Could a Mutator just be a typed wrapper around a +`StepInterceptor`? + +```go +// hypothetical helper (NOT what we're building) +func MutateInterceptor[T Steper](fn func(context.Context, T) error) StepInterceptor { + return StepInterceptorFunc(func(ctx, info, next) error { + if s, ok := info.Step.(T); ok { + if err := fn(ctx, s); err != nil { return err } + } + return next(ctx) + }) +} +``` + +This helper exists in design space. It is rejected as a replacement for `Mutator` for six +distinct reasons. Each reason on its own may not justify a new mechanism; together they +do. + +1. **Timing — per-attempt vs once.** Interceptors run once per attempt; a step with 5 + interceptors retrying 3 times incurs 15 type assertions and 15 closures. Mutators run + **once per step**, then merge into `StepConfig`, after which the standard onion + executes against merged config without re-dispatching. Putting "configure every `*T`" + onto the interceptor mechanism silently moves a one-time setup cost onto every + attempt. + +2. **Expressive power — what each can change.** An interceptor wraps `next(ctx)`. It can + change ctx, observe errors, decide to skip the step. It cannot change the + `RetryOption` (the retry loop is *outside* AttemptInterceptors per the interceptor + spec stack), the `Condition` (consumed before scheduling), the `Timeout`, or attach a + `BeforeStep` that runs *inside* the same retry loop the interceptor is outside of. A + Mutator merges `StepConfig` before the first attempt, so all of these become + trivially overridable. The legacy `As[T] + Input` pattern was constrained to + "modify fields via per-attempt Input" for exactly this reason — every other + cross-cutting need had no clean tool. + +3. **Type safety — compile-time vs runtime.** With the interceptor helper, the type + parameter is a runtime assertion buried inside the closure. Misspelling the type does + not produce a compile error; it just silently never matches. This is precisely the + silent-no-match failure mode that bites AKS today (a mutator targeting a renamed step + type stops doing anything; tests still "pass"). `flow.Mutate[T](fn)` gives the user + `func(ctx, T) Builder` — the type is checked at the call site of `Mutate`, the + compiler enforces signature correctness, and the function body cannot type-mismatch. + +4. **Failure semantics — `next` control vs no `next`.** An interceptor that forgets + `return next(ctx)` silently skips the step. A Mutator has no `next`; "skip the + step" is not a state it can produce. The result is a smaller surface for user bugs: + a Mutator can either succeed (apply config) or fail (its returned Builder or its + merge is malformed) — it cannot accidentally short-circuit execution. + +5. **Mental model — observation vs configuration.** Interceptor's natural reading is "I + wrap each step's execution." Mutator's natural reading is "I declare what `*T` steps + look like at scenario time." Scenario authors who write `flow.Mutate[*CreateMC]` + communicate intent (cross-cutting customization) at a glance; readers who see + `StepInterceptorFunc` with a type assertion buried inside have to parse the closure + to learn what is going on. Two API names, two intents — for the reader's benefit, not + the implementer's. + +6. **Architectural prerequisite for retiring `BuildStep`.** This is the most important + and the most hidden. `BuildStep` exists only so that the mutator system can + `flow.As[T](w)` over a tree that includes lazily-constructed sub-workflows. A + per-attempt interceptor that type-asserts each step *also* meets that need + technically — but only for "change a field," because (per reason 2) interceptors + can't change `RetryOption` / `Condition` / etc. A Mutator that merges `StepConfig` + on first scheduling reaches sub-workflow steps at the right time **and** can change + all the things plan authors care about. Only with this combination is `BuildStep` + removable. A "helper on top of interceptor" cannot retire `BuildStep`; it can only + replace the field-modification subset of today's `As[T] + Input` pattern. + +A typed helper around an interceptor remains useful for **observability** scenarios +("trace every `*HTTPCall`"). If we add one later, it would be a tool for +type-narrowed observability, not a replacement for `Mutator`. The two mechanisms answer +different questions; they share signature shape only by coincidence, and merging them +would force one of them to be wrong for its own job. + +### Why per-step (once), not per-attempt + +Earlier drafts of this design ran the Mutator function on every attempt. We rejected that +for three reasons: + +1. **Real usage is static configuration.** AKS production mutators + (`mutate/customize_managed_cluster.go`, etc.) all set static fields or attach + permanent callbacks. None of them depends on per-attempt state. Per-attempt evaluation + would force every Mutator author to think about idempotency unnecessarily. +2. **Per-attempt with config-mutating power is a footgun.** If the Mutator returns a + builder that adds a `BeforeStep` callback and runs every attempt, the step's + `StepConfig.Before` slice grows by one entry per retry — silently. No production + pattern wants that. +3. **The right tool for per-attempt logic already exists.** A Mutator can attach an + `Input` or `BeforeStep` callback (they are themselves per-attempt). So a per-step + Mutator that returns `Input(fn)` gives a per-attempt effect through composition — + without giving up the once-and-done semantics for everything else. + +If a future use case genuinely needs a per-attempt callback registered cross-cuttingly, +the right model is for the Mutator to add a `BeforeStep` (already per-attempt). No +separate per-attempt Mutator API is needed. + +### Why the Mutator returns `flow.Builder` + +The framework already has a fluent builder for declaring step configuration: + +```go +flow.Step(s). + Input(...). + BeforeStep(...). + AfterStep(...). + Output(...). + DependsOn(...). + Retry(...). // via WithOption + Timeout(...). + When(...) +``` + +A `flow.Builder` is the unified return type of all these helpers (its `AddToWorkflow()` +method returns `map[Steper]*StepConfig`). By having a Mutator return `flow.Builder`, the +Mutator author writes: + +```go +flow.Mutate[*CreateMC](func(ctx context.Context, c *CreateMC) flow.Builder { + return flow.Step(c). + Input(func(ctx, c *CreateMC) error { EnableFeatureX(c.Request); return nil }). + Retry(func(o *flow.RetryOption) { o.MaxAttempts = 5 }) +}) +``` + +This is **the same vocabulary plan authors already use**. There is no second API to +learn, no `*StepConfig` poking, no special way to express "add a Before callback". The +runtime simply merges the returned `Builder`'s `StepConfig` into the step's existing +config (using the same merge logic as repeat `Add` calls — see the `Idempotent Add with +config merging` requirement in `step-configuration`). + +### Naming alignment with existing AKS code + +In AKS e2ev3, `test.Mutator` already exists as a scenario-level concept (a Mutator is "a +piece of test customization that adapts a Plan for a Scenario"). That higher-level type +typically resolves down to "find these steps in the workflow and modify their requests" — +exactly what `flow.Mutator` will express in one line. + +The two concepts share the word because they are the same idea at different layers: + +- `test.Mutator` — scenario-level: "this Scenario differs from its Plan by enabling + feature X." +- `flow.Mutator` — step-level: "for any step of type `*T`, configure it like this." + +`test.Mutator` implementations will internally produce `flow.Mutator` instances and append +them to the workflow's `Mutators` slice. The conceptual chain is consistent end-to-end. + +### Selector by exact type + +A Mutator is registered for exactly one Go type via a generic constructor: + +```go +flow.Mutate[*CreateManagedCluster](fn) +``` + +Selection is `step.(*CreateManagedCluster)`. There is no interface matching, no +embedded-type walk, no name matching. A step embedded inside a wrapper is **not** matched +by a Mutator registered for the wrapper's type. If a user wants both, they register both. + +Rationale: the production mutator system targets a small, known set of concrete API call +types (`CreateManagedCluster`, `UpgradeManagedCluster`, `CreateAgentPool`, etc.). Exact +type matching is what users already mean when they write +`flow.As[*CreateManagedCluster](w)` — generalizing further would invite surprising +matches. + +### When Mutators run + +A Mutator runs **once per step instance**, on the first time the workflow schedules that +step (i.e. just before the step's first attempt begins, after its upstreams have +terminated). At that point: + +1. The runtime iterates `w.Mutators` in slice order. +2. For each Mutator whose target type matches this step's concrete type, the runtime + calls the user function and obtains a `flow.Builder`. +3. The returned `Builder`'s `AddToWorkflow()` is invoked; its `StepConfig` for this step + is merged into the step's existing `StepConfig` using the same idempotent-merge rules + that apply when the same step is added twice via `w.Add(...)`. +4. The Mutator's contribution is therefore visible to the `Before` chain, the retry + policy, and the `Condition` from the very first attempt onward. + +The Mutator is **not** invoked again on subsequent attempts. The configuration it +contributed remains in effect. + +### Why "first schedule" rather than "Add time" + +Sub-workflow steps that are added lazily (e.g. by a composite step's `Do` calling +`s.Add(new(*CreateMC))`) are not visible to the outer workflow at outer-`Add` time. By +deferring Mutator evaluation to the first time the step is scheduled by *any* workflow +(outer or nested), we cover both eagerly-added and lazily-added steps under one rule. + +The runtime tracks a `mutatorsApplied` flag (or equivalent) on each step's `State` so +that re-scheduling never re-runs Mutators on the same step. + +### Position in the execution stack + +The Mutator-contributed configuration is merged *before* execution begins. When the +step's first attempt runs, the per-attempt onion looks identical to a step whose +`Before`/`After`/etc. were declared at `Add` time: + +``` +StepInterceptor[0..n] ← workflow-level lifecycle + └── [retry loop] + └── AttemptInterceptor[0..n] ← per-attempt observability + └── BeforeStep callbacks (including any from Mutators) + └── step.Do + └── AfterStep callbacks (including any from Mutators) +``` + +Mutators do not introduce a new layer of the onion. They contribute to the existing +`Before`/`After` lists. + +### Order across multiple Mutators + +The `Mutators` slice is ordered. Element 0 runs first, element n-1 runs last. For a given +step instance, only Mutators whose type parameter matches the step run, but among those, +slice order is preserved. + +When two Mutators both register `Before` callbacks via their returned Builders, the +`Before` callbacks accumulate in the order the Mutators ran (because the merge rule for +`Before`/`After` is append, per `step-configuration`'s `Idempotent Add with config +merging` requirement). + +### Merge timing and ordering relative to plan callbacks + +Mutators are merged **after** all `Workflow.Add(...)` calls for a step have completed, +just before the step's first attempt begins. The plan author's `Step(s).Input(...)` / +`Output(...)` are already in `StepConfig.Before` / `After` when the Mutator merge +happens; the Mutator's contributions are **appended** to those existing lists. + +Concrete execution order for a step that has both plan-declared and Mutator-declared +callbacks: + +``` +attempt: + ├─ planFn (from Step(s).Input(planFn)) — runs first + ├─ mutator1Fn (from flow.Mutate[*S], slot 0) — runs second + ├─ mutator2Fn (from flow.Mutate[*S], slot 1) — runs third + ├─ s.Do(ctx) + ├─ planAfter (from Step(s).Output(planAfter)) + ├─ mutator1Aft (from flow.Mutate[*S] returned Output) + └─ mutator2Aft (...) +``` + +**Why plan first, Mutator after** (rather than the reverse): + +1. **Mental model match.** A Mutator is a scenario-level customization that "decorates" + a plan. Decorations going on top of defaults is the natural reading. Reversing it + would make Mutator look like "default value provider", which it isn't. +2. **Override semantics.** With plan-first-Mutator-after, a Mutator can read the value + the plan set and override it (e.g. plan sets `SKU = "basic"`, Mutator sets + `SKU = "premium"`). The reverse ordering would make override impossible without + undoing plan state — which a callback can't do. +3. **Industry precedent.** K8s admission webhooks run *after* default-set: the API + server fills in defaults, then mutating webhooks customize. Tekton's mutating webhook + model is identical. Airflow's `task_policy` runs at scheduling time, after DAG + construction. We follow the same pattern. +4. **Cannot be circumvented.** If a Mutator genuinely needs to run *before* a plan + callback (rare), the right answer is to make that behavior a plan-level concern (move + it into `Input` at `Add` time). Mutators are not the right tool for "pre-empt plan + defaults" because they conceptually layer above the plan. + +`Upstreams` and `Option` merging follow the same append/union rules from +`StepConfig.Merge`; there is no per-type tweak for Mutator-contributed config. + +### Mutator vs Input / BeforeStep — when to use which + +A Mutator **returns** a `flow.Builder`, which can in turn declare `Input` / `BeforeStep` +on the step. So at first glance the mechanisms overlap. The distinction is **binding +scope and authoring location**: + +| | `flow.Step(s).Input(fn)` (in a plan) | `flow.Mutate[*T](fn)` | +|---|---|---| +| Binding | one specific step instance, lexically known | a Go type — every step of that type, anywhere | +| Registered at | plan construction time | scenario / test setup time | +| Author knows the targets? | yes — author holds the pointer | no — discovered by type at first scheduling | +| Scope | local to the plan | cross-plan, cross-sub-workflow | +| Cardinality | 1 step | N steps (zero or more) | + +Guidance: + +- **Use `Input` / `BeforeStep` directly in a plan** when: + - You are writing a plan or composite step and you hold the step instance. + - The modification only matters for *this* instance. + - The modification is plan-local. + +- **Use `flow.Mutate`** when: + - You want a modification to apply to every `*T` step regardless of which plan or + sub-workflow added it. + - The modification is a cross-cutting policy that scenario authors register without + knowing the plan internals. + - You want to override `Retry`, `Timeout`, `When`, or attach `Before`/`After` + callbacks across all instances of `*T`. + +Anti-patterns: + +- **Don't use `Mutate` to modify a single specific instance.** It works, but it's + misleading; the next reader assumes "all `*T`". Use `Input` instead. +- **Don't write `for _, s := range flow.As[*T](w) { w.Add(flow.Step(s).Input(...)) }` + — that is exactly the legacy pattern `Mutate` replaces. + +### Why `Mutator` receives ctx but cannot substitute it + +The Mutator function receives a `context.Context` but does not return a substituted ctx. +Two distinct decisions: + +**Why ctx is passed in.** Production AKS mutators routinely need workflow-scoped values +to do their work — typically a `log.Logger` (via `log.FromContextOrDiscard(ctx)`), a +scenario name, or a test session ID. Without ctx, those mutators would either lose +diagnostic capability or be forced to wrap their logic inside an `Input` callback just to +gain access to ctx — which silently downgrades a once-per-step mutation into per-attempt +work and destroys the `flow.Mutate` model's idempotency guarantee. We pass the +**workflow-scoped ctx** (the same `ctx` that was passed into `Workflow.Do(ctx)`) because +that is the right scope for "stable for the life of this workflow" values like logger +and scenario. + +**Why ctx cannot be substituted.** A Mutator runs at scheduling time, not inside the +per-attempt onion. The ctx that `step.Do` receives is derived later by the +StepInterceptor / AttemptInterceptor / retry / timeout chain; the Mutator is not on that +chain and has no architecturally meaningful place to insert a substituted ctx. If a +caller needs to inject values into the ctx seen by a step on each attempt, the right +tools are: + +- The Mutator's returned `Builder` can include a `BeforeStep` callback (which itself + receives ctx and can substitute it for `step.Do`). The composition is + `Mutate[*T](func(ctx context.Context, s *T) flow.Builder { return + flow.Step(s).BeforeStep(fn) })`. +- For cross-cutting ctx substitution at workflow scope, `StepInterceptor` / + `AttemptInterceptor` already wrap `next(ctx)` and can substitute freely. + +--- + +## Architecture + +### `Mutator` interface and constructor + +```go +// Mutator contributes additional configuration to a step of a specific type. The runtime +// invokes a Mutator at most once per matching step, just before the step's first attempt. +// +// Construct only via flow.Mutate[T]; the interface's only method is unexported so user +// code cannot bypass the type-dispatch contract. +type Mutator interface { + // applyTo invokes the Mutator's user function against step if step matches the + // Mutator's target type. ctx is the workflow-scoped context (the same ctx passed + // to Workflow.Do). Returns: + // matched: true if the type assertion succeeded + // builder: the user-returned configuration to merge into step's StepConfig + // (nil if matched is false, or if the user returned a nil Builder) + applyTo(ctx context.Context, step Steper) (matched bool, builder Builder) +} + +// Mutate constructs a typed Mutator. The function fn receives the workflow-scoped +// context and the type-asserted step instance, and SHALL return a flow.Builder that +// the runtime will merge into step's existing StepConfig. fn MAY return nil to indicate +// "no additional configuration" (useful when fn only mutates fields on the typed step +// pointer it received). +// +// ctx is suitable for accessing workflow-stable values (logger, scenario name, test +// session ID). It is NOT a per-attempt context — the Mutator runs once per step at +// scheduling time. +// +// The returned Builder typically carries configuration for the same step that was passed +// in (i.e. via flow.Step(passed)). The runtime ignores any configuration in the Builder +// for steps other than the one passed to the Mutator. +func Mutate[T Steper](fn func(ctx context.Context, step T) Builder) Mutator +``` + +### Workflow struct addition + +```go +type Workflow struct { + // ... existing fields, including StepInterceptors / AttemptInterceptors ... + + // Mutators are evaluated against every step the first time the step is scheduled. + // Only Mutators whose target type matches the concrete step run. Slice order is + // preserved among matching Mutators; their contributions to StepConfig accumulate. + Mutators []Mutator +} +``` + +Mutators are populated the same way as interceptors — direct slice assignment when +constructing the workflow. There is no `Use(...)` method. + +### First-schedule evaluation + +When the runtime is about to schedule a step for the first time (specifically, when +moving the step out of `Pending` and into the attempt loop), it iterates `w.Mutators`: + +```go +// pseudocode in tick() / stepExecution.start, before the first attempt +// ctx here is the workflow-scoped ctx from Workflow.Do(ctx) +if !state.MutatorsApplied { + for _, m := range w.Mutators { + matched, b := m.applyTo(ctx, step) + if !matched || b == nil { + continue + } + for s, cfg := range b.AddToWorkflow() { + if s == step { + state.Config.Merge(cfg) + } + // configurations for other steps in the returned Builder are ignored — + // a Mutator's scope is the step that was passed in + } + } + state.MutatorsApplied = true +} +``` + +The exact placement (`tick()` vs `stepExecution.start` vs new helper) depends on the +final shape of the Step Interceptor change. The invariant is: the merge happens before +the Before chain, before the retry loop, and exactly once per step instance. + +If a Mutator's user function panics or contributes invalid config, the panic propagates +through the normal step-execution panic recovery (controlled by `Workflow.DontPanic`). + +### Sub-workflow propagation + +The Mutator list propagates into sub-workflows by the same mechanism interceptors use: +composite steps (and `*Workflow` used as a step) implement an additional optional +interface: + +```go +// MutatorReceiver is implemented by composite steps that host a sub-workflow. Before the +// sub-workflow is run, the runtime calls PrependMutators with the parent workflow's +// Mutators so they reach inner steps. +type MutatorReceiver interface { + PrependMutators(mw []Mutator) +} +``` + +Both `*flow.Workflow` (used as a step) and `*flow.SubWorkflow` implement this. The +runtime calls `PrependMutators` once before invoking the inner workflow's first +scheduling pass. Prepending preserves "parent Mutators run before child Mutators". + +A step may implement `InterceptorReceiver`, `MutatorReceiver`, both, or neither. + +### Coordination with the Step Interceptor design + +This change shares the runtime's per-step lifecycle territory with the Step Interceptor +change. The two MUST land in the same release. Concretely: + +- The Mutator merge step is invoked in the same place where the interceptor design adds + per-attempt invocations — specifically, in the path that takes a step from `Pending` to + `Running`. Mutator merge is **before** the first AttemptInterceptor; both are before + the first `Before` callback. +- Sub-workflow propagation reuses the receiver-based mechanism the interceptor design + introduces (see `InterceptorReceiver`); we add an analogous `MutatorReceiver`. + +If the interceptor change's structural plan changes, this change MUST be re-aligned. + +--- + +## API + +### New types + +```go +// Mutator contributes additional configuration to steps of a specific type. Construct +// only via flow.Mutate[T]. +type Mutator interface { + applyTo(ctx context.Context, step Steper) (matched bool, builder Builder) +} + +// Mutate constructs a typed Mutator that runs fn against any step whose concrete type is +// T. fn receives the workflow-scoped context and the type-asserted step, and returns a +// flow.Builder whose configuration for the passed-in step is merged into that step's +// StepConfig before its first attempt. +func Mutate[T Steper](fn func(ctx context.Context, step T) Builder) Mutator + +// MutatorReceiver is implemented by composite steps that host a sub-workflow. +type MutatorReceiver interface { + PrependMutators(mw []Mutator) +} +``` + +### Workflow field + +```go +// Mutators run once per matched step, contributing additional StepConfig before the +// step's first attempt. Slice order is preserved among matching Mutators. +Mutators []Mutator +``` + +### SubWorkflow propagation + +```go +// SubWorkflow implements MutatorReceiver alongside InterceptorReceiver. +func (s *SubWorkflow) PrependMutators(mw []Mutator) { + if len(mw) == 0 { return } + s.w.Mutators = append(append([]Mutator{}, mw...), s.w.Mutators...) +} + +// *Workflow used directly as a step does the same. +func (w *Workflow) PrependMutators(mw []Mutator) { + if len(mw) == 0 { return } + w.Mutators = append(append([]Mutator{}, mw...), w.Mutators...) +} +``` + +### Usage + +Replacing a typical AKS scenario-level mutator helper: + +```go +// Before (uses BuildStep + As[T] traversal; can only attach Input) +return test.WorkflowMutator(func(w *flow.Workflow) *flow.Workflow { + for _, createMC := range flow.As[*azcontainerservice.CreateManagedCluster](w) { + w.Add(flow.Step(createMC).Input(func(ctx context.Context, c *azcontainerservice.CreateManagedCluster) error { + f(ctx, log.FromContextOrDiscard(ctx), c.GetRequest()) + return nil + })) + } + return w +}) + +// After (uses flow.Mutate; works without BuildStep, no traversal needed) +return test.WorkflowMutator(func(w *flow.Workflow) *flow.Workflow { + w.Mutators = append(w.Mutators, flow.Mutate[*azcontainerservice.CreateManagedCluster]( + func(ctx context.Context, c *azcontainerservice.CreateManagedCluster) flow.Builder { + return flow.Step(c).Input(func(ctx context.Context, c *azcontainerservice.CreateManagedCluster) error { + f(ctx, log.FromContextOrDiscard(ctx), c.GetRequest()) + return nil + }) + }, + )) + return w +}) +``` + +Other natural uses unlocked by returning a Builder: + +```go +// Override retry policy across all Azure API calls +w.Mutators = append(w.Mutators, flow.Mutate[*AzureAPICall]( + func(ctx context.Context, c *AzureAPICall) flow.Builder { + return flow.Step(c).Retry(func(o *flow.RetryOption) { + o.MaxAttempts = 5 + o.Backoff = backoff.NewExponentialBackOff() + }) + }, +)) + +// Force a Condition on a whole class of step +w.Mutators = append(w.Mutators, flow.Mutate[*DestroyXxx]( + func(ctx context.Context, d *DestroyXxx) flow.Builder { + return flow.Step(d).When(flow.Always) + }, +)) + +// Hook Output across all *CreateManagedCluster to record cluster IDs in the test report +w.Mutators = append(w.Mutators, flow.Mutate[*CreateManagedCluster]( + func(ctx context.Context, c *CreateManagedCluster) flow.Builder { + return flow.Step(c).Output(func(ctx context.Context, c *CreateManagedCluster) error { + report.Clusters = append(report.Clusters, c.Response.Name) + return nil + }) + }, +)) + +// Inject a fake HTTP client into every HTTP-call step (no per-attempt callback needed — +// just a once-per-step field set). The ctx parameter is unused here but available if +// the chosen client depends on workflow-scoped values. +w.Mutators = append(w.Mutators, flow.Mutate[*HTTPCall]( + func(ctx context.Context, h *HTTPCall) flow.Builder { + h.Client = fakeClient + return nil // no additional config; the field set above is enough + }, +)) +``` + +Note the last example: a Mutator MAY return `nil` if it accomplishes its goal by directly +mutating fields on the step instance (which it received as a typed pointer). This works +because Mutators run before the first attempt, so any field set is visible to the Before +chain and to `Do`. + +The `ctx` parameter in each example is the workflow-scoped ctx from `Workflow.Do(ctx)`. +Mutators commonly use it to read a logger, scenario name, or test session ID. Mutators +do not return a substituted ctx; that is `BeforeStep`'s or interceptor's job. + +--- + +## What Does Not Change + +- `BeforeStep` / `AfterStep` / `Input` / `Output` — API and behavior unchanged. They + continue to be the per-instance mechanism for plan-local data flow. +- `StepConfig`, `StepOption`, `RetryOption` — unchanged. The merge rules used by Mutator + application are exactly those used by the existing `Idempotent Add with config merging` + requirement. +- `Has[T]` / `As[T]` / `HasStep` — kept as introspection helpers. They remain useful for + in-test assertions and debugging, but their role as the mutator system's selector is + retired. +- `Workflow.Add` time semantics — unchanged for the dependency graph. `BuildStep` is + still invoked at `Add` time for any step that implements it (see the Deprecation + section below). +- The interceptor design — Mutators share its scheduling-time path but do not modify the + interceptor APIs. +- The higher-level `test.Mutator` interface in AKS e2ev3 — unchanged; existing + implementations keep working. Migration to `flow.Mutate` is per-implementation and + optional during the deprecation window. + +--- + +## Deprecation: `BuildStep` + +This change marks `BuildStep` (the user-facing hook with signature `BuildStep()`) and the +`SubWorkflow.Reset()` implicit-call behavior as deprecated. The mechanism is **not removed +in this change**; that is a follow-up change once the AKS test suite has migrated all +117+ production usages. + +What is deprecated: + +- The user-facing `interface{ BuildStep() }` hook on Steper implementations. +- The implicit `Reset()` invocation by `StepBuilder.BuildStep` immediately before + `BuildStep()`. +- The recommendation to embed `flow.SubWorkflow` and define `BuildStep()` to populate it + at `Add` time. Going forward, sub-workflows SHOULD be constructed directly inside `Do`: + + ```go + func (u *UpgradeAndValidate) Do(ctx context.Context) error { + var w flow.Workflow + w.Add(flow.Step(&u.UpgradeSucceeded).DependsOn(&u.UpgradeAgentPool)) + return w.Do(ctx) + } + ``` + + Composite-step authors who embed `flow.SubWorkflow` get parent Mutator propagation for + free via `PrependMutators` and do not need extra work. + +What is **not** deprecated yet: + +- `flow.SubWorkflow` itself remains — it is a useful container even when populated lazily. +- `Has[T]` / `As[T]` / `HasStep` remain. +- The internal `StepBuilder` machinery remains in place for the duration of the + deprecation window so existing code continues to work. + +A `// Deprecated:` comment is added to: + +- The doc comment of `StepBuilder.BuildStep` in `build_step.go`. +- The doc comment of `flow.SubWorkflow.Reset` in `workflow.go`. +- The `BuildStep` example in `example/13_composite_step_test.go` (with a pointer to the + Mutator example). + +A follow-up change (out of scope here) will: + +1. Migrate AKS e2ev3 production usages from `BuildStep` to either inline `Do` + construction or `flow.Mutate` (as appropriate). +2. Remove `StepBuilder`, the implicit `Reset` call, and the `BuildStep` invocation from + `Workflow.addStep`. +3. Update `composite-steps` spec to drop the `BuildStep — lazy initialization hook` + requirement. + +--- + +## Breaking Changes + +This change is **non-breaking** for existing code in the common case. Every existing +workflow keeps working: + +- `BuildStep` is still invoked at `Add` time, just deprecated. +- `Has` / `As` / `HasStep` keep their behavior. +- `SubWorkflow.Reset()` keeps being called. +- Existing scenario-level mutator code that uses `flow.As[T](w)` keeps working. + +The new exposed surface is: + +- `Workflow.Mutators []Mutator` — new field on `Workflow`. Code that constructs + `Workflow{...}` with positional struct literals would break, but that pattern is not + used anywhere we know of. +- `flow.Mutate[T](fn) Mutator` — new exported generic function and `Mutator` interface. + Pure additions; no name collisions in the existing public API. +- `flow.MutatorReceiver` — new exported interface. + +There is one **subtle behavior difference** to flag for review compared to the old +`As[T] + Input` pattern: + +- The old pattern attached an `Input` callback (which is per-attempt). With `flow.Mutate`, + if you want per-attempt behavior, you put it inside the returned `Builder`'s `Input` + callback — which is also per-attempt. So the semantics for Input-style mutations are + equivalent. +- The new capability is that you can also override `Retry` / `Timeout` / `When` and + attach `Before` / `After` once per step. These are real new powers, not breaking + changes. + +--- + +## Files Affected + +| File | Change | +|------|--------| +| New `mutator.go` | Add `Mutator` interface, `Mutate[T]` constructor, `MutatorReceiver` interface | +| `workflow.go` | Add `Mutators []Mutator` field; invoke Mutator merge in the first-schedule path; add `MutatorsApplied` tracking on `State`; call `PrependMutators` on sub-workflows; add `(*Workflow).PrependMutators` | +| `state.go` | Add `MutatorsApplied bool` field (or equivalent) to `State` | +| `wrap.go` (or `workflow.go` near SubWorkflow) | `SubWorkflow` implements `MutatorReceiver` via `PrependMutators` | +| `build_step.go` | Add `// Deprecated:` notes to `StepBuilder.BuildStep` | +| `example/13_composite_step_test.go` | Add `// Deprecated:` note; add a parallel example showing the Mutator / Do-time pattern | + +--- + +## Open Questions + +| Question | Resolution | +|----------|------------| +| Why "Mutator" and not "Middleware"? | "Middleware" carries a wrap-around expectation (`(ctx, req, next) → resp`) that this mechanism does not provide. "Mutator" matches the semantics (configure-only, no `next`, no short-circuit) and aligns with AKS `test.Mutator` and K8s `MutatingAdmissionWebhook`. | +| Per-attempt or per-step (once)? | Per-step, once. AKS production usage is all static config. Per-attempt with config-mutation power is a footgun (silent slice growth on retry). For per-attempt effects, the Mutator's returned Builder can include `Input`/`BeforeStep`. | +| Why return `flow.Builder`? | It's the existing vocabulary for declaring step config. No second API to learn. Naturally composes with `Input`, `Output`, `BeforeStep`, `Retry`, `Timeout`, `When`. | +| Does the Mutator receive a `context.Context`? | Yes — the workflow-scoped ctx from `Workflow.Do(ctx)`. It is a read-only input; suitable for accessing logger, scenario name, test session ID. AKS production mutators routinely need `log.FromContextOrDiscard(ctx)`; not passing ctx would force them to wrap their logic in an `Input` callback and silently downgrade to per-attempt semantics. | +| Does the Mutator return a substituted ctx? | No — ctx substitution happens later in the per-attempt onion (BeforeStep / interceptors); the Mutator is not on that chain and has nowhere to insert a substitution. For per-attempt ctx influence, return a Builder that includes `BeforeStep`. | +| Does the Mutator return an `error`? | No — the Mutator runs at scheduling time where "fail this step" semantics are awkward (the step hasn't started yet). Failable work belongs in the returned Builder's `Input` callback, which runs per-attempt and integrates naturally with retry. | +| Can the Mutator return nil? | Yes — useful when the Mutator only sets fields on the typed step pointer it received. | +| Should the selector support interfaces or embedded types? | No — exact concrete type only. Production usage targets concrete types; interface dispatch invites surprises. | +| How does a Mutator reach sub-workflow steps? | `MutatorReceiver` interface; `SubWorkflow` and `*Workflow` (used as step) implement `PrependMutators`. | +| What happens if a Mutator's Builder returns config for a step other than the one passed in? | Ignored. The Mutator's scope is the matched step; configurations targeting other steps are silently dropped. (We could panic instead; deferred until we see misuse.) | +| What happens when a Mutator's user function panics? | Propagates through the normal step-execution panic recovery (`Workflow.DontPanic`). | +| Is pre-execution dump supported? | No. No production user has the requirement; the interceptor event stream covers actual need. | +| Is `BuildStep` removed? | Not in this change. Deprecated; removed in a follow-up after AKS migration. | +| Can a user register two Mutators for the same type? | Yes. They run in slice order; their contributions accumulate via the standard StepConfig merge rules. | +| What happens if a step type matches no Mutator? | Nothing — Mutator loop is a no-op for that step. | +| Does `Mutate` reach steps inside `*Workflow` used as a step? | Yes — `*Workflow` implements `PrependMutators`. | diff --git a/example/15_mutator_test.go b/example/15_mutator_test.go new file mode 100644 index 0000000..0d13932 --- /dev/null +++ b/example/15_mutator_test.go @@ -0,0 +1,280 @@ +//go:build mutator_design + +// This file is a DESIGN-PHASE reference for the proposed flow.Mutator API +// (openspec/changes/2026-05-06-step-mutator). It does NOT compile against the +// current tree because flow.Mutator / flow.Mutate / Workflow.Mutators do not +// yet exist. The build tag `mutator_design` keeps it out of the default build. +// +// When the implementation lands, drop the build tag (rename file or remove the +// header) and these become live ExampleXxx tests. +// +// The intent of this file is to let reviewers see the *final user-facing +// shape* of the Mutator API before any implementation is written. + +package flow_test + +import ( + "context" + "fmt" + "time" + + flow "github.com/Azure/go-workflow" +) + +// --------------------------------------------------------------------------- +// Shared fixtures used by the examples below. +// --------------------------------------------------------------------------- + +type Greet struct { + Greeting string + Who string + out string +} + +func (g *Greet) String() string { return "Greet" } +func (g *Greet) Do(ctx context.Context) error { + g.out = fmt.Sprintf("%s, %s!", g.Greeting, g.Who) + fmt.Println(g.out) + return nil +} + +type Farewell struct { + Who string +} + +func (f *Farewell) String() string { return "Farewell" } +func (f *Farewell) Do(ctx context.Context) error { + fmt.Printf("Bye, %s!\n", f.Who) + return nil +} + +// --------------------------------------------------------------------------- +// Example 1 — a Mutator that fills in a default field across all *Greet steps. +// +// Equivalent legacy code would have been: +// +// for _, g := range flow.As[*Greet](w) { +// w.Add(flow.Step(g).Input(func(_ context.Context, g *Greet) error { +// if g.Who == "" { g.Who = "world" } +// return nil +// })) +// } +// +// With Mutate this becomes a single registration and works for steps added +// later (including inside sub-workflows) without a tree walk at registration +// time. +// --------------------------------------------------------------------------- + +func ExampleMutate_input() { + w := &flow.Workflow{ + Mutators: []flow.Mutator{ + flow.Mutate[*Greet](func(_ context.Context, g *Greet) flow.Builder { + return flow.Step(g).Input(func(_ context.Context, g *Greet) error { + if g.Who == "" { + g.Who = "world" + } + return nil + }) + }), + }, + } + w.Add( + flow.Step(&Greet{Greeting: "Hello"}), // Who left blank, will be filled in + flow.Step(&Greet{Greeting: "Hi", Who: "Bob"}), // already set, Mutator does not override + ) + _ = w.Do(context.Background()) + // Output: + // Hello, world! + // Hi, Bob! +} + +// --------------------------------------------------------------------------- +// Example 2 — A Mutator that overrides Retry and Timeout for every *Greet +// step. Demonstrates that a Mutator can return a Builder carrying *any* step +// configuration, not just Input. +// --------------------------------------------------------------------------- + +func ExampleMutate_retryOverride() { + w := &flow.Workflow{ + Mutators: []flow.Mutator{ + flow.Mutate[*Greet](func(_ context.Context, g *Greet) flow.Builder { + return flow.Step(g). + Retry(func(o *flow.RetryOption) { o.MaxAttempts = 3 }). + Timeout(2 * time.Second) + }), + }, + } + w.Add(flow.Step(&Greet{Greeting: "Hi", Who: "world"})) + _ = w.Do(context.Background()) + // Output: + // Hi, world! +} + +// --------------------------------------------------------------------------- +// Example 3 — Mutator-as-pure-field-mutation (return nil Builder). +// +// When the only thing you need is to mutate fields on the typed step pointer +// (no callback registration), return nil. This is the cheapest form. +// --------------------------------------------------------------------------- + +func ExampleMutate_nilBuilder() { + w := &flow.Workflow{ + Mutators: []flow.Mutator{ + flow.Mutate[*Greet](func(_ context.Context, g *Greet) flow.Builder { + g.Greeting = "Hola" // direct field mutation + return nil + }), + }, + } + w.Add(flow.Step(&Greet{Greeting: "Hi", Who: "world"})) + _ = w.Do(context.Background()) + // Output: + // Hola, world! +} + +// --------------------------------------------------------------------------- +// Example 4 — ctx received by the Mutator is the workflow-scoped ctx. +// +// Demonstrates the documented invariant that the Mutator's `ctx` is the same +// `ctx` passed to Workflow.Do. This is how production helpers fetch a logger, +// scenario name, test session ID, etc. +// --------------------------------------------------------------------------- + +type ctxKey string + +const greetingKey ctxKey = "greeting" + +func ExampleMutate_ctx() { + w := &flow.Workflow{ + Mutators: []flow.Mutator{ + flow.Mutate[*Greet](func(ctx context.Context, g *Greet) flow.Builder { + if v, ok := ctx.Value(greetingKey).(string); ok { + g.Greeting = v + } + return nil + }), + }, + } + w.Add(flow.Step(&Greet{Who: "world"})) + + ctx := context.WithValue(context.Background(), greetingKey, "Bonjour") + _ = w.Do(ctx) + // Output: + // Bonjour, world! +} + +// --------------------------------------------------------------------------- +// Example 5 — Multiple Mutators registered for the same type run in slice +// order; their Input callbacks are appended in that order and all run after +// any plan-declared Inputs. +// --------------------------------------------------------------------------- + +func ExampleMutate_multipleInOrder() { + w := &flow.Workflow{ + Mutators: []flow.Mutator{ + flow.Mutate[*Greet](func(_ context.Context, g *Greet) flow.Builder { + return flow.Step(g).Input(func(_ context.Context, g *Greet) error { + g.Greeting += " (m1)" + return nil + }) + }), + flow.Mutate[*Greet](func(_ context.Context, g *Greet) flow.Builder { + return flow.Step(g).Input(func(_ context.Context, g *Greet) error { + g.Greeting += " (m2)" + return nil + }) + }), + }, + } + w.Add( + flow.Step(&Greet{Greeting: "Hi", Who: "world"}). + Input(func(_ context.Context, g *Greet) error { + g.Greeting += " (plan)" + return nil + }), + ) + _ = w.Do(context.Background()) + // Plan callback runs first, then m1, then m2 — in declared order. + // Output: + // Hi (plan) (m1) (m2), world! +} + +// --------------------------------------------------------------------------- +// Example 6 — Sub-workflow construction inside Do(). +// +// This is the new pattern that replaces BuildStep. The composite step embeds +// flow.SubWorkflow (which gives it a stable inner *Workflow handle for +// PrependMutators propagation) but constructs its inner steps inside Do(), +// not at Add time. Parent Mutators still reach inner steps via PrependMutators. +// --------------------------------------------------------------------------- + +type CompositeViaDo struct { + flow.SubWorkflow // gives stable inner-workflow handle for Mutator propagation + Hello Greet + Bye Farewell +} + +func (c *CompositeViaDo) String() string { return "CompositeViaDo" } +func (c *CompositeViaDo) Do(ctx context.Context) error { + // Build the inner DAG lazily, here, inside Do — no BuildStep needed. + c.Add(flow.Pipe(&c.Hello, &c.Bye)) + return c.SubWorkflow.Do(ctx) +} + +func ExampleMutate_subWorkflow() { + composite := &CompositeViaDo{ + Hello: Greet{Greeting: "Hello"}, + Bye: Farewell{}, + } + + w := &flow.Workflow{ + // Parent-level Mutator. Reaches *Greet AND *Farewell inside the + // composite step via the MutatorReceiver propagation mechanism: + // before parent invokes composite.Do, runtime calls + // composite.PrependMutators(parent.Mutators). + Mutators: []flow.Mutator{ + flow.Mutate[*Greet](func(_ context.Context, g *Greet) flow.Builder { + if g.Who == "" { + g.Who = "world" + } + return nil + }), + flow.Mutate[*Farewell](func(_ context.Context, f *Farewell) flow.Builder { + if f.Who == "" { + f.Who = "world" + } + return nil + }), + }, + } + w.Add(flow.Step(composite)) + _ = w.Do(context.Background()) + // Output: + // Hello, world! + // Bye, world! +} + +// --------------------------------------------------------------------------- +// Example 7 — Mutator matches an inner type through the Unwrap chain. +// +// flow.Name("...", greet) returns a wrapper whose Unwrap() returns greet. +// flow.Mutate[*Greet] matches the inner *Greet via unwrap traversal; the +// resulting Builder's config is merged onto the wrapper's state entry, but +// the user function receives the typed *Greet pointer. +// --------------------------------------------------------------------------- + +func ExampleMutate_unwrap() { + greet := &Greet{Greeting: "Hi"} + w := &flow.Workflow{ + Mutators: []flow.Mutator{ + flow.Mutate[*Greet](func(_ context.Context, g *Greet) flow.Builder { + g.Who = "world" + return nil + }), + }, + } + w.Add(flow.Step(flow.Name("named-greet", greet))) + _ = w.Do(context.Background()) + // Output: + // Hi, world! +} diff --git a/openspec/changes/2026-05-06-step-mutator/design.md b/openspec/changes/2026-05-06-step-mutator/design.md new file mode 100644 index 0000000..d5189f9 --- /dev/null +++ b/openspec/changes/2026-05-06-step-mutator/design.md @@ -0,0 +1,357 @@ +# Step Mutator — Design + +This change introduces type-dispatched step Mutators in `go-workflow` and deprecates the +`BuildStep` mechanism. Detailed rationale, alternatives, naming discussion, and +architectural reasoning are in the brainstorming spec at +`docs/superpowers/specs/2026-05-06-step-mutator-design.md`. + +This document supplies the implementation-facing specifics needed to evaluate and +implement the change. Anything covered in the brainstorming spec is referenced rather +than duplicated. + +--- + +## Summary of Decisions + +| Decision | Choice | Reason | +|----------|--------|--------| +| Public name | `Mutator` (not `Middleware`) | Configure-only semantics; "middleware" implies wrap-around with `next`. Aligns with existing AKS `test.Mutator` and K8s `MutatingAdmissionWebhook`. | +| Function signature | `func(ctx context.Context, step T) Builder` | Returns a fluent `Builder` — the same vocabulary plan authors already use (Input, Output, BeforeStep, Retry, Timeout, When, etc.). `ctx` is the workflow-scoped ctx from `Workflow.Do(ctx)`, suitable for accessing logger / scenario name / test session ID. No `error` return: Mutators run at scheduling time and "failure" semantics are unclear there; failable work belongs in the returned Builder's `Input` callback. | +| When invoked | Once per step instance, just before its first attempt | AKS production usage is all static configuration. Per-attempt with config-mutation power silently grows callback slices on retry. | +| Selector | Type assertion `step.(T)` walking the `Unwrap()` chain (first matching layer wins) | Production targets concrete types; legacy `As[T] + Input` already followed `Unwrap`, so this preserves migration parity. Stops at workflow boundaries — parent Mutators reach inner steps via `PrependMutators`, not by walking across. | +| Registration | Slice field `Workflow.Mutators []Mutator` | Matches `StepInterceptors` / `AttemptInterceptors` style. No `Use(...)` method. | +| Stack position | Mutator merges into StepConfig **before** the first per-attempt onion (Interceptors / Before / Do / After) | Once-and-done, then the standard onion runs as if the config had been declared at Add time. | +| Merge ordering | Mutator-contributed callbacks **append** to plan-declared callbacks (plan first, Mutator after) | "Scenario customization layered on top of plan defaults" — Mutators observe/override plan state, not pre-empt it. Mirrors K8s admission webhook (mutating-after-default). | +| Sub-workflow propagation | Optional `MutatorReceiver` interface (`PrependMutators(mw []Mutator)`) | Same model as interceptor `PrependInterceptors`. | +| Builder scope | Only config for the step passed in is merged; merge destination is the wrapper key in the workflow that owns it (per `Config merge destination follows StateOf`) | Predictable scope; matches existing `Add`-time merge behavior; lets Mutator authors write `flow.Step(typedPointer).Input(...)` without worrying about which workflow level the state lives at. | +| Nested propagation | Parent prepends to inner's `Mutators` slice; inner dispatches against its own state map | Lazy inner steps work, multi-level nesting recurses cleanly, mirrors `InterceptorReceiver` pattern, avoids parent reaching across workflow boundaries. | +| Context substitution | Mutator function does not return a substituted ctx | Mutators run at scheduling time, before the per-attempt onion; they have no way to influence the ctx that `step.Do` receives. The Mutator DOES receive the workflow-scoped ctx as a read-only input. For per-attempt ctx substitution, the Mutator's returned Builder can include `BeforeStep`. | +| Return nil Builder | Allowed | Useful when the Mutator only mutates fields on the typed step pointer. | +| `BuildStep` fate | Deprecated, not removed | Allows AKS e2ev3 to migrate 117+ usages incrementally. | +| Pre-execution dump | Not supported | No real user; interceptor event stream covers actual need. | + +--- + +## Coordination With the Step Interceptor Change + +This change shares execution-model territory with +`docs/superpowers/specs/2026-05-06-step-interceptor-design.md` (separate session). The two +must land in the same release. Concretely: + +- The Mutator merge is invoked from the same scheduling-time path that the interceptor + change introduces (the path that takes a step from `Pending` to `Running`). The merge + happens **before** the first AttemptInterceptor; both happen before the first `Before` + callback. +- `MutatorsApplied` (or equivalent) is added to `State` to ensure once-per-step + semantics. The interceptor change does not need this flag, so it is purely additive + here. +- Sub-workflow propagation reuses the receiver-based mechanism: where the interceptor + change calls `recv.PrependInterceptors(...)`, this change immediately follows with + `recv.PrependMutators(...)`. + +If the interceptor change's structural plan changes, this change MUST be re-aligned. +Conflicts to watch for: location of the scheduling-time hook, naming of `stepExecution`, +the field on `State` that records "first attempt scheduled". + +--- + +## Implementation Notes + +### Type-erased dispatch via generic constructor + +The `Mutator` interface deliberately exposes only an unexported method: + +```go +type Mutator interface { + applyTo(ctx context.Context, step Steper) (matched bool, target Steper, builder Builder) +} +``` + +Returning `target` (the layer where `T` matched, possibly equal to `step` itself) allows +the caller to know which `Steper` key the user's Builder was authored against. The +merge logic (see "First-schedule merge") rebases the Builder's per-step config onto the +wrapper key `step` before merging. + +This is so the only producer is `flow.Mutate[T](fn)`, which type-erases the user's typed +function `func(context.Context, T) Builder` into the `Mutator` interface. The +implementation walks the unwrap chain: + +```go +func Mutate[T Steper](fn func(ctx context.Context, step T) Builder) Mutator { + return mutatorFunc[T](fn) +} + +type mutatorFunc[T Steper] func(ctx context.Context, step T) Builder + +func (m mutatorFunc[T]) applyTo(ctx context.Context, step Steper) (bool, Steper, Builder) { + // walk the Unwrap chain in pre-order; first layer whose concrete type matches T wins. + var found T + matched := false + var matchedStep Steper + Traverse(step, func(s Steper, _ []Steper) TraverseDecision { + if typed, ok := s.(T); ok { + found = typed + matchedStep = s + matched = true + return TraverseStop + } + return TraverseContinue + }) + if !matched { + return false, nil, nil + } + return true, matchedStep, m(ctx, found) +} +``` + +Unwrap traversal does NOT cross workflow boundaries — `Traverse` stops when it would +descend into a `*Workflow`'s inner state. Inner-workflow steps are reached through +`PrependMutators`, not through cross-boundary unwrap. + +The unexported method prevents users from hand-rolling implementations that bypass the +type-dispatch contract — important because user-supplied implementations could change the +selector semantics in surprising ways and fragment the model. + +### First-schedule merge + +Inside the path that schedules a step's first attempt (precise placement TBD pending +final shape of the interceptor change's `stepExecution`): + +```go +// pseudocode — runs once per (workflow, wrapper) pair at wrapper's first scheduling +if !state.MutatorsApplied { + for _, m := range w.Mutators { + matched, target, b := m.applyTo(ctx, wrapper) // walks wrapper's Unwrap chain + if !matched || b == nil { + continue + } + for s, cfg := range b.AddToWorkflow() { + if s == target { + // rebase the per-step config onto the wrapper key in this workflow's state + state.Config.Merge(cfg) + } + // configurations for other steps are silently ignored — Mutator scope is + // the layer that was passed in (and its rebase target is the wrapper) + } + } + state.MutatorsApplied = true +} +// then proceed: AttemptInterceptors → Before → Do → After +``` + +The merge destination is always `state` (the inner-workflow's `state[wrapper]`), even +when the matched layer `target` is several `Unwrap` levels below `wrapper`. This is the +same rule as the existing `Config merge destination follows StateOf` requirement in +`step-configuration`. + +The `Merge` operation reuses the existing `StepConfig.Merge` (already present, used by +the `Idempotent Add with config merging` requirement). No new merge logic is introduced. + +### Sub-workflow propagation + +`SubWorkflow` and `*Workflow` (used as a step) implement `MutatorReceiver`: + +```go +type MutatorReceiver interface { + PrependMutators(mw []Mutator) +} + +func (s *SubWorkflow) PrependMutators(mw []Mutator) { + if len(mw) == 0 { + return + } + s.w.Mutators = append(append([]Mutator{}, mw...), s.w.Mutators...) +} + +func (w *Workflow) PrependMutators(mw []Mutator) { + if len(mw) == 0 { + return + } + w.Mutators = append(append([]Mutator{}, mw...), w.Mutators...) +} +``` + +The runtime invokes `PrependMutators` once per inner-workflow execution, before the +inner workflow begins scheduling its own steps. Inner steps that are added lazily inside +the composite step's `Do` are still reached, because Mutator evaluation in the inner +workflow is itself once-per-step at first scheduling. + +Re-prepending the same Mutator list is safe: the once-per-step invariant +(`MutatorsApplied`) prevents double-execution. + +### Idempotency story + +Because Mutators run once per step, **Mutator authors do not need to write idempotent +functions**. They can append to slices, allocate new objects, etc., without worrying +about retries causing duplicate work. This is a deliberate change from the legacy +`As[T] + Input` pattern, where the `Input` callback was per-attempt and authors had to +defensively guard against being called twice. + +### `*Workflow` as `Steper` already + +`*Workflow` already implements `Steper` and is usable as a step in another workflow. We +add `PrependMutators` to it directly so nested workflows inherit Mutators automatically. +This is a small expansion of `Workflow`'s existing surface. + +--- + +## Deprecation Plan for `BuildStep` and Related Surface + +This change does not remove `BuildStep`. It marks it deprecated and **commits to removal +in the next major version** of `go-workflow` once production migration is complete. + +### What gets removed at the next major version + +The following symbols / behaviors SHALL be considered candidates for removal in the +next major release of `go-workflow`. They are all subsumed by the Mutator mechanism +(`flow.Mutate[T]`) plus `Do()`-time sub-workflow construction: + +| Symbol / behavior | Replacement | Location | +|-------------------|-------------|----------| +| `StepBuilder.BuildStep(s Steper)` method (the lazy-build hook invoked by `Workflow.addStep`) | Sub-workflow construction inside `Do()` (see `composite-steps` requirement `Sub-workflow construction inside Do`) | `build_step.go` | +| The implicit invocation `w.BuildStep(step)` inside `Workflow.addStep` | n/a (callers construct their sub-workflow themselves at `Do` time) | `workflow.go` | +| The `BuildStep()` interface check / sentinel-by-method-signature trick used to detect composite steps with lazy initialization | n/a | `build_step.go` | +| `flow.SubWorkflow.Reset()` method (only invoked by the deprecated `BuildStep` path) | n/a (sub-workflow is constructed fresh inside `Do`, no reset needed) | `workflow.go` | +| The `BuildStep — lazy initialization hook` requirement in `composite-steps` spec | the new `Sub-workflow construction inside Do` requirement | `openspec/specs/composite-steps/spec.md` | +| The `BuildStep lifecycle hook` requirement in `step-configuration` spec | same | `openspec/specs/step-configuration/spec.md` | + +`flow.As[T]`, `flow.Has[T]`, `flow.HasStep` are NOT in this list — they remain valid +introspection helpers for tests and debugging. Only the mutation-driven use of +`As[T] + Input` is replaced by `flow.Mutate[T]`. + +### Deprecation timeline + +This change (current release): **mark deprecated** + +- Add `// Deprecated:` godoc to `StepBuilder.BuildStep` in `build_step.go`, pointing to + `flow.Mutate` for cross-cutting modification and to the `Do`-time sub-workflow pattern + for composite step construction. +- Add `// Deprecated:` godoc to `flow.SubWorkflow.Reset` in `workflow.go`, noting that + it is only invoked by the deprecated `StepBuilder.BuildStep` path. +- Add `// Deprecated:` godoc to the `BuildStep` method template (the comment block in + `build_step.go` that demonstrates how users should write the hook). +- Update `example/13_composite_step_test.go` `BuildStep` usage with a `// Deprecated:` + doc-comment and add a parallel `ExampleCompositeViaDo` showing the new pattern. +- Mark the corresponding `composite-steps` requirement as **DEPRECATED** in the spec + (already done in this change's `composite-steps/spec.md` delta). +- Behavior at runtime is **unchanged**. Existing code compiles and runs identically; + only `go vet` / IDE / staticcheck warnings appear. + +Follow-up change (separate proposal, between current and next major): **migrate** + +- Migrate AKS e2ev3 production usages — primarily by converting `BuildStep` bodies into + `Do`-time `flow.Workflow{}.Add(...)` blocks, and converting scenario-level + `WorkflowMutator` helpers into `flow.Mutate[T]` registrations. +- Track AKS e2ev3 `BuildStep` usage count down to zero before opening the removal PR. + +Next major version (e.g. `v2.0.0` or whichever bump the project uses for breaking +changes): **remove** + +- Remove the `BuildStep()` method dispatch from `Workflow.addStep`. Steps that still + define a `BuildStep()` method SHALL no longer have it invoked implicitly. +- Remove the `StepBuilder` embed point. +- Remove the implicit `SubWorkflow.Reset()` call. `Reset` itself MAY be kept as a public + no-op helper or removed; this is decided at removal time based on whether any user + code is calling `Reset` directly. +- Delete the `BuildStep — lazy initialization hook` requirement from + `openspec/specs/composite-steps/spec.md` and the `BuildStep lifecycle hook` requirement + from `openspec/specs/step-configuration/spec.md`. Both are replaced by the + `Sub-workflow construction inside Do` requirement (already added in this change). +- The major version bump is the explicit signal that this is a breaking change; this is + what justifies removing the implicit hook rather than leaving it deprecated forever. + +### Why the next major version, not later + +`BuildStep` exists solely to support the legacy `As[T] + Input` mutator pattern. +Once `flow.Mutate` lands and AKS migrates, no production user has a reason to call +`BuildStep`. Leaving it indefinitely would: + +- Keep two parallel mechanisms in the public surface, confusing new users. +- Force the runtime to keep paying for the `BuildStep()` interface check on every + `addStep` call. +- Preserve the unfortunate sentinel-by-method-signature trick, which is a footgun + (any unrelated type that happens to define `BuildStep()` would be mistaken for a + composite step). + +A major-version bump is the standard tool for closing this kind of deprecation, and +this change registers the intent now so consumers know to plan for it. + +### Future consideration: simplifying `SubWorkflow` + +Once `BuildStep` and `SubWorkflow.Reset` are removed, `SubWorkflow` retains two +responsibilities that are still useful: + +1. **Hold an inner `Workflow` as a struct field**, so a composite step has a stable + handle through which the runtime can call `PrependMutators` / `PrependInterceptors` + before the composite's `Do` runs. +2. **Default `Unwrap() []Steper`** so parent `As[T]` / `Has` traversal can see inner + steps (also required for parent Mutator dispatch when the user has registered a + Mutator targeting an inner type but the propagation receiver is the composite step). + +Both of these are also satisfied by **embedding `*flow.Workflow` directly** in the +composite step struct. `*Workflow` already implements `Steper`, `Unwrap`, and (after +this change) `MutatorReceiver`. So `SubWorkflow` is, in principle, collapsible to +either: + +- a type alias `type SubWorkflow = Workflow` (backward-compatible name) +- or removal entirely, with the migration recipe "embed `*flow.Workflow` instead" + +The reasons NOT to do this collapse in the same change as `BuildStep` removal: + +- `SubWorkflow` is a **value-embedded struct**, supporting zero-value composite step + initialization (`var s MyComposite` works). `*flow.Workflow` is pointer-embedded and + requires explicit construction (`&MyComposite{Workflow: &flow.Workflow{}}`). This + difference is observable to users and changes the idiomatic pattern. +- `SubWorkflow` may have downstream behavior (`String`, formatting, JSON marshaling) + that production users have come to depend on. None of this is currently surveyed. +- The `SubWorkflow` collapse is **independent in scope** from the `BuildStep` removal: + `BuildStep` removal is about killing a misfeature; `SubWorkflow` collapse is about + reducing API surface. Bundling them together would couple two unrelated migrations. + +**Disposition for this change:** record the consideration here, but do NOT mark +`SubWorkflow` as deprecated and do NOT schedule it for removal in the next major. A +**separate proposal** evaluates the collapse on its own merits — surveying production +usage patterns (zero-value vs. explicit construction, formatting, etc.) and proposing +either: + +- (a) An alias `type SubWorkflow = Workflow` that keeps the name and gives users one + unified type to embed. +- (b) Removal, with a migration guide for replacing the embed. +- (c) Status quo, if the survey reveals enough zero-value usages or behavioral + differences to justify keeping it. + +That proposal is **out of scope here** but the link is recorded so the question is not +forgotten. + +--- + +## Risk and Rollout + +- The change is **additive** for existing code. Existing tests pass without modification. +- The Mutator field is `nil` for any workflow that doesn't use the feature; the + scheduling-time check short-circuits on a `nil` slice. +- Migration is opt-in. AKS scenario-level helpers can be migrated module by module. +- The deprecation window for `BuildStep` is bounded: it ends at the next major version + of `go-workflow`, by which time all in-repo and AKS-production usages must have + migrated. See the Deprecation Plan above for the removal list. + +--- + +## Out of Scope + +- Removing `BuildStep` and related symbols (`StepBuilder`, the implicit `SubWorkflow.Reset` + call, the `BuildStep()` interface check). These are scheduled for removal in the next + major version of `go-workflow` (see the Deprecation Plan above for the full list and + rationale). This change only marks them deprecated. +- Pre-execution dump / introspection (explicit non-goal; see brainstorming spec). +- Per-attempt Mutator API variant (explicit non-goal; if needed, the Mutator's returned + Builder can include per-attempt callbacks). +- Mutator wrap-around / `next` semantics (explicit non-goal; the interceptor design + covers wrap-around for users who need it). +- Selector generalization beyond exact concrete type (explicit non-goal). +- Removing `Has[T]` / `As[T]` / `HasStep` (kept as introspection helpers). +- Collapsing or removing `flow.SubWorkflow` itself. The `SubWorkflow.Reset()` method is + marked deprecated alongside `BuildStep`, but the wrapper struct is preserved. A + separate future proposal will evaluate whether to alias `SubWorkflow` to `Workflow` + or remove it; see "Future consideration: simplifying `SubWorkflow`" in the + Deprecation Plan section. diff --git a/openspec/changes/2026-05-06-step-mutator/proposal.md b/openspec/changes/2026-05-06-step-mutator/proposal.md new file mode 100644 index 0000000..3378ead --- /dev/null +++ b/openspec/changes/2026-05-06-step-mutator/proposal.md @@ -0,0 +1,99 @@ +## Why + +Cross-cutting modifications of specific step types in production AKS e2ev3 today rely on +a two-part mechanism: + +1. `BuildStep` — composite/sub-workflow steps materialize their inner DAG at `Add` time + so that the outer workflow can see all nested steps. +2. Scenario-level mutators (`test.Mutator` / `WorkflowMutator`) walk the resulting tree + with `flow.As[T](w)` and attach `Input` callbacks to mutate matching step instances. + +This pair is fragile. `BuildStep` is invoked implicitly at `Add` time, can't return +errors, silently invokes a `Reset()` method by name, and uses a sentinel-by-method-signature +trick to detect sub-workflows. Production code routinely calls `BuildStep` again manually +inside `Do` or `Input` callbacks because the framework's "build at Add time" contract no +longer matches real usage. Mutators that match no step fail silently. Mutators implemented +via `As[T] + Input` can only attach `Input` callbacks — they cannot override `Retry`, +`Timeout`, `When`, or attach `Before`/`After` chains across all matching steps. And the +whole `BuildStep` machinery exists only to support tree traversal by mutators. + +Add a first-class `flow.Mutator` to the workflow runtime that expresses "for any step of +type `*T`, configure it like this" by returning a `flow.Builder` — the same builder users +already use to declare per-step configuration in their plans. With it: + +- Scenario-level mutator helpers compile to a one-line `flow.Mutate[T](fn)` registration. +- Mutators can override any aspect of step configuration: `Input`, `Output`, + `BeforeStep`, `AfterStep`, `Retry`, `Timeout`, `When`, etc. — by returning a + `flow.Builder` that carries the desired config. +- `As[T]` is no longer needed for mutation — only for ad-hoc introspection in tests. +- Sub-workflows can be constructed lazily inside `Do`, since cross-cutting mutation no + longer requires the outer workflow to see the inner DAG up front. +- `BuildStep` becomes deprecated and can be removed in a follow-up change after AKS + migrates its 117+ production usages. + +A non-goal: pre-execution dump / introspection of the fully expanded workflow tree. No +production user has the requirement; the post-execution event stream from the Step +Interceptor design covers the "what happened" need with strictly more accuracy. + +## What Changes + +- Add `flow.Mutator` interface, `flow.Mutate[T](fn func(ctx context.Context, step T) Builder) Mutator` + constructor, and `flow.MutatorReceiver` propagation interface. +- Add `Workflow.Mutators []Mutator` field. +- Wire **once-per-step** Mutator merging into the scheduling-time path (the path that + takes a step from `Pending` to its first attempt). The merge runs before the first + AttemptInterceptor and contributes to the step's `StepConfig` via the existing + `StepConfig.Merge` rules. +- Add `MutatorsApplied bool` (or equivalent) on `State` to enforce once-per-step. +- Have `flow.SubWorkflow` implement `MutatorReceiver` so Mutators registered on a parent + workflow propagate into nested sub-workflows. +- Have `*flow.Workflow` (used as a step) implement `MutatorReceiver` so nested workflows + used directly as steps propagate naturally. +- Mark `BuildStep` (the user hook), `StepBuilder.BuildStep`, and the implicit + `SubWorkflow.Reset()` invocation as `// Deprecated:` with pointers to Mutator-based + patterns in their godoc. +- Add an example demonstrating the new pattern (`flow.Mutate` returning a `Builder` + + `Do`-time sub-workflow construction) alongside the existing deprecated `BuildStep` + example. +- Update the `composite-steps` spec to mark the `BuildStep` requirement as deprecated + and to add the new sub-workflow-via-`Do` pattern as an alternative. +- Add a new `mutators` capability spec describing the interface, dispatch, position, + Builder scope, and propagation rules. + +## Capabilities + +### New Capabilities + +- `mutators`: type-dispatched once-per-step configuration contribution, Builder-returning + signature, sub-workflow propagation, scope rules, and failure semantics. + +### Modified Capabilities + +- `composite-steps`: deprecate the `BuildStep` requirement; add a new requirement for + `Do`-time sub-workflow construction; document `MutatorReceiver` propagation on + `SubWorkflow` and `*Workflow`. +- `step-configuration`: add `Mutators` field on `Workflow`; clarify the merge rules apply + to Mutator contributions identically to repeat `Add` calls. + +## Impact + +- **No breaking changes to existing workflow definitions.** `BuildStep` continues to run + at `Add` time during the deprecation window. Existing scenario-level mutators using + `flow.As[T](w)` keep working. The only public-API change that could break code is + positional struct literals of `Workflow{...}` that pass 4+ arguments; this pattern is + not used anywhere we know of (consumers use field-named literals or `&Workflow{}` with + later assignment). +- **Authoring change for new Mutators (positive):** Mutators run **once per step**, not + per attempt. Mutator authors no longer need to write idempotent functions. This is + strictly easier than the legacy `As[T] + Input` pattern (where `Input` was per-attempt). +- **Coordination requirement:** This change depends on the Step Interceptor design + landing in the same release; both share the scheduling-time path and define each + other's stack ordering. +- **No new dependencies.** +- **No removals in this change.** A follow-up change will migrate AKS e2ev3 production + code from `BuildStep` to `flow.Mutate` + `Do()`-time sub-workflow construction. The + **next major version of `go-workflow`** will then remove `StepBuilder.BuildStep`, the + implicit `BuildStep()` invocation in `Workflow.addStep`, the implicit + `SubWorkflow.Reset()` call, and the corresponding `composite-steps` / + `step-configuration` requirements. See `design.md` "Deprecation Plan for `BuildStep` + and Related Surface" for the full removal list and timeline. diff --git a/openspec/changes/2026-05-06-step-mutator/specs/composite-steps/spec.md b/openspec/changes/2026-05-06-step-mutator/specs/composite-steps/spec.md new file mode 100644 index 0000000..d27b348 --- /dev/null +++ b/openspec/changes/2026-05-06-step-mutator/specs/composite-steps/spec.md @@ -0,0 +1,97 @@ +## MODIFIED Requirements + +### Requirement: BuildStep — lazy initialization hook + +The framework SHALL preserve the `BuildStep` lazy-initialization hook for backward +compatibility during the deprecation window. New code SHOULD use `flow.Mutate` for +cross-cutting modification and construct sub-workflows inside `Do()` (see the new +`Sub-workflow construction inside Do` requirement below). + +The implicit `BuildStep()` invocation at `Add` time, together with the implicit +`SubWorkflow.Reset()` call that precedes it, SHALL be **removed in the next major +version of `go-workflow`**. From that release on, Steps that define a `BuildStep()` +method will no longer have it invoked automatically; composite-step authors must +construct their inner workflow inside `Do()` instead. This deprecation window is +bounded: it ends at the next major version bump, by which time all production usages +must have migrated. + +If a Step implements `BuildStep()`, the Workflow SHALL call it exactly once when the +Step is first added. This allows composite Steps to initialize their internal +sub-workflow lazily at add-time rather than at construction time. + +If the Step also implements `Reset()`, the Workflow SHALL call `Reset()` before +`BuildStep()` to allow the Step to clear any previous state before rebuilding. + +`BuildStep()` SHALL NOT be called again if the same Step pointer is added a second time. + +#### Scenario: BuildStep called once on first Add +- **WHEN** a Step implementing `BuildStep()` is added to a Workflow +- **THEN** `BuildStep()` is called exactly once, regardless of how many times the + Step is subsequently added + +#### Scenario: Reset called before BuildStep +- **WHEN** a Step implements both `Reset()` and `BuildStep()` + and is added to a Workflow +- **THEN** `Reset()` is called first, then `BuildStep()` + +## ADDED Requirements + +### Requirement: Sub-workflow construction inside Do + +The framework SHALL support constructing a composite Step's sub-workflow inside its +`Do(ctx)` method, using a freshly created `flow.Workflow{}` and standard `Add` / +`Step` / `Pipe` builders. This pattern MUST NOT require implementing `BuildStep`. + +When this pattern is used in combination with `flow.SubWorkflow` embedding, the inner +workflow SHALL remain visible to parent-level Mutators via the `MutatorReceiver` +propagation mechanism (see the `mutators` capability spec). + +When this pattern is used WITHOUT `flow.SubWorkflow` embedding (e.g. by constructing a +local `flow.Workflow{}` inside `Do` and discarding it after `Do` returns), the inner +workflow SHALL be invisible to parent introspection helpers (`Has`, `As`, `HasStep`) and +to parent Mutator propagation. This is intentional: such a step is fully self-contained +and opaque to the outer workflow. + +#### Scenario: Composite step constructs sub-workflow in Do +- **WHEN** a composite step embeds `flow.SubWorkflow` and inside `Do` calls `s.Reset()` + followed by `s.Add(...)` then `s.Do(ctx)` +- **THEN** the sub-workflow runs correctly and parent Mutators reach its inner steps via + `PrependMutators` (invoked once per inner-workflow execution; once-per-step semantics + inside the inner workflow then take over) + +#### Scenario: Local sub-workflow inside Do is opaque to parent +- **WHEN** a step constructs a `flow.Workflow{}` inside `Do` without embedding + `SubWorkflow` and without exposing it via `Unwrap` +- **THEN** parent `flow.As[T](outer)` does not find inner steps and parent Mutators do + not reach them + +--- + +### Requirement: SubWorkflow implements MutatorReceiver + +`flow.SubWorkflow` SHALL implement the `MutatorReceiver` interface defined in the +`mutators` capability spec. Its `PrependMutators` implementation prepends the supplied +Mutators to the inner workflow's `Mutators` slice on each invocation. + +This requirement is what makes the deprecated `BuildStep` + `SubWorkflow` pattern and the +new `Do`-time + `SubWorkflow` pattern interchangeable from the parent Mutator +perspective. + +#### Scenario: Parent Mutator propagates to SubWorkflow +- **WHEN** a parent workflow has `flow.Mutate[*Foo]` registered and a step embedding + `flow.SubWorkflow` adds a `*Foo` to its inner workflow +- **THEN** the parent Mutator runs against the inner `*Foo` instance before each of its + attempts + +--- + +### Requirement: *Workflow implements MutatorReceiver + +`*flow.Workflow`, when used directly as a Step in another workflow, SHALL implement the +`MutatorReceiver` interface. Its `PrependMutators` implementation prepends the supplied +Mutators to its own `Mutators` slice. + +#### Scenario: Nested *Workflow inherits parent Mutators +- **WHEN** a `*flow.Workflow` containing `*Foo` is added as a step in another workflow + that has `flow.Mutate[*Foo]` registered +- **THEN** the parent Mutator runs against the inner `*Foo` before each of its attempts diff --git a/openspec/changes/2026-05-06-step-mutator/specs/mutators/spec.md b/openspec/changes/2026-05-06-step-mutator/specs/mutators/spec.md new file mode 100644 index 0000000..f35c350 --- /dev/null +++ b/openspec/changes/2026-05-06-step-mutator/specs/mutators/spec.md @@ -0,0 +1,403 @@ +## ADDED Requirements + +### Requirement: Mutator interface and Mutate constructor + +The `flow.Mutator` interface SHALL represent a type-dispatched, **once-per-step** +contribution of configuration to a Step. The interface has a single unexported method, +ensuring that the only producer is the generic constructor `flow.Mutate[T]`. + +`flow.Mutate[T Steper](fn func(ctx context.Context, step T) Builder) Mutator` SHALL +construct a Mutator that: + +- Performs a type assertion `step.(T)` against each step it is offered, **walking the + `Unwrap()` chain** if the immediate step does not match. The first layer in the chain + whose concrete type matches `T` is the one `fn` receives. +- Calls `fn` with the workflow's context and the type-asserted step instance if and only + if some layer in the unwrap chain matches. +- Returns the `Builder` that `fn` returns, for the runtime to merge into the step's + `StepConfig`. +- Treats `fn` returning a `nil` Builder as "no additional configuration" (still a valid + Mutator — useful when `fn` only mutates fields on the step pointer). +- Is a no-op (returns `matched=false`) when no layer in the unwrap chain satisfies the + assertion. + +Selection is by concrete type along the unwrap chain only. There is no interface +matching and no name matching. The unwrap traversal mirrors `flow.As[T]`: it follows +`Unwrap() Steper` and `Unwrap() []Steper`, but does NOT cross workflow boundaries (a +nested workflow's inner steps are reached via the propagation mechanism in +`MutatorReceiver propagation interface`, not via in-place unwrap from the parent). + +The `ctx` passed to `fn` is the **workflow-scoped context** — i.e. the `ctx` that was +passed into `Workflow.Do(ctx)`. It is suitable for accessing values that are stable for +the life of the workflow (logger, scenario name, test session ID). It is NOT a +per-attempt context: the Mutator runs once per step at scheduling time, before any +attempt begins. + +A Mutator does NOT return a substituted `context.Context`. The Mutator is not in the +per-attempt onion and cannot influence the `ctx` that `step.Do` receives. Cross-cutting +ctx substitution remains the responsibility of `BeforeStep` (per instance) or +`StepInterceptor` / `AttemptInterceptor` (cross-cutting). The Mutator MAY return a +`Builder` that includes a `BeforeStep` callback if it wants per-attempt ctx influence +through composition. + +#### Scenario: Mutate runs against a matching type +- **WHEN** `flow.Mutate[*Foo]` is registered and the workflow contains a `*Foo` step +- **THEN** the Mutator function is invoked with that `*Foo` instance once, before its + first attempt + +#### Scenario: Mutate skips a non-matching type +- **WHEN** `flow.Mutate[*Foo]` is registered and the workflow contains only `*Bar` steps +- **THEN** the Mutator function is never invoked + +#### Scenario: Mutate matches inner type through Unwrap chain +- **WHEN** `flow.Mutate[*Inner]` is registered and the workflow contains a wrapper step + whose `Unwrap()` returns a `*Inner` (e.g. `flow.Name("...", inner)`) +- **THEN** the Mutator runs against the `*Inner` instance; the user function receives + the typed `*Inner` pointer + +#### Scenario: Mutate matches outer wrapper when wrapper itself is the target type +- **WHEN** `flow.Mutate[*Wrapper]` is registered and the workflow contains a `*Wrapper` + wrapping a `*Inner` +- **THEN** the Mutator runs against the `*Wrapper` instance (first matching layer wins; + `*Wrapper` matches before unwrap descends to `*Inner`) + +#### Scenario: Returning nil Builder is valid +- **WHEN** the Mutator function returns `nil` +- **THEN** the runtime treats it as no additional configuration; no error, no panic + +#### Scenario: Mutator function receives workflow-scoped ctx, returns Builder, no error +- **WHEN** a developer writes `flow.Mutate[*Foo](fn)` +- **THEN** the type of `fn` is `func(ctx context.Context, step *Foo) flow.Builder`, + with no error return +- **AND** when the Mutator runs, `ctx` is the same context that was passed to + `Workflow.Do(ctx)` + +--- + +--- + +### Requirement: Mutator dispatch and merge destination + +The runtime SHALL dispatch and merge Mutators using the following rule, hereafter +"**dispatch by the owning workflow**": + +1. Each `*Workflow` (parent or inner) SHALL evaluate its own `Workflow.Mutators` slice + against every step in its own `state` map, at that step's first scheduling. +2. For each step `wrapper` in `w.state`, the runtime SHALL walk `wrapper`'s `Unwrap()` + chain. For each layer `L` in the chain (including `wrapper` itself, in pre-order), + the runtime SHALL ask each Mutator whether `L.(T)` matches. The first matching layer + SHALL be passed to the Mutator's user function. +3. The `Builder` returned by the user function SHALL be merged into + `w.state[wrapper].Config` — i.e. into the **wrapper key in the workflow that owns + `wrapper` as a root**, regardless of which layer in the unwrap chain the Mutator + matched. This matches the `Config merge destination follows StateOf` rule in + `step-configuration`. +4. Parent Mutators SHALL reach inner-workflow steps only via the `MutatorReceiver` + propagation interface (parent prepends its Mutators into the inner workflow's + `Mutators` slice). The parent SHALL NOT directly walk into a child workflow's + `state` map to apply Mutators; that is the inner workflow's responsibility once it + begins scheduling. This keeps once-per-step state, merge destinations, and lazily + added inner steps consistent across all nesting depths. + +This rule resolves three previously-ambiguous points: + +- **Unwrap is supported** — `flow.Mutate[*Inner]` matches a `flow.Name("...", inner)` + wrapper. Migration from the legacy `As[T] + Input` pattern does not break on wrapper + types. +- **Config destination is well-defined** — the merge target is always the wrapper key + in the workflow that actually owns the step, not the outer workflow on which the + Mutator was registered. +- **Nested workflows are reached by propagation, not by parent walking** — a parent + Mutator targeting `*X` where `*X` lives inside an inner workflow IS observed by + `*X`, but the dispatch is performed by the inner workflow after `PrependMutators`, + not by the parent reaching across the workflow boundary. + +#### Scenario: Mutator matches via Unwrap and merges into wrapper's state +- **GIVEN** a workflow `w` where `w.state` contains `wrapper = flow.Name("x", inner)` + whose `Unwrap()` returns `inner` of type `*Inner` +- **AND** `w.Mutators = [flow.Mutate[*Inner](func(ctx, x *Inner) Builder { return flow.Step(x).Input(fn) })]` +- **WHEN** `wrapper` reaches its first scheduling +- **THEN** the Mutator's user function receives the `*Inner` pointer `inner` +- **AND** `fn` is appended to `w.state[wrapper].Config.Before` (NOT to a state entry + keyed on `inner`) + +#### Scenario: Parent Mutator targeting inner type does not directly mutate inner state +- **GIVEN** `parent` contains `inner` (a `*Workflow` used as a step) which contains + `x` of type `*X` +- **AND** `parent.Mutators = [flow.Mutate[*X](fn)]` +- **WHEN** `parent.Do(ctx)` runs +- **THEN** `parent` SHALL call `inner.PrependMutators(parent.Mutators)` before invoking + `inner` as a step +- **AND** the Mutator runs once against `x` inside `inner`'s scheduling pass; the + Builder is merged into `inner.state[x].Config`, NOT into any state entry on `parent` + +#### Scenario: Wrapper inside inner workflow is reached via propagation +- **GIVEN** `parent → inner → flow.Name("x", x)` where `x` is `*X` +- **AND** `parent.Mutators = [flow.Mutate[*X](fn)]` +- **WHEN** the wrapper reaches first scheduling inside `inner` +- **THEN** the Mutator (propagated by `PrependMutators`) unwraps the wrapper, matches + `x`, and merges its Builder into `inner.state[wrapper].Config` + +--- + +### Requirement: Workflow.Mutators field + +`flow.Workflow` SHALL expose a `Mutators []Mutator` field. Mutators in this slice are +evaluated against every step before the step's first attempt. Slice order is preserved: +element 0 runs first, element n-1 runs last, among the Mutators whose target type matches +the step. + +A workflow with `Mutators == nil` or an empty slice SHALL behave identically to one +without the Mutator mechanism — no overhead, no panic. + +#### Scenario: Slice order is preserved among matching Mutators +- **WHEN** two Mutators `[m1, m2]` both registered for `*Foo` are present, and `m1` + returns a Builder with `Input` callback A, `m2` returns a Builder with `Input` + callback B +- **THEN** before the first attempt of any `*Foo`, callback A runs before callback B + (because the merge appends in order) + +#### Scenario: Multiple Mutators for the same type all run +- **WHEN** two Mutators `m1`, `m2` are both registered for `*Foo` +- **THEN** both run, in slice order, before the first attempt of every `*Foo` step; + their contributions are merged + +#### Scenario: Nil Mutators field is a no-op +- **WHEN** `Workflow.Mutators` is nil +- **THEN** the workflow runs identically to a workflow without the Mutator field + +--- + +### Requirement: Mutator merge timing and ordering + +Mutators SHALL be merged into a step's `StepConfig` **after** all `Workflow.Add` calls +for that step have completed, but **before** the step's first attempt begins. +Specifically, the merge happens when the step transitions from `Pending` to its first +scheduling — i.e. inside `Workflow.Do`, on demand, not at `Add` time. + +Mutator-contributed `Before` / `After` / `Option` lists SHALL be **appended** to the +step's existing lists via the standard `StepConfig.Merge` rule (see +`step-configuration`'s `Idempotent Add with config merging` requirement). Consequently: + +- **Before chain execution order:** plan-declared `Input` / `BeforeStep` run first; + Mutator-contributed callbacks run after, in `Workflow.Mutators` slice order. +- **After chain execution order:** plan-declared `Output` / `AfterStep` run first; + Mutator-contributed callbacks run after, in `Workflow.Mutators` slice order. +- **Multiple Mutators on the same step:** their callbacks all append after plan's; + among Mutators, slice order is preserved. + +`Upstreams` contributed by Mutators are unioned with existing upstreams (set union, not +ordered). + +This ordering matches the "scenario customization layered on top of plan defaults" +mental model: a Mutator can observe or override state left by plan callbacks. A Mutator +cannot pre-empt plan callbacks; if pre-empting is needed (rare), register the behavior +as a plan-time `Input` instead. + +Sub-workflow propagation interacts with this ordering as follows: parent Mutators are +prepended into the inner workflow's `Mutators` slice (see `MutatorReceiver propagation` +requirement) so that, inside the inner workflow, parent Mutators run before child +Mutators. Both still run after the inner step's plan-declared callbacks. + +#### Scenario: Plan Input runs before Mutator Input +- **GIVEN** a step `s` added via `flow.Step(s).Input(planFn)` +- **AND** a Mutator `flow.Mutate[*S](func(ctx, s *S) flow.Builder { + return flow.Step(s).Input(mutatorFn) })` +- **WHEN** the step executes its first attempt +- **THEN** `planFn` runs first, then `mutatorFn`, then `Do` + +#### Scenario: Multiple Mutator Inputs run in slice order, after plan +- **GIVEN** a step `s` added via `flow.Step(s).Input(planFn)` +- **AND** `Workflow.Mutators = [m1, m2]` where both target `*S` and contribute Inputs + `mfn1`, `mfn2` respectively +- **WHEN** the step executes its first attempt +- **THEN** the Before chain runs `planFn`, then `mfn1`, then `mfn2`, then `Do` + +#### Scenario: Plan After runs before Mutator After +- **GIVEN** a step with a plan-declared `Output(planAfter)` and a Mutator that returns + `flow.Step(s).Output(mutatorAfter)` +- **WHEN** the step's `Do` returns successfully +- **THEN** `planAfter` runs first, then `mutatorAfter` + +#### Scenario: Merge happens at first scheduling, not at Add time +- **GIVEN** a Mutator whose user function appends to an external slice when invoked +- **WHEN** `Workflow.Add(Step(s))` is called +- **THEN** the external slice is unchanged +- **WHEN** `Workflow.Do(ctx)` runs and `s` reaches its first attempt +- **THEN** the external slice has exactly one new entry (Mutator invoked once) + +--- + +### Requirement: Mutators run once per step instance + +A Mutator's user function SHALL be invoked at most **once** per step instance, just +before that step's first attempt begins (i.e. when the runtime moves the step out of +`Pending` state into the attempt loop). + +The runtime SHALL track per-step state to ensure the Mutator chain runs exactly once, +even if the same step is somehow re-scheduled. + +The configuration contributed by Mutators is merged into the step's `StepConfig` using +the same rules as the existing `Idempotent Add with config merging` requirement +(`step-configuration` capability): + +- `Upstreams`: union +- `Before`: append in order (Mutator order, then existing) +- `After`: append in order +- `Option`: append in order + +After the merge, the step proceeds to the standard per-attempt onion (Interceptors, +retry loop, Before chain, Do, After chain). + +#### Scenario: Mutator runs exactly once across multiple attempts +- **WHEN** a `*Foo` step has `RetryOption{MaxAttempts: 3}` and `flow.Mutate[*Foo]` is + registered, and the step fails twice then succeeds on attempt 3 +- **THEN** the Mutator user function is invoked exactly once (before attempt 1) + +#### Scenario: Mutator-contributed Before runs on every attempt +- **WHEN** `flow.Mutate[*Foo]` returns a Builder with a `BeforeStep` callback `b`, + and the step retries 3 times +- **THEN** `b` runs once per attempt (3 times total) — because `BeforeStep` is + per-attempt; the Mutator merge happens once but the resulting `Before` chain still + fires per attempt + +#### Scenario: Field mutation done in Mutator persists +- **WHEN** the Mutator user function sets `f.Field = "X"` on the typed `*Foo` pointer it + receives +- **THEN** `f.Field == "X"` is observable from `Do` on every attempt; the field is set + once and persists + +#### Scenario: Mutator merge happens before first attempt's Before chain +- **WHEN** a Mutator returns a Builder with `Input(callback)` and the user has separately + registered `flow.Step(s).Input(otherCallback)` in their plan +- **THEN** before the first attempt, both callbacks are present in the step's `Before` + chain (in registration order: Mutator-registered ones come from the slice merge order) + +--- + +### Requirement: Mutator-returned Builder scope + +The runtime SHALL restrict the merge of a Mutator-returned `Builder` to the configuration +keyed by the step that was passed into the Mutator. Any configuration the Builder +declares for other steps SHALL be silently ignored. + +This rule SHALL keep Mutator scope predictable: a `flow.Mutate[*Foo]` Mutator MUST only +configure the specific `*Foo` instance it was called with, even if the user mistakenly +constructs a Builder that mentions other steps. + +#### Scenario: Builder with config for the passed-in step is merged +- **WHEN** `flow.Mutate[*Foo](func(ctx context.Context, f *Foo) flow.Builder { return flow.Step(f).Input(fn) })` + is registered and runs against a `*Foo` instance +- **THEN** the `Input` callback `fn` is added to that `*Foo`'s `Before` chain + +#### Scenario: Builder with config for an unrelated step is ignored +- **WHEN** a Mutator's user function (mistakenly or intentionally) returns + `flow.Step(otherStep).Input(...)` instead of `flow.Step(passedIn).Input(...)` +- **THEN** the configuration for `otherStep` is silently dropped; `passedIn` is unchanged + +--- + +### Requirement: MutatorReceiver propagation interface + +`flow.MutatorReceiver` SHALL be defined as an optional interface that composite steps and +sub-workflows implement to receive Mutators from their parent workflow: + +```go +type MutatorReceiver interface { + PrependMutators(mw []Mutator) +} +``` + +Before the runtime invokes a step that hosts a sub-workflow (composite step or +`*Workflow` used as a step), the runtime SHALL check whether the step implements +`MutatorReceiver` and, if so, call `step.PrependMutators(parent.Mutators)` so that parent +Mutators reach inner steps. + +The contract for implementers is to **prepend** parent Mutators to their own Mutators +list, preserving the invariant that parent Mutators run before child Mutators. + +The propagation is invoked once per inner-workflow execution. Inner steps that are added +lazily (e.g. inside the composite step's `Do`) are still reached, because Mutator +evaluation in the inner workflow is itself once-per-step at first scheduling. + +#### Scenario: Parent Mutator reaches step inside SubWorkflow +- **WHEN** the parent workflow has `flow.Mutate[*Foo]` registered and a step embeds + `flow.SubWorkflow` with a `*Foo` inside +- **THEN** the parent Mutator runs against the inner `*Foo` instance once, before its + first attempt + +#### Scenario: Parent Mutator reaches step inside nested *Workflow +- **WHEN** the parent workflow has `flow.Mutate[*Foo]` registered and a `*Workflow` is + added as a step containing a `*Foo` +- **THEN** the parent Mutator runs against the inner `*Foo` + +#### Scenario: Lazily added inner steps are reached +- **WHEN** a composite step embeds `SubWorkflow`, calls `s.Add(new(Foo))` inside its + `Do`, and the parent has `flow.Mutate[*Foo]` registered +- **THEN** the lazily added `*Foo` is seen by the parent Mutator at its first scheduling + inside the inner workflow + +#### Scenario: Child Mutator runs after parent +- **WHEN** the parent registers Mutator `pm` and a `SubWorkflow`-embedded child registers + Mutator `cm`, both targeting `*Foo` +- **THEN** for any `*Foo` inside the child, `pm` runs before `cm`; their `Before` chain + contributions are appended in that order + +--- + +### Requirement: SubWorkflow and *Workflow implement MutatorReceiver + +`flow.SubWorkflow` SHALL implement `MutatorReceiver` by prepending the supplied Mutators +to its inner workflow's `Mutators` slice. + +`*flow.Workflow` (when used directly as a step in another workflow) SHALL implement +`MutatorReceiver` by prepending the supplied Mutators to its own `Mutators` slice. + +Both implementations MUST be safe to call multiple times; calling `PrependMutators` twice +with the same list MUST NOT cause double-execution of the Mutators on the same step +(deduplication is provided by the once-per-step invariant — once a step's Mutators have +been applied, re-prepending the slice has no further effect on that step). + +#### Scenario: SubWorkflow.PrependMutators prepends in order +- **WHEN** `parent.Mutators = [m1, m2]` and a SubWorkflow's inner has `[m3]` +- **THEN** after `PrependMutators` is called, the inner has `[m1, m2, m3]` + +#### Scenario: *Workflow used as step inherits Mutators +- **WHEN** a `*Workflow` is added as a step in another workflow with Mutators `[m1]` +- **THEN** during execution, `[m1]` is prepended to the inner workflow's Mutators + +--- + +### Requirement: Mutator vs Input/BeforeStep usage guidance + +The framework SHALL preserve both `flow.Step(s).Input(fn)` (per-instance binding) and +`flow.Mutate[*T](fn)` (per-type binding) as parallel mechanisms with distinct intended +scopes. Neither MUST be removed in favor of the other during the deprecation window for +`BuildStep`. + +The intended scope distinction is: + +- `flow.Step(s).Input(fn)` (in a plan) binds to one specific step instance, lexically + known. Use it when you are writing a plan or composite step and you hold the step + instance. The modification only matters for that instance. +- `flow.Mutate[*T](fn)` binds to a Go type. Use it when you want a modification to apply + to every `*T` step in the workflow tree (including sub-workflows and steps added by + other plans), and you do not want to enumerate them. + +Anti-patterns: + +- **Using `Mutate` to modify a single specific instance.** It works but is misleading; + the next reader of the code assumes "all `*T`". Use `Input` instead. +- **Using `Input` (via `As[T]` traversal) to apply a cross-plan policy.** This is the + legacy pattern that `Mutate` is designed to replace. Each plan author must pre-build + sub-workflows so traversal can find targets, and missing-match failures are silent. + +For ctx substitution at workflow level (across all step types), use `StepInterceptor` or +`AttemptInterceptor`. + +#### Scenario: Both mechanisms coexist +- **WHEN** a workflow uses both `flow.Step(s).Input(fn1)` for a specific instance and + `flow.Mutate[*S](func(ctx, s *S) Builder { return flow.Step(s).Input(fn2) })` for + cross-cutting policy +- **THEN** both `fn1` and `fn2` SHALL be invoked on `s`'s Before chain in the documented + order (plan first, Mutator after) diff --git a/openspec/changes/2026-05-06-step-mutator/specs/step-configuration/spec.md b/openspec/changes/2026-05-06-step-mutator/specs/step-configuration/spec.md new file mode 100644 index 0000000..4c2d9a2 --- /dev/null +++ b/openspec/changes/2026-05-06-step-mutator/specs/step-configuration/spec.md @@ -0,0 +1,78 @@ +## ADDED Requirements + +### Requirement: Config merge destination follows StateOf + +A `StepConfig` declared via `workflow.Add(flow.Step(s).X(...))` SHALL be merged into the +innermost workflow whose `state` map contains `s` as a key, not into the workflow on +which `Add` was invoked. + +This rule SHALL apply uniformly to all `StepConfig` fields: `Upstreams` (via the +`Cross-boundary upstream registration` rule in `composite-steps`), `Before`, `After`, +and `Option`. + +The destination is computed by `StateOf(s)`, which: + +1. Returns `w.state[s]` if `s` is a direct root of `w`. +2. Otherwise walks roots of `w` and, for each root that implements + `StateOf(Steper) *State` (i.e. `*Workflow` and `SubWorkflow`-embedding steps), + recursively asks that root for `StateOf(s)`. +3. Returns the deepest non-nil match. + +This is existing runtime behavior; this requirement codifies it because the Mutator +mechanism (`mutators` capability) relies on it being well-defined: a Mutator-returned +Builder is merged through the same `StateOf`-driven destination logic. + +#### Scenario: Add on outer merges Input into inner state +- **GIVEN** an `inner` workflow containing step `x`, and `outer` containing `inner` as a step +- **WHEN** `outer.Add(flow.Step(x).Input(fn))` is called +- **THEN** `fn` is appended to `inner.StateOf(x).Config.Before` +- **AND** `outer.StateOf(inner).Config.Before` does NOT contain `fn` + +#### Scenario: Add on outer merges Option into inner state +- **GIVEN** an `inner` workflow containing step `x`, and `outer` containing `inner` as a step +- **WHEN** `outer.Add(flow.Step(x).Timeout(5 * time.Minute))` is called +- **THEN** the timeout takes effect on `x` via `inner.StateOf(x).Config.Option` + +#### Scenario: Multi-level nesting reaches innermost +- **GIVEN** `outer → midA → midB → x` +- **WHEN** `outer.Add(flow.Step(x).Input(fn))` is called +- **THEN** `fn` ends up in `midB.StateOf(x).Config.Before` (the innermost workflow that + has `x` in its `state` map), not in `outer` or `midA` + +--- + +### Requirement: Mutators field on Workflow + +`flow.Workflow` SHALL expose a `Mutators []Mutator` field that holds zero or more +`flow.Mutator` instances (constructed via `flow.Mutate[T]`). + +Mutators are evaluated **once per step instance**, just before the step's first attempt +begins. Each matched Mutator contributes a `flow.Builder` whose configuration for the +matched step is merged into the step's existing `StepConfig` using the same merge rules +as the `Idempotent Add with config merging` requirement (`Upstreams` union, `Before` +append, `After` append, `Option` append). + +After the merge, the step proceeds through the standard per-attempt onion (Interceptors, +retry loop, Before chain, Do, After chain). The Mutator's contribution is therefore +visible to all attempts of the step. + +The `Mutators` slice is independent of `Before` / `After` / `Option` configuration on +`StepConfig`: Mutators are workflow-level (apply to all matching steps), while +`StepConfig` is per-step-instance. + +#### Scenario: Mutator contribution merges with plan-declared StepConfig +- **WHEN** a workflow has `Mutators = [flow.Mutate[*Foo](func(ctx context.Context, f *Foo) flow.Builder { + return flow.Step(f).Input(callbackA) })]` and a `*Foo` step is added with + `flow.Step(foo).Input(callbackB)` +- **THEN** before the first attempt of `foo`, both `callbackA` and `callbackB` are + present in the step's `Before` chain (Mutator contribution merged into existing config) + +#### Scenario: Mutator contribution survives across attempts +- **WHEN** a Mutator contributes a `Before` callback and the step retries 3 times +- **THEN** the contributed callback runs on every attempt (because it lives in the + step's permanent `Before` chain after merge) + +#### Scenario: Mutator runs once per step +- **WHEN** a Mutator's user function appends to a workflow-scoped slice and the matched + step retries 3 times +- **THEN** the slice has exactly one new entry (Mutator user function ran once) diff --git a/openspec/changes/2026-05-06-step-mutator/tasks.md b/openspec/changes/2026-05-06-step-mutator/tasks.md new file mode 100644 index 0000000..d494a04 --- /dev/null +++ b/openspec/changes/2026-05-06-step-mutator/tasks.md @@ -0,0 +1,76 @@ +## 1. Define Mutator Types + +- [ ] 1.1 Create `mutator.go` with `Mutator` interface (single unexported method `applyTo(ctx context.Context, step Steper) (matched bool, builder Builder)`) +- [ ] 1.2 Add `Mutate[T Steper](fn func(ctx context.Context, step T) Builder) Mutator` generic constructor +- [ ] 1.3 Add internal `mutatorFunc[T Steper]` type implementing `Mutator.applyTo` via `step.(T)` assertion, forwarding `ctx` to `fn` +- [ ] 1.4 Add `MutatorReceiver` interface with `PrependMutators(mw []Mutator)` + +## 2. Wire Workflow Field and State + +- [ ] 2.1 Add `Mutators []Mutator` field to `Workflow` in `workflow.go` +- [ ] 2.2 Add `MutatorsApplied bool` (or equivalent — e.g. an internal sentinel-based marker) to `State` in `state.go` to enforce once-per-step +- [ ] 2.3 Add `(*Workflow).PrependMutators(mw []Mutator)` so `*Workflow` (used as a step) propagates Mutators into nested workflows +- [ ] 2.4 Add `(*SubWorkflow).PrependMutators(mw []Mutator)` mirroring `*Workflow`, prepending parent Mutators to inner workflow's + +## 3. First-Schedule Merge + +- [ ] 3.1 In the scheduling-time path that takes a step from `Pending` to its first attempt (precise location depends on the Step Interceptor change's `stepExecution`), add a once-per-step Mutator merge block guarded by `state.MutatorsApplied` +- [ ] 3.2 For each matched Mutator: call `applyTo(ctx, step)` (where `ctx` is the workflow ctx from `Workflow.Do(ctx)`); if it returns `(true, b)` with non-nil `b`, iterate `b.AddToWorkflow()` and merge only the entry whose key equals `step` into `state.Config` via existing `StepConfig.Merge` +- [ ] 3.3 Set `state.MutatorsApplied = true` after the loop, before the first AttemptInterceptor invocation +- [ ] 3.4 Immediately before invoking a step that hosts a sub-workflow, if the step implements `MutatorReceiver`, call `PrependMutators(parent.Mutators)` so the inner workflow's first-schedule pass sees parent Mutators + +## 4. Deprecation Notes + +- [ ] 4.1 Add `// Deprecated: Use flow.Mutate or construct sub-workflows in Do()` to `StepBuilder.BuildStep` godoc in `build_step.go` +- [ ] 4.2 Add `// Deprecated:` to `flow.SubWorkflow.Reset` godoc in `workflow.go`, noting it is only invoked by the deprecated `StepBuilder.BuildStep` path +- [ ] 4.3 Add `// Deprecated:` to the `BuildStep` doc-comment template in `build_step.go` that shows users how to write the hook + +## 5. Examples + +- [ ] 5.1 Add `// Deprecated:` doc to `ExampleCompositeViaWorkflow` in `example/13_composite_step_test.go`, pointing to the new example +- [ ] 5.2 Add `ExampleCompositeViaDo` showing sub-workflow construction inside `Do()` without `BuildStep` +- [ ] 5.3 Add `ExampleMutate_input` showing `flow.Mutate[*MyStep]` returning a Builder with `Input(...)` registered on `Workflow.Mutators` +- [ ] 5.4 Add `ExampleMutate_retryOverride` showing a Mutator that returns a Builder with `Retry(...)` to override retry policy across all instances of a type +- [ ] 5.5 Add `ExampleMutate_subWorkflow` showing parent Mutators reaching into a `SubWorkflow`-embedded composite step via `PrependMutators` + +## 6. Spec Updates + +- [ ] 6.1 Add new spec `openspec/specs/mutators/spec.md` covering the requirements in `changes/2026-05-06-step-mutator/specs/mutators/spec.md` (move on archive) +- [ ] 6.2 Apply MODIFIED Requirements from `changes/2026-05-06-step-mutator/specs/composite-steps/spec.md` to `openspec/specs/composite-steps/spec.md`: + - mark `BuildStep — lazy initialization hook` as deprecated + - add `Sub-workflow construction inside Do()` requirement + - add `MutatorReceiver propagation` requirement +- [ ] 6.3 Apply ADDED Requirements from `changes/2026-05-06-step-mutator/specs/step-configuration/spec.md` to `openspec/specs/step-configuration/spec.md`: + - add `Mutators field on Workflow` requirement + +## 7. Tests + +- [ ] 7.1 Test: `Mutate[*Foo]` runs against a `*Foo` step, does not run against a `*Bar` step +- [ ] 7.1a Test: Mutator function receives the workflow-scoped ctx (the same ctx passed to `Workflow.Do(ctx)`); a value placed in ctx via `context.WithValue` before `Workflow.Do` is observable in the Mutator +- [ ] 7.2 Test: Mutator's user function is invoked exactly once across multiple attempts (use `RetryOption{MaxAttempts: 3}`, count invocations) +- [ ] 7.3 Test: Mutator-returned Builder's `Input` callback runs on every attempt (3 attempts → 3 invocations) +- [ ] 7.4 Test: Mutator returning `nil` Builder is a no-op; field mutation done in the user function persists into `Do` +- [ ] 7.5 Test: Mutator returning a Builder with `Retry(...)` overrides retry policy of matched steps +- [ ] 7.6 Test: Mutator returning a Builder with `Before/After` adds those callbacks to the step's chain +- [ ] 7.7 Test: Multiple Mutators registered for the same type run in slice order; their `Before` contributions appear in the same order in the merged chain +- [ ] 7.8 Test: Mutator-returned Builder with config for a step *other than* the one passed in has that config silently ignored +- [ ] 7.9 Test: Parent workflow Mutator reaches `*Foo` inside a `SubWorkflow`-embedded composite step +- [ ] 7.10 Test: Parent workflow Mutator reaches `*Foo` inside a nested `*Workflow` used as a step +- [ ] 7.11 Test: Lazily added inner step (added inside composite step's `Do`) is reached by parent Mutator +- [ ] 7.12 Test: A workflow with `Mutators == nil` runs identically to a workflow without Mutators (no overhead, no panic) +- [ ] 7.13 Test: Mutator's user function panicking is caught by the workflow's panic recovery (when `DontPanic` is true) +- [ ] 7.14 Test: Plan-declared `Input` and Mutator-contributed `Input` both run, in expected order +- [ ] 7.15 Test: Plan `Input` runs **before** Mutator-contributed `Input`; with multiple Mutators, Mutator-contributed Inputs run in `Workflow.Mutators` slice order, all after plan +- [ ] 7.16 Test: Mutator merge does not happen at `Add` time (assert an external slice mutated only after `Do` reaches the step) +- [ ] 7.17 Test: `Mutate[*Inner]` matches a `flow.Name("...", inner)` wrapper via Unwrap; the user function receives the typed `*Inner` pointer +- [ ] 7.18 Test: When Mutator matches via Unwrap, the resulting Builder's config merges into `w.state[wrapper].Config` (the wrapper key in the owning workflow), not into a state entry keyed on the inner step +- [ ] 7.19 Test: `Mutate[*Wrapper]` registered against a `*Wrapper` wrapping `*Inner` matches `*Wrapper` (outer layer wins; user function receives `*Wrapper`, not `*Inner`) +- [ ] 7.20 Test: Parent Mutator targeting `*X` does NOT directly write into inner workflow's state map; the inner workflow performs the merge after `PrependMutators` (assert inner's state[wrapper].Config gets the contribution, parent's state map is untouched) +- [ ] 7.21 Test: Existing `Config merge destination follows StateOf` rule: `outer.Add(flow.Step(x).Input(fn))` where `x` lives in `inner` results in `fn` appearing in `inner.StateOf(x).Config.Before`, not in any outer state + +## 8. Verify + +- [ ] 8.1 `go build ./...` — no compile errors +- [ ] 8.2 `go test ./...` — all existing and new tests pass +- [ ] 8.3 `go vet ./...` — no issues +- [ ] 8.4 Confirm with the Step Interceptor change author that the shared scheduling-time host and ordering match before merge From ededf3aac21b05592b3e089f96836b381bed0819 Mon Sep 17 00:00:00 2001 From: Xingfei Xu Date: Mon, 11 May 2026 07:42:59 +0000 Subject: [PATCH 13/31] feat(mutator): add Mutator interface and Mutate[T] constructor --- mutator.go | 58 +++++++++++++++++++++++++++++++++++++++++++++++++ mutator_test.go | 42 +++++++++++++++++++++++++++++++++++ 2 files changed, 100 insertions(+) create mode 100644 mutator.go create mode 100644 mutator_test.go diff --git a/mutator.go b/mutator.go new file mode 100644 index 0000000..7ca115c --- /dev/null +++ b/mutator.go @@ -0,0 +1,58 @@ +package flow + +import "context" + +// Mutator represents a type-dispatched, once-per-step contribution of +// configuration to a Step. The interface has a single unexported method, so +// the only producer is the generic constructor [Mutate]. +type Mutator interface { + applyTo(ctx context.Context, step Steper) (matched bool, target Steper, builder Builder) +} + +// MutatorReceiver is implemented by Steps that host a sub-workflow (e.g. +// [SubWorkflow] or *Workflow used as a step) so that parent workflows can +// propagate their [Mutator]s into the inner workflow before it schedules its +// own steps. +type MutatorReceiver interface { + PrependMutators(mw []Mutator) +} + +// Mutate constructs a [Mutator] that runs against any step whose concrete type +// matches T anywhere along its Unwrap() chain (within a single workflow's +// boundaries). The first matching layer is passed to fn. fn returns a [Builder] +// whose configuration for the matched step is merged into the step's +// StepConfig at first scheduling. Returning a nil Builder is valid (useful +// when fn only mutates fields on the typed step pointer). +func Mutate[T Steper](fn func(ctx context.Context, step T) Builder) Mutator { + return mutatorFunc[T](fn) +} + +type mutatorFunc[T Steper] func(ctx context.Context, step T) Builder + +func (m mutatorFunc[T]) applyTo(ctx context.Context, step Steper) (bool, Steper, Builder) { + var ( + matched bool + typed T + match Steper + ) + Traverse(step, func(s Steper, _ []Steper) TraverseDecision { + if v, ok := s.(T); ok { + typed = v + match = s + matched = true + return TraverseStop + } + // Stop at workflow boundaries: do NOT descend into a nested workflow's + // inner steps from here. Inner steps are reached via PrependMutators. + if _, isWorkflow := s.(interface { + StateOf(Steper) *State + }); isWorkflow && s != step { + return TraverseEndBranch + } + return TraverseContinue + }) + if !matched { + return false, nil, nil + } + return true, match, m(ctx, typed) +} diff --git a/mutator_test.go b/mutator_test.go new file mode 100644 index 0000000..dc5b992 --- /dev/null +++ b/mutator_test.go @@ -0,0 +1,42 @@ +package flow + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" +) + +type mutFoo struct{ Field string } + +func (*mutFoo) Do(context.Context) error { return nil } + +type mutBar struct{} + +func (*mutBar) Do(context.Context) error { return nil } + +func TestMutate_matchesExactType(t *testing.T) { + called := 0 + m := Mutate[*mutFoo](func(ctx context.Context, f *mutFoo) Builder { + called++ + return nil + }) + matched, target, b := m.applyTo(context.Background(), &mutFoo{}) + assert.True(t, matched) + assert.NotNil(t, target) + assert.Nil(t, b) + assert.Equal(t, 1, called) +} + +func TestMutate_skipsNonMatchingType(t *testing.T) { + called := 0 + m := Mutate[*mutFoo](func(ctx context.Context, f *mutFoo) Builder { + called++ + return nil + }) + matched, target, b := m.applyTo(context.Background(), &mutBar{}) + assert.False(t, matched) + assert.Nil(t, target) + assert.Nil(t, b) + assert.Equal(t, 0, called) +} From 0b332ade50b790d2950fb1c9c4e4fe1bc5eb4e30 Mon Sep 17 00:00:00 2001 From: Xingfei Xu Date: Mon, 11 May 2026 07:51:09 +0000 Subject: [PATCH 14/31] chore(mutator): clarify boundary-check guard and PrependMutators doc Co-Authored-By: Claude Opus 4.7 (1M context) --- mutator.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mutator.go b/mutator.go index 7ca115c..5cebeb2 100644 --- a/mutator.go +++ b/mutator.go @@ -14,6 +14,9 @@ type Mutator interface { // propagate their [Mutator]s into the inner workflow before it schedules its // own steps. type MutatorReceiver interface { + // PrependMutators inserts mw at the front of the receiver's Mutators + // list, so propagated parent Mutators run before locally-registered + // child Mutators. PrependMutators(mw []Mutator) } @@ -44,6 +47,8 @@ func (m mutatorFunc[T]) applyTo(ctx context.Context, step Steper) (bool, Steper, } // Stop at workflow boundaries: do NOT descend into a nested workflow's // inner steps from here. Inner steps are reached via PrependMutators. + // The `s != step` guard allows the top-level step to be checked even + // when it is itself a workflow; only inner workflows are skipped. if _, isWorkflow := s.(interface { StateOf(Steper) *State }); isWorkflow && s != step { From 561feef245c17a7c7f2ee9828b5f7176fe12aa19 Mon Sep 17 00:00:00 2001 From: Xingfei Xu Date: Mon, 11 May 2026 07:52:57 +0000 Subject: [PATCH 15/31] test(mutator): cover unwrap traversal and workflow boundary stop Co-Authored-By: Claude Opus 4.7 (1M context) --- mutator.go | 4 +--- mutator_test.go | 50 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 3 deletions(-) diff --git a/mutator.go b/mutator.go index 5cebeb2..a8efbc5 100644 --- a/mutator.go +++ b/mutator.go @@ -47,11 +47,9 @@ func (m mutatorFunc[T]) applyTo(ctx context.Context, step Steper) (bool, Steper, } // Stop at workflow boundaries: do NOT descend into a nested workflow's // inner steps from here. Inner steps are reached via PrependMutators. - // The `s != step` guard allows the top-level step to be checked even - // when it is itself a workflow; only inner workflows are skipped. if _, isWorkflow := s.(interface { StateOf(Steper) *State - }); isWorkflow && s != step { + }); isWorkflow { return TraverseEndBranch } return TraverseContinue diff --git a/mutator_test.go b/mutator_test.go index dc5b992..028fe6d 100644 --- a/mutator_test.go +++ b/mutator_test.go @@ -40,3 +40,53 @@ func TestMutate_skipsNonMatchingType(t *testing.T) { assert.Nil(t, b) assert.Equal(t, 0, called) } + +type mutWrapper struct{ inner Steper } + +func (w *mutWrapper) Do(context.Context) error { return nil } +func (w *mutWrapper) Unwrap() Steper { return w.inner } + +func TestMutate_matchesInnerViaUnwrap(t *testing.T) { + inner := &mutFoo{Field: "before"} + wrapper := &mutWrapper{inner: inner} + + called := 0 + m := Mutate[*mutFoo](func(ctx context.Context, f *mutFoo) Builder { + called++ + assert.Same(t, inner, f, "should receive the inner *mutFoo, not the wrapper") + return nil + }) + matched, target, _ := m.applyTo(context.Background(), wrapper) + assert.True(t, matched) + assert.Same(t, inner, target) + assert.Equal(t, 1, called) +} + +func TestMutate_outerWrapperWinsWhenItIsTheTarget(t *testing.T) { + inner := &mutFoo{} + wrapper := &mutWrapper{inner: inner} + + called := 0 + m := Mutate[*mutWrapper](func(ctx context.Context, w *mutWrapper) Builder { + called++ + assert.Same(t, wrapper, w) + return nil + }) + matched, _, _ := m.applyTo(context.Background(), wrapper) + assert.True(t, matched) + assert.Equal(t, 1, called) +} + +func TestMutate_doesNotCrossWorkflowBoundary(t *testing.T) { + // A *Workflow sits between the outer step and the inner *mutFoo. + // applyTo must NOT descend into it; that's what PrependMutators is for. + innerFoo := &mutFoo{} + innerWf := new(Workflow).Add(Step(innerFoo)) + + m := Mutate[*mutFoo](func(ctx context.Context, f *mutFoo) Builder { + t.Fatalf("mutator must not descend into nested workflow") + return nil + }) + matched, _, _ := m.applyTo(context.Background(), innerWf) + assert.False(t, matched) +} From f7e52a3be993a2aabcf298389c11cc17b5c4254a Mon Sep 17 00:00:00 2001 From: Xingfei Xu Date: Mon, 11 May 2026 07:56:30 +0000 Subject: [PATCH 16/31] test(mutator): cover inner-workflow boundary stop inside wrapper --- mutator_test.go | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/mutator_test.go b/mutator_test.go index 028fe6d..e780f8c 100644 --- a/mutator_test.go +++ b/mutator_test.go @@ -90,3 +90,19 @@ func TestMutate_doesNotCrossWorkflowBoundary(t *testing.T) { matched, _, _ := m.applyTo(context.Background(), innerWf) assert.False(t, matched) } + +func TestMutate_doesNotCrossWorkflowBoundaryInsideWrapper(t *testing.T) { + // wrapper -> *Workflow -> *mutFoo. The wrapper itself is not a workflow, + // so Traverse will descend past it; the *Workflow must then halt the + // descent so the inner *mutFoo is NOT reached. + innerFoo := &mutFoo{} + innerWf := new(Workflow).Add(Step(innerFoo)) + wrapper := &mutWrapper{inner: innerWf} + + m := Mutate[*mutFoo](func(ctx context.Context, f *mutFoo) Builder { + t.Fatalf("mutator must not descend into nested workflow even when wrapped") + return nil + }) + matched, _, _ := m.applyTo(context.Background(), wrapper) + assert.False(t, matched) +} From c1923a8741e32fc2a590403eb8a384b6360e1eac Mon Sep 17 00:00:00 2001 From: Xingfei Xu Date: Mon, 11 May 2026 07:57:45 +0000 Subject: [PATCH 17/31] feat(state): add MutatorsApplied per-step flag --- mutator_test.go | 11 +++++++++++ state.go | 19 ++++++++++++++++++- 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/mutator_test.go b/mutator_test.go index e780f8c..78656c5 100644 --- a/mutator_test.go +++ b/mutator_test.go @@ -106,3 +106,14 @@ func TestMutate_doesNotCrossWorkflowBoundaryInsideWrapper(t *testing.T) { matched, _, _ := m.applyTo(context.Background(), wrapper) assert.False(t, matched) } + +func TestState_MutatorsAppliedDefault(t *testing.T) { + s := &State{} + assert.False(t, s.MutatorsApplied()) +} + +func TestState_SetMutatorsApplied(t *testing.T) { + s := &State{} + s.SetMutatorsApplied() + assert.True(t, s.MutatorsApplied()) +} diff --git a/state.go b/state.go index b313864..e20c935 100644 --- a/state.go +++ b/state.go @@ -11,7 +11,8 @@ import ( // The status could be read / write from different goroutines, so use RWMutex to protect it. type State struct { StepResult - Config *StepConfig + Config *StepConfig + mutatorsApplied bool sync.RWMutex } @@ -25,6 +26,22 @@ func (s *State) SetStatus(ss StepStatus) { defer s.Unlock() s.Status = ss } + +// MutatorsApplied reports whether the workflow has already merged Mutator +// contributions into this Step's Config. +func (s *State) MutatorsApplied() bool { + s.RLock() + defer s.RUnlock() + return s.mutatorsApplied +} + +// SetMutatorsApplied marks Mutator contributions as merged for this Step. +// The runtime calls this exactly once per step, just before the first attempt. +func (s *State) SetMutatorsApplied() { + s.Lock() + defer s.Unlock() + s.mutatorsApplied = true +} func (s *State) GetStepResult() StepResult { s.RLock() defer s.RUnlock() From d6b3f72e1d576081cef8922161ece087b2488ae6 Mon Sep 17 00:00:00 2001 From: Xingfei Xu Date: Tue, 12 May 2026 01:38:40 +0000 Subject: [PATCH 18/31] feat(workflow): add Mutators field and PrependMutators on Workflow/SubWorkflow --- workflow.go | 22 +++++++++++++++++++++ workflow_mutator_test.go | 42 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) create mode 100644 workflow_mutator_test.go diff --git a/workflow.go b/workflow.go index 57c817d..2a7fb5a 100644 --- a/workflow.go +++ b/workflow.go @@ -44,6 +44,10 @@ type Workflow struct { Clock clock.Clock // Clock for retry and unit test DefaultOption *StepOption // DefaultOption is the default option for all Steps + // Mutators are evaluated against every step in this workflow before that + // step's first attempt. See [Mutator] / [Mutate]. + Mutators []Mutator + StepBuilder // StepBuilder to call BuildStep() for Steps steps map[Steper]*State // the internal states of Steps @@ -74,6 +78,17 @@ func (w *Workflow) Add(was ...Builder) *Workflow { return w } +// PrependMutators inserts mw at the front of w.Mutators, preserving the +// invariant that propagated parent Mutators run before locally-registered +// child Mutators. Safe to call multiple times; the once-per-step flag on +// State prevents double application. +func (w *Workflow) PrependMutators(mw []Mutator) { + if len(mw) == 0 { + return + } + w.Mutators = append(append([]Mutator{}, mw...), w.Mutators...) +} + // AddStep adds a Step into Workflow with the given phase and config. func (w *Workflow) addStep(step Steper, config *StepConfig) { if step == nil { @@ -556,3 +571,10 @@ func (s *SubWorkflow) Do(ctx context.Context) error { return s.w.Do(ctx) } // Reset resets the sub-workflow to ready for BuildStep() func (s *SubWorkflow) Reset() { s.w = Workflow{} } + +// PrependMutators forwards mw to the inner workflow. Implements [MutatorReceiver] +// so parent workflows can propagate Mutators into a sub-workflow before its +// scheduling pass begins. +func (s *SubWorkflow) PrependMutators(mw []Mutator) { + s.w.PrependMutators(mw) +} diff --git a/workflow_mutator_test.go b/workflow_mutator_test.go new file mode 100644 index 0000000..5f79ec2 --- /dev/null +++ b/workflow_mutator_test.go @@ -0,0 +1,42 @@ +package flow_test + +import ( + "context" + "testing" + + flow "github.com/Azure/go-workflow" + "github.com/stretchr/testify/assert" +) + +type wfFoo struct{} + +func (*wfFoo) Do(context.Context) error { return nil } + +func TestWorkflow_PrependMutators(t *testing.T) { + m1 := flow.Mutate[*wfFoo](func(ctx context.Context, f *wfFoo) flow.Builder { return nil }) + m2 := flow.Mutate[*wfFoo](func(ctx context.Context, f *wfFoo) flow.Builder { return nil }) + + w := &flow.Workflow{Mutators: []flow.Mutator{m2}} + w.PrependMutators([]flow.Mutator{m1}) + + assert.Len(t, w.Mutators, 2) +} + +func TestWorkflow_PrependMutatorsNilOrEmpty(t *testing.T) { + w := &flow.Workflow{} + w.PrependMutators(nil) + assert.Empty(t, w.Mutators) + w.PrependMutators([]flow.Mutator{}) + assert.Empty(t, w.Mutators) +} + +func TestSubWorkflow_PrependMutators(t *testing.T) { + type sub struct{ flow.SubWorkflow } + s := &sub{} + m := flow.Mutate[*wfFoo](func(ctx context.Context, f *wfFoo) flow.Builder { return nil }) + + // MutatorReceiver must be implemented + var _ flow.MutatorReceiver = s + s.PrependMutators([]flow.Mutator{m}) + // No panic / no error → behaviour verified by integration test in Task 7 +} From 51eaa59c7e4ae40f271643a0caab5b2dfb40d146 Mon Sep 17 00:00:00 2001 From: Xingfei Xu Date: Tue, 12 May 2026 02:01:50 +0000 Subject: [PATCH 19/31] feat(workflow): apply Mutators once per step before first attempt --- workflow.go | 31 +++++++++++++++++++++++++++++++ workflow_mutator_test.go | 29 +++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) diff --git a/workflow.go b/workflow.go index 2a7fb5a..e1381da 100644 --- a/workflow.go +++ b/workflow.go @@ -131,6 +131,29 @@ func (w *Workflow) addStep(step Steper, config *StepConfig) { } } +// applyMutators runs each Mutator in w.Mutators against step. For every match, +// the returned Builder's config keyed on the matched layer is merged into the +// state of step (the wrapper key in this workflow). Called once per step, +// just before its first attempt. +func (w *Workflow) applyMutators(ctx context.Context, step Steper, state *State) { + if len(w.Mutators) == 0 { + return + } + for _, m := range w.Mutators { + matched, target, b := m.applyTo(ctx, step) + if !matched || b == nil { + continue + } + for s, cfg := range b.AddToWorkflow() { + if s == target { + state.MergeConfig(cfg) + } + // configs keyed on other steps are silently dropped — Mutator + // scope is the matched layer only. + } + } +} + // setUpstream will put the upstream into proper state. func (w *Workflow) setUpstream(step, up Steper) { if step == nil || up == nil { @@ -392,6 +415,14 @@ func (w *Workflow) tick(ctx context.Context) bool { if isAnyUpstreamNotTerminated(ups) { continue } + // Apply Mutators exactly once per step, before reading Option / + // evaluating Condition / starting the first attempt. This way the + // Option/Before/After contributions from Mutators are visible to the + // rest of this iteration. + if !state.MutatorsApplied() { + w.applyMutators(ctx, step, state) + state.SetMutatorsApplied() + } option := state.Option() cond := DefaultCondition if option != nil && option.Condition != nil { diff --git a/workflow_mutator_test.go b/workflow_mutator_test.go index 5f79ec2..daaf1f4 100644 --- a/workflow_mutator_test.go +++ b/workflow_mutator_test.go @@ -40,3 +40,32 @@ func TestSubWorkflow_PrependMutators(t *testing.T) { s.PrependMutators([]flow.Mutator{m}) // No panic / no error → behaviour verified by integration test in Task 7 } + +type wfGreet struct { + Greeting string + Who string +} + +func (g *wfGreet) Do(context.Context) error { return nil } + +func TestMutator_mergesInputBeforeFirstAttempt(t *testing.T) { + called := 0 + g := &wfGreet{Greeting: "Hi"} + w := &flow.Workflow{ + Mutators: []flow.Mutator{ + flow.Mutate[*wfGreet](func(ctx context.Context, gg *wfGreet) flow.Builder { + called++ + return flow.Step(gg).Input(func(_ context.Context, gg *wfGreet) error { + gg.Who = "world" + return nil + }) + }), + }, + } + w.Add(flow.Step(g)) + + err := w.Do(context.Background()) + assert.NoError(t, err) + assert.Equal(t, 1, called, "mutator must run exactly once") + assert.Equal(t, "world", g.Who, "mutator-contributed Input must run before Do") +} From f2dfded49b5ffe1b7f4ef9feac5917394fcd973a Mon Sep 17 00:00:00 2001 From: Xingfei Xu Date: Tue, 12 May 2026 02:08:52 +0000 Subject: [PATCH 20/31] fix(workflow): set MutatorsApplied before applyMutators to be panic-safe --- workflow.go | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/workflow.go b/workflow.go index e1381da..c422a09 100644 --- a/workflow.go +++ b/workflow.go @@ -417,11 +417,14 @@ func (w *Workflow) tick(ctx context.Context) bool { } // Apply Mutators exactly once per step, before reading Option / // evaluating Condition / starting the first attempt. This way the - // Option/Before/After contributions from Mutators are visible to the - // rest of this iteration. + // Option/Before/After contributions from Mutators are visible to + // the rest of this iteration. The flag is flipped BEFORE the merge + // runs so that a panicking mutator (caught by applyMutators's + // recover when DontPanic is set in Task 9) does not cause re-entry + // on the next tick. if !state.MutatorsApplied() { - w.applyMutators(ctx, step, state) state.SetMutatorsApplied() + w.applyMutators(ctx, step, state) } option := state.Option() cond := DefaultCondition From b490f026d78919981c67bf6d45d6ec0961174a24 Mon Sep 17 00:00:00 2001 From: Xingfei Xu Date: Tue, 12 May 2026 02:27:30 +0000 Subject: [PATCH 21/31] test(mutator): once-per-step across retries; nil Mutators no-op --- workflow_mutator_test.go | 48 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/workflow_mutator_test.go b/workflow_mutator_test.go index daaf1f4..ec7f619 100644 --- a/workflow_mutator_test.go +++ b/workflow_mutator_test.go @@ -2,6 +2,7 @@ package flow_test import ( "context" + "errors" "testing" flow "github.com/Azure/go-workflow" @@ -69,3 +70,50 @@ func TestMutator_mergesInputBeforeFirstAttempt(t *testing.T) { assert.Equal(t, 1, called, "mutator must run exactly once") assert.Equal(t, "world", g.Who, "mutator-contributed Input must run before Do") } + +type wfFlaky struct { + attempts int + failTill int +} + +func (f *wfFlaky) Do(context.Context) error { + f.attempts++ + if f.attempts <= f.failTill { + return errors.New("transient") + } + return nil +} + +func TestMutator_runsExactlyOnceAcrossRetries(t *testing.T) { + mutatorCalls := 0 + inputCalls := 0 + f := &wfFlaky{failTill: 2} + w := &flow.Workflow{ + Mutators: []flow.Mutator{ + flow.Mutate[*wfFlaky](func(ctx context.Context, ff *wfFlaky) flow.Builder { + mutatorCalls++ + return flow.Step(ff). + Retry(func(o *flow.RetryOption) { o.Attempts = 3 }). + Input(func(_ context.Context, _ *wfFlaky) error { + inputCalls++ + return nil + }) + }), + }, + } + w.Add(flow.Step(f)) + + err := w.Do(context.Background()) + assert.NoError(t, err) + assert.Equal(t, 1, mutatorCalls, "mutator user-fn runs once") + assert.Equal(t, 3, inputCalls, "mutator-contributed Input runs per attempt") + assert.Equal(t, 3, f.attempts) +} + +func TestMutator_nilSliceIsNoOp(t *testing.T) { + g := &wfGreet{Greeting: "Hi", Who: "Bob"} + w := &flow.Workflow{} // Mutators == nil + w.Add(flow.Step(g)) + assert.NoError(t, w.Do(context.Background())) + assert.Equal(t, "Bob", g.Who) +} From cea452c68a1a33ba522d64a635717d4c5abee3bc Mon Sep 17 00:00:00 2001 From: Xingfei Xu Date: Tue, 12 May 2026 02:29:48 +0000 Subject: [PATCH 22/31] feat(workflow): propagate Mutators into nested workflows via MutatorReceiver --- workflow.go | 6 ++++ workflow_mutator_test.go | 65 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+) diff --git a/workflow.go b/workflow.go index c422a09..686b2a5 100644 --- a/workflow.go +++ b/workflow.go @@ -424,6 +424,12 @@ func (w *Workflow) tick(ctx context.Context) bool { // on the next tick. if !state.MutatorsApplied() { state.SetMutatorsApplied() + // Propagate this workflow's Mutators into nested workflows so + // they can apply against inner steps when those steps are + // scheduled by the inner workflow. + if recv, ok := step.(MutatorReceiver); ok && len(w.Mutators) > 0 { + recv.PrependMutators(w.Mutators) + } w.applyMutators(ctx, step, state) } option := state.Option() diff --git a/workflow_mutator_test.go b/workflow_mutator_test.go index ec7f619..a4c36a8 100644 --- a/workflow_mutator_test.go +++ b/workflow_mutator_test.go @@ -117,3 +117,68 @@ func TestMutator_nilSliceIsNoOp(t *testing.T) { assert.NoError(t, w.Do(context.Background())) assert.Equal(t, "Bob", g.Who) } + +type wfComposite struct { + flow.SubWorkflow + Inner wfGreet +} + +func (c *wfComposite) Do(ctx context.Context) error { + // Lazy build inside Do — replaces BuildStep pattern. + c.Add(flow.Step(&c.Inner)) + return c.SubWorkflow.Do(ctx) +} + +func TestMutator_reachesIntoSubWorkflow(t *testing.T) { + c := &wfComposite{Inner: wfGreet{Greeting: "Hello"}} + w := &flow.Workflow{ + Mutators: []flow.Mutator{ + flow.Mutate[*wfGreet](func(ctx context.Context, g *wfGreet) flow.Builder { + g.Who = "world" + return nil + }), + }, + } + w.Add(flow.Step(c)) + + err := w.Do(context.Background()) + assert.NoError(t, err) + assert.Equal(t, "world", c.Inner.Who, "parent Mutator must reach inner step via PrependMutators") +} + +func TestMutator_reachesIntoNestedWorkflow(t *testing.T) { + innerG := &wfGreet{Greeting: "Hi"} + innerW := new(flow.Workflow).Add(flow.Step(innerG)) + + w := &flow.Workflow{ + Mutators: []flow.Mutator{ + flow.Mutate[*wfGreet](func(ctx context.Context, g *wfGreet) flow.Builder { + g.Who = "world" + return nil + }), + }, + } + w.Add(flow.Step(innerW)) + + err := w.Do(context.Background()) + assert.NoError(t, err) + assert.Equal(t, "world", innerG.Who) +} + +func TestMutator_reachesLazilyAddedInnerStep(t *testing.T) { + c := &wfComposite{Inner: wfGreet{Greeting: "Yo"}} + called := 0 + w := &flow.Workflow{ + Mutators: []flow.Mutator{ + flow.Mutate[*wfGreet](func(ctx context.Context, g *wfGreet) flow.Builder { + called++ + g.Who = "lazy" + return nil + }), + }, + } + w.Add(flow.Step(c)) + assert.NoError(t, w.Do(context.Background())) + assert.Equal(t, 1, called) + assert.Equal(t, "lazy", c.Inner.Who) +} From 8a877571f2f6028f01673335e7065835720f2bd8 Mon Sep 17 00:00:00 2001 From: Xingfei Xu Date: Tue, 12 May 2026 02:41:06 +0000 Subject: [PATCH 23/31] =?UTF-8?q?test(mutator):=20ordering=20=E2=80=94=20p?= =?UTF-8?q?lan=20first,=20mutators=20in=20slice=20order?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- workflow_mutator_test.go | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/workflow_mutator_test.go b/workflow_mutator_test.go index a4c36a8..3fa30d6 100644 --- a/workflow_mutator_test.go +++ b/workflow_mutator_test.go @@ -182,3 +182,43 @@ func TestMutator_reachesLazilyAddedInnerStep(t *testing.T) { assert.Equal(t, 1, called) assert.Equal(t, "lazy", c.Inner.Who) } + +func TestMutator_planInputBeforeMutatorInput(t *testing.T) { + g := &wfGreet{Greeting: "Hi"} + order := []string{} + w := &flow.Workflow{ + Mutators: []flow.Mutator{ + flow.Mutate[*wfGreet](func(ctx context.Context, gg *wfGreet) flow.Builder { + return flow.Step(gg).Input(func(_ context.Context, _ *wfGreet) error { + order = append(order, "mutator") + return nil + }) + }), + }, + } + w.Add( + flow.Step(g).Input(func(_ context.Context, _ *wfGreet) error { + order = append(order, "plan") + return nil + }), + ) + assert.NoError(t, w.Do(context.Background())) + assert.Equal(t, []string{"plan", "mutator"}, order) +} + +func TestMutator_multipleMutatorsRunInSliceOrder(t *testing.T) { + g := &wfGreet{} + order := []string{} + mk := func(name string) flow.Mutator { + return flow.Mutate[*wfGreet](func(ctx context.Context, gg *wfGreet) flow.Builder { + return flow.Step(gg).Input(func(_ context.Context, _ *wfGreet) error { + order = append(order, name) + return nil + }) + }) + } + w := &flow.Workflow{Mutators: []flow.Mutator{mk("m1"), mk("m2")}} + w.Add(flow.Step(g)) + assert.NoError(t, w.Do(context.Background())) + assert.Equal(t, []string{"m1", "m2"}, order) +} From 60d38e39508d7abbd57449a02e4d6b26ac4c4b60 Mon Sep 17 00:00:00 2001 From: Xingfei Xu Date: Tue, 12 May 2026 02:41:18 +0000 Subject: [PATCH 24/31] test(mutator): merge happens at first schedule, not at Add --- workflow_mutator_test.go | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/workflow_mutator_test.go b/workflow_mutator_test.go index 3fa30d6..a89d8dc 100644 --- a/workflow_mutator_test.go +++ b/workflow_mutator_test.go @@ -222,3 +222,20 @@ func TestMutator_multipleMutatorsRunInSliceOrder(t *testing.T) { assert.NoError(t, w.Do(context.Background())) assert.Equal(t, []string{"m1", "m2"}, order) } + +func TestMutator_mergeAtFirstScheduling_NotAtAdd(t *testing.T) { + g := &wfGreet{} + called := 0 + w := &flow.Workflow{ + Mutators: []flow.Mutator{ + flow.Mutate[*wfGreet](func(ctx context.Context, gg *wfGreet) flow.Builder { + called++ + return nil + }), + }, + } + w.Add(flow.Step(g)) + assert.Equal(t, 0, called, "Add must not invoke mutators") + assert.NoError(t, w.Do(context.Background())) + assert.Equal(t, 1, called) +} From 0200c42a3df222d93c7484378a44ed62795e5282 Mon Sep 17 00:00:00 2001 From: Xingfei Xu Date: Tue, 12 May 2026 02:41:31 +0000 Subject: [PATCH 25/31] test(mutator): match through Name wrapper via Unwrap --- workflow_mutator_test.go | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/workflow_mutator_test.go b/workflow_mutator_test.go index a89d8dc..8a8247a 100644 --- a/workflow_mutator_test.go +++ b/workflow_mutator_test.go @@ -239,3 +239,18 @@ func TestMutator_mergeAtFirstScheduling_NotAtAdd(t *testing.T) { assert.NoError(t, w.Do(context.Background())) assert.Equal(t, 1, called) } + +func TestMutator_matchesThroughNameWrapper(t *testing.T) { + g := &wfGreet{Greeting: "Hi"} + w := &flow.Workflow{ + Mutators: []flow.Mutator{ + flow.Mutate[*wfGreet](func(ctx context.Context, gg *wfGreet) flow.Builder { + gg.Who = "world" + return nil + }), + }, + } + w.Add(flow.Name(g, "named-greet")) + assert.NoError(t, w.Do(context.Background())) + assert.Equal(t, "world", g.Who) +} From 8f74b72a351290f973a948c75e24d59b9487e4f7 Mon Sep 17 00:00:00 2001 From: Xingfei Xu Date: Tue, 12 May 2026 02:45:04 +0000 Subject: [PATCH 26/31] test(mutator): ctx, scope-isolation, and DontPanic-protected panic recovery MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add three tests: - TestMutator_receivesWorkflowCtx: verifies the workflow ctx passed to Do is forwarded into each mutator function. - TestMutator_unrelatedBuilderEntryIgnored: verifies that a Builder keyed on a step not in the workflow is silently dropped (scope isolation). - TestMutator_panicCaughtWhenDontPanic: verifies panics in mutators are caught when DontPanic=true and surface as the step's terminal error. Production change: wrap each mutator call in catchPanicAsError when DontPanic is set (mirrors the makeDoForStep `do` idiom). On panic, SetError on the state and short-circuit in tick — marking the step Failed immediately so the goroutine never overwrites the captured error. --- workflow.go | 45 ++++++++++++++++++++++++------ workflow_mutator_test.go | 60 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 96 insertions(+), 9 deletions(-) diff --git a/workflow.go b/workflow.go index 686b2a5..a3a6296 100644 --- a/workflow.go +++ b/workflow.go @@ -139,17 +139,28 @@ func (w *Workflow) applyMutators(ctx context.Context, step Steper, state *State) if len(w.Mutators) == 0 { return } + do := func(fn func() error) error { return fn() } + if w.DontPanic { + do = catchPanicAsError + } for _, m := range w.Mutators { - matched, target, b := m.applyTo(ctx, step) - if !matched || b == nil { - continue - } - for s, cfg := range b.AddToWorkflow() { - if s == target { - state.MergeConfig(cfg) + err := do(func() error { + matched, target, b := m.applyTo(ctx, step) + if !matched || b == nil { + return nil + } + for s, cfg := range b.AddToWorkflow() { + if s == target { + state.MergeConfig(cfg) + } + // configs keyed on other steps are silently dropped — Mutator + // scope is the matched layer only. } - // configs keyed on other steps are silently dropped — Mutator - // scope is the matched layer only. + return nil + }) + if err != nil { + state.SetError(err) + return // a panicking mutator is fatal for this step } } } @@ -431,6 +442,22 @@ func (w *Workflow) tick(ctx context.Context) bool { recv.PrependMutators(w.Mutators) } w.applyMutators(ctx, step, state) + // If applyMutators caught a panic (DontPanic), the state already + // holds an error. Short-circuit: mark the step Failed immediately + // so it never reaches the goroutine that would overwrite the error. + if err := state.GetError(); err != nil { + state.SetStepResult(StepResult{ + Status: Failed, + Err: err, + FinishedAt: w.Clock.Now(), + }) + w.waitGroup.Add(1) + go func() { + defer w.waitGroup.Done() + w.signalStatusChange() + }() + continue + } } option := state.Option() cond := DefaultCondition diff --git a/workflow_mutator_test.go b/workflow_mutator_test.go index 8a8247a..cff58ab 100644 --- a/workflow_mutator_test.go +++ b/workflow_mutator_test.go @@ -254,3 +254,63 @@ func TestMutator_matchesThroughNameWrapper(t *testing.T) { assert.NoError(t, w.Do(context.Background())) assert.Equal(t, "world", g.Who) } + +type ctxKey string + +const wfCtxKey ctxKey = "k" + +func TestMutator_receivesWorkflowCtx(t *testing.T) { + g := &wfGreet{} + got := "" + w := &flow.Workflow{ + Mutators: []flow.Mutator{ + flow.Mutate[*wfGreet](func(ctx context.Context, gg *wfGreet) flow.Builder { + if v, ok := ctx.Value(wfCtxKey).(string); ok { + got = v + } + return nil + }), + }, + } + w.Add(flow.Step(g)) + ctx := context.WithValue(context.Background(), wfCtxKey, "value-from-do") + assert.NoError(t, w.Do(ctx)) + assert.Equal(t, "value-from-do", got) +} + +func TestMutator_unrelatedBuilderEntryIgnored(t *testing.T) { + g := &wfGreet{Greeting: "Hi", Who: "Bob"} + other := &wfGreet{Who: "untouched"} + w := &flow.Workflow{ + Mutators: []flow.Mutator{ + flow.Mutate[*wfGreet](func(ctx context.Context, gg *wfGreet) flow.Builder { + if gg == g { + // Mistakenly return a Builder keyed on `other` instead of `gg`. + return flow.Step(other).Input(func(_ context.Context, o *wfGreet) error { + o.Who = "stolen" + return nil + }) + } + return nil + }), + }, + } + w.Add(flow.Step(g)) // only g is in the workflow + assert.NoError(t, w.Do(context.Background())) + assert.Equal(t, "untouched", other.Who, "config keyed on a different step is dropped") +} + +func TestMutator_panicCaughtWhenDontPanic(t *testing.T) { + g := &wfGreet{} + w := &flow.Workflow{ + DontPanic: true, + Mutators: []flow.Mutator{ + flow.Mutate[*wfGreet](func(ctx context.Context, gg *wfGreet) flow.Builder { + panic("boom") + }), + }, + } + w.Add(flow.Step(g)) + err := w.Do(context.Background()) + assert.Error(t, err) +} From e60964bd53fe8f05a0ae9c1cab90b658819fd59b Mon Sep 17 00:00:00 2001 From: Xingfei Xu Date: Tue, 12 May 2026 03:03:51 +0000 Subject: [PATCH 27/31] chore(deprecation): mark BuildStep and SubWorkflow.Reset deprecated --- build_step.go | 9 +++++++-- workflow.go | 7 ++++++- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/build_step.go b/build_step.go index d631bf7..e2c6057 100644 --- a/build_step.go +++ b/build_step.go @@ -12,8 +12,13 @@ package flow // ) type StepBuilder struct{ built Set[Steper] } -// BuildStep calls BuildStep() method of the Steper if it's implemented, -// and ensure it's called only once for each Steper. +// BuildStep walks s and invokes any nested Step's BuildStep() method (and +// Reset() if implemented) to allow lazy initialization of composite steps. +// +// Deprecated: this lazy-initialization hook will be removed in the next major +// version of go-workflow. Use [Mutate] for cross-cutting modification, and +// construct sub-workflows inside Do() instead. See +// openspec/changes/2026-05-06-step-mutator/design.md. func (sb *StepBuilder) BuildStep(s Steper) { if sb.built == nil { sb.built = make(Set[Steper]) diff --git a/workflow.go b/workflow.go index a3a6296..a345ff3 100644 --- a/workflow.go +++ b/workflow.go @@ -636,7 +636,12 @@ func (s *SubWorkflow) Unwrap() Steper { return &s.w } func (s *SubWorkflow) Add(builders ...Builder) *Workflow { return s.w.Add(builders...) } func (s *SubWorkflow) Do(ctx context.Context) error { return s.w.Do(ctx) } -// Reset resets the sub-workflow to ready for BuildStep() +// Reset clears the inner workflow's state, allowing it to be re-built. +// +// Deprecated: Reset is only invoked by the deprecated [StepBuilder.BuildStep] +// path. With the [Mutator] mechanism (see [Mutate]) and Do()-time sub-workflow +// construction, Reset is no longer needed and will be removed in the next +// major version of go-workflow. func (s *SubWorkflow) Reset() { s.w = Workflow{} } // PrependMutators forwards mw to the inner workflow. Implements [MutatorReceiver] From 928c431cfb8ae4e242dd29900f9257d5a682a6fe Mon Sep 17 00:00:00 2001 From: Xingfei Xu Date: Tue, 12 May 2026 03:05:24 +0000 Subject: [PATCH 28/31] docs(example): activate Mutator examples; deprecate BuildStep example --- example/13_composite_step_test.go | 7 +++++++ example/15_mutator_test.go | 22 ++++++---------------- 2 files changed, 13 insertions(+), 16 deletions(-) diff --git a/example/13_composite_step_test.go b/example/13_composite_step_test.go index 823e490..cd6e579 100644 --- a/example/13_composite_step_test.go +++ b/example/13_composite_step_test.go @@ -74,6 +74,13 @@ func ExampleCompositeStep() { // SimpleStep Failed! } +// ExampleCompositeViaWorkflow demonstrates the legacy BuildStep-based +// composite step pattern. +// +// Deprecated: BuildStep is being removed in the next major version. See +// ExampleMutate_subWorkflow in 15_mutator_test.go for the new pattern that +// constructs the inner workflow inside Do() and uses flow.Mutate for +// cross-cutting modification. func ExampleCompositeViaWorkflow() { var ( composite = &CompositeViaWorkflow{SimpleStep: SimpleStep{ diff --git a/example/15_mutator_test.go b/example/15_mutator_test.go index 0d13932..86640af 100644 --- a/example/15_mutator_test.go +++ b/example/15_mutator_test.go @@ -1,15 +1,4 @@ -//go:build mutator_design - -// This file is a DESIGN-PHASE reference for the proposed flow.Mutator API -// (openspec/changes/2026-05-06-step-mutator). It does NOT compile against the -// current tree because flow.Mutator / flow.Mutate / Workflow.Mutators do not -// yet exist. The build tag `mutator_design` keeps it out of the default build. -// -// When the implementation lands, drop the build tag (rename file or remove the -// header) and these become live ExampleXxx tests. -// -// The intent of this file is to let reviewers see the *final user-facing -// shape* of the Mutator API before any implementation is written. +// Examples of the flow.Mutator API. package flow_test @@ -78,9 +67,10 @@ func ExampleMutate_input() { }), }, } + hello := &Greet{Greeting: "Hello"} + bob := &Greet{Greeting: "Hi", Who: "Bob"} w.Add( - flow.Step(&Greet{Greeting: "Hello"}), // Who left blank, will be filled in - flow.Step(&Greet{Greeting: "Hi", Who: "Bob"}), // already set, Mutator does not override + flow.Pipe(hello, bob), // sequential so output order is deterministic ) _ = w.Do(context.Background()) // Output: @@ -99,7 +89,7 @@ func ExampleMutate_retryOverride() { Mutators: []flow.Mutator{ flow.Mutate[*Greet](func(_ context.Context, g *Greet) flow.Builder { return flow.Step(g). - Retry(func(o *flow.RetryOption) { o.MaxAttempts = 3 }). + Retry(func(o *flow.RetryOption) { o.Attempts = 3 }). Timeout(2 * time.Second) }), }, @@ -273,7 +263,7 @@ func ExampleMutate_unwrap() { }), }, } - w.Add(flow.Step(flow.Name("named-greet", greet))) + w.Add(flow.Name(greet, "named-greet")) _ = w.Do(context.Background()) // Output: // Hi, world! From 5e08933cfc29e71521ece3797230aaeef060c2be Mon Sep 17 00:00:00 2001 From: Xingfei Xu Date: Tue, 12 May 2026 03:07:59 +0000 Subject: [PATCH 29/31] docs(openspec): tick off completed step-mutator tasks --- .../changes/2026-05-06-step-mutator/tasks.md | 72 +++++++++---------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/openspec/changes/2026-05-06-step-mutator/tasks.md b/openspec/changes/2026-05-06-step-mutator/tasks.md index d494a04..90277ab 100644 --- a/openspec/changes/2026-05-06-step-mutator/tasks.md +++ b/openspec/changes/2026-05-06-step-mutator/tasks.md @@ -1,37 +1,37 @@ ## 1. Define Mutator Types -- [ ] 1.1 Create `mutator.go` with `Mutator` interface (single unexported method `applyTo(ctx context.Context, step Steper) (matched bool, builder Builder)`) -- [ ] 1.2 Add `Mutate[T Steper](fn func(ctx context.Context, step T) Builder) Mutator` generic constructor -- [ ] 1.3 Add internal `mutatorFunc[T Steper]` type implementing `Mutator.applyTo` via `step.(T)` assertion, forwarding `ctx` to `fn` -- [ ] 1.4 Add `MutatorReceiver` interface with `PrependMutators(mw []Mutator)` +- [x] 1.1 Create `mutator.go` with `Mutator` interface (single unexported method `applyTo(ctx context.Context, step Steper) (matched bool, builder Builder)`) +- [x] 1.2 Add `Mutate[T Steper](fn func(ctx context.Context, step T) Builder) Mutator` generic constructor +- [x] 1.3 Add internal `mutatorFunc[T Steper]` type implementing `Mutator.applyTo` via `step.(T)` assertion, forwarding `ctx` to `fn` +- [x] 1.4 Add `MutatorReceiver` interface with `PrependMutators(mw []Mutator)` ## 2. Wire Workflow Field and State -- [ ] 2.1 Add `Mutators []Mutator` field to `Workflow` in `workflow.go` -- [ ] 2.2 Add `MutatorsApplied bool` (or equivalent — e.g. an internal sentinel-based marker) to `State` in `state.go` to enforce once-per-step -- [ ] 2.3 Add `(*Workflow).PrependMutators(mw []Mutator)` so `*Workflow` (used as a step) propagates Mutators into nested workflows -- [ ] 2.4 Add `(*SubWorkflow).PrependMutators(mw []Mutator)` mirroring `*Workflow`, prepending parent Mutators to inner workflow's +- [x] 2.1 Add `Mutators []Mutator` field to `Workflow` in `workflow.go` +- [x] 2.2 Add `MutatorsApplied bool` (or equivalent — e.g. an internal sentinel-based marker) to `State` in `state.go` to enforce once-per-step +- [x] 2.3 Add `(*Workflow).PrependMutators(mw []Mutator)` so `*Workflow` (used as a step) propagates Mutators into nested workflows +- [x] 2.4 Add `(*SubWorkflow).PrependMutators(mw []Mutator)` mirroring `*Workflow`, prepending parent Mutators to inner workflow's ## 3. First-Schedule Merge -- [ ] 3.1 In the scheduling-time path that takes a step from `Pending` to its first attempt (precise location depends on the Step Interceptor change's `stepExecution`), add a once-per-step Mutator merge block guarded by `state.MutatorsApplied` -- [ ] 3.2 For each matched Mutator: call `applyTo(ctx, step)` (where `ctx` is the workflow ctx from `Workflow.Do(ctx)`); if it returns `(true, b)` with non-nil `b`, iterate `b.AddToWorkflow()` and merge only the entry whose key equals `step` into `state.Config` via existing `StepConfig.Merge` -- [ ] 3.3 Set `state.MutatorsApplied = true` after the loop, before the first AttemptInterceptor invocation -- [ ] 3.4 Immediately before invoking a step that hosts a sub-workflow, if the step implements `MutatorReceiver`, call `PrependMutators(parent.Mutators)` so the inner workflow's first-schedule pass sees parent Mutators +- [x] 3.1 In the scheduling-time path that takes a step from `Pending` to its first attempt (precise location depends on the Step Interceptor change's `stepExecution`), add a once-per-step Mutator merge block guarded by `state.MutatorsApplied` +- [x] 3.2 For each matched Mutator: call `applyTo(ctx, step)` (where `ctx` is the workflow ctx from `Workflow.Do(ctx)`); if it returns `(true, b)` with non-nil `b`, iterate `b.AddToWorkflow()` and merge only the entry whose key equals `step` into `state.Config` via existing `StepConfig.Merge` +- [x] 3.3 Set `state.MutatorsApplied = true` after the loop, before the first AttemptInterceptor invocation +- [x] 3.4 Immediately before invoking a step that hosts a sub-workflow, if the step implements `MutatorReceiver`, call `PrependMutators(parent.Mutators)` so the inner workflow's first-schedule pass sees parent Mutators ## 4. Deprecation Notes -- [ ] 4.1 Add `// Deprecated: Use flow.Mutate or construct sub-workflows in Do()` to `StepBuilder.BuildStep` godoc in `build_step.go` -- [ ] 4.2 Add `// Deprecated:` to `flow.SubWorkflow.Reset` godoc in `workflow.go`, noting it is only invoked by the deprecated `StepBuilder.BuildStep` path +- [x] 4.1 Add `// Deprecated: Use flow.Mutate or construct sub-workflows in Do()` to `StepBuilder.BuildStep` godoc in `build_step.go` +- [x] 4.2 Add `// Deprecated:` to `flow.SubWorkflow.Reset` godoc in `workflow.go`, noting it is only invoked by the deprecated `StepBuilder.BuildStep` path - [ ] 4.3 Add `// Deprecated:` to the `BuildStep` doc-comment template in `build_step.go` that shows users how to write the hook ## 5. Examples -- [ ] 5.1 Add `// Deprecated:` doc to `ExampleCompositeViaWorkflow` in `example/13_composite_step_test.go`, pointing to the new example +- [x] 5.1 Add `// Deprecated:` doc to `ExampleCompositeViaWorkflow` in `example/13_composite_step_test.go`, pointing to the new example - [ ] 5.2 Add `ExampleCompositeViaDo` showing sub-workflow construction inside `Do()` without `BuildStep` -- [ ] 5.3 Add `ExampleMutate_input` showing `flow.Mutate[*MyStep]` returning a Builder with `Input(...)` registered on `Workflow.Mutators` -- [ ] 5.4 Add `ExampleMutate_retryOverride` showing a Mutator that returns a Builder with `Retry(...)` to override retry policy across all instances of a type -- [ ] 5.5 Add `ExampleMutate_subWorkflow` showing parent Mutators reaching into a `SubWorkflow`-embedded composite step via `PrependMutators` +- [x] 5.3 Add `ExampleMutate_input` showing `flow.Mutate[*MyStep]` returning a Builder with `Input(...)` registered on `Workflow.Mutators` +- [x] 5.4 Add `ExampleMutate_retryOverride` showing a Mutator that returns a Builder with `Retry(...)` to override retry policy across all instances of a type +- [x] 5.5 Add `ExampleMutate_subWorkflow` showing parent Mutators reaching into a `SubWorkflow`-embedded composite step via `PrependMutators` ## 6. Spec Updates @@ -45,26 +45,26 @@ ## 7. Tests -- [ ] 7.1 Test: `Mutate[*Foo]` runs against a `*Foo` step, does not run against a `*Bar` step -- [ ] 7.1a Test: Mutator function receives the workflow-scoped ctx (the same ctx passed to `Workflow.Do(ctx)`); a value placed in ctx via `context.WithValue` before `Workflow.Do` is observable in the Mutator -- [ ] 7.2 Test: Mutator's user function is invoked exactly once across multiple attempts (use `RetryOption{MaxAttempts: 3}`, count invocations) -- [ ] 7.3 Test: Mutator-returned Builder's `Input` callback runs on every attempt (3 attempts → 3 invocations) -- [ ] 7.4 Test: Mutator returning `nil` Builder is a no-op; field mutation done in the user function persists into `Do` -- [ ] 7.5 Test: Mutator returning a Builder with `Retry(...)` overrides retry policy of matched steps +- [x] 7.1 Test: `Mutate[*Foo]` runs against a `*Foo` step, does not run against a `*Bar` step +- [x] 7.1a Test: Mutator function receives the workflow-scoped ctx (the same ctx passed to `Workflow.Do(ctx)`); a value placed in ctx via `context.WithValue` before `Workflow.Do` is observable in the Mutator +- [x] 7.2 Test: Mutator's user function is invoked exactly once across multiple attempts (use `RetryOption{MaxAttempts: 3}`, count invocations) +- [x] 7.3 Test: Mutator-returned Builder's `Input` callback runs on every attempt (3 attempts → 3 invocations) +- [x] 7.4 Test: Mutator returning `nil` Builder is a no-op; field mutation done in the user function persists into `Do` +- [x] 7.5 Test: Mutator returning a Builder with `Retry(...)` overrides retry policy of matched steps - [ ] 7.6 Test: Mutator returning a Builder with `Before/After` adds those callbacks to the step's chain -- [ ] 7.7 Test: Multiple Mutators registered for the same type run in slice order; their `Before` contributions appear in the same order in the merged chain -- [ ] 7.8 Test: Mutator-returned Builder with config for a step *other than* the one passed in has that config silently ignored -- [ ] 7.9 Test: Parent workflow Mutator reaches `*Foo` inside a `SubWorkflow`-embedded composite step -- [ ] 7.10 Test: Parent workflow Mutator reaches `*Foo` inside a nested `*Workflow` used as a step -- [ ] 7.11 Test: Lazily added inner step (added inside composite step's `Do`) is reached by parent Mutator -- [ ] 7.12 Test: A workflow with `Mutators == nil` runs identically to a workflow without Mutators (no overhead, no panic) -- [ ] 7.13 Test: Mutator's user function panicking is caught by the workflow's panic recovery (when `DontPanic` is true) -- [ ] 7.14 Test: Plan-declared `Input` and Mutator-contributed `Input` both run, in expected order -- [ ] 7.15 Test: Plan `Input` runs **before** Mutator-contributed `Input`; with multiple Mutators, Mutator-contributed Inputs run in `Workflow.Mutators` slice order, all after plan -- [ ] 7.16 Test: Mutator merge does not happen at `Add` time (assert an external slice mutated only after `Do` reaches the step) -- [ ] 7.17 Test: `Mutate[*Inner]` matches a `flow.Name("...", inner)` wrapper via Unwrap; the user function receives the typed `*Inner` pointer +- [x] 7.7 Test: Multiple Mutators registered for the same type run in slice order; their `Before` contributions appear in the same order in the merged chain +- [x] 7.8 Test: Mutator-returned Builder with config for a step *other than* the one passed in has that config silently ignored +- [x] 7.9 Test: Parent workflow Mutator reaches `*Foo` inside a `SubWorkflow`-embedded composite step +- [x] 7.10 Test: Parent workflow Mutator reaches `*Foo` inside a nested `*Workflow` used as a step +- [x] 7.11 Test: Lazily added inner step (added inside composite step's `Do`) is reached by parent Mutator +- [x] 7.12 Test: A workflow with `Mutators == nil` runs identically to a workflow without Mutators (no overhead, no panic) +- [x] 7.13 Test: Mutator's user function panicking is caught by the workflow's panic recovery (when `DontPanic` is true) +- [x] 7.14 Test: Plan-declared `Input` and Mutator-contributed `Input` both run, in expected order +- [x] 7.15 Test: Plan `Input` runs **before** Mutator-contributed `Input`; with multiple Mutators, Mutator-contributed Inputs run in `Workflow.Mutators` slice order, all after plan +- [x] 7.16 Test: Mutator merge does not happen at `Add` time (assert an external slice mutated only after `Do` reaches the step) +- [x] 7.17 Test: `Mutate[*Inner]` matches a `flow.Name("...", inner)` wrapper via Unwrap; the user function receives the typed `*Inner` pointer - [ ] 7.18 Test: When Mutator matches via Unwrap, the resulting Builder's config merges into `w.state[wrapper].Config` (the wrapper key in the owning workflow), not into a state entry keyed on the inner step -- [ ] 7.19 Test: `Mutate[*Wrapper]` registered against a `*Wrapper` wrapping `*Inner` matches `*Wrapper` (outer layer wins; user function receives `*Wrapper`, not `*Inner`) +- [x] 7.19 Test: `Mutate[*Wrapper]` registered against a `*Wrapper` wrapping `*Inner` matches `*Wrapper` (outer layer wins; user function receives `*Wrapper`, not `*Inner`) - [ ] 7.20 Test: Parent Mutator targeting `*X` does NOT directly write into inner workflow's state map; the inner workflow performs the merge after `PrependMutators` (assert inner's state[wrapper].Config gets the contribution, parent's state map is untouched) - [ ] 7.21 Test: Existing `Config merge destination follows StateOf` rule: `outer.Add(flow.Step(x).Input(fn))` where `x` lives in `inner` results in `fn` appearing in `inner.StateOf(x).Config.Before`, not in any outer state From 3d4ef95d0153eec2572fa15159aeacacf9b92a7d Mon Sep 17 00:00:00 2001 From: Xingfei Xu Date: Tue, 12 May 2026 05:22:51 +0000 Subject: [PATCH 30/31] docs(example): rewrite Mutator example as tutorial-style 13_mutators_test.go MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Rename 15_mutator_test.go → 13_mutators_test.go to fit the sequential learning path (after 12_testing_workflows). - Adopt the same header/structure as 04_callbacks, 08_workflow_in_workflow, 10_observability: "What you'll learn" → "Where they fit" → "When to reach for which mechanism" → progressive examples. - Use a single coherent use case (release-pipeline *Notify steps with a cross-cutting Mutator) instead of disjoint Greet/Farewell fixtures. - Five examples, each building on the previous: 1. _fieldDefaults — pure field mutation (return nil Builder) 2. _contributeConfig — Builder with BeforeStep 3. _ctxValue — Mutator receives the workflow ctx 4. _subWorkflow — parent Mutator reaches inner Steps via MutatorReceiver 5. _multipleInOrder — plan-first, then Mutators in slice order - Drop the old _retryOverride / _nilBuilder / _unwrap variants — they belonged to the design-doc stage; production users care about defaults, callbacks, ctx, propagation, and ordering. - Update example/README.md to list 13_mutators in the "Operate, debug, test" section. Co-Authored-By: Claude Opus 4.7 (1M context) --- example/13_mutators_test.go | 234 +++++++++++++++++++++++++++++++ example/15_mutator_test.go | 270 ------------------------------------ example/README.md | 1 + 3 files changed, 235 insertions(+), 270 deletions(-) create mode 100644 example/13_mutators_test.go delete mode 100644 example/15_mutator_test.go diff --git a/example/13_mutators_test.go b/example/13_mutators_test.go new file mode 100644 index 0000000..d1f88ca --- /dev/null +++ b/example/13_mutators_test.go @@ -0,0 +1,234 @@ +package flow_test + +import ( + "context" + "fmt" + + flow "github.com/Azure/go-workflow" +) + +// # Mutators: cross-cutting configuration by Step type +// +// **What you'll learn** +// - Register a `flow.Mutator` on a `Workflow` to contribute configuration +// to every Step of a chosen Go type, without naming each Step by hand. +// - A Mutator can mutate fields on the typed Step pointer, return a +// `flow.Builder` to register `Input` / `BeforeStep` / `AfterStep` / +// `Retry` / `Timeout` / ..., or both. +// - Parent-Workflow Mutators reach into sub-Workflows automatically — the +// same MutatorReceiver mechanism described in 10_observability for +// interceptors. +// +// **Where they fit** +// +// Mutator (workflow-level, by Go type — this file) +// runs once per Step, at first schedule, BEFORE any callbacks. +// ── plan-declared Input / BeforeStep callbacks +// ── Mutator-contributed Input / BeforeStep callbacks (appended) +// ── step.Do(ctx) +// +// **When to reach for which mechanism** +// +// Need behaviour for one specific Step? → BeforeStep / AfterStep +// (04_callbacks). +// Need behaviour for every Step in the Workflow? → Interceptor +// (10_observability). +// Need behaviour for every Step of a given type, +// even Steps added later or inside sub-workflows? → Mutator (this file). +// +// **Use case in this file** +// +// We're building a release pipeline whose individual Steps post structured +// notifications (`*Notify`) to a chat system. The notification target +// (channel, on-call rotation) and retry policy are environment-specific, +// and we don't want every Step author to remember to set them. A single +// `Mutate[*Notify](…)` on the Workflow takes care of that. + +// Notify is a plain Step. Authors fill in only the message-shaped fields +// (Title, Body); cross-cutting fields (Channel, RetryPolicy) are populated +// by a Mutator at run time. +type Notify struct { + Title string + Body string + Channel string // "" by default; filled in by the Mutator +} + +func (n *Notify) String() string { return "Notify(" + n.Title + ")" } +func (n *Notify) Do(ctx context.Context) error { + fmt.Printf("[%s] %s — %s\n", n.Channel, n.Title, n.Body) + return nil +} + +// ExampleMutator_fieldDefaults is the simplest form: a Mutator that mutates +// fields on the typed Step pointer and returns nil. Use this when the only +// thing you need is "fill in a default if the author didn't set one". +// +// The Mutator is invoked exactly once per matched Step, just before its +// first attempt — so it can read fields that earlier steps set, but the +// changes are applied before any Input / BeforeStep / Do runs. +func ExampleMutator_fieldDefaults() { + w := &flow.Workflow{ + Mutators: []flow.Mutator{ + // Anywhere a *Notify shows up, default Channel if it's empty. + flow.Mutate[*Notify](func(_ context.Context, n *Notify) flow.Builder { + if n.Channel == "" { + n.Channel = "#release" + } + return nil // pure field mutation; no Builder needed + }), + }, + } + w.Add( + flow.Pipe( + &Notify{Title: "deploy started", Body: "v1.2.3"}, + &Notify{Title: "smoke ok", Body: "all green"}, + // This one overrides the default. + &Notify{Title: "rollback!", Body: "v1.2.3 → v1.2.2", Channel: "#oncall"}, + ), + ) + _ = w.Do(context.Background()) + // Output: + // [#release] deploy started — v1.2.3 + // [#release] smoke ok — all green + // [#oncall] rollback! — v1.2.3 → v1.2.2 +} + +// ExampleMutator_contributeConfig shows the more powerful form: the Mutator +// returns a `flow.Builder` (the same builder you get from `flow.Step(…)`), +// and any configuration on that Builder is merged into the matched Step. +// +// Here we tag every *Notify with a uniform "[prod]" prefix via a BeforeStep +// callback. Step authors don't have to remember it, and Ops can change the +// tag in one place. The same shape works for Retry, Timeout, Input, +// AfterStep, etc. +func ExampleMutator_contributeConfig() { + w := &flow.Workflow{ + Mutators: []flow.Mutator{ + flow.Mutate[*Notify](func(_ context.Context, n *Notify) flow.Builder { + if n.Channel == "" { + n.Channel = "#release" + } + // Same builder methods you use at Add() time — BeforeStep, + // AfterStep, Input, Retry, Timeout, ... — all work here. + return flow.Step(n).BeforeStep(func(ctx context.Context, s flow.Steper) (context.Context, error) { + s.(*Notify).Title = "[prod] " + s.(*Notify).Title + return ctx, nil + }) + }), + }, + } + w.Add(flow.Step(&Notify{Title: "promo live", Body: "v2"})) + _ = w.Do(context.Background()) + // Output: + // [#release] [prod] promo live — v2 +} + +// ExampleMutator_ctxValue shows that the Mutator's ctx is the same ctx +// passed to `Workflow.Do(ctx)`. This is how a Mutator can pull +// environment-specific configuration off the context: a per-test +// scenario name, a build ID, an on-call channel resolved at runtime, ... +func ExampleMutator_ctxValue() { + type envKey struct{} + + w := &flow.Workflow{ + Mutators: []flow.Mutator{ + flow.Mutate[*Notify](func(ctx context.Context, n *Notify) flow.Builder { + if env, ok := ctx.Value(envKey{}).(string); ok && n.Channel == "" { + // e.g. #release-prod vs #release-dev + n.Channel = "#release-" + env + } + return nil + }), + }, + } + w.Add(flow.Step(&Notify{Title: "deploy started", Body: "v1.2.3"})) + + ctx := context.WithValue(context.Background(), envKey{}, "prod") + _ = w.Do(ctx) + // Output: + // [#release-prod] deploy started — v1.2.3 +} + +// ExampleMutator_subWorkflow shows that parent-Workflow Mutators reach +// inner Steps automatically — even when those Steps live inside a +// sub-Workflow, and even when the sub-Workflow adds them lazily at run +// time. +// +// The propagation mechanism is the same `MutatorReceiver` interface used +// by interceptors in 10_observability: any Step that contains a +// sub-Workflow (a `*Workflow` used as a Step, or a struct embedding +// `flow.SubWorkflow`) automatically receives the parent's Mutators before +// the inner Workflow starts scheduling. +func ExampleMutator_subWorkflow() { + // A "release stage" sub-workflow that posts a start and an end notice. + stage := new(flow.Workflow).Add( + flow.Pipe( + &Notify{Title: "stage:start", Body: "build"}, + &Notify{Title: "stage:end", Body: "build"}, + ), + ) + + outer := &flow.Workflow{ + Mutators: []flow.Mutator{ + flow.Mutate[*Notify](func(_ context.Context, n *Notify) flow.Builder { + if n.Channel == "" { + n.Channel = "#release" + } + return nil + }), + }, + } + outer.Add( + flow.Pipe( + &Notify{Title: "pipeline start", Body: "v1"}, + stage, // a sub-Workflow used as a Step + &Notify{Title: "pipeline end", Body: "v1"}, + ), + ) + _ = outer.Do(context.Background()) + // Output: + // [#release] pipeline start — v1 + // [#release] stage:start — build + // [#release] stage:end — build + // [#release] pipeline end — v1 +} + +// ExampleMutator_multipleInOrder shows two boundaries it's useful to know +// about: +// +// - Multiple Mutators registered for the same type are applied in the +// order they appear in `Workflow.Mutators`. Their Input callbacks are +// appended to the Step's chain in that same order. +// - Plan-declared Input callbacks (the ones you write at `Add()` time) +// ALWAYS run before any Mutator-contributed Input callbacks. +// +// This means: a Mutator can rely on plan Inputs having already run, and +// later Mutators can rely on earlier Mutators having already run. +func ExampleMutator_multipleInOrder() { + w := &flow.Workflow{ + Mutators: []flow.Mutator{ + flow.Mutate[*Notify](func(_ context.Context, n *Notify) flow.Builder { + return flow.Step(n).Input(func(_ context.Context, n *Notify) error { + n.Body += " [m1]" + return nil + }) + }), + flow.Mutate[*Notify](func(_ context.Context, n *Notify) flow.Builder { + return flow.Step(n).Input(func(_ context.Context, n *Notify) error { + n.Body += " [m2]" + return nil + }) + }), + }, + } + w.Add( + flow.Step(&Notify{Title: "deploy", Body: "v1", Channel: "#release"}). + Input(func(_ context.Context, n *Notify) error { + n.Body += " [plan]" + return nil + }), + ) + _ = w.Do(context.Background()) + // Output: + // [#release] deploy — v1 [plan] [m1] [m2] +} diff --git a/example/15_mutator_test.go b/example/15_mutator_test.go deleted file mode 100644 index 86640af..0000000 --- a/example/15_mutator_test.go +++ /dev/null @@ -1,270 +0,0 @@ -// Examples of the flow.Mutator API. - -package flow_test - -import ( - "context" - "fmt" - "time" - - flow "github.com/Azure/go-workflow" -) - -// --------------------------------------------------------------------------- -// Shared fixtures used by the examples below. -// --------------------------------------------------------------------------- - -type Greet struct { - Greeting string - Who string - out string -} - -func (g *Greet) String() string { return "Greet" } -func (g *Greet) Do(ctx context.Context) error { - g.out = fmt.Sprintf("%s, %s!", g.Greeting, g.Who) - fmt.Println(g.out) - return nil -} - -type Farewell struct { - Who string -} - -func (f *Farewell) String() string { return "Farewell" } -func (f *Farewell) Do(ctx context.Context) error { - fmt.Printf("Bye, %s!\n", f.Who) - return nil -} - -// --------------------------------------------------------------------------- -// Example 1 — a Mutator that fills in a default field across all *Greet steps. -// -// Equivalent legacy code would have been: -// -// for _, g := range flow.As[*Greet](w) { -// w.Add(flow.Step(g).Input(func(_ context.Context, g *Greet) error { -// if g.Who == "" { g.Who = "world" } -// return nil -// })) -// } -// -// With Mutate this becomes a single registration and works for steps added -// later (including inside sub-workflows) without a tree walk at registration -// time. -// --------------------------------------------------------------------------- - -func ExampleMutate_input() { - w := &flow.Workflow{ - Mutators: []flow.Mutator{ - flow.Mutate[*Greet](func(_ context.Context, g *Greet) flow.Builder { - return flow.Step(g).Input(func(_ context.Context, g *Greet) error { - if g.Who == "" { - g.Who = "world" - } - return nil - }) - }), - }, - } - hello := &Greet{Greeting: "Hello"} - bob := &Greet{Greeting: "Hi", Who: "Bob"} - w.Add( - flow.Pipe(hello, bob), // sequential so output order is deterministic - ) - _ = w.Do(context.Background()) - // Output: - // Hello, world! - // Hi, Bob! -} - -// --------------------------------------------------------------------------- -// Example 2 — A Mutator that overrides Retry and Timeout for every *Greet -// step. Demonstrates that a Mutator can return a Builder carrying *any* step -// configuration, not just Input. -// --------------------------------------------------------------------------- - -func ExampleMutate_retryOverride() { - w := &flow.Workflow{ - Mutators: []flow.Mutator{ - flow.Mutate[*Greet](func(_ context.Context, g *Greet) flow.Builder { - return flow.Step(g). - Retry(func(o *flow.RetryOption) { o.Attempts = 3 }). - Timeout(2 * time.Second) - }), - }, - } - w.Add(flow.Step(&Greet{Greeting: "Hi", Who: "world"})) - _ = w.Do(context.Background()) - // Output: - // Hi, world! -} - -// --------------------------------------------------------------------------- -// Example 3 — Mutator-as-pure-field-mutation (return nil Builder). -// -// When the only thing you need is to mutate fields on the typed step pointer -// (no callback registration), return nil. This is the cheapest form. -// --------------------------------------------------------------------------- - -func ExampleMutate_nilBuilder() { - w := &flow.Workflow{ - Mutators: []flow.Mutator{ - flow.Mutate[*Greet](func(_ context.Context, g *Greet) flow.Builder { - g.Greeting = "Hola" // direct field mutation - return nil - }), - }, - } - w.Add(flow.Step(&Greet{Greeting: "Hi", Who: "world"})) - _ = w.Do(context.Background()) - // Output: - // Hola, world! -} - -// --------------------------------------------------------------------------- -// Example 4 — ctx received by the Mutator is the workflow-scoped ctx. -// -// Demonstrates the documented invariant that the Mutator's `ctx` is the same -// `ctx` passed to Workflow.Do. This is how production helpers fetch a logger, -// scenario name, test session ID, etc. -// --------------------------------------------------------------------------- - -type ctxKey string - -const greetingKey ctxKey = "greeting" - -func ExampleMutate_ctx() { - w := &flow.Workflow{ - Mutators: []flow.Mutator{ - flow.Mutate[*Greet](func(ctx context.Context, g *Greet) flow.Builder { - if v, ok := ctx.Value(greetingKey).(string); ok { - g.Greeting = v - } - return nil - }), - }, - } - w.Add(flow.Step(&Greet{Who: "world"})) - - ctx := context.WithValue(context.Background(), greetingKey, "Bonjour") - _ = w.Do(ctx) - // Output: - // Bonjour, world! -} - -// --------------------------------------------------------------------------- -// Example 5 — Multiple Mutators registered for the same type run in slice -// order; their Input callbacks are appended in that order and all run after -// any plan-declared Inputs. -// --------------------------------------------------------------------------- - -func ExampleMutate_multipleInOrder() { - w := &flow.Workflow{ - Mutators: []flow.Mutator{ - flow.Mutate[*Greet](func(_ context.Context, g *Greet) flow.Builder { - return flow.Step(g).Input(func(_ context.Context, g *Greet) error { - g.Greeting += " (m1)" - return nil - }) - }), - flow.Mutate[*Greet](func(_ context.Context, g *Greet) flow.Builder { - return flow.Step(g).Input(func(_ context.Context, g *Greet) error { - g.Greeting += " (m2)" - return nil - }) - }), - }, - } - w.Add( - flow.Step(&Greet{Greeting: "Hi", Who: "world"}). - Input(func(_ context.Context, g *Greet) error { - g.Greeting += " (plan)" - return nil - }), - ) - _ = w.Do(context.Background()) - // Plan callback runs first, then m1, then m2 — in declared order. - // Output: - // Hi (plan) (m1) (m2), world! -} - -// --------------------------------------------------------------------------- -// Example 6 — Sub-workflow construction inside Do(). -// -// This is the new pattern that replaces BuildStep. The composite step embeds -// flow.SubWorkflow (which gives it a stable inner *Workflow handle for -// PrependMutators propagation) but constructs its inner steps inside Do(), -// not at Add time. Parent Mutators still reach inner steps via PrependMutators. -// --------------------------------------------------------------------------- - -type CompositeViaDo struct { - flow.SubWorkflow // gives stable inner-workflow handle for Mutator propagation - Hello Greet - Bye Farewell -} - -func (c *CompositeViaDo) String() string { return "CompositeViaDo" } -func (c *CompositeViaDo) Do(ctx context.Context) error { - // Build the inner DAG lazily, here, inside Do — no BuildStep needed. - c.Add(flow.Pipe(&c.Hello, &c.Bye)) - return c.SubWorkflow.Do(ctx) -} - -func ExampleMutate_subWorkflow() { - composite := &CompositeViaDo{ - Hello: Greet{Greeting: "Hello"}, - Bye: Farewell{}, - } - - w := &flow.Workflow{ - // Parent-level Mutator. Reaches *Greet AND *Farewell inside the - // composite step via the MutatorReceiver propagation mechanism: - // before parent invokes composite.Do, runtime calls - // composite.PrependMutators(parent.Mutators). - Mutators: []flow.Mutator{ - flow.Mutate[*Greet](func(_ context.Context, g *Greet) flow.Builder { - if g.Who == "" { - g.Who = "world" - } - return nil - }), - flow.Mutate[*Farewell](func(_ context.Context, f *Farewell) flow.Builder { - if f.Who == "" { - f.Who = "world" - } - return nil - }), - }, - } - w.Add(flow.Step(composite)) - _ = w.Do(context.Background()) - // Output: - // Hello, world! - // Bye, world! -} - -// --------------------------------------------------------------------------- -// Example 7 — Mutator matches an inner type through the Unwrap chain. -// -// flow.Name("...", greet) returns a wrapper whose Unwrap() returns greet. -// flow.Mutate[*Greet] matches the inner *Greet via unwrap traversal; the -// resulting Builder's config is merged onto the wrapper's state entry, but -// the user function receives the typed *Greet pointer. -// --------------------------------------------------------------------------- - -func ExampleMutate_unwrap() { - greet := &Greet{Greeting: "Hi"} - w := &flow.Workflow{ - Mutators: []flow.Mutator{ - flow.Mutate[*Greet](func(_ context.Context, g *Greet) flow.Builder { - g.Who = "world" - return nil - }), - }, - } - w.Add(flow.Name(greet, "named-greet")) - _ = w.Do(context.Background()) - // Output: - // Hi, world! -} diff --git a/example/README.md b/example/README.md index a8f7516..bbdae84 100644 --- a/example/README.md +++ b/example/README.md @@ -46,6 +46,7 @@ stay in sync with the library. | [10_observability](10_observability_test.go) | `StepInterceptor` / `AttemptInterceptor` for cross-cutting logging, tracing, metrics. | | [11_debugging](11_debugging_test.go) | `ErrWorkflow` and `Workflow.StateOf` for post-run inspection. | | [12_testing_workflows](12_testing_workflows_test.go) | `flow.Mock` to substitute Step behaviour in tests. | +| [13_mutators](13_mutators_test.go) | `Mutate[T]` to contribute config (defaults, Retry, callbacks) to every Step of a type — even inside sub-Workflows. | ## Conventions used in these examples From a966e1ab4a5cc863bc4a4343732f3de9520f47a6 Mon Sep 17 00:00:00 2001 From: Xingfei Xu Date: Tue, 12 May 2026 05:32:41 +0000 Subject: [PATCH 31/31] docs(openspec): archive step-mutator change, apply spec deltas Apply MODIFIED and ADDED requirements from the 2026-05-06-step-mutator change to the main capability specs: composite-steps: BuildStep marked deprecated, added "Sub-workflow construction inside Do" and "MutatorReceiver propagation" requirements. step-configuration: added "Mutators field on Workflow" requirement. mutators: new capability spec covering the Mutator interface, first-schedule merge semantics, once-per-step enforcement, dispatch via Unwrap, scope rules, and MutatorReceiver propagation across workflow boundaries. The change directory moves to openspec/changes/archive/ dated 2026-05-12. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../design.md | 0 .../proposal.md | 0 .../specs/composite-steps/spec.md | 0 .../specs/mutators/spec.md | 0 .../specs/step-configuration/spec.md | 0 .../tasks.md | 0 openspec/specs/composite-steps/spec.md | 94 +++- openspec/specs/mutators/spec.md | 413 ++++++++++++++++++ openspec/specs/step-configuration/spec.md | 84 +++- 9 files changed, 581 insertions(+), 10 deletions(-) rename openspec/changes/{2026-05-06-step-mutator => archive/2026-05-12-2026-05-06-step-mutator}/design.md (100%) rename openspec/changes/{2026-05-06-step-mutator => archive/2026-05-12-2026-05-06-step-mutator}/proposal.md (100%) rename openspec/changes/{2026-05-06-step-mutator => archive/2026-05-12-2026-05-06-step-mutator}/specs/composite-steps/spec.md (100%) rename openspec/changes/{2026-05-06-step-mutator => archive/2026-05-12-2026-05-06-step-mutator}/specs/mutators/spec.md (100%) rename openspec/changes/{2026-05-06-step-mutator => archive/2026-05-12-2026-05-06-step-mutator}/specs/step-configuration/spec.md (100%) rename openspec/changes/{2026-05-06-step-mutator => archive/2026-05-12-2026-05-06-step-mutator}/tasks.md (100%) create mode 100644 openspec/specs/mutators/spec.md diff --git a/openspec/changes/2026-05-06-step-mutator/design.md b/openspec/changes/archive/2026-05-12-2026-05-06-step-mutator/design.md similarity index 100% rename from openspec/changes/2026-05-06-step-mutator/design.md rename to openspec/changes/archive/2026-05-12-2026-05-06-step-mutator/design.md diff --git a/openspec/changes/2026-05-06-step-mutator/proposal.md b/openspec/changes/archive/2026-05-12-2026-05-06-step-mutator/proposal.md similarity index 100% rename from openspec/changes/2026-05-06-step-mutator/proposal.md rename to openspec/changes/archive/2026-05-12-2026-05-06-step-mutator/proposal.md diff --git a/openspec/changes/2026-05-06-step-mutator/specs/composite-steps/spec.md b/openspec/changes/archive/2026-05-12-2026-05-06-step-mutator/specs/composite-steps/spec.md similarity index 100% rename from openspec/changes/2026-05-06-step-mutator/specs/composite-steps/spec.md rename to openspec/changes/archive/2026-05-12-2026-05-06-step-mutator/specs/composite-steps/spec.md diff --git a/openspec/changes/2026-05-06-step-mutator/specs/mutators/spec.md b/openspec/changes/archive/2026-05-12-2026-05-06-step-mutator/specs/mutators/spec.md similarity index 100% rename from openspec/changes/2026-05-06-step-mutator/specs/mutators/spec.md rename to openspec/changes/archive/2026-05-12-2026-05-06-step-mutator/specs/mutators/spec.md diff --git a/openspec/changes/2026-05-06-step-mutator/specs/step-configuration/spec.md b/openspec/changes/archive/2026-05-12-2026-05-06-step-mutator/specs/step-configuration/spec.md similarity index 100% rename from openspec/changes/2026-05-06-step-mutator/specs/step-configuration/spec.md rename to openspec/changes/archive/2026-05-12-2026-05-06-step-mutator/specs/step-configuration/spec.md diff --git a/openspec/changes/2026-05-06-step-mutator/tasks.md b/openspec/changes/archive/2026-05-12-2026-05-06-step-mutator/tasks.md similarity index 100% rename from openspec/changes/2026-05-06-step-mutator/tasks.md rename to openspec/changes/archive/2026-05-12-2026-05-06-step-mutator/tasks.md diff --git a/openspec/specs/composite-steps/spec.md b/openspec/specs/composite-steps/spec.md index 5a75230..4896d68 100644 --- a/openspec/specs/composite-steps/spec.md +++ b/openspec/specs/composite-steps/spec.md @@ -1,4 +1,8 @@ -## ADDED Requirements +## Purpose + +This capability covers the composite-steps surface of the go-workflow library. + +## Requirements ### Requirement: Steper interface — the Step contract @@ -84,14 +88,27 @@ Traversal follows `Unwrap() Steper` and `Unwrap() []Steper` recursively. ### Requirement: BuildStep — lazy initialization hook -If a Step implements `BuildStep()`, the Workflow calls it exactly once when the Step is -first added. This allows composite Steps to initialize their internal sub-workflow -lazily at add-time rather than at construction time. +The framework SHALL preserve the `BuildStep` lazy-initialization hook for backward +compatibility during the deprecation window. New code SHOULD use `flow.Mutate` for +cross-cutting modification and construct sub-workflows inside `Do()` (see the new +`Sub-workflow construction inside Do` requirement below). + +The implicit `BuildStep()` invocation at `Add` time, together with the implicit +`SubWorkflow.Reset()` call that precedes it, SHALL be **removed in the next major +version of `go-workflow`**. From that release on, Steps that define a `BuildStep()` +method will no longer have it invoked automatically; composite-step authors must +construct their inner workflow inside `Do()` instead. This deprecation window is +bounded: it ends at the next major version bump, by which time all production usages +must have migrated. -If the Step also implements `Reset()`, the Workflow calls `Reset()` before `BuildStep()` -to allow the Step to clear any previous state before rebuilding. +If a Step implements `BuildStep()`, the Workflow SHALL call it exactly once when the +Step is first added. This allows composite Steps to initialize their internal +sub-workflow lazily at add-time rather than at construction time. -`BuildStep()` is NOT called again if the same Step pointer is added a second time. +If the Step also implements `Reset()`, the Workflow SHALL call `Reset()` before +`BuildStep()` to allow the Step to clear any previous state before rebuilding. + +`BuildStep()` SHALL NOT be called again if the same Step pointer is added a second time. #### Scenario: BuildStep called once on first Add - **WHEN** a Step implementing `BuildStep()` is added to a Workflow @@ -103,8 +120,6 @@ to allow the Step to clear any previous state before rebuilding. and is added to a Workflow - **THEN** `Reset()` is called first, then `BuildStep()` ---- - ### Requirement: SubWorkflow — Workflow as a Step `SubWorkflow` is an embeddable helper that exposes a nested `Workflow` as a Step. @@ -181,3 +196,64 @@ and recursing into sub-workflows. #### Scenario: Tree string output - **WHEN** `fmt.Sprint(workflow)` (or equivalent) is called on a Workflow with Steps - **THEN** the output contains the names of all added Steps + +### Requirement: Sub-workflow construction inside Do + +The framework SHALL support constructing a composite Step's sub-workflow inside its +`Do(ctx)` method, using a freshly created `flow.Workflow{}` and standard `Add` / +`Step` / `Pipe` builders. This pattern MUST NOT require implementing `BuildStep`. + +When this pattern is used in combination with `flow.SubWorkflow` embedding, the inner +workflow SHALL remain visible to parent-level Mutators via the `MutatorReceiver` +propagation mechanism (see the `mutators` capability spec). + +When this pattern is used WITHOUT `flow.SubWorkflow` embedding (e.g. by constructing a +local `flow.Workflow{}` inside `Do` and discarding it after `Do` returns), the inner +workflow SHALL be invisible to parent introspection helpers (`Has`, `As`, `HasStep`) and +to parent Mutator propagation. This is intentional: such a step is fully self-contained +and opaque to the outer workflow. + +#### Scenario: Composite step constructs sub-workflow in Do +- **WHEN** a composite step embeds `flow.SubWorkflow` and inside `Do` calls `s.Reset()` + followed by `s.Add(...)` then `s.Do(ctx)` +- **THEN** the sub-workflow runs correctly and parent Mutators reach its inner steps via + `PrependMutators` (invoked once per inner-workflow execution; once-per-step semantics + inside the inner workflow then take over) + +#### Scenario: Local sub-workflow inside Do is opaque to parent +- **WHEN** a step constructs a `flow.Workflow{}` inside `Do` without embedding + `SubWorkflow` and without exposing it via `Unwrap` +- **THEN** parent `flow.As[T](outer)` does not find inner steps and parent Mutators do + not reach them + +--- + +### Requirement: SubWorkflow implements MutatorReceiver + +`flow.SubWorkflow` SHALL implement the `MutatorReceiver` interface defined in the +`mutators` capability spec. Its `PrependMutators` implementation prepends the supplied +Mutators to the inner workflow's `Mutators` slice on each invocation. + +This requirement is what makes the deprecated `BuildStep` + `SubWorkflow` pattern and the +new `Do`-time + `SubWorkflow` pattern interchangeable from the parent Mutator +perspective. + +#### Scenario: Parent Mutator propagates to SubWorkflow +- **WHEN** a parent workflow has `flow.Mutate[*Foo]` registered and a step embedding + `flow.SubWorkflow` adds a `*Foo` to its inner workflow +- **THEN** the parent Mutator runs against the inner `*Foo` instance before each of its + attempts + +--- + +### Requirement: *Workflow implements MutatorReceiver + +`*flow.Workflow`, when used directly as a Step in another workflow, SHALL implement the +`MutatorReceiver` interface. Its `PrependMutators` implementation prepends the supplied +Mutators to its own `Mutators` slice. + +#### Scenario: Nested *Workflow inherits parent Mutators +- **WHEN** a `*flow.Workflow` containing `*Foo` is added as a step in another workflow + that has `flow.Mutate[*Foo]` registered +- **THEN** the parent Mutator runs against the inner `*Foo` before each of its attempts + diff --git a/openspec/specs/mutators/spec.md b/openspec/specs/mutators/spec.md new file mode 100644 index 0000000..46e35d9 --- /dev/null +++ b/openspec/specs/mutators/spec.md @@ -0,0 +1,413 @@ +# mutators Specification + +## Purpose + +This capability covers `flow.Mutator` — the type-dispatched, once-per-step +configuration contributor that lets workflow authors apply cross-cutting +defaults, callbacks, and retry/timeout policy to every Step of a chosen Go +type, including Steps that live inside nested sub-workflows. + +## Requirements + +### Requirement: Mutator interface and Mutate constructor + +The `flow.Mutator` interface SHALL represent a type-dispatched, **once-per-step** +contribution of configuration to a Step. The interface has a single unexported method, +ensuring that the only producer is the generic constructor `flow.Mutate[T]`. + +`flow.Mutate[T Steper](fn func(ctx context.Context, step T) Builder) Mutator` SHALL +construct a Mutator that: + +- Performs a type assertion `step.(T)` against each step it is offered, **walking the + `Unwrap()` chain** if the immediate step does not match. The first layer in the chain + whose concrete type matches `T` is the one `fn` receives. +- Calls `fn` with the workflow's context and the type-asserted step instance if and only + if some layer in the unwrap chain matches. +- Returns the `Builder` that `fn` returns, for the runtime to merge into the step's + `StepConfig`. +- Treats `fn` returning a `nil` Builder as "no additional configuration" (still a valid + Mutator — useful when `fn` only mutates fields on the step pointer). +- Is a no-op (returns `matched=false`) when no layer in the unwrap chain satisfies the + assertion. + +Selection is by concrete type along the unwrap chain only. There is no interface +matching and no name matching. The unwrap traversal mirrors `flow.As[T]`: it follows +`Unwrap() Steper` and `Unwrap() []Steper`, but does NOT cross workflow boundaries (a +nested workflow's inner steps are reached via the propagation mechanism in +`MutatorReceiver propagation interface`, not via in-place unwrap from the parent). + +The `ctx` passed to `fn` is the **workflow-scoped context** — i.e. the `ctx` that was +passed into `Workflow.Do(ctx)`. It is suitable for accessing values that are stable for +the life of the workflow (logger, scenario name, test session ID). It is NOT a +per-attempt context: the Mutator runs once per step at scheduling time, before any +attempt begins. + +A Mutator does NOT return a substituted `context.Context`. The Mutator is not in the +per-attempt onion and cannot influence the `ctx` that `step.Do` receives. Cross-cutting +ctx substitution remains the responsibility of `BeforeStep` (per instance) or +`StepInterceptor` / `AttemptInterceptor` (cross-cutting). The Mutator MAY return a +`Builder` that includes a `BeforeStep` callback if it wants per-attempt ctx influence +through composition. + +#### Scenario: Mutate runs against a matching type +- **WHEN** `flow.Mutate[*Foo]` is registered and the workflow contains a `*Foo` step +- **THEN** the Mutator function is invoked with that `*Foo` instance once, before its + first attempt + +#### Scenario: Mutate skips a non-matching type +- **WHEN** `flow.Mutate[*Foo]` is registered and the workflow contains only `*Bar` steps +- **THEN** the Mutator function is never invoked + +#### Scenario: Mutate matches inner type through Unwrap chain +- **WHEN** `flow.Mutate[*Inner]` is registered and the workflow contains a wrapper step + whose `Unwrap()` returns a `*Inner` (e.g. `flow.Name("...", inner)`) +- **THEN** the Mutator runs against the `*Inner` instance; the user function receives + the typed `*Inner` pointer + +#### Scenario: Mutate matches outer wrapper when wrapper itself is the target type +- **WHEN** `flow.Mutate[*Wrapper]` is registered and the workflow contains a `*Wrapper` + wrapping a `*Inner` +- **THEN** the Mutator runs against the `*Wrapper` instance (first matching layer wins; + `*Wrapper` matches before unwrap descends to `*Inner`) + +#### Scenario: Returning nil Builder is valid +- **WHEN** the Mutator function returns `nil` +- **THEN** the runtime treats it as no additional configuration; no error, no panic + +#### Scenario: Mutator function receives workflow-scoped ctx, returns Builder, no error +- **WHEN** a developer writes `flow.Mutate[*Foo](fn)` +- **THEN** the type of `fn` is `func(ctx context.Context, step *Foo) flow.Builder`, + with no error return +- **AND** when the Mutator runs, `ctx` is the same context that was passed to + `Workflow.Do(ctx)` + +--- + +--- + +### Requirement: Mutator dispatch and merge destination + +The runtime SHALL dispatch and merge Mutators using the following rule, hereafter +"**dispatch by the owning workflow**": + +1. Each `*Workflow` (parent or inner) SHALL evaluate its own `Workflow.Mutators` slice + against every step in its own `state` map, at that step's first scheduling. +2. For each step `wrapper` in `w.state`, the runtime SHALL walk `wrapper`'s `Unwrap()` + chain. For each layer `L` in the chain (including `wrapper` itself, in pre-order), + the runtime SHALL ask each Mutator whether `L.(T)` matches. The first matching layer + SHALL be passed to the Mutator's user function. +3. The `Builder` returned by the user function SHALL be merged into + `w.state[wrapper].Config` — i.e. into the **wrapper key in the workflow that owns + `wrapper` as a root**, regardless of which layer in the unwrap chain the Mutator + matched. This matches the `Config merge destination follows StateOf` rule in + `step-configuration`. +4. Parent Mutators SHALL reach inner-workflow steps only via the `MutatorReceiver` + propagation interface (parent prepends its Mutators into the inner workflow's + `Mutators` slice). The parent SHALL NOT directly walk into a child workflow's + `state` map to apply Mutators; that is the inner workflow's responsibility once it + begins scheduling. This keeps once-per-step state, merge destinations, and lazily + added inner steps consistent across all nesting depths. + +This rule resolves three previously-ambiguous points: + +- **Unwrap is supported** — `flow.Mutate[*Inner]` matches a `flow.Name("...", inner)` + wrapper. Migration from the legacy `As[T] + Input` pattern does not break on wrapper + types. +- **Config destination is well-defined** — the merge target is always the wrapper key + in the workflow that actually owns the step, not the outer workflow on which the + Mutator was registered. +- **Nested workflows are reached by propagation, not by parent walking** — a parent + Mutator targeting `*X` where `*X` lives inside an inner workflow IS observed by + `*X`, but the dispatch is performed by the inner workflow after `PrependMutators`, + not by the parent reaching across the workflow boundary. + +#### Scenario: Mutator matches via Unwrap and merges into wrapper's state +- **GIVEN** a workflow `w` where `w.state` contains `wrapper = flow.Name("x", inner)` + whose `Unwrap()` returns `inner` of type `*Inner` +- **AND** `w.Mutators = [flow.Mutate[*Inner](func(ctx, x *Inner) Builder { return flow.Step(x).Input(fn) })]` +- **WHEN** `wrapper` reaches its first scheduling +- **THEN** the Mutator's user function receives the `*Inner` pointer `inner` +- **AND** `fn` is appended to `w.state[wrapper].Config.Before` (NOT to a state entry + keyed on `inner`) + +#### Scenario: Parent Mutator targeting inner type does not directly mutate inner state +- **GIVEN** `parent` contains `inner` (a `*Workflow` used as a step) which contains + `x` of type `*X` +- **AND** `parent.Mutators = [flow.Mutate[*X](fn)]` +- **WHEN** `parent.Do(ctx)` runs +- **THEN** `parent` SHALL call `inner.PrependMutators(parent.Mutators)` before invoking + `inner` as a step +- **AND** the Mutator runs once against `x` inside `inner`'s scheduling pass; the + Builder is merged into `inner.state[x].Config`, NOT into any state entry on `parent` + +#### Scenario: Wrapper inside inner workflow is reached via propagation +- **GIVEN** `parent → inner → flow.Name("x", x)` where `x` is `*X` +- **AND** `parent.Mutators = [flow.Mutate[*X](fn)]` +- **WHEN** the wrapper reaches first scheduling inside `inner` +- **THEN** the Mutator (propagated by `PrependMutators`) unwraps the wrapper, matches + `x`, and merges its Builder into `inner.state[wrapper].Config` + +--- + +### Requirement: Workflow.Mutators field + +`flow.Workflow` SHALL expose a `Mutators []Mutator` field. Mutators in this slice are +evaluated against every step before the step's first attempt. Slice order is preserved: +element 0 runs first, element n-1 runs last, among the Mutators whose target type matches +the step. + +A workflow with `Mutators == nil` or an empty slice SHALL behave identically to one +without the Mutator mechanism — no overhead, no panic. + +#### Scenario: Slice order is preserved among matching Mutators +- **WHEN** two Mutators `[m1, m2]` both registered for `*Foo` are present, and `m1` + returns a Builder with `Input` callback A, `m2` returns a Builder with `Input` + callback B +- **THEN** before the first attempt of any `*Foo`, callback A runs before callback B + (because the merge appends in order) + +#### Scenario: Multiple Mutators for the same type all run +- **WHEN** two Mutators `m1`, `m2` are both registered for `*Foo` +- **THEN** both run, in slice order, before the first attempt of every `*Foo` step; + their contributions are merged + +#### Scenario: Nil Mutators field is a no-op +- **WHEN** `Workflow.Mutators` is nil +- **THEN** the workflow runs identically to a workflow without the Mutator field + +--- + +### Requirement: Mutator merge timing and ordering + +Mutators SHALL be merged into a step's `StepConfig` **after** all `Workflow.Add` calls +for that step have completed, but **before** the step's first attempt begins. +Specifically, the merge happens when the step transitions from `Pending` to its first +scheduling — i.e. inside `Workflow.Do`, on demand, not at `Add` time. + +Mutator-contributed `Before` / `After` / `Option` lists SHALL be **appended** to the +step's existing lists via the standard `StepConfig.Merge` rule (see +`step-configuration`'s `Idempotent Add with config merging` requirement). Consequently: + +- **Before chain execution order:** plan-declared `Input` / `BeforeStep` run first; + Mutator-contributed callbacks run after, in `Workflow.Mutators` slice order. +- **After chain execution order:** plan-declared `Output` / `AfterStep` run first; + Mutator-contributed callbacks run after, in `Workflow.Mutators` slice order. +- **Multiple Mutators on the same step:** their callbacks all append after plan's; + among Mutators, slice order is preserved. + +`Upstreams` contributed by Mutators are unioned with existing upstreams (set union, not +ordered). + +This ordering matches the "scenario customization layered on top of plan defaults" +mental model: a Mutator can observe or override state left by plan callbacks. A Mutator +cannot pre-empt plan callbacks; if pre-empting is needed (rare), register the behavior +as a plan-time `Input` instead. + +Sub-workflow propagation interacts with this ordering as follows: parent Mutators are +prepended into the inner workflow's `Mutators` slice (see `MutatorReceiver propagation` +requirement) so that, inside the inner workflow, parent Mutators run before child +Mutators. Both still run after the inner step's plan-declared callbacks. + +#### Scenario: Plan Input runs before Mutator Input +- **GIVEN** a step `s` added via `flow.Step(s).Input(planFn)` +- **AND** a Mutator `flow.Mutate[*S](func(ctx, s *S) flow.Builder { + return flow.Step(s).Input(mutatorFn) })` +- **WHEN** the step executes its first attempt +- **THEN** `planFn` runs first, then `mutatorFn`, then `Do` + +#### Scenario: Multiple Mutator Inputs run in slice order, after plan +- **GIVEN** a step `s` added via `flow.Step(s).Input(planFn)` +- **AND** `Workflow.Mutators = [m1, m2]` where both target `*S` and contribute Inputs + `mfn1`, `mfn2` respectively +- **WHEN** the step executes its first attempt +- **THEN** the Before chain runs `planFn`, then `mfn1`, then `mfn2`, then `Do` + +#### Scenario: Plan After runs before Mutator After +- **GIVEN** a step with a plan-declared `Output(planAfter)` and a Mutator that returns + `flow.Step(s).Output(mutatorAfter)` +- **WHEN** the step's `Do` returns successfully +- **THEN** `planAfter` runs first, then `mutatorAfter` + +#### Scenario: Merge happens at first scheduling, not at Add time +- **GIVEN** a Mutator whose user function appends to an external slice when invoked +- **WHEN** `Workflow.Add(Step(s))` is called +- **THEN** the external slice is unchanged +- **WHEN** `Workflow.Do(ctx)` runs and `s` reaches its first attempt +- **THEN** the external slice has exactly one new entry (Mutator invoked once) + +--- + +### Requirement: Mutators run once per step instance + +A Mutator's user function SHALL be invoked at most **once** per step instance, just +before that step's first attempt begins (i.e. when the runtime moves the step out of +`Pending` state into the attempt loop). + +The runtime SHALL track per-step state to ensure the Mutator chain runs exactly once, +even if the same step is somehow re-scheduled. + +The configuration contributed by Mutators is merged into the step's `StepConfig` using +the same rules as the existing `Idempotent Add with config merging` requirement +(`step-configuration` capability): + +- `Upstreams`: union +- `Before`: append in order (Mutator order, then existing) +- `After`: append in order +- `Option`: append in order + +After the merge, the step proceeds to the standard per-attempt onion (Interceptors, +retry loop, Before chain, Do, After chain). + +#### Scenario: Mutator runs exactly once across multiple attempts +- **WHEN** a `*Foo` step has `RetryOption{MaxAttempts: 3}` and `flow.Mutate[*Foo]` is + registered, and the step fails twice then succeeds on attempt 3 +- **THEN** the Mutator user function is invoked exactly once (before attempt 1) + +#### Scenario: Mutator-contributed Before runs on every attempt +- **WHEN** `flow.Mutate[*Foo]` returns a Builder with a `BeforeStep` callback `b`, + and the step retries 3 times +- **THEN** `b` runs once per attempt (3 times total) — because `BeforeStep` is + per-attempt; the Mutator merge happens once but the resulting `Before` chain still + fires per attempt + +#### Scenario: Field mutation done in Mutator persists +- **WHEN** the Mutator user function sets `f.Field = "X"` on the typed `*Foo` pointer it + receives +- **THEN** `f.Field == "X"` is observable from `Do` on every attempt; the field is set + once and persists + +#### Scenario: Mutator merge happens before first attempt's Before chain +- **WHEN** a Mutator returns a Builder with `Input(callback)` and the user has separately + registered `flow.Step(s).Input(otherCallback)` in their plan +- **THEN** before the first attempt, both callbacks are present in the step's `Before` + chain (in registration order: Mutator-registered ones come from the slice merge order) + +--- + +### Requirement: Mutator-returned Builder scope + +The runtime SHALL restrict the merge of a Mutator-returned `Builder` to the configuration +keyed by the step that was passed into the Mutator. Any configuration the Builder +declares for other steps SHALL be silently ignored. + +This rule SHALL keep Mutator scope predictable: a `flow.Mutate[*Foo]` Mutator MUST only +configure the specific `*Foo` instance it was called with, even if the user mistakenly +constructs a Builder that mentions other steps. + +#### Scenario: Builder with config for the passed-in step is merged +- **WHEN** `flow.Mutate[*Foo](func(ctx context.Context, f *Foo) flow.Builder { return flow.Step(f).Input(fn) })` + is registered and runs against a `*Foo` instance +- **THEN** the `Input` callback `fn` is added to that `*Foo`'s `Before` chain + +#### Scenario: Builder with config for an unrelated step is ignored +- **WHEN** a Mutator's user function (mistakenly or intentionally) returns + `flow.Step(otherStep).Input(...)` instead of `flow.Step(passedIn).Input(...)` +- **THEN** the configuration for `otherStep` is silently dropped; `passedIn` is unchanged + +--- + +### Requirement: MutatorReceiver propagation interface + +`flow.MutatorReceiver` SHALL be defined as an optional interface that composite steps and +sub-workflows implement to receive Mutators from their parent workflow: + +```go +type MutatorReceiver interface { + PrependMutators(mw []Mutator) +} +``` + +Before the runtime invokes a step that hosts a sub-workflow (composite step or +`*Workflow` used as a step), the runtime SHALL check whether the step implements +`MutatorReceiver` and, if so, call `step.PrependMutators(parent.Mutators)` so that parent +Mutators reach inner steps. + +The contract for implementers is to **prepend** parent Mutators to their own Mutators +list, preserving the invariant that parent Mutators run before child Mutators. + +The propagation is invoked once per inner-workflow execution. Inner steps that are added +lazily (e.g. inside the composite step's `Do`) are still reached, because Mutator +evaluation in the inner workflow is itself once-per-step at first scheduling. + +#### Scenario: Parent Mutator reaches step inside SubWorkflow +- **WHEN** the parent workflow has `flow.Mutate[*Foo]` registered and a step embeds + `flow.SubWorkflow` with a `*Foo` inside +- **THEN** the parent Mutator runs against the inner `*Foo` instance once, before its + first attempt + +#### Scenario: Parent Mutator reaches step inside nested *Workflow +- **WHEN** the parent workflow has `flow.Mutate[*Foo]` registered and a `*Workflow` is + added as a step containing a `*Foo` +- **THEN** the parent Mutator runs against the inner `*Foo` + +#### Scenario: Lazily added inner steps are reached +- **WHEN** a composite step embeds `SubWorkflow`, calls `s.Add(new(Foo))` inside its + `Do`, and the parent has `flow.Mutate[*Foo]` registered +- **THEN** the lazily added `*Foo` is seen by the parent Mutator at its first scheduling + inside the inner workflow + +#### Scenario: Child Mutator runs after parent +- **WHEN** the parent registers Mutator `pm` and a `SubWorkflow`-embedded child registers + Mutator `cm`, both targeting `*Foo` +- **THEN** for any `*Foo` inside the child, `pm` runs before `cm`; their `Before` chain + contributions are appended in that order + +--- + +### Requirement: SubWorkflow and *Workflow implement MutatorReceiver + +`flow.SubWorkflow` SHALL implement `MutatorReceiver` by prepending the supplied Mutators +to its inner workflow's `Mutators` slice. + +`*flow.Workflow` (when used directly as a step in another workflow) SHALL implement +`MutatorReceiver` by prepending the supplied Mutators to its own `Mutators` slice. + +Both implementations MUST be safe to call multiple times; calling `PrependMutators` twice +with the same list MUST NOT cause double-execution of the Mutators on the same step +(deduplication is provided by the once-per-step invariant — once a step's Mutators have +been applied, re-prepending the slice has no further effect on that step). + +#### Scenario: SubWorkflow.PrependMutators prepends in order +- **WHEN** `parent.Mutators = [m1, m2]` and a SubWorkflow's inner has `[m3]` +- **THEN** after `PrependMutators` is called, the inner has `[m1, m2, m3]` + +#### Scenario: *Workflow used as step inherits Mutators +- **WHEN** a `*Workflow` is added as a step in another workflow with Mutators `[m1]` +- **THEN** during execution, `[m1]` is prepended to the inner workflow's Mutators + +--- + +### Requirement: Mutator vs Input/BeforeStep usage guidance + +The framework SHALL preserve both `flow.Step(s).Input(fn)` (per-instance binding) and +`flow.Mutate[*T](fn)` (per-type binding) as parallel mechanisms with distinct intended +scopes. Neither MUST be removed in favor of the other during the deprecation window for +`BuildStep`. + +The intended scope distinction is: + +- `flow.Step(s).Input(fn)` (in a plan) binds to one specific step instance, lexically + known. Use it when you are writing a plan or composite step and you hold the step + instance. The modification only matters for that instance. +- `flow.Mutate[*T](fn)` binds to a Go type. Use it when you want a modification to apply + to every `*T` step in the workflow tree (including sub-workflows and steps added by + other plans), and you do not want to enumerate them. + +Anti-patterns: + +- **Using `Mutate` to modify a single specific instance.** It works but is misleading; + the next reader of the code assumes "all `*T`". Use `Input` instead. +- **Using `Input` (via `As[T]` traversal) to apply a cross-plan policy.** This is the + legacy pattern that `Mutate` is designed to replace. Each plan author must pre-build + sub-workflows so traversal can find targets, and missing-match failures are silent. + +For ctx substitution at workflow level (across all step types), use `StepInterceptor` or +`AttemptInterceptor`. + +#### Scenario: Both mechanisms coexist +- **WHEN** a workflow uses both `flow.Step(s).Input(fn1)` for a specific instance and + `flow.Mutate[*S](func(ctx, s *S) Builder { return flow.Step(s).Input(fn2) })` for + cross-cutting policy +- **THEN** both `fn1` and `fn2` SHALL be invoked on `s`'s Before chain in the documented + order (plan first, Mutator after) + diff --git a/openspec/specs/step-configuration/spec.md b/openspec/specs/step-configuration/spec.md index fce1ac5..bbfbbf1 100644 --- a/openspec/specs/step-configuration/spec.md +++ b/openspec/specs/step-configuration/spec.md @@ -1,4 +1,8 @@ -## ADDED Requirements +## Purpose + +This capability covers the step-configuration surface of the go-workflow library. + +## Requirements ### Requirement: Step builder API — DependsOn @@ -160,3 +164,81 @@ before `BuildStep()` to initialize it to a clean state. #### Scenario: Reset called before BuildStep - **WHEN** a Step that implements both `Reset()` and `BuildStep()` is added to a Workflow - **THEN** `Reset()` is called before `BuildStep()` is called + +### Requirement: Config merge destination follows StateOf + +A `StepConfig` declared via `workflow.Add(flow.Step(s).X(...))` SHALL be merged into the +innermost workflow whose `state` map contains `s` as a key, not into the workflow on +which `Add` was invoked. + +This rule SHALL apply uniformly to all `StepConfig` fields: `Upstreams` (via the +`Cross-boundary upstream registration` rule in `composite-steps`), `Before`, `After`, +and `Option`. + +The destination is computed by `StateOf(s)`, which: + +1. Returns `w.state[s]` if `s` is a direct root of `w`. +2. Otherwise walks roots of `w` and, for each root that implements + `StateOf(Steper) *State` (i.e. `*Workflow` and `SubWorkflow`-embedding steps), + recursively asks that root for `StateOf(s)`. +3. Returns the deepest non-nil match. + +This is existing runtime behavior; this requirement codifies it because the Mutator +mechanism (`mutators` capability) relies on it being well-defined: a Mutator-returned +Builder is merged through the same `StateOf`-driven destination logic. + +#### Scenario: Add on outer merges Input into inner state +- **GIVEN** an `inner` workflow containing step `x`, and `outer` containing `inner` as a step +- **WHEN** `outer.Add(flow.Step(x).Input(fn))` is called +- **THEN** `fn` is appended to `inner.StateOf(x).Config.Before` +- **AND** `outer.StateOf(inner).Config.Before` does NOT contain `fn` + +#### Scenario: Add on outer merges Option into inner state +- **GIVEN** an `inner` workflow containing step `x`, and `outer` containing `inner` as a step +- **WHEN** `outer.Add(flow.Step(x).Timeout(5 * time.Minute))` is called +- **THEN** the timeout takes effect on `x` via `inner.StateOf(x).Config.Option` + +#### Scenario: Multi-level nesting reaches innermost +- **GIVEN** `outer → midA → midB → x` +- **WHEN** `outer.Add(flow.Step(x).Input(fn))` is called +- **THEN** `fn` ends up in `midB.StateOf(x).Config.Before` (the innermost workflow that + has `x` in its `state` map), not in `outer` or `midA` + +--- + +### Requirement: Mutators field on Workflow + +`flow.Workflow` SHALL expose a `Mutators []Mutator` field that holds zero or more +`flow.Mutator` instances (constructed via `flow.Mutate[T]`). + +Mutators are evaluated **once per step instance**, just before the step's first attempt +begins. Each matched Mutator contributes a `flow.Builder` whose configuration for the +matched step is merged into the step's existing `StepConfig` using the same merge rules +as the `Idempotent Add with config merging` requirement (`Upstreams` union, `Before` +append, `After` append, `Option` append). + +After the merge, the step proceeds through the standard per-attempt onion (Interceptors, +retry loop, Before chain, Do, After chain). The Mutator's contribution is therefore +visible to all attempts of the step. + +The `Mutators` slice is independent of `Before` / `After` / `Option` configuration on +`StepConfig`: Mutators are workflow-level (apply to all matching steps), while +`StepConfig` is per-step-instance. + +#### Scenario: Mutator contribution merges with plan-declared StepConfig +- **WHEN** a workflow has `Mutators = [flow.Mutate[*Foo](func(ctx context.Context, f *Foo) flow.Builder { + return flow.Step(f).Input(callbackA) })]` and a `*Foo` step is added with + `flow.Step(foo).Input(callbackB)` +- **THEN** before the first attempt of `foo`, both `callbackA` and `callbackB` are + present in the step's `Before` chain (Mutator contribution merged into existing config) + +#### Scenario: Mutator contribution survives across attempts +- **WHEN** a Mutator contributes a `Before` callback and the step retries 3 times +- **THEN** the contributed callback runs on every attempt (because it lives in the + step's permanent `Before` chain after merge) + +#### Scenario: Mutator runs once per step +- **WHEN** a Mutator's user function appends to a workflow-scoped slice and the matched + step retries 3 times +- **THEN** the slice has exactly one new entry (Mutator user function ran once) +