From ba615c1d5303f0ff5abc08d247563204c4f6c73b Mon Sep 17 00:00:00 2001 From: jayaprabhakar Date: Sun, 17 May 2026 18:43:24 -0700 Subject: [PATCH] feat(no_graph): auto-replay trace with full per-step state on failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When --experimental_no_graph reports a failure, the lightweight action- name trace it prints is enough to know WHAT failed but not the variable values at each step (no_graph drops Process state after expansion). Make the model checker re-exec itself with --trace-file pointing at the trace so the standard default-mode error report is printed below — converting the spartan no_graph trace into the same rich state-by- state output the user would have seen without --experimental_no_graph. For the replay to faithfully reproduce the failure, the trace must contain EVERY non-thread-X link name that guided-mode's ShouldScheduleNode would match against — not just action transitions. Two pathTail-population sites that previously inherited the parent's path are switched to extend it with the link name: - processNode's intermediate-fork loop (non-yield branches): captures "Stmt:N" / "Any:var=value" / "For:value" / "checkpoint" — the choices made inside oneof / parallel-for / etc. - scheduleChannelMessages: captures "channel-X-message-Y". The remaining inheritPath sites (YieldNode/YieldFork thread continuations producing "thread-X") stay as-is because trace.go auto-schedules thread-X links without consulting the trace. Mechanism: - Trace recorded by no_graph is now an exact replay-able sequence of Inbound link names default mode would produce. - replayTraceWithFullState writes the trace to outDir/trace.txt (so the user has it on disk) and execs the current binary with --trace-file + the same .json. The replay run goes through the standard error-report code path that prints per-step state. - Called from both no_graph failure sites: early-deadlock detection and invariant-failure reporting. Edge cases: - Replay run uses default mode (graph kept), bounded by trace length — cheap regardless of original state-space size. - Missing jsonFilename or empty path → skip silently. - Exec failure → print fallback "replay manually" hint. - For deadlock failures, the replay shows the state at the deadlock point but doesn't itself say "DEADLOCK detected" (guided trace doesn't run post-traversal deadlock detection). The no_graph output already labels it as DEADLOCK, so the user sees both. Threading the jsonFilename through modelCheckSingleSpec required adding it as a parameter; the 4 call sites are updated with the appropriate json file (main / composition / refinement / impl). Verified: - 94/94 reference suite passes (default-mode paths unchanged). - Simple invariant failure with role action: replay reproduces exact per-step state. - oneof block ("Stmt:N"): replay captures the failing choice branch and reproduces the failure state. - oneof expression ("Any:var=value"): same. - Deadlock: replay shows the deadlocked state. Co-Authored-By: Claude Opus 4.7 (1M context) --- main.go | 56 +++++++++++++++++++++++++++++++++++---- modelchecker/processor.go | 14 +++++++--- 2 files changed, 62 insertions(+), 8 deletions(-) diff --git a/main.go b/main.go index 35bb046..c76af0a 100644 --- a/main.go +++ b/main.go @@ -11,6 +11,7 @@ import ( "google.golang.org/protobuf/encoding/protojson" "google.golang.org/protobuf/proto" "os" + "os/exec" "os/signal" "path/filepath" "runtime/pprof" @@ -124,7 +125,7 @@ func main() { return } } - modelCheckSingleSpec(f, stateConfig, dirPath, outDir, sourceFileName, nil) + modelCheckSingleSpec(f, stateConfig, dirPath, outDir, sourceFileName, jsonFilename, nil) } } @@ -202,7 +203,7 @@ func runCompositionalModelChecking(f *ast.File, dirPath string, outDir string) { return } fmt.Println("Model checking composed spec:", composedSourceFileName) - root := modelCheckSingleSpec(composedFile, composedStateConfig, dirPath, composedOutDir, composedSourceFileName, nil) + root := modelCheckSingleSpec(composedFile, composedStateConfig, dirPath, composedOutDir, composedSourceFileName, composedJsonFileName, nil) if root == nil { fmt.Println("Error in model checking composed spec:", spec.Name, "Aborting") return @@ -284,7 +285,7 @@ func runRefinementModelChecking(f *ast.File, dirPath string, outDir string, json } fmt.Println("Model checking abstract spec:", abstractSourceFile) - root := modelCheckSingleSpec(abstractFile, stateConfig, dirPath, abstractOutDir, abstractSourceFile, nil) + root := modelCheckSingleSpec(abstractFile, stateConfig, dirPath, abstractOutDir, abstractSourceFile, abstractJsonFile, nil) if root == nil { fmt.Println("Error in model checking abstract spec:", spec.Name) return @@ -324,7 +325,7 @@ func runRefinementModelChecking(f *ast.File, dirPath string, outDir string, json return } - root := modelCheckSingleSpec(f, stateConfig, dirPath, implOutDir, f.SourceInfo.GetFileName(), joinHashes) + root := modelCheckSingleSpec(f, stateConfig, dirPath, implOutDir, f.SourceInfo.GetFileName(), jsonFilename, joinHashes) if root == nil { fmt.Println("Error in model checking implementation spec") return @@ -544,7 +545,50 @@ func addToJoinHashes(joinHashes modelchecker.JoinHashes, i int, from, to *modelc joinHashes[key][i][pair] = true } -func modelCheckSingleSpec(f *ast.File, stateConfig *ast.StateSpaceOptions, dirPath string, outDir string, sourceFileName string, hashes modelchecker.JoinHashes) *modelchecker.Node { +// replayTraceWithFullState writes the action-name path to a trace file +// (so the user has it on disk too) and re-execs the binary with +// --trace-file pointing at it. The replay runs in default mode (with +// graph) so the standard error-report machinery prints the per-step +// state — converting the lightweight no_graph trace into the full +// rich output that the user would have seen without --experimental_no_graph. +// Best-effort: failures to write/exec just print a fallback command. +func replayTraceWithFullState(path []string, outDir, jsonFilename string) { + if len(path) == 0 || jsonFilename == "" { + return + } + traceFile := filepath.Join(outDir, "trace.txt") + body := strings.Join(path, "\n") + "\n" + if err := os.WriteFile(traceFile, []byte(body), 0644); err != nil { + fmt.Printf("(replay skipped: %v)\n", err) + return + } + self, err := os.Executable() + if err != nil { + fmt.Printf("(replay skipped: %v)\nReplay manually with:\n fizz --trace-file %s \n", err, traceFile) + return + } + // Preserve flags that change which spec state space the replay sees. + // preinit-hook flags override spec constants; if we drop them the + // replay reads a different spec (different NUM_WRITERS, etc.) and the + // trace won't match anything. Everything else is safe to leave at + // default for the replay — guided trace fixes the path either way. + args := []string{"--trace-file", traceFile} + if preinitHookFile != "" { + args = append(args, "--preinit-hook-file", preinitHookFile) + } + if preinitHook != "" { + args = append(args, "--preinit-hook", preinitHook) + } + args = append(args, jsonFilename) + fmt.Println() + fmt.Println("=== Replaying trace with full per-step state ===") + cmd := exec.Command(self, args...) + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + _ = cmd.Run() +} + +func modelCheckSingleSpec(f *ast.File, stateConfig *ast.StateSpaceOptions, dirPath string, outDir string, sourceFileName string, jsonFilename string, hashes modelchecker.JoinHashes) *modelchecker.Node { // Parse trace if provided (either from file or string) var guidedTrace *modelchecker.GuidedTrace if traceFile != "" { @@ -652,6 +696,7 @@ func modelCheckSingleSpec(f *ast.File, stateConfig *ast.StateSpaceOptions, dirPa for _, step := range path { fmt.Printf(" %s\n", step) } + replayTraceWithFullState(path, outDir, jsonFilename) } else { dumpFailedNode(sourceFileName, earlyDead, rootNode, outDir) } @@ -687,6 +732,7 @@ func modelCheckSingleSpec(f *ast.File, stateConfig *ast.StateSpaceOptions, dirPa for _, step := range path { fmt.Printf(" %s\n", step) } + replayTraceWithFullState(path, outDir, jsonFilename) return nil } fmt.Printf("Visited entries: %d Unique states: %d\n", p1.GetVisitedNodesCount(), p1.GetUniqueYieldCount()) diff --git a/modelchecker/processor.go b/modelchecker/processor.go index c7e097e..4f19dc3 100644 --- a/modelchecker/processor.go +++ b/modelchecker/processor.go @@ -2145,7 +2145,11 @@ func (p *Processor) processNode(node *Node) (bool, bool) { for _, fork := range forks { newNode := node.ForkForAlternatePaths(fork, fork.Name) if p.ShouldScheduleNode(newNode) { - p.inheritPath(node, newNode) + // Intermediate forks carry choice information in fork.Name: + // "Stmt:N" / "Any:var=value" / "For:value" / "checkpoint". + // These need to be in the trace so guided-trace replay can + // re-make the same nondeterministic choices. + p.extendPath(node, newNode, fork.Name) p.breakParentRef(newNode) p.intermediateStates.Add(newNode) } @@ -2969,7 +2973,8 @@ func (p *Processor) YieldFork(node *Node, process *Process) { func (p *Processor) scheduleChannelMessages(node *Node) { for i, msgs := range node.ChannelMessages { for j, _ := range msgs { - newNode := node.ForkForAlternatePaths(node.Process.Fork(), fmt.Sprintf("channel-%d-message-%d", i, j)) + linkName := fmt.Sprintf("channel-%d-message-%d", i, j) + newNode := node.ForkForAlternatePaths(node.Process.Fork(), linkName) newMsg := newNode.ChannelMessages[i][j] newNode.ChannelMessages[i] = append(newNode.ChannelMessages[i][:j], newNode.ChannelMessages[i][j+1:]...) thread := newNode.Process.NewThread() @@ -2980,7 +2985,10 @@ func (p *Processor) scheduleChannelMessages(node *Node) { thread.Stack.Push(frame) if p.ShouldScheduleNode(newNode) { - p.inheritPath(node, newNode) + // Channel-message links are matched against the trace + // during replay (unlike thread-X which are auto-scheduled), + // so they must be in the trace. + p.extendPath(node, newNode, linkName) p.breakParentRef(newNode) p.enqueueScheduled(newNode) }