diff --git a/README.md b/README.md index 922ba7c..e469452 100644 --- a/README.md +++ b/README.md @@ -496,12 +496,26 @@ opens a reverse-incremental search through history — each keystroke narrows the match, Ctrl-R again walks to the next older one, Esc restores the original line, Enter accepts. -Press `<` to rewind one instruction. Each explicit step (`s`, `S`, `n`, -`f`) records a full CPU + RAM snapshot beforehand, kept in a 256-entry -FIFO ring; the status bar shows `rwd:N` while non-empty. Free-run via -`r` does NOT snapshot — the 64 KiB-per-step cost would dominate at multi-MHz -throughput — so reverse-step covers single-stepping sessions, not whole -program executions. +Press `<` to rewind one instruction. Every step — explicit (`s`, `S`, `n`, +`f`) or free-run (`r`) — records a page-level copy-on-write delta beforehand, +kept in a 256-entry FIFO ring; the status bar shows `rwd:N` while non-empty. + +For jumps deeper than that ring, **deep rewind** keeps periodic full-RAM +*keyframes* (one every 4096 steps) and reconstructs any earlier step by +restoring the nearest keyframe and replaying forward to the exact target: + +| Command | Effect | +|--------------------|-------------------------------------------------------------| +| `:rewind N` | Step back N executed steps (keyframe replay for deep jumps) | +| `:rewind-budget MB`| Cap keyframe memory; sets the deep-rewind reach | + +Reach (steps) = `budget / 64 KiB × 4096`. At the default 128 MiB cap that's +~8.4M steps; `:rewind-budget 256` reaches ~16.7M. The budget is a ceiling — +a short run holds only the keyframes it produced — and the status bar shows +`deep:8.4M@128MiB` (reach@budget) once keyframes exist. A deep rewind replays at most +4096 instructions (sub-millisecond on the cycle-accurate core). Replay assumes +deterministic execution between keyframes; live keyboard input is captured in +the snapshot, so buffered input replays correctly. 
--- diff --git a/cpu/keyframe.go b/cpu/keyframe.go new file mode 100644 index 0000000..8dde983 --- /dev/null +++ b/cpu/keyframe.go @@ -0,0 +1,146 @@ +package cpu + +// Keyframe-based deep rewind (issue #392). +// +// The per-step SnapshotRing only reaches back as far as its capacity (a few +// hundred steps) — fine for "oops, step back one" but useless for "rewind to +// somewhere in the last few million steps". Storing a delta for every one of +// those steps is infeasible, so deep rewind instead keeps periodic *full* +// machine snapshots (keyframes) and reconstructs an arbitrary earlier state +// by restoring the nearest keyframe at or before the target step and +// replaying forward the handful of steps in between. +// +// reach (steps) = ring capacity (keyframes) × keyframe interval (steps) +// ring capacity = budget bytes / KeyframeBytes +// +// Memory is a *cap*, not a preallocation: a short run holds only as many +// keyframes as it produced. Forward-replay cost is bounded by the interval, +// so a larger interval trades replay latency for reach at a fixed budget. + +// KeyframeBytes is the accounting size of one keyframe: a full 64 KiB RAM +// image. Register/peripheral state is negligible next to it, so the budget +// math treats every keyframe as this fixed size. +const KeyframeBytes = 0x10000 + +// Keyframe is a full machine snapshot tagged with the step index at which it +// was taken. Snap.Pages holds every page (a complete RAM image), so Restore +// reconstructs the exact state with no delta chain. +type Keyframe struct { + Step uint64 + Snap Snapshot +} + +// SnapshotFull captures a complete RAM image (all 256 pages) plus registers, +// suitable for use as a keyframe base. Unlike CPU.Snapshot — which records +// only a page delta for undoing a single step — this is self-contained: +// Restore needs nothing else. Peripherals are filled in by the caller, as +// with the delta path. 
+func (c *CPU) SnapshotFull(ram *RAM) Snapshot { + s := c.Snapshot(ram) + pages := make(map[byte][256]byte, 256) + for p := 0; p < 256; p++ { + var img [256]byte + base := p << 8 + copy(img[:], ram.Data[base:base+256]) + pages[byte(p)] = img + } + s.Pages = pages + return s +} + +// KeyframeRing is a fixed-capacity FIFO of keyframes ordered by ascending +// step. Push appends the newest; when full it drops the oldest, so the ring +// always holds the most recent `cap` keyframes. Nil receiver methods are +// safe and behave as an empty, zero-capacity ring. +type KeyframeRing struct { + buf []Keyframe + head int // next-write index + size int + cap int +} + +// NewKeyframeRing builds a ring sized to hold budgetBytes worth of keyframes. +// A budget too small for even one keyframe still yields a 1-slot ring so deep +// rewind degrades to "nearest keyframe" rather than disabling outright; a +// non-positive budget yields nil (feature off). +func NewKeyframeRing(budgetBytes int) *KeyframeRing { + if budgetBytes <= 0 { + return nil + } + c := budgetBytes / KeyframeBytes + if c < 1 { + c = 1 + } + return &KeyframeRing{buf: make([]Keyframe, c), cap: c} +} + +// Cap returns the ring's keyframe capacity (0 for a nil ring). +func (r *KeyframeRing) Cap() int { + if r == nil { + return 0 + } + return r.cap +} + +// Len returns the number of keyframes currently held. +func (r *KeyframeRing) Len() int { + if r == nil { + return 0 + } + return r.size +} + +// Bytes is the approximate resident size of the held keyframes. +func (r *KeyframeRing) Bytes() int { + return r.Len() * KeyframeBytes +} + +// Push appends a keyframe. Callers are responsible for pushing in ascending +// step order (the TUI does, since it captures during forward execution). 
+func (r *KeyframeRing) Push(kf Keyframe) { + if r == nil || r.cap == 0 { + return + } + r.buf[r.head] = kf + r.head = (r.head + 1) % r.cap + if r.size < r.cap { + r.size++ + } +} + +// Nearest returns the latest keyframe whose Step is <= target, and true. When +// the ring is empty or every held keyframe is newer than target (target fell +// off the back of the reach window), it returns false. +func (r *KeyframeRing) Nearest(target uint64) (Keyframe, bool) { + if r == nil || r.size == 0 { + return Keyframe{}, false + } + // Entries run oldest..newest starting at (head - size). Scan newest-first + // and take the first with Step <= target. + for i := 0; i < r.size; i++ { + idx := (r.head - 1 - i + r.cap) % r.cap + if r.buf[idx].Step <= target { + return r.buf[idx], true + } + } + return Keyframe{}, false +} + +// Oldest returns the lowest step still reachable (the back of the window) and +// true, or (0,false) when empty. Used to report reach to the user. +func (r *KeyframeRing) Oldest() (uint64, bool) { + if r == nil || r.size == 0 { + return 0, false + } + idx := (r.head - r.size + r.cap) % r.cap + return r.buf[idx].Step, true +} + +// Reset drops all keyframes without freeing the backing buffer. +func (r *KeyframeRing) Reset() { + if r == nil { + return + } + r.head = 0 + r.size = 0 +} diff --git a/cpu/keyframe_test.go b/cpu/keyframe_test.go new file mode 100644 index 0000000..5c5e4bf --- /dev/null +++ b/cpu/keyframe_test.go @@ -0,0 +1,95 @@ +package cpu + +import "testing" + +func TestSnapshotFull_RoundTrip(t *testing.T) { + ram := NewRAM() + ram.EnableShadow() + c := New(ram) + for a := 0; a < 0x10000; a += 257 { + ram.Data[a] = byte(a) + } + c.A, c.X, c.PC = 0x11, 0x22, 0x9000 + + kf := c.SnapshotFull(ram) + if len(kf.Pages) != 256 { + t.Fatalf("SnapshotFull captured %d pages; want 256", len(kf.Pages)) + } + // Mutate everything, then restore. 
+ for a := 0; a < 0x10000; a++ { + ram.Data[a] = 0xEE + } + c.A, c.X, c.PC = 0, 0, 0 + c.Restore(kf, ram) + if c.A != 0x11 || c.X != 0x22 || c.PC != 0x9000 { + t.Errorf("regs not restored: A=%02X X=%02X PC=%04X", c.A, c.X, c.PC) + } + for a := 0; a < 0x10000; a += 257 { + if ram.Data[a] != byte(a) { + t.Fatalf("RAM[%04X] = %02X; want %02X", a, ram.Data[a], byte(a)) + } + } +} + +func TestKeyframeRing_CapFromBudget(t *testing.T) { + if r := NewKeyframeRing(0); r != nil { + t.Error("zero budget should yield nil ring") + } + // 64 MiB / 64 KiB = 1024. + if r := NewKeyframeRing(64 << 20); r.Cap() != 1024 { + t.Errorf("cap = %d; want 1024", r.Cap()) + } + // Sub-keyframe budget still yields a 1-slot ring. + if r := NewKeyframeRing(100); r.Cap() != 1 { + t.Errorf("tiny budget cap = %d; want 1", r.Cap()) + } +} + +func TestKeyframeRing_NearestAndEviction(t *testing.T) { + r := NewKeyframeRing(3 * KeyframeBytes) // cap 3 + for _, step := range []uint64{0, 1000, 2000, 3000} { + r.Push(Keyframe{Step: step}) + } + // Cap 3 -> step 0 evicted; window is {1000,2000,3000}. 
+ if old, _ := r.Oldest(); old != 1000 { + t.Errorf("oldest = %d; want 1000", old) + } + cases := []struct { + target uint64 + step uint64 + ok bool + }{ + {3500, 3000, true}, + {3000, 3000, true}, + {2999, 2000, true}, + {2000, 2000, true}, + {1000, 1000, true}, + {999, 0, false}, // older than the back of the window + } + for _, c := range cases { + kf, ok := r.Nearest(c.target) + if ok != c.ok || (ok && kf.Step != c.step) { + t.Errorf("Nearest(%d) = (%d,%v); want (%d,%v)", c.target, kf.Step, ok, c.step, c.ok) + } + } +} + +func TestKeyframeRing_Bytes(t *testing.T) { + r := NewKeyframeRing(10 * KeyframeBytes) + r.Push(Keyframe{Step: 0}) + r.Push(Keyframe{Step: 1}) + if got := r.Bytes(); got != 2*KeyframeBytes { + t.Errorf("Bytes = %d; want %d", got, 2*KeyframeBytes) + } +} + +func TestKeyframeRing_NilSafe(t *testing.T) { + var r *KeyframeRing + r.Push(Keyframe{}) + if r.Len() != 0 || r.Cap() != 0 || r.Bytes() != 0 { + t.Error("nil ring should report zero") + } + if _, ok := r.Nearest(5); ok { + t.Error("nil ring Nearest should be false") + } +} diff --git a/docs/context.md b/docs/context.md index 70bf6e2..1cb7011 100644 --- a/docs/context.md +++ b/docs/context.md @@ -143,6 +143,7 @@ Bus chain: `CPU → tui.WBus → cpu.MMIO → cpu.RAM` - #1, #2, #3, #7, #8 (cycle audit), #9 (65C02), #10 (IRQ/NMI), #11–#15 ### Merged PRs of note +- Deep rewind via keyframes (issue #392, v1.3.0): the per-step `SnapshotRing` only reaches back its capacity (256 steps) — fine for "step back one", useless for "rewind into the last few million steps". Added keyframe-based deep rewind: `cpu.KeyframeRing` holds periodic full-RAM snapshots (`CPU.SnapshotFull` captures all 256 pages; one keyframe every `keyframeInterval`=4096 steps), and `:rewind N` reconstructs any earlier step by restoring the nearest keyframe ≤ target (`KeyframeRing.Nearest`) and replaying forward to the exact step (`rewindToStep` → `stepReplay` loop under a `replayingRewind` guard so replay doesn't re-capture keyframes). 
Small jumps still pop the fine ring exactly. `:rewind-budget MB` resizes the ring (cap = budget/64KiB); reach = cap × interval, shown in the status bar as `deep:8.4M@128MiB` (reach@budget). **Note — the issue's own numbers are mutually inconsistent**: full 64 KiB keyframes every 1k steps can't reach 10M under 256 MiB (that's ~4M). Used interval 4096 instead so 256 MiB reaches ~16.7M while forward-replay stays ≤4096 instructions (benchmarked **1.3 ms** incl. replay, vs the 100 ms acceptance). Memory is a *cap* not a reservation — the ring only fills to the run length; the old "fixed 256-entry ring" already sat at ≤16 MiB so the issue's "ring grows" framing was off. A step-0 keyframe is seeded on the first step so sub-interval targets are reachable. `StepCount` tracks position; `<` and reset keep it in sync. Determinism caveat: forward replay assumes deterministic execution between keyframes (buffered keyboard input is snapshotted, so it replays). Deltas-from-previous-keyframe compression is a future optimisation. No state-format change (StepCount/keyframes are ephemeral). `cpu` ring logic unit-tested apart from the TUI; deep-rewind exactness verified byte-for-byte against a RAM-mutating loop ROM. - Trace replay — search / jump-to-cycle / diff (issue #391, v1.3.0): four navigation features on top of `-trace-replay` (issue #64's playback). (1) **`:find EXPR` / `:rfind EXPR`** — jump to the next/previous frame matching an expression over the frame's registers/flags, reusing the breakpoint-condition `expr` grammar against a scratch CPU loaded per frame (`framePredicate`). A bare `=` is normalised to `==` (`normalizeFindExpr`) so `:find PC=$8042` works as users type it; bare `:find` repeats the last expression to sweep matches. (2) **`:cycle N`** — `Replay.SeekCycle` binary-searches the monotonic cycle column (O(log N) on a 1M-frame trace). 
(3) **`-diff PATH`** — loads a second trace; `trace.Diff` walks both by index and returns the first `Frame.Equal` mismatch (or a length-mismatch divergence at the shorter trace's end) as `trace.Divergence{Index,Cycle,Found}`, computed eagerly in `WithReplayDiff` and surfaced in the status line. (4) **`d` / `D`** — `d` toggles a side-by-side diff overlay (`diffModal`, double-bordered like the help modal) centred on the primary cursor with mismatched frames in red + a `✗` gutter at the divergence; `D` jumps both cursors there. Pure-`trace` logic (SeekCycle/FindFunc/Diff/Frame.Equal) is unit-tested separately from the TUI wiring. No state-format change. - Watch panel array expansion (issue #390, v1.3.0): `:watch` learns an `xN` (or `[N]`) array token — `:watch grid word x16` pins 16 consecutive LE words and renders them as indexed rows `grid[0..15]` (header `[16]`, first `maxWatchElemRows`=8 shown, rest collapsed to `… +N more`). Element width = the watch's `byte`/`word` kind; addresses are `Addr + i*Width`. `symbols.Table` now parses the cc65 `sym size=` field (`Size(addr)`) and seeds the count automatically when present — but **the issue's premise was false**: cc65 V2.18 `.dbg` carries *no* struct member layout, array bounds, or element types. C globals get bare `sym ... type=lab` records with no `size=`; even local `csym` records collapse every type to `type id=0 val="00"` (void). So struct-tree expansion is impossible from `.dbg` and the auto-seed rarely fires for data globals — `xN` is the workhorse. Scoped to array-only best-effort per that finding; struct overlays + DAP `variables` array children deferred (DAP has no globals scope yet). New `Watch.Count` is an optional v1 state field (omitempty, no schema bump). Tests: `symbols` size parse, `:watch xN`/`[N]` parsing + element addressing, panel render + truncation. 
- Blargg `apu_test` 4/8 → 8/8 PASS — Mesen2 frame-counter substeps + DMC alignment (PRs #379-#382, nessy v0.10): wired Blargg's `apu_test.nes` (8 sub-tests: len_ctr, len_table, irq_flag, irq_timing, len_timing, irq_flag_timing, dmc_basics, dmc_rates) into the accuracy harness (#379) and closed every gap it surfaced over three follow-up PRs. (1) **6 internal frame-counter sub-steps** (#380) — Mesen2 `ApuFrameCounter.h:19` table encodes the user-visible 'step 3' of 4-step mode as 3 CPU cycles (29828, 29829, 29830) where IRQ asserts continuously and the half-frame tick fires at cycle 29829. chippy's 4-entry interval table from #377 fired the tick at 29828; replaced with `frameStepIntervalsNtsc4Step = [6]int{7456, 7458, 7457, 1, 1, 7457}` + 5-step analogue, switch in `advanceFrameStep` extended to 6 cases (step 3 = IRQ-only, step 4 = q+h+IRQ, step 5 = idle/reset for 4-step). Cleared 5-len_timing. (2) **DMC buffer-fill + enable-fetch + $4015 read** (#381) — three real-silicon DMC behaviors chippy was getting wrong: `maybeRefill` was silencing whenever `bufferEmpty=true` at the 8-bit boundary instead of only when `bytesRemaining=0` too; `setEnabled` didn't schedule the initial DMA fetch (Mesen `SetEnabled` does via `transferStartDelay`); $4015 read was clearing the DMC IRQ flag (per nesdev + Mesen `NesApu.cpp:101`, only frame-counter IRQ is cleared by $4015 read — DMC IRQ acks via $4015 write or $4010 bit-7 clear). dmcChannel now inits with `bufferEmpty=true`+`silenced=true`. Cleared 7-dmc_basics' 18 sub-tests. (3) **Mesen-aligned DMC Clock** (#382) — three compounding structural mismatches: chippy burned an extra 'reload-only' fire per byte (each byte = 9 fires instead of Mesen's 8), the timer reload was period+1 cycles between fires (429 vs Mesen's 428), and the fetch-schedule check only ran at byte boundaries. 
Replaced `clockShift`+`maybeRefill` with a unified `clock()` mirroring Mesen `DeltaModulationChannel::Run`'s inner body: always shift+decrement, reload at `bitsRemaining=0` boundary, schedule fetch on every clock when buffer-empty+bytes-pending. Initialise `bitsRemaining=8` (matches Mesen `Reset:36`). Cleared 8-dmc_rates' 16 rates × 2 boundary checks. **All four accuracy ROMs now PASS**: `ppu_vbl_nmi` 10/10, `instr_timing`, `cpu_interrupts_v2` 5/5, `apu_test` 8/8. No regression on nestest / Klaus / demo SHAs. The DMC restructure also fixes any ROM that uses delta samples — the rate timing was off by ~12% before. Refs #318 (rolling accuracy tracker). diff --git a/internal/tui/complete.go b/internal/tui/complete.go index 888638b..dd9a704 100644 --- a/internal/tui/complete.go +++ b/internal/tui/complete.go @@ -23,6 +23,7 @@ var defaultVerbs = func() []string { "syms", "symbols", "mem", "find", "rfind", "cycle", + "rewind", "rewind-budget", "trace", "textsave", "theme", diff --git a/internal/tui/model.go b/internal/tui/model.go index 4adcff7..49e5dc8 100644 --- a/internal/tui/model.go +++ b/internal/tui/model.go @@ -213,6 +213,17 @@ type Model struct { // the runtime cost. Nil disables the feature; default cap is set in New. Rewind *rewindRing + // Deep rewind (issue #392). StepCount counts every executed step since + // the last reset. Keyframes holds periodic full-RAM snapshots (one every + // keyframeInterval steps) so `:rewind N` can reconstruct a state far + // beyond the fine ring's depth by restoring the nearest keyframe and + // replaying forward. RewindBudgetMB caps keyframe memory; the ring drops + // the oldest keyframe when full, so reach = budget/64KiB × interval. + StepCount uint64 + Keyframes *cpu.KeyframeRing + RewindBudgetMB int + replayingRewind bool // suppresses keyframe capture during forward replay + // Immediate window — a modal REPL over the chippy expression grammar. // `I` opens, Esc closes; while open, all keystrokes feed // updateImmediate. 
Each Enter compiles + evaluates the buffer against @@ -292,22 +303,24 @@ func New(c *cpu.CPU, r *cpu.RAM) Model { applyTheme(t) rewind := newRewindRing(defaultRewindCap) m := Model{ - CPU: c, - RAM: r, - Breakpoints: map[uint16]*Breakpoint{}, - MemBPs: map[uint16]*MemBP{}, - MemViewAddr: 0x0000, - Status: "ready", - TargetHz: 0, - DisasmFollow: true, - SourceFollow: true, - StackAnnotate: true, - HistIdx: -1, - RIMatchIdx: -1, - Rewind: rewind, - Theme: string(t), - W: 120, - H: 40, + CPU: c, + RAM: r, + Breakpoints: map[uint16]*Breakpoint{}, + MemBPs: map[uint16]*MemBP{}, + MemViewAddr: 0x0000, + Status: "ready", + TargetHz: 0, + DisasmFollow: true, + SourceFollow: true, + StackAnnotate: true, + HistIdx: -1, + RIMatchIdx: -1, + Rewind: rewind, + RewindBudgetMB: defaultRewindBudgetMB, + Keyframes: cpu.NewKeyframeRing(defaultRewindBudgetMB << 20), + Theme: string(t), + W: 120, + H: 40, } m.Source = NewLocalSource(c, r) return m @@ -704,6 +717,8 @@ func (m Model) Update(msg tea.Msg) (tea.Model, tea.Cmd) { if m.Rewind != nil { m.Rewind.Reset() } + m.Keyframes.Reset() + m.StepCount = 0 m.Status = "reset" case "<": if m.TraceReplay != nil { @@ -719,6 +734,9 @@ func (m Model) Update(msg tea.Msg) (tea.Model, tea.Cmd) { s, _ := m.Rewind.Pop() m.CPU.Restore(s, m.RAM) m.restoreperipherals(s) + if m.StepCount > 0 { + m.StepCount-- + } m.Status = fmt.Sprintf("rewind -> $%04X (depth %d)", m.CPU.PC, m.Rewind.Len()) case "I": m.ImmediateActive = true @@ -935,17 +953,50 @@ func (m *Model) step() int { defer m.CPUMu.Unlock() } if m.Rewind == nil { - return m.Source.Step() + n := m.Source.Step() + m.StepCount++ + return n } + m.seedKeyframe() snap := m.CPU.Snapshot(m.RAM) m.captureperipherals(&snap) m.RAM.ResetShadow() n := m.Source.Step() snap.Pages = m.RAM.TakeShadow() m.Rewind.Push(snap) + m.StepCount++ + m.maybeKeyframe() return n } +// seedKeyframe captures the step-0 keyframe — the machine state before the +// very first step — so deep rewinds to any target below the first 
interval +// boundary have a base to replay forward from. Runs once per run (guarded by +// StepCount==0) and never during replay. +func (m *Model) seedKeyframe() { + if m.Keyframes == nil || m.replayingRewind || m.StepCount != 0 || m.Keyframes.Len() > 0 { + return + } + kf := cpu.Keyframe{Step: 0, Snap: m.CPU.SnapshotFull(m.RAM)} + m.captureperipherals(&kf.Snap) + m.Keyframes.Push(kf) +} + +// maybeKeyframe captures a full-RAM keyframe at every keyframeInterval-th +// step so `:rewind` can reach far past the fine ring. Skipped while replaying +// (the keyframes for that span already exist) and when deep rewind is off. +func (m *Model) maybeKeyframe() { + if m.Keyframes == nil || m.replayingRewind { + return + } + if m.StepCount%keyframeInterval != 0 { + return + } + kf := cpu.Keyframe{Step: m.StepCount, Snap: m.CPU.SnapshotFull(m.RAM)} + m.captureperipherals(&kf.Snap) + m.Keyframes.Push(kf) +} + // captureperipherals fills the snapshot's Peripherals map with the // current state of every wired MMIO device. Keys are the peripheral's // base MMIO address as `"$XXXX"` so restore can route bytes back to @@ -1333,6 +1384,9 @@ func (m Model) View() string { rewindSeg := "" if d := m.Rewind.Len(); d > 0 { rewindSeg = fmt.Sprintf(" │ rwd:%d", d) + if m.Keyframes.Len() > 0 { + rewindSeg += fmt.Sprintf(" deep:%s@%dMiB", humanCount(m.rewindReachSteps()), m.RewindBudgetMB) + } } statusText := fmt.Sprintf( " %s │ cyc=%d │ PC=$%04X │ %s%s │ [?] 
help [:] cmd [s/n] step [r] run [<] back [v] src [q] quit", @@ -1418,6 +1472,8 @@ func helpPages() [][]helpSection { {"n", "step over (run JSR to RTS)"}, {"f", "run to next source line"}, {"<", "rewind one step (snapshot ring; depth shown as `rwd:N`)"}, + {":rewind N", "rewind N steps (keyframe replay for deep jumps)"}, + {":rewind-budget MB", "cap keyframe memory; sets deep-rewind reach"}, {"r", "run / pause"}, {"R", "reset CPU"}, {"b", "toggle breakpoint at PC"}, diff --git a/internal/tui/prompt.go b/internal/tui/prompt.go index c50f459..a1270a0 100644 --- a/internal/tui/prompt.go +++ b/internal/tui/prompt.go @@ -259,6 +259,10 @@ func (m *Model) runCommand(line string) string { return m.cmdFind(args, -1) case "cycle": return m.cmdCycle(args) + case "rewind": + return m.cmdRewind(args) + case "rewind-budget": + return m.cmdRewindBudget(args) case "watch", "w": if len(args) == 0 { return "usage: :watch $XXXX [byte|word] [label] | :watch reg [label]" diff --git a/internal/tui/rewind.go b/internal/tui/rewind.go index 9526bb3..688f8e8 100644 --- a/internal/tui/rewind.go +++ b/internal/tui/rewind.go @@ -9,6 +9,23 @@ import "github.com/nkane/chippy/cpu" const defaultRewindCap = cpu.DefaultSnapshotRingCap +// Deep-rewind tuning (issue #392). +// +// keyframeInterval — steps between full-RAM keyframes. Forward-replay after +// a deep rewind costs at most this many steps, so it bounds latency; a +// larger value buys more reach per byte of budget. 4096 6502 instructions +// replay in well under a millisecond, far inside the 100 ms target. +// defaultRewindBudgetMB — cap on keyframe memory. Reach in steps is +// (budgetMB·1MiB / 64KiB) · interval. At 128 MiB that's 2048 · 4096 ≈ +// 8.4M steps; raise with `:rewind-budget` (256 MiB ≈ 16.7M). Memory is a +// cap, not a reservation — a short run holds only the keyframes it made. 
+const keyframeInterval uint64 = 4096 + +const ( + defaultRewindBudgetMB = 128 + maxRewindBudgetMB = 1024 +) + type rewindRing = cpu.SnapshotRing func newRewindRing(cap int) *rewindRing { diff --git a/internal/tui/rewind_deep.go b/internal/tui/rewind_deep.go new file mode 100644 index 0000000..1f6d802 --- /dev/null +++ b/internal/tui/rewind_deep.go @@ -0,0 +1,140 @@ +package tui + +import ( + "fmt" + "strconv" + + "github.com/nkane/chippy/cpu" +) + +// cmdRewind handles `:rewind N` — step the machine back N executed steps. +// Small jumps that still fit in the fine ring pop exact per-step deltas; +// larger jumps restore the nearest keyframe and replay forward to the exact +// target (issue #392). N must parse as a positive decimal integer; an N larger than StepCount is clamped, so the rewind lands on step 0. +func (m *Model) cmdRewind(args []string) string { + if m.Rewind == nil { + return "rewind: disabled" + } + if len(args) == 0 { + return "usage: :rewind N (steps back; see :rewind-budget)" + } + n, err := strconv.ParseUint(args[0], 10, 64) + if err != nil || n == 0 { + return fmt.Sprintf("rewind: bad count %q", args[0]) + } + if n > m.StepCount { + n = m.StepCount + } + target := m.StepCount - n + return m.rewindToStep(target) +} + +// rewindToStep moves execution back to the given absolute step index, +// updating the CPU, RAM, peripherals, StepCount, and the fine ring. Returns a +// status string. NOTE(review): keyframes whose Step is greater than target are left in m.Keyframes on both paths, so stepping forward again re-pushes keyframes for step indices the ring already holds; KeyframeRing.Push documents that callers push in ascending step order — confirm Nearest/Oldest and eviction behave as intended after a rewind. +func (m *Model) rewindToStep(target uint64) string { + if target >= m.StepCount { + return "rewind: already at or before that step" + } + delta := m.StepCount - target + + // Fast path: the fine ring still holds every step back to the target. + if delta <= uint64(m.Rewind.Len()) { + for i := uint64(0); i < delta; i++ { + s, ok := m.Rewind.Pop() + if !ok { + break + } + m.CPU.Restore(s, m.RAM) + m.restoreperipherals(s) + } + m.StepCount = target + return fmt.Sprintf("rewind -> step %d ($%04X)", m.StepCount, m.CPU.PC) + } + + // Deep path: restore the nearest keyframe at/before target, then replay + // forward the remainder. 
+ kf, ok := m.Keyframes.Nearest(target) + if !ok { + oldest := uint64(0) + if o, has := m.Keyframes.Oldest(); has { + oldest = o + } + return fmt.Sprintf("rewind: step %d beyond reach (oldest = %d; raise :rewind-budget)", + target, oldest) + } + m.CPU.Restore(kf.Snap, m.RAM) + m.restoreperipherals(kf.Snap) + m.StepCount = kf.Step + m.Rewind.Reset() + + m.replayingRewind = true + for m.StepCount < target { + m.stepReplay() + } + m.replayingRewind = false + return fmt.Sprintf("rewind -> step %d ($%04X, replayed %d from keyframe %d)", + m.StepCount, m.CPU.PC, target-kf.Step, kf.Step) +} + +// stepReplay advances one step during forward replay: it captures fine-ring +// deltas (so `<` works after landing) but skips keyframe capture and the DAP +// mutex dance — replay is always synchronous within a key handler. +func (m *Model) stepReplay() { + snap := m.CPU.Snapshot(m.RAM) + m.captureperipherals(&snap) + m.RAM.ResetShadow() + m.Source.Step() + snap.Pages = m.RAM.TakeShadow() + m.Rewind.Push(snap) + m.StepCount++ +} + +// cmdRewindBudget handles `:rewind-budget MB` — resize the keyframe memory +// cap. Rebuilding the ring drops existing keyframes (reach restarts from the +// next keyframe), so it reports the new reach. 
+func (m *Model) cmdRewindBudget(args []string) string { + if len(args) == 0 { + return fmt.Sprintf("rewind-budget = %d MiB (%s); usage: :rewind-budget MB", + m.RewindBudgetMB, m.rewindReachLabel()) + } + mb, err := strconv.Atoi(args[0]) + if err != nil || mb < 1 { + return fmt.Sprintf("rewind-budget: bad value %q (1-%d MiB)", args[0], maxRewindBudgetMB) + } + if mb > maxRewindBudgetMB { + mb = maxRewindBudgetMB + } + m.RewindBudgetMB = mb + m.Keyframes = cpu.NewKeyframeRing(mb << 20) + m.seedKeyframe() + return fmt.Sprintf("rewind-budget = %d MiB — reach ≈ %s steps, %d keyframes", + mb, humanCount(m.rewindReachSteps()), m.Keyframes.Cap()) +} + +// rewindReachSteps is the maximum number of steps back a deep rewind can +// currently reach: ring capacity × keyframe interval. +func (m *Model) rewindReachSteps() uint64 { + if m.Keyframes == nil { + return uint64(m.Rewind.Len()) + } + return uint64(m.Keyframes.Cap()) * keyframeInterval +} + +// rewindReachLabel summarises current deep-rewind state for the status line. +func (m *Model) rewindReachLabel() string { + return fmt.Sprintf("reach %s, %d/%d KiB-frames", + humanCount(m.rewindReachSteps()), m.Keyframes.Len(), m.Keyframes.Cap()) +} + +// humanCount renders a step count compactly (1234567 -> "1.2M"). 
+func humanCount(n uint64) string { + switch { + case n >= 1_000_000: + return fmt.Sprintf("%.1fM", float64(n)/1_000_000) + case n >= 1_000: + return fmt.Sprintf("%.1fk", float64(n)/1_000) + default: + return strconv.FormatUint(n, 10) + } +} diff --git a/internal/tui/rewind_deep_test.go b/internal/tui/rewind_deep_test.go new file mode 100644 index 0000000..1f6310d --- /dev/null +++ b/internal/tui/rewind_deep_test.go @@ -0,0 +1,189 @@ +package tui + +import ( + "strings" + "testing" + + "github.com/nkane/chippy/cpu" +) + +// newLoopModel loads a deterministic program that mutates RAM every step so +// rewinds can be verified byte-for-byte: +// +// $8000 EE 00 02 INC $0200 +// $8003 E8 INX +// $8004 4C 00 80 JMP $8000 +// +// Each loop bumps $0200 and X (both mod 256), so every step changes both a +// register and a RAM cell — exactly what a rewind must restore. +func newLoopModel() Model { + ram := cpu.NewRAM() + ram.EnableShadow() + c := cpu.New(ram) + prog := []byte{0xEE, 0x00, 0x02, 0xE8, 0x4C, 0x00, 0x80} + ram.Load(0x8000, prog) + c.PC = 0x8000 + return New(c, ram) +} + +type machineState struct { + step uint64 + pc uint16 + a, x byte + cell byte // $0200 +} + +func (m *Model) snapState() machineState { + return machineState{ + step: m.StepCount, + pc: m.CPU.PC, + a: m.CPU.A, + x: m.CPU.X, + cell: m.RAM.Read(0x0200), + } +} + +func runSteps(m *Model, n int) { + for i := 0; i < n; i++ { + m.step() + } +} + +func assertState(t *testing.T, got, want machineState) { + t.Helper() + if got != want { + t.Fatalf("state mismatch:\n got %+v\n want %+v", got, want) + } +} + +func TestDeepRewind_Exact(t *testing.T) { + m := newLoopModel() + runSteps(&m, 4000) + want := m.snapState() // remember step 4000 + + // Run well past the fine ring (256) so the rewind must take the deep + // keyframe-replay path. 
+ runSteps(&m, 1500) // now at step 5500 + if m.StepCount != 5500 { + t.Fatalf("StepCount = %d; want 5500", m.StepCount) + } + + out := m.cmdRewind([]string{"1500"}) + if !strings.Contains(out, "replayed") { + t.Errorf("expected deep-path status, got %q", out) + } + assertState(t, m.snapState(), want) +} + +func TestDeepRewind_FastPathWithinFineRing(t *testing.T) { + m := newLoopModel() + runSteps(&m, 500) + want := m.snapState() + runSteps(&m, 100) // 100 <= fine ring cap (256) + out := m.cmdRewind([]string{"100"}) + if strings.Contains(out, "replayed") { + t.Errorf("100-step rewind should use fast path, got %q", out) + } + assertState(t, m.snapState(), want) +} + +func TestDeepRewind_ContinuityAfterLanding(t *testing.T) { + // After a deep rewind, `<` (fine-ring pop) must still work, and stepping + // forward again must reproduce the original trajectory. + m := newLoopModel() + runSteps(&m, 3000) + atStep3000 := m.snapState() + runSteps(&m, 2000) // step 5000 + m.cmdRewind([]string{"2000"}) + assertState(t, m.snapState(), atStep3000) + + // Fine ring repopulated by replay -> one more rewind step works. + beforePC := m.CPU.PC + s, ok := m.Rewind.Pop() + if !ok { + t.Fatal("fine ring empty after deep rewind replay") + } + m.CPU.Restore(s, m.RAM) + if m.CPU.PC == beforePC && beforePC != 0x8000 { + t.Errorf("pop did not change PC") + } +} + +func TestRewindBudget_Resize(t *testing.T) { + m := newLoopModel() + if m.RewindBudgetMB != defaultRewindBudgetMB { + t.Fatalf("default budget = %d; want %d", m.RewindBudgetMB, defaultRewindBudgetMB) + } + out := m.cmdRewindBudget([]string{"256"}) + if m.RewindBudgetMB != 256 { + t.Errorf("budget = %d; want 256", m.RewindBudgetMB) + } + // 256 MiB / 64 KiB = 4096 keyframes. + if m.Keyframes.Cap() != 4096 { + t.Errorf("cap = %d; want 4096", m.Keyframes.Cap()) + } + if !strings.Contains(out, "256 MiB") { + t.Errorf("status %q missing budget", out) + } + // Clamp + reject. 
+ m.cmdRewindBudget([]string{"99999"}) + if m.RewindBudgetMB != maxRewindBudgetMB { + t.Errorf("over-max budget = %d; want clamp %d", m.RewindBudgetMB, maxRewindBudgetMB) + } + if out := m.cmdRewindBudget([]string{"0"}); !strings.Contains(out, "bad value") { + t.Errorf("zero budget = %q; want bad value", out) + } +} + +func TestRewind_BeyondReach(t *testing.T) { + m := newLoopModel() + // Tiny budget -> 1 keyframe slot. After many steps the seed keyframe at + // step 0 is evicted by later keyframes, so an old target is unreachable. + m.cmdRewindBudget([]string{"1"}) // 1 MiB -> 16 keyframes + runSteps(&m, 200000) // many keyframes; step 0 long evicted + out := m.cmdRewind([]string{"199000"}) + if !strings.Contains(out, "beyond reach") { + t.Errorf("expected beyond-reach, got %q", out) + } +} + +func TestRewind_Reset(t *testing.T) { + m := newLoopModel() + runSteps(&m, 5000) + if m.Keyframes.Len() == 0 || m.StepCount == 0 { + t.Fatal("expected keyframes + step count before reset") + } + m.Keyframes.Reset() + m.StepCount = 0 + if m.Keyframes.Len() != 0 || m.StepCount != 0 { + t.Error("reset did not clear deep-rewind state") + } +} + +// BenchmarkDeepRewind measures a worst-case deep rewind: a full +// keyframe-interval forward replay. Acceptance (#392) is <100 ms; on the +// cycle-accurate core a 4096-instruction replay runs in well under 1 ms. +func BenchmarkDeepRewind(b *testing.B) { + m := newLoopModel() + runSteps(&m, 20000) + b.ResetTimer() + for i := 0; i < b.N; i++ { + // Jump back a full interval (forces keyframe restore + replay), then + // forward again so the next iteration repeats the same work. + m.rewindToStep(m.StepCount - keyframeInterval) + runSteps(&m, int(keyframeInterval)) + } +} + +func TestHumanCount(t *testing.T) { + cases := map[uint64]string{ + 500: "500", + 1500: "1.5k", + 2_400_000: "2.4M", + } + for n, want := range cases { + if got := humanCount(n); got != want { + t.Errorf("humanCount(%d) = %q; want %q", n, got, want) + } + } +}