diff --git a/.prettierignore b/.prettierignore index 4be88241..9423dcdf 100644 --- a/.prettierignore +++ b/.prettierignore @@ -1,5 +1,6 @@ # Generated — never hand-edit; regenerated by `npm run api` / sqlc / openapi-typescript frontend/src/api/schema.ts +frontend/src/renderer/routeTree.gen.ts backend/internal/httpd/apispec/openapi.yaml # Build outputs @@ -12,6 +13,7 @@ frontend/playwright-report # Lockfiles package-lock.json frontend/package-lock.json +frontend/pnpm-lock.yaml # Go uses gofmt, not Prettier backend/ diff --git a/docs/plans/session-lifecycle-persistence.md b/docs/plans/session-lifecycle-persistence.md index 7dc83393..ea4dee4a 100644 --- a/docs/plans/session-lifecycle-persistence.md +++ b/docs/plans/session-lifecycle-persistence.md @@ -19,7 +19,7 @@ save/restore middle. 2. **The "last-stop manifest" is the existing SQLite state, not a new file.** `ListAllSessions` already records id, kind (worker/orchestrator), harness, `is_terminated`, and `Metadata{branch, workspacePath, agentSessionId, - prompt}`. The `session_worktrees` table already has a `preserved_ref` column +prompt}`. The `session_worktrees` table already has a `preserved_ref` column (migration 0009) that nothing currently writes. No manifest.json, no new migration, no new format. The manifest is a query. 3. **Uncommitted work is captured as a git commit object pointed to by a ref** @@ -29,7 +29,7 @@ save/restore middle. the stable key the rest of the system already uses. 4. **Untracked files: respect `.gitignore`.** Build the preserve commit through a temp index (`GIT_INDEX_FILE= git add -A; git write-tree; git - commit-tree`) so tracked + staged + new (non-ignored) files are captured, +commit-tree`) so tracked + staged + new (non-ignored) files are captured, side-effect-free, without mutating the working tree or the stash stack. Ignored paths (`node_modules/`, build output, ignored `.env`) are skipped. Log a one-line count of skipped ignored paths so it is never silent. 
(Chosen @@ -84,6 +84,7 @@ save/restore middle. ## Tasks (smallest coherent diff first; each ends with ONE runnable check) ### Task 1 — `ForceDestroy` on the workspace port + gitworktree adapter + Add `ForceDestroy(ctx, info) error` to the `ports.Workspace` interface and the gitworktree adapter. It runs `git worktree remove --force `, then prune, then `os.RemoveAll` as a backstop. New arg builder in `commands.go`; leave the @@ -93,6 +94,7 @@ comment that ForceDestroy is only safe after the work is captured. `ForceDestroy`, and asserts the path is gone and the worktree is deregistered. ### Task 2 — `StashUncommitted` + `ApplyPreserved` on the gitworktree adapter + - `StashUncommitted(ctx, info) (ref string, err error)`: build the preserve commit via a temp index that respects `.gitignore` (`GIT_INDEX_FILE= git add -A` → `git write-tree` → `git commit-tree`), @@ -104,11 +106,12 @@ comment that ForceDestroy is only safe after the work is captured. the commit). On clean success delete the ref (`git update-ref -d`); on conflict, keep the ref, leave conflict markers, return a sentinel the caller logs. -**Check:** Go test that round-trips a tracked edit AND a new non-ignored file -through StashUncommitted → ForceDestroy → re-add → ApplyPreserved and asserts -both reappear; and that a path matched by `.gitignore` does NOT reappear. + **Check:** Go test that round-trips a tracked edit AND a new non-ignored file + through StashUncommitted → ForceDestroy → re-add → ApplyPreserved and asserts + both reappear; and that a path matched by `.gitignore` does NOT reappear. ### Task 3 — `SaveAndTeardownAll` + `RestoreAll` on the session manager + - `SaveAndTeardownAll(ctx)`: `ListAllSessions`; for each live (non-terminated) session with a non-empty `Metadata.WorkspacePath`: `StashUncommitted` → `UpsertSessionWorktree(preserved_ref=...)` (commit) → `MarkTerminated` @@ -127,24 +130,26 @@ both reappear; and that a path matched by `.gitignore` does NOT reappear. 
gate on `preserved_ref` being non-empty: a clean worktree at shutdown writes a row with an empty `preserved_ref` and must still be restored. No new column is needed (consistent with Task 6 leaving `state` alone). -**Check:** Go test with fakes asserting (a) save calls capture-then-force in -order and writes preserved_ref before ForceDestroy, (b) RestoreAll restores BOTH -a worker and an orchestrator, (c) a session the user killed before shutdown is -not resurrected. + **Check:** Go test with fakes asserting (a) save calls capture-then-force in + order and writes preserved_ref before ForceDestroy, (b) RestoreAll restores BOTH + a worker and an orchestrator, (c) a session the user killed before shutdown is + not resurrected. ### Task 4 — Wire into daemon boot/shutdown (`daemon.go`) + - After `startSession` returns and before `srv.Run(ctx)`: call `RestoreAll` (best-effort; log failures; never block boot). - After `srv.Run(ctx)` returns and before the store closes: call `SaveAndTeardownAll` with a fresh bounded context (not the cancelled `ctx`). - Expose the manager (or a minimal `LifecycleSaver`/`LifecycleRestorer` seam) from the wiring up to `Run`. -**Check:** Manual run documented in report — spawn a session, edit a tracked -file + add a new file, `POST /shutdown`; assert worktree removed and -`refs/ao/preserved/` exists; restart daemon; assert worktree re-created and -both edits reapplied. Plus `go build ./backend/...` green. + **Check:** Manual run documented in report — spawn a session, edit a tracked + file + add a new file, `POST /shutdown`; assert worktree removed and + `refs/ao/preserved/` exists; restart daemon; assert worktree re-created and + both edits reapplied. Plus `go build ./backend/...` green. 
### Task 5 — Frontend: call `/shutdown` before kill (`main.ts`) + In `before-quit`: `event.preventDefault()` once, `await fetch( http://127.0.0.1:<port>/shutdown, {method:'POST'})` with an ~8s bounded timeout (port from the running.json the app already reads), then `killDaemon` + @@ -153,6 +158,7 @@ http://127.0.0.1:<port>/shutdown, {method:'POST'})` with an ~8s bounded timeout log shows the save ran and exited cleanly (not just SIGTERM-killed). ### Task 6 — Trim the over-built `session_worktrees.state` enum usage + No schema change. Ensure the save/restore code reads/writes only `preserved_ref` and leaves `state` at its default; add `ponytail:` comments noting the enum is unused multi-repo scaffolding. @@ -208,6 +214,5 @@ endpoint. No new file, migration, format, or endpoint. ## Execution order Tasks are sequential where coupled: Task 2 shares the gitworktree adapter with -Task 1 (do 1 then 2, same package); Task 3 depends on 1 + 2; Task 4 depends on -3. Task 5 (frontend) and Task 6 (storage cleanup) are independent and can run +Task 1 (do 1 then 2, same package); Task 3 depends on 1 + 2; Task 4 depends on 3. Task 5 (frontend) and Task 6 (storage cleanup) are independent and can run anytime. Suggested order: 1 → 2 → 3 → 4, then 5 and 6. 
diff --git a/docs/superpowers/plans/2026-06-24-crash-proof-session-reconcile.md b/docs/superpowers/plans/2026-06-24-crash-proof-session-reconcile.md index 1ee9bc3b..b0afb665 100644 --- a/docs/superpowers/plans/2026-06-24-crash-proof-session-reconcile.md +++ b/docs/superpowers/plans/2026-06-24-crash-proof-session-reconcile.md @@ -35,10 +35,12 @@ ## Task 1: Widen `runtimeController` with `IsAlive` and adopt-alive live pass **Files:** + - Modify: `backend/internal/session_manager/manager.go:64-67` (interface), add methods near `manager.go:558-623` - Test: `backend/internal/session_manager/manager_test.go:138-152` (fake), new test fn **Interfaces:** + - Consumes: `domain.SessionRecord` (`.IsTerminated`, `.Metadata.WorkspacePath`, `.Metadata.Branch`, `.Metadata.RuntimeHandleID`); `runtimeHandle(meta)` -> `ports.RuntimeHandle`; `workspaceInfo(rec)` -> `ports.WorkspaceInfo`; `m.workspace.StashUncommitted`, `m.lcm.MarkTerminated`, `m.store.ListAllSessions`. - Produces: `func (m *Manager) reconcileLive(ctx context.Context, rec domain.SessionRecord) error`; widened `runtimeController` with `IsAlive(ctx context.Context, handle ports.RuntimeHandle) (bool, error)`. @@ -210,10 +212,12 @@ git -c user.email=dev@theharshitsingh.com commit -m "feat(session): reconcile li ## Task 2: Reap pass and the `Reconcile` entry point **Files:** + - Modify: `backend/internal/session_manager/manager.go` (add `reconcileReap`, `Reconcile`; the latter reuses the existing `RestoreAll` body) - Test: `backend/internal/session_manager/manager_test.go` **Interfaces:** + - Consumes: `m.store.ListAllSessions`, `m.runtime.IsAlive`, `m.runtime.Destroy`, `reconcileLive` (Task 1), the existing `RestoreAll` method (`manager.go:637`). - Produces: `func (m *Manager) Reconcile(ctx context.Context) error`; `func (m *Manager) reconcileReap(ctx context.Context, rec domain.SessionRecord) error`. 
@@ -334,7 +338,7 @@ func (m *Manager) Reconcile(ctx context.Context) error { } ``` -> Note: the live pass re-reads `rec.IsTerminated` from the pre-pass snapshot, so a session terminated *by* the live pass is not also reaped in the same run. That is fine: its tmux is already gone (that is why it was terminated), so reaping would be a no-op anyway. +> Note: the live pass re-reads `rec.IsTerminated` from the pre-pass snapshot, so a session terminated _by_ the live pass is not also reaped in the same run. That is fine: its tmux is already gone (that is why it was terminated), so reaping would be a no-op anyway. - [ ] **Step 4: Run the tests, verify they pass** @@ -354,11 +358,13 @@ git -c user.email=dev@theharshitsingh.com commit -m "feat(session): reconcile re ## Task 3: Wire `Reconcile` into daemon boot **Files:** + - Modify: `backend/internal/daemon/lifecycle_wiring.go:64-67` (interface) - Modify: `backend/internal/daemon/daemon.go:144-149` (boot call) - Test: `backend/internal/daemon/wiring_test.go` **Interfaces:** + - Consumes: `Manager.Reconcile` (Task 2). - Produces: `sessionLifecycle` interface gains `Reconcile(ctx context.Context) error`. @@ -429,9 +435,11 @@ git -c user.email=dev@theharshitsingh.com commit -m "feat(daemon): run Reconcile ## Task 4: Integration test over the sqlite store **Files:** + - Modify: `backend/internal/integration/lifecycle_sqlite_test.go` **Interfaces:** + - Consumes: the real `Manager.Reconcile`, a real sqlite store, and the test's runtime fake (find how this file already fakes the runtime; reuse it, scripting `IsAlive` per handle). 
- [ ] **Step 1: Read the existing integration harness** @@ -498,10 +506,12 @@ git -c user.email=dev@theharshitsingh.com commit -m "test(integration): reconcil ## Task 5: Frontend wedged-orphan kill+replace branch **Files:** + - Modify: `frontend/src/main.ts` (in `startDaemonInner`, around lines 457-495) - Test: `frontend/src/main.test.ts` or the existing main-process test file **Interfaces:** + - Consumes: existing `inspectExistingDaemon`, `resolveDaemonFromPort`, `readDaemonProbe`, `killDaemon`, `parseRunFile`/`defaultRunFilePath`, `expectedDaemonPort`. - Produces: a pure decision helper, e.g. `function planDaemonTakeover(probe: DaemonProbe | null): "reuse" | "replace"`, unit-testable without spawning. @@ -553,22 +563,26 @@ export function planDaemonTakeover(probe: DaemonProbe | null): "reuse" | "replac Then, in `startDaemonInner`, after the existing `inspectExistingDaemon` + `resolveDaemonFromPort` attach attempts fail (i.e. just before `spawn`), add: probe the expected port; if something answers but is unhealthy, SIGTERM the holder via the run-file PID and wait for the port to free before spawning. Concretely, before the `spawn(...)` at line 505: ```ts - // A process may hold the port without being a healthy daemon we can attach to - // (wedged orphan from a crash, or a PID-dead-but-port-held run-file). Spawning - // then would make the Go child collide and exit 1. Detect it and clear it. - const holderProbe = await readDaemonProbe(expectedDaemonPort(process.env)); - if (planDaemonTakeover(holderProbe) === "replace" && holderProbe) { - const runFile = parseRunFile(await readRunFileSafe(defaultRunFilePath())); - if (runFile?.pid) { +// A process may hold the port without being a healthy daemon we can attach to +// (wedged orphan from a crash, or a PID-dead-but-port-held run-file). Spawning +// then would make the Go child collide and exit 1. Detect it and clear it. 
+const holderProbe = await readDaemonProbe(expectedDaemonPort(process.env)); +if (planDaemonTakeover(holderProbe) === "replace" && holderProbe) { + const runFile = parseRunFile(await readRunFileSafe(defaultRunFilePath())); + if (runFile?.pid) { + try { + process.kill(-runFile.pid, "SIGTERM"); + } catch { try { - process.kill(-runFile.pid, "SIGTERM"); + process.kill(runFile.pid, "SIGTERM"); } catch { - try { process.kill(runFile.pid, "SIGTERM"); } catch { /* already gone */ } + /* already gone */ } } - await waitForPortFree(expectedDaemonPort(process.env), 8_000); - await rmRunFileSafe(defaultRunFilePath()); } + await waitForPortFree(expectedDaemonPort(process.env), 8_000); + await rmRunFileSafe(defaultRunFilePath()); +} ``` > Use the file's existing run-file read/parse helpers (`parseRunFile`, `defaultRunFilePath`). If `readRunFileSafe`/`rmRunFileSafe`/`waitForPortFree` do not exist, add small local helpers: `readRunFileSafe` wraps `fs.readFile` returning `""` on ENOENT; `rmRunFileSafe` wraps `fs.rm` ignoring ENOENT; `waitForPortFree` polls `readDaemonProbe` until it returns null or the timeout elapses. Keep each to a few lines, matching the file's existing async style. 
diff --git a/docs/superpowers/plans/2026-06-24-restore-recreate-orchestrator.md b/docs/superpowers/plans/2026-06-24-restore-recreate-orchestrator.md index 6811fa9f..55bf8293 100644 --- a/docs/superpowers/plans/2026-06-24-restore-recreate-orchestrator.md +++ b/docs/superpowers/plans/2026-06-24-restore-recreate-orchestrator.md @@ -22,11 +22,13 @@ ### Task 1: Typed error for un-resumable restore (fixes the 500) **Files:** + - Modify: `backend/internal/session_manager/manager.go` (sentinel near line 25; the "nothing to resume from" return at line 480) - Modify: `backend/internal/service/session/service.go` (`toAPIError`, near line 450) - Test: `backend/internal/service/session/service_test.go` (new test for the mapping) **Interfaces:** + - Produces: `sessionmanager.ErrNotResumable` (a sentinel `error`), and the wire contract `409` with code `SESSION_NOT_RESUMABLE` from `POST /api/v1/sessions/{id}/restore` when a terminated session has neither `agent_session_id` nor `prompt`. Task 2 (frontend) consumes the `SESSION_NOT_RESUMABLE` code. - [ ] **Step 1: Write the failing test** @@ -117,12 +119,14 @@ Co-Authored-By: Claude Opus 4.8 " ### Task 2: Restore-unavailable popup + recreate via existing orchestrator endpoint **Files:** + - Modify: `frontend/src/renderer/lib/spawn-orchestrator.ts` (optional `clean` param) - Create: `frontend/src/renderer/components/RestoreUnavailableDialog.tsx` (the popup) - Modify: `frontend/src/renderer/components/TerminalPane.tsx` (route `SESSION_NOT_RESUMABLE` to the dialog) - Test: `frontend/src/renderer/lib/spawn-orchestrator.test.ts` (new; clean param) **Interfaces:** + - Consumes from Task 1: the restore response error envelope `{ code: "SESSION_NOT_RESUMABLE", message, ... }`. - Consumes existing: `spawnOrchestrator(projectId, clean?)` (extended here), `isOrchestrator(session)` from `frontend/src/renderer/types/workspace.ts`, `apiClient`/`apiErrorMessage` from `lib/api-client`, `workspaceQueryKey` already imported in `TerminalPane.tsx`. 
- Produces: `RestoreUnavailableDialog` React component with props `{ open: boolean; session: SessionView; onOpenChange: (open: boolean) => void; onRecreated: (newOrchestratorId: string) => void }`. @@ -251,9 +255,7 @@ export function RestoreUnavailableDialog({ open, session, onOpenChange, onRecrea - - Session can no longer be restored - + Session can no longer be restored {orchestrator ? "This orchestrator has no saved agent session to resume. You can create a new orchestrator on the same branch; its committed work is preserved and the old worktree is cleaned." @@ -287,45 +289,47 @@ In `frontend/src/renderer/components/TerminalPane.tsx`, add state and a dialog m Add state near the other `useState` hooks in `AttachedTerminal`: ```tsx - const [restoreUnavailable, setRestoreUnavailable] = useState(false); +const [restoreUnavailable, setRestoreUnavailable] = useState(false); ``` Replace the `catch`/error handling inside `restoreSession` so a `SESSION_NOT_RESUMABLE` code opens the dialog instead of setting the inline error. The `restoreError` returned by `apiClient.POST` is the parsed error envelope, so read its `code`: ```tsx - try { - const { error: restoreError } = await apiClient.POST("/api/v1/sessions/{sessionId}/restore", { - params: { path: { sessionId: session.id } }, - }); - if (restoreError) { - const code = (restoreError as { code?: string }).code; - if (code === "SESSION_NOT_RESUMABLE") { - setRestoreUnavailable(true); - return; - } - throw new Error(apiErrorMessage(restoreError, "Unable to restore session")); - } - await queryClient.invalidateQueries({ queryKey: workspaceQueryKey }); - } catch (err) { - setRestoreError(err instanceof Error ? 
err.message : "Unable to restore session"); - } finally { - setIsRestoring(false); +try { + const { error: restoreError } = await apiClient.POST("/api/v1/sessions/{sessionId}/restore", { + params: { path: { sessionId: session.id } }, + }); + if (restoreError) { + const code = (restoreError as { code?: string }).code; + if (code === "SESSION_NOT_RESUMABLE") { + setRestoreUnavailable(true); + return; } + throw new Error(apiErrorMessage(restoreError, "Unable to restore session")); + } + await queryClient.invalidateQueries({ queryKey: workspaceQueryKey }); +} catch (err) { + setRestoreError(err instanceof Error ? err.message : "Unable to restore session"); +} finally { + setIsRestoring(false); +} ``` Mount the dialog inside the component's returned JSX (e.g. just before the closing tag of the root `div` in `AttachedTerminal`, alongside the other absolutely-positioned children): ```tsx - {session && ( - { - await queryClient.invalidateQueries({ queryKey: workspaceQueryKey }); - }} - /> - )} +{ + session && ( + { + await queryClient.invalidateQueries({ queryKey: workspaceQueryKey }); + }} + /> + ); +} ``` Add the import at the top of the file: diff --git a/docs/superpowers/specs/2026-06-24-crash-proof-session-reconcile-design.md b/docs/superpowers/specs/2026-06-24-crash-proof-session-reconcile-design.md index 2c0c41fd..ae28c91f 100644 --- a/docs/superpowers/specs/2026-06-24-crash-proof-session-reconcile-design.md +++ b/docs/superpowers/specs/2026-06-24-crash-proof-session-reconcile-design.md @@ -58,7 +58,7 @@ restore logic in as one branch. Iterating `ListAllSessions`: Reconcile iterates `ListAllSessions` and acts per session: -| DB state | tmux via `IsAlive(handle)` | Action | +| DB state | tmux via `IsAlive(handle)` | Action | | ----------------------------- | -------------------------- | ------------------------------------------------------------------ | | `is_terminated=0` | alive | **Adopt** — no-op, leave live. Agent keeps running. 
| | `is_terminated=0` | gone | `StashUncommitted` (best-effort) -> `MarkTerminated`. No relaunch. | diff --git a/docs/superpowers/specs/2026-06-24-restore-recreate-orchestrator-design.md b/docs/superpowers/specs/2026-06-24-restore-recreate-orchestrator-design.md index 3adcef75..f6a5c5da 100644 --- a/docs/superpowers/specs/2026-06-24-restore-recreate-orchestrator-design.md +++ b/docs/superpowers/specs/2026-06-24-restore-recreate-orchestrator-design.md @@ -70,6 +70,7 @@ must REUSE the existing branch, so it goes through the existing-branch attach ### Backend #### 1. Typed error for un-resumable restore (fixes the 500) + - Add sentinel in `session_manager` (next to the existing sentinels near `manager.go:25`): ```go @@ -88,6 +89,7 @@ must REUSE the existing branch, so it goes through the existing-branch attach ``` #### 2. Recreate: REUSE the existing `POST /api/v1/orchestrators` (clean=true) + **Discovery during planning:** the recreate capability already ships. No new endpoint or manager method is needed. @@ -96,7 +98,7 @@ endpoint or manager method is needed. - `Service.SpawnOrchestrator(ctx, projectID, clean)` (`service/session/service.go:263`): when `clean` is true it kills any active orchestrators for the project, then `Spawn(SpawnConfig{ProjectID, Kind: - orchestrator})`. +orchestrator})`. - `Spawn` with no branch defaults to the canonical orchestrator branch `ao/-orchestrator` (`defaultSessionBranch`). That is the SAME branch the dead orchestrator used. @@ -135,7 +137,7 @@ route, no `RecreateOrchestrator`, no OpenAPI/spec regen. 
orchestrator"** → calls the existing `spawnOrchestrator` helper (`frontend/src/renderer/lib/spawn-orchestrator.ts`) extended with a `clean` argument: `spawnOrchestrator(projectId, true)` → `POST /api/v1/orchestrators - {projectId, clean:true}`, with a loading state; on success, invalidate +{projectId, clean:true}`, with a loading state; on success, invalidate workspace queries and select the returned new orchestrator id; "Cancel" closes. - If `kind === "worker"`: explanatory text + "Close" only (no recreate). diff --git a/frontend/src/main.ts b/frontend/src/main.ts index 6f7bc180..ac3bb4af 100644 --- a/frontend/src/main.ts +++ b/frontend/src/main.ts @@ -507,7 +507,12 @@ async function startDaemonInner(startEpoch: number): Promise { // process.kill(pid, 0) does not kill; it throws iff the PID is not live. let holderPidAlive = false; if (runFilePid) { - try { process.kill(runFilePid, 0); holderPidAlive = true; } catch { holderPidAlive = false; } + try { + process.kill(runFilePid, 0); + holderPidAlive = true; + } catch { + holderPidAlive = false; + } } if (shouldReplacePortHolder(orphanProbe, holderPidAlive)) { // Use the run-file PID when available; fall back to the probe's reported diff --git a/frontend/src/renderer/components/RestoreUnavailableDialog.tsx b/frontend/src/renderer/components/RestoreUnavailableDialog.tsx index 98dbda5f..d5f99d33 100644 --- a/frontend/src/renderer/components/RestoreUnavailableDialog.tsx +++ b/frontend/src/renderer/components/RestoreUnavailableDialog.tsx @@ -37,9 +37,7 @@ export function RestoreUnavailableDialog({ open, session, onOpenChange, onRecrea - - Session can no longer be restored - + Session can no longer be restored {orchestrator ? "This orchestrator has no saved agent session to resume. You can create a new orchestrator on the same branch; its committed work is preserved and the old worktree is cleaned."