Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/lib/onboard.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4730,7 +4730,6 @@ function skippedStepMessage(
}

// ── Main ─────────────────────────────────────────────────────────

async function onboard(opts: OnboardOptions = {}): Promise<void> {
setOnboardBrandingAgent(opts.agent || process.env.NEMOCLAW_AGENT || null);
NON_INTERACTIVE = opts.nonInteractive || process.env.NEMOCLAW_NON_INTERACTIVE === "1";
Expand All @@ -4747,6 +4746,7 @@ async function onboard(opts: OnboardOptions = {}): Promise<void> {
env: process.env,
stdinIsTty: Boolean(process.stdin && process.stdin.isTTY),
stdoutIsTty: Boolean(process.stdout && process.stdout.isTTY),
persistedSessionStatus: onboardSession.loadSession()?.status ?? null,
},
{
isNonInteractive,
Expand Down
53 changes: 52 additions & 1 deletion src/lib/onboard/entry-options.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
import { describe, expect, it, vi } from "vitest";

import {
resolveOnboardEntryOptions,
type OnboardEntryOptionsDeps,
resolveOnboardEntryOptions,
} from "../../../dist/lib/onboard/entry-options";

class ExitError extends Error {
Expand Down Expand Up @@ -138,6 +138,57 @@ describe("resolveOnboardEntryOptions", () => {
expect(deps.exitProcess).toHaveBeenCalledTimes(1);
});

it("auto-detects resume from a persisted in_progress session without --resume (#5470)", () => {
const deps = createDeps();

const result = resolveOnboardEntryOptions(
{
opts: {},
env: {},
stdinIsTty: true,
stdoutIsTty: true,
persistedSessionStatus: "in_progress",
},
deps,
);

expect(result.resume).toBe(true);
expect(deps.error).not.toHaveBeenCalled();
});

it("does not auto-resume when --fresh is set even with an in_progress session (#5470)", () => {
const deps = createDeps();

const result = resolveOnboardEntryOptions(
{
opts: { fresh: true },
env: {},
stdinIsTty: true,
stdoutIsTty: true,
persistedSessionStatus: "in_progress",
},
deps,
);

// --fresh wins; an auto-detected resume must NOT trip the mutual-exclusion
// error (that guard is for explicit --resume + --fresh only).
expect(result.resume).toBe(false);
expect(result.fresh).toBe(true);
expect(deps.error).not.toHaveBeenCalled();
});

it("does not auto-resume when the persisted session is not in_progress (#5470)", () => {
const deps = createDeps();

for (const status of ["complete", "failed", "pending", "", null, undefined] as const) {
const result = resolveOnboardEntryOptions(
{ opts: {}, env: {}, stdinIsTty: true, stdoutIsTty: true, persistedSessionStatus: status },
deps,
);
expect(result.resume).toBe(false);
}
});

it("prints validation guidance for invalid sandbox names", () => {
const deps = createDeps({
validateName: vi.fn(() => {
Expand Down
21 changes: 19 additions & 2 deletions src/lib/onboard/entry-options.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,14 @@ export interface OnboardEntryOptionsInput {
env: NodeJS.ProcessEnv | Record<string, string | undefined>;
stdinIsTty: boolean;
stdoutIsTty: boolean;
/**
* Status of the persisted onboard session (`~/.nemoclaw/onboard-session.json`),
* or null when there is no session on disk. When it is "in_progress" a prior
* onboard was interrupted, so resume mode is auto-detected even without an
* explicit `--resume` flag (#5470). Optional: omitting it preserves the
* flag-only behavior for callers that don't load the session.
*/
persistedSessionStatus?: string | null;
}

export interface OnboardEntryOptionsDeps {
Expand Down Expand Up @@ -39,12 +47,21 @@ export function resolveOnboardEntryOptions(
input: OnboardEntryOptionsInput,
deps: OnboardEntryOptionsDeps,
): ResolvedOnboardEntryOptions {
const resume = input.opts.resume === true;
const explicitResume = input.opts.resume === true;
const fresh = input.opts.fresh === true;
if (resume && fresh) {
// The mutual-exclusion error applies only to the explicit flags — a leftover
// in_progress session combined with an explicit `--fresh` is not a conflict
// (fresh wins, see below), so it must not trip this guard.
if (explicitResume && fresh) {
deps.error(" --resume and --fresh cannot both be set.");
deps.exitProcess(1);
}
// Auto-detect resume from a persisted in_progress session so a re-run of
// `nemoclaw onboard` after an interrupted attempt continues that attempt
// (banner + resume preflight) instead of starting over (#5470). `--fresh`
// always wins, and an explicit `--resume` is preserved unchanged.
const sessionInProgress = input.persistedSessionStatus === "in_progress";
const resume = !fresh && (explicitResume || sessionInProgress);

const requestedFromDockerfile =
input.opts.fromDockerfile ||
Expand Down
92 changes: 92 additions & 0 deletions test/e2e/test-onboard-resume.sh
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,98 @@ else
fail "Registry does not contain resumed sandbox entry"
fi

# ══════════════════════════════════════════════════════════════════
# Phase 3.5: Implicit resume (plain `onboard`, no --resume flag) — #5470
# ══════════════════════════════════════════════════════════════════
# The fix auto-detects resume from a persisted in_progress session. The
# section above proves explicit `--resume`; this proves a plain `onboard`
# rerun resumes on its own, and that `--fresh` suppresses it.
section "Phase 3.5: Implicit resume from in_progress session"

# Re-mark the now-complete session as in_progress so a plain `onboard` has
# something to auto-resume. Everything is already provisioned, so the resume
# skips every cached step and finishes fast.
# Mimic an interrupted-but-resumable session: status "in_progress" AND
# resumable !== false. Phase 3 marks the completed session `resumable: false`,
# so flipping status alone would (correctly) be rejected as "no resumable
# session"; resetting resumable reconstructs the interrupted shape the resume
# machine accepts (session-bootstrap.ts:140).
set_session_in_progress() {
node -e '
const fs = require("fs");
const file = process.argv[1];
const data = JSON.parse(fs.readFileSync(file, "utf8"));
data.status = "in_progress";
data.resumable = true;
fs.writeFileSync(file, JSON.stringify(data, null, 2));
' "$SESSION_FILE"
}

set_session_in_progress
info "Running plain onboard (no --resume) on an in_progress session..."
IMPLICIT_LOG="$(mktemp)"
env -u NVIDIA_INFERENCE_API_KEY -u COMPATIBLE_API_KEY \
NEMOCLAW_NON_INTERACTIVE=1 \
NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \
NEMOCLAW_SANDBOX_NAME="$SANDBOX_NAME" \
NEMOCLAW_POLICY_MODE=skip \
node "$REPO/bin/nemoclaw.js" onboard --non-interactive >"$IMPLICIT_LOG" 2>&1
implicit_exit=$?
implicit_output="$(cat "$IMPLICIT_LOG")"
rm -f "$IMPLICIT_LOG"

if [ $implicit_exit -eq 0 ]; then
pass "Implicit resume (plain onboard) completed successfully"
else
fail "Implicit resume exited $implicit_exit (expected 0)"
echo "$implicit_output"
fi

if echo "$implicit_output" | grep -q "(resume mode)"; then
pass "Plain onboard auto-detected resume mode from in_progress session"
else
fail "Plain onboard did not show '(resume mode)' for an in_progress session"
fi

if echo "$implicit_output" | grep -q "\[resume\] Skipping\|\[reuse\] Skipping"; then
pass "Implicit resume skipped cached steps"
else
fail "Implicit resume did not skip any cached steps"
fi

# --fresh must suppress the auto-resume even with an in_progress session.
# Fail-fast at preflight (step 1, before sandbox recreation) so this stays
# cheap and non-destructive; the banner is emitted before that step.
set_session_in_progress
info "Running onboard --fresh on the same in_progress session (fail-fast)..."
FRESH_LOG="$(mktemp)"
NEMOCLAW_NON_INTERACTIVE=1 \
NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 \
NEMOCLAW_SANDBOX_NAME="$SANDBOX_NAME" \
NEMOCLAW_POLICY_MODE=skip \
NEMOCLAW_E2E_FAILURE_INJECTION=1 \
NEMOCLAW_E2E_FORCE_FAIL_AT_STEP=preflight \
node "$REPO/bin/nemoclaw.js" onboard --fresh --non-interactive >"$FRESH_LOG" 2>&1
fresh_exit=$?
fresh_output="$(cat "$FRESH_LOG")"
rm -f "$FRESH_LOG"

# Confirm the run actually executed and aborted at preflight, so the
# banner-absence assertion below is meaningful (not a vacuous pass from an
# unrelated early failure).
if [ $fresh_exit -ne 0 ] && echo "$fresh_output" | grep -q "\[e2e\] Forced onboarding failure at step 'preflight'."; then
pass "--fresh run failed fast at preflight as intended"
else
fail "--fresh run did not fail at preflight as expected (exit $fresh_exit)"
echo "$fresh_output"
fi

if echo "$fresh_output" | grep -q "(resume mode)"; then
fail "--fresh did not suppress auto-resume (unexpected '(resume mode)')"
else
pass "--fresh suppressed auto-resume despite an in_progress session"
fi
Comment thread
coderabbitai[bot] marked this conversation as resolved.

# ══════════════════════════════════════════════════════════════════
# Phase 4: Final cleanup
# ══════════════════════════════════════════════════════════════════
Expand Down
Loading