Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions apps/cli/src/data/execution-atom.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ import { startReplayProxy } from "../utils/replay-proxy-server";
import { toViewerRunState, pushStepState } from "../utils/push-step-state";
import { extractCloseArtifacts } from "../utils/extract-close-artifacts";
import { loadReplayEvents } from "../utils/load-replay-events";
import { writeRunResult } from "../utils/write-run-result";
import { CiResultOutput, CiStepResult } from "@expect/shared/models";
import { VERSION } from "../constants";
import { getStepElapsedMs, getTotalElapsedMs } from "../utils/step-elapsed";

const LIVE_VIEW_PORT_MIN = 50000;
const LIVE_VIEW_PORT_RANGE = 10000;
Expand Down Expand Up @@ -175,6 +179,36 @@ const executeCore = (input: ExecuteInput) =>
yield* git.saveTestedFingerprint();
}

const statuses = report.stepStatuses;
const stepResults = report.steps.map((step) => {
const entry = statuses.get(step.id);
const stepStatus = entry?.status ?? ("not-run" as const);
const elapsed = getStepElapsedMs(step);
return new CiStepResult({
title: step.title,
status: stepStatus,
...(elapsed !== undefined ? { duration_ms: elapsed } : {}),
...(stepStatus === "failed" && entry?.summary ? { error: entry.summary } : {}),
});
});

const totalDurationMs = getTotalElapsedMs(report.steps) || durationMs;
const summaryParts = [`${passedCount} passed`, `${failedCount} failed`];
Copy link
Copy Markdown
Contributor

@vercel vercel bot Apr 3, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The summary construction in execution-atom.ts is missing the skipped count, resulting in inconsistent output compared to run-test.ts

Fix on Vercel

const resultOutput = new CiResultOutput({
version: VERSION,
status: report.status,
title: report.title,
duration_ms: totalDurationMs,
steps: stepResults,
artifacts: {
...(artifacts.videoUrl ? { video: artifacts.videoUrl } : {}),
...(artifacts.localReplayUrl ? { replay: artifacts.localReplayUrl } : {}),
},
summary: `${summaryParts.join(", ")} out of ${report.steps.length} step${report.steps.length === 1 ? "" : "s"}`,
});

yield* writeRunResult(finalExecuted.id ?? crypto.randomUUID(), resultOutput);
Copy link
Copy Markdown
Contributor

@cubic-dev-ai cubic-dev-ai bot Apr 3, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2: finalExecuted.id ?? crypto.randomUUID() does not handle empty-string ids, so runs with id: "" all write to the same .expect/runs/.json file.

Prompt for AI agents
Check if this issue is valid — if so, understand the root cause and fix it. At apps/cli/src/data/execution-atom.ts, line 210:

<comment>`finalExecuted.id ?? crypto.randomUUID()` does not handle empty-string ids, so runs with `id: ""` all write to the same `.expect/runs/.json` file.</comment>

<file context>
@@ -175,6 +179,36 @@ const executeCore = (input: ExecuteInput) =>
+      summary: `${summaryParts.join(", ")} out of ${report.steps.length} step${report.steps.length === 1 ? "" : "s"}`,
+    });
+
+    yield* writeRunResult(finalExecuted.id ?? crypto.randomUUID(), resultOutput);
+
     return {
</file context>
Suggested change
yield* writeRunResult(finalExecuted.id ?? crypto.randomUUID(), resultOutput);
yield* writeRunResult(finalExecuted.id && finalExecuted.id.length > 0 ? finalExecuted.id : crypto.randomUUID(), resultOutput);
Fix with Cubic

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using the ?? operator doesn't properly handle empty-string IDs, causing multiple runs to overwrite the same file

Fix on Vercel


return {
executedPlan: finalExecuted,
report,
Expand Down
19 changes: 18 additions & 1 deletion apps/cli/src/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import { join } from "node:path";
import { Option } from "effect";
import { Command } from "commander";
import { ChangesFor } from "@expect/supervisor";
import type { ScopeTier } from "@expect/shared/models";
import { runHeadless } from "./utils/run-test";
import { runInit } from "./commands/init";
import { runAddGithubAction } from "./commands/add-github-action";
Expand Down Expand Up @@ -34,12 +35,15 @@ const TARGETS: readonly Target[] = ["unstaged", "branch", "changes"];

type OutputFormat = "text" | "json";

const SCOPE_TIERS: readonly ScopeTier[] = ["quick", "standard", "thorough"];

interface CommanderOpts {
message?: string;
flow?: string;
yes?: boolean;
agent?: AgentBackend;
target?: Target;
scope?: ScopeTier;
Copy link
Copy Markdown
Contributor

@vercel vercel bot Apr 3, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The CommanderOpts interface declares scope/target/output/agent with literal union types (ScopeTier, Target, OutputFormat, AgentBackend), but Commander provides raw strings for all options, making the type annotation misleading about type safety.

Fix on Vercel

verbose?: boolean;
headed?: boolean;
noCookies?: boolean;
Expand All @@ -63,6 +67,11 @@ const program = new Command()
"agent provider to use (claude, codex, copilot, gemini, cursor, opencode, or droid)",
)
.option("-t, --target <target>", "what to test: unstaged, branch, or changes", "changes")
.option(
"-s, --scope <tier>",
Copy link
Copy Markdown
Contributor

@cubic-dev-ai cubic-dev-ai bot Apr 3, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2: --scope is exposed as a global option, but it only affects the headless execution path. In interactive runs it is ignored, so users can pass --scope quick and still get default behavior.

Prompt for AI agents
Check if this issue is valid — if so, understand the root cause and fix it. At apps/cli/src/index.tsx, line 71:

<comment>`--scope` is exposed as a global option, but it only affects the headless execution path. In interactive runs it is ignored, so users can pass `--scope quick` and still get default behavior.</comment>

<file context>
@@ -63,6 +67,11 @@ const program = new Command()
   )
   .option("-t, --target <target>", "what to test: unstaged, branch, or changes", "changes")
+  .option(
+    "-s, --scope <tier>",
+    "test depth: quick (one check, ~30s), standard (primary + follow-ups), thorough (full audit)",
+    "standard",
</file context>
Fix with Cubic

"test depth: quick (one check, ~30s), standard (primary + follow-ups), thorough (full audit)",
"standard",
)
.option("--verbose", "enable verbose logging")
.option("--headed", "show a visible browser window during tests")
.option("--no-cookies", "skip system browser cookie extraction")
Expand All @@ -80,7 +89,8 @@ Examples:
$ expect --headed -m "smoke test" -y run with a visible browser
$ expect --target branch test all branch changes
$ expect --target unstaged test unstaged changes
$ expect --no-cookies -m "test" -y skip system browser cookie extraction
$ expect --scope quick -m "check the button" -y fast focused test (~30s)
$ expect --scope thorough --target branch full audit before merge
$ expect -u http://localhost:3000 -m "test" -y specify dev server URL directly
$ expect watch -m "test the login flow" watch mode`,
);
Expand Down Expand Up @@ -113,6 +123,12 @@ const runHeadlessForTarget = async (target: Target, opts: CommanderOpts) => {
? Option.some(CI_EXECUTION_TIMEOUT_MS)
: Option.none();

const scopeTier = opts.scope ?? "standard";
if (!SCOPE_TIERS.includes(scopeTier)) {
console.error(`Unknown scope tier: ${scopeTier}. Use ${SCOPE_TIERS.join(", ")}.`);
process.exit(1);
}

const { changesFor } = await resolveChangesFor(target);
return runHeadless({
changesFor,
Expand All @@ -123,6 +139,7 @@ const runHeadlessForTarget = async (target: Target, opts: CommanderOpts) => {
ci: ciMode,
timeoutMs,
output: opts.output ?? "text",
scopeTier,
baseUrl: opts.url?.join(", "),
});
};
Expand Down
2 changes: 2 additions & 0 deletions apps/cli/src/layers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import { Executor, FlowStorage, Git, Reporter, Updates, Watch } from "@expect/su
import { Agent, AgentBackend } from "@expect/agent";
import { RrVideo } from "@expect/browser";
import { Analytics, DebugFileLoggerLayer, Tracing } from "@expect/shared/observability";
import * as NodeServices from "@effect/platform-node/NodeServices";

export const layerCli = ({ verbose, agent }: { verbose: boolean; agent: AgentBackend }) => {
const gitLayer = Git.withRepoRoot(process.cwd());
Expand All @@ -25,6 +26,7 @@ export const layerCli = ({ verbose, agent }: { verbose: boolean; agent: AgentBac
Layer.provide(Agent.layerFor(agent ?? "claude")),
Layer.provide(DebugFileLoggerLayer),
Layer.provide(Tracing.layerAxiom("expect-cli")),
Layer.provideMerge(NodeServices.layer),
Layer.provideMerge(Layer.succeed(References.MinimumLogLevel, verbose ? "All" : "Error")),
);
};
73 changes: 42 additions & 31 deletions apps/cli/src/utils/run-test.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { Config, Effect, Option, Stream, Schema } from "effect";
import { type ChangesFor, CiResultOutput, CiStepResult } from "@expect/shared/models";
import { type ChangesFor, CiResultOutput, CiStepResult, type ScopeTier } from "@expect/shared/models";
import { Executor, ExecutedTestPlan, Reporter, Github } from "@expect/supervisor";
import { Analytics } from "@expect/shared/observability";
import type { AgentBackend } from "@expect/agent";
Expand All @@ -13,6 +13,7 @@ import { createCiReporter } from "./ci-reporter";
import { writeGhaOutputs, writeGhaStepSummary } from "./gha-output";
import { getStepElapsedMs, getTotalElapsedMs } from "./step-elapsed";
import { formatElapsedTime } from "./format-elapsed-time";
import { writeRunResult } from "./write-run-result";

class ExecutionTimeoutError extends Schema.ErrorClass<ExecutionTimeoutError>(
"ExecutionTimeoutError",
Expand All @@ -34,6 +35,7 @@ interface HeadlessRunOptions {
ci: boolean;
timeoutMs: Option.Option<number>;
output: "text" | "json";
scopeTier: ScopeTier;
baseUrl?: string;
}

Expand Down Expand Up @@ -132,6 +134,7 @@ export const runHeadless = (options: HeadlessRunOptions) =>
instruction: options.instruction,
isHeadless: !options.headed,
cookieBrowserKeys: [],
scopeTier: options.scopeTier,
baseUrl: options.baseUrl,
})
.pipe(
Expand Down Expand Up @@ -366,39 +369,47 @@ export const runHeadless = (options: HeadlessRunOptions) =>
);
}

if (isJsonOutput) {
const stepResults = report.steps.map((step) => {
const entry = statuses.get(step.id);
const stepStatus = entry?.status ?? ("not-run" as const);
const elapsed = getStepElapsedMs(step);
return new CiStepResult({
title: step.title,
status: stepStatus,
...(elapsed !== undefined ? { duration_ms: elapsed } : {}),
...(stepStatus === "failed" && entry?.summary ? { error: entry.summary } : {}),
});
const stepResults = report.steps.map((step) => {
const entry = statuses.get(step.id);
const stepStatus = entry?.status ?? ("not-run" as const);
const elapsed = getStepElapsedMs(step);
return new CiStepResult({
title: step.title,
status: stepStatus,
...(elapsed !== undefined ? { duration_ms: elapsed } : {}),
...(stepStatus === "failed" && entry?.summary ? { error: entry.summary } : {}),
});
});

const summaryParts = [`${passedCount} passed`, `${failedCount} failed`];
if (skippedCount > 0) summaryParts.push(`${skippedCount} skipped`);
const summaryText = `${summaryParts.join(", ")} out of ${report.steps.length} step${report.steps.length === 1 ? "" : "s"}`;

const resultOutput = new CiResultOutput({
version: VERSION,
status: report.status,
title: report.title,
duration_ms: totalDurationMs,
steps: stepResults,
artifacts: {
...(effectiveVideoPath ? { video: effectiveVideoPath } : {}),
...(artifacts.replayPath ? { replay: artifacts.replayPath } : {}),
...(artifacts.screenshotPaths.length > 0
? { screenshots: [...artifacts.screenshotPaths] }
: {}),
},
summary: summaryText,
});
const summaryParts = [`${passedCount} passed`, `${failedCount} failed`];
if (skippedCount > 0) summaryParts.push(`${skippedCount} skipped`);
const summaryText = `${summaryParts.join(", ")} out of ${report.steps.length} step${report.steps.length === 1 ? "" : "s"}`;

const resultOutput = new CiResultOutput({
version: VERSION,
status: report.status,
title: report.title,
duration_ms: totalDurationMs,
steps: stepResults,
artifacts: {
...(effectiveVideoPath ? { video: effectiveVideoPath } : {}),
...(artifacts.replayPath ? { replay: artifacts.replayPath } : {}),
...(artifacts.screenshotPaths.length > 0
? { screenshots: [...artifacts.screenshotPaths] }
: {}),
},
summary: summaryText,
});

const runResultPath = yield* writeRunResult(
finalExecuted.id ?? crypto.randomUUID(),
Copy link
Copy Markdown
Contributor

@cubic-dev-ai cubic-dev-ai bot Apr 3, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2: Use a truthy fallback for finalExecuted.id so empty-string IDs don’t collapse to .expect/runs/.json and overwrite previous run results.

Prompt for AI agents
Check if this issue is valid — if so, understand the root cause and fix it. At apps/cli/src/utils/run-test.ts, line 405:

<comment>Use a truthy fallback for `finalExecuted.id` so empty-string IDs don’t collapse to `.expect/runs/.json` and overwrite previous run results.</comment>

<file context>
@@ -366,39 +369,47 @@ export const runHeadless = (options: HeadlessRunOptions) =>
+          });
+
+          const runResultPath = yield* writeRunResult(
+            finalExecuted.id ?? crypto.randomUUID(),
+            resultOutput,
+          );
</file context>
Suggested change
finalExecuted.id ?? crypto.randomUUID(),
finalExecuted.id || crypto.randomUUID(),
Fix with Cubic

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The nullish coalescing operator (??) only falls back when the left-hand side is null or undefined, so an empty-string ID is used as-is instead of triggering UUID generation

Fix on Vercel

resultOutput,
);
if (!isJsonOutput) {
process.stderr.write(`Run result: ${runResultPath}\n`);
}

if (isJsonOutput) {
const jsonString = JSON.stringify(
Schema.encodeSync(CiResultOutput)(resultOutput),
undefined,
Expand Down
60 changes: 60 additions & 0 deletions apps/cli/src/utils/write-run-result.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
import { randomUUID } from "node:crypto";
import * as path from "node:path";
import { Effect, Option, Schema } from "effect";
import { FileSystem } from "effect/FileSystem";
import { CiResultOutput } from "@expect/shared/models";

// Persists structured run results to .expect/runs/{planId}.json so outer
// agents (Cursor, Claude Code, Codex) can read a single file instead of
// polling terminal output. Each run gets a unique planId (UUID), enabling
// parallel agent sessions without file conflicts.

Copy link
Copy Markdown
Contributor

@vercel vercel bot Apr 3, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Constants EXPECT_STATE_DIR, EXPECT_RUNS_DIR, and EXPECT_RUNS_MAX_KEPT are duplicated in write-run-result.ts instead of being imported from supervisor

Fix on Vercel

// Directory names joined onto the current working directory to form the
// folder that run results are written to (`.expect/runs`).
const EXPECT_STATE_DIR = ".expect";
const EXPECT_RUNS_DIR = "runs";
// Number of most recently modified result files kept when older runs are pruned.
const EXPECT_RUNS_MAX_KEPT = 20;

/**
 * Writes a run's structured result to `.expect/runs/{planId}.json` under the
 * current working directory, then prunes older result files.
 *
 * @param planId - Identifier used as the file name. A blank id (empty or
 *   whitespace-only) is replaced with a freshly generated UUID so that
 *   unrelated runs never share `.expect/runs/.json`.
 * @param resultOutput - Result to serialize using the `CiResultOutput` schema.
 * @returns The path of the JSON file that was written.
 */
export const writeRunResult = Effect.fn("writeRunResult")(function* (
  planId: string,
  resultOutput: CiResultOutput,
) {
  const fileSystem = yield* FileSystem;
  const runsDir = path.join(process.cwd(), EXPECT_STATE_DIR, EXPECT_RUNS_DIR);

  yield* fileSystem.makeDirectory(runsDir, { recursive: true });

  // A `??` fallback at the call site lets "" through, which would make every
  // such run overwrite the same `.json` file; substitute a unique id instead.
  const fileId = planId.trim().length > 0 ? planId : randomUUID();
  const filePath = path.join(runsDir, `${fileId}.json`);
  const jsonString = JSON.stringify(Schema.encodeSync(CiResultOutput)(resultOutput), undefined, 2);
  yield* fileSystem.writeFileString(filePath, jsonString + "\n");

  yield* pruneOldRuns(runsDir);

  return filePath;
});

/**
 * Removes the oldest `.json` files in `runsDir` so that at most
 * `EXPECT_RUNS_MAX_KEPT` remain, ranking files by modification time.
 * A file whose modification time is unavailable is ranked as if it had the
 * epoch (time 0) timestamp.
 */
const pruneOldRuns = Effect.fn("pruneOldRuns")(function* (runsDir: string) {
  const fileSystem = yield* FileSystem;

  const runFiles = (yield* fileSystem.readDirectory(runsDir)).filter((name) =>
    name.endsWith(".json"),
  );

  // Nothing to prune while the directory is within the retention limit.
  if (runFiles.length <= EXPECT_RUNS_MAX_KEPT) return;

  const datedFiles = yield* Effect.forEach(
    runFiles,
    (name) => {
      const filePath = path.join(runsDir, name);
      return Effect.map(fileSystem.stat(filePath), (info) => ({
        filePath,
        mtime: Option.getOrElse(info.mtime, () => new Date(0)).getTime(),
      }));
    },
    { concurrency: "unbounded" },
  );

  // Newest first, so everything past the retention limit is the stale tail.
  const staleFiles = [...datedFiles]
    .sort((a, b) => b.mtime - a.mtime)
    .slice(EXPECT_RUNS_MAX_KEPT);

  yield* Effect.forEach(staleFiles, (stale) => fileSystem.remove(stale.filePath), {
    concurrency: "unbounded",
  });
});
55 changes: 51 additions & 4 deletions packages/browser/src/mcp/server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,42 @@ const imageResult = (base64: string) => ({
content: [{ type: "image" as const, data: base64, mimeType: "image/png" }],
});

// Lower-case substrings looked for in the current page's path to decide
// whether it looks like a login / SSO page.
const AUTH_PAGE_INDICATORS = [
  "login",
  "signin",
  "sign-in",
  "sign_in",
  "auth",
  "authenticate",
  "sso",
  "oauth",
];

/**
 * Builds a warning to append to the `open` tool result when the page ended up
 * somewhere that looks like an authentication page.
 *
 * A redirect is reported when the current origin differs from the requested
 * origin, or when the current path contains an auth indicator that the
 * requested URL did not contain.
 *
 * @param requestedUrl - URL the caller asked to open.
 * @param currentUrl - URL the page is actually on after navigation.
 * @returns The warning text (prefixed with two newlines), or `""` when no
 *   redirect is detected or either URL cannot be parsed.
 */
const buildAuthRedirectWarning = (requestedUrl: string, currentUrl: string): string => {
  try {
    const requestedOrigin = new URL(requestedUrl).origin;
    const current = new URL(currentUrl);
    const currentPath = current.pathname.toLowerCase();
    // Compare case-insensitively on both sides: the path is lower-cased, so the
    // requested URL must be too, otherwise opening "/Login" directly would be
    // reported as a redirect to an auth page.
    const requestedLower = requestedUrl.toLowerCase();

    const redirectedToAuthPage =
      current.origin !== requestedOrigin ||
      AUTH_PAGE_INDICATORS.some(
        (indicator) => currentPath.includes(indicator) && !requestedLower.includes(indicator),
      );

    if (!redirectedToAuthPage) return "";

    return (
      `\n\n⚠️ AUTH REDIRECT DETECTED: Page redirected to ${currentUrl} instead of staying at ${requestedUrl}. ` +
      `This likely means authentication is required. ` +
      `If tests need authenticated access, re-run with cookie injection (--cookies) or ensure the dev server allows unauthenticated access. ` +
      `You should emit STEP_SKIPPED with category=auth-blocked for any steps that require authentication.`
    );
  } catch {
    // Unparseable URLs: stay silent rather than fail the tool call.
    return "";
  }
};

// Constructor for async functions. It is not exposed as a global, so it is
// read off the prototype of an async arrow function.
const AsyncFunction = Object.getPrototypeOf(async () => {}).constructor;

// Tool annotations (readOnlyHint, destructiveHint) enable parallel execution in the Claude Agent SDK.
Expand Down Expand Up @@ -106,13 +142,24 @@ export const createBrowserMcpServer = <E>(
cdpUrl,
browserType,
});

const page = yield* session.requirePage();
const authWarning = buildAuthRedirectWarning(url, page.url());
if (authWarning) {
yield* Effect.logWarning("Auth redirect detected", {
requestedUrl: url,
currentUrl: page.url(),
});
}

const engineSuffix = browserType && browserType !== "chromium" ? ` [${browserType}]` : "";
const cdpSuffix = cdpUrl ? ` (connected via CDP: ${cdpUrl})` : "";
const cookieSuffix =
result.injectedCookieCount > 0
? ` (${result.injectedCookieCount} cookies synced from local browser)`
: "";
return textResult(
`Opened ${url}${engineSuffix}${cdpSuffix}` +
(result.injectedCookieCount > 0
? ` (${result.injectedCookieCount} cookies synced from local browser)`
: ""),
`Opened ${url}${engineSuffix}${cdpSuffix}${cookieSuffix}${authWarning}`,
);
}).pipe(Effect.withSpan(`mcp.tool.open`)),
),
Expand Down
Loading
Loading