Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/validation/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,6 @@ export type { VisualJudgeInput, VisualJudgeResult } from "./visual-judge.js";

export { discoverIosArtifact, discoverAndroidArtifact } from "./discover.js";
export type { IosArtifact, AndroidArtifact } from "./discover.js";

export { runStage1Visual } from "./stage1.js";
export type { Stage1VisualInput, Stage1VisualResult } from "./stage1.js";
92 changes: 92 additions & 0 deletions src/validation/stage1.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
import { join } from "node:path";
import { mkdir } from "node:fs/promises";
import { discoverIosArtifact, discoverAndroidArtifact } from "./discover.js";
import { runVisualJudge, DEFAULT_STAGE1_RUBRIC, type VisualJudgeResult } from "./visual-judge.js";
import type { Layer3Criterion } from "./layer3.js";

// Options accepted by runStage1Visual.
export type Stage1VisualInput = {
// Pre-built output dirs. Pass undefined to skip the platform.
iosDir?: string;
androidDir?: string;
// Where to write screenshots. Defaults to <cwd>/tmp/screenshots.
screenshotDir?: string;
// Passed through unchanged as the `spec` of each platform's visual-judge call.
spec: string;
// Criteria handed to the visual judge; DEFAULT_STAGE1_RUBRIC is used when omitted.
rubric?: readonly Layer3Criterion[];
// Forwarded to the visual judge only when defined; otherwise the key is left
// off the call entirely.
// NOTE(review): presumably the delay before the screenshot is captured — confirm in visual-judge.
renderWaitMs?: number;
// Forwarded to the visual judge only when defined; otherwise the key is left
// off the call entirely.
samplesPerCriterion?: number;
};

// Per-platform outcome of runStage1Visual. A key stays unset when that
// platform's directory was not supplied; when a directory was supplied the key
// holds either the visual judge's result or a failure entry saying the
// artifact was not discovered.
export type Stage1VisualResult = {
ios?: VisualJudgeResult;
android?: VisualJudgeResult;
};

// Stage 1 visual judge in a single call. For every platform whose directory is
// supplied, discover the pre-built artifact and its identifier (#42) and then
// hand it to the install → launch → capture → judge chain (#40, #41).
//
// The returned Stage1VisualResult is shaped to match JudgeInput.visual's per-
// platform expectation, so it can be passed through to runJudge directly.
//
// Behaviour notes:
// - The screenshot directory is created (recursively) before any platform is
//   processed, even if no platform ends up being judged.
// - iOS is handled first, then Android; the two never run concurrently.
// - A platform whose artifact cannot be discovered yields a failure result
//   instead of an exception.
//
// Caller responsibilities:
// - Run Layer 2 in build mode first so the .app / .apk exists.
// - Ensure a sim/emulator is booted for each platform being judged.
// - Pick which platforms to judge — pass undefined for the others.
export async function runStage1Visual(input: Stage1VisualInput): Promise<Stage1VisualResult> {
  const { iosDir, androidDir, spec, renderWaitMs, samplesPerCriterion } = input;

  const shotsRoot = input.screenshotDir ?? join(process.cwd(), "tmp", "screenshots");
  await mkdir(shotsRoot, { recursive: true });

  // Settings common to both platforms. The optional knobs are only added as
  // keys when the caller set them, so an explicit `undefined` is never passed
  // down to the judge.
  const judgeOptions = {
    spec,
    rubric: input.rubric ?? DEFAULT_STAGE1_RUBRIC,
    ...(renderWaitMs !== undefined ? { renderWaitMs } : {}),
    ...(samplesPerCriterion !== undefined ? { samplesPerCriterion } : {}),
  };

  const outcome: Stage1VisualResult = {};

  if (iosDir) {
    const iosArtifact = await discoverIosArtifact(iosDir);
    outcome.ios = iosArtifact
      ? await runVisualJudge({
          platform: "ios",
          artifactPath: iosArtifact.appPath,
          bundleId: iosArtifact.bundleId,
          screenshotPath: join(shotsRoot, "ios-home.png"),
          ...judgeOptions,
        })
      : stubFailure("ios", "iOS artifact not discovered (run Layer 2 build mode first)");
  }

  if (androidDir) {
    const androidArtifact = await discoverAndroidArtifact(androidDir);
    outcome.android = androidArtifact
      ? await runVisualJudge({
          platform: "android",
          artifactPath: androidArtifact.apkPath,
          packageName: androidArtifact.packageName,
          screenshotPath: join(shotsRoot, "android-home.png"),
          ...judgeOptions,
        })
      : stubFailure("android", "Android artifact not discovered (run Layer 2 build mode first)");
  }

  return outcome;
}

// Builds the failure result reported for a platform whose artifact could not
// be discovered. The launch entry is a placeholder (zero duration, a
// descriptive pseudo-command naming the platform), and the same message is
// recorded on both the launch entry and the top-level result.
function stubFailure(platform: "ios" | "android", error: string): VisualJudgeResult {
  const command = `discover ${platform} artifact`;
  return {
    ok: false,
    launch: { ok: false, command, durationMs: 0, error },
    error,
  };
}
21 changes: 20 additions & 1 deletion tests/smoke.test.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { test } from "node:test";
import assert from "node:assert/strict";
import { runLayer1, runLayer2, runLayer3, captureScreenshot, installAndLaunch, runVisualJudge, DEFAULT_STAGE1_RUBRIC, discoverIosArtifact, discoverAndroidArtifact } from "../src/validation/index.js";
import { runLayer1, runLayer2, runLayer3, captureScreenshot, installAndLaunch, runVisualJudge, DEFAULT_STAGE1_RUBRIC, discoverIosArtifact, discoverAndroidArtifact, runStage1Visual } from "../src/validation/index.js";
import { dispatch } from "../src/dispatch.js";

test("validation layers are exported as functions", () => {
Expand All @@ -12,6 +12,25 @@ test("validation layers are exported as functions", () => {
assert.equal(typeof runVisualJudge, "function");
assert.equal(typeof discoverIosArtifact, "function");
assert.equal(typeof discoverAndroidArtifact, "function");
assert.equal(typeof runStage1Visual, "function");
});

// Both platform directories are supplied but point at paths that are not
// expected to exist, so each platform should come back as an ok:false result
// whose error mentions that the artifact was not discovered.
test("runStage1Visual returns structured failure when artifacts not built", async () => {
  const { ios, android } = await runStage1Visual({
    spec: "test",
    iosDir: "/nonexistent/ios",
    androidDir: "/nonexistent/android",
  });
  const notDiscovered = /not discovered/i;

  assert.equal(ios?.ok, false);
  assert.equal(android?.ok, false);
  assert.match(ios?.error ?? "", notDiscovered);
  assert.match(android?.error ?? "", notDiscovered);
});

// With neither platform directory supplied, no platform key should be set on
// the result.
test("runStage1Visual returns empty result when no platforms requested", async () => {
  const { ios, android } = await runStage1Visual({ spec: "test" });

  assert.equal(ios, undefined);
  assert.equal(android, undefined);
});

test("discoverAndroidArtifact returns null for missing dir", async () => {
Expand Down
Loading