From 9f91cce6b3311eeff185e5329c9261821c603c73 Mon Sep 17 00:00:00 2001 From: dadachi Date: Sun, 3 May 2026 07:25:34 +0900 Subject: [PATCH] =?UTF-8?q?Layer=203=20Phase=205d:=20runStage1Visual=20?= =?UTF-8?q?=E2=80=94=20convenience=20runner?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit One-call wrapper that ties Phase 5c artifact discovery (#42) and Phase 5a visual-judge orchestration (#40) together for both platforms. Returns a Stage1VisualResult shaped to match JudgeInput.visual's per-platform expectation, so callers can pass it through to runJudge directly: const visual = await runStage1Visual({ iosDir: "./out//ios", androidDir: "./out//android", spec: domain.displayName, }); const judge = await runJudge({ ..., visual }); Per-platform behavior: - Pass undefined to skip the platform. - If discovery fails (build hasn't happened, project layout unexpected), surfaces a structured VisualJudgeResult with ok=false and an actionable error message ("iOS artifact not discovered (run Layer 2 build mode first)") — same shape as a real launch/capture failure, so downstream aggregation in runJudge (#41) doesn't need a special case. Caller responsibilities: - Run Layer 2 in build mode first so .app / .apk exists - Ensure a sim/emulator is booted for each platform being judged - Decide which platforms to judge (the function judges only those passed) Tests: 16/16 npm run ci green. 
- Structured failure when artifacts missing ✓ - Empty result when no platforms requested ✓ Out of scope (Phase 5e, the final integration): - CLI flag / env var that opts dispatch into Stage 1 visual - Forcing Layer 2 build mode when visual is enabled - Plumbing the runStage1Visual call into dispatch.ts post-Layer-2 Co-Authored-By: Claude Opus 4.7 (1M context) --- src/validation/index.ts | 3 ++ src/validation/stage1.ts | 92 ++++++++++++++++++++++++++++++++++++++++ tests/smoke.test.ts | 21 ++++++++- 3 files changed, 115 insertions(+), 1 deletion(-) create mode 100644 src/validation/stage1.ts diff --git a/src/validation/index.ts b/src/validation/index.ts index 01f8687..bc30700 100644 --- a/src/validation/index.ts +++ b/src/validation/index.ts @@ -18,3 +18,6 @@ export type { VisualJudgeInput, VisualJudgeResult } from "./visual-judge.js"; export { discoverIosArtifact, discoverAndroidArtifact } from "./discover.js"; export type { IosArtifact, AndroidArtifact } from "./discover.js"; + +export { runStage1Visual } from "./stage1.js"; +export type { Stage1VisualInput, Stage1VisualResult } from "./stage1.js"; diff --git a/src/validation/stage1.ts b/src/validation/stage1.ts new file mode 100644 index 0000000..652a72a --- /dev/null +++ b/src/validation/stage1.ts @@ -0,0 +1,92 @@ +import { join } from "node:path"; +import { mkdir } from "node:fs/promises"; +import { discoverIosArtifact, discoverAndroidArtifact } from "./discover.js"; +import { runVisualJudge, DEFAULT_STAGE1_RUBRIC, type VisualJudgeResult } from "./visual-judge.js"; +import type { Layer3Criterion } from "./layer3.js"; + +export type Stage1VisualInput = { + // Pre-built output dirs. Pass undefined to skip the platform. + iosDir?: string; + androidDir?: string; + // Where to write screenshots. Defaults to tmp/screenshots under process.cwd().
+  screenshotDir?: string;
+  spec: string;
+  rubric?: readonly Layer3Criterion[];
+  renderWaitMs?: number;
+  samplesPerCriterion?: number;
+};
+
+export type Stage1VisualResult = {
+  ios?: VisualJudgeResult;
+  android?: VisualJudgeResult;
+};
+
+// One-call convenience wrapper for the Stage 1 visual judge: discover the
+// pre-built artifact + identifier on each requested platform (#42), then run
+// the install → launch → capture → judge chain (#40, #41) for that platform.
+//
+// Returns a Stage1VisualResult shaped to match JudgeInput.visual's per-
+// platform expectation, so callers can pass it through to runJudge directly.
+//
+// Caller responsibilities:
+//   - Run Layer 2 in build mode first so the .app / .apk exists.
+//   - Ensure a sim/emulator is booted for each platform being judged.
+//   - Pick which platforms to judge — pass undefined for the others.
+export async function runStage1Visual(input: Stage1VisualInput): Promise<Stage1VisualResult> {
+  const screenshotDir = input.screenshotDir ?? join(process.cwd(), "tmp", "screenshots");
+  await mkdir(screenshotDir, { recursive: true });
+  const rubric = input.rubric ?? DEFAULT_STAGE1_RUBRIC;
+
+  const result: Stage1VisualResult = {};
+
+  if (input.iosDir) {
+    const ios = await discoverIosArtifact(input.iosDir);
+    if (ios) {
+      result.ios = await runVisualJudge({
+        platform: "ios",
+        artifactPath: ios.appPath,
+        bundleId: ios.bundleId,
+        screenshotPath: join(screenshotDir, "ios-home.png"),
+        spec: input.spec,
+        rubric,
+        ...(input.renderWaitMs !== undefined ? { renderWaitMs: input.renderWaitMs } : {}),
+        ...(input.samplesPerCriterion !== undefined ? { samplesPerCriterion: input.samplesPerCriterion } : {}),
+      });
+    } else {
+      result.ios = stubFailure("ios", "iOS artifact not discovered (run Layer 2 build mode first)");
+    }
+  }
+
+  if (input.androidDir) {
+    const android = await discoverAndroidArtifact(input.androidDir);
+    if (android) {
+      result.android = await runVisualJudge({
+        platform: "android",
+        artifactPath: android.apkPath,
+        packageName: android.packageName,
+        screenshotPath: join(screenshotDir, "android-home.png"),
+        spec: input.spec,
+        rubric,
+        ...(input.renderWaitMs !== undefined ? { renderWaitMs: input.renderWaitMs } : {}),
+        ...(input.samplesPerCriterion !== undefined ? { samplesPerCriterion: input.samplesPerCriterion } : {}),
+      });
+    } else {
+      result.android = stubFailure("android", "Android artifact not discovered (run Layer 2 build mode first)");
+    }
+  }
+
+  return result;
+}
+
+function stubFailure(platform: "ios" | "android", error: string): VisualJudgeResult {
+  return {
+    ok: false,
+    launch: {
+      ok: false,
+      command: `discover ${platform} artifact`,
+      durationMs: 0,
+      error,
+    },
+    error,
+  };
+}
diff --git a/tests/smoke.test.ts b/tests/smoke.test.ts
index fa9b515..872b12a 100644
--- a/tests/smoke.test.ts
+++ b/tests/smoke.test.ts
@@ -1,6 +1,6 @@
 import { test } from "node:test";
 import assert from "node:assert/strict";
-import { runLayer1, runLayer2, runLayer3, captureScreenshot, installAndLaunch, runVisualJudge, DEFAULT_STAGE1_RUBRIC, discoverIosArtifact, discoverAndroidArtifact } from "../src/validation/index.js";
+import { runLayer1, runLayer2, runLayer3, captureScreenshot, installAndLaunch, runVisualJudge, DEFAULT_STAGE1_RUBRIC, discoverIosArtifact, discoverAndroidArtifact, runStage1Visual } from "../src/validation/index.js";
 import { dispatch } from "../src/dispatch.js";
 
 test("validation layers are exported as functions", () => {
@@ -12,6 +12,25 @@ test("validation layers are exported as functions", () => {
   assert.equal(typeof runVisualJudge, "function");
   assert.equal(typeof discoverIosArtifact, "function");
   assert.equal(typeof discoverAndroidArtifact, "function");
+  assert.equal(typeof runStage1Visual, "function");
+});
+
+test("runStage1Visual returns structured failure when artifacts not built", async () => {
+  const result = await runStage1Visual({
+    iosDir: "/nonexistent/ios",
+    androidDir: "/nonexistent/android",
+    spec: "test",
+  });
+  assert.equal(result.ios?.ok, false);
+  assert.equal(result.android?.ok, false);
+  assert.match(result.ios?.error ?? "", /not discovered/i);
+  assert.match(result.android?.error ?? "", /not discovered/i);
+});
+
+test("runStage1Visual returns empty result when no platforms requested", async () => {
+  const result = await runStage1Visual({ spec: "test" });
+  assert.equal(result.ios, undefined);
+  assert.equal(result.android, undefined);
 });
 
 test("discoverAndroidArtifact returns null for missing dir", async () => {