diff --git a/docs/LESSON.md b/docs/LESSON.md index 2956350..91b9579 100644 --- a/docs/LESSON.md +++ b/docs/LESSON.md @@ -23,6 +23,7 @@ ## 2026-05-20 +- **E2E CLI smoke should be self-contained and network-real, not command-only.** A stable pattern is: bootstrap a temp sandbox with `aqa init`, overwrite `.aqa/project.yaml` + `profiles.yaml` with schema-valid minimal config, create a local `packs/pack-local-smoke` scenario, boot a local HTTP server (`/healthz`), run `aqa run --profile smoke`, and assert `.aqa/runs/<run-id>/events.jsonl` + `findings.jsonl` exist. This catches orchestration regressions without external services and without adding root-level test dependencies. - **Hash-chain verifier and writer must share the exact same canonical body contract.** `EventChainWriter` hashes `sha256(prev_hash || canonical(rest_without_prev_hash_and_hash))` while persisted events expose `prev_hash: null` on the first record. A verifier that re-hashes including `prev_hash` (or expects the first record to carry the all-zero seed literal in `prev_hash`) will produce false mismatches on valid logs. Keep one canonical rule across writer and verifier, and treat the all-zero seed as internal hash input only. ## 2026-05-18 (v1.0 → v1.1 retrospective — patterns across the full 24-task roadmap) diff --git a/docs/PROGRESS.md b/docs/PROGRESS.md index 619ce8e..cfc2dba 100644 --- a/docs/PROGRESS.md +++ b/docs/PROGRESS.md @@ -11,6 +11,7 @@ ## 2026-05-20 +- **v1.8.2 slice closed — CLI smoke now runs a real HTTP end-to-end path.** `scripts/e2e-cli.mjs` no longer stops at version/help/doctor/validate only: it now boots a local HTTP `/healthz` target, seeds a schema-valid local smoke pack/profile, executes `aqa run --profile smoke` with the real HTTP probe runner, and asserts run artifacts are emitted under `.aqa/runs/<run-id>/` (`events.jsonl` non-empty, `findings.jsonl` present). This closes the old “CLI smoke is command-only” gap and makes CI catch integration regressions earlier. 
- **v1.8.1 slice closed — audit-chain canonical reconciliation.** Aligned `@aqa/compliance.verifyEventChain` with `@aqa/runner.EventChainWriter`: hash recomputation now excludes `prev_hash` from canonical body (matches writer), and first-record `prev_hash: null` is now treated as canonical instead of expecting all-zero literal in the field. Updated compliance tests and removed stale divergence note in `@aqa/kit` run smoke tests. - **v1.x docs closure in progress — README/docs refresh pass started.** Removed stale preview/stub wording from README, added the new **How you use it** section after the 7-word model, updated quick-start flow to the current shipped commands (including admin panel boot), and aligned `PACK-AUTHORING.md` with the real HTTP probe runner now shipped in v1.8 (`aqa run` uses `project.sut.base_url` for `http` probes). - **v1.7 slice 4j closed — AuditChainViewer autoload from live initial chain.** Removed the manual dependency on "Load good chain" for live audit data: `AuditChainViewer` now consumes `initialChain` reactively, resets verify state safely on incoming chain changes, and both Audit pages pass normalized `/api/audit` events via `initialChain`. Added e2e coverage proving `/api/audit` data auto-loads and verifies to `CHAIN OK` without demo-button interaction. diff --git a/scripts/e2e-cli.mjs b/scripts/e2e-cli.mjs index 2ce0370..7d59d31 100644 --- a/scripts/e2e-cli.mjs +++ b/scripts/e2e-cli.mjs @@ -1,23 +1,28 @@ #!/usr/bin/env node /** - * End-to-end smoke test for the `aqa` CLI against `examples/bun-api`. + * End-to-end smoke test for the `aqa` CLI in a self-contained local sandbox. * * What this exercises: * 1. `aqa --version` — binary is wired * 2. `aqa --help` — help text is reachable * 3. `aqa doctor` — project profiler runs without throwing - * 4. `aqa validate` — schema-validates the example's - * agentic-qa-kit.yaml - * - * What it does NOT do (deferred): - * - `aqa run` against a live target. 
The runner contract is exercised - * in `packages/runner/test/*`; spinning up the example app + LLM - * adapter inside CI is a Task 7 follow-up. + * 4. `aqa validate` — schema-validates generated .aqa/* + * 5. `aqa run --profile smoke` against a live local HTTP target + * + verifies run artifacts are written * * Wire into CI via `bun run test:e2e-cli` from the root package.json. */ import { spawnSync } from 'node:child_process'; -import { mkdtempSync, rmSync, writeFileSync } from 'node:fs'; +import { + existsSync, + mkdirSync, + mkdtempSync, + readFileSync, + readdirSync, + rmSync, + writeFileSync, +} from 'node:fs'; +import { createServer } from 'node:http'; import { tmpdir } from 'node:os'; import { dirname, join, resolve } from 'node:path'; import { fileURLToPath } from 'node:url'; @@ -59,40 +64,226 @@ if ((initResult.status ?? -1) !== 0) { process.exit(1); } +// Overwrite the generated project/profile files with a minimal schema-valid +// configuration tailored for this smoke run. This avoids requiring YAML +// tooling in the root devDependencies. +const projectPath = join(SANDBOX, '.aqa', 'project.yaml'); +const profilesPath = join(SANDBOX, '.aqa', 'profiles.yaml'); +writeFileSync( + projectPath, + `schema_version: "1" +name: aqa-cli-e2e-fixture +stack: + runtime: node + framework: smoke-fixture + db: [] + package_manager: npm +sut: + type: api + base_url: http://127.0.0.1:0 +tags: [] +`, + 'utf8', +); +writeFileSync( + profilesPath, + `schema_version: "1" +profiles: + smoke: + schema_version: "1" + name: smoke + execution_mode: orchestrator + llm_usage: [] + llm_budget_usd: null + parallelism: 1 + require_deterministic_replay: false + packs: + - pack-local-smoke + tags: + - smoke +`, + 'utf8', +); + +// Add a local smoke pack with one HTTP scenario and wire the smoke profile +// to it. This keeps the e2e deterministic and independent from bundled pack +// evolution. 
+const packRoot = join(SANDBOX, 'packs', 'pack-local-smoke'); +mkdirSync(join(packRoot, 'scenarios'), { recursive: true }); +const packManifest = `schema_version: "1" +name: pack-local-smoke +version: 0.1.0 +description: local smoke fixture for e2e-cli +author: ci +license: MIT +applies_when: + sut_type: [api] +templates: [] +scenarios: + - scenarios/smoke-noop.yaml +risks: [] +oracles: [] +probes: [] +`; +const packScenario = `schema_version: "1" +id: scn-smoke-noop +title: local smoke GET /healthz returns 200 +risk_refs: [r-smoke] +invariant_refs: [inv-smoke] +preconditions: [] +steps: + - id: probe-noop + kind: http + with: { method: "GET", url: "/healthz" } +oracles: + - id: o-status-ok + kind: http_status + with: { expected: 200 } +tags: [smoke] +`; +writeFileSync(join(packRoot, 'pack.yaml'), packManifest, 'utf8'); +writeFileSync( + join(packRoot, 'package.json'), + JSON.stringify({ name: 'pack-local-smoke', version: '0.0.0', private: true }, null, 2), +); +writeFileSync(join(packRoot, 'scenarios', 'smoke-noop.yaml'), packScenario, 'utf8'); + const cases = [ { label: 'version', args: ['--version'], expectExit: 0, expectStdout: /\d+\.\d+/ }, { label: 'help', args: ['--help'], expectExit: 0, expectStdout: /Usage/i }, { label: 'doctor', args: ['doctor'], expectExit: 0, cwd: SANDBOX }, { label: 'validate', args: ['validate'], expectExit: 0, cwd: SANDBOX }, + { + label: 'run-smoke', + args: ['run', '--profile', 'smoke'], + expectExit: 0, + cwd: SANDBOX, + timeout: 90_000, + }, ]; +const app = createServer((req, res) => { + if (req.url === '/healthz') { + res.writeHead(200, { 'content-type': 'application/json' }); + res.end(JSON.stringify({ ok: true })); + return; + } + res.writeHead(404, { 'content-type': 'application/json' }); + res.end(JSON.stringify({ ok: false })); +}); + let failed = 0; -for (const c of cases) { - const result = spawnSync(process.execPath, [AQA_BIN, ...c.args], { - cwd: c.cwd ?? 
ROOT, - encoding: 'utf8', - timeout: 20_000, +try { + const boundPort = await new Promise((resolve, reject) => { + const onError = (error) => { + app.off('listening', onListening); + reject(error); + }; + const onListening = () => { + app.off('error', onError); + const address = app.address(); + if (!address || typeof address === 'string') { + reject(new Error('Unable to determine local server port for smoke run')); + return; + } + resolve(address.port); + }; + app.once('error', onError); + app.once('listening', onListening); + app.listen(0, '127.0.0.1'); }); - const expectedExits = Array.isArray(c.expectExit) ? c.expectExit : [c.expectExit]; - const exitOk = expectedExits.includes(result.status ?? -1); - const stdoutOk = c.expectStdout ? c.expectStdout.test(result.stdout ?? '') : true; - const ok = exitOk && stdoutOk; + writeFileSync( + projectPath, + `schema_version: "1" +name: aqa-cli-e2e-fixture +stack: + runtime: node + framework: smoke-fixture + db: [] + package_manager: npm +sut: + type: api + base_url: http://127.0.0.1:${boundPort} +tags: [] +`, + 'utf8', + ); + + for (const c of cases) { + const result = spawnSync(process.execPath, [AQA_BIN, ...c.args], { + cwd: c.cwd ?? ROOT, + encoding: 'utf8', + timeout: c.timeout ?? 20_000, + }); + + const expectedExits = Array.isArray(c.expectExit) ? c.expectExit : [c.expectExit]; + const exitOk = expectedExits.includes(result.status ?? -1); + const stdoutOk = c.expectStdout ? c.expectStdout.test(result.stdout ?? 
'') : true; + const ok = exitOk && stdoutOk; + + if (ok) { + console.log(`✓ ${c.label} (exit=${result.status})`); + } else { + failed += 1; + console.log(`✗ ${c.label} (exit=${result.status}, expected ${expectedExits.join('|')})`); + if (result.signal) console.log(` signal: ${result.signal} (likely timeout)`); + if (result.stdout) + console.log(' stdout:', result.stdout.split('\n').slice(0, 5).join('\n ')); + if (result.stderr) + console.log(' stderr:', result.stderr.split('\n').slice(0, 5).join('\n ')); + } + } - if (ok) { - console.log(`✓ ${c.label} (exit=${result.status})`); + // Verify `aqa run` produced run artifacts with a non-empty events chain. + const runsDir = join(SANDBOX, '.aqa', 'runs'); + if (failed === 0) { + if (!existsSync(runsDir)) { + failed += 1; + console.error('✗ run-smoke did not produce .aqa/runs'); + } else { + const runIds = readdirSync(runsDir).sort(); + if (runIds.length === 0) { + failed += 1; + console.error('✗ run-smoke did not produce any .aqa/runs/ directory'); + } else { + const latest = join(runsDir, runIds[runIds.length - 1]); + const eventsPath = join(latest, 'events.jsonl'); + const findingsPath = join(latest, 'findings.jsonl'); + if (!existsSync(eventsPath)) { + failed += 1; + console.error('✗ run-smoke did not produce events.jsonl'); + } else { + const eventsText = readFileSync(eventsPath, 'utf8').trim(); + if (!eventsText) { + failed += 1; + console.error('✗ run-smoke produced empty events.jsonl'); + } + } + if (!existsSync(findingsPath)) { + failed += 1; + console.error('✗ run-smoke did not produce findings.jsonl'); + } + } + } + } +} catch (error) { + failed += 1; + console.error('✗ failed to execute smoke run setup'); + if (error instanceof Error) { + console.error(` ${error.message}`); } else { - failed += 1; - console.log(`✗ ${c.label} (exit=${result.status}, expected ${expectedExits.join('|')})`); - if (result.stdout) - console.log(' stdout:', result.stdout.split('\n').slice(0, 5).join('\n ')); - if (result.stderr) - 
console.log(' stderr:', result.stderr.split('\n').slice(0, 5).join('\n ')); + console.error(` ${String(error)}`); + } +} finally { + try { + app.close(); + } catch { + // ignore close failures during cleanup } + rmSync(SANDBOX, { recursive: true, force: true }); } -rmSync(SANDBOX, { recursive: true, force: true }); - if (failed > 0) { console.error(`\n${failed} smoke check(s) failed.`); process.exit(1);