From b30dba295a2e96fcda9841aeca6725ebbea679a2 Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Wed, 10 Jun 2026 01:40:24 -0700 Subject: [PATCH 01/17] test(e2e): simplify legacy migration tracking --- test/e2e-scenario/docs/MIGRATION.md | 77 +- test/e2e-scenario/docs/README.md | 37 +- test/e2e-scenario/docs/RETIREMENT.md | 10 +- .../e2e-migration-inventory.test.ts | 236 +---- .../migration/legacy-inventory.json | 900 ------------------ 5 files changed, 84 insertions(+), 1176 deletions(-) delete mode 100644 test/e2e-scenario/migration/legacy-inventory.json diff --git a/test/e2e-scenario/docs/MIGRATION.md b/test/e2e-scenario/docs/MIGRATION.md index 167f74ea10..21408be2d9 100644 --- a/test/e2e-scenario/docs/MIGRATION.md +++ b/test/e2e-scenario/docs/MIGRATION.md @@ -1,16 +1,18 @@ -# E2E Scenario Migration Notes +# NemoClaw E2E Migration Notes -This file describes how to move coverage into the Vitest scenario framework -without confusing that work with the retired typed-shell scenario runner. -Changing status, ownership, and per-test decisions belong in GitHub issues and -PRs. +This file describes how to move coverage into the single Vitest E2E system +without confusing that work with the retired typed-shell scenario runner or a +second bash-driven harness. Vitest is the harness, GitHub Actions is the matrix, +and NemoClaw fixtures may invoke real subprocess and system boundaries when +those boundaries are the contract. Migration state is tracked outside the repository in GitHub issues and pull -requests. -Use GitHub issues and pull requests for status changes. +requests. Use GitHub issues and pull requests as the source of truth for status +changes, ownership, deletion evidence, and contract-preserving migration +decisions. ## Current State @@ -26,14 +28,16 @@ The scenario runner cutover is complete: - The typed-shell scenario runner, shell validation-suite tree, and retiring scenario workflows are removed. See `RETIREMENT.md`. 
-Direct legacy E2E scripts under `test/e2e/test-*.sh` remain in place. Many are -expected to stay because they test shell/install/user-flow behavior or preserve -umbrella integration smoke value. #5098 tracks family-by-family migration, -augmentation, and eventual deletion decisions for those scripts. +Direct legacy E2E scripts under `test/e2e/test-*.sh` remain in place until they +are migrated by contract. Some currently test shell, install, platform, process, +or full user-flow behavior. Preserve those real boundaries by invoking them from +Vitest tests and fixtures instead of keeping a separate durable E2E runner. +Issue #5098 tracks family-by-family migration, augmentation, and eventual +deletion decisions for those scripts. ## Target Architecture -The durable scenario framework has one execution path: +The durable E2E system has one execution path: - Vitest owns execution, filtering, reporters, timeouts, fixture lifecycle, skip handling, and CI integration. @@ -44,48 +48,39 @@ The durable scenario framework has one execution path: and supported combinations without becoming a second runner. - Product-facing manifests describe desired setup/onboarding state, not test execution logic. -- Shell scripts remain only for direct legacy E2Es or narrow system-boundary - probes where shell is the contract or lowest-risk adapter. +- Shell and system-boundary behavior should be exercised from Vitest when it is + the contract or lowest-risk adapter. -## Deletion Inventory +## Migration Governance -`test/e2e-scenario/migration/legacy-inventory.json` is a machine-readable -deletion gate. +The former `test/e2e-scenario/migration/legacy-inventory.json` ledger is removed +because it duplicated live GitHub issues and pull requests and quickly became a +stale source of truth. 
-It must cover: +The useful deletion invariant is smaller: -- every direct legacy shell entrypoint under `test/e2e/test-*.sh`; -- explicitly retained bridge entrypoints such as `test/e2e/brev-e2e.test.ts`; -- retired internal scenario-runner surfaces removed by the cutover. +> A PR that deletes a legacy E2E script must show the replacement Vitest +> coverage or explain the retirement rationale. -Status values: - -- `not-migrated`: legacy coverage has no equivalent typed scenario yet. -- `bridge-probe`: coverage is temporarily represented by a bridge path. -- `covered`: equivalent Vitest live scenario coverage exists. -- `retired`: maintainers agreed the legacy surface is no longer required. - -Do not set `deletionReady: true` on a direct legacy script unless the record is -`covered` or `retired` and the approval issue records the deletion rationale. -The retired internal scenario-runner surfaces are already marked through #5098; -that does not imply direct legacy bash scripts are deletion-ready. +Record that evidence in the PR body and linked issue. The evidence should name +the legacy contract, the replacement Vitest coverage, any intentionally retired +behavior, and the verification that preserves fidelity. ## Migration Pattern When moving behavior from a legacy E2E script: -1. Identify the test family and policy from #5098: KEEP_BASH, HYBRID, or - MIGRATE_TYPED. +1. Identify the actual contract: CLI behavior, installer behavior, full user + journey, process boundary, platform boundary, or another observable behavior. 2. Add or update manifests only when product setup/onboarding state changes. 3. Add typed scenario registry coverage when the live matrix needs a stable scenario ID. -4. Add fixture helpers before copying shell logic. -5. For HYBRID tests, keep the bash test and add a focused typed peer for the - contract being strengthened. -6. For MIGRATE_TYPED tests, prove parity first, then mark the inventory row - covered before any deletion PR. -7. 
Leave umbrella KEEP_BASH tests in place unless the tracking issue explicitly - revises their classification. +4. Add only the fixture or helper needed for the migration. +5. Preserve real boundaries. Use `bash`, login shells, `/proc`, process + signals, `sudo`, Docker host state, installer scripts, or full journey flows + from Vitest when they are the behavior being tested. +6. Prove equivalence in the PR, then delete the bash harness when the Vitest + test preserves the same value. ## Useful Commands diff --git a/test/e2e-scenario/docs/README.md b/test/e2e-scenario/docs/README.md index 68a750dc1e..5c9c4ded56 100644 --- a/test/e2e-scenario/docs/README.md +++ b/test/e2e-scenario/docs/README.md @@ -1,13 +1,14 @@ -# NemoClaw E2E Scenario Framework +# NemoClaw E2E Vitest Fixtures -NemoClaw scenario E2E now uses **Vitest as the scenario execution runner**. -Vitest owns discovery, filtering, timeouts, reporters, fixture lifecycle, -skips, and CI integration. NemoClaw owns the domain layer: scenario metadata, -phase fixtures, product clients, evidence artifacts, redaction, cleanup, -expected-state probes, and typed assertion helpers. +NemoClaw E2E now has one target execution model: **Vitest as the harness** and +GitHub Actions as the matrix. Vitest owns discovery, filtering, timeouts, +reporters, fixture lifecycle, skips, and CI integration. NemoClaw owns the +domain layer: scenario metadata, phase fixtures, product clients, evidence +artifacts, redaction, cleanup, expected-state probes, and typed assertion +helpers. The retired typed-shell scenario runner is documented in [`RETIREMENT.md`](./RETIREMENT.md). Do not add new durable behavior to the old @@ -15,8 +16,7 @@ YAML/bash scenario-runner shape. Direct legacy E2E scripts under `test/e2e/test-*.sh` still provide most live nightly and platform coverage. Those scripts are not deleted by the scenario -runner cutover; migrate or augment them family by family using the inventory -rules in `MIGRATION.md`. 
+runner cutover; migrate them by contract using the rules in `MIGRATION.md`. ## Sources Of Truth @@ -29,7 +29,7 @@ rules in `MIGRATION.md`. | Expected-state probes | `test/e2e-scenario/scenarios/expected-states.ts` | | Product-facing setup/onboarding state | `test/e2e-scenario/manifests/*.yaml` | | Legacy direct E2E coverage | `test/e2e/test-*.sh` and their workflows | -| Deletion guard inventory | `test/e2e-scenario/migration/legacy-inventory.json` | +| Migration status and deletion evidence | GitHub issues and pull requests | ## Scenario Model @@ -87,7 +87,6 @@ test/e2e-scenario/ framework-tests/ # Fast framework and metadata tests live/ # Opt-in live Vitest scenario tests manifests/ # Product-facing NemoClawInstance desired state - migration/ # Machine-readable deletion guard inventory scenarios/ # Typed registry, matrix helpers, expected states ``` @@ -105,7 +104,9 @@ test/e2e-scenario/ ## Migration Tracking -Migration status is tracked outside the repository. +Migration status is tracked outside the repository. GitHub issues and pull +requests are the source of truth for script-by-script state, ownership, deletion +evidence, replacement Vitest coverage, and retirement rationale. GitHub issues and PRs own changing migration status. The key issues are: @@ -114,11 +115,11 @@ GitHub issues and PRs own changing migration status. The key issues are: - #4990: phase fixtures and registry-driven live discovery - #5098: direct legacy bash-suite migration epic -The repo-local inventory at -`test/e2e-scenario/migration/legacy-inventory.json` is a deletion gate, not a -progress dashboard. It prevents accidental deletion of direct legacy E2E -scripts and records the retired internal typed-shell runner surfaces. +The former repo-local `legacy-inventory.json` ledger is removed because it +duplicated live GitHub state and drifted quickly. 
A PR that deletes a legacy E2E +script must show the replacement Vitest coverage or explain the retirement +rationale in the PR body and linked issue. -Prefer new scenario coverage in Vitest fixtures unless shell itself is the -contract or an existing legacy umbrella test is intentionally kept for -end-to-end install/user-flow fidelity. +Prefer new E2E coverage in Vitest fixtures. When shell, installer, process, +platform, or full user-flow behavior is the contract, invoke that real boundary +from Vitest rather than preserving a second durable runner. diff --git a/test/e2e-scenario/docs/RETIREMENT.md b/test/e2e-scenario/docs/RETIREMENT.md index 6db7bc94ab..03e55d1743 100644 --- a/test/e2e-scenario/docs/RETIREMENT.md +++ b/test/e2e-scenario/docs/RETIREMENT.md @@ -51,14 +51,14 @@ and artifact shape operators needed from the retired workflows: ## What Was Not Removed Direct legacy E2E scripts under `test/e2e/test-*.sh` remain in place. Those -scripts are governed by #5098 and -`test/e2e-scenario/migration/legacy-inventory.json`. They should be migrated, -augmented, or kept by family according to their KEEP_BASH, HYBRID, or -MIGRATE_TYPED classification. +scripts are governed by #5098 and live GitHub issues and pull requests. They +should be migrated by contract into the single Vitest E2E system. A PR that +deletes a legacy E2E script must show the replacement Vitest coverage or explain +the retirement rationale in the PR body and linked issue. That includes the security and messaging contracts that the deleted typed-shell validation suites used to mirror. 
Until #5098 migrates those families into -Vitest scenario fixtures, the active source of truth remains: +Vitest fixtures, the active source of truth remains: - `test/e2e/test-credential-sanitization.sh` and `test/e2e/test-credential-migration.sh` for credential leak prevention and diff --git a/test/e2e-scenario/framework-tests/e2e-migration-inventory.test.ts b/test/e2e-scenario/framework-tests/e2e-migration-inventory.test.ts index b4b9e0b9cc..226aabaf0b 100644 --- a/test/e2e-scenario/framework-tests/e2e-migration-inventory.test.ts +++ b/test/e2e-scenario/framework-tests/e2e-migration-inventory.test.ts @@ -6,227 +6,39 @@ import path from "node:path"; import { describe, expect, it } from "vitest"; -const INVENTORY_PATH = path.resolve(import.meta.dirname, "../migration/legacy-inventory.json"); const REPO_ROOT = path.resolve(import.meta.dirname, "../../.."); -const LEGACY_E2E_DIR = path.join(REPO_ROOT, "test/e2e"); -const EXPECTED_STATUS_VALUES = ["not-migrated", "bridge-probe", "covered", "retired"] as const; -const INTERNAL_SURFACE_ROOTS = [ - "test/e2e-scenario/nemoclaw_scenarios", - "test/e2e-scenario/onboarding_assertions", - "test/e2e-scenario/runtime/lib", - "test/e2e-scenario/runtime/reports", - "test/e2e-scenario/scenarios/orchestrators", - "test/e2e-scenario/validation_suites", -] as const; +const SCENARIO_SUITE_DIR = path.join(REPO_ROOT, "test/e2e-scenario"); +const MIGRATION_DOC = path.join(SCENARIO_SUITE_DIR, "docs", "MIGRATION.md"); +const README_DOC = path.join(SCENARIO_SUITE_DIR, "docs", "README.md"); +const RETIREMENT_DOC = path.join(SCENARIO_SUITE_DIR, "docs", "RETIREMENT.md"); +const LEGACY_INVENTORY = path.join(SCENARIO_SUITE_DIR, "migration", "legacy-inventory.json"); -type MigrationStatus = "not-migrated" | "bridge-probe" | "covered" | "retired"; - -interface LegacyInventoryEntry { - legacyScript: string; - domain: string; - ownerIssue: string; - status: MigrationStatus; - targetVitestScenarios: string[]; - bridgeProbes: string[]; - retiredReason: 
string; - deletionReady: boolean; - deletionApprovalIssue?: string; - notes: string; -} - -interface LegacyInternalSurface { - id: string; - paths: string[]; - domain: string; - ownerIssue: string; - status: MigrationStatus; - replacementSurface: string; - targetVitestScenarios: string[]; - bridgeProbes: string[]; - retiredReason: string; - deletionReady: boolean; - deletionApprovalIssue?: string; - notes: string; -} - -interface LegacyInventory { - version: number; - statusValues: MigrationStatus[]; - deletionReadiness: { - requires: string[]; - }; - entries: LegacyInventoryEntry[]; - internalSurfaces: LegacyInternalSurface[]; -} - -function loadInventory(): LegacyInventory { - return JSON.parse(fs.readFileSync(INVENTORY_PATH, "utf8")) as LegacyInventory; -} - -function repoPathExists(repoRelativePath: string): boolean { - expect(path.isAbsolute(repoRelativePath)).toBe(false); - expect(repoRelativePath).not.toContain(".."); - - return fs.existsSync(path.join(REPO_ROOT, repoRelativePath)); -} - -function listLegacyShellEntrypoints(): string[] { - return fs - .readdirSync(LEGACY_E2E_DIR) - .filter((name) => /^test-.*\.sh$/.test(name)) - .map((name) => `test/e2e/${name}`) - .sort(); -} - -function listRepoFilesUnder(repoRelativeDir: string): string[] { - const absoluteDir = path.join(REPO_ROOT, repoRelativeDir); - const files: string[] = []; - const visit = (dir: string) => { - for (const dirent of fs.readdirSync(dir, { withFileTypes: true })) { - const absolutePath = path.join(dir, dirent.name); - if (dirent.isDirectory()) { - visit(absolutePath); - } else if (dirent.isFile()) { - files.push(path.relative(REPO_ROOT, absolutePath).split(path.sep).join("/")); - } - } - }; - visit(absoluteDir); - return files.sort(); -} - -function isCoveredByInventoryPath(filePath: string, inventoryPath: string): boolean { - return filePath === inventoryPath || filePath.startsWith(`${inventoryPath}/`); +function read(filePath: string): string { + return fs.readFileSync(filePath, 
"utf8"); } -function expectPathListIsRepoRelative(paths: readonly string[], options = { mustExist: true }) { - expect(paths.length).toBeGreaterThan(0); - for (const repoRelativePath of paths) { - expect(repoRelativePath).not.toBe(""); - if (options.mustExist) { - expect(repoPathExists(repoRelativePath)).toBe(true); - } - } -} - -function expectMigrationRecordDeletionGate( - record: Pick< - LegacyInventoryEntry | LegacyInternalSurface, - | "status" - | "targetVitestScenarios" - | "bridgeProbes" - | "retiredReason" - | "deletionReady" - | "deletionApprovalIssue" - >, -) { - if (record.status === "covered") { - expect(record.targetVitestScenarios.length).toBeGreaterThan(0); - for (const scenario of record.targetVitestScenarios) { - expect(scenario).toMatch(/^test\/e2e-scenario\/live\/.+\.test\.ts$/); - expect(repoPathExists(scenario)).toBe(true); - } - } - - if (record.status === "bridge-probe") { - expect(record.bridgeProbes.length).toBeGreaterThan(0); - for (const probe of record.bridgeProbes) { - expect(repoPathExists(probe)).toBe(true); - } - } - - if (record.status === "retired") { - expect(record.retiredReason).not.toBe(""); - } - - if (record.deletionReady) { - expect(["covered", "retired"]).toContain(record.status); - expect(["#4357", "#5098"]).toContain(record.deletionApprovalIssue); - expect( - record.status === "retired" ? 
record.retiredReason : record.targetVitestScenarios.length, - ).toBeTruthy(); - } -} - -describe("E2E migration inventory deletion gates", () => { - it("uses a constrained migration vocabulary with owning issues", () => { - const inventory = loadInventory(); - const statuses = new Set(inventory.statusValues); - const legacyScripts = new Set(); - const internalSurfaceIds = new Set(); - - expect(inventory.version).toBe(1); - expect(inventory.statusValues).toEqual([...EXPECTED_STATUS_VALUES]); - expect(inventory.deletionReadiness.requires.length).toBeGreaterThan(0); - expect(inventory.entries.length).toBeGreaterThan(0); - expect(inventory.internalSurfaces.length).toBeGreaterThan(0); - - for (const entry of inventory.entries) { - expect(statuses.has(entry.status)).toBe(true); - expect(entry.legacyScript).not.toBe(""); - expect(repoPathExists(entry.legacyScript)).toBe(true); - expect(legacyScripts.has(entry.legacyScript)).toBe(false); - legacyScripts.add(entry.legacyScript); - expect(entry.domain).not.toBe(""); - expect(entry.ownerIssue).toMatch(/^#(?:3588|434[7-9]|435[0-7]|4941)$/); - expect(entry.notes).not.toBe(""); - } - - for (const surface of inventory.internalSurfaces) { - expect(statuses.has(surface.status)).toBe(true); - expect(surface.id).toMatch(/^[a-z0-9-]+$/); - expect(internalSurfaceIds.has(surface.id)).toBe(false); - internalSurfaceIds.add(surface.id); - expectPathListIsRepoRelative(surface.paths, { mustExist: surface.status !== "retired" }); - expect(surface.domain).not.toBe(""); - expect(surface.ownerIssue).toMatch(/^#(?:3588|434[7-9]|435[0-7]|4941|5098)$/); - expect(surface.replacementSurface).not.toBe(""); - expect(surface.notes).not.toBe(""); - } +describe("E2E migration tracking policy", () => { + it("does not use a repo-local JSON ledger as durable migration state", () => { + expect(fs.existsSync(LEGACY_INVENTORY)).toBe(false); }); - it("covers every current direct legacy shell entrypoint", () => { - const inventory = loadInventory(); - const 
inventoriedShellScripts = inventory.entries - .map((entry) => entry.legacyScript) - .filter((legacyScript) => /^test\/e2e\/test-.+\.sh$/.test(legacyScript)) - .sort(); + it("documents GitHub issues and PRs as the migration source of truth", () => { + const docs = [MIGRATION_DOC, README_DOC, RETIREMENT_DOC].map(read).join("\n"); - expect(inventoriedShellScripts).toEqual(listLegacyShellEntrypoints()); + expect(docs).toContain("GitHub issues and pull requests"); + expect(docs).toContain("source of truth"); + expect(docs).toContain("replacement Vitest coverage"); + expect(docs).toContain("retirement rationale"); }); - it("covers legacy scenario runner internal surfaces by path", () => { - const inventory = loadInventory(); - const surfacePaths = inventory.internalSurfaces.flatMap((surface) => surface.paths); - - for (const root of INTERNAL_SURFACE_ROOTS) { - if (!repoPathExists(root)) { - const retiredSurface = inventory.internalSurfaces.find((surface) => - surface.paths.some((surfacePath) => isCoveredByInventoryPath(root, surfacePath)), - ); - expect(retiredSurface?.status).toBe("retired"); - expect(retiredSurface?.retiredReason).not.toBe(""); - continue; - } - - const files = listRepoFilesUnder(root); - for (const file of files) { - expect( - surfacePaths.some((surfacePath) => isCoveredByInventoryPath(file, surfacePath)), - ).toBe(true); - } - } - }); - - it("requires coverage, retirement evidence, and #4357 approval before deletion", () => { - const inventory = loadInventory(); - - for (const entry of inventory.entries) { - expectMigrationRecordDeletionGate(entry); - } + it("keeps durable taxonomy out of the repo-local migration docs", () => { + const docs = [MIGRATION_DOC, README_DOC, RETIREMENT_DOC].map(read).join("\n"); - for (const surface of inventory.internalSurfaces) { - expectMigrationRecordDeletionGate(surface); - } + expect(docs).not.toMatch(/\bKEEP_BASH\b/); + expect(docs).not.toMatch(/\bHYBRID\b/); + expect(docs).not.toMatch(/\bMIGRATE_TYPED\b/); + 
expect(docs).not.toMatch(/\bnot-migrated\b/); + expect(docs).not.toMatch(/\bbridge-probe\b/); + expect(docs).not.toMatch(/\bdeletionReady\b/); }); }); diff --git a/test/e2e-scenario/migration/legacy-inventory.json b/test/e2e-scenario/migration/legacy-inventory.json deleted file mode 100644 index e66981d726..0000000000 --- a/test/e2e-scenario/migration/legacy-inventory.json +++ /dev/null @@ -1,900 +0,0 @@ -{ - "$comment": "SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.\nSPDX-License-Identifier: Apache-2.0", - "version": 1, - "statusValues": ["not-migrated", "bridge-probe", "covered", "retired"], - "deletionReadiness": { - "rule": "A legacy E2E entrypoint is deletion-ready only after equivalent Vitest scenario coverage exists, or after #4357 records an explicit retirement decision.", - "requires": [ - "same relevant CI lane or approved replacement lane", - "required secrets, skips, and runner requirements preserved", - "artifact and failure evidence at least as useful as the legacy script", - "migration status set to covered or retired", - "approval recorded through #4357" - ] - }, - "entries": [ - { - "legacyScript": "test/e2e/test-full-e2e.sh", - "domain": "smoke-onboarding", - "ownerIssue": "#4348", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Do not delete until onboarding, gateway, sandbox, and inference smoke behavior has equivalent Vitest coverage." - }, - { - "legacyScript": "test/e2e/test-cloud-onboard-e2e.sh", - "domain": "smoke-onboarding", - "ownerIssue": "#4348", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Expected to migrate with the smoke/onboarding family after repo-local CLI smoke." 
- }, - { - "legacyScript": "test/e2e/test-inference-routing.sh", - "domain": "inference", - "ownerIssue": "#4349", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Provider routing and inference.local checks need typed inference/provider fixtures before deletion." - }, - { - "legacyScript": "test/e2e/test-openclaw-inference-switch.sh", - "domain": "inference", - "ownerIssue": "#4349", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Provider switch behavior should migrate with the inference fixture family." - }, - { - "legacyScript": "test/e2e/test-messaging-providers.sh", - "domain": "messaging", - "ownerIssue": "#4351", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Split into Telegram, Discord, Slack, fake-provider, and token-rotation Vitest scenarios before deleting." - }, - { - "legacyScript": "test/e2e/test-whatsapp-qr-compact-e2e.sh", - "domain": "messaging", - "ownerIssue": "#4351", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Hermetic WhatsApp pairing-QR size guard (NemoClaw#4522); migrate alongside the messaging-provider scenarios once a Vitest harness can drive the openclaw renderQrTerminal renderer." - }, - { - "legacyScript": "test/e2e/test-token-rotation.sh", - "domain": "messaging", - "ownerIssue": "#4351", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Token rotation should migrate after provider fixtures can isolate per-provider state." 
- }, - { - "legacyScript": "test/e2e/test-credential-sanitization.sh", - "domain": "security", - "ownerIssue": "#4352", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Credential redaction and sandbox-visible secret checks should migrate with the security fixture family." - }, - { - "legacyScript": "test/e2e/test-network-policy.sh", - "domain": "security-policy", - "ownerIssue": "#4352", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Network policy checks need typed policy fixtures and preserved failure evidence." - }, - { - "legacyScript": "test/e2e/test-sandbox-survival.sh", - "domain": "sandbox-lifecycle", - "ownerIssue": "#4355", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Sandbox survival should migrate with lifecycle fixtures and stronger cleanup evidence." - }, - { - "legacyScript": "test/e2e/test-issue-2478-crash-loop-recovery.sh", - "domain": "sandbox-lifecycle", - "ownerIssue": "#4356", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Crash-loop recovery remains a late migration because it exercises failure timing and recovery classification." - }, - { - "legacyScript": "test/e2e/test-agent-turn-latency-e2e.sh", - "domain": "support-diagnostics", - "ownerIssue": "#4347", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Initial completeness row; classify detailed coverage and deletion evidence in the owning migration issue before deleting." 
- }, - { - "legacyScript": "test/e2e/test-bedrock-runtime-compatible-anthropic.sh", - "domain": "inference", - "ownerIssue": "#4349", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Initial completeness row; classify detailed coverage and deletion evidence in the owning migration issue before deleting." - }, - { - "legacyScript": "test/e2e/test-brave-search-e2e.sh", - "domain": "security-policy", - "ownerIssue": "#4352", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Initial completeness row; classify detailed coverage and deletion evidence in the owning migration issue before deleting." - }, - { - "legacyScript": "test/e2e/test-channels-add-remove.sh", - "domain": "messaging", - "ownerIssue": "#4351", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Initial completeness row; classify detailed coverage and deletion evidence in the owning migration issue before deleting." - }, - { - "legacyScript": "test/e2e/test-channels-stop-start.sh", - "domain": "messaging", - "ownerIssue": "#4351", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Initial completeness row; classify detailed coverage and deletion evidence in the owning migration issue before deleting." - }, - { - "legacyScript": "test/e2e/test-cloud-inference-e2e.sh", - "domain": "inference", - "ownerIssue": "#4349", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Initial completeness row; classify detailed coverage and deletion evidence in the owning migration issue before deleting." 
- }, - { - "legacyScript": "test/e2e/test-common-egress-agent-e2e.sh", - "domain": "security-policy", - "ownerIssue": "#4352", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Initial completeness row; classify detailed coverage and deletion evidence in the owning migration issue before deleting." - }, - { - "legacyScript": "test/e2e/test-concurrent-gateway-ports.sh", - "domain": "sandbox-lifecycle", - "ownerIssue": "#4355", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Initial completeness row; classify detailed coverage and deletion evidence in the owning migration issue before deleting." - }, - { - "legacyScript": "test/e2e/test-credential-migration.sh", - "domain": "security-policy", - "ownerIssue": "#4352", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Initial completeness row; classify detailed coverage and deletion evidence in the owning migration issue before deleting." - }, - { - "legacyScript": "test/e2e/test-dashboard-remote-bind.sh", - "domain": "support-diagnostics", - "ownerIssue": "#4347", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Initial completeness row; classify detailed coverage and deletion evidence in the owning migration issue before deleting." - }, - { - "legacyScript": "test/e2e/test-device-auth-health.sh", - "domain": "smoke-onboarding", - "ownerIssue": "#4348", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Initial completeness row; classify detailed coverage and deletion evidence in the owning migration issue before deleting." 
- }, - { - "legacyScript": "test/e2e/test-diagnostics.sh", - "domain": "support-diagnostics", - "ownerIssue": "#4347", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Initial completeness row; classify detailed coverage and deletion evidence in the owning migration issue before deleting." - }, - { - "legacyScript": "test/e2e/test-docs-validation.sh", - "domain": "support-diagnostics", - "ownerIssue": "#4347", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Initial completeness row; classify detailed coverage and deletion evidence in the owning migration issue before deleting." - }, - { - "legacyScript": "test/e2e/test-double-onboard.sh", - "domain": "smoke-onboarding", - "ownerIssue": "#4348", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Initial completeness row; classify detailed coverage and deletion evidence in the owning migration issue before deleting." - }, - { - "legacyScript": "test/e2e/test-gateway-drift-preflight.sh", - "domain": "sandbox-lifecycle", - "ownerIssue": "#4355", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Initial completeness row; classify detailed coverage and deletion evidence in the owning migration issue before deleting." - }, - { - "legacyScript": "test/e2e/test-gateway-health-honest.sh", - "domain": "sandbox-lifecycle", - "ownerIssue": "#4355", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Initial completeness row; classify detailed coverage and deletion evidence in the owning migration issue before deleting." 
- }, - { - "legacyScript": "test/e2e/test-gpu-double-onboard.sh", - "domain": "platform", - "ownerIssue": "#4354", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Initial completeness row; classify detailed coverage and deletion evidence in the owning migration issue before deleting." - }, - { - "legacyScript": "test/e2e/test-gpu-e2e.sh", - "domain": "platform", - "ownerIssue": "#4354", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Initial completeness row; classify detailed coverage and deletion evidence in the owning migration issue before deleting." - }, - { - "legacyScript": "test/e2e/test-jetson-nvmap-gpu.sh", - "domain": "platform", - "ownerIssue": "#4354", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Jetson Orin /dev/nvmap CUDA usability + status reporter-workflow E2E for #4231; Jetson-gated. Classify coverage and deletion evidence in the owning migration issue before deleting." - }, - { - "legacyScript": "test/e2e/test-hermes-discord-e2e.sh", - "domain": "messaging", - "ownerIssue": "#4351", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Initial completeness row; classify detailed coverage and deletion evidence in the owning migration issue before deleting." - }, - { - "legacyScript": "test/e2e/test-hermes-e2e.sh", - "domain": "smoke-onboarding", - "ownerIssue": "#4348", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Initial completeness row; classify detailed coverage and deletion evidence in the owning migration issue before deleting." 
- }, - { - "legacyScript": "test/e2e/test-hermes-inference-switch.sh", - "domain": "inference", - "ownerIssue": "#4349", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Initial completeness row; classify detailed coverage and deletion evidence in the owning migration issue before deleting." - }, - { - "legacyScript": "test/e2e/test-hermes-root-entrypoint-smoke.sh", - "domain": "smoke-onboarding", - "ownerIssue": "#4348", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Initial completeness row; classify detailed coverage and deletion evidence in the owning migration issue before deleting." - }, - { - "legacyScript": "test/e2e/test-hermes-sandbox-secret-boundary.sh", - "domain": "security-policy", - "ownerIssue": "#4352", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Initial completeness row; classify detailed coverage and deletion evidence in the owning migration issue before deleting." - }, - { - "legacyScript": "test/e2e/test-hermes-slack-e2e.sh", - "domain": "messaging", - "ownerIssue": "#4351", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Initial completeness row; classify detailed coverage and deletion evidence in the owning migration issue before deleting." - }, - { - "legacyScript": "test/e2e/test-issue-4434-tui-unreachable-inference.sh", - "domain": "inference", - "ownerIssue": "#4349", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Initial completeness row; classify detailed coverage and deletion evidence in the owning migration issue before deleting." 
- }, - { - "legacyScript": "test/e2e/test-issue-4462-scope-upgrade-approval.sh", - "domain": "sandbox-lifecycle", - "ownerIssue": "#4355", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Initial completeness row; classify detailed coverage and deletion evidence in the owning migration issue before deleting." - }, - { - "legacyScript": "test/e2e/test-kimi-inference-compat.sh", - "domain": "inference", - "ownerIssue": "#4349", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Initial completeness row; classify detailed coverage and deletion evidence in the owning migration issue before deleting." - }, - { - "legacyScript": "test/e2e/test-launchable-smoke.sh", - "domain": "smoke-onboarding", - "ownerIssue": "#4348", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Initial completeness row; classify detailed coverage and deletion evidence in the owning migration issue before deleting." - }, - { - "legacyScript": "test/e2e/test-messaging-compatible-endpoint.sh", - "domain": "messaging", - "ownerIssue": "#4351", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Initial completeness row; classify detailed coverage and deletion evidence in the owning migration issue before deleting." - }, - { - "legacyScript": "test/e2e/test-model-router-provider-routed-inference.sh", - "domain": "inference", - "ownerIssue": "#4349", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Initial completeness row; classify detailed coverage and deletion evidence in the owning migration issue before deleting." 
- }, - { - "legacyScript": "test/e2e/test-ollama-auth-proxy-e2e.sh", - "domain": "inference", - "ownerIssue": "#4349", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Initial completeness row; classify detailed coverage and deletion evidence in the owning migration issue before deleting." - }, - { - "legacyScript": "test/e2e/test-onboard-inference-smoke.sh", - "domain": "inference", - "ownerIssue": "#4349", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Initial completeness row; classify detailed coverage and deletion evidence in the owning migration issue before deleting." - }, - { - "legacyScript": "test/e2e/test-onboard-negative-paths.sh", - "domain": "smoke-onboarding", - "ownerIssue": "#4348", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Initial completeness row; classify detailed coverage and deletion evidence in the owning migration issue before deleting." - }, - { - "legacyScript": "test/e2e/test-onboard-repair.sh", - "domain": "smoke-onboarding", - "ownerIssue": "#4348", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Initial completeness row; classify detailed coverage and deletion evidence in the owning migration issue before deleting." - }, - { - "legacyScript": "test/e2e/test-onboard-resume.sh", - "domain": "smoke-onboarding", - "ownerIssue": "#4348", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Initial completeness row; classify detailed coverage and deletion evidence in the owning migration issue before deleting." 
- }, - { - "legacyScript": "test/e2e/test-openclaw-discord-pairing.sh", - "domain": "messaging", - "ownerIssue": "#4351", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Initial completeness row; classify detailed coverage and deletion evidence in the owning migration issue before deleting." - }, - { - "legacyScript": "test/e2e/test-openclaw-plugin-runtime-exdev.sh", - "domain": "sandbox-lifecycle", - "ownerIssue": "#4355", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Initial completeness row; classify detailed coverage and deletion evidence in the owning migration issue before deleting." - }, - { - "legacyScript": "test/e2e/test-openclaw-skill-cli-e2e.sh", - "domain": "support-diagnostics", - "ownerIssue": "#4347", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Initial completeness row; classify detailed coverage and deletion evidence in the owning migration issue before deleting." - }, - { - "legacyScript": "test/e2e/test-openclaw-slack-pairing.sh", - "domain": "messaging", - "ownerIssue": "#4351", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Initial completeness row; classify detailed coverage and deletion evidence in the owning migration issue before deleting." - }, - { - "legacyScript": "test/e2e/test-openclaw-tui-chat-correlation.sh", - "domain": "support-diagnostics", - "ownerIssue": "#4347", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Initial completeness row; classify detailed coverage and deletion evidence in the owning migration issue before deleting." 
- }, - { - "legacyScript": "test/e2e/test-openshell-gateway-upgrade.sh", - "domain": "sandbox-lifecycle", - "ownerIssue": "#4355", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Initial completeness row; classify detailed coverage and deletion evidence in the owning migration issue before deleting." - }, - { - "legacyScript": "test/e2e/test-openshell-version-pin.sh", - "domain": "sandbox-lifecycle", - "ownerIssue": "#4355", - "status": "covered", - "targetVitestScenarios": ["test/e2e-scenario/live/openshell-version-pin.test.ts"], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Covered by free-standing live test (PR #5107). Hermetic installer-script behavioral test — stubs PATH binaries, runs scripts/install-openshell.sh, asserts the four [PASS] outcomes from the legacy bash guard. Not registry-driven; dispatched as a discrete openshell-version-pin-vitest job in e2e-vitest-scenarios.yaml. Bash guard retained in regression-e2e.yaml; deletion is a follow-up PR with #4357 approval.", - "frozenAtSha": "3d9c01931f598cf2450bd88028cb3b44bcf367b0", - "convergenceEvidence": { - "redRunUrl": null, - "greenRunUrlA": "https://github.com/NVIDIA/NemoClaw/actions/runs/27252871283/job/80480862915", - "greenRunUrlB": "https://github.com/NVIDIA/NemoClaw/actions/runs/27253022534/job/80481579156", - "totalPasses": 2, - "assertionsResolvedInOrder": [ - "installer-exits-zero", - "download-log-contains-v0.0.44", - "download-log-excludes-v0.0.45", - "replaced-openshell-reports-0.0.44" - ], - "convergedAtSha": "3d9c01931f598cf2450bd88028cb3b44bcf367b0", - "firstDispatchPassed": true, - "firstDispatchPassNote": "Framework already covered the regression target; legacy script was duplicative beyond the entry-point assertions. Post-pivot verification dispatch confirmed reproducibility on the correct architectural home (test/e2e-scenario/live/)." 
- } - }, - { - "legacyScript": "test/e2e/test-overlayfs-autofix.sh", - "domain": "sandbox-lifecycle", - "ownerIssue": "#4355", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Initial completeness row; classify detailed coverage and deletion evidence in the owning migration issue before deleting." - }, - { - "legacyScript": "test/e2e/test-rebuild-hermes.sh", - "domain": "sandbox-lifecycle", - "ownerIssue": "#4355", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Initial completeness row; classify detailed coverage and deletion evidence in the owning migration issue before deleting." - }, - { - "legacyScript": "test/e2e/test-rebuild-openclaw.sh", - "domain": "sandbox-lifecycle", - "ownerIssue": "#4355", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Initial completeness row; classify detailed coverage and deletion evidence in the owning migration issue before deleting." - }, - { - "legacyScript": "test/e2e/test-runtime-overrides.sh", - "domain": "sandbox-lifecycle", - "ownerIssue": "#4355", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Initial completeness row; classify detailed coverage and deletion evidence in the owning migration issue before deleting." - }, - { - "legacyScript": "test/e2e/test-sandbox-operations.sh", - "domain": "sandbox-lifecycle", - "ownerIssue": "#4355", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Initial completeness row; classify detailed coverage and deletion evidence in the owning migration issue before deleting." 
- }, - { - "legacyScript": "test/e2e/test-sandbox-rebuild.sh", - "domain": "sandbox-lifecycle", - "ownerIssue": "#4355", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Initial completeness row; classify detailed coverage and deletion evidence in the owning migration issue before deleting." - }, - { - "legacyScript": "test/e2e/test-sessions-agents-cli.sh", - "domain": "support-diagnostics", - "ownerIssue": "#4347", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Initial completeness row; classify detailed coverage and deletion evidence in the owning migration issue before deleting." - }, - { - "legacyScript": "test/e2e/test-shields-config.sh", - "domain": "security-policy", - "ownerIssue": "#4352", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Initial completeness row; classify detailed coverage and deletion evidence in the owning migration issue before deleting." - }, - { - "legacyScript": "test/e2e/test-skill-agent-e2e.sh", - "domain": "support-diagnostics", - "ownerIssue": "#4347", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Initial completeness row; classify detailed coverage and deletion evidence in the owning migration issue before deleting." - }, - { - "legacyScript": "test/e2e/test-snapshot-commands.sh", - "domain": "sandbox-lifecycle", - "ownerIssue": "#4355", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Initial completeness row; classify detailed coverage and deletion evidence in the owning migration issue before deleting." 
- }, - { - "legacyScript": "test/e2e/test-spark-install.sh", - "domain": "platform", - "ownerIssue": "#4354", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Initial completeness row; classify detailed coverage and deletion evidence in the owning migration issue before deleting." - }, - { - "legacyScript": "test/e2e/test-state-backup-restore.sh", - "domain": "sandbox-lifecycle", - "ownerIssue": "#4355", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Initial completeness row; classify detailed coverage and deletion evidence in the owning migration issue before deleting." - }, - { - "legacyScript": "test/e2e/test-strict-tool-call-probe.sh", - "domain": "inference", - "ownerIssue": "#4349", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Initial completeness row; classify detailed coverage and deletion evidence in the owning migration issue before deleting." - }, - { - "legacyScript": "test/e2e/test-telegram-injection.sh", - "domain": "messaging", - "ownerIssue": "#4351", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Initial completeness row; classify detailed coverage and deletion evidence in the owning migration issue before deleting." - }, - { - "legacyScript": "test/e2e/test-tunnel-lifecycle.sh", - "domain": "sandbox-lifecycle", - "ownerIssue": "#4355", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Initial completeness row; classify detailed coverage and deletion evidence in the owning migration issue before deleting." 
- }, - { - "legacyScript": "test/e2e/test-upgrade-stale-sandbox.sh", - "domain": "sandbox-lifecycle", - "ownerIssue": "#4355", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Initial completeness row; classify detailed coverage and deletion evidence in the owning migration issue before deleting." - }, - { - "legacyScript": "test/e2e/test-vm-driver-privileged-exec-routing.sh", - "domain": "platform", - "ownerIssue": "#4354", - "status": "not-migrated", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "", - "deletionReady": false, - "notes": "Initial completeness row; classify detailed coverage and deletion evidence in the owning migration issue before deleting." - }, - { - "legacyScript": "test/e2e/brev-e2e.test.ts", - "domain": "platform-brev", - "ownerIssue": "#4354", - "status": "bridge-probe", - "targetVitestScenarios": [], - "bridgeProbes": ["test/e2e/brev-e2e.test.ts"], - "retiredReason": "", - "deletionReady": false, - "notes": "Already uses Vitest, but still dispatches legacy remote shell suites; keep as a bridge until remote execution uses shared fixtures." - } - ], - "internalSurfaces": [ - { - "id": "typed-shell-orchestrators", - "paths": ["test/e2e-scenario/scenarios/orchestrators"], - "domain": "scenario-runner", - "ownerIssue": "#4357", - "status": "retired", - "replacementSurface": "test/e2e-scenario/framework/phases", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "Deleted by #5106 after the Vitest scenario workflow gained matrix, run-plan, per-phase result, summary, and artifact parity.", - "deletionReady": true, - "deletionApprovalIssue": "#5098", - "notes": "Registry-driven Vitest execution now owns the surviving phase fixtures; the custom typed-shell orchestrator path is retired." 
- }, - { - "id": "legacy-bash-scenario-workers", - "paths": ["test/e2e-scenario/nemoclaw_scenarios"], - "domain": "scenario-runner", - "ownerIssue": "#4357", - "status": "retired", - "replacementSurface": "test/e2e-scenario/framework/phases", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "Deleted by #5106 with the typed-shell runner cutover; surviving live setup runs through Vitest fixtures and direct legacy bash tests remain separately inventoried.", - "deletionReady": true, - "deletionApprovalIssue": "#5098", - "notes": "These bridge workers belonged to the retired scenario runner, not to the direct legacy test/e2e/test-*.sh suite." - }, - { - "id": "legacy-onboarding-assertion-workers", - "paths": ["test/e2e-scenario/onboarding_assertions"], - "domain": "smoke-onboarding", - "ownerIssue": "#4348", - "status": "retired", - "replacementSurface": "test/e2e-scenario/framework/phases/onboarding.ts", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "Deleted by #5106 because onboarding phase fixtures now emit onboarding.result.json and no longer dispatch shell assertion shims.", - "deletionReady": true, - "deletionApprovalIssue": "#5098", - "notes": "Onboarding readiness and negative preflight evidence now live in the Vitest phase fixture layer." - }, - { - "id": "legacy-validation-suites", - "paths": ["test/e2e-scenario/validation_suites"], - "domain": "runtime-suites", - "ownerIssue": "#4357", - "status": "retired", - "replacementSurface": "test/e2e-scenario/framework/phases", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "Deleted by #5106 with the scenario-runner cutover; suiteIds remain registry metadata while family migrations add typed Vitest coverage or keep direct bash tests.", - "deletionReady": true, - "deletionApprovalIssue": "#5098", - "notes": "This shell suite tree was only executed by the retired typed-shell scenario runner. 
Direct legacy bash tests remain governed by entries above." - }, - { - "id": "legacy-runtime-helper-libraries", - "paths": [ - "test/e2e-scenario/runtime/lib", - "test/e2e-scenario/runtime/reports" - ], - "domain": "scenario-runner", - "ownerIssue": "#4357", - "status": "retired", - "replacementSurface": "test/e2e-scenario/framework", - "targetVitestScenarios": [], - "bridgeProbes": [], - "retiredReason": "Deleted by #5106 because the bridge shell workers and report renderer were removed; artifact writing is owned by the Vitest fixture layer.", - "deletionReady": true, - "deletionApprovalIssue": "#5098", - "notes": "Redaction, command evidence, cleanup, phase results, and workflow summaries now live under test/e2e-scenario/framework and e2e-vitest-scenarios.yaml." - } - ] -} From e246d20d66318e85443f46b0376e74841574a53e Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Wed, 10 Jun 2026 02:26:22 -0700 Subject: [PATCH 02/17] test(e2e): retire orphaned assertion inventory --- test/e2e-scenario/docs/MIGRATION.md | 6 +- test/e2e-scenario/docs/README.md | 9 +- .../e2e-migration-inventory.test.ts | 9 + test/e2e/docs/parity-inventory.generated.json | 17092 ---------------- 4 files changed, 17 insertions(+), 17099 deletions(-) delete mode 100644 test/e2e/docs/parity-inventory.generated.json diff --git a/test/e2e-scenario/docs/MIGRATION.md b/test/e2e-scenario/docs/MIGRATION.md index 21408be2d9..7f7bcd8032 100644 --- a/test/e2e-scenario/docs/MIGRATION.md +++ b/test/e2e-scenario/docs/MIGRATION.md @@ -53,9 +53,9 @@ The durable E2E system has one execution path: ## Migration Governance -The former `test/e2e-scenario/migration/legacy-inventory.json` ledger is removed -because it duplicated live GitHub issues and pull requests and quickly became a -stale source of truth. 
+The former `test/e2e-scenario/migration/legacy-inventory.json` ledger and +generated legacy assertion inventories are removed because they duplicated live +GitHub issues and pull requests and quickly became stale sources of truth. The useful deletion invariant is smaller: diff --git a/test/e2e-scenario/docs/README.md b/test/e2e-scenario/docs/README.md index 5c9c4ded56..27d961a154 100644 --- a/test/e2e-scenario/docs/README.md +++ b/test/e2e-scenario/docs/README.md @@ -115,10 +115,11 @@ GitHub issues and PRs own changing migration status. The key issues are: - #4990: phase fixtures and registry-driven live discovery - #5098: direct legacy bash-suite migration epic -The former repo-local `legacy-inventory.json` ledger is removed because it -duplicated live GitHub state and drifted quickly. A PR that deletes a legacy E2E -script must show the replacement Vitest coverage or explain the retirement -rationale in the PR body and linked issue. +The former repo-local `legacy-inventory.json` ledger and generated legacy +assertion inventories are removed because they duplicated live GitHub state and +drifted quickly. A PR that deletes a legacy E2E script must show the replacement +Vitest coverage or explain the retirement rationale in the PR body and linked +issue. Prefer new E2E coverage in Vitest fixtures. 
When shell, installer, process, platform, or full user-flow behavior is the contract, invoke that real boundary diff --git a/test/e2e-scenario/framework-tests/e2e-migration-inventory.test.ts b/test/e2e-scenario/framework-tests/e2e-migration-inventory.test.ts index 226aabaf0b..f10b1a244d 100644 --- a/test/e2e-scenario/framework-tests/e2e-migration-inventory.test.ts +++ b/test/e2e-scenario/framework-tests/e2e-migration-inventory.test.ts @@ -12,6 +12,13 @@ const MIGRATION_DOC = path.join(SCENARIO_SUITE_DIR, "docs", "MIGRATION.md"); const README_DOC = path.join(SCENARIO_SUITE_DIR, "docs", "README.md"); const RETIREMENT_DOC = path.join(SCENARIO_SUITE_DIR, "docs", "RETIREMENT.md"); const LEGACY_INVENTORY = path.join(SCENARIO_SUITE_DIR, "migration", "legacy-inventory.json"); +const LEGACY_ASSERTION_INVENTORY = path.join( + REPO_ROOT, + "test", + "e2e", + "docs", + "parity-inventory.generated.json", +); function read(filePath: string): string { return fs.readFileSync(filePath, "utf8"); @@ -20,6 +27,7 @@ function read(filePath: string): string { describe("E2E migration tracking policy", () => { it("does not use a repo-local JSON ledger as durable migration state", () => { expect(fs.existsSync(LEGACY_INVENTORY)).toBe(false); + expect(fs.existsSync(LEGACY_ASSERTION_INVENTORY)).toBe(false); }); it("documents GitHub issues and PRs as the migration source of truth", () => { @@ -29,6 +37,7 @@ describe("E2E migration tracking policy", () => { expect(docs).toContain("source of truth"); expect(docs).toContain("replacement Vitest coverage"); expect(docs).toContain("retirement rationale"); + expect(docs).toContain("generated legacy assertion inventories"); }); it("keeps durable taxonomy out of the repo-local migration docs", () => { diff --git a/test/e2e/docs/parity-inventory.generated.json b/test/e2e/docs/parity-inventory.generated.json deleted file mode 100644 index f42dff2ee9..0000000000 --- a/test/e2e/docs/parity-inventory.generated.json +++ /dev/null @@ -1,17092 +0,0 @@ -{ - 
"generated_by": "scripts/e2e/extract-legacy-assertions.ts", - "entrypoints": [ - { - "script": "test/e2e/brev-e2e.test.ts", - "assertions": [], - "zero_assertion_review": { - "reason": "TODO: review legacy entrypoint for assertions not expressed as PASS/FAIL output" - } - }, - { - "script": "test/e2e/test-bedrock-runtime-compatible-anthropic.sh", - "assertions": [ - { - "script": "test/e2e/test-bedrock-runtime-compatible-anthropic.sh", - "line": 245, - "text": "B0: sudo is required to edit /etc/hosts for Bedrock hostname mapping", - "polarity": "fail", - "normalized_id": "b0.sudo.is.required.to.edit.etc.hosts.for.bedrock.hostname.mapping", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-bedrock-runtime-compatible-anthropic.sh", - "line": 249, - "text": "B0: passwordless sudo is required to edit /etc/hosts for Bedrock hostname mapping", - "polarity": "fail", - "normalized_id": "b0.passwordless.sudo.is.required.to.edit.etc.hosts.for.bedrock.hostname.mapping", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-bedrock-runtime-compatible-anthropic.sh", - "line": 263, - "text": "B0: Bedrock Runtime hostname maps to localhost", - "polarity": "pass", - "normalized_id": "b0.bedrock.runtime.hostname.maps.to.localhost", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-bedrock-runtime-compatible-anthropic.sh", - "line": 265, - "text": "B0: Bedrock Runtime hostname did not resolve to localhost after hosts edit", - "polarity": "fail", - "normalized_id": "b0.bedrock.runtime.hostname.did.not.resolve.to.localhost.after.hosts.edit", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-bedrock-runtime-compatible-anthropic.sh", - "line": 442, - "text": "B1: onboard completed for Bedrock Runtime compatible Anthropic endpoint", - "polarity": "pass", - "normalized_id": "b1.onboard.completed.for.bedrock.runtime.compatible.anthropic.endpoint", - "mapping_status": "deferred" - }, - { - "script": 
"test/e2e/test-bedrock-runtime-compatible-anthropic.sh", - "line": 444, - "text": "B1: onboard failed for Bedrock Runtime compatible Anthropic endpoint", - "polarity": "fail", - "normalized_id": "b1.onboard.failed.for.bedrock.runtime.compatible.anthropic.endpoint", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-bedrock-runtime-compatible-anthropic.sh", - "line": 509, - "text": "B2: onboard state keeps provider identity as compatible-anthropic-endpoint", - "polarity": "pass", - "normalized_id": "b2.onboard.state.keeps.provider.identity.as.compatible.anthropic.endpoint", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-bedrock-runtime-compatible-anthropic.sh", - "line": 511, - "text": "B2: onboard state did not preserve compatible-anthropic-endpoint identity: ${probe:0:500}", - "polarity": "fail", - "normalized_id": "b2.onboard.state.did.not.preserve.compatible.anthropic.endpoint.identity.probe.0.500", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-bedrock-runtime-compatible-anthropic.sh", - "line": 519, - "text": "B3: Bedrock Runtime adapter health endpoint failed", - "polarity": "fail", - "normalized_id": "b3.bedrock.runtime.adapter.health.endpoint.failed", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-bedrock-runtime-compatible-anthropic.sh", - "line": 541, - "text": "B3: Bedrock Runtime adapter health reports fake endpoint and us-east-1", - "polarity": "pass", - "normalized_id": "b3.bedrock.runtime.adapter.health.reports.fake.endpoint.and.us.east.1", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-bedrock-runtime-compatible-anthropic.sh", - "line": 543, - "text": "B3: Bedrock Runtime adapter health payload was not the expected fake endpoint", - "polarity": "fail", - "normalized_id": "b3.bedrock.runtime.adapter.health.payload.was.not.the.expected.fake.endpoint", - "mapping_status": "deferred" - }, - { - "script": 
"test/e2e/test-bedrock-runtime-compatible-anthropic.sh", - "line": 550, - "text": "B4: openshell inference get failed: ${route:0:300}", - "polarity": "fail", - "normalized_id": "b4.openshell.inference.get.failed.route.0.300", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-bedrock-runtime-compatible-anthropic.sh", - "line": 556, - "text": "B4: OpenShell route points at compatible-anthropic-endpoint", - "polarity": "pass", - "normalized_id": "b4.openshell.route.points.at.compatible.anthropic.endpoint", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-bedrock-runtime-compatible-anthropic.sh", - "line": 558, - "text": "B4: OpenShell route did not point at compatible-anthropic-endpoint: ${plain_route:0:400}", - "polarity": "fail", - "normalized_id": "b4.openshell.route.did.not.point.at.compatible.anthropic.endpoint.plain.route.0.400", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-bedrock-runtime-compatible-anthropic.sh", - "line": 563, - "text": "B5: OpenShell provider registry contains compatible-anthropic-endpoint", - "polarity": "pass", - "normalized_id": "b5.openshell.provider.registry.contains.compatible.anthropic.endpoint", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-bedrock-runtime-compatible-anthropic.sh", - "line": 565, - "text": "B5: OpenShell provider registry did not expose compatible-anthropic-endpoint", - "polarity": "fail", - "normalized_id": "b5.openshell.provider.registry.did.not.expose.compatible.anthropic.endpoint", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-bedrock-runtime-compatible-anthropic.sh", - "line": 610, - "text": "B6: OpenClaw config uses only managed inference.local provider", - "polarity": "pass", - "normalized_id": "b6.openclaw.config.uses.only.managed.inference.local.provider", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-bedrock-runtime-compatible-anthropic.sh", - "line": 612, - "text": "B6: OpenClaw config 
did not use the expected inference.local provider shape", - "polarity": "fail", - "normalized_id": "b6.openclaw.config.did.not.use.the.expected.inference.local.provider.shape", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-bedrock-runtime-compatible-anthropic.sh", - "line": 619, - "text": "B6: could not read Hermes config.yaml: ${config:0:240}", - "polarity": "fail", - "normalized_id": "b6.could.not.read.hermes.config.yaml.config.0.240", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-bedrock-runtime-compatible-anthropic.sh", - "line": 662, - "text": "B6: Hermes config.yaml was not patched correctly: ${probe:0:400}", - "polarity": "fail", - "normalized_id": "b6.hermes.config.yaml.was.not.patched.correctly.probe.0.400", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-bedrock-runtime-compatible-anthropic.sh", - "line": 665, - "text": "B6: Hermes config uses inference.local without OpenShell/OpenClaw provider blocks", - "polarity": "pass", - "normalized_id": "b6.hermes.config.uses.inference.local.without.openshell.openclaw.provider.blocks", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-bedrock-runtime-compatible-anthropic.sh", - "line": 684, - "text": "B7: sandbox inference.local chat completion returned PONG", - "polarity": "pass", - "normalized_id": "b7.sandbox.inference.local.chat.completion.returned.pong", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-bedrock-runtime-compatible-anthropic.sh", - "line": 686, - "text": "B7: sandbox inference.local chat completion failed: ${response:0:400}", - "polarity": "fail", - "normalized_id": "b7.sandbox.inference.local.chat.completion.failed.response.0.400", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-bedrock-runtime-compatible-anthropic.sh", - "line": 697, - "text": "B8: OpenClaw agent turn hit a provider or transport error", - "polarity": "fail", - "normalized_id": 
"b8.openclaw.agent.turn.hit.a.provider.or.transport.error", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-bedrock-runtime-compatible-anthropic.sh", - "line": 720, - "text": "B8: OpenClaw agent completed a Bedrock-backed turn through inference.local", - "polarity": "pass", - "normalized_id": "b8.openclaw.agent.completed.a.bedrock.backed.turn.through.inference.local", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-bedrock-runtime-compatible-anthropic.sh", - "line": 722, - "text": "B8: OpenClaw agent did not return PONG through Bedrock adapter", - "polarity": "fail", - "normalized_id": "b8.openclaw.agent.did.not.return.pong.through.bedrock.adapter", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-bedrock-runtime-compatible-anthropic.sh", - "line": 743, - "text": "B8: Hermes local chat API completed a Bedrock-backed turn through inference.local", - "polarity": "pass", - "normalized_id": "b8.hermes.local.chat.api.completed.a.bedrock.backed.turn.through.inference.local", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-bedrock-runtime-compatible-anthropic.sh", - "line": 745, - "text": "B8: Hermes local chat API did not return PONG through Bedrock adapter: ${response:0:400}", - "polarity": "fail", - "normalized_id": "b8.hermes.local.chat.api.did.not.return.pong.through.bedrock.adapter.response.0.400", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-bedrock-runtime-compatible-anthropic.sh", - "line": 754, - "text": "B9: fake Bedrock Runtime endpoint observed authenticated Converse traffic", - "polarity": "pass", - "normalized_id": "b9.fake.bedrock.runtime.endpoint.observed.authenticated.converse.traffic", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-bedrock-runtime-compatible-anthropic.sh", - "line": 756, - "text": "B9: fake Bedrock Runtime endpoint did not observe authenticated Converse traffic", - "polarity": "fail", - "normalized_id": 
"b9.fake.bedrock.runtime.endpoint.did.not.observe.authenticated.converse.traffic", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-bedrock-runtime-compatible-anthropic.sh", - "line": 760, - "text": "B10: fake Bedrock Runtime endpoint observed authenticated ConverseStream traffic", - "polarity": "pass", - "normalized_id": "b10.fake.bedrock.runtime.endpoint.observed.authenticated.conversestream.traffic", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-bedrock-runtime-compatible-anthropic.sh", - "line": 762, - "text": "B10: fake Bedrock Runtime endpoint did not observe OpenClaw streamed traffic", - "polarity": "fail", - "normalized_id": "b10.fake.bedrock.runtime.endpoint.did.not.observe.openclaw.streamed.traffic", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-bedrock-runtime-compatible-anthropic.sh", - "line": 769, - "text": "B11: Bedrock Runtime adapter host log was not written", - "polarity": "fail", - "normalized_id": "b11.bedrock.runtime.adapter.host.log.was.not.written", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-bedrock-runtime-compatible-anthropic.sh", - "line": 777, - "text": "B11: Bedrock Runtime adapter host log records safe Converse and ConverseStream breadcrumbs", - "polarity": "pass", - "normalized_id": "b11.bedrock.runtime.adapter.host.log.records.safe.converse.and.conversestream.breadcrumbs", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-bedrock-runtime-compatible-anthropic.sh", - "line": 779, - "text": "B11: Bedrock Runtime adapter host log did not record a ConverseStream breadcrumb", - "polarity": "fail", - "normalized_id": "b11.bedrock.runtime.adapter.host.log.did.not.record.a.conversestream.breadcrumb", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-bedrock-runtime-compatible-anthropic.sh", - "line": 782, - "text": "B11: Bedrock Runtime adapter host log records safe Converse breadcrumbs", - "polarity": "pass", - 
"normalized_id": "b11.bedrock.runtime.adapter.host.log.records.safe.converse.breadcrumbs", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-bedrock-runtime-compatible-anthropic.sh", - "line": 785, - "text": "B11: Bedrock Runtime adapter host log did not record expected request breadcrumbs", - "polarity": "fail", - "normalized_id": "b11.bedrock.runtime.adapter.host.log.did.not.record.expected.request.breadcrumbs", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-bedrock-runtime-compatible-anthropic.sh", - "line": 869, - "text": "B11: adapter token file was not created on the host", - "polarity": "fail", - "normalized_id": "b11.adapter.token.file.was.not.created.on.the.host", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-bedrock-runtime-compatible-anthropic.sh", - "line": 892, - "text": "B12: sandbox configs, env, proc, and logs contain no Bedrock token or hostname leaks", - "polarity": "pass", - "normalized_id": "b12.sandbox.configs.env.proc.and.logs.contain.no.bedrock.token.or.hostname.leaks", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-bedrock-runtime-compatible-anthropic.sh", - "line": 894, - "text": "B12: leak scan found forbidden Bedrock token or hostname locations", - "polarity": "fail", - "normalized_id": "b12.leak.scan.found.forbidden.bedrock.token.or.hostname.locations", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-bedrock-runtime-compatible-anthropic.sh", - "line": 949, - "text": "Docker is running", - "polarity": "pass", - "normalized_id": "docker.is.running", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-bedrock-runtime-compatible-anthropic.sh", - "line": 951, - "text": "Docker is not running", - "polarity": "fail", - "normalized_id": "docker.is.not.running", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-bedrock-runtime-compatible-anthropic.sh", - "line": 956, - "text": "python3 is available", - "polarity": 
"pass", - "normalized_id": "python3.is.available", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-bedrock-runtime-compatible-anthropic.sh", - "line": 958, - "text": "python3 not found", - "polarity": "fail", - "normalized_id": "python3.not.found", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-bedrock-runtime-compatible-anthropic.sh", - "line": 963, - "text": "NEMOCLAW_NON_INTERACTIVE=1", - "polarity": "pass", - "normalized_id": "nemoclaw.non.interactive.1", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-bedrock-runtime-compatible-anthropic.sh", - "line": 965, - "text": "NEMOCLAW_NON_INTERACTIVE=1 is required", - "polarity": "fail", - "normalized_id": "nemoclaw.non.interactive.1.is.required", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-bedrock-runtime-compatible-anthropic.sh", - "line": 970, - "text": "third-party software acceptance is set", - "polarity": "pass", - "normalized_id": "third.party.software.acceptance.is.set", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-bedrock-runtime-compatible-anthropic.sh", - "line": 972, - "text": "NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 is required", - "polarity": "fail", - "normalized_id": "nemoclaw.accept.third.party.software.1.is.required", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-bedrock-runtime-compatible-anthropic.sh", - "line": 984, - "text": "B0: source CLI and OpenShell are ready", - "polarity": "pass", - "normalized_id": "b0.source.cli.and.openshell.are.ready", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-bedrock-runtime-compatible-anthropic.sh", - "line": 986, - "text": "B0: source CLI/OpenShell preparation failed", - "polarity": "fail", - "normalized_id": "b0.source.cli.openshell.preparation.failed", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-bedrock-runtime-compatible-anthropic.sh", - "line": 995, - "text": "B0: fake Bedrock Runtime endpoint 
started", - "polarity": "pass", - "normalized_id": "b0.fake.bedrock.runtime.endpoint.started", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-bedrock-runtime-compatible-anthropic.sh", - "line": 997, - "text": "B0: fake Bedrock Runtime endpoint failed to start", - "polarity": "fail", - "normalized_id": "b0.fake.bedrock.runtime.endpoint.failed.to.start", - "mapping_status": "deferred" - } - ] - }, - { - "script": "test/e2e/test-brave-search-e2e.sh", - "assertions": [ - { - "script": "test/e2e/test-brave-search-e2e.sh", - "line": 193, - "text": "B1: ${onboard_cmd_desc} completed for Brave Search-enabled onboard", - "polarity": "pass", - "normalized_id": "b1.onboard.cmd.desc.completed.for.brave.search.enabled.onboard", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-brave-search-e2e.sh", - "line": 195, - "text": "B1: ${onboard_cmd_desc} failed (exit $onboard_exit)", - "polarity": "fail", - "normalized_id": "b1.onboard.cmd.desc.failed.exit.onboard.exit", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-brave-search-e2e.sh", - "line": 216, - "text": "B2a: openshell policy get failed (exit $rc)", - "polarity": "fail", - "normalized_id": "b2a.openshell.policy.get.failed.exit.rc", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-brave-search-e2e.sh", - "line": 218, - "text": "B2a: brave preset applied — api.search.brave.com is in the loaded gateway policy", - "polarity": "pass", - "normalized_id": "b2a.brave.preset.applied.api.search.brave.com.is.in.the.loaded.gateway.policy", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-brave-search-e2e.sh", - "line": 220, - "text": "B2a: brave preset NOT applied — api.search.brave.com is missing from the gateway policy", - "polarity": "fail", - "normalized_id": "b2a.brave.preset.not.applied.api.search.brave.com.is.missing.from.the.gateway.policy", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-brave-search-e2e.sh", - 
"line": 238, - "text": "B2b: could not read openclaw web-search config (exit $config_rc)", - "polarity": "fail", - "normalized_id": "b2b.could.not.read.openclaw.web.search.config.exit.config.rc", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-brave-search-e2e.sh", - "line": 241, - "text": "B2b: brave preset wired through to openclaw — tools.web.search.provider=brave and enabled=true", - "polarity": "pass", - "normalized_id": "b2b.brave.preset.wired.through.to.openclaw.tools.web.search.provider.brave.and.enabled.true", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-brave-search-e2e.sh", - "line": 243, - "text": "B2b: openclaw web-search config does not select brave (got: $(printf '%s' ", - "polarity": "fail", - "normalized_id": "b2b.openclaw.web.search.config.does.not.select.brave.got.printf.s", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-brave-search-e2e.sh", - "line": 257, - "text": "B3a: SECURITY — real BRAVE_API_KEY found verbatim in /sandbox/.openclaw/openclaw.json", - "polarity": "fail", - "normalized_id": "b3a.security.real.brave.api.key.found.verbatim.in.sandbox.openclaw.openclaw.json", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-brave-search-e2e.sh", - "line": 259, - "text": "B3a: openclaw.json contains the placeholder, not the real key", - "polarity": "pass", - "normalized_id": "b3a.openclaw.json.contains.the.placeholder.not.the.real.key", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-brave-search-e2e.sh", - "line": 261, - "text": "B3a: openclaw.json has neither the real key nor the placeholder — web search not configured", - "polarity": "fail", - "normalized_id": "b3a.openclaw.json.has.neither.the.real.key.nor.the.placeholder.web.search.not.configured", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-brave-search-e2e.sh", - "line": 268, - "text": "B3b: SECURITY — real BRAVE_API_KEY visible to sandbox shell via printenv", - 
"polarity": "fail", - "normalized_id": "b3b.security.real.brave.api.key.visible.to.sandbox.shell.via.printenv", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-brave-search-e2e.sh", - "line": 270, - "text": "B3b: sandbox shell env does not expose the real key (placeholder or empty)", - "polarity": "pass", - "normalized_id": "b3b.sandbox.shell.env.does.not.expose.the.real.key.placeholder.or.empty", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-brave-search-e2e.sh", - "line": 272, - "text": "B3b: unexpected non-empty BRAVE_API_KEY in sandbox env", - "polarity": "fail", - "normalized_id": "b3b.unexpected.non.empty.brave.api.key.in.sandbox.env", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-brave-search-e2e.sh", - "line": 286, - "text": "B4a: agent web-search turn — could not get SSH config", - "polarity": "fail", - "normalized_id": "b4a.agent.web.search.turn.could.not.get.ssh.config", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-brave-search-e2e.sh", - "line": 305, - "text": "B4a: agent web-search failed with provider/transport error (exit ${rc}): $(printf '%s' ", - "polarity": "fail", - "normalized_id": "b4a.agent.web.search.failed.with.provider.transport.error.exit.rc.printf.s", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-brave-search-e2e.sh", - "line": 326, - "text": "B4a: openclaw agent web-search returned a real Brave result", - "polarity": "pass", - "normalized_id": "b4a.openclaw.agent.web.search.returned.a.real.brave.result", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-brave-search-e2e.sh", - "line": 328, - "text": "B4a: agent web-search did not return a recognizable Brave result (exit ${rc}, reply='$(printf '%s' ", - "polarity": "fail", - "normalized_id": "b4a.agent.web.search.did.not.return.a.recognizable.brave.result.exit.rc.reply.printf.s", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-brave-search-e2e.sh", - 
"line": 359, - "text": "B4b: real Brave search via curl returned HTTP 200 with non-empty web.results[]", - "polarity": "pass", - "normalized_id": "b4b.real.brave.search.via.curl.returned.http.200.with.non.empty.web.results", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-brave-search-e2e.sh", - "line": 361, - "text": "B4b: HTTP 200 but response had no web.results[] (body parsed empty)", - "polarity": "fail", - "normalized_id": "b4b.http.200.but.response.had.no.web.results.body.parsed.empty", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-brave-search-e2e.sh", - "line": 366, - "text": "B4b: curl never completed an HTTP transaction — check curl is in brave.yaml binaries allowlist. $(printf '%s' ", - "polarity": "fail", - "normalized_id": "b4b.curl.never.completed.an.http.transaction.check.curl.is.in.brave.yaml.binaries.allowlist.printf.s", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-brave-search-e2e.sh", - "line": 368, - "text": "B4b: unexpected HTTP status '${status_code:-}' from Brave (exit $rc)", - "polarity": "fail", - "normalized_id": "b4b.unexpected.http.status.status.code.none.from.brave.exit.rc", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-brave-search-e2e.sh", - "line": 390, - "text": "B0: BRAVE_API_KEY is available", - "polarity": "pass", - "normalized_id": "b0.brave.api.key.is.available", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-brave-search-e2e.sh", - "line": 394, - "text": "Docker is not running", - "polarity": "fail", - "normalized_id": "docker.is.not.running", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-brave-search-e2e.sh", - "line": 397, - "text": "Docker is running", - "polarity": "pass", - "normalized_id": "docker.is.running", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-brave-search-e2e.sh", - "line": 400, - "text": "python3 not found", - "polarity": "fail", - "normalized_id": 
"python3.not.found", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-brave-search-e2e.sh", - "line": 403, - "text": "python3 is available", - "polarity": "pass", - "normalized_id": "python3.is.available", - "mapping_status": "deferred" - } - ] - }, - { - "script": "test/e2e/test-channels-stop-start.sh", - "assertions": [], - "zero_assertion_review": { - "reason": "TODO: review legacy entrypoint for assertions not expressed as PASS/FAIL output" - } - }, - { - "script": "test/e2e/test-cloud-inference-e2e.sh", - "assertions": [ - { - "script": "test/e2e/test-cloud-inference-e2e.sh", - "line": 101, - "text": "Docker is not running", - "polarity": "fail", - "normalized_id": "docker.is.not.running", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-cloud-inference-e2e.sh", - "line": 104, - "text": "Docker is running", - "polarity": "pass", - "normalized_id": "docker.is.running", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-cloud-inference-e2e.sh", - "line": 107, - "text": "NVIDIA_API_KEY not set or invalid", - "polarity": "fail", - "normalized_id": "nvidia.api.key.not.set.or.invalid", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-cloud-inference-e2e.sh", - "line": 110, - "text": "NVIDIA_API_KEY is set", - "polarity": "pass", - "normalized_id": "nvidia.api.key.is.set", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-cloud-inference-e2e.sh", - "line": 113, - "text": "Could not cd to repo root", - "polarity": "fail", - "normalized_id": "could.not.cd.to.repo.root", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-cloud-inference-e2e.sh", - "line": 139, - "text": "install.sh failed (exit $install_exit)", - "polarity": "fail", - "normalized_id": "install.sh.failed.exit.install.exit", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-cloud-inference-e2e.sh", - "line": 143, - "text": "NemoClaw installed", - "polarity": "pass", - "normalized_id": 
"nemoclaw.installed", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-cloud-inference-e2e.sh", - "line": 146, - "text": "nemoclaw not on PATH", - "polarity": "fail", - "normalized_id": "nemoclaw.not.on.path", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-cloud-inference-e2e.sh", - "line": 150, - "text": "openshell not on PATH", - "polarity": "fail", - "normalized_id": "openshell.not.on.path", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-cloud-inference-e2e.sh", - "line": 153, - "text": "CLIs on PATH", - "polarity": "pass", - "normalized_id": "clis.on.path", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-cloud-inference-e2e.sh", - "line": 161, - "text": "python3 not on PATH", - "polarity": "fail", - "normalized_id": "python3.not.on.path", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-cloud-inference-e2e.sh", - "line": 173, - "text": "Could not build chat payload", - "polarity": "fail", - "normalized_id": "could.not.build.chat.payload", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-cloud-inference-e2e.sh", - "line": 190, - "text": "openshell sandbox ssh-config failed for '${SANDBOX_NAME}'", - "polarity": "fail", - "normalized_id": "openshell.sandbox.ssh.config.failed.for.sandbox.name", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-cloud-inference-e2e.sh", - "line": 219, - "text": "Chat completion returned PONG (attempt ${attempt}/${MAX_ATTEMPTS})", - "polarity": "pass", - "normalized_id": "chat.completion.returned.pong.attempt.attempt.max.attempts", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-cloud-inference-e2e.sh", - "line": 236, - "text": "Live chat: $last_fail", - "polarity": "fail", - "normalized_id": "live.chat.last.fail", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-cloud-inference-e2e.sh", - "line": 247, - "text": "Repo skill validation failed", - "polarity": "fail", - 
"normalized_id": "repo.skill.validation.failed", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-cloud-inference-e2e.sh", - "line": 250, - "text": "Repo agent skills (SKILL.md) valid", - "polarity": "pass", - "normalized_id": "repo.agent.skills.skill.md.valid", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-cloud-inference-e2e.sh", - "line": 259, - "text": "Sandbox OpenClaw layout check failed (exit ${sb_rc}): ${sb_out:0:240}", - "polarity": "fail", - "normalized_id": "sandbox.openclaw.layout.check.failed.exit.sb.rc.sb.out.0.240", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-cloud-inference-e2e.sh", - "line": 262, - "text": "Sandbox /sandbox/.openclaw + openclaw.json OK", - "polarity": "pass", - "normalized_id": "sandbox.sandbox.openclaw.openclaw.json.ok", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-cloud-inference-e2e.sh", - "line": 265, - "text": "Sandbox /sandbox/.openclaw/skills present", - "polarity": "pass", - "normalized_id": "sandbox.sandbox.openclaw.skills.present", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-cloud-inference-e2e.sh", - "line": 269, - "text": "Unexpected sandbox check output: ${sb_out:0:240}", - "polarity": "fail", - "normalized_id": "unexpected.sandbox.check.output.sb.out.0.240", - "mapping_status": "retired" - } - ] - }, - { - "script": "test/e2e/test-cloud-onboard-e2e.sh", - "assertions": [ - { - "script": "test/e2e/test-cloud-onboard-e2e.sh", - "line": 99, - "text": "Pre-cleanup complete", - "polarity": "pass", - "normalized_id": "pre.cleanup.complete", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-cloud-onboard-e2e.sh", - "line": 107, - "text": "Docker is running", - "polarity": "pass", - "normalized_id": "docker.is.running", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-cloud-onboard-e2e.sh", - "line": 109, - "text": "Docker is not running — cannot continue", - "polarity": "fail", - 
"normalized_id": "docker.is.not.running.cannot.continue", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-cloud-onboard-e2e.sh", - "line": 114, - "text": "NVIDIA_API_KEY is set (starts with nvapi-)", - "polarity": "pass", - "normalized_id": "nvidia.api.key.is.set.starts.with.nvapi", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-cloud-onboard-e2e.sh", - "line": 116, - "text": "NVIDIA_API_KEY not set or invalid — required for cloud onboard", - "polarity": "fail", - "normalized_id": "nvidia.api.key.not.set.or.invalid.required.for.cloud.onboard", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-cloud-onboard-e2e.sh", - "line": 121, - "text": "Network access to integrate.api.nvidia.com", - "polarity": "pass", - "normalized_id": "network.access.to.integrate.api.nvidia.com", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-cloud-onboard-e2e.sh", - "line": 123, - "text": "Cannot reach integrate.api.nvidia.com", - "polarity": "fail", - "normalized_id": "cannot.reach.integrate.api.nvidia.com", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-cloud-onboard-e2e.sh", - "line": 129, - "text": "NEMOCLAW_NON_INTERACTIVE=1 is required for non-interactive install", - "polarity": "fail", - "normalized_id": "nemoclaw.non.interactive.1.is.required.for.non.interactive.install", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-cloud-onboard-e2e.sh", - "line": 133, - "text": "NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 is required for non-interactive install", - "polarity": "fail", - "normalized_id": "nemoclaw.accept.third.party.software.1.is.required.for.non.interactive.install", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-cloud-onboard-e2e.sh", - "line": 136, - "text": "Non-interactive mode configured", - "polarity": "pass", - "normalized_id": "non.interactive.mode.configured", - "mapping_status": "deferred" - }, - { - "script": 
"test/e2e/test-cloud-onboard-e2e.sh", - "line": 142, - "text": "Host OS is Linux", - "polarity": "pass", - "normalized_id": "host.os.is.linux", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-cloud-onboard-e2e.sh", - "line": 183, - "text": "Interactive install (RUN_E2E_CLOUD_ONBOARD_INTERACTIVE_INSTALL=1) is not yet supported — use non-interactive mode", - "polarity": "fail", - "normalized_id": "interactive.install.run.e2e.cloud.onboard.interactive.install.1.is.not.yet.supported.use.non.interactive.mode", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-cloud-onboard-e2e.sh", - "line": 214, - "text": "Public install completed (exit 0)", - "polarity": "pass", - "normalized_id": "public.install.completed.exit.0", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-cloud-onboard-e2e.sh", - "line": 216, - "text": "Public install failed (exit $install_exit)", - "polarity": "fail", - "normalized_id": "public.install.failed.exit.install.exit", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-cloud-onboard-e2e.sh", - "line": 223, - "text": "Public install unexpectedly used the local source checkout", - "polarity": "fail", - "normalized_id": "public.install.unexpectedly.used.the.local.source.checkout", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-cloud-onboard-e2e.sh", - "line": 232, - "text": "Public install used the GitHub clone path", - "polarity": "pass", - "normalized_id": "public.install.used.the.github.clone.path", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-cloud-onboard-e2e.sh", - "line": 234, - "text": "Public install did not show the GitHub clone path", - "polarity": "fail", - "normalized_id": "public.install.did.not.show.the.github.clone.path", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-cloud-onboard-e2e.sh", - "line": 242, - "text": "Public install used requested ref ${PUBLIC_INSTALL_REF}", - "polarity": "pass", - 
"normalized_id": "public.install.used.requested.ref.public.install.ref", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-cloud-onboard-e2e.sh", - "line": 244, - "text": "Public install did not use requested ref ${PUBLIC_INSTALL_REF}", - "polarity": "fail", - "normalized_id": "public.install.did.not.use.requested.ref.public.install.ref", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-cloud-onboard-e2e.sh", - "line": 252, - "text": "nemoclaw on PATH ($(command -v nemoclaw))", - "polarity": "pass", - "normalized_id": "nemoclaw.on.path.command.v.nemoclaw", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-cloud-onboard-e2e.sh", - "line": 254, - "text": "nemoclaw not found on PATH after install", - "polarity": "fail", - "normalized_id": "nemoclaw.not.found.on.path.after.install", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-cloud-onboard-e2e.sh", - "line": 259, - "text": "openshell on PATH ($(openshell --version 2>&1 || echo unknown))", - "polarity": "pass", - "normalized_id": "openshell.on.path.openshell.version.2.1.echo.unknown", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-cloud-onboard-e2e.sh", - "line": 261, - "text": "openshell not found on PATH after install", - "polarity": "fail", - "normalized_id": "openshell.not.found.on.path.after.install", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-cloud-onboard-e2e.sh", - "line": 266, - "text": "nemoclaw --help exits 0", - "polarity": "pass", - "normalized_id": "nemoclaw.help.exits.0", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-cloud-onboard-e2e.sh", - "line": 268, - "text": "nemoclaw --help failed", - "polarity": "fail", - "normalized_id": "nemoclaw.help.failed", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-cloud-onboard-e2e.sh", - "line": 295, - "text": "$(basename ", - "polarity": "pass", - "normalized_id": "basename", - "mapping_status": "deferred" - }, - { - 
"script": "test/e2e/test-cloud-onboard-e2e.sh", - "line": 297, - "text": "$(basename ", - "polarity": "fail", - "normalized_id": "basename", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-cloud-onboard-e2e.sh", - "line": 313, - "text": "Cleanup or verification failed", - "polarity": "fail", - "normalized_id": "cleanup.or.verification.failed", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-cloud-onboard-e2e.sh", - "line": 316, - "text": "Cleanup complete", - "polarity": "pass", - "normalized_id": "cleanup.complete", - "mapping_status": "deferred" - } - ] - }, - { - "script": "test/e2e/test-credential-migration.sh", - "assertions": [ - { - "script": "test/e2e/test-credential-migration.sh", - "line": 97, - "text": "NVIDIA_API_KEY not set", - "polarity": "fail", - "normalized_id": "nvidia.api.key.not.set", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-credential-migration.sh", - "line": 100, - "text": "NVIDIA_API_KEY is set", - "polarity": "pass", - "normalized_id": "nvidia.api.key.is.set", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-credential-migration.sh", - "line": 106, - "text": "install.sh failed; see /tmp/nemoclaw-e2e-install.log", - "polarity": "fail", - "normalized_id": "install.sh.failed.see.tmp.nemoclaw.e2e.install.log", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-credential-migration.sh", - "line": 114, - "text": "openshell still missing after install", - "polarity": "fail", - "normalized_id": "openshell.still.missing.after.install", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-credential-migration.sh", - "line": 118, - "text": "nemoclaw still missing after install", - "polarity": "fail", - "normalized_id": "nemoclaw.still.missing.after.install", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-credential-migration.sh", - "line": 121, - "text": "openshell + nemoclaw on PATH", - "polarity": "pass", - 
"normalized_id": "openshell.nemoclaw.on.path", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-credential-migration.sh", - "line": 167, - "text": "nemoclaw onboard succeeded with only the legacy file as the credential source", - "polarity": "pass", - "normalized_id": "nemoclaw.onboard.succeeded.with.only.the.legacy.file.as.the.credential.source", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-credential-migration.sh", - "line": 169, - "text": "nemoclaw onboard failed (exit $ONBOARD_EXIT); see log below", - "polarity": "fail", - "normalized_id": "nemoclaw.onboard.failed.exit.onboard.exit.see.log.below", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-credential-migration.sh", - "line": 176, - "text": "Migration notice was emitted to stderr", - "polarity": "pass", - "normalized_id": "migration.notice.was.emitted.to.stderr", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-credential-migration.sh", - "line": 178, - "text": "Expected migration notice on stderr; not found in onboard log", - "polarity": "fail", - "normalized_id": "expected.migration.notice.on.stderr.not.found.in.onboard.log", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-credential-migration.sh", - "line": 185, - "text": "Legacy credentials.json still exists after successful onboard", - "polarity": "fail", - "normalized_id": "legacy.credentials.json.still.exists.after.successful.onboard", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-credential-migration.sh", - "line": 187, - "text": "Legacy credentials.json was removed after onboard", - "polarity": "pass", - "normalized_id": "legacy.credentials.json.was.removed.after.onboard", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-credential-migration.sh", - "line": 196, - "text": "openshell -g nemoclaw provider list --names failed", - "polarity": "fail", - "normalized_id": "openshell.g.nemoclaw.provider.list.names.failed", - 
"mapping_status": "deferred" - }, - { - "script": "test/e2e/test-credential-migration.sh", - "line": 209, - "text": "At least one provider is registered with the gateway ($PROVIDER_COUNT total)", - "polarity": "pass", - "normalized_id": "at.least.one.provider.is.registered.with.the.gateway.provider.count.total", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-credential-migration.sh", - "line": 211, - "text": "No providers registered with the gateway after migration", - "polarity": "fail", - "normalized_id": "no.providers.registered.with.the.gateway.after.migration", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-credential-migration.sh", - "line": 221, - "text": "A non-allowlisted key from the tampered file appears as a gateway provider", - "polarity": "fail", - "normalized_id": "a.non.allowlisted.key.from.the.tampered.file.appears.as.a.gateway.provider", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-credential-migration.sh", - "line": 223, - "text": "Non-allowlisted keys from the tampered file did not become providers", - "polarity": "pass", - "normalized_id": "non.allowlisted.keys.from.the.tampered.file.did.not.become.providers", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-credential-migration.sh", - "line": 232, - "text": "nemoclaw credentials list failed", - "polarity": "fail", - "normalized_id": "nemoclaw.credentials.list.failed", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-credential-migration.sh", - "line": 240, - "text": "credentials list surfaces gateway-registered providers", - "polarity": "pass", - "normalized_id": "credentials.list.surfaces.gateway.registered.providers", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-credential-migration.sh", - "line": 242, - "text": "credentials list did not produce the expected gateway header", - "polarity": "fail", - "normalized_id": "credentials.list.did.not.produce.the.expected.gateway.header", 
- "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-credential-migration.sh", - "line": 248, - "text": "credentials.json reappeared on disk after credentials list", - "polarity": "fail", - "normalized_id": "credentials.json.reappeared.on.disk.after.credentials.list", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-credential-migration.sh", - "line": 250, - "text": "No plaintext credentials.json on disk after credentials list", - "polarity": "pass", - "normalized_id": "no.plaintext.credentials.json.on.disk.after.credentials.list", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-credential-migration.sh", - "line": 273, - "text": "node invocation of removeLegacyCredentialsFile failed", - "polarity": "fail", - "normalized_id": "node.invocation.of.removelegacycredentialsfile.failed", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-credential-migration.sh", - "line": 277, - "text": "Symlink at credentials path was not removed", - "polarity": "fail", - "normalized_id": "symlink.at.credentials.path.was.not.removed", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-credential-migration.sh", - "line": 279, - "text": "Symlink at credentials path was removed", - "polarity": "pass", - "normalized_id": "symlink.at.credentials.path.was.removed", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-credential-migration.sh", - "line": 283, - "text": "Victim file was deleted; secureUnlink followed the symlink", - "polarity": "fail", - "normalized_id": "victim.file.was.deleted.secureunlink.followed.the.symlink", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-credential-migration.sh", - "line": 285, - "text": "Victim file contents were modified; secureUnlink wrote through the symlink", - "polarity": "fail", - "normalized_id": "victim.file.contents.were.modified.secureunlink.wrote.through.the.symlink", - "mapping_status": "deferred" - }, - { - "script": 
"test/e2e/test-credential-migration.sh", - "line": 287, - "text": "Victim file is untouched (link removed without following the target)", - "polarity": "pass", - "normalized_id": "victim.file.is.untouched.link.removed.without.following.the.target", - "mapping_status": "retired" - } - ] - }, - { - "script": "test/e2e/test-credential-sanitization.sh", - "assertions": [ - { - "script": "test/e2e/test-credential-sanitization.sh", - "line": 114, - "text": "NVIDIA_API_KEY not set", - "polarity": "fail", - "normalized_id": "nvidia.api.key.not.set", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-credential-sanitization.sh", - "line": 117, - "text": "NVIDIA_API_KEY is set", - "polarity": "pass", - "normalized_id": "nvidia.api.key.is.set", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-credential-sanitization.sh", - "line": 120, - "text": "openshell not found on PATH", - "polarity": "fail", - "normalized_id": "openshell.not.found.on.path", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-credential-sanitization.sh", - "line": 123, - "text": "openshell found", - "polarity": "pass", - "normalized_id": "openshell.found", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-credential-sanitization.sh", - "line": 126, - "text": "nemoclaw not found on PATH", - "polarity": "fail", - "normalized_id": "nemoclaw.not.found.on.path", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-credential-sanitization.sh", - "line": 129, - "text": "nemoclaw found", - "polarity": "pass", - "normalized_id": "nemoclaw.found", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-credential-sanitization.sh", - "line": 132, - "text": "node not found on PATH", - "polarity": "fail", - "normalized_id": "node.not.found.on.path", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-credential-sanitization.sh", - "line": 135, - "text": "node found", - "polarity": "pass", - "normalized_id": 
"node.found", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-credential-sanitization.sh", - "line": 140, - "text": "Sandbox '${SANDBOX_NAME}' is running", - "polarity": "pass", - "normalized_id": "sandbox.sandbox.name.is.running", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-credential-sanitization.sh", - "line": 142, - "text": "Sandbox '${SANDBOX_NAME}' not running — run test-full-e2e.sh first", - "polarity": "fail", - "normalized_id": "sandbox.sandbox.name.not.running.run.test.full.e2e.sh.first", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-credential-sanitization.sh", - "line": 297, - "text": "Sanitization ran successfully", - "polarity": "pass", - "normalized_id": "sanitization.ran.successfully", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-credential-sanitization.sh", - "line": 299, - "text": "Sanitization script failed: ${sanitize_result:0:200}", - "polarity": "fail", - "normalized_id": "sanitization.script.failed.sanitize.result.0.200", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-credential-sanitization.sh", - "line": 306, - "text": "C1: No fake NVIDIA key found in bundle", - "polarity": "pass", - "normalized_id": "c1.no.fake.nvidia.key.found.in.bundle", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-credential-sanitization.sh", - "line": 308, - "text": "C1: Fake NVIDIA key found in bundle: ${nvapi_hits:0:200}", - "polarity": "fail", - "normalized_id": "c1.fake.nvidia.key.found.in.bundle.nvapi.hits.0.200", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-credential-sanitization.sh", - "line": 317, - "text": "C1b: No fake GitHub/npm/gateway tokens found in bundle", - "polarity": "pass", - "normalized_id": "c1b.no.fake.github.npm.gateway.tokens.found.in.bundle", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-credential-sanitization.sh", - "line": 319, - "text": "C1b: Fake tokens found — github: 
${github_hits:0:80}, npm: ${npm_hits:0:80}, gateway: ${gateway_hits:0:80}", - "polarity": "fail", - "normalized_id": "c1b.fake.tokens.found.github.github.hits.0.80.npm.npm.hits.0.80.gateway.gateway.hits.0.80", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-credential-sanitization.sh", - "line": 326, - "text": "C2: auth-profiles.json deleted from bundle", - "polarity": "pass", - "normalized_id": "c2.auth.profiles.json.deleted.from.bundle", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-credential-sanitization.sh", - "line": 328, - "text": "C2: auth-profiles.json still exists: $auth_files", - "polarity": "fail", - "normalized_id": "c2.auth.profiles.json.still.exists.auth.files", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-credential-sanitization.sh", - "line": 348, - "text": "C3a: nvidia.apiKey replaced with sentinel", - "polarity": "pass", - "normalized_id": "c3a.nvidia.apikey.replaced.with.sentinel", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-credential-sanitization.sh", - "line": 350, - "text": "C3a: nvidia.apiKey not sanitized (got: $nvidia_apikey)", - "polarity": "fail", - "normalized_id": "c3a.nvidia.apikey.not.sanitized.got.nvidia.apikey", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-credential-sanitization.sh", - "line": 354, - "text": "C3b: gateway.auth.token replaced with sentinel", - "polarity": "pass", - "normalized_id": "c3b.gateway.auth.token.replaced.with.sentinel", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-credential-sanitization.sh", - "line": 356, - "text": "C3b: gateway.auth.token not sanitized (got: $gateway_token)", - "polarity": "fail", - "normalized_id": "c3b.gateway.auth.token.not.sanitized.got.gateway.token", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-credential-sanitization.sh", - "line": 374, - "text": "C4a: agents.defaults.model.primary preserved", - "polarity": "pass", - 
"normalized_id": "c4a.agents.defaults.model.primary.preserved", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-credential-sanitization.sh", - "line": 376, - "text": "C4a: agents.defaults.model.primary corrupted (got: $model_primary)", - "polarity": "fail", - "normalized_id": "c4a.agents.defaults.model.primary.corrupted.got.model.primary", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-credential-sanitization.sh", - "line": 380, - "text": "C4b: gateway.mode preserved", - "polarity": "pass", - "normalized_id": "c4b.gateway.mode.preserved", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-credential-sanitization.sh", - "line": 382, - "text": "C4b: gateway.mode corrupted (got: $gateway_mode)", - "polarity": "fail", - "normalized_id": "c4b.gateway.mode.corrupted.got.gateway.mode", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-credential-sanitization.sh", - "line": 390, - "text": "C5: workspace/project.md intact", - "polarity": "pass", - "normalized_id": "c5.workspace.project.md.intact", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-credential-sanitization.sh", - "line": 392, - "text": "C5: workspace/project.md content changed", - "polarity": "fail", - "normalized_id": "c5.workspace.project.md.content.changed", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-credential-sanitization.sh", - "line": 395, - "text": "C5: workspace/project.md missing from bundle", - "polarity": "fail", - "normalized_id": "c5.workspace.project.md.missing.from.bundle", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-credential-sanitization.sh", - "line": 415, - "text": "C6: Sandbox probe failed — SSH did not execute; cannot verify auth-profiles.json absence", - "polarity": "fail", - "normalized_id": "c6.sandbox.probe.failed.ssh.did.not.execute.cannot.verify.auth.profiles.json.absence", - "mapping_status": "deferred" - }, - { - "script": 
"test/e2e/test-credential-sanitization.sh", - "line": 417, - "text": "C6: No auth-profiles.json found inside sandbox", - "polarity": "pass", - "normalized_id": "c6.no.auth.profiles.json.found.inside.sandbox", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-credential-sanitization.sh", - "line": 419, - "text": "C6: auth-profiles.json found inside sandbox: $c6_result", - "polarity": "fail", - "normalized_id": "c6.auth.profiles.json.found.inside.sandbox.c6.result", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-credential-sanitization.sh", - "line": 433, - "text": "C7: Sandbox probe failed — SSH did not execute; cannot verify secret absence", - "polarity": "fail", - "normalized_id": "c7.sandbox.probe.failed.ssh.did.not.execute.cannot.verify.secret.absence", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-credential-sanitization.sh", - "line": 435, - "text": "C7: No secret patterns (nvapi-, ghp_, npm_) found in sandbox config", - "polarity": "pass", - "normalized_id": "c7.no.secret.patterns.nvapi.ghp.npm.found.in.sandbox.config", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-credential-sanitization.sh", - "line": 437, - "text": "C7: Secret patterns found in sandbox — nvapi: ${c7_nvapi:0:100}, ghp: ${c7_ghp:0:100}, npm: ${c7_npm:0:100}", - "polarity": "fail", - "normalized_id": "c7.secret.patterns.found.in.sandbox.nvapi.c7.nvapi.0.100.ghp.c7.ghp.0.100.npm.c7.npm.0.100", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-credential-sanitization.sh", - "line": 492, - "text": "C8: Symlink traversal blocked — outside file preserved", - "polarity": "pass", - "normalized_id": "c8.symlink.traversal.blocked.outside.file.preserved", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-credential-sanitization.sh", - "line": 494, - "text": "C8: Symlink traversal — outside file was DELETED through symlink!", - "polarity": "fail", - "normalized_id": 
"c8.symlink.traversal.outside.file.was.deleted.through.symlink", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-credential-sanitization.sh", - "line": 550, - "text": "C9a: Empty digest string correctly rejected", - "polarity": "pass", - "normalized_id": "c9a.empty.digest.string.correctly.rejected", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-credential-sanitization.sh", - "line": 552, - "text": "C9a: Empty digest string was ACCEPTED — bypass still possible!", - "polarity": "fail", - "normalized_id": "c9a.empty.digest.string.was.accepted.bypass.still.possible", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-credential-sanitization.sh", - "line": 556, - "text": "C9b: Undefined digest correctly rejected", - "polarity": "pass", - "normalized_id": "c9b.undefined.digest.correctly.rejected", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-credential-sanitization.sh", - "line": 558, - "text": "C9b: Undefined digest was ACCEPTED — bypass still possible!", - "polarity": "fail", - "normalized_id": "c9b.undefined.digest.was.accepted.bypass.still.possible", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-credential-sanitization.sh", - "line": 585, - "text": "C10: Wrong digest correctly rejected", - "polarity": "pass", - "normalized_id": "c10.wrong.digest.correctly.rejected", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-credential-sanitization.sh", - "line": 587, - "text": "C10: Wrong digest was ACCEPTED — verification broken!", - "polarity": "fail", - "normalized_id": "c10.wrong.digest.was.accepted.verification.broken", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-credential-sanitization.sh", - "line": 614, - "text": "C11: Correct digest correctly accepted", - "polarity": "pass", - "normalized_id": "c11.correct.digest.correctly.accepted", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-credential-sanitization.sh", - 
"line": 616, - "text": "C11: Correct digest was REJECTED — false negative!", - "polarity": "fail", - "normalized_id": "c11.correct.digest.was.rejected.false.negative", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-credential-sanitization.sh", - "line": 679, - "text": "C12: All pattern-matched credential fields stripped", - "polarity": "pass", - "normalized_id": "c12.all.pattern.matched.credential.fields.stripped", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-credential-sanitization.sh", - "line": 681, - "text": "C12: Some credential fields NOT stripped: ${c12_result}", - "polarity": "fail", - "normalized_id": "c12.some.credential.fields.not.stripped.c12.result", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-credential-sanitization.sh", - "line": 760, - "text": "C13: All non-credential fields preserved correctly", - "polarity": "pass", - "normalized_id": "c13.all.non.credential.fields.preserved.correctly", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-credential-sanitization.sh", - "line": 762, - "text": "C13: Some non-credential fields were corrupted: ${c13_result}", - "polarity": "fail", - "normalized_id": "c13.some.non.credential.fields.were.corrupted.c13.result", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-credential-sanitization.sh", - "line": 778, - "text": "Blueprint digest field found and identified", - "polarity": "pass", - "normalized_id": "blueprint.digest.field.found.and.identified", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-credential-sanitization.sh", - "line": 781, - "text": "Blueprint digest field found (empty)", - "polarity": "pass", - "normalized_id": "blueprint.digest.field.found.empty", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-credential-sanitization.sh", - "line": 784, - "text": "Blueprint has a digest value set", - "polarity": "pass", - "normalized_id": "blueprint.has.a.digest.value.set", 
- "mapping_status": "deferred" - } - ] - }, - { - "script": "test/e2e/test-dashboard-remote-bind.sh", - "assertions": [ - { - "script": "test/e2e/test-dashboard-remote-bind.sh", - "line": 8, - "text": "$1", - "polarity": "pass", - "normalized_id": "1", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-dashboard-remote-bind.sh", - "line": 10, - "text": "$1", - "polarity": "fail", - "normalized_id": "1", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-dashboard-remote-bind.sh", - "line": 28, - "text": "nemoclaw CLI is not on PATH", - "polarity": "fail", - "normalized_id": "nemoclaw.cli.is.not.on.path", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-dashboard-remote-bind.sh", - "line": 31, - "text": "openshell CLI is not on PATH", - "polarity": "fail", - "normalized_id": "openshell.cli.is.not.on.path", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-dashboard-remote-bind.sh", - "line": 33, - "text": "Required CLIs are available", - "polarity": "pass", - "normalized_id": "required.clis.are.available", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-dashboard-remote-bind.sh", - "line": 44, - "text": "nemoclaw connect completed with NEMOCLAW_DASHBOARD_BIND=0.0.0.0", - "polarity": "pass", - "normalized_id": "nemoclaw.connect.completed.with.nemoclaw.dashboard.bind.0.0.0.0", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-dashboard-remote-bind.sh", - "line": 47, - "text": "nemoclaw connect failed with NEMOCLAW_DASHBOARD_BIND=0.0.0.0", - "polarity": "fail", - "normalized_id": "nemoclaw.connect.failed.with.nemoclaw.dashboard.bind.0.0.0.0", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-dashboard-remote-bind.sh", - "line": 55, - "text": "No OpenShell forward found for ${SANDBOX_NAME} on ${DASHBOARD_PORT}", - "polarity": "fail", - "normalized_id": "no.openshell.forward.found.for.sandbox.name.on.dashboard.port", - "mapping_status": "deferred" - }, 
- { - "script": "test/e2e/test-dashboard-remote-bind.sh", - "line": 61, - "text": "Dashboard forward binds all interfaces for remote origin (${DASHBOARD_PORT})", - "polarity": "pass", - "normalized_id": "dashboard.forward.binds.all.interfaces.for.remote.origin.dashboard.port", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-dashboard-remote-bind.sh", - "line": 64, - "text": "Dashboard forward is still localhost-only; expected 0.0.0.0:${DASHBOARD_PORT}", - "polarity": "fail", - "normalized_id": "dashboard.forward.is.still.localhost.only.expected.0.0.0.0.dashboard.port", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-dashboard-remote-bind.sh", - "line": 67, - "text": "Could not prove dashboard forward uses 0.0.0.0:${DASHBOARD_PORT} from: ${FORWARD_LINE}", - "polarity": "fail", - "normalized_id": "could.not.prove.dashboard.forward.uses.0.0.0.0.dashboard.port.from.forward.line", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-dashboard-remote-bind.sh", - "line": 72, - "text": "Remote dashboard bind guard completed", - "polarity": "pass", - "normalized_id": "remote.dashboard.bind.guard.completed", - "mapping_status": "deferred" - } - ] - }, - { - "script": "test/e2e/test-device-auth-health.sh", - "assertions": [ - { - "script": "test/e2e/test-device-auth-health.sh", - "line": 139, - "text": "Preflight checks passed", - "polarity": "pass", - "normalized_id": "preflight.checks.passed", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-device-auth-health.sh", - "line": 170, - "text": "Install failed with exit code $INSTALL_EXIT", - "polarity": "fail", - "normalized_id": "install.failed.with.exit.code.install.exit", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-device-auth-health.sh", - "line": 176, - "text": "nemoclaw not found on PATH after install", - "polarity": "fail", - "normalized_id": "nemoclaw.not.found.on.path.after.install", - "mapping_status": "deferred" - }, - { - 
"script": "test/e2e/test-device-auth-health.sh", - "line": 190, - "text": "Onboard succeeded — sandbox '${SANDBOX_NAME}' registered", - "polarity": "pass", - "normalized_id": "onboard.succeeded.sandbox.sandbox.name.registered", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-device-auth-health.sh", - "line": 192, - "text": "Sandbox '${SANDBOX_NAME}' not found in nemoclaw list after onboard", - "polarity": "fail", - "normalized_id": "sandbox.sandbox.name.not.found.in.nemoclaw.list.after.onboard", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-device-auth-health.sh", - "line": 223, - "text": "/health returns 200 (auth-free health endpoint via sandbox exec)", - "polarity": "pass", - "normalized_id": "health.returns.200.auth.free.health.endpoint.via.sandbox.exec", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-device-auth-health.sh", - "line": 228, - "text": "/health returned ${HEALTH_CODE} — expected 200", - "polarity": "fail", - "normalized_id": "health.returned.health.code.expected.200", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-device-auth-health.sh", - "line": 239, - "text": "/ returns 401 (device auth is active — confirms test premise)", - "polarity": "pass", - "normalized_id": "returns.401.device.auth.is.active.confirms.test.premise", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-device-auth-health.sh", - "line": 245, - "text": "/ returned ${ROOT_CODE:-empty} — expected 401 (device auth) or 200 (no auth)", - "polarity": "fail", - "normalized_id": "returned.root.code.empty.expected.401.device.auth.or.200.no.auth", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-device-auth-health.sh", - "line": 260, - "text": "Status reports 'Offline' — #2342 REGRESSION: 401 treated as dead", - "polarity": "fail", - "normalized_id": "status.reports.offline.2342.regression.401.treated.as.dead", - "mapping_status": "deferred" - }, - { - "script": 
"test/e2e/test-device-auth-health.sh", - "line": 263, - "text": "Status does NOT report 'Offline' (gateway correctly detected as alive)", - "polarity": "pass", - "normalized_id": "status.does.not.report.offline.gateway.correctly.detected.as.alive", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-device-auth-health.sh", - "line": 268, - "text": "Status shows positive health indicator (Running/Online/Healthy)", - "polarity": "pass", - "normalized_id": "status.shows.positive.health.indicator.running.online.healthy", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-device-auth-health.sh", - "line": 285, - "text": "Host port forward to dashboard is live (HTTP ${HOST_HEALTH_CODE})", - "polarity": "pass", - "normalized_id": "host.port.forward.to.dashboard.is.live.http.host.health.code", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-device-auth-health.sh", - "line": 291, - "text": "Host health probe returned ${HOST_HEALTH_CODE} — expected 200 or 401", - "polarity": "fail", - "normalized_id": "host.health.probe.returned.host.health.code.expected.200.or.401", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-device-auth-health.sh", - "line": 319, - "text": "Status reports 'Offline' during recovery — #2342 regression", - "polarity": "fail", - "normalized_id": "status.reports.offline.during.recovery.2342.regression", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-device-auth-health.sh", - "line": 321, - "text": "Status does not report 'Offline' during recovery attempt", - "polarity": "pass", - "normalized_id": "status.does.not.report.offline.during.recovery.attempt", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-device-auth-health.sh", - "line": 340, - "text": "Gateway recovered after restart (HTTP ${RECOVER_HEALTH} on /health)", - "polarity": "pass", - "normalized_id": "gateway.recovered.after.restart.http.recover.health.on.health", - "mapping_status": 
"deferred" - }, - { - "script": "test/e2e/test-device-auth-health.sh", - "line": 353, - "text": "Onboard log contains deployment verification output", - "polarity": "pass", - "normalized_id": "onboard.log.contains.deployment.verification.output", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-device-auth-health.sh", - "line": 355, - "text": "Onboard log confirms dashboard readiness check passed", - "polarity": "pass", - "normalized_id": "onboard.log.confirms.dashboard.readiness.check.passed", - "mapping_status": "deferred" - } - ] - }, - { - "script": "test/e2e/test-diagnostics.sh", - "assertions": [ - { - "script": "test/e2e/test-diagnostics.sh", - "line": 182, - "text": "TC-DIAG-04: Exit code", - "polarity": "fail", - "normalized_id": "tc.diag.04.exit.code", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-diagnostics.sh", - "line": 187, - "text": "TC-DIAG-04: Version output matches semver ($version_output)", - "polarity": "pass", - "normalized_id": "tc.diag.04.version.output.matches.semver.version.output", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-diagnostics.sh", - "line": 189, - "text": "TC-DIAG-04: Format", - "polarity": "fail", - "normalized_id": "tc.diag.04.format", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-diagnostics.sh", - "line": 217, - "text": "TC-DIAG-02: Exit code", - "polarity": "fail", - "normalized_id": "tc.diag.02.exit.code", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-diagnostics.sh", - "line": 223, - "text": "TC-DIAG-02: debug --quick produced non-empty archive (${elapsed}s)", - "polarity": "pass", - "normalized_id": "tc.diag.02.debug.quick.produced.non.empty.archive.elapsed.s", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-diagnostics.sh", - "line": 225, - "text": "TC-DIAG-02: Output", - "polarity": "fail", - "normalized_id": "tc.diag.02.output", - "mapping_status": "deferred" - }, - { - "script": 
"test/e2e/test-diagnostics.sh", - "line": 229, - "text": "TC-DIAG-02: Completed within time limit (${elapsed}s)", - "polarity": "pass", - "normalized_id": "tc.diag.02.completed.within.time.limit.elapsed.s", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-diagnostics.sh", - "line": 231, - "text": "TC-DIAG-02: Timing", - "polarity": "fail", - "normalized_id": "tc.diag.02.timing", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-diagnostics.sh", - "line": 253, - "text": "TC-DIAG-01: Setup", - "polarity": "fail", - "normalized_id": "tc.diag.01.setup", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-diagnostics.sh", - "line": 258, - "text": "TC-DIAG-01: Debug tarball created", - "polarity": "pass", - "normalized_id": "tc.diag.01.debug.tarball.created", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-diagnostics.sh", - "line": 262, - "text": "TC-DIAG-01: Extract", - "polarity": "fail", - "normalized_id": "tc.diag.01.extract", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-diagnostics.sh", - "line": 279, - "text": "TC-DIAG-01: No API key found in debug tarball", - "polarity": "pass", - "normalized_id": "tc.diag.01.no.api.key.found.in.debug.tarball", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-diagnostics.sh", - "line": 281, - "text": "TC-DIAG-01: Credential leak", - "polarity": "fail", - "normalized_id": "tc.diag.01.credential.leak", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-diagnostics.sh", - "line": 287, - "text": "TC-DIAG-01: No nvapi- pattern credentials in tarball", - "polarity": "pass", - "normalized_id": "tc.diag.01.no.nvapi.pattern.credentials.in.tarball", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-diagnostics.sh", - "line": 289, - "text": "TC-DIAG-01: Pattern leak", - "polarity": "fail", - "normalized_id": "tc.diag.01.pattern.leak", - "mapping_status": "deferred" - }, - { - "script": 
"test/e2e/test-diagnostics.sh", - "line": 306, - "text": "TC-DIAG-05: Config", - "polarity": "fail", - "normalized_id": "tc.diag.05.config", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-diagnostics.sh", - "line": 310, - "text": "TC-DIAG-05: openclaw.json readable inside sandbox", - "polarity": "pass", - "normalized_id": "tc.diag.05.openclaw.json.readable.inside.sandbox", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-diagnostics.sh", - "line": 316, - "text": "TC-DIAG-05: nemoclaw status shows model info", - "polarity": "pass", - "normalized_id": "tc.diag.05.nemoclaw.status.shows.model.info", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-diagnostics.sh", - "line": 318, - "text": "TC-DIAG-05: nemoclaw status shows Model field", - "polarity": "pass", - "normalized_id": "tc.diag.05.nemoclaw.status.shows.model.field", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-diagnostics.sh", - "line": 320, - "text": "TC-DIAG-05: Status", - "polarity": "fail", - "normalized_id": "tc.diag.05.status", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-diagnostics.sh", - "line": 338, - "text": "TC-DIAG-03: List", - "polarity": "fail", - "normalized_id": "tc.diag.03.list", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-diagnostics.sh", - "line": 343, - "text": "TC-DIAG-03: credentials list works (store empty — API key passed via env on CI)", - "polarity": "pass", - "normalized_id": "tc.diag.03.credentials.list.works.store.empty.api.key.passed.via.env.on.ci", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-diagnostics.sh", - "line": 347, - "text": "TC-DIAG-03: Value leak", - "polarity": "fail", - "normalized_id": "tc.diag.03.value.leak", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-diagnostics.sh", - "line": 349, - "text": "TC-DIAG-03: credentials list does not expose env key values", - "polarity": "pass", - "normalized_id": 
"tc.diag.03.credentials.list.does.not.expose.env.key.values", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-diagnostics.sh", - "line": 355, - "text": "TC-DIAG-03: credentials list shows key name", - "polarity": "pass", - "normalized_id": "tc.diag.03.credentials.list.shows.key.name", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-diagnostics.sh", - "line": 362, - "text": "TC-DIAG-03: Value leak", - "polarity": "fail", - "normalized_id": "tc.diag.03.value.leak", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-diagnostics.sh", - "line": 364, - "text": "TC-DIAG-03: credentials list does not expose key values", - "polarity": "pass", - "normalized_id": "tc.diag.03.credentials.list.does.not.expose.key.values", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-diagnostics.sh", - "line": 373, - "text": "TC-DIAG-03: credentials reset completed", - "polarity": "pass", - "normalized_id": "tc.diag.03.credentials.reset.completed", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-diagnostics.sh", - "line": 375, - "text": "TC-DIAG-03: Reset", - "polarity": "fail", - "normalized_id": "tc.diag.03.reset", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-diagnostics.sh", - "line": 383, - "text": "TC-DIAG-03: Post-reset", - "polarity": "fail", - "normalized_id": "tc.diag.03.post.reset", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-diagnostics.sh", - "line": 385, - "text": "TC-DIAG-03: NVIDIA_API_KEY removed after reset", - "polarity": "pass", - "normalized_id": "tc.diag.03.nvidia.api.key.removed.after.reset", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-diagnostics.sh", - "line": 405, - "text": "$PASS${NC}", - "polarity": "pass", - "normalized_id": "pass.nc", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-diagnostics.sh", - "line": 406, - "text": "$FAIL${NC}", - "polarity": "fail", - "normalized_id": "fail.nc", - 
"mapping_status": "deferred" - } - ] - }, - { - "script": "test/e2e/test-docs-validation.sh", - "assertions": [ - { - "script": "test/e2e/test-docs-validation.sh", - "line": 81, - "text": "nemoclaw on PATH", - "polarity": "pass", - "normalized_id": "nemoclaw.on.path", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-docs-validation.sh", - "line": 90, - "text": "nemoclaw on PATH (after sourcing nvm)", - "polarity": "pass", - "normalized_id": "nemoclaw.on.path.after.sourcing.nvm", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-docs-validation.sh", - "line": 92, - "text": "nemoclaw not on PATH — install NemoClaw first", - "polarity": "fail", - "normalized_id": "nemoclaw.not.on.path.install.nemoclaw.first", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-docs-validation.sh", - "line": 109, - "text": "CLI / docs parity check passed", - "polarity": "pass", - "normalized_id": "cli.docs.parity.check.passed", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-docs-validation.sh", - "line": 111, - "text": "CLI / docs parity check failed (exit ${cli_rc})", - "polarity": "fail", - "normalized_id": "cli.docs.parity.check.failed.exit.cli.rc", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-docs-validation.sh", - "line": 135, - "text": "Markdown link validation passed", - "polarity": "pass", - "normalized_id": "markdown.link.validation.passed", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-docs-validation.sh", - "line": 141, - "text": "Markdown link validation failed (exit ${links_rc})", - "polarity": "fail", - "normalized_id": "markdown.link.validation.failed.exit.links.rc", - "mapping_status": "deferred" - } - ] - }, - { - "script": "test/e2e/test-double-onboard.sh", - "assertions": [ - { - "script": "test/e2e/test-double-onboard.sh", - "line": 401, - "text": "Pre-cleanup complete", - "polarity": "pass", - "normalized_id": "pre.cleanup.complete", - "mapping_status": 
"deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 409, - "text": "Docker is running", - "polarity": "pass", - "normalized_id": "docker.is.running", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 411, - "text": "Docker is not running — cannot continue", - "polarity": "fail", - "normalized_id": "docker.is.not.running.cannot.continue", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 416, - "text": "openshell CLI installed", - "polarity": "pass", - "normalized_id": "openshell.cli.installed", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 418, - "text": "openshell CLI not found — cannot continue", - "polarity": "fail", - "normalized_id": "openshell.cli.not.found.cannot.continue", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 423, - "text": "nemoclaw CLI available", - "polarity": "pass", - "normalized_id": "nemoclaw.cli.available", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 425, - "text": "nemoclaw CLI not found — cannot continue", - "polarity": "fail", - "normalized_id": "nemoclaw.cli.not.found.cannot.continue", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 430, - "text": "python3 installed", - "polarity": "pass", - "normalized_id": "python3.installed", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 432, - "text": "python3 not found — cannot continue", - "polarity": "fail", - "normalized_id": "python3.not.found.cannot.continue", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 437, - "text": "Fake OpenAI-compatible endpoint started at ${FAKE_BASE_URL}", - "polarity": "pass", - "normalized_id": 
"fake.openai.compatible.endpoint.started.at.fake.base.url", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 439, - "text": "Failed to start fake OpenAI-compatible endpoint", - "polarity": "fail", - "normalized_id": "failed.to.start.fake.openai.compatible.endpoint", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 458, - "text": "First onboard completed successfully", - "polarity": "pass", - "normalized_id": "first.onboard.completed.successfully", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 460, - "text": "First onboard timed out after ${PHASE_TIMEOUT}s (exit 124)", - "polarity": "fail", - "normalized_id": "first.onboard.timed.out.after.phase.timeout.s.exit.124", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 463, - "text": "First onboard exited $exit1 (expected 0)", - "polarity": "fail", - "normalized_id": "first.onboard.exited.exit1.expected.0", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 468, - "text": "Sandbox '$SANDBOX_A' created", - "polarity": "pass", - "normalized_id": "sandbox.sandbox.a.created", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 470, - "text": "Sandbox '$SANDBOX_A' creation not confirmed in output", - "polarity": "fail", - "normalized_id": "sandbox.sandbox.a.creation.not.confirmed.in.output", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 474, - "text": "Gateway is running after first onboard", - "polarity": "pass", - "normalized_id": "gateway.is.running.after.first.onboard", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 476, - "text": "Gateway is not running after first onboard", - "polarity": "fail", - "normalized_id": 
"gateway.is.not.running.after.first.onboard", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 480, - "text": "Sandbox '$SANDBOX_A' exists in openshell", - "polarity": "pass", - "normalized_id": "sandbox.sandbox.a.exists.in.openshell", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 482, - "text": "Sandbox '$SANDBOX_A' not found in openshell", - "polarity": "fail", - "normalized_id": "sandbox.sandbox.a.not.found.in.openshell", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 486, - "text": "Registry contains '$SANDBOX_A'", - "polarity": "pass", - "normalized_id": "registry.contains.sandbox.a", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 488, - "text": "Registry does not contain '$SANDBOX_A'", - "polarity": "fail", - "normalized_id": "registry.does.not.contain.sandbox.a", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 505, - "text": "Second onboard completed successfully", - "polarity": "pass", - "normalized_id": "second.onboard.completed.successfully", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 507, - "text": "Second onboard timed out after ${PHASE_TIMEOUT}s (exit 124)", - "polarity": "fail", - "normalized_id": "second.onboard.timed.out.after.phase.timeout.s.exit.124", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 510, - "text": "Second onboard exited $exit2 (expected 0)", - "polarity": "fail", - "normalized_id": "second.onboard.exited.exit2.expected.0", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 516, - "text": "Healthy gateway runtime reused on second onboard ($GATEWAY_ID_BEFORE)", - "polarity": "pass", - "normalized_id": 
"healthy.gateway.runtime.reused.on.second.onboard.gateway.id.before", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 518, - "text": "Gateway runtime changed on second onboard (before=$GATEWAY_ID_BEFORE after=$GATEWAY_ID_AFTER)", - "polarity": "fail", - "normalized_id": "gateway.runtime.changed.on.second.onboard.before.gateway.id.before.after.gateway.id.after", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 522, - "text": "Port 8080 conflict detected (regression)", - "polarity": "fail", - "normalized_id": "port.8080.conflict.detected.regression", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 524, - "text": "No port 8080 conflict on second onboard", - "polarity": "pass", - "normalized_id": "no.port.8080.conflict.on.second.onboard", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 528, - "text": "Port 18789 conflict detected on second onboard", - "polarity": "fail", - "normalized_id": "port.18789.conflict.detected.on.second.onboard", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 530, - "text": "No port 18789 conflict on second onboard", - "polarity": "pass", - "normalized_id": "no.port.18789.conflict.on.second.onboard", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 534, - "text": "Sandbox '$SANDBOX_A' still exists after recreate", - "polarity": "pass", - "normalized_id": "sandbox.sandbox.a.still.exists.after.recreate", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 536, - "text": "Sandbox '$SANDBOX_A' missing after recreate", - "polarity": "fail", - "normalized_id": "sandbox.sandbox.a.missing.after.recreate", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 554, - "text": 
"Alternate gateway alias selected before third onboard", - "polarity": "pass", - "normalized_id": "alternate.gateway.alias.selected.before.third.onboard", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 556, - "text": "Alternate gateway alias was not selected before third onboard (selected=${selected_gateway:-unknown})", - "polarity": "fail", - "normalized_id": "alternate.gateway.alias.was.not.selected.before.third.onboard.selected.selected.gateway.unknown", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 559, - "text": "Could not select alternate gateway alias before third onboard (add output=${alt_gateway_add_output:-empty})", - "polarity": "fail", - "normalized_id": "could.not.select.alternate.gateway.alias.before.third.onboard.add.output.alt.gateway.add.output.empty", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 570, - "text": "Third onboard completed successfully", - "polarity": "pass", - "normalized_id": "third.onboard.completed.successfully", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 572, - "text": "Third onboard timed out after ${PHASE_TIMEOUT}s (exit 124)", - "polarity": "fail", - "normalized_id": "third.onboard.timed.out.after.phase.timeout.s.exit.124", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 575, - "text": "Third onboard exited $exit3 (expected 0)", - "polarity": "fail", - "normalized_id": "third.onboard.exited.exit3.expected.0", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 581, - "text": "Healthy gateway runtime reused on third onboard ($GATEWAY_ID_BEFORE3)", - "polarity": "pass", - "normalized_id": "healthy.gateway.runtime.reused.on.third.onboard.gateway.id.before3", - "mapping_status": "deferred" - }, - { - "script": 
"test/e2e/test-double-onboard.sh", - "line": 583, - "text": "Gateway runtime changed on third onboard (before=$GATEWAY_ID_BEFORE3 after=$GATEWAY_ID_AFTER3)", - "polarity": "fail", - "normalized_id": "gateway.runtime.changed.on.third.onboard.before.gateway.id.before3.after.gateway.id.after3", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 587, - "text": "Port 8080 conflict on third onboard", - "polarity": "fail", - "normalized_id": "port.8080.conflict.on.third.onboard", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 589, - "text": "No port 8080 conflict on third onboard", - "polarity": "pass", - "normalized_id": "no.port.8080.conflict.on.third.onboard", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 593, - "text": "Port 18789 conflict on third onboard", - "polarity": "fail", - "normalized_id": "port.18789.conflict.on.third.onboard", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 595, - "text": "No port 18789 conflict on third onboard", - "polarity": "pass", - "normalized_id": "no.port.18789.conflict.on.third.onboard", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 604, - "text": "Named gateway reselected during third onboard", - "polarity": "pass", - "normalized_id": "named.gateway.reselected.during.third.onboard", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 606, - "text": "Named gateway was not reselected during third onboard (selected=${selected_gateway:-unknown})", - "polarity": "fail", - "normalized_id": "named.gateway.was.not.reselected.during.third.onboard.selected.selected.gateway.unknown", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 610, - "text": "Sandbox '$SANDBOX_B' created", - "polarity": "pass", - 
"normalized_id": "sandbox.sandbox.b.created", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 612, - "text": "Sandbox '$SANDBOX_B' was not created", - "polarity": "fail", - "normalized_id": "sandbox.sandbox.b.was.not.created", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 616, - "text": "First sandbox '$SANDBOX_A' still exists after creating '$SANDBOX_B'", - "polarity": "pass", - "normalized_id": "first.sandbox.sandbox.a.still.exists.after.creating.sandbox.b", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 618, - "text": "First sandbox '$SANDBOX_A' disappeared after creating '$SANDBOX_B' (regression: #849)", - "polarity": "fail", - "normalized_id": "first.sandbox.sandbox.a.disappeared.after.creating.sandbox.b.regression.849", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 638, - "text": "nemoclaw list shows dashboard ports for both test sandboxes (#2174)", - "polarity": "pass", - "normalized_id": "nemoclaw.list.shows.dashboard.ports.for.both.test.sandboxes.2174", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 640, - "text": "nemoclaw list did not show dashboard ports for both test sandboxes (a=${port_a:-missing} b=${port_b:-missing})", - "polarity": "fail", - "normalized_id": "nemoclaw.list.did.not.show.dashboard.ports.for.both.test.sandboxes.a.port.a.missing.b.port.b.missing", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 646, - "text": "nemoclaw list shows distinct dashboard ports for test sandboxes (#2174)", - "polarity": "pass", - "normalized_id": "nemoclaw.list.shows.distinct.dashboard.ports.for.test.sandboxes.2174", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 648, - "text": "test sandboxes did not have distinct 
dashboard ports (#2174): ${SANDBOX_A}=${port_a:-missing} ${SANDBOX_B}=${port_b:-missing}", - "polarity": "fail", - "normalized_id": "test.sandboxes.did.not.have.distinct.dashboard.ports.2174.sandbox.a.port.a.missing.sandbox.b.port.b.missing", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 672, - "text": "Probe-only connect recovered '$SANDBOX_B' dashboard forward", - "polarity": "pass", - "normalized_id": "probe.only.connect.recovered.sandbox.b.dashboard.forward", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 674, - "text": "Probe-only connect exited $probe_exit after stopping '$SANDBOX_B' dashboard forward", - "polarity": "fail", - "normalized_id": "probe.only.connect.exited.probe.exit.after.stopping.sandbox.b.dashboard.forward", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 685, - "text": "Second sandbox dashboard forward restored on its recorded port", - "polarity": "pass", - "normalized_id": "second.sandbox.dashboard.forward.restored.on.its.recorded.port", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 687, - "text": "Second sandbox dashboard forward owner mismatch on port $port_b (owner=${owner_b:-missing})", - "polarity": "fail", - "normalized_id": "second.sandbox.dashboard.forward.owner.mismatch.on.port.port.b.owner.owner.b.missing", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 693, - "text": "First sandbox dashboard forward kept its recorded port", - "polarity": "pass", - "normalized_id": "first.sandbox.dashboard.forward.kept.its.recorded.port", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 695, - "text": "First sandbox dashboard forward owner mismatch on port $port_a (owner=${owner_a:-missing})", - "polarity": "fail", - "normalized_id": 
"first.sandbox.dashboard.forward.owner.mismatch.on.port.port.a.owner.owner.a.missing", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 709, - "text": "OpenShell reports '$SANDBOX_A' absent after direct deletion", - "polarity": "pass", - "normalized_id": "openshell.reports.sandbox.a.absent.after.direct.deletion", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 711, - "text": "OpenShell still reports '$SANDBOX_A' after direct deletion", - "polarity": "fail", - "normalized_id": "openshell.still.reports.sandbox.a.after.direct.deletion", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 715, - "text": "Registry still contains stale '$SANDBOX_A' entry", - "polarity": "pass", - "normalized_id": "registry.still.contains.stale.sandbox.a.entry", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 717, - "text": "Registry was unexpectedly cleaned before status reconciliation", - "polarity": "fail", - "normalized_id": "registry.was.unexpectedly.cleaned.before.status.reconciliation", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 727, - "text": "Stale sandbox status exited 1", - "polarity": "pass", - "normalized_id": "stale.sandbox.status.exited.1", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 729, - "text": "Stale sandbox status exited $status_exit (expected 1)", - "polarity": "fail", - "normalized_id": "stale.sandbox.status.exited.status.exit.expected.1", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 733, - "text": "Stale registry entry was reconciled during status", - "polarity": "pass", - "normalized_id": "stale.registry.entry.was.reconciled.during.status", - "mapping_status": "deferred" - }, - { - "script": 
"test/e2e/test-double-onboard.sh", - "line": 735, - "text": "Stale registry reconciliation message missing", - "polarity": "fail", - "normalized_id": "stale.registry.reconciliation.message.missing", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 739, - "text": "Registry still contains '$SANDBOX_A' after status reconciliation", - "polarity": "fail", - "normalized_id": "registry.still.contains.sandbox.a.after.status.reconciliation", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 741, - "text": "Registry entry for '$SANDBOX_A' removed after status reconciliation", - "polarity": "pass", - "normalized_id": "registry.entry.for.sandbox.a.removed.after.status.reconciliation", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 760, - "text": "Post-stop status exited $gateway_status_exit", - "polarity": "pass", - "normalized_id": "post.stop.status.exited.gateway.status.exit", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 762, - "text": "Post-stop status exited $gateway_status_exit (expected 0 or 1)", - "polarity": "fail", - "normalized_id": "post.stop.status.exited.gateway.status.exit.expected.0.or.1", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 768, - "text": "Gateway lifecycle response was explicit after gateway stop", - "polarity": "pass", - "normalized_id": "gateway.lifecycle.response.was.explicit.after.gateway.stop", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 770, - "text": "Gateway lifecycle response was not explicit after gateway stop", - "polarity": "fail", - "normalized_id": "gateway.lifecycle.response.was.not.explicit.after.gateway.stop", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 776, - "text": "Registry still 
contains '$SANDBOX_B' after gateway stop", - "polarity": "pass", - "normalized_id": "registry.still.contains.sandbox.b.after.gateway.stop", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 778, - "text": "Registry is missing '$SANDBOX_B' after gateway stop", - "polarity": "fail", - "normalized_id": "registry.is.missing.sandbox.b.after.gateway.stop", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 811, - "text": "Sandbox '$SANDBOX_A' still exists after cleanup", - "polarity": "fail", - "normalized_id": "sandbox.sandbox.a.still.exists.after.cleanup", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 813, - "text": "Sandbox '$SANDBOX_A' cleaned up", - "polarity": "pass", - "normalized_id": "sandbox.sandbox.a.cleaned.up", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 817, - "text": "Sandbox '$SANDBOX_B' still exists after cleanup", - "polarity": "fail", - "normalized_id": "sandbox.sandbox.b.still.exists.after.cleanup", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 819, - "text": "Sandbox '$SANDBOX_B' cleaned up", - "polarity": "pass", - "normalized_id": "sandbox.sandbox.b.cleaned.up", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 823, - "text": "Registry still contains test sandbox entries", - "polarity": "fail", - "normalized_id": "registry.still.contains.test.sandbox.entries", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 825, - "text": "Registry cleaned up", - "polarity": "pass", - "normalized_id": "registry.cleaned.up", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-double-onboard.sh", - "line": 828, - "text": "Final cleanup complete", - "polarity": "pass", - "normalized_id": "final.cleanup.complete", - 
"mapping_status": "deferred" - } - ] - }, - { - "script": "test/e2e/test-full-e2e.sh", - "assertions": [ - { - "script": "test/e2e/test-full-e2e.sh", - "line": 100, - "text": "Pre-cleanup complete", - "polarity": "pass", - "normalized_id": "pre.cleanup.complete", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-full-e2e.sh", - "line": 108, - "text": "Docker is running", - "polarity": "pass", - "normalized_id": "docker.is.running", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-full-e2e.sh", - "line": 110, - "text": "Docker is not running — cannot continue", - "polarity": "fail", - "normalized_id": "docker.is.not.running.cannot.continue", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-full-e2e.sh", - "line": 115, - "text": "NVIDIA_API_KEY is set (starts with nvapi-)", - "polarity": "pass", - "normalized_id": "nvidia.api.key.is.set.starts.with.nvapi", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-full-e2e.sh", - "line": 117, - "text": "NVIDIA_API_KEY not set or invalid — required for live inference", - "polarity": "fail", - "normalized_id": "nvidia.api.key.not.set.or.invalid.required.for.live.inference", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-full-e2e.sh", - "line": 122, - "text": "Network access to integrate.api.nvidia.com", - "polarity": "pass", - "normalized_id": "network.access.to.integrate.api.nvidia.com", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-full-e2e.sh", - "line": 124, - "text": "Cannot reach integrate.api.nvidia.com", - "polarity": "fail", - "normalized_id": "cannot.reach.integrate.api.nvidia.com", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-full-e2e.sh", - "line": 129, - "text": "NEMOCLAW_NON_INTERACTIVE=1 is required", - "polarity": "fail", - "normalized_id": "nemoclaw.non.interactive.1.is.required", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-full-e2e.sh", - "line": 134, - 
"text": "NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 is required for non-interactive install", - "polarity": "fail", - "normalized_id": "nemoclaw.accept.third.party.software.1.is.required.for.non.interactive.install", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-full-e2e.sh", - "line": 144, - "text": "Could not cd to repo root: $REPO", - "polarity": "fail", - "normalized_id": "could.not.cd.to.repo.root.repo", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-full-e2e.sh", - "line": 182, - "text": "install.sh completed (exit 0)", - "polarity": "pass", - "normalized_id": "install.sh.completed.exit.0", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-full-e2e.sh", - "line": 184, - "text": "install.sh failed (exit $install_exit)", - "polarity": "fail", - "normalized_id": "install.sh.failed.exit.install.exit", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-full-e2e.sh", - "line": 190, - "text": "nemoclaw installed at $(command -v nemoclaw)", - "polarity": "pass", - "normalized_id": "nemoclaw.installed.at.command.v.nemoclaw", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-full-e2e.sh", - "line": 192, - "text": "nemoclaw not found on PATH after install", - "polarity": "fail", - "normalized_id": "nemoclaw.not.found.on.path.after.install", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-full-e2e.sh", - "line": 198, - "text": "openshell installed ($(openshell --version 2>&1 || echo unknown))", - "polarity": "pass", - "normalized_id": "openshell.installed.openshell.version.2.1.echo.unknown", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-full-e2e.sh", - "line": 200, - "text": "openshell not found on PATH after install", - "polarity": "fail", - "normalized_id": "openshell.not.found.on.path.after.install", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-full-e2e.sh", - "line": 205, - "text": "nemoclaw --help exits 0", - "polarity": 
"pass", - "normalized_id": "nemoclaw.help.exits.0", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-full-e2e.sh", - "line": 207, - "text": "nemoclaw --help failed", - "polarity": "fail", - "normalized_id": "nemoclaw.help.failed", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-full-e2e.sh", - "line": 218, - "text": "nemoclaw list contains '${SANDBOX_NAME}'", - "polarity": "pass", - "normalized_id": "nemoclaw.list.contains.sandbox.name", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-full-e2e.sh", - "line": 220, - "text": "nemoclaw list does not contain '${SANDBOX_NAME}'", - "polarity": "fail", - "normalized_id": "nemoclaw.list.does.not.contain.sandbox.name", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-full-e2e.sh", - "line": 223, - "text": "nemoclaw list failed: ${list_output:0:200}", - "polarity": "fail", - "normalized_id": "nemoclaw.list.failed.list.output.0.200", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-full-e2e.sh", - "line": 228, - "text": "nemoclaw ${SANDBOX_NAME} status exits 0", - "polarity": "pass", - "normalized_id": "nemoclaw.sandbox.name.status.exits.0", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-full-e2e.sh", - "line": 230, - "text": "nemoclaw ${SANDBOX_NAME} status failed: ${status_output:0:200}", - "polarity": "fail", - "normalized_id": "nemoclaw.sandbox.name.status.failed.status.output.0.200", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-full-e2e.sh", - "line": 237, - "text": "Inference configured via onboard", - "polarity": "pass", - "normalized_id": "inference.configured.via.onboard", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-full-e2e.sh", - "line": 239, - "text": "Inference not configured — onboard did not set up nvidia-prod provider", - "polarity": "fail", - "normalized_id": "inference.not.configured.onboard.did.not.set.up.nvidia.prod.provider", - "mapping_status": "deferred" - 
}, - { - "script": "test/e2e/test-full-e2e.sh", - "line": 242, - "text": "openshell inference get failed: ${inf_check:0:200}", - "polarity": "fail", - "normalized_id": "openshell.inference.get.failed.inf.check.0.200", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-full-e2e.sh", - "line": 248, - "text": "Policy applied to sandbox", - "polarity": "pass", - "normalized_id": "policy.applied.to.sandbox", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-full-e2e.sh", - "line": 250, - "text": "No network policy found on sandbox", - "polarity": "fail", - "normalized_id": "no.network.policy.found.on.sandbox", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-full-e2e.sh", - "line": 255, - "text": "Policy presets (npm/pypi) detected in sandbox policy", - "polarity": "pass", - "normalized_id": "policy.presets.npm.pypi.detected.in.sandbox.policy", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-full-e2e.sh", - "line": 260, - "text": "openshell policy get failed: ${policy_output:0:200}", - "polarity": "fail", - "normalized_id": "openshell.policy.get.failed.policy.output.0.200", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-full-e2e.sh", - "line": 283, - "text": "[LIVE] Direct API: model responded with PONG", - "polarity": "pass", - "normalized_id": "live.direct.api.model.responded.with.pong", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-full-e2e.sh", - "line": 285, - "text": "[LIVE] Direct API: expected PONG, got: ${api_content:0:200}", - "polarity": "fail", - "normalized_id": "live.direct.api.expected.pong.got.api.content.0.200", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-full-e2e.sh", - "line": 288, - "text": "[LIVE] Direct API: empty response from curl", - "polarity": "fail", - "normalized_id": "live.direct.api.empty.response.from.curl", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-full-e2e.sh", - "line": 357, - "text": 
"[ROUTING] inference.local: OpenShell routed curl to NVIDIA Endpoints and returned PONG", - "polarity": "pass", - "normalized_id": "routing.inference.local.openshell.routed.curl.to.nvidia.endpoints.and.returned.pong", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-full-e2e.sh", - "line": 360, - "text": "[ROUTING] inference.local: expected PONG after 3 attempts, got: ${sandbox_content:0:200}", - "polarity": "fail", - "normalized_id": "routing.inference.local.expected.pong.after.3.attempts.got.sandbox.content.0.200", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-full-e2e.sh", - "line": 412, - "text": "[LIVE] openclaw agent: model answered 6×7=42 through openclaw → inference.local", - "polarity": "pass", - "normalized_id": "live.openclaw.agent.model.answered.6.7.42.through.openclaw.inference.local", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-full-e2e.sh", - "line": 414, - "text": "[LIVE] openclaw agent: expected '42' in agent reply, got: ${agent_reply:0:200}", - "polarity": "fail", - "normalized_id": "live.openclaw.agent.expected.42.in.agent.reply.got.agent.reply.0.200", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-full-e2e.sh", - "line": 432, - "text": "nemoclaw logs: produced output ($(echo ", - "polarity": "pass", - "normalized_id": "nemoclaw.logs.produced.output.echo", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-full-e2e.sh", - "line": 434, - "text": "nemoclaw logs: no output", - "polarity": "fail", - "normalized_id": "nemoclaw.logs.no.output", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-full-e2e.sh", - "line": 450, - "text": "Sandbox ${SANDBOX_NAME} still in registry after destroy", - "polarity": "fail", - "normalized_id": "sandbox.sandbox.name.still.in.registry.after.destroy", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-full-e2e.sh", - "line": 452, - "text": "Sandbox ${SANDBOX_NAME} removed", - "polarity": "pass", 
- "normalized_id": "sandbox.sandbox.name.removed", - "mapping_status": "retired" - } - ] - }, - { - "script": "test/e2e/test-gateway-drift-preflight.sh", - "assertions": [ - { - "script": "test/e2e/test-gateway-drift-preflight.sh", - "line": 8, - "text": "$1", - "polarity": "pass", - "normalized_id": "1", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gateway-drift-preflight.sh", - "line": 11, - "text": "$1", - "polarity": "fail", - "normalized_id": "1", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gateway-drift-preflight.sh", - "line": 176, - "text": "$description", - "polarity": "pass", - "normalized_id": "description", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gateway-drift-preflight.sh", - "line": 178, - "text": "$description (missing pattern: $pattern)", - "polarity": "fail", - "normalized_id": "description.missing.pattern.pattern", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gateway-drift-preflight.sh", - "line": 185, - "text": "$description (unexpected pattern: $pattern)", - "polarity": "fail", - "normalized_id": "description.unexpected.pattern.pattern", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-gateway-drift-preflight.sh", - "line": 187, - "text": "$description", - "polarity": "pass", - "normalized_id": "description", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gateway-drift-preflight.sh", - "line": 195, - "text": "npm ci failed", - "polarity": "fail", - "normalized_id": "npm.ci.failed", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gateway-drift-preflight.sh", - "line": 197, - "text": "CLI build failed", - "polarity": "fail", - "normalized_id": "cli.build.failed", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gateway-drift-preflight.sh", - "line": 208, - "text": "backup-all exits non-zero on protobuf mismatch", - "polarity": "pass", - "normalized_id": 
"backup.all.exits.non.zero.on.protobuf.mismatch", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gateway-drift-preflight.sh", - "line": 224, - "text": "backup-all unexpectedly succeeded with stale patched gateway image", - "polarity": "fail", - "normalized_id": "backup.all.unexpectedly.succeeded.with.stale.patched.gateway.image", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-gateway-drift-preflight.sh", - "line": 225, - "text": "backup-all exits non-zero on stale patched gateway image", - "polarity": "pass", - "normalized_id": "backup.all.exits.non.zero.on.stale.patched.gateway.image", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gateway-drift-preflight.sh", - "line": 230, - "text": "sandbox list was called despite preflight image drift", - "polarity": "fail", - "normalized_id": "sandbox.list.was.called.despite.preflight.image.drift", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gateway-drift-preflight.sh", - "line": 232, - "text": "preflight image drift blocks sandbox list", - "polarity": "pass", - "normalized_id": "preflight.image.drift.blocks.sandbox.list", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gateway-drift-preflight.sh", - "line": 235, - "text": "Gateway drift preflight regression guard completed", - "polarity": "pass", - "normalized_id": "gateway.drift.preflight.regression.guard.completed", - "mapping_status": "deferred" - } - ] - }, - { - "script": "test/e2e/test-gateway-health-honest.sh", - "assertions": [ - { - "script": "test/e2e/test-gateway-health-honest.sh", - "line": 122, - "text": "openshell not found after install", - "polarity": "fail", - "normalized_id": "openshell.not.found.after.install", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gateway-health-honest.sh", - "line": 123, - "text": "openshell-gateway not found after install", - "polarity": "fail", - "normalized_id": "openshell.gateway.not.found.after.install", 
- "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gateway-health-honest.sh", - "line": 187, - "text": "Sabotage markers (GLIBC_2.38/2.39 or 'openshell-gateway-sabotage') not observed in gateway log ${GATEWAY_ONBOARD_LOG} — the test may have failed before the sabotaged gateway was invoked, so the assertions below cannot be trusted. Inspect $START_LOG and $GATEWAY_ONBOARD_LOG above for the real cause.", - "polarity": "fail", - "normalized_id": "sabotage.markers.glibc.2.38.2.39.or.openshell.gateway.sabotage.not.observed.in.gateway.log.gateway.onboard.log.the.test.may.have.failed.before.the.sabotaged.gateway.was.invoked.so.the.assertions.below.cannot.be.trusted.inspect.start.log.and.gateway.onboard.log.above.for.the.real.cause", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gateway-health-honest.sh", - "line": 189, - "text": "Sabotage shim was invoked as expected (GLIBC/sabotage markers present in gateway log)", - "polarity": "pass", - "normalized_id": "sabotage.shim.was.invoked.as.expected.glibc.sabotage.markers.present.in.gateway.log", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gateway-health-honest.sh", - "line": 196, - "text": "Onboard reported '✓ Docker-driver gateway is healthy' although the gateway binary crashed on startup (#3111 false-positive health check)", - "polarity": "fail", - "normalized_id": "onboard.reported.docker.driver.gateway.is.healthy.although.the.gateway.binary.crashed.on.startup.3111.false.positive.health.check", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-gateway-health-honest.sh", - "line": 198, - "text": "Onboard did not falsely log 'Docker-driver gateway is healthy' when the binary crashed", - "polarity": "pass", - "normalized_id": "onboard.did.not.falsely.log.docker.driver.gateway.is.healthy.when.the.binary.crashed", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-gateway-health-honest.sh", - "line": 205, - "text": "startGateway() resolved 
successfully despite a crashed binary — onboard would have proceeded to inference setup against a dead gateway", - "polarity": "fail", - "normalized_id": "startgateway.resolved.successfully.despite.a.crashed.binary.onboard.would.have.proceeded.to.inference.setup.against.a.dead.gateway", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gateway-health-honest.sh", - "line": 207, - "text": "startGateway() did not resolve successfully with a crashed binary (node exit=${NODE_EXIT})", - "polarity": "pass", - "normalized_id": "startgateway.did.not.resolve.successfully.with.a.crashed.binary.node.exit.node.exit", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gateway-health-honest.sh", - "line": 215, - "text": "Onboard did not surface any gateway failure indicator to the user", - "polarity": "fail", - "normalized_id": "onboard.did.not.surface.any.gateway.failure.indicator.to.the.user", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gateway-health-honest.sh", - "line": 217, - "text": "Onboard surfaced a user-visible gateway failure message", - "polarity": "pass", - "normalized_id": "onboard.surfaced.a.user.visible.gateway.failure.message", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gateway-health-honest.sh", - "line": 227, - "text": "A non-zombie gateway pid (${LINGERING_PID}, state=${STATE}) is still alive after a simulated crash", - "polarity": "fail", - "normalized_id": "a.non.zombie.gateway.pid.lingering.pid.state.state.is.still.alive.after.a.simulated.crash", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gateway-health-honest.sh", - "line": 231, - "text": "No live (non-zombie) gateway process is running after the simulated crash", - "polarity": "pass", - "normalized_id": "no.live.non.zombie.gateway.process.is.running.after.the.simulated.crash", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gateway-health-honest.sh", - "line": 234, - "text": "#3111 
coverage guard green: onboard correctly surfaces a crashed gateway", - "polarity": "pass", - "normalized_id": "3111.coverage.guard.green.onboard.correctly.surfaces.a.crashed.gateway", - "mapping_status": "deferred" - } - ] - }, - { - "script": "test/e2e/test-gpu-double-onboard.sh", - "assertions": [ - { - "script": "test/e2e/test-gpu-double-onboard.sh", - "line": 153, - "text": "Pre-cleanup complete", - "polarity": "pass", - "normalized_id": "pre.cleanup.complete", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-double-onboard.sh", - "line": 161, - "text": "Docker is running", - "polarity": "pass", - "normalized_id": "docker.is.running", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-gpu-double-onboard.sh", - "line": 163, - "text": "Docker is not running — cannot continue", - "polarity": "fail", - "normalized_id": "docker.is.not.running.cannot.continue", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-double-onboard.sh", - "line": 169, - "text": "nvidia-smi works (GPU VRAM: ${VRAM_MB:-unknown} MB)", - "polarity": "pass", - "normalized_id": "nvidia.smi.works.gpu.vram.vram.mb.unknown.mb", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-double-onboard.sh", - "line": 171, - "text": "nvidia-smi failed — no NVIDIA GPU available", - "polarity": "fail", - "normalized_id": "nvidia.smi.failed.no.nvidia.gpu.available", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-double-onboard.sh", - "line": 176, - "text": "NEMOCLAW_NON_INTERACTIVE=1 is required", - "polarity": "fail", - "normalized_id": "nemoclaw.non.interactive.1.is.required", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-double-onboard.sh", - "line": 181, - "text": "NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 is required for non-interactive install", - "polarity": "fail", - "normalized_id": "nemoclaw.accept.third.party.software.1.is.required.for.non.interactive.install", - "mapping_status": 
"deferred" - }, - { - "script": "test/e2e/test-gpu-double-onboard.sh", - "line": 193, - "text": "Ollama already installed: $(ollama --version 2>/dev/null || echo unknown)", - "polarity": "pass", - "normalized_id": "ollama.already.installed.ollama.version.2.dev.null.echo.unknown", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-double-onboard.sh", - "line": 197, - "text": "Ollama installed: $(ollama --version 2>/dev/null || echo unknown)", - "polarity": "pass", - "normalized_id": "ollama.installed.ollama.version.2.dev.null.echo.unknown", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-double-onboard.sh", - "line": 199, - "text": "Ollama installation failed", - "polarity": "fail", - "normalized_id": "ollama.installation.failed", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-double-onboard.sh", - "line": 216, - "text": "Existing Ollama stopped — port 11434 is free for onboard", - "polarity": "pass", - "normalized_id": "existing.ollama.stopped.port.11434.is.free.for.onboard", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-double-onboard.sh", - "line": 226, - "text": "Could not cd to repo root: $REPO", - "polarity": "fail", - "normalized_id": "could.not.cd.to.repo.root.repo", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-double-onboard.sh", - "line": 253, - "text": "install.sh completed (exit 0)", - "polarity": "pass", - "normalized_id": "install.sh.completed.exit.0", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-double-onboard.sh", - "line": 255, - "text": "install.sh failed (exit $install_exit)", - "polarity": "fail", - "normalized_id": "install.sh.failed.exit.install.exit", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-double-onboard.sh", - "line": 262, - "text": "nemoclaw on PATH: $(command -v nemoclaw)", - "polarity": "pass", - "normalized_id": "nemoclaw.on.path.command.v.nemoclaw", - 
"mapping_status": "mapped" - }, - { - "script": "test/e2e/test-gpu-double-onboard.sh", - "line": 264, - "text": "nemoclaw not found on PATH after install", - "polarity": "fail", - "normalized_id": "nemoclaw.not.found.on.path.after.install", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-double-onboard.sh", - "line": 276, - "text": "nemoclaw list contains '${SANDBOX_NAME}'", - "polarity": "pass", - "normalized_id": "nemoclaw.list.contains.sandbox.name", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-gpu-double-onboard.sh", - "line": 278, - "text": "nemoclaw list does not contain '${SANDBOX_NAME}'", - "polarity": "fail", - "normalized_id": "nemoclaw.list.does.not.contain.sandbox.name", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-double-onboard.sh", - "line": 281, - "text": "nemoclaw list failed: ${list_output:0:200}", - "polarity": "fail", - "normalized_id": "nemoclaw.list.failed.list.output.0.200", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-double-onboard.sh", - "line": 286, - "text": "nemoclaw ${SANDBOX_NAME} status exits 0", - "polarity": "pass", - "normalized_id": "nemoclaw.sandbox.name.status.exits.0", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-gpu-double-onboard.sh", - "line": 288, - "text": "nemoclaw ${SANDBOX_NAME} status failed", - "polarity": "fail", - "normalized_id": "nemoclaw.sandbox.name.status.failed", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-double-onboard.sh", - "line": 293, - "text": "Ollama running on 127.0.0.1:11434", - "polarity": "pass", - "normalized_id": "ollama.running.on.127.0.0.1.11434", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-double-onboard.sh", - "line": 295, - "text": "Ollama not running — onboard should have started it", - "polarity": "fail", - "normalized_id": "ollama.not.running.onboard.should.have.started.it", - "mapping_status": "deferred" - }, - { - 
"script": "test/e2e/test-gpu-double-onboard.sh", - "line": 303, - "text": "Auth proxy running on :${PROXY_PORT} (HTTP $PROXY_LIVE_STATUS)", - "polarity": "pass", - "normalized_id": "auth.proxy.running.on.proxy.port.http.proxy.live.status", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-double-onboard.sh", - "line": 305, - "text": "Auth proxy not running on :${PROXY_PORT}", - "polarity": "fail", - "normalized_id": "auth.proxy.not.running.on.proxy.port", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-double-onboard.sh", - "line": 310, - "text": "Proxy token persisted at $TOKEN_FILE", - "polarity": "pass", - "normalized_id": "proxy.token.persisted.at.token.file", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-double-onboard.sh", - "line": 313, - "text": "Token file permissions: 600", - "polarity": "pass", - "normalized_id": "token.file.permissions.600", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-double-onboard.sh", - "line": 315, - "text": "Token file permissions: expected 600, got $PERMS", - "polarity": "fail", - "normalized_id": "token.file.permissions.expected.600.got.perms", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-double-onboard.sh", - "line": 318, - "text": "Proxy token file missing after first onboard", - "polarity": "fail", - "normalized_id": "proxy.token.file.missing.after.first.onboard", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-double-onboard.sh", - "line": 334, - "text": "Proxy accepts first-onboard token (200)", - "polarity": "pass", - "normalized_id": "proxy.accepts.first.onboard.token.200", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-double-onboard.sh", - "line": 336, - "text": "Proxy rejects first-onboard token (status: $FIRST_AUTH_STATUS)", - "polarity": "fail", - "normalized_id": "proxy.rejects.first.onboard.token.status.first.auth.status", - "mapping_status": 
"deferred" - }, - { - "script": "test/e2e/test-gpu-double-onboard.sh", - "line": 349, - "text": "No models found in Ollama", - "polarity": "fail", - "normalized_id": "no.models.found.in.ollama", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-double-onboard.sh", - "line": 369, - "text": "openshell sandbox ssh-config failed", - "polarity": "fail", - "normalized_id": "openshell.sandbox.ssh.config.failed", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-double-onboard.sh", - "line": 376, - "text": "First-onboard sandbox inference succeeded", - "polarity": "pass", - "normalized_id": "first.onboard.sandbox.inference.succeeded", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-double-onboard.sh", - "line": 378, - "text": "First-onboard sandbox inference: expected PONG, got: ${sandbox_content:0:200}", - "polarity": "fail", - "normalized_id": "first.onboard.sandbox.inference.expected.pong.got.sandbox.content.0.200", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-double-onboard.sh", - "line": 381, - "text": "First-onboard sandbox inference: no response", - "polarity": "fail", - "normalized_id": "first.onboard.sandbox.inference.no.response", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-double-onboard.sh", - "line": 404, - "text": "Re-onboard completed (exit 0)", - "polarity": "pass", - "normalized_id": "re.onboard.completed.exit.0", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-double-onboard.sh", - "line": 406, - "text": "Re-onboard failed (exit $reonboard_exit)", - "polarity": "fail", - "normalized_id": "re.onboard.failed.exit.reonboard.exit", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-double-onboard.sh", - "line": 422, - "text": "Proxy token file exists after re-onboard", - "polarity": "pass", - "normalized_id": "proxy.token.file.exists.after.re.onboard", - "mapping_status": "deferred" - }, - { - 
"script": "test/e2e/test-gpu-double-onboard.sh", - "line": 424, - "text": "Proxy token file missing after re-onboard", - "polarity": "fail", - "normalized_id": "proxy.token.file.missing.after.re.onboard", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-double-onboard.sh", - "line": 435, - "text": "Token file permissions preserved: 600", - "polarity": "pass", - "normalized_id": "token.file.permissions.preserved.600", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-double-onboard.sh", - "line": 437, - "text": "Token file permissions: expected 600, got $PERMS", - "polarity": "fail", - "normalized_id": "token.file.permissions.expected.600.got.perms", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-double-onboard.sh", - "line": 445, - "text": "Auth proxy running on :${PROXY_PORT} after re-onboard (HTTP $PROXY_LIVE_STATUS)", - "polarity": "pass", - "normalized_id": "auth.proxy.running.on.proxy.port.after.re.onboard.http.proxy.live.status", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-double-onboard.sh", - "line": 447, - "text": "Auth proxy not running after re-onboard", - "polarity": "fail", - "normalized_id": "auth.proxy.not.running.after.re.onboard", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-double-onboard.sh", - "line": 457, - "text": "Proxy accepts persisted token after re-onboard (200 — not 401)", - "polarity": "pass", - "normalized_id": "proxy.accepts.persisted.token.after.re.onboard.200.not.401", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-double-onboard.sh", - "line": 459, - "text": "PROXY TOKEN DIVERGENCE DETECTED (#2553 regression)", - "polarity": "fail", - "normalized_id": "proxy.token.divergence.detected.2553.regression", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-double-onboard.sh", - "line": 460, - "text": "Token on disk does not match running proxy (status: $TOKEN_AUTH_STATUS)", - 
"polarity": "fail", - "normalized_id": "token.on.disk.does.not.match.running.proxy.status.token.auth.status", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-double-onboard.sh", - "line": 468, - "text": "Proxy rejects unauthenticated POST after re-onboard (401)", - "polarity": "pass", - "normalized_id": "proxy.rejects.unauthenticated.post.after.re.onboard.401", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-double-onboard.sh", - "line": 470, - "text": "Proxy should reject unauthenticated POST, got $UNAUTH_STATUS", - "polarity": "fail", - "normalized_id": "proxy.should.reject.unauthenticated.post.got.unauth.status", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-double-onboard.sh", - "line": 478, - "text": "Proxy rejects wrong token after re-onboard (401)", - "polarity": "pass", - "normalized_id": "proxy.rejects.wrong.token.after.re.onboard.401", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-double-onboard.sh", - "line": 480, - "text": "Proxy should reject wrong token, got $WRONG_STATUS", - "polarity": "fail", - "normalized_id": "proxy.should.reject.wrong.token.got.wrong.status", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-double-onboard.sh", - "line": 506, - "text": "openshell sandbox ssh-config failed after re-onboard", - "polarity": "fail", - "normalized_id": "openshell.sandbox.ssh.config.failed.after.re.onboard", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-double-onboard.sh", - "line": 513, - "text": "Sandbox inference after re-onboard succeeded", - "polarity": "pass", - "normalized_id": "sandbox.inference.after.re.onboard.succeeded", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-double-onboard.sh", - "line": 518, - "text": "SANDBOX INFERENCE RETURNED 401 — token divergence (#2553 regression)", - "polarity": "fail", - "normalized_id": 
"sandbox.inference.returned.401.token.divergence.2553.regression", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-double-onboard.sh", - "line": 520, - "text": "Sandbox inference after re-onboard: expected PONG, got: ${sandbox_content:0:200}", - "polarity": "fail", - "normalized_id": "sandbox.inference.after.re.onboard.expected.pong.got.sandbox.content.0.200", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-double-onboard.sh", - "line": 524, - "text": "Sandbox inference after re-onboard: no response", - "polarity": "fail", - "normalized_id": "sandbox.inference.after.re.onboard.no.response", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-double-onboard.sh", - "line": 538, - "text": "Sandbox ${SANDBOX_NAME} still in registry after destroy", - "polarity": "fail", - "normalized_id": "sandbox.sandbox.name.still.in.registry.after.destroy", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-double-onboard.sh", - "line": 540, - "text": "Sandbox ${SANDBOX_NAME} removed from registry", - "polarity": "pass", - "normalized_id": "sandbox.sandbox.name.removed.from.registry", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-gpu-double-onboard.sh", - "line": 548, - "text": "Cleanup complete", - "polarity": "pass", - "normalized_id": "cleanup.complete", - "mapping_status": "deferred" - } - ] - }, - { - "script": "test/e2e/test-gpu-e2e.sh", - "assertions": [ - { - "script": "test/e2e/test-gpu-e2e.sh", - "line": 133, - "text": "Pre-cleanup complete", - "polarity": "pass", - "normalized_id": "pre.cleanup.complete", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-e2e.sh", - "line": 141, - "text": "Docker is running", - "polarity": "pass", - "normalized_id": "docker.is.running", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-gpu-e2e.sh", - "line": 143, - "text": "Docker is not running — cannot continue", - "polarity": "fail", - 
"normalized_id": "docker.is.not.running.cannot.continue", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-e2e.sh", - "line": 149, - "text": "nvidia-smi works (GPU VRAM: ${VRAM_MB:-unknown} MB)", - "polarity": "pass", - "normalized_id": "nvidia.smi.works.gpu.vram.vram.mb.unknown.mb", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-e2e.sh", - "line": 151, - "text": "nvidia-smi failed — no NVIDIA GPU available", - "polarity": "fail", - "normalized_id": "nvidia.smi.failed.no.nvidia.gpu.available", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-e2e.sh", - "line": 156, - "text": "NEMOCLAW_NON_INTERACTIVE=1 is required", - "polarity": "fail", - "normalized_id": "nemoclaw.non.interactive.1.is.required", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-e2e.sh", - "line": 161, - "text": "NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 is required for non-interactive install", - "polarity": "fail", - "normalized_id": "nemoclaw.accept.third.party.software.1.is.required.for.non.interactive.install", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-e2e.sh", - "line": 180, - "text": "Ollama already installed: $(ollama --version 2>/dev/null || echo unknown)", - "polarity": "pass", - "normalized_id": "ollama.already.installed.ollama.version.2.dev.null.echo.unknown", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-e2e.sh", - "line": 184, - "text": "Ollama installed: $(ollama --version 2>/dev/null || echo unknown)", - "polarity": "pass", - "normalized_id": "ollama.installed.ollama.version.2.dev.null.echo.unknown", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-e2e.sh", - "line": 186, - "text": "Ollama installation failed", - "polarity": "fail", - "normalized_id": "ollama.installation.failed", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-e2e.sh", - "line": 206, - "text": "Existing Ollama stopped — port 11434 
is free for onboard", - "polarity": "pass", - "normalized_id": "existing.ollama.stopped.port.11434.is.free.for.onboard", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-e2e.sh", - "line": 216, - "text": "Could not cd to repo root: $REPO", - "polarity": "fail", - "normalized_id": "could.not.cd.to.repo.root.repo", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-e2e.sh", - "line": 243, - "text": "install.sh completed (exit 0)", - "polarity": "pass", - "normalized_id": "install.sh.completed.exit.0", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-e2e.sh", - "line": 245, - "text": "install.sh failed (exit $install_exit)", - "polarity": "fail", - "normalized_id": "install.sh.failed.exit.install.exit", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-e2e.sh", - "line": 252, - "text": "nemoclaw on PATH: $(command -v nemoclaw)", - "polarity": "pass", - "normalized_id": "nemoclaw.on.path.command.v.nemoclaw", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-gpu-e2e.sh", - "line": 254, - "text": "nemoclaw not found on PATH after install", - "polarity": "fail", - "normalized_id": "nemoclaw.not.found.on.path.after.install", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-e2e.sh", - "line": 266, - "text": "nemoclaw list contains '${SANDBOX_NAME}'", - "polarity": "pass", - "normalized_id": "nemoclaw.list.contains.sandbox.name", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-gpu-e2e.sh", - "line": 268, - "text": "nemoclaw list does not contain '${SANDBOX_NAME}'", - "polarity": "fail", - "normalized_id": "nemoclaw.list.does.not.contain.sandbox.name", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-e2e.sh", - "line": 271, - "text": "nemoclaw list failed: ${list_output:0:200}", - "polarity": "fail", - "normalized_id": "nemoclaw.list.failed.list.output.0.200", - "mapping_status": "deferred" - }, - { - "script": 
"test/e2e/test-gpu-e2e.sh", - "line": 276, - "text": "nemoclaw ${SANDBOX_NAME} status exits 0", - "polarity": "pass", - "normalized_id": "nemoclaw.sandbox.name.status.exits.0", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-gpu-e2e.sh", - "line": 278, - "text": "nemoclaw ${SANDBOX_NAME} status failed", - "polarity": "fail", - "normalized_id": "nemoclaw.sandbox.name.status.failed", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-e2e.sh", - "line": 284, - "text": "Sandbox GPU is enabled by default", - "polarity": "pass", - "normalized_id": "sandbox.gpu.is.enabled.by.default", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-e2e.sh", - "line": 286, - "text": "Sandbox GPU is not enabled in status output", - "polarity": "fail", - "normalized_id": "sandbox.gpu.is.not.enabled.in.status.output", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-e2e.sh", - "line": 289, - "text": "Could not read sandbox GPU status", - "polarity": "fail", - "normalized_id": "could.not.read.sandbox.gpu.status", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-e2e.sh", - "line": 296, - "text": "Onboard GPU proof passed: nvidia-smi when available", - "polarity": "pass", - "normalized_id": "onboard.gpu.proof.passed.nvidia.smi.when.available", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-e2e.sh", - "line": 298, - "text": "Onboard GPU proof missing: nvidia-smi when available", - "polarity": "fail", - "normalized_id": "onboard.gpu.proof.missing.nvidia.smi.when.available", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-e2e.sh", - "line": 302, - "text": "Onboard GPU proof passed: /proc/self/task//comm write", - "polarity": "pass", - "normalized_id": "onboard.gpu.proof.passed.proc.self.task.tid.comm.write", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-e2e.sh", - "line": 304, - "text": "Onboard GPU proof missing: /proc comm 
write", - "polarity": "fail", - "normalized_id": "onboard.gpu.proof.missing.proc.comm.write", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-e2e.sh", - "line": 308, - "text": "Onboard GPU proof passed: cuInit(0)", - "polarity": "pass", - "normalized_id": "onboard.gpu.proof.passed.cuinit.0", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-e2e.sh", - "line": 310, - "text": "Onboard GPU proof missing: cuInit(0)", - "polarity": "fail", - "normalized_id": "onboard.gpu.proof.missing.cuinit.0", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-e2e.sh", - "line": 316, - "text": "Inference provider is Ollama-based", - "polarity": "pass", - "normalized_id": "inference.provider.is.ollama.based", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-e2e.sh", - "line": 318, - "text": "Inference provider is not ollama — got: ${inf_check:0:200}", - "polarity": "fail", - "normalized_id": "inference.provider.is.not.ollama.got.inf.check.0.200", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-e2e.sh", - "line": 321, - "text": "openshell inference get failed: ${inf_check:0:200}", - "polarity": "fail", - "normalized_id": "openshell.inference.get.failed.inf.check.0.200", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-e2e.sh", - "line": 326, - "text": "Ollama running on 127.0.0.1:11434 (started by onboard)", - "polarity": "pass", - "normalized_id": "ollama.running.on.127.0.0.1.11434.started.by.onboard", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-e2e.sh", - "line": 328, - "text": "Ollama not running — onboard should have started it", - "polarity": "fail", - "normalized_id": "ollama.not.running.onboard.should.have.started.it", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-e2e.sh", - "line": 341, - "text": "Proxy token persisted at $TOKEN_FILE", - "polarity": "pass", - "normalized_id": 
"proxy.token.persisted.at.token.file", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-e2e.sh", - "line": 343, - "text": "Proxy token file missing — onboard did not persist token", - "polarity": "fail", - "normalized_id": "proxy.token.file.missing.onboard.did.not.persist.token", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-e2e.sh", - "line": 350, - "text": "Token file permissions: 600", - "polarity": "pass", - "normalized_id": "token.file.permissions.600", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-e2e.sh", - "line": 352, - "text": "Token file permissions: expected 600, got $PERMS", - "polarity": "fail", - "normalized_id": "token.file.permissions.expected.600.got.perms", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-e2e.sh", - "line": 362, - "text": "Auth proxy running on :${PROXY_PORT} (HTTP $PROXY_LIVE_STATUS)", - "polarity": "pass", - "normalized_id": "auth.proxy.running.on.proxy.port.http.proxy.live.status", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-e2e.sh", - "line": 364, - "text": "Auth proxy not running on :${PROXY_PORT} — onboard should have started it", - "polarity": "fail", - "normalized_id": "auth.proxy.not.running.on.proxy.port.onboard.should.have.started.it", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-e2e.sh", - "line": 371, - "text": "Auth proxy rejects unauthenticated POST (401)", - "polarity": "pass", - "normalized_id": "auth.proxy.rejects.unauthenticated.post.401", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-e2e.sh", - "line": 373, - "text": "Auth proxy should return 401 for unauthenticated POST, got $PROXY_STATUS", - "polarity": "fail", - "normalized_id": "auth.proxy.should.return.401.for.unauthenticated.post.got.proxy.status", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-e2e.sh", - "line": 385, - "text": "Auth proxy accepts correct token 
(status: $PROXY_STATUS)", - "polarity": "pass", - "normalized_id": "auth.proxy.accepts.correct.token.status.proxy.status", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-e2e.sh", - "line": 387, - "text": "Auth proxy rejected the persisted token", - "polarity": "fail", - "normalized_id": "auth.proxy.rejected.the.persisted.token", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-e2e.sh", - "line": 404, - "text": "Container reachable: host.openshell.internal:${PROXY_PORT} (HTTP $CONTAINER_REACH_STATUS)", - "polarity": "pass", - "normalized_id": "container.reachable.host.openshell.internal.proxy.port.http.container.reach.status", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-e2e.sh", - "line": 406, - "text": "Container cannot reach proxy at host.openshell.internal:${PROXY_PORT}", - "polarity": "fail", - "normalized_id": "container.cannot.reach.proxy.at.host.openshell.internal.proxy.port", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-e2e.sh", - "line": 424, - "text": "Proxy still alive after kill (HTTP $DEAD_STATUS)", - "polarity": "fail", - "normalized_id": "proxy.still.alive.after.kill.http.dead.status", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-e2e.sh", - "line": 439, - "text": "Proxy recovered from persisted token after kill (HTTP $RECOVERED_LIVE_STATUS)", - "polarity": "pass", - "normalized_id": "proxy.recovered.from.persisted.token.after.kill.http.recovered.live.status", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-e2e.sh", - "line": 441, - "text": "Proxy did not restart from persisted token", - "polarity": "fail", - "normalized_id": "proxy.did.not.restart.from.persisted.token", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-e2e.sh", - "line": 450, - "text": "Recovered proxy accepts persisted token (status: $RECOVER_STATUS)", - "polarity": "pass", - "normalized_id": 
"recovered.proxy.accepts.persisted.token.status.recover.status", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-e2e.sh", - "line": 452, - "text": "Recovered proxy rejected persisted token", - "polarity": "fail", - "normalized_id": "recovered.proxy.rejected.persisted.token", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-e2e.sh", - "line": 485, - "text": "No models found in Ollama", - "polarity": "fail", - "normalized_id": "no.models.found.in.ollama", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-e2e.sh", - "line": 503, - "text": "[LOCAL] Direct Ollama: model responded with PONG", - "polarity": "pass", - "normalized_id": "local.direct.ollama.model.responded.with.pong", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-e2e.sh", - "line": 505, - "text": "[LOCAL] Direct Ollama: expected PONG, got: ${direct_content:0:200}", - "polarity": "fail", - "normalized_id": "local.direct.ollama.expected.pong.got.direct.content.0.200", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-e2e.sh", - "line": 508, - "text": "[LOCAL] Direct Ollama: empty response", - "polarity": "fail", - "normalized_id": "local.direct.ollama.empty.response", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-e2e.sh", - "line": 548, - "text": "[LOCAL] Sandbox inference: ${sandbox_probe_failure}", - "polarity": "fail", - "normalized_id": "local.sandbox.inference.sandbox.probe.failure", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-e2e.sh", - "line": 552, - "text": "[LOCAL] Sandbox inference: Ollama responded through sandbox", - "polarity": "pass", - "normalized_id": "local.sandbox.inference.ollama.responded.through.sandbox", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-e2e.sh", - "line": 555, - "text": "[LOCAL] Sandbox inference: expected PONG, got: ${sandbox_content:0:200}", - "polarity": "fail", - "normalized_id": 
"local.sandbox.inference.expected.pong.got.sandbox.content.0.200", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-e2e.sh", - "line": 558, - "text": "[LOCAL] Sandbox inference: no response from ${SANDBOX_INFERENCE_URL} inside sandbox", - "polarity": "fail", - "normalized_id": "local.sandbox.inference.no.response.from.sandbox.inference.url.inside.sandbox", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-gpu-e2e.sh", - "line": 575, - "text": "Sandbox ${SANDBOX_NAME} still in registry after destroy", - "polarity": "fail", - "normalized_id": "sandbox.sandbox.name.still.in.registry.after.destroy", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-e2e.sh", - "line": 577, - "text": "Sandbox ${SANDBOX_NAME} removed from registry", - "polarity": "pass", - "normalized_id": "sandbox.sandbox.name.removed.from.registry", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-gpu-e2e.sh", - "line": 588, - "text": "uninstall.sh --delete-models completed", - "polarity": "pass", - "normalized_id": "uninstall.sh.delete.models.completed", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-e2e.sh", - "line": 590, - "text": "uninstall.sh failed", - "polarity": "fail", - "normalized_id": "uninstall.sh.failed", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-e2e.sh", - "line": 594, - "text": "$HOME/.nemoclaw directory still exists after uninstall", - "polarity": "fail", - "normalized_id": "home.nemoclaw.directory.still.exists.after.uninstall", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-gpu-e2e.sh", - "line": 596, - "text": "$HOME/.nemoclaw removed", - "polarity": "pass", - "normalized_id": "home.nemoclaw.removed", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-gpu-e2e.sh", - "line": 603, - "text": "Cleanup complete", - "polarity": "pass", - "normalized_id": "cleanup.complete", - "mapping_status": "deferred" - } - ] - }, - { - 
"script": "test/e2e/test-hermes-discord-e2e.sh", - "assertions": [ - { - "script": "test/e2e/test-hermes-discord-e2e.sh", - "line": 194, - "text": "Docker is running", - "polarity": "pass", - "normalized_id": "docker.is.running", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-hermes-discord-e2e.sh", - "line": 196, - "text": "Docker is not running", - "polarity": "fail", - "normalized_id": "docker.is.not.running", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-discord-e2e.sh", - "line": 201, - "text": "NVIDIA_API_KEY is set (starts with nvapi-)", - "polarity": "pass", - "normalized_id": "nvidia.api.key.is.set.starts.with.nvapi", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-discord-e2e.sh", - "line": 203, - "text": "NVIDIA_API_KEY not set or invalid", - "polarity": "fail", - "normalized_id": "nvidia.api.key.not.set.or.invalid", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-discord-e2e.sh", - "line": 208, - "text": "NEMOCLAW_NON_INTERACTIVE=1", - "polarity": "pass", - "normalized_id": "nemoclaw.non.interactive.1", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-discord-e2e.sh", - "line": 210, - "text": "NEMOCLAW_NON_INTERACTIVE=1 is required", - "polarity": "fail", - "normalized_id": "nemoclaw.non.interactive.1.is.required", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-discord-e2e.sh", - "line": 215, - "text": "NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1", - "polarity": "pass", - "normalized_id": "nemoclaw.accept.third.party.software.1", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-discord-e2e.sh", - "line": 217, - "text": "NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 is required", - "polarity": "fail", - "normalized_id": "nemoclaw.accept.third.party.software.1.is.required", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-discord-e2e.sh", - "line": 231, - "text": "Could 
not cd to repo root: $REPO", - "polarity": "fail", - "normalized_id": "could.not.cd.to.repo.root.repo", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-discord-e2e.sh", - "line": 243, - "text": "Pre-cleanup complete", - "polarity": "pass", - "normalized_id": "pre.cleanup.complete", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-discord-e2e.sh", - "line": 270, - "text": "install.sh completed (exit 0)", - "polarity": "pass", - "normalized_id": "install.sh.completed.exit.0", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-discord-e2e.sh", - "line": 272, - "text": "install.sh failed (exit $install_exit)", - "polarity": "fail", - "normalized_id": "install.sh.failed.exit.install.exit", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-discord-e2e.sh", - "line": 280, - "text": "nemoclaw installed at $(command -v nemoclaw)", - "polarity": "pass", - "normalized_id": "nemoclaw.installed.at.command.v.nemoclaw", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-hermes-discord-e2e.sh", - "line": 282, - "text": "nemoclaw not found on PATH after install", - "polarity": "fail", - "normalized_id": "nemoclaw.not.found.on.path.after.install", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-discord-e2e.sh", - "line": 287, - "text": "openshell installed ($(openshell --version 2>&1 || echo unknown))", - "polarity": "pass", - "normalized_id": "openshell.installed.openshell.version.2.1.echo.unknown", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-hermes-discord-e2e.sh", - "line": 289, - "text": "openshell not found on PATH after install", - "polarity": "fail", - "normalized_id": "openshell.not.found.on.path.after.install", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-discord-e2e.sh", - "line": 297, - "text": "nemoclaw list contains '${SANDBOX_NAME}'", - "polarity": "pass", - "normalized_id": 
"nemoclaw.list.contains.sandbox.name", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-hermes-discord-e2e.sh", - "line": 299, - "text": "nemoclaw list does not contain '${SANDBOX_NAME}'", - "polarity": "fail", - "normalized_id": "nemoclaw.list.does.not.contain.sandbox.name", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-discord-e2e.sh", - "line": 302, - "text": "nemoclaw list failed: ${list_output:0:200}", - "polarity": "fail", - "normalized_id": "nemoclaw.list.failed.list.output.0.200", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-discord-e2e.sh", - "line": 306, - "text": "Discord provider '${SANDBOX_NAME}-discord-bridge' exists in gateway", - "polarity": "pass", - "normalized_id": "discord.provider.sandbox.name.discord.bridge.exists.in.gateway", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-discord-e2e.sh", - "line": 308, - "text": "Discord provider '${SANDBOX_NAME}-discord-bridge' not found in gateway", - "polarity": "fail", - "normalized_id": "discord.provider.sandbox.name.discord.bridge.not.found.in.gateway", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-discord-e2e.sh", - "line": 326, - "text": "Hermes health probe returned ok with Discord enabled", - "polarity": "pass", - "normalized_id": "hermes.health.probe.returned.ok.with.discord.enabled", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-discord-e2e.sh", - "line": 328, - "text": "Hermes health probe did not return ok after 15 attempts", - "polarity": "fail", - "normalized_id": "hermes.health.probe.did.not.return.ok.after.15.attempts", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-discord-e2e.sh", - "line": 382, - "text": "config.yaml uses top-level discord and no platforms.discord", - "polarity": "pass", - "normalized_id": "config.yaml.uses.top.level.discord.and.no.platforms.discord", - "mapping_status": "deferred" - }, - { 
- "script": "test/e2e/test-hermes-discord-e2e.sh", - "line": 384, - "text": "config.yaml schema check failed: ${config_probe:0:400}", - "polarity": "fail", - "normalized_id": "config.yaml.schema.check.failed.config.probe.0.400", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-discord-e2e.sh", - "line": 411, - "text": ".hermes/.env contains Discord placeholder and allowed users", - "polarity": "pass", - "normalized_id": "hermes.env.contains.discord.placeholder.and.allowed.users", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-hermes-discord-e2e.sh", - "line": 413, - "text": ".hermes/.env check failed: ${env_probe:0:400}", - "polarity": "fail", - "normalized_id": "hermes.env.check.failed.env.probe.0.400", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-discord-e2e.sh", - "line": 419, - "text": "Hermetic fake Discord Gateway started on host port ${FAKE_DISCORD_GATEWAY_PORT}", - "polarity": "pass", - "normalized_id": "hermetic.fake.discord.gateway.started.on.host.port.fake.discord.gateway.port", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-discord-e2e.sh", - "line": 421, - "text": "Failed to start hermetic fake Discord Gateway", - "polarity": "fail", - "normalized_id": "failed.to.start.hermetic.fake.discord.gateway", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-discord-e2e.sh", - "line": 426, - "text": "Applied native WebSocket policy with credential rewrite for Hermes fake Discord Gateway", - "polarity": "pass", - "normalized_id": "applied.native.websocket.policy.with.credential.rewrite.for.hermes.fake.discord.gateway", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-discord-e2e.sh", - "line": 428, - "text": "Failed to apply Hermes fake Discord Gateway policy: $(tail -20 /tmp/nemoclaw-hermes-fake-discord-policy.log 2>/dev/null | tr '\\n' ' ' | cut -c1-300)", - "polarity": "fail", - "normalized_id": 
"failed.to.apply.hermes.fake.discord.gateway.policy.tail.20.tmp.nemoclaw.hermes.fake.discord.policy.log.2.dev.null.tr.n.cut.c1.300", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-discord-e2e.sh", - "line": 441, - "text": "Hermes Python Discord Gateway path reaches READY through native OpenShell WebSocket policy", - "polarity": "pass", - "normalized_id": "hermes.python.discord.gateway.path.reaches.ready.through.native.openshell.websocket.policy", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-discord-e2e.sh", - "line": 443, - "text": "Hermes native Gateway probe could not import discord.py: ${native_gateway_protocol:0:300}", - "polarity": "fail", - "normalized_id": "hermes.native.gateway.probe.could.not.import.discord.py.native.gateway.protocol.0.300", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-discord-e2e.sh", - "line": 445, - "text": "Hermes native Gateway protocol probe failed: ${native_gateway_protocol:0:300}", - "polarity": "fail", - "normalized_id": "hermes.native.gateway.protocol.probe.failed.native.gateway.protocol.0.300", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-discord-e2e.sh", - "line": 451, - "text": "Hermes fake Gateway received host-side Discord token while sandbox sent only the placeholder", - "polarity": "pass", - "normalized_id": "hermes.fake.gateway.received.host.side.discord.token.while.sandbox.sent.only.the.placeholder", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-hermes-discord-e2e.sh", - "line": 456, - "text": "Hermes fake Gateway did not prove WebSocket placeholder rewrite", - "polarity": "fail", - "normalized_id": "hermes.fake.gateway.did.not.prove.websocket.placeholder.rewrite", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-hermes-discord-e2e.sh", - "line": 461, - "text": "Raw Discord token absent from Hermes config.yaml and .env", - "polarity": "pass", - "normalized_id": 
"raw.discord.token.absent.from.hermes.config.yaml.and.env", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-discord-e2e.sh", - "line": 463, - "text": "Raw Discord token found in Hermes config files", - "polarity": "fail", - "normalized_id": "raw.discord.token.found.in.hermes.config.files", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-discord-e2e.sh", - "line": 472, - "text": "Raw Discord token found in sandbox environment", - "polarity": "fail", - "normalized_id": "raw.discord.token.found.in.sandbox.environment", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-discord-e2e.sh", - "line": 474, - "text": "Sandbox environment still contains DISCORD_PROXY bridge setting", - "polarity": "fail", - "normalized_id": "sandbox.environment.still.contains.discord.proxy.bridge.setting", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-discord-e2e.sh", - "line": 476, - "text": "Raw Discord token absent from sandbox environment; no DISCORD_PROXY bridge setting", - "polarity": "pass", - "normalized_id": "raw.discord.token.absent.from.sandbox.environment.no.discord.proxy.bridge.setting", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-discord-e2e.sh", - "line": 483, - "text": "Raw Discord token found in sandbox process list", - "polarity": "fail", - "normalized_id": "raw.discord.token.found.in.sandbox.process.list", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-discord-e2e.sh", - "line": 485, - "text": "Raw Discord token absent from sandbox process list", - "polarity": "pass", - "normalized_id": "raw.discord.token.absent.from.sandbox.process.list", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-discord-e2e.sh", - "line": 490, - "text": "Raw Discord token found on sandbox filesystem: ${sandbox_fs_hits:0:200}", - "polarity": "fail", - "normalized_id": 
"raw.discord.token.found.on.sandbox.filesystem.sandbox.fs.hits.0.200", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-discord-e2e.sh", - "line": 492, - "text": "Raw Discord token absent from sandbox filesystem", - "polarity": "pass", - "normalized_id": "raw.discord.token.absent.from.sandbox.filesystem", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-discord-e2e.sh", - "line": 542, - "text": "Discord users/@me returned 200 with configured token", - "polarity": "pass", - "normalized_id": "discord.users.me.returned.200.with.configured.token", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-discord-e2e.sh", - "line": 544, - "text": "Discord users/@me returned 401 - REST path reached Discord; this is not gateway IDENTIFY auth proof", - "polarity": "pass", - "normalized_id": "discord.users.me.returned.401.rest.path.reached.discord.this.is.not.gateway.identify.auth.proof", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-discord-e2e.sh", - "line": 548, - "text": "Discord API call failed: ${dc_error:0:200}", - "polarity": "fail", - "normalized_id": "discord.api.call.failed.dc.error.0.200", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-discord-e2e.sh", - "line": 550, - "text": "Unexpected Discord API response: ${dc_api:0:300}", - "polarity": "fail", - "normalized_id": "unexpected.discord.api.response.dc.api.0.300", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-hermes-discord-e2e.sh", - "line": 577, - "text": "Hermes Discord proof used native WebSocket policy with no local facade, decode proxy, or DISCORD_PROXY residue", - "polarity": "pass", - "normalized_id": "hermes.discord.proof.used.native.websocket.policy.with.no.local.facade.decode.proxy.or.discord.proxy.residue", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-discord-e2e.sh", - "line": 579, - "text": "Local Discord bridge residue found 
after native Gateway proof: ${facade_residue:0:300}", - "polarity": "fail", - "normalized_id": "local.discord.bridge.residue.found.after.native.gateway.proof.facade.residue.0.300", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-discord-e2e.sh", - "line": 592, - "text": "Sandbox ${SANDBOX_NAME} still in registry after destroy", - "polarity": "fail", - "normalized_id": "sandbox.sandbox.name.still.in.registry.after.destroy", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-discord-e2e.sh", - "line": 594, - "text": "Sandbox ${SANDBOX_NAME} removed", - "polarity": "pass", - "normalized_id": "sandbox.sandbox.name.removed", - "mapping_status": "retired" - } - ] - }, - { - "script": "test/e2e/test-hermes-e2e.sh", - "assertions": [ - { - "script": "test/e2e/test-hermes-e2e.sh", - "line": 140, - "text": "Pre-cleanup complete", - "polarity": "pass", - "normalized_id": "pre.cleanup.complete", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-e2e.sh", - "line": 148, - "text": "Docker is running", - "polarity": "pass", - "normalized_id": "docker.is.running", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-hermes-e2e.sh", - "line": 150, - "text": "Docker is not running — cannot continue", - "polarity": "fail", - "normalized_id": "docker.is.not.running.cannot.continue", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-e2e.sh", - "line": 155, - "text": "NVIDIA_API_KEY is set (starts with nvapi-)", - "polarity": "pass", - "normalized_id": "nvidia.api.key.is.set.starts.with.nvapi", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-e2e.sh", - "line": 157, - "text": "NVIDIA_API_KEY not set or invalid — required for live inference", - "polarity": "fail", - "normalized_id": "nvidia.api.key.not.set.or.invalid.required.for.live.inference", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-e2e.sh", - "line": 162, - "text": 
"Network access to integrate.api.nvidia.com", - "polarity": "pass", - "normalized_id": "network.access.to.integrate.api.nvidia.com", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-e2e.sh", - "line": 164, - "text": "Cannot reach integrate.api.nvidia.com", - "polarity": "fail", - "normalized_id": "cannot.reach.integrate.api.nvidia.com", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-e2e.sh", - "line": 169, - "text": "NEMOCLAW_NON_INTERACTIVE=1 is required", - "polarity": "fail", - "normalized_id": "nemoclaw.non.interactive.1.is.required", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-e2e.sh", - "line": 174, - "text": "NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 is required for non-interactive install", - "polarity": "fail", - "normalized_id": "nemoclaw.accept.third.party.software.1.is.required.for.non.interactive.install", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-e2e.sh", - "line": 180, - "text": "agents/hermes/ directory and manifest.yaml exist", - "polarity": "pass", - "normalized_id": "agents.hermes.directory.and.manifest.yaml.exist", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-e2e.sh", - "line": 182, - "text": "agents/hermes/ not found — is the hermes-agent-support branch checked out?", - "polarity": "fail", - "normalized_id": "agents.hermes.not.found.is.the.hermes.agent.support.branch.checked.out", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-e2e.sh", - "line": 194, - "text": "Could not cd to repo root: $REPO", - "polarity": "fail", - "normalized_id": "could.not.cd.to.repo.root.repo", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-e2e.sh", - "line": 232, - "text": "install.sh completed (exit 0)", - "polarity": "pass", - "normalized_id": "install.sh.completed.exit.0", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-e2e.sh", - "line": 234, - 
"text": "install.sh failed (exit $install_exit)", - "polarity": "fail", - "normalized_id": "install.sh.failed.exit.install.exit", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-e2e.sh", - "line": 241, - "text": "nemoclaw installed at $(command -v nemoclaw)", - "polarity": "pass", - "normalized_id": "nemoclaw.installed.at.command.v.nemoclaw", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-hermes-e2e.sh", - "line": 243, - "text": "nemoclaw not found on PATH after install", - "polarity": "fail", - "normalized_id": "nemoclaw.not.found.on.path.after.install", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-e2e.sh", - "line": 249, - "text": "openshell installed ($(openshell --version 2>&1 || echo unknown))", - "polarity": "pass", - "normalized_id": "openshell.installed.openshell.version.2.1.echo.unknown", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-hermes-e2e.sh", - "line": 251, - "text": "openshell not found on PATH after install", - "polarity": "fail", - "normalized_id": "openshell.not.found.on.path.after.install", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-e2e.sh", - "line": 256, - "text": "nemoclaw --help exits 0", - "polarity": "pass", - "normalized_id": "nemoclaw.help.exits.0", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-hermes-e2e.sh", - "line": 258, - "text": "nemoclaw --help failed", - "polarity": "fail", - "normalized_id": "nemoclaw.help.failed", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-e2e.sh", - "line": 269, - "text": "nemoclaw list contains '${SANDBOX_NAME}'", - "polarity": "pass", - "normalized_id": "nemoclaw.list.contains.sandbox.name", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-hermes-e2e.sh", - "line": 271, - "text": "nemoclaw list does not contain '${SANDBOX_NAME}'", - "polarity": "fail", - "normalized_id": "nemoclaw.list.does.not.contain.sandbox.name", - 
"mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-e2e.sh", - "line": 274, - "text": "nemoclaw list failed: ${list_output:0:200}", - "polarity": "fail", - "normalized_id": "nemoclaw.list.failed.list.output.0.200", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-e2e.sh", - "line": 279, - "text": "nemoclaw ${SANDBOX_NAME} status exits 0", - "polarity": "pass", - "normalized_id": "nemoclaw.sandbox.name.status.exits.0", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-hermes-e2e.sh", - "line": 281, - "text": "nemoclaw ${SANDBOX_NAME} status failed: ${status_output:0:200}", - "polarity": "fail", - "normalized_id": "nemoclaw.sandbox.name.status.failed.status.output.0.200", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-e2e.sh", - "line": 288, - "text": "Onboard session records agent=hermes", - "polarity": "pass", - "normalized_id": "onboard.session.records.agent.hermes", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-e2e.sh", - "line": 290, - "text": "Onboard session does not contain agent=hermes", - "polarity": "fail", - "normalized_id": "onboard.session.does.not.contain.agent.hermes", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-e2e.sh", - "line": 294, - "text": "Session file not found: $session_file", - "polarity": "fail", - "normalized_id": "session.file.not.found.session.file", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-e2e.sh", - "line": 300, - "text": "Inference configured via onboard", - "polarity": "pass", - "normalized_id": "inference.configured.via.onboard", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-hermes-e2e.sh", - "line": 302, - "text": "Inference not configured — onboard did not set up nvidia-prod provider", - "polarity": "fail", - "normalized_id": "inference.not.configured.onboard.did.not.set.up.nvidia.prod.provider", - "mapping_status": "deferred" - }, - { - 
"script": "test/e2e/test-hermes-e2e.sh", - "line": 305, - "text": "openshell inference get failed: ${inf_check:0:200}", - "polarity": "fail", - "normalized_id": "openshell.inference.get.failed.inf.check.0.200", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-e2e.sh", - "line": 311, - "text": "Policy applied to sandbox", - "polarity": "pass", - "normalized_id": "policy.applied.to.sandbox", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-hermes-e2e.sh", - "line": 313, - "text": "No network policy found on sandbox", - "polarity": "fail", - "normalized_id": "no.network.policy.found.on.sandbox", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-e2e.sh", - "line": 316, - "text": "openshell policy get failed: ${policy_output:0:200}", - "polarity": "fail", - "normalized_id": "openshell.policy.get.failed.policy.output.0.200", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-e2e.sh", - "line": 354, - "text": "Hermes health probe returned ok", - "polarity": "pass", - "normalized_id": "hermes.health.probe.returned.ok", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-e2e.sh", - "line": 357, - "text": "Hermes health probe did not return ok after 15 attempts", - "polarity": "fail", - "normalized_id": "hermes.health.probe.did.not.return.ok.after.15.attempts", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-e2e.sh", - "line": 361, - "text": "Could not get SSH config for sandbox ${SANDBOX_NAME}", - "polarity": "fail", - "normalized_id": "could.not.get.ssh.config.for.sandbox.sandbox.name", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-e2e.sh", - "line": 376, - "text": "Hermes binary not found in sandbox", - "polarity": "fail", - "normalized_id": "hermes.binary.not.found.in.sandbox", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-e2e.sh", - "line": 378, - "text": "Hermes binary found in 
sandbox: ${hermes_version:0:100}", - "polarity": "pass", - "normalized_id": "hermes.binary.found.in.sandbox.hermes.version.0.100", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-e2e.sh", - "line": 393, - "text": "Hermes config.yaml exists at /sandbox/.hermes/config.yaml", - "polarity": "pass", - "normalized_id": "hermes.config.yaml.exists.at.sandbox.hermes.config.yaml", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-e2e.sh", - "line": 395, - "text": "Hermes config.yaml not found at /sandbox/.hermes/config.yaml", - "polarity": "fail", - "normalized_id": "hermes.config.yaml.not.found.at.sandbox.hermes.config.yaml", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-e2e.sh", - "line": 409, - "text": "Hermes config directory is writable (mutable default)", - "polarity": "pass", - "normalized_id": "hermes.config.directory.is.writable.mutable.default", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-e2e.sh", - "line": 411, - "text": "Hermes config directory is read-only — should be writable by default", - "polarity": "fail", - "normalized_id": "hermes.config.directory.is.read.only.should.be.writable.by.default", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-e2e.sh", - "line": 427, - "text": "Hermes config/state directory exists at /sandbox/.hermes", - "polarity": "pass", - "normalized_id": "hermes.config.state.directory.exists.at.sandbox.hermes", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-e2e.sh", - "line": 429, - "text": "Hermes config/state directory not found at /sandbox/.hermes", - "polarity": "fail", - "normalized_id": "hermes.config.state.directory.not.found.at.sandbox.hermes", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-e2e.sh", - "line": 454, - "text": "[LIVE] Direct API: model responded with PONG", - "polarity": "pass", - "normalized_id": 
"live.direct.api.model.responded.with.pong", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-hermes-e2e.sh", - "line": 456, - "text": "[LIVE] Direct API: expected PONG, got: ${api_content:0:200}", - "polarity": "fail", - "normalized_id": "live.direct.api.expected.pong.got.api.content.0.200", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-e2e.sh", - "line": 459, - "text": "[LIVE] Direct API: empty response from curl", - "polarity": "fail", - "normalized_id": "live.direct.api.empty.response.from.curl", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-e2e.sh", - "line": 492, - "text": "[ROUTING] inference.local: OpenShell routed curl to NVIDIA Endpoints and returned PONG", - "polarity": "pass", - "normalized_id": "routing.inference.local.openshell.routed.curl.to.nvidia.endpoints.and.returned.pong", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-hermes-e2e.sh", - "line": 495, - "text": "[ROUTING] inference.local: expected PONG, got: ${sandbox_content:0:200}", - "polarity": "fail", - "normalized_id": "routing.inference.local.expected.pong.got.sandbox.content.0.200", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-hermes-e2e.sh", - "line": 498, - "text": "[ROUTING] inference.local: no response from inference.local inside Hermes sandbox", - "polarity": "fail", - "normalized_id": "routing.inference.local.no.response.from.inference.local.inside.hermes.sandbox", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-hermes-e2e.sh", - "line": 510, - "text": "nemoclaw logs: produced output ($(echo ", - "polarity": "pass", - "normalized_id": "nemoclaw.logs.produced.output.echo", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-e2e.sh", - "line": 512, - "text": "nemoclaw logs: no output", - "polarity": "fail", - "normalized_id": "nemoclaw.logs.no.output", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-e2e.sh", - 
"line": 535, - "text": "OpenClaw agent manifest loads correctly", - "polarity": "pass", - "normalized_id": "openclaw.agent.manifest.loads.correctly", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-e2e.sh", - "line": 537, - "text": "OpenClaw agent manifest failed to load", - "polarity": "fail", - "normalized_id": "openclaw.agent.manifest.failed.to.load", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-e2e.sh", - "line": 542, - "text": "Hermes agent manifest loads correctly", - "polarity": "pass", - "normalized_id": "hermes.agent.manifest.loads.correctly", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-e2e.sh", - "line": 544, - "text": "Hermes agent manifest failed to load", - "polarity": "fail", - "normalized_id": "hermes.agent.manifest.failed.to.load", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-e2e.sh", - "line": 549, - "text": "Both agents listed by listAgents()", - "polarity": "pass", - "normalized_id": "both.agents.listed.by.listagents", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-e2e.sh", - "line": 551, - "text": "listAgents() did not return both openclaw and hermes", - "polarity": "fail", - "normalized_id": "listagents.did.not.return.both.openclaw.and.hermes", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-e2e.sh", - "line": 568, - "text": "Sandbox ${SANDBOX_NAME} still in registry after destroy", - "polarity": "fail", - "normalized_id": "sandbox.sandbox.name.still.in.registry.after.destroy", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-e2e.sh", - "line": 570, - "text": "Sandbox ${SANDBOX_NAME} removed", - "polarity": "pass", - "normalized_id": "sandbox.sandbox.name.removed", - "mapping_status": "retired" - } - ] - }, - { - "script": "test/e2e/test-hermes-inference-switch.sh", - "assertions": [ - { - "script": "test/e2e/test-hermes-inference-switch.sh", - "line": 84, 
- "text": "OpenShell inference get failed: ${output:0:240}", - "polarity": "fail", - "normalized_id": "openshell.inference.get.failed.output.0.240", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-inference-switch.sh", - "line": 91, - "text": "OpenShell route points at ${SWITCH_PROVIDER} / ${SWITCH_MODEL}", - "polarity": "pass", - "normalized_id": "openshell.route.points.at.switch.provider.switch.model", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-inference-switch.sh", - "line": 93, - "text": "OpenShell route did not switch to ${SWITCH_PROVIDER} / ${SWITCH_MODEL}: ${plain_output:0:400}", - "polarity": "fail", - "normalized_id": "openshell.route.did.not.switch.to.switch.provider.switch.model.plain.output.0.400", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-inference-switch.sh", - "line": 155, - "text": "Registry/session were not updated for switch: ${probe:0:400}", - "polarity": "fail", - "normalized_id": "registry.session.were.not.updated.for.switch.probe.0.400", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-inference-switch.sh", - "line": 158, - "text": "Registry and onboard session record the switched Hermes provider/model", - "polarity": "pass", - "normalized_id": "registry.and.onboard.session.record.the.switched.hermes.provider.model", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-inference-switch.sh", - "line": 167, - "text": "Hermes health endpoint returns ok", - "polarity": "pass", - "normalized_id": "hermes.health.endpoint.returns.ok", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-inference-switch.sh", - "line": 172, - "text": "Hermes health endpoint did not return ok: ${health_response:0:240}", - "polarity": "fail", - "normalized_id": "hermes.health.endpoint.did.not.return.ok.health.response.0.240", - "mapping_status": "deferred" - }, - { - "script": 
"test/e2e/test-hermes-inference-switch.sh", - "line": 178, - "text": "Could not read /sandbox/.hermes/config.yaml: ${config:0:240}", - "polarity": "fail", - "normalized_id": "could.not.read.sandbox.hermes.config.yaml.config.0.240", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-inference-switch.sh", - "line": 226, - "text": "Hermes config.yaml was not patched correctly: ${probe:0:400}", - "polarity": "fail", - "normalized_id": "hermes.config.yaml.was.not.patched.correctly.probe.0.400", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-inference-switch.sh", - "line": 229, - "text": "Hermes config.yaml model block uses ${SWITCH_MODEL} via inference.local", - "polarity": "pass", - "normalized_id": "hermes.config.yaml.model.block.uses.switch.model.via.inference.local", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-hermes-inference-switch.sh", - "line": 237, - "text": "Hermes strict config hash matches config.yaml and .env", - "polarity": "pass", - "normalized_id": "hermes.strict.config.hash.matches.config.yaml.and.env", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-inference-switch.sh", - "line": 239, - "text": "Hermes strict config hash check failed: ${strict_check:0:240}", - "polarity": "fail", - "normalized_id": "hermes.strict.config.hash.check.failed.strict.check.0.240", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-inference-switch.sh", - "line": 245, - "text": "Hermes compatibility config hash matches config.yaml and .env", - "polarity": "pass", - "normalized_id": "hermes.compatibility.config.hash.matches.config.yaml.and.env", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-inference-switch.sh", - "line": 247, - "text": "Hermes compatibility config hash check failed: ${compat_check:0:240}", - "polarity": "fail", - "normalized_id": "hermes.compatibility.config.hash.check.failed.compat.check.0.240", - 
"mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-inference-switch.sh", - "line": 264, - "text": "Hermes strict hash is root-owned and not writable", - "polarity": "pass", - "normalized_id": "hermes.strict.hash.is.root.owned.and.not.writable", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-inference-switch.sh", - "line": 266, - "text": "Hermes strict hash permissions are wrong: ${perms_probe:0:120}", - "polarity": "fail", - "normalized_id": "hermes.strict.hash.permissions.are.wrong.perms.probe.0.120", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-inference-switch.sh", - "line": 274, - "text": "Hermes .env was not rewritten by inference set", - "polarity": "pass", - "normalized_id": "hermes.env.was.not.rewritten.by.inference.set", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-inference-switch.sh", - "line": 276, - "text": "Hermes .env hash changed during inference set (${ENV_HASH_BEFORE:-missing} -> ${after:-missing})", - "polarity": "fail", - "normalized_id": "hermes.env.hash.changed.during.inference.set.env.hash.before.missing.after.missing", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-inference-switch.sh", - "line": 305, - "text": "Hermes sandbox inference.local returned PONG with ${SWITCH_MODEL}", - "polarity": "pass", - "normalized_id": "hermes.sandbox.inference.local.returned.pong.with.switch.model", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-hermes-inference-switch.sh", - "line": 317, - "text": "Hermes sandbox inference.local did not work after switch: ${last_fail}", - "polarity": "fail", - "normalized_id": "hermes.sandbox.inference.local.did.not.work.after.switch.last.fail", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-hermes-inference-switch.sh", - "line": 343, - "text": "Hermes API chat works after inference switch", - "polarity": "pass", - "normalized_id": 
"hermes.api.chat.works.after.inference.switch", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-inference-switch.sh", - "line": 355, - "text": "Hermes API chat did not work after switch: ${last_fail}", - "polarity": "fail", - "normalized_id": "hermes.api.chat.did.not.work.after.switch.last.fail", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-inference-switch.sh", - "line": 392, - "text": "Pre-cleanup complete", - "polarity": "pass", - "normalized_id": "pre.cleanup.complete", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-inference-switch.sh", - "line": 396, - "text": "Docker is running", - "polarity": "pass", - "normalized_id": "docker.is.running", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-hermes-inference-switch.sh", - "line": 398, - "text": "Docker is not running", - "polarity": "fail", - "normalized_id": "docker.is.not.running", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-inference-switch.sh", - "line": 403, - "text": "NVIDIA_API_KEY is set", - "polarity": "pass", - "normalized_id": "nvidia.api.key.is.set", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-inference-switch.sh", - "line": 405, - "text": "NVIDIA_API_KEY not set or invalid", - "polarity": "fail", - "normalized_id": "nvidia.api.key.not.set.or.invalid", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-inference-switch.sh", - "line": 410, - "text": "NEMOCLAW_NON_INTERACTIVE=1", - "polarity": "pass", - "normalized_id": "nemoclaw.non.interactive.1", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-inference-switch.sh", - "line": 412, - "text": "NEMOCLAW_NON_INTERACTIVE=1 is required", - "polarity": "fail", - "normalized_id": "nemoclaw.non.interactive.1.is.required", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-inference-switch.sh", - "line": 417, - "text": "Third-party 
software acceptance is set", - "polarity": "pass", - "normalized_id": "third.party.software.acceptance.is.set", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-inference-switch.sh", - "line": 419, - "text": "NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 is required", - "polarity": "fail", - "normalized_id": "nemoclaw.accept.third.party.software.1.is.required", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-inference-switch.sh", - "line": 425, - "text": "Could not cd to repo root: $REPO", - "polarity": "fail", - "normalized_id": "could.not.cd.to.repo.root.repo", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-inference-switch.sh", - "line": 449, - "text": "install.sh completed", - "polarity": "pass", - "normalized_id": "install.sh.completed", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-inference-switch.sh", - "line": 451, - "text": "install.sh failed (exit ${install_exit})", - "polarity": "fail", - "normalized_id": "install.sh.failed.exit.install.exit", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-inference-switch.sh", - "line": 457, - "text": "nemohermes not found on PATH", - "polarity": "fail", - "normalized_id": "nemohermes.not.found.on.path", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-inference-switch.sh", - "line": 461, - "text": "openshell not found on PATH", - "polarity": "fail", - "normalized_id": "openshell.not.found.on.path", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-inference-switch.sh", - "line": 464, - "text": "nemohermes and openshell are on PATH", - "polarity": "pass", - "normalized_id": "nemohermes.and.openshell.are.on.path", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-inference-switch.sh", - "line": 475, - "text": "nemohermes inference set completed without --sandbox", - "polarity": "pass", - "normalized_id": 
"nemohermes.inference.set.completed.without.sandbox", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-inference-switch.sh", - "line": 477, - "text": "nemohermes inference set failed (exit ${switch_rc}): ${switch_output:0:500}", - "polarity": "fail", - "normalized_id": "nemohermes.inference.set.failed.exit.switch.rc.switch.output.0.500", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-inference-switch.sh", - "line": 484, - "text": "Hermes gateway process stayed running during switch", - "polarity": "pass", - "normalized_id": "hermes.gateway.process.stayed.running.during.switch", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-inference-switch.sh", - "line": 486, - "text": "Hermes gateway process changed during switch (${pid_before} -> ${pid_after})", - "polarity": "fail", - "normalized_id": "hermes.gateway.process.changed.during.switch.pid.before.pid.after", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-inference-switch.sh", - "line": 510, - "text": "Sandbox ${SANDBOX_NAME} still in registry after destroy", - "polarity": "fail", - "normalized_id": "sandbox.sandbox.name.still.in.registry.after.destroy", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-inference-switch.sh", - "line": 512, - "text": "Sandbox ${SANDBOX_NAME} removed", - "polarity": "pass", - "normalized_id": "sandbox.sandbox.name.removed", - "mapping_status": "retired" - } - ] - }, - { - "script": "test/e2e/test-hermes-slack-e2e.sh", - "assertions": [ - { - "script": "test/e2e/test-hermes-slack-e2e.sh", - "line": 170, - "text": "Docker is running", - "polarity": "pass", - "normalized_id": "docker.is.running", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-hermes-slack-e2e.sh", - "line": 172, - "text": "Docker is not running", - "polarity": "fail", - "normalized_id": "docker.is.not.running", - "mapping_status": "deferred" - }, - { - "script": 
"test/e2e/test-hermes-slack-e2e.sh", - "line": 177, - "text": "NVIDIA_API_KEY is set (starts with nvapi-)", - "polarity": "pass", - "normalized_id": "nvidia.api.key.is.set.starts.with.nvapi", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-slack-e2e.sh", - "line": 179, - "text": "NVIDIA_API_KEY not set or invalid", - "polarity": "fail", - "normalized_id": "nvidia.api.key.not.set.or.invalid", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-slack-e2e.sh", - "line": 184, - "text": "NEMOCLAW_NON_INTERACTIVE=1", - "polarity": "pass", - "normalized_id": "nemoclaw.non.interactive.1", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-slack-e2e.sh", - "line": 186, - "text": "NEMOCLAW_NON_INTERACTIVE=1 is required", - "polarity": "fail", - "normalized_id": "nemoclaw.non.interactive.1.is.required", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-slack-e2e.sh", - "line": 191, - "text": "NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1", - "polarity": "pass", - "normalized_id": "nemoclaw.accept.third.party.software.1", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-slack-e2e.sh", - "line": 193, - "text": "NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 is required", - "polarity": "fail", - "normalized_id": "nemoclaw.accept.third.party.software.1.is.required", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-slack-e2e.sh", - "line": 204, - "text": "Could not cd to repo root: $REPO", - "polarity": "fail", - "normalized_id": "could.not.cd.to.repo.root.repo", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-slack-e2e.sh", - "line": 218, - "text": "Pre-cleanup complete", - "polarity": "pass", - "normalized_id": "pre.cleanup.complete", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-slack-e2e.sh", - "line": 245, - "text": "install.sh completed (exit 0)", - "polarity": "pass", - "normalized_id": 
"install.sh.completed.exit.0", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-slack-e2e.sh", - "line": 247, - "text": "install.sh failed (exit $install_exit)", - "polarity": "fail", - "normalized_id": "install.sh.failed.exit.install.exit", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-slack-e2e.sh", - "line": 255, - "text": "nemoclaw installed at $(command -v nemoclaw)", - "polarity": "pass", - "normalized_id": "nemoclaw.installed.at.command.v.nemoclaw", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-hermes-slack-e2e.sh", - "line": 257, - "text": "nemoclaw not found on PATH after install", - "polarity": "fail", - "normalized_id": "nemoclaw.not.found.on.path.after.install", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-slack-e2e.sh", - "line": 262, - "text": "openshell installed ($(openshell --version 2>&1 || echo unknown))", - "polarity": "pass", - "normalized_id": "openshell.installed.openshell.version.2.1.echo.unknown", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-hermes-slack-e2e.sh", - "line": 264, - "text": "openshell not found on PATH after install", - "polarity": "fail", - "normalized_id": "openshell.not.found.on.path.after.install", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-slack-e2e.sh", - "line": 272, - "text": "nemoclaw list contains '${SANDBOX_NAME}'", - "polarity": "pass", - "normalized_id": "nemoclaw.list.contains.sandbox.name", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-hermes-slack-e2e.sh", - "line": 274, - "text": "nemoclaw list does not contain '${SANDBOX_NAME}'", - "polarity": "fail", - "normalized_id": "nemoclaw.list.does.not.contain.sandbox.name", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-slack-e2e.sh", - "line": 277, - "text": "nemoclaw list failed: ${list_output:0:200}", - "polarity": "fail", - "normalized_id": 
"nemoclaw.list.failed.list.output.0.200", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-slack-e2e.sh", - "line": 281, - "text": "Slack bot provider '${SANDBOX_NAME}-slack-bridge' exists in gateway", - "polarity": "pass", - "normalized_id": "slack.bot.provider.sandbox.name.slack.bridge.exists.in.gateway", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-slack-e2e.sh", - "line": 283, - "text": "Slack bot provider '${SANDBOX_NAME}-slack-bridge' not found in gateway", - "polarity": "fail", - "normalized_id": "slack.bot.provider.sandbox.name.slack.bridge.not.found.in.gateway", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-slack-e2e.sh", - "line": 287, - "text": "Slack app provider '${SANDBOX_NAME}-slack-app' exists in gateway", - "polarity": "pass", - "normalized_id": "slack.app.provider.sandbox.name.slack.app.exists.in.gateway", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-slack-e2e.sh", - "line": 289, - "text": "Slack app provider '${SANDBOX_NAME}-slack-app' not found in gateway", - "polarity": "fail", - "normalized_id": "slack.app.provider.sandbox.name.slack.app.not.found.in.gateway", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-slack-e2e.sh", - "line": 307, - "text": "Hermes health probe returned ok with Slack enabled", - "polarity": "pass", - "normalized_id": "hermes.health.probe.returned.ok.with.slack.enabled", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-slack-e2e.sh", - "line": 309, - "text": "Hermes health probe did not return ok after 15 attempts", - "polarity": "fail", - "normalized_id": "hermes.health.probe.did.not.return.ok.after.15.attempts", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-slack-e2e.sh", - "line": 342, - "text": "config.yaml has no generic platforms.slack block or Slack token keys", - "polarity": "pass", - "normalized_id": 
"config.yaml.has.no.generic.platforms.slack.block.or.slack.token.keys", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-slack-e2e.sh", - "line": 344, - "text": "config.yaml check failed: ${config_probe:0:400}", - "polarity": "fail", - "normalized_id": "config.yaml.check.failed.config.probe.0.400", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-slack-e2e.sh", - "line": 366, - "text": ".hermes/.env contains Slack SDK-shaped resolver placeholders", - "polarity": "pass", - "normalized_id": "hermes.env.contains.slack.sdk.shaped.resolver.placeholders", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-hermes-slack-e2e.sh", - "line": 368, - "text": ".hermes/.env check failed: ${env_probe:0:400}", - "polarity": "fail", - "normalized_id": "hermes.env.check.failed.env.probe.0.400", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-slack-e2e.sh", - "line": 373, - "text": "Raw Slack tokens absent from Hermes config files and logs", - "polarity": "pass", - "normalized_id": "raw.slack.tokens.absent.from.hermes.config.files.and.logs", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-slack-e2e.sh", - "line": 375, - "text": "Raw Slack token found in Hermes config files or logs", - "polarity": "fail", - "normalized_id": "raw.slack.token.found.in.hermes.config.files.or.logs", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-slack-e2e.sh", - "line": 382, - "text": "Raw Slack token found in sandbox process list", - "polarity": "fail", - "normalized_id": "raw.slack.token.found.in.sandbox.process.list", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-slack-e2e.sh", - "line": 384, - "text": "Raw Slack tokens absent from sandbox process list", - "polarity": "pass", - "normalized_id": "raw.slack.tokens.absent.from.sandbox.process.list", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-slack-e2e.sh", 
- "line": 397, - "text": "Sandbox policy contains Slack network policy", - "polarity": "pass", - "normalized_id": "sandbox.policy.contains.slack.network.policy", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-slack-e2e.sh", - "line": 399, - "text": "Sandbox policy missing Slack network policy", - "polarity": "fail", - "normalized_id": "sandbox.policy.missing.slack.network.policy", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-slack-e2e.sh", - "line": 405, - "text": "Slack policy is scoped to Hermes and Python binaries", - "polarity": "pass", - "normalized_id": "slack.policy.is.scoped.to.hermes.and.python.binaries", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-slack-e2e.sh", - "line": 407, - "text": "Slack policy missing Hermes/Python binary allowlist", - "polarity": "fail", - "normalized_id": "slack.policy.missing.hermes.python.binary.allowlist", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-slack-e2e.sh", - "line": 412, - "text": "Slack policy was replaced by or widened to Node", - "polarity": "fail", - "normalized_id": "slack.policy.was.replaced.by.or.widened.to.node", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-slack-e2e.sh", - "line": 414, - "text": "Slack policy does not allow Node", - "polarity": "pass", - "normalized_id": "slack.policy.does.not.allow.node", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-slack-e2e.sh", - "line": 419, - "text": "Slack policy includes Socket Mode websocket hosts", - "polarity": "pass", - "normalized_id": "slack.policy.includes.socket.mode.websocket.hosts", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-slack-e2e.sh", - "line": 421, - "text": "Slack policy missing Socket Mode websocket hosts", - "polarity": "fail", - "normalized_id": "slack.policy.missing.socket.mode.websocket.hosts", - "mapping_status": "deferred" - }, - { - "script": 
"test/e2e/test-hermes-slack-e2e.sh", - "line": 425, - "text": "Slack REST policy enables OpenShell request-body credential rewrite", - "polarity": "pass", - "normalized_id": "slack.rest.policy.enables.openshell.request.body.credential.rewrite", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-slack-e2e.sh", - "line": 427, - "text": "Slack policy missing request_body_credential_rewrite for REST alias rewrite", - "polarity": "fail", - "normalized_id": "slack.policy.missing.request.body.credential.rewrite.for.rest.alias.rewrite", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-slack-e2e.sh", - "line": 430, - "text": "openshell policy get failed: ${policy_output:0:200}", - "polarity": "fail", - "normalized_id": "openshell.policy.get.failed.policy.output.0.200", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-slack-e2e.sh", - "line": 448, - "text": "Hermes Slack sandbox has no decode proxy or Python placeholder-normalization preload", - "polarity": "pass", - "normalized_id": "hermes.slack.sandbox.has.no.decode.proxy.or.python.placeholder.normalization.preload", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-hermes-slack-e2e.sh", - "line": 450, - "text": "Hermes Slack bridge residue found: ${bridge_residue:0:300}", - "polarity": "fail", - "normalized_id": "hermes.slack.bridge.residue.found.bridge.residue.0.300", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-slack-e2e.sh", - "line": 537, - "text": "Slack API reached from Python through OpenShell alias substitution", - "polarity": "pass", - "normalized_id": "slack.api.reached.from.python.through.openshell.alias.substitution", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-slack-e2e.sh", - "line": 541, - "text": "Slack Python API probe failed: ${slack_probe:0:400}", - "polarity": "fail", - "normalized_id": "slack.python.api.probe.failed.slack.probe.0.400", - "mapping_status": 
"deferred" - }, - { - "script": "test/e2e/test-hermes-slack-e2e.sh", - "line": 544, - "text": "Unexpected Slack Python API response: ${slack_probe:0:400}", - "polarity": "fail", - "normalized_id": "unexpected.slack.python.api.response.slack.probe.0.400", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-hermes-slack-e2e.sh", - "line": 556, - "text": "Sandbox ${SANDBOX_NAME} still in registry after destroy", - "polarity": "fail", - "normalized_id": "sandbox.sandbox.name.still.in.registry.after.destroy", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-slack-e2e.sh", - "line": 558, - "text": "Sandbox ${SANDBOX_NAME} removed", - "polarity": "pass", - "normalized_id": "sandbox.sandbox.name.removed", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-hermes-slack-e2e.sh", - "line": 562, - "text": "Slack app provider still exists after destroy", - "polarity": "fail", - "normalized_id": "slack.app.provider.still.exists.after.destroy", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-hermes-slack-e2e.sh", - "line": 565, - "text": "Slack app provider removed", - "polarity": "pass", - "normalized_id": "slack.app.provider.removed", - "mapping_status": "retired" - } - ] - }, - { - "script": "test/e2e/test-inference-routing.sh", - "assertions": [ - { - "script": "test/e2e/test-inference-routing.sh", - "line": 211, - "text": "TC-INF-05: Setup", - "polarity": "fail", - "normalized_id": "tc.inf.05.setup", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-inference-routing.sh", - "line": 220, - "text": "TC-INF-05: Setup", - "polarity": "fail", - "normalized_id": "tc.inf.05.setup", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-inference-routing.sh", - "line": 230, - "text": "TC-INF-05a: Env vars", - "polarity": "fail", - "normalized_id": "tc.inf.05a.env.vars", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-inference-routing.sh", - "line": 232, - "text": 
"TC-INF-05a: Real API key absent from sandbox environment", - "polarity": "pass", - "normalized_id": "tc.inf.05a.real.api.key.absent.from.sandbox.environment", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-inference-routing.sh", - "line": 239, - "text": "TC-INF-05b: Process list", - "polarity": "fail", - "normalized_id": "tc.inf.05b.process.list", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-inference-routing.sh", - "line": 241, - "text": "TC-INF-05b: Real API key absent from sandbox process list", - "polarity": "pass", - "normalized_id": "tc.inf.05b.real.api.key.absent.from.sandbox.process.list", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-inference-routing.sh", - "line": 271, - "text": "TC-INF-05c: Filesystem", - "polarity": "fail", - "normalized_id": "tc.inf.05c.filesystem", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-inference-routing.sh", - "line": 273, - "text": "TC-INF-05c: Filesystem", - "polarity": "fail", - "normalized_id": "tc.inf.05c.filesystem", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-inference-routing.sh", - "line": 275, - "text": "TC-INF-05c: Real API key absent from sandbox filesystem", - "polarity": "pass", - "normalized_id": "tc.inf.05c.real.api.key.absent.from.sandbox.filesystem", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-inference-routing.sh", - "line": 277, - "text": "TC-INF-05c: Filesystem", - "polarity": "fail", - "normalized_id": "tc.inf.05c.filesystem", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-inference-routing.sh", - "line": 284, - "text": "TC-INF-05d: Placeholder token present in sandbox (not the real key)", - "polarity": "pass", - "normalized_id": "tc.inf.05d.placeholder.token.present.in.sandbox.not.the.real.key", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-inference-routing.sh", - "line": 286, - "text": "TC-INF-05d: Placeholder", - "polarity": "fail", 
- "normalized_id": "tc.inf.05d.placeholder", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-inference-routing.sh", - "line": 310, - "text": "TC-INF-06: Exit code", - "polarity": "fail", - "normalized_id": "tc.inf.06.exit.code", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-inference-routing.sh", - "line": 313, - "text": "TC-INF-06: Onboard failed as expected (exit $exit_code)", - "polarity": "pass", - "normalized_id": "tc.inf.06.onboard.failed.as.expected.exit.exit.code", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-inference-routing.sh", - "line": 317, - "text": "TC-INF-06: Output contains classified error message", - "polarity": "pass", - "normalized_id": "tc.inf.06.output.contains.classified.error.message", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-inference-routing.sh", - "line": 319, - "text": "TC-INF-06: Error classification", - "polarity": "fail", - "normalized_id": "tc.inf.06.error.classification", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-inference-routing.sh", - "line": 328, - "text": "TC-INF-06: Stack trace", - "polarity": "fail", - "normalized_id": "tc.inf.06.stack.trace", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-inference-routing.sh", - "line": 330, - "text": "TC-INF-06: No raw stack trace in output", - "polarity": "pass", - "normalized_id": "tc.inf.06.no.raw.stack.trace.in.output", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-inference-routing.sh", - "line": 335, - "text": "TC-INF-06: Key exposure", - "polarity": "fail", - "normalized_id": "tc.inf.06.key.exposure", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-inference-routing.sh", - "line": 337, - "text": "TC-INF-06: API key not exposed in output", - "polarity": "pass", - "normalized_id": "tc.inf.06.api.key.not.exposed.in.output", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-inference-routing.sh", - 
"line": 344, - "text": "TC-INF-06: Sandbox cleanup", - "polarity": "fail", - "normalized_id": "tc.inf.06.sandbox.cleanup", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-inference-routing.sh", - "line": 347, - "text": "TC-INF-06: No active sandbox left behind (correct)", - "polarity": "pass", - "normalized_id": "tc.inf.06.no.active.sandbox.left.behind.correct", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-inference-routing.sh", - "line": 378, - "text": "TC-INF-07: Exit code", - "polarity": "fail", - "normalized_id": "tc.inf.07.exit.code", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-inference-routing.sh", - "line": 381, - "text": "TC-INF-07: Onboard failed as expected (exit $exit_code)", - "polarity": "pass", - "normalized_id": "tc.inf.07.onboard.failed.as.expected.exit.exit.code", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-inference-routing.sh", - "line": 385, - "text": "TC-INF-07: Output contains transport error classification", - "polarity": "pass", - "normalized_id": "tc.inf.07.output.contains.transport.error.classification", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-inference-routing.sh", - "line": 387, - "text": "TC-INF-07: Error classification", - "polarity": "fail", - "normalized_id": "tc.inf.07.error.classification", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-inference-routing.sh", - "line": 396, - "text": "TC-INF-07: Stack trace", - "polarity": "fail", - "normalized_id": "tc.inf.07.stack.trace", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-inference-routing.sh", - "line": 398, - "text": "TC-INF-07: No raw stack trace in output", - "polarity": "pass", - "normalized_id": "tc.inf.07.no.raw.stack.trace.in.output", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-inference-routing.sh", - "line": 405, - "text": "TC-INF-07: Sandbox cleanup", - "polarity": "fail", - "normalized_id": 
"tc.inf.07.sandbox.cleanup", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-inference-routing.sh", - "line": 408, - "text": "TC-INF-07: No active sandbox left behind (correct)", - "polarity": "pass", - "normalized_id": "tc.inf.07.no.active.sandbox.left.behind.correct", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-inference-routing.sh", - "line": 448, - "text": "TC-INF-02: Onboard", - "polarity": "fail", - "normalized_id": "tc.inf.02.onboard", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-inference-routing.sh", - "line": 451, - "text": "TC-INF-02: Onboard with OpenAI succeeded", - "polarity": "pass", - "normalized_id": "tc.inf.02.onboard.with.openai.succeeded", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-inference-routing.sh", - "line": 456, - "text": "TC-INF-02: SSH", - "polarity": "fail", - "normalized_id": "tc.inf.02.ssh", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-inference-routing.sh", - "line": 479, - "text": "TC-INF-02: OpenAI inference response received through sandbox proxy", - "polarity": "pass", - "normalized_id": "tc.inf.02.openai.inference.response.received.through.sandbox.proxy", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-inference-routing.sh", - "line": 481, - "text": "TC-INF-02: OpenAI response received (content: ${content:0:100})", - "polarity": "pass", - "normalized_id": "tc.inf.02.openai.response.received.content.content.0.100", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-inference-routing.sh", - "line": 483, - "text": "TC-INF-02: Inference", - "polarity": "fail", - "normalized_id": "tc.inf.02.inference", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-inference-routing.sh", - "line": 522, - "text": "TC-INF-03: Onboard", - "polarity": "fail", - "normalized_id": "tc.inf.03.onboard", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-inference-routing.sh", - 
"line": 525, - "text": "TC-INF-03: Onboard with Anthropic succeeded", - "polarity": "pass", - "normalized_id": "tc.inf.03.onboard.with.anthropic.succeeded", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-inference-routing.sh", - "line": 530, - "text": "TC-INF-03: SSH", - "polarity": "fail", - "normalized_id": "tc.inf.03.ssh", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-inference-routing.sh", - "line": 562, - "text": "TC-INF-03: Anthropic inference response received through sandbox proxy", - "polarity": "pass", - "normalized_id": "tc.inf.03.anthropic.inference.response.received.through.sandbox.proxy", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-inference-routing.sh", - "line": 564, - "text": "TC-INF-03: Anthropic response received (content: ${content:0:100})", - "polarity": "pass", - "normalized_id": "tc.inf.03.anthropic.response.received.content.content.0.100", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-inference-routing.sh", - "line": 566, - "text": "TC-INF-03: Inference", - "polarity": "fail", - "normalized_id": "tc.inf.03.inference", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-inference-routing.sh", - "line": 609, - "text": "TC-INF-09: Onboard", - "polarity": "fail", - "normalized_id": "tc.inf.09.onboard", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-inference-routing.sh", - "line": 612, - "text": "TC-INF-09: Onboard with compatible endpoint succeeded", - "polarity": "pass", - "normalized_id": "tc.inf.09.onboard.with.compatible.endpoint.succeeded", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-inference-routing.sh", - "line": 618, - "text": "TC-INF-09: SSH", - "polarity": "fail", - "normalized_id": "tc.inf.09.ssh", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-inference-routing.sh", - "line": 642, - "text": "TC-INF-09: Inference response received through sandbox proxy", - "polarity": 
"pass", - "normalized_id": "tc.inf.09.inference.response.received.through.sandbox.proxy", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-inference-routing.sh", - "line": 644, - "text": "TC-INF-09: Inference response received (content: ${content:0:100})", - "polarity": "pass", - "normalized_id": "tc.inf.09.inference.response.received.content.content.0.100", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-inference-routing.sh", - "line": 646, - "text": "TC-INF-09: Inference", - "polarity": "fail", - "normalized_id": "tc.inf.09.inference", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-inference-routing.sh", - "line": 648, - "text": "TC-INF-09: Inference", - "polarity": "fail", - "normalized_id": "tc.inf.09.inference", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-inference-routing.sh", - "line": 676, - "text": "$PASS${NC}", - "polarity": "pass", - "normalized_id": "pass.nc", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-inference-routing.sh", - "line": 677, - "text": "$FAIL${NC}", - "polarity": "fail", - "normalized_id": "fail.nc", - "mapping_status": "deferred" - } - ] - }, - { - "script": "test/e2e/test-issue-2478-crash-loop-recovery.sh", - "assertions": [ - { - "script": "test/e2e/test-issue-2478-crash-loop-recovery.sh", - "line": 254, - "text": "${context}: connect --probe-only exited nonzero", - "polarity": "fail", - "normalized_id": "context.connect.probe.only.exited.nonzero", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-issue-2478-crash-loop-recovery.sh", - "line": 286, - "text": "Docker is not running", - "polarity": "fail", - "normalized_id": "docker.is.not.running", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-issue-2478-crash-loop-recovery.sh", - "line": 289, - "text": "Docker running", - "polarity": "pass", - "normalized_id": "docker.running", - "mapping_status": "deferred" - }, - { - "script": 
"test/e2e/test-issue-2478-crash-loop-recovery.sh", - "line": 292, - "text": "NVIDIA_API_KEY not set or invalid", - "polarity": "fail", - "normalized_id": "nvidia.api.key.not.set.or.invalid", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-issue-2478-crash-loop-recovery.sh", - "line": 295, - "text": "NVIDIA_API_KEY set", - "polarity": "pass", - "normalized_id": "nvidia.api.key.set", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-issue-2478-crash-loop-recovery.sh", - "line": 298, - "text": "NEMOCLAW_NON_INTERACTIVE=1 and NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 are required", - "polarity": "fail", - "normalized_id": "nemoclaw.non.interactive.1.and.nemoclaw.accept.third.party.software.1.are.required", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-issue-2478-crash-loop-recovery.sh", - "line": 301, - "text": "Required env vars set", - "polarity": "pass", - "normalized_id": "required.env.vars.set", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-issue-2478-crash-loop-recovery.sh", - "line": 316, - "text": "cd $REPO_ROOT", - "polarity": "fail", - "normalized_id": "cd.repo.root", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-issue-2478-crash-loop-recovery.sh", - "line": 330, - "text": "install.sh failed (exit $install_exit). 
Last 30 lines:", - "polarity": "fail", - "normalized_id": "install.sh.failed.exit.install.exit.last.30.lines", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-issue-2478-crash-loop-recovery.sh", - "line": 336, - "text": "install.sh + onboard completed", - "polarity": "pass", - "normalized_id": "install.sh.onboard.completed", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-issue-2478-crash-loop-recovery.sh", - "line": 345, - "text": "nemoclaw not on PATH after install", - "polarity": "fail", - "normalized_id": "nemoclaw.not.on.path.after.install", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-issue-2478-crash-loop-recovery.sh", - "line": 348, - "text": "nemoclaw on PATH", - "polarity": "pass", - "normalized_id": "nemoclaw.on.path", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-issue-2478-crash-loop-recovery.sh", - "line": 357, - "text": "Gateway never came up after onboard", - "polarity": "fail", - "normalized_id": "gateway.never.came.up.after.onboard", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-issue-2478-crash-loop-recovery.sh", - "line": 361, - "text": "Gateway up (pid=$INIT_PID)", - "polarity": "pass", - "normalized_id": "gateway.up.pid.init.pid", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-issue-2478-crash-loop-recovery.sh", - "line": 364, - "text": "Initial gateway has guard chain active (proxy-env exports + gateway preloads loaded)", - "polarity": "pass", - "normalized_id": "initial.gateway.has.guard.chain.active.proxy.env.exports.gateway.preloads.loaded", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-issue-2478-crash-loop-recovery.sh", - "line": 366, - "text": "Initial gateway missing library guard chain — fix is not deployed?", - "polarity": "fail", - "normalized_id": "initial.gateway.missing.library.guard.chain.fix.is.not.deployed", - "mapping_status": "deferred" - }, - { - "script": 
"test/e2e/test-issue-2478-crash-loop-recovery.sh", - "line": 372, - "text": "Initial gateway serves inference API (https://inference.local/v1/models responds)", - "polarity": "pass", - "normalized_id": "initial.gateway.serves.inference.api.https.inference.local.v1.models.responds", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-issue-2478-crash-loop-recovery.sh", - "line": 374, - "text": "Initial gateway alive but not serving inference — recovery is incomplete from user POV", - "polarity": "fail", - "normalized_id": "initial.gateway.alive.but.not.serving.inference.recovery.is.incomplete.from.user.pov", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-issue-2478-crash-loop-recovery.sh", - "line": 397, - "text": "Cycle $cycle: connect --probe-only did not leave /tmp/gateway.log evidence", - "polarity": "fail", - "normalized_id": "cycle.cycle.connect.probe.only.did.not.leave.tmp.gateway.log.evidence", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-issue-2478-crash-loop-recovery.sh", - "line": 404, - "text": "Cycle $cycle: gateway did not respawn within 45s", - "polarity": "fail", - "normalized_id": "cycle.cycle.gateway.did.not.respawn.within.45s", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-issue-2478-crash-loop-recovery.sh", - "line": 409, - "text": "Cycle $cycle: PID unchanged ($new_pid) — kill did not land", - "polarity": "fail", - "normalized_id": "cycle.cycle.pid.unchanged.new.pid.kill.did.not.land", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-issue-2478-crash-loop-recovery.sh", - "line": 412, - "text": "Cycle $cycle: gateway respawned (pid $prev_pid → $new_pid)", - "polarity": "pass", - "normalized_id": "cycle.cycle.gateway.respawned.pid.prev.pid.new.pid", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-issue-2478-crash-loop-recovery.sh", - "line": 415, - "text": "Cycle $cycle: respawned gateway retains guard chain (proxy-env + gateway 
preloads loaded)", - "polarity": "pass", - "normalized_id": "cycle.cycle.respawned.gateway.retains.guard.chain.proxy.env.gateway.preloads.loaded", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-issue-2478-crash-loop-recovery.sh", - "line": 417, - "text": "Cycle $cycle: respawned gateway LOST guard chain — recovery hardening regressed", - "polarity": "fail", - "normalized_id": "cycle.cycle.respawned.gateway.lost.guard.chain.recovery.hardening.regressed", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-issue-2478-crash-loop-recovery.sh", - "line": 424, - "text": "Cycle $cycle: respawned gateway serves inference API", - "polarity": "pass", - "normalized_id": "cycle.cycle.respawned.gateway.serves.inference.api", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-issue-2478-crash-loop-recovery.sh", - "line": 426, - "text": "Cycle $cycle: gateway up + guards active but inference API not serving", - "polarity": "fail", - "normalized_id": "cycle.cycle.gateway.up.guards.active.but.inference.api.not.serving", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-issue-2478-crash-loop-recovery.sh", - "line": 448, - "text": "proxy-env.sh is empty/missing already — cannot run negative case", - "polarity": "fail", - "normalized_id": "proxy.env.sh.is.empty.missing.already.cannot.run.negative.case", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-issue-2478-crash-loop-recovery.sh", - "line": 473, - "text": "Recovery emitted [gateway-recovery] WARNING when proxy-env.sh missing", - "polarity": "pass", - "normalized_id": "recovery.emitted.gateway.recovery.warning.when.proxy.env.sh.missing", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-issue-2478-crash-loop-recovery.sh", - "line": 475, - "text": "Recovery silently launched without warning (regression of #2478 fix)", - "polarity": "fail", - "normalized_id": "recovery.silently.launched.without.warning.regression.of.2478.fix", - 
"mapping_status": "deferred" - }, - { - "script": "test/e2e/test-issue-2478-crash-loop-recovery.sh", - "line": 480, - "text": "Recovery warning was logged, but gateway did not respawn within 45s", - "polarity": "fail", - "normalized_id": "recovery.warning.was.logged.but.gateway.did.not.respawn.within.45s", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-issue-2478-crash-loop-recovery.sh", - "line": 495, - "text": "proxy-env.sh restore failed: expected $SNAPSHOT_SIZE bytes, got '${restored_size}'", - "polarity": "fail", - "normalized_id": "proxy.env.sh.restore.failed.expected.snapshot.size.bytes.got.restored.size", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-issue-2478-crash-loop-recovery.sh", - "line": 506, - "text": "Gateway not up entering soak phase", - "polarity": "fail", - "normalized_id": "gateway.not.up.entering.soak.phase", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-issue-2478-crash-loop-recovery.sh", - "line": 513, - "text": "Gateway up but guards not active entering soak — restore did not take", - "polarity": "fail", - "normalized_id": "gateway.up.but.guards.not.active.entering.soak.restore.did.not.take", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-issue-2478-crash-loop-recovery.sh", - "line": 518, - "text": "Gateway alive + guards active but inference API not serving entering soak", - "polarity": "fail", - "normalized_id": "gateway.alive.guards.active.but.inference.api.not.serving.entering.soak", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-issue-2478-crash-loop-recovery.sh", - "line": 522, - "text": "Gateway healthy with guards active and inference API serving (pid=$SOAK_START_PID)", - "polarity": "pass", - "normalized_id": "gateway.healthy.with.guards.active.and.inference.api.serving.pid.soak.start.pid", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-issue-2478-crash-loop-recovery.sh", - "line": 567, - "text": "No crash-loop 
detected during soak ($distinct distinct PIDs, $empty_samples empty samples)", - "polarity": "pass", - "normalized_id": "no.crash.loop.detected.during.soak.distinct.distinct.pids.empty.samples.empty.samples", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-issue-2478-crash-loop-recovery.sh", - "line": 569, - "text": "Crash-loop signature: $distinct distinct PIDs and $empty_samples empty samples in ${SOAK_SECONDS}s", - "polarity": "fail", - "normalized_id": "crash.loop.signature.distinct.distinct.pids.and.empty.samples.empty.samples.in.soak.seconds.s", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-issue-2478-crash-loop-recovery.sh", - "line": 579, - "text": "Inference API available throughout soak ($inference_probes/$inference_probes probes succeeded)", - "polarity": "pass", - "normalized_id": "inference.api.available.throughout.soak.inference.probes.inference.probes.probes.succeeded", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-issue-2478-crash-loop-recovery.sh", - "line": 581, - "text": "Inference API unavailable during soak ($inference_failures/$inference_probes probes failed)", - "polarity": "fail", - "normalized_id": "inference.api.unavailable.during.soak.inference.failures.inference.probes.probes.failed", - "mapping_status": "deferred" - } - ] - }, - { - "script": "test/e2e/test-kimi-inference-compat.sh", - "assertions": [ - { - "script": "test/e2e/test-kimi-inference-compat.sh", - "line": 402, - "text": "K1: source CLI/OpenShell preparation failed (exit $prep_exit)", - "polarity": "fail", - "normalized_id": "k1.source.cli.openshell.preparation.failed.exit.prep.exit", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-kimi-inference-compat.sh", - "line": 414, - "text": "K1: onboard completed for Kimi compatible endpoint sandbox", - "polarity": "pass", - "normalized_id": "k1.onboard.completed.for.kimi.compatible.endpoint.sandbox", - "mapping_status": "deferred" - }, - { - "script": 
"test/e2e/test-kimi-inference-compat.sh", - "line": 416, - "text": "K1: onboard failed (exit $onboard_exit)", - "polarity": "fail", - "normalized_id": "k1.onboard.failed.exit.onboard.exit", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-kimi-inference-compat.sh", - "line": 482, - "text": "K2: openclaw.json has managed Kimi compat and plugin wiring", - "polarity": "pass", - "normalized_id": "k2.openclaw.json.has.managed.kimi.compat.and.plugin.wiring", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-kimi-inference-compat.sh", - "line": 484, - "text": "K2: openclaw.json Kimi compat/plugin wiring is wrong", - "polarity": "fail", - "normalized_id": "k2.openclaw.json.kimi.compat.plugin.wiring.is.wrong", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-kimi-inference-compat.sh", - "line": 492, - "text": "K3: sandbox inference.local models route reaches Kimi mock", - "polarity": "pass", - "normalized_id": "k3.sandbox.inference.local.models.route.reaches.kimi.mock", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-kimi-inference-compat.sh", - "line": 494, - "text": "K3: sandbox inference.local models route failed (${response:0:400})", - "polarity": "fail", - "normalized_id": "k3.sandbox.inference.local.models.route.failed.response.0.400", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-kimi-inference-compat.sh", - "line": 504, - "text": "K4: OpenClaw agent completed after Kimi tool results", - "polarity": "pass", - "normalized_id": "k4.openclaw.agent.completed.after.kimi.tool.results", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-kimi-inference-compat.sh", - "line": 506, - "text": "K4: OpenClaw agent did not complete successfully (exit $agent_exit)", - "polarity": "fail", - "normalized_id": "k4.openclaw.agent.did.not.complete.successfully.exit.agent.exit", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-kimi-inference-compat.sh", - "line": 671, 
- "text": "K5: trajectory proves split Kimi exec calls completed cleanly", - "polarity": "pass", - "normalized_id": "k5.trajectory.proves.split.kimi.exec.calls.completed.cleanly", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-kimi-inference-compat.sh", - "line": 673, - "text": "K5: trajectory acceptance checks failed", - "polarity": "fail", - "normalized_id": "k5.trajectory.acceptance.checks.failed", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-kimi-inference-compat.sh", - "line": 681, - "text": "K6: Kimi mock observed authenticated streamed tool-call and final-answer traffic", - "polarity": "pass", - "normalized_id": "k6.kimi.mock.observed.authenticated.streamed.tool.call.and.final.answer.traffic", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-kimi-inference-compat.sh", - "line": 683, - "text": "K6: Kimi mock did not observe both streamed agent requests", - "polarity": "fail", - "normalized_id": "k6.kimi.mock.did.not.observe.both.streamed.agent.requests", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-kimi-inference-compat.sh", - "line": 726, - "text": "Docker is not running", - "polarity": "fail", - "normalized_id": "docker.is.not.running", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-kimi-inference-compat.sh", - "line": 729, - "text": "Docker is running", - "polarity": "pass", - "normalized_id": "docker.is.running", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-kimi-inference-compat.sh", - "line": 732, - "text": "python3 not found", - "polarity": "fail", - "normalized_id": "python3.not.found", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-kimi-inference-compat.sh", - "line": 735, - "text": "python3 is available", - "polarity": "pass", - "normalized_id": "python3.is.available", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-kimi-inference-compat.sh", - "line": 745, - "text": "K0: Kimi-compatible mock 
endpoint started", - "polarity": "pass", - "normalized_id": "k0.kimi.compatible.mock.endpoint.started", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-kimi-inference-compat.sh", - "line": 747, - "text": "K0: Kimi-compatible mock endpoint failed to start", - "polarity": "fail", - "normalized_id": "k0.kimi.compatible.mock.endpoint.failed.to.start", - "mapping_status": "deferred" - } - ] - }, - { - "script": "test/e2e/test-launchable-smoke.sh", - "assertions": [ - { - "script": "test/e2e/test-launchable-smoke.sh", - "line": 164, - "text": "Pre-cleanup complete (clone dir pre-seeded)", - "polarity": "pass", - "normalized_id": "pre.cleanup.complete.clone.dir.pre.seeded", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-launchable-smoke.sh", - "line": 172, - "text": "Docker is running", - "polarity": "pass", - "normalized_id": "docker.is.running", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-launchable-smoke.sh", - "line": 174, - "text": "Docker is not running — cannot continue", - "polarity": "fail", - "normalized_id": "docker.is.not.running.cannot.continue", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-launchable-smoke.sh", - "line": 179, - "text": "NVIDIA_API_KEY is set (starts with nvapi-)", - "polarity": "pass", - "normalized_id": "nvidia.api.key.is.set.starts.with.nvapi", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-launchable-smoke.sh", - "line": 181, - "text": "NVIDIA_API_KEY not set or invalid — required for live inference", - "polarity": "fail", - "normalized_id": "nvidia.api.key.not.set.or.invalid.required.for.live.inference", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-launchable-smoke.sh", - "line": 186, - "text": "Network access to integrate.api.nvidia.com", - "polarity": "pass", - "normalized_id": "network.access.to.integrate.api.nvidia.com", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-launchable-smoke.sh", - 
"line": 188, - "text": "Cannot reach integrate.api.nvidia.com", - "polarity": "fail", - "normalized_id": "cannot.reach.integrate.api.nvidia.com", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-launchable-smoke.sh", - "line": 193, - "text": "NEMOCLAW_NON_INTERACTIVE=1 is required", - "polarity": "fail", - "normalized_id": "nemoclaw.non.interactive.1.is.required", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-launchable-smoke.sh", - "line": 198, - "text": "NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 is required for non-interactive install", - "polarity": "fail", - "normalized_id": "nemoclaw.accept.third.party.software.1.is.required.for.non.interactive.install", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-launchable-smoke.sh", - "line": 203, - "text": "brev-launchable-ci-cpu.sh found at $REPO/scripts/", - "polarity": "pass", - "normalized_id": "brev.launchable.ci.cpu.sh.found.at.repo.scripts", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-launchable-smoke.sh", - "line": 205, - "text": "brev-launchable-ci-cpu.sh not found", - "polarity": "fail", - "normalized_id": "brev.launchable.ci.cpu.sh.not.found", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-launchable-smoke.sh", - "line": 235, - "text": "brev-launchable-ci-cpu.sh completed (exit 0)", - "polarity": "pass", - "normalized_id": "brev.launchable.ci.cpu.sh.completed.exit.0", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-launchable-smoke.sh", - "line": 237, - "text": "brev-launchable-ci-cpu.sh failed (exit $install_exit)", - "polarity": "fail", - "normalized_id": "brev.launchable.ci.cpu.sh.failed.exit.install.exit", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-launchable-smoke.sh", - "line": 263, - "text": "nemoclaw on PATH: $(command -v nemoclaw)", - "polarity": "pass", - "normalized_id": "nemoclaw.on.path.command.v.nemoclaw", - "mapping_status": "mapped" - }, - { - "script": 
"test/e2e/test-launchable-smoke.sh", - "line": 265, - "text": "nemoclaw not found on PATH after launchable install", - "polarity": "fail", - "normalized_id": "nemoclaw.not.found.on.path.after.launchable.install", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-launchable-smoke.sh", - "line": 269, - "text": "nemoclaw --help exits 0", - "polarity": "pass", - "normalized_id": "nemoclaw.help.exits.0", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-launchable-smoke.sh", - "line": 271, - "text": "nemoclaw --help failed", - "polarity": "fail", - "normalized_id": "nemoclaw.help.failed", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-launchable-smoke.sh", - "line": 277, - "text": "openshell on PATH: $(command -v openshell) (${os_version})", - "polarity": "pass", - "normalized_id": "openshell.on.path.command.v.openshell.os.version", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-launchable-smoke.sh", - "line": 279, - "text": "openshell not found on PATH after launchable install", - "polarity": "fail", - "normalized_id": "openshell.not.found.on.path.after.launchable.install", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-launchable-smoke.sh", - "line": 291, - "text": "Node.js >= 22 installed: ${node_version}", - "polarity": "pass", - "normalized_id": "node.js.22.installed.node.version", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-launchable-smoke.sh", - "line": 300, - "text": "Node.js version too old: ${node_version} (need >= 20)", - "polarity": "fail", - "normalized_id": "node.js.version.too.old.node.version.need.20", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-launchable-smoke.sh", - "line": 304, - "text": "Node.js not found on PATH after launchable install", - "polarity": "fail", - "normalized_id": "node.js.not.found.on.path.after.launchable.install", - "mapping_status": "deferred" - }, - { - "script": 
"test/e2e/test-launchable-smoke.sh", - "line": 309, - "text": "Docker running after launchable install", - "polarity": "pass", - "normalized_id": "docker.running.after.launchable.install", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-launchable-smoke.sh", - "line": 311, - "text": "Docker not running after launchable install", - "polarity": "fail", - "normalized_id": "docker.not.running.after.launchable.install", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-launchable-smoke.sh", - "line": 317, - "text": "Sentinel file exists: $SENTINEL", - "polarity": "pass", - "normalized_id": "sentinel.file.exists.sentinel", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-launchable-smoke.sh", - "line": 319, - "text": "Sentinel file missing: $SENTINEL", - "polarity": "fail", - "normalized_id": "sentinel.file.missing.sentinel", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-launchable-smoke.sh", - "line": 324, - "text": "NemoClaw cloned at $NEMOCLAW_CLONE_DIR", - "polarity": "pass", - "normalized_id": "nemoclaw.cloned.at.nemoclaw.clone.dir", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-launchable-smoke.sh", - "line": 326, - "text": "NemoClaw clone directory missing: $NEMOCLAW_CLONE_DIR", - "polarity": "fail", - "normalized_id": "nemoclaw.clone.directory.missing.nemoclaw.clone.dir", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-launchable-smoke.sh", - "line": 330, - "text": "CLI built (dist/ exists)", - "polarity": "pass", - "normalized_id": "cli.built.dist.exists", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-launchable-smoke.sh", - "line": 332, - "text": "CLI not built (dist/ missing)", - "polarity": "fail", - "normalized_id": "cli.not.built.dist.missing", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-launchable-smoke.sh", - "line": 336, - "text": "Plugin built (nemoclaw/dist/ exists)", - "polarity": "pass", - 
"normalized_id": "plugin.built.nemoclaw.dist.exists", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-launchable-smoke.sh", - "line": 338, - "text": "Plugin not built (nemoclaw/dist/ missing)", - "polarity": "fail", - "normalized_id": "plugin.not.built.nemoclaw.dist.missing", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-launchable-smoke.sh", - "line": 349, - "text": "Could not cd to $NEMOCLAW_CLONE_DIR", - "polarity": "fail", - "normalized_id": "could.not.cd.to.nemoclaw.clone.dir", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-launchable-smoke.sh", - "line": 371, - "text": "nemoclaw onboard completed (exit 0)", - "polarity": "pass", - "normalized_id": "nemoclaw.onboard.completed.exit.0", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-launchable-smoke.sh", - "line": 373, - "text": "nemoclaw onboard failed (exit $onboard_exit)", - "polarity": "fail", - "normalized_id": "nemoclaw.onboard.failed.exit.onboard.exit", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-launchable-smoke.sh", - "line": 387, - "text": "nemoclaw list contains '${SANDBOX_NAME}'", - "polarity": "pass", - "normalized_id": "nemoclaw.list.contains.sandbox.name", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-launchable-smoke.sh", - "line": 389, - "text": "nemoclaw list does not contain '${SANDBOX_NAME}'", - "polarity": "fail", - "normalized_id": "nemoclaw.list.does.not.contain.sandbox.name", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-launchable-smoke.sh", - "line": 392, - "text": "nemoclaw list failed: ${list_output:0:200}", - "polarity": "fail", - "normalized_id": "nemoclaw.list.failed.list.output.0.200", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-launchable-smoke.sh", - "line": 397, - "text": "nemoclaw ${SANDBOX_NAME} status exits 0", - "polarity": "pass", - "normalized_id": "nemoclaw.sandbox.name.status.exits.0", - "mapping_status": 
"mapped" - }, - { - "script": "test/e2e/test-launchable-smoke.sh", - "line": 399, - "text": "nemoclaw ${SANDBOX_NAME} status failed: ${status_output:0:200}", - "polarity": "fail", - "normalized_id": "nemoclaw.sandbox.name.status.failed.status.output.0.200", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-launchable-smoke.sh", - "line": 405, - "text": "Inference configured via onboard (nvidia-prod)", - "polarity": "pass", - "normalized_id": "inference.configured.via.onboard.nvidia.prod", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-launchable-smoke.sh", - "line": 407, - "text": "Inference not configured — onboard did not set up nvidia-prod provider", - "polarity": "fail", - "normalized_id": "inference.not.configured.onboard.did.not.set.up.nvidia.prod.provider", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-launchable-smoke.sh", - "line": 410, - "text": "openshell inference get failed: ${inf_check:0:200}", - "polarity": "fail", - "normalized_id": "openshell.inference.get.failed.inf.check.0.200", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-launchable-smoke.sh", - "line": 415, - "text": "Gateway container running", - "polarity": "pass", - "normalized_id": "gateway.container.running", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-launchable-smoke.sh", - "line": 440, - "text": "[LIVE] Direct API: model responded with PONG", - "polarity": "pass", - "normalized_id": "live.direct.api.model.responded.with.pong", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-launchable-smoke.sh", - "line": 442, - "text": "[LIVE] Direct API: expected PONG, got: ${api_content:0:200}", - "polarity": "fail", - "normalized_id": "live.direct.api.expected.pong.got.api.content.0.200", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-launchable-smoke.sh", - "line": 445, - "text": "[LIVE] Direct API: empty response from curl", - "polarity": "fail", - 
"normalized_id": "live.direct.api.empty.response.from.curl", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-launchable-smoke.sh", - "line": 502, - "text": "[ROUTING] inference.local: OpenShell routed curl to NVIDIA Endpoints and returned PONG", - "polarity": "pass", - "normalized_id": "routing.inference.local.openshell.routed.curl.to.nvidia.endpoints.and.returned.pong", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-launchable-smoke.sh", - "line": 504, - "text": "[ROUTING] inference.local: expected PONG after 3 attempts, got: ${sandbox_content:0:200}", - "polarity": "fail", - "normalized_id": "routing.inference.local.expected.pong.after.3.attempts.got.sandbox.content.0.200", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-launchable-smoke.sh", - "line": 540, - "text": "[LIVE] openclaw agent: model answered 6×7=42 through openclaw → inference.local", - "polarity": "pass", - "normalized_id": "live.openclaw.agent.model.answered.6.7.42.through.openclaw.inference.local", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-launchable-smoke.sh", - "line": 542, - "text": "[LIVE] openclaw agent: expected '42' in agent reply, got: ${agent_reply:0:200}", - "polarity": "fail", - "normalized_id": "live.openclaw.agent.expected.42.in.agent.reply.got.agent.reply.0.200", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-launchable-smoke.sh", - "line": 557, - "text": "Sandbox ${SANDBOX_NAME} still in registry after destroy", - "polarity": "fail", - "normalized_id": "sandbox.sandbox.name.still.in.registry.after.destroy", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-launchable-smoke.sh", - "line": 559, - "text": "Sandbox ${SANDBOX_NAME} removed", - "polarity": "pass", - "normalized_id": "sandbox.sandbox.name.removed", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-launchable-smoke.sh", - "line": 565, - "text": "Launchable clone directory cleaned up", - 
"polarity": "pass", - "normalized_id": "launchable.clone.directory.cleaned.up", - "mapping_status": "deferred" - } - ] - }, - { - "script": "test/e2e/test-messaging-compatible-endpoint.sh", - "assertions": [ - { - "script": "test/e2e/test-messaging-compatible-endpoint.sh", - "line": 365, - "text": "C1: ${onboard_cmd_desc} completed for compatible endpoint + Telegram", - "polarity": "pass", - "normalized_id": "c1.onboard.cmd.desc.completed.for.compatible.endpoint.telegram", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-compatible-endpoint.sh", - "line": 367, - "text": "C1: ${onboard_cmd_desc} failed (exit $onboard_exit)", - "polarity": "fail", - "normalized_id": "c1.onboard.cmd.desc.failed.exit.onboard.exit", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-compatible-endpoint.sh", - "line": 418, - "text": "C3: openclaw.json uses managed inference.local provider and Telegram config", - "polarity": "pass", - "normalized_id": "c3.openclaw.json.uses.managed.inference.local.provider.and.telegram.config", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-messaging-compatible-endpoint.sh", - "line": 420, - "text": "C3: openclaw.json compatible endpoint shape is wrong", - "polarity": "fail", - "normalized_id": "c3.openclaw.json.compatible.endpoint.shape.is.wrong", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-compatible-endpoint.sh", - "line": 458, - "text": "C4: Gateway stayed up after Telegram provider initialization", - "polarity": "pass", - "normalized_id": "c4.gateway.stayed.up.after.telegram.provider.initialization", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-compatible-endpoint.sh", - "line": 460, - "text": "C4: Gateway is not serving after Telegram-compatible onboard (${result:0:200})", - "polarity": "fail", - "normalized_id": "c4.gateway.is.not.serving.after.telegram.compatible.onboard.result.0.200", - "mapping_status": 
"deferred" - }, - { - "script": "test/e2e/test-messaging-compatible-endpoint.sh", - "line": 481, - "text": "C5: Sandbox inference.local chat completion returned mock content", - "polarity": "pass", - "normalized_id": "c5.sandbox.inference.local.chat.completion.returned.mock.content", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-messaging-compatible-endpoint.sh", - "line": 483, - "text": "C5: Sandbox inference.local chat completion failed (${response:0:400})", - "polarity": "fail", - "normalized_id": "c5.sandbox.inference.local.chat.completion.failed.response.0.400", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-messaging-compatible-endpoint.sh", - "line": 501, - "text": "C8: openclaw agent turn — could not get SSH config", - "polarity": "fail", - "normalized_id": "c8.openclaw.agent.turn.could.not.get.ssh.config", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-compatible-endpoint.sh", - "line": 524, - "text": "C8: openclaw agent turn failed with provider/transport error (exit ${rc}): ${raw:0:300}", - "polarity": "fail", - "normalized_id": "c8.openclaw.agent.turn.failed.with.provider.transport.error.exit.rc.raw.0.300", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-compatible-endpoint.sh", - "line": 543, - "text": "C8: openclaw agent completed turn via compatible endpoint (http-proxy-fix.js FORWARD-mode path exercised)", - "polarity": "pass", - "normalized_id": "c8.openclaw.agent.completed.turn.via.compatible.endpoint.http.proxy.fix.js.forward.mode.path.exercised", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-compatible-endpoint.sh", - "line": 545, - "text": "C8: openclaw agent turn failed (exit ${rc}); reply='${reply:0:200}', raw='${raw:0:200}'", - "polarity": "fail", - "normalized_id": "c8.openclaw.agent.turn.failed.exit.rc.reply.reply.0.200.raw.raw.0.200", - "mapping_status": "deferred" - }, - { - "script": 
"test/e2e/test-messaging-compatible-endpoint.sh", - "line": 558, - "text": "C9: Mock logged no proxy_hop_headers line for the agent turn — agent did not reach /v1/chat/completions", - "polarity": "fail", - "normalized_id": "c9.mock.logged.no.proxy.hop.headers.line.for.the.agent.turn.agent.did.not.reach.v1.chat.completions", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-compatible-endpoint.sh", - "line": 565, - "text": "C9: No proxy hop headers leaked to the compatible endpoint upstream (http-proxy-fix.js strip verified)", - "polarity": "pass", - "normalized_id": "c9.no.proxy.hop.headers.leaked.to.the.compatible.endpoint.upstream.http.proxy.fix.js.strip.verified", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-compatible-endpoint.sh", - "line": 567, - "text": "C9: Proxy hop headers leaked to upstream — http-proxy-fix.js strip broken: ${leaked}", - "polarity": "fail", - "normalized_id": "c9.proxy.hop.headers.leaked.to.upstream.http.proxy.fix.js.strip.broken.leaked", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-compatible-endpoint.sh", - "line": 612, - "text": "Docker is not running", - "polarity": "fail", - "normalized_id": "docker.is.not.running", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-compatible-endpoint.sh", - "line": 615, - "text": "Docker is running", - "polarity": "pass", - "normalized_id": "docker.is.running", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-messaging-compatible-endpoint.sh", - "line": 618, - "text": "python3 not found", - "polarity": "fail", - "normalized_id": "python3.not.found", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-compatible-endpoint.sh", - "line": 621, - "text": "python3 is available", - "polarity": "pass", - "normalized_id": "python3.is.available", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-compatible-endpoint.sh", - 
"line": 633, - "text": "C0: Compatible endpoint mock started", - "polarity": "pass", - "normalized_id": "c0.compatible.endpoint.mock.started", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-compatible-endpoint.sh", - "line": 635, - "text": "C0: Compatible endpoint mock failed to start", - "polarity": "fail", - "normalized_id": "c0.compatible.endpoint.mock.failed.to.start", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-compatible-endpoint.sh", - "line": 642, - "text": "C0b: Compatible endpoint mock is reachable through host address", - "polarity": "pass", - "normalized_id": "c0b.compatible.endpoint.mock.is.reachable.through.host.address", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-compatible-endpoint.sh", - "line": 644, - "text": "C0b: Compatible endpoint mock is not reachable at ${COMPAT_ENDPOINT_URL}", - "polarity": "fail", - "normalized_id": "c0b.compatible.endpoint.mock.is.not.reachable.at.compat.endpoint.url", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-compatible-endpoint.sh", - "line": 652, - "text": "C2: Onboard ran the compatible endpoint sandbox smoke check", - "polarity": "pass", - "normalized_id": "c2.onboard.ran.the.compatible.endpoint.sandbox.smoke.check", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-compatible-endpoint.sh", - "line": 654, - "text": "C2: Onboard log does not show the compatible endpoint sandbox smoke check", - "polarity": "fail", - "normalized_id": "c2.onboard.log.does.not.show.the.compatible.endpoint.sandbox.smoke.check", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-compatible-endpoint.sh", - "line": 659, - "text": "C2b: Gateway has the compatible-endpoint provider", - "polarity": "pass", - "normalized_id": "c2b.gateway.has.the.compatible.endpoint.provider", - "mapping_status": "deferred" - }, - { - "script": 
"test/e2e/test-messaging-compatible-endpoint.sh", - "line": 661, - "text": "C2b: Gateway is missing the compatible-endpoint provider", - "polarity": "fail", - "normalized_id": "c2b.gateway.is.missing.the.compatible.endpoint.provider", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-compatible-endpoint.sh", - "line": 670, - "text": "C6: Compatible mock received authenticated chat traffic", - "polarity": "pass", - "normalized_id": "c6.compatible.mock.received.authenticated.chat.traffic", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-compatible-endpoint.sh", - "line": 672, - "text": "C6: Compatible mock did not record authenticated chat traffic", - "polarity": "fail", - "normalized_id": "c6.compatible.mock.did.not.record.authenticated.chat.traffic", - "mapping_status": "deferred" - } - ] - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "assertions": [ - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 247, - "text": "NVIDIA_API_KEY not set", - "polarity": "fail", - "normalized_id": "nvidia.api.key.not.set", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 250, - "text": "NVIDIA_API_KEY is set", - "polarity": "pass", - "normalized_id": "nvidia.api.key.is.set", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 253, - "text": "Docker is not running", - "polarity": "fail", - "normalized_id": "docker.is.not.running", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 256, - "text": "Docker is running", - "polarity": "pass", - "normalized_id": "docker.is.running", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 290, - "text": "Pre-cleanup complete", - "polarity": "pass", - "normalized_id": "pre.cleanup.complete", - "mapping_status": "deferred" - }, - { - "script": 
"test/e2e/test-messaging-providers.sh", - "line": 370, - "text": "Failed to append Slack policy to base sandbox policy", - "polarity": "fail", - "normalized_id": "failed.to.append.slack.policy.to.base.sandbox.policy", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 373, - "text": "Slack network policy pre-merged into base policy", - "polarity": "pass", - "normalized_id": "slack.network.policy.pre.merged.into.base.policy", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 378, - "text": "Cannot pre-merge Slack policy: missing base policy or preset file", - "polarity": "fail", - "normalized_id": "cannot.pre.merge.slack.policy.missing.base.policy.or.preset.file", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 419, - "text": "M0: install.sh completed (exit 0)", - "polarity": "pass", - "normalized_id": "m0.install.sh.completed.exit.0", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 421, - "text": "M0: install.sh failed (exit $install_exit)", - "polarity": "fail", - "normalized_id": "m0.install.sh.failed.exit.install.exit", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 429, - "text": "openshell not found on PATH after install", - "polarity": "fail", - "normalized_id": "openshell.not.found.on.path.after.install", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 432, - "text": "openshell installed ($(openshell --version 2>&1 || echo unknown))", - "polarity": "pass", - "normalized_id": "openshell.installed.openshell.version.2.1.echo.unknown", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 435, - "text": "nemoclaw not found on PATH after install", - "polarity": "fail", - "normalized_id": 
"nemoclaw.not.found.on.path.after.install", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 438, - "text": "nemoclaw installed at $(command -v nemoclaw)", - "polarity": "pass", - "normalized_id": "nemoclaw.installed.at.command.v.nemoclaw", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 443, - "text": "M0b: Sandbox '$SANDBOX_NAME' is Ready", - "polarity": "pass", - "normalized_id": "m0b.sandbox.sandbox.name.is.ready", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 445, - "text": "M0b: Sandbox '$SANDBOX_NAME' not Ready (list: ${sandbox_list:0:200})", - "polarity": "fail", - "normalized_id": "m0b.sandbox.sandbox.name.not.ready.list.sandbox.list.0.200", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 463, - "text": "M-WA0: channels add whatsapp registered QR-only channel", - "polarity": "pass", - "normalized_id": "m.wa0.channels.add.whatsapp.registered.qr.only.channel", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 465, - "text": "M-WA0: channels add whatsapp failed or did not register channel", - "polarity": "fail", - "normalized_id": "m.wa0.channels.add.whatsapp.failed.or.did.not.register.channel", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 471, - "text": "M-WA1: Unexpected WhatsApp bridge provider exists in gateway", - "polarity": "fail", - "normalized_id": "m.wa1.unexpected.whatsapp.bridge.provider.exists.in.gateway", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 473, - "text": "M-WA1: WhatsApp QR-only channel creates no bridge provider", - "polarity": "pass", - "normalized_id": "m.wa1.whatsapp.qr.only.channel.creates.no.bridge.provider", - "mapping_status": "deferred" - }, - { - 
"script": "test/e2e/test-messaging-providers.sh", - "line": 477, - "text": "M-WA2: registry.messagingChannels contains whatsapp after channel add", - "polarity": "pass", - "normalized_id": "m.wa2.registry.messagingchannels.contains.whatsapp.after.channel.add", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 479, - "text": "M-WA2: registry.messagingChannels missing whatsapp after channel add ($(registry_field messagingChannels))", - "polarity": "fail", - "normalized_id": "m.wa2.registry.messagingchannels.missing.whatsapp.after.channel.add.registry.field.messagingchannels", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 486, - "text": "M-WA3: WhatsApp policy preset applied before rebuild", - "polarity": "pass", - "normalized_id": "m.wa3.whatsapp.policy.preset.applied.before.rebuild", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 488, - "text": "M-WA3: WhatsApp policy preset missing expected endpoints before rebuild", - "polarity": "fail", - "normalized_id": "m.wa3.whatsapp.policy.preset.missing.expected.endpoints.before.rebuild", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 494, - "text": "M-WA4: Rebuild completed after WhatsApp channel add", - "polarity": "pass", - "normalized_id": "m.wa4.rebuild.completed.after.whatsapp.channel.add", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 496, - "text": "M-WA4: Rebuild failed after WhatsApp channel add", - "polarity": "fail", - "normalized_id": "m.wa4.rebuild.failed.after.whatsapp.channel.add", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 506, - "text": "M-WA5: WhatsApp policy preset survived rebuild with Node binary scope", - "polarity": "pass", - "normalized_id": 
"m.wa5.whatsapp.policy.preset.survived.rebuild.with.node.binary.scope", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 508, - "text": "M-WA5: WhatsApp policy preset missing expected endpoints/binaries after rebuild", - "polarity": "fail", - "normalized_id": "m.wa5.whatsapp.policy.preset.missing.expected.endpoints.binaries.after.rebuild", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 513, - "text": "M-WA6: Sandbox '$SANDBOX_NAME' is Ready after WhatsApp rebuild", - "polarity": "pass", - "normalized_id": "m.wa6.sandbox.sandbox.name.is.ready.after.whatsapp.rebuild", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 515, - "text": "M-WA6: Sandbox '$SANDBOX_NAME' not Ready after WhatsApp rebuild (list: ${sandbox_list:0:200})", - "polarity": "fail", - "normalized_id": "m.wa6.sandbox.sandbox.name.not.ready.after.whatsapp.rebuild.list.sandbox.list.0.200", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 521, - "text": "M1: Provider '${SANDBOX_NAME}-telegram-bridge' exists in gateway", - "polarity": "pass", - "normalized_id": "m1.provider.sandbox.name.telegram.bridge.exists.in.gateway", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 523, - "text": "M1: Provider '${SANDBOX_NAME}-telegram-bridge' not found in gateway", - "polarity": "fail", - "normalized_id": "m1.provider.sandbox.name.telegram.bridge.not.found.in.gateway", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 528, - "text": "M2: Provider '${SANDBOX_NAME}-discord-bridge' exists in gateway", - "polarity": "pass", - "normalized_id": "m2.provider.sandbox.name.discord.bridge.exists.in.gateway", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 530, - 
"text": "M2: Provider '${SANDBOX_NAME}-discord-bridge' not found in gateway", - "polarity": "fail", - "normalized_id": "m2.provider.sandbox.name.discord.bridge.not.found.in.gateway", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 537, - "text": "M-W1: Provider '${SANDBOX_NAME}-wechat-bridge' exists in gateway", - "polarity": "pass", - "normalized_id": "m.w1.provider.sandbox.name.wechat.bridge.exists.in.gateway", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 539, - "text": "M-W1: Provider '${SANDBOX_NAME}-wechat-bridge' not found in gateway (non-interactive QR-skip path may be broken)", - "polarity": "fail", - "normalized_id": "m.w1.provider.sandbox.name.wechat.bridge.not.found.in.gateway.non.interactive.qr.skip.path.may.be.broken", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 553, - "text": "M3: Real Telegram token leaked into sandbox env", - "polarity": "fail", - "normalized_id": "m3.real.telegram.token.leaked.into.sandbox.env", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 555, - "text": "M3: Sandbox TELEGRAM_BOT_TOKEN is a placeholder (not the real token)", - "polarity": "pass", - "normalized_id": "m3.sandbox.telegram.bot.token.is.a.placeholder.not.the.real.token", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 566, - "text": "M4: Real Discord token leaked into sandbox env", - "polarity": "fail", - "normalized_id": "m4.real.discord.token.leaked.into.sandbox.env", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 568, - "text": "M4: Sandbox DISCORD_BOT_TOKEN is a placeholder (not the real token)", - "polarity": "pass", - "normalized_id": "m4.sandbox.discord.bot.token.is.a.placeholder.not.the.real.token", - "mapping_status": "retired" - }, 
- { - "script": "test/e2e/test-messaging-providers.sh", - "line": 575, - "text": "M5: At least one messaging placeholder detected in sandbox", - "polarity": "pass", - "normalized_id": "m5.at.least.one.messaging.placeholder.detected.in.sandbox", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 600, - "text": "M5a: Real Telegram token found in full sandbox environment dump", - "polarity": "fail", - "normalized_id": "m5a.real.telegram.token.found.in.full.sandbox.environment.dump", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 602, - "text": "M5a: Real Telegram token absent from full sandbox environment", - "polarity": "pass", - "normalized_id": "m5a.real.telegram.token.absent.from.full.sandbox.environment", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 609, - "text": "M5b: Real Telegram token found in sandbox process list", - "polarity": "fail", - "normalized_id": "m5b.real.telegram.token.found.in.sandbox.process.list", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 611, - "text": "M5b: Real Telegram token absent from sandbox process list", - "polarity": "pass", - "normalized_id": "m5b.real.telegram.token.absent.from.sandbox.process.list", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 618, - "text": "M5c: Real Telegram token found on sandbox filesystem: ${sandbox_fs_tg}", - "polarity": "fail", - "normalized_id": "m5c.real.telegram.token.found.on.sandbox.filesystem.sandbox.fs.tg", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 620, - "text": "M5c: Real Telegram token absent from sandbox filesystem", - "polarity": "pass", - "normalized_id": "m5c.real.telegram.token.absent.from.sandbox.filesystem", - "mapping_status": "deferred" - }, - { - 
"script": "test/e2e/test-messaging-providers.sh", - "line": 626, - "text": "M5d: Telegram placeholder confirmed present in sandbox environment", - "polarity": "pass", - "normalized_id": "m5d.telegram.placeholder.confirmed.present.in.sandbox.environment", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 628, - "text": "M5d: Telegram placeholder not found in sandbox environment", - "polarity": "fail", - "normalized_id": "m5d.telegram.placeholder.not.found.in.sandbox.environment", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 638, - "text": "M5e: Real Discord token found in full sandbox environment dump", - "polarity": "fail", - "normalized_id": "m5e.real.discord.token.found.in.full.sandbox.environment.dump", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 640, - "text": "M5e: Real Discord token absent from full sandbox environment", - "polarity": "pass", - "normalized_id": "m5e.real.discord.token.absent.from.full.sandbox.environment", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 647, - "text": "M5f: Real Discord token found in sandbox process list", - "polarity": "fail", - "normalized_id": "m5f.real.discord.token.found.in.sandbox.process.list", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 649, - "text": "M5f: Real Discord token absent from sandbox process list", - "polarity": "pass", - "normalized_id": "m5f.real.discord.token.absent.from.sandbox.process.list", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 655, - "text": "M5g: Real Discord token found on sandbox filesystem: ${sandbox_fs_dc}", - "polarity": "fail", - "normalized_id": "m5g.real.discord.token.found.on.sandbox.filesystem.sandbox.fs.dc", - "mapping_status": "deferred" - }, - { - 
"script": "test/e2e/test-messaging-providers.sh", - "line": 657, - "text": "M5g: Real Discord token absent from sandbox filesystem", - "polarity": "pass", - "normalized_id": "m5g.real.discord.token.absent.from.sandbox.filesystem", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 663, - "text": "M5h: Discord placeholder confirmed present in sandbox environment", - "polarity": "pass", - "normalized_id": "m5h.discord.placeholder.confirmed.present.in.sandbox.environment", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 665, - "text": "M5h: Discord placeholder not found in sandbox environment", - "polarity": "fail", - "normalized_id": "m5h.discord.placeholder.not.found.in.sandbox.environment", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 680, - "text": "M-S5a: Real Slack bot token found in full sandbox environment dump", - "polarity": "fail", - "normalized_id": "m.s5a.real.slack.bot.token.found.in.full.sandbox.environment.dump", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 682, - "text": "M-S5a: Real Slack bot token absent from full sandbox environment", - "polarity": "pass", - "normalized_id": "m.s5a.real.slack.bot.token.absent.from.full.sandbox.environment", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 689, - "text": "M-S5b: Real Slack bot token found in sandbox process list", - "polarity": "fail", - "normalized_id": "m.s5b.real.slack.bot.token.found.in.sandbox.process.list", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 691, - "text": "M-S5b: Real Slack bot token absent from sandbox process list", - "polarity": "pass", - "normalized_id": "m.s5b.real.slack.bot.token.absent.from.sandbox.process.list", - "mapping_status": "deferred" - }, - { - 
"script": "test/e2e/test-messaging-providers.sh", - "line": 697, - "text": "M-S5c: Real Slack bot token found on sandbox filesystem: ${sandbox_fs_sl}", - "polarity": "fail", - "normalized_id": "m.s5c.real.slack.bot.token.found.on.sandbox.filesystem.sandbox.fs.sl", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 699, - "text": "M-S5c: Real Slack bot token absent from sandbox filesystem", - "polarity": "pass", - "normalized_id": "m.s5c.real.slack.bot.token.absent.from.sandbox.filesystem", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 707, - "text": "M-S5d: Real Slack app token found in full sandbox environment dump", - "polarity": "fail", - "normalized_id": "m.s5d.real.slack.app.token.found.in.full.sandbox.environment.dump", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 709, - "text": "M-S5d: Real Slack app token absent from sandbox environment", - "polarity": "pass", - "normalized_id": "m.s5d.real.slack.app.token.absent.from.sandbox.environment", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 714, - "text": "M-S5d2: Real Slack app token found in sandbox process list", - "polarity": "fail", - "normalized_id": "m.s5d2.real.slack.app.token.found.in.sandbox.process.list", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 716, - "text": "M-S5d2: Real Slack app token absent from sandbox process list", - "polarity": "pass", - "normalized_id": "m.s5d2.real.slack.app.token.absent.from.sandbox.process.list", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 720, - "text": "M-S5e: Real Slack app token found on sandbox filesystem: ${sandbox_fs_sapp}", - "polarity": "fail", - "normalized_id": 
"m.s5e.real.slack.app.token.found.on.sandbox.filesystem.sandbox.fs.sapp", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 722, - "text": "M-S5e: Real Slack app token absent from sandbox filesystem", - "polarity": "pass", - "normalized_id": "m.s5e.real.slack.app.token.absent.from.sandbox.filesystem", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 733, - "text": "M-S5f: Real Slack bot/app token spliced into openclaw.json — apply_slack_token_override regression?", - "polarity": "fail", - "normalized_id": "m.s5f.real.slack.bot.app.token.spliced.into.openclaw.json.apply.slack.token.override.regression", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 737, - "text": "M-S5f: openclaw.json holds both Bolt-shape Slack placeholders (no real token on disk)", - "polarity": "pass", - "normalized_id": "m.s5f.openclaw.json.holds.both.bolt.shape.slack.placeholders.no.real.token.on.disk", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 746, - "text": "M-S5g: removed Slack token rewriter preload still present in NODE_OPTIONS", - "polarity": "fail", - "normalized_id": "m.s5g.removed.slack.token.rewriter.preload.still.present.in.node.options", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 748, - "text": "M-S5g: Slack token rewriter preload absent from NODE_OPTIONS", - "polarity": "pass", - "normalized_id": "m.s5g.slack.token.rewriter.preload.absent.from.node.options", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 764, - "text": "M-W3: Real WeChat token leaked into sandbox env", - "polarity": "fail", - "normalized_id": "m.w3.real.wechat.token.leaked.into.sandbox.env", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - 
"line": 766, - "text": "M-W3: Sandbox WECHAT_BOT_TOKEN is a placeholder (not the real token)", - "polarity": "pass", - "normalized_id": "m.w3.sandbox.wechat.bot.token.is.a.placeholder.not.the.real.token", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 775, - "text": "M-W3a: Real WeChat token found in full sandbox environment dump", - "polarity": "fail", - "normalized_id": "m.w3a.real.wechat.token.found.in.full.sandbox.environment.dump", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 777, - "text": "M-W3a: Real WeChat token absent from full sandbox environment", - "polarity": "pass", - "normalized_id": "m.w3a.real.wechat.token.absent.from.full.sandbox.environment", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 784, - "text": "M-W3b: Real WeChat token found in sandbox process list", - "polarity": "fail", - "normalized_id": "m.w3b.real.wechat.token.found.in.sandbox.process.list", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 786, - "text": "M-W3b: Real WeChat token absent from sandbox process list", - "polarity": "pass", - "normalized_id": "m.w3b.real.wechat.token.absent.from.sandbox.process.list", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 794, - "text": "M-W3c: Real WeChat token found on sandbox filesystem: ${sandbox_fs_wc}", - "polarity": "fail", - "normalized_id": "m.w3c.real.wechat.token.found.on.sandbox.filesystem.sandbox.fs.wc", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 796, - "text": "M-W3c: Real WeChat token absent from sandbox filesystem", - "polarity": "pass", - "normalized_id": "m.w3c.real.wechat.token.absent.from.sandbox.filesystem", - "mapping_status": "deferred" - }, - { - "script": 
"test/e2e/test-messaging-providers.sh", - "line": 802, - "text": "M-W3d: WeChat placeholder confirmed present in sandbox environment", - "polarity": "pass", - "normalized_id": "m.w3d.wechat.placeholder.confirmed.present.in.sandbox.environment", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 804, - "text": "M-W3d: WeChat placeholder not found in sandbox environment", - "polarity": "fail", - "normalized_id": "m.w3d.wechat.placeholder.not.found.in.sandbox.environment", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 819, - "text": "M-WA7a: WhatsApp credential-like env var found in sandbox environment", - "polarity": "fail", - "normalized_id": "m.wa7a.whatsapp.credential.like.env.var.found.in.sandbox.environment", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 821, - "text": "M-WA7a: No WhatsApp credential-like env var present in sandbox environment", - "polarity": "pass", - "normalized_id": "m.wa7a.no.whatsapp.credential.like.env.var.present.in.sandbox.environment", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 827, - "text": "M-WA7b: WhatsApp credential placeholder found in sandbox process list", - "polarity": "fail", - "normalized_id": "m.wa7b.whatsapp.credential.placeholder.found.in.sandbox.process.list", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 829, - "text": "M-WA7b: No WhatsApp credential placeholder present in sandbox process list", - "polarity": "pass", - "normalized_id": "m.wa7b.no.whatsapp.credential.placeholder.present.in.sandbox.process.list", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 842, - "text": "M-WA7c: WhatsApp host credential material found on sandbox filesystem: ${sandbox_fs_wa}", - "polarity": "fail", - 
"normalized_id": "m.wa7c.whatsapp.host.credential.material.found.on.sandbox.filesystem.sandbox.fs.wa", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 844, - "text": "M-WA7c: No WhatsApp host credential material found on sandbox filesystem", - "polarity": "pass", - "normalized_id": "m.wa7c.no.whatsapp.host.credential.material.found.on.sandbox.filesystem", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 864, - "text": "M6: Could not read openclaw.json channels (${channel_json:0:200})", - "polarity": "fail", - "normalized_id": "m6.could.not.read.openclaw.json.channels.channel.json.0.200", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 881, - "text": "M6: Telegram channel botToken present in openclaw.json", - "polarity": "pass", - "normalized_id": "m6.telegram.channel.bottoken.present.in.openclaw.json", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 888, - "text": "M7: Telegram botToken is not the host-side token (placeholder confirmed)", - "polarity": "pass", - "normalized_id": "m7.telegram.bottoken.is.not.the.host.side.token.placeholder.confirmed", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 890, - "text": "M7: Telegram botToken matches host-side token — credential leaked into config!", - "polarity": "fail", - "normalized_id": "m7.telegram.bottoken.matches.host.side.token.credential.leaked.into.config", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 905, - "text": "M8: Discord channel token present in openclaw.json", - "polarity": "pass", - "normalized_id": "m8.discord.channel.token.present.in.openclaw.json", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 912, - "text": "M9: Discord token 
is not the host-side token (placeholder confirmed)", - "polarity": "pass", - "normalized_id": "m9.discord.token.is.not.the.host.side.token.placeholder.confirmed", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 914, - "text": "M9: Discord token matches host-side token — credential leaked into config!", - "polarity": "fail", - "normalized_id": "m9.discord.token.matches.host.side.token.credential.leaked.into.config", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 929, - "text": "M10: Telegram channel is enabled", - "polarity": "pass", - "normalized_id": "m10.telegram.channel.is.enabled", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 944, - "text": "M11: Discord channel is enabled", - "polarity": "pass", - "normalized_id": "m11.discord.channel.is.enabled", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 959, - "text": "M11b: Telegram dmPolicy is 'allowlist'", - "polarity": "pass", - "normalized_id": "m11b.telegram.dmpolicy.is.allowlist", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 961, - "text": "M11b: Telegram dmPolicy is '$tg_dm_policy' (expected 'allowlist')", - "polarity": "fail", - "normalized_id": "m11b.telegram.dmpolicy.is.tg.dm.policy.expected.allowlist", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 989, - "text": "M11c: Telegram allowFrom contains all expected user IDs: $tg_allow_from", - "polarity": "pass", - "normalized_id": "m11c.telegram.allowfrom.contains.all.expected.user.ids.tg.allow.from", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 991, - "text": "M11c: Telegram allowFrom ($tg_allow_from) is missing IDs: ${missing_ids[*]} (expected all of: $TELEGRAM_IDS)", - 
"polarity": "fail", - "normalized_id": "m11c.telegram.allowfrom.tg.allow.from.is.missing.ids.missing.ids.expected.all.of.telegram.ids", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1007, - "text": "M11d: Telegram groupPolicy is 'open'", - "polarity": "pass", - "normalized_id": "m11d.telegram.grouppolicy.is.open", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1009, - "text": "M11d: Telegram groupPolicy is '$tg_group_policy' (expected 'open')", - "polarity": "fail", - "normalized_id": "m11d.telegram.grouppolicy.is.tg.group.policy.expected.open", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1025, - "text": "M11e: Slack channel configured with placeholder tokens (guard needed)", - "polarity": "pass", - "normalized_id": "m11e.slack.channel.configured.with.placeholder.tokens.guard.needed", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1037, - "text": "M11f: Slack dmPolicy is 'allowlist'", - "polarity": "pass", - "normalized_id": "m11f.slack.dmpolicy.is.allowlist", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1039, - "text": "M11f: Slack dmPolicy is '$sl_dm_policy' (expected 'allowlist')", - "polarity": "fail", - "normalized_id": "m11f.slack.dmpolicy.is.sl.dm.policy.expected.allowlist", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1051, - "text": "M11g: Slack groupPolicy is 'allowlist'", - "polarity": "pass", - "normalized_id": "m11g.slack.grouppolicy.is.allowlist", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1053, - "text": "M11g: Slack groupPolicy is '$sl_group_policy' (expected 'allowlist')", - "polarity": "fail", - "normalized_id": 
"m11g.slack.grouppolicy.is.sl.group.policy.expected.allowlist", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1077, - "text": "M11h: Slack wildcard channel config is not enabled", - "polarity": "fail", - "normalized_id": "m11h.slack.wildcard.channel.config.is.not.enabled", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1079, - "text": "M11h: Slack wildcard channel config does not require mention", - "polarity": "fail", - "normalized_id": "m11h.slack.wildcard.channel.config.does.not.require.mention", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1081, - "text": "M11h: Slack wildcard channel users is not a list", - "polarity": "fail", - "normalized_id": "m11h.slack.wildcard.channel.users.is.not.a.list", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1083, - "text": "M11h: Slack wildcard channel users is empty", - "polarity": "fail", - "normalized_id": "m11h.slack.wildcard.channel.users.is.empty", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1098, - "text": "M11h: Slack wildcard channel @mention allowlist contains expected user count (${expected_slack_id_count})", - "polarity": "pass", - "normalized_id": "m11h.slack.wildcard.channel.mention.allowlist.contains.expected.user.count.expected.slack.id.count", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1100, - "text": "M11h: Slack wildcard channel users missing ${#missing_slack_ids[@]} expected ID(s)", - "polarity": "fail", - "normalized_id": "m11h.slack.wildcard.channel.users.missing.missing.slack.ids.expected.id.s", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1143, - "text": "M-WA8: WhatsApp account is enabled in openclaw.json", - 
"polarity": "pass", - "normalized_id": "m.wa8.whatsapp.account.is.enabled.in.openclaw.json", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1145, - "text": "M-WA8: WhatsApp account missing or disabled in openclaw.json (${whatsapp_account_json:0:200})", - "polarity": "fail", - "normalized_id": "m.wa8.whatsapp.account.missing.or.disabled.in.openclaw.json.whatsapp.account.json.0.200", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1149, - "text": "M-WA8a: WhatsApp health monitor is disabled for unpaired QR session", - "polarity": "pass", - "normalized_id": "m.wa8a.whatsapp.health.monitor.is.disabled.for.unpaired.qr.session", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1151, - "text": "M-WA8a: WhatsApp health monitor is not disabled (${whatsapp_account_json:0:200})", - "polarity": "fail", - "normalized_id": "m.wa8a.whatsapp.health.monitor.is.not.disabled.whatsapp.account.json.0.200", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1178, - "text": "M-WA9: WhatsApp config has no token/auth/session provider placeholders", - "polarity": "pass", - "normalized_id": "m.wa9.whatsapp.config.has.no.token.auth.session.provider.placeholders", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1180, - "text": "M-WA9: WhatsApp config contains secret-like fields: ${whatsapp_secret_fields}", - "polarity": "fail", - "normalized_id": "m.wa9.whatsapp.config.contains.secret.like.fields.whatsapp.secret.fields", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1196, - "text": "M-W8: WeChat account '$WECHAT_ACCOUNT' is enabled in openclaw.json (channels.openclaw-weixin)", - "polarity": "pass", - "normalized_id": 
"m.w8.wechat.account.wechat.account.is.enabled.in.openclaw.json.channels.openclaw.weixin", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1212, - "text": "M-W9: Real WeChat token spliced into accounts/${WECHAT_ACCOUNT}.json — seed-wechat-accounts.py placeholder regression", - "polarity": "fail", - "normalized_id": "m.w9.real.wechat.token.spliced.into.accounts.wechat.account.json.seed.wechat.accounts.py.placeholder.regression", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1214, - "text": "M-W9: WeChat per-account credential file uses the L7-resolved placeholder", - "polarity": "pass", - "normalized_id": "m.w9.wechat.per.account.credential.file.uses.the.l7.resolved.placeholder", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1216, - "text": "M-W9: WeChat per-account credential file has unexpected token shape: $(echo ", - "polarity": "fail", - "normalized_id": "m.w9.wechat.per.account.credential.file.has.unexpected.token.shape.echo", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1235, - "text": "M-W10: WeChat accounts.json index contains '$WECHAT_ACCOUNT'", - "polarity": "pass", - "normalized_id": "m.w10.wechat.accounts.json.index.contains.wechat.account", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1237, - "text": "M-W10: WeChat accounts.json missing '$WECHAT_ACCOUNT' (raw: $(echo ", - "polarity": "fail", - "normalized_id": "m.w10.wechat.accounts.json.missing.wechat.account.raw.echo", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1258, - "text": "M12: Node.js reached api.telegram.org (${tg_reach})", - "polarity": "pass", - "normalized_id": "m12.node.js.reached.api.telegram.org.tg.reach", - "mapping_status": "deferred" - }, - { - 
"script": "test/e2e/test-messaging-providers.sh", - "line": 1264, - "text": "M12: Node.js could not reach api.telegram.org (${tg_reach:0:200})", - "polarity": "fail", - "normalized_id": "m12.node.js.could.not.reach.api.telegram.org.tg.reach.0.200", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1272, - "text": "M13-policy: Live policy contains Discord endpoints and Node binaries", - "polarity": "pass", - "normalized_id": "m13.policy.live.policy.contains.discord.endpoints.and.node.binaries", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1274, - "text": "M13-policy: Live policy is missing expected Discord preset endpoint/binary entries", - "polarity": "fail", - "normalized_id": "m13.policy.live.policy.is.missing.expected.discord.preset.endpoint.binary.entries", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1280, - "text": "M13-proxy: Sandbox uses the OpenShell gateway proxy", - "polarity": "pass", - "normalized_id": "m13.proxy.sandbox.uses.the.openshell.gateway.proxy", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1282, - "text": "M13-proxy: Sandbox proxy env does not point at OpenShell gateway: ${live_proxy_env:0:200}", - "polarity": "fail", - "normalized_id": "m13.proxy.sandbox.proxy.env.does.not.point.at.openshell.gateway.live.proxy.env.0.200", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1303, - "text": "M13-curl: curl unexpectedly established a tunnel to Discord; binary whitelist may be too broad", - "polarity": "fail", - "normalized_id": "m13.curl.curl.unexpectedly.established.a.tunnel.to.discord.binary.whitelist.may.be.too.broad", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1346, - "text": "M13: Node.js reached Discord API and 
CDN through the same proxy (${dc_reach//$'\\n'/ })", - "polarity": "pass", - "normalized_id": "m13.node.js.reached.discord.api.and.cdn.through.the.same.proxy.dc.reach.n", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1348, - "text": "M13: Node.js was denied by the proxy despite the Discord preset being applied: ${dc_reach:0:300}", - "polarity": "fail", - "normalized_id": "m13.node.js.was.denied.by.the.proxy.despite.the.discord.preset.being.applied.dc.reach.0.300", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1352, - "text": "M13: Node.js could not reach Discord API/CDN (${dc_reach:0:200})", - "polarity": "fail", - "normalized_id": "m13.node.js.could.not.reach.discord.api.cdn.dc.reach.0.200", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1359, - "text": "M13-rest-a: Hermetic fake Discord REST API started on host port ${FAKE_DISCORD_REST_PORT}", - "polarity": "pass", - "normalized_id": "m13.rest.a.hermetic.fake.discord.rest.api.started.on.host.port.fake.discord.rest.port", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1368, - "text": "M13-rest-b: Applied Node-only HTTPS policy for fake Discord REST API", - "polarity": "pass", - "normalized_id": "m13.rest.b.applied.node.only.https.policy.for.fake.discord.rest.api", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1370, - "text": "M13-rest-b: Failed to apply fake Discord REST policy: $(tail -20 /tmp/nemoclaw-fake-discord-rest-policy.log 2>/dev/null | tr '\\n' ' ' | cut -c1-300)", - "polarity": "fail", - "normalized_id": "m13.rest.b.failed.to.apply.fake.discord.rest.policy.tail.20.tmp.nemoclaw.fake.discord.rest.policy.log.2.dev.null.tr.n.cut.c1.300", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - 
"line": 1384, - "text": "M13-rest-c: Node reached the fake Discord REST API through OpenShell", - "polarity": "pass", - "normalized_id": "m13.rest.c.node.reached.the.fake.discord.rest.api.through.openshell", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1386, - "text": "M13-rest-c: Node failed to reach fake Discord REST API: ${fake_rest_node:0:300}", - "polarity": "fail", - "normalized_id": "m13.rest.c.node.failed.to.reach.fake.discord.rest.api.fake.rest.node.0.300", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1398, - "text": "M13-rest-d: curl was denied before reaching the fake Discord REST API", - "polarity": "pass", - "normalized_id": "m13.rest.d.curl.was.denied.before.reaching.the.fake.discord.rest.api", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1400, - "text": "M13-rest-d: curl unexpectedly established a tunnel to the fake Discord REST API", - "polarity": "fail", - "normalized_id": "m13.rest.d.curl.unexpectedly.established.a.tunnel.to.the.fake.discord.rest.api", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1402, - "text": "M13-rest-d: Fake Discord REST curl denial had unexpected shape: ${fake_rest_curl:0:300}", - "polarity": "fail", - "normalized_id": "m13.rest.d.fake.discord.rest.curl.denial.had.unexpected.shape.fake.rest.curl.0.300", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1414, - "text": "M13-rest-e: Fake server saw Node but no curl request", - "polarity": "pass", - "normalized_id": "m13.rest.e.fake.server.saw.node.but.no.curl.request", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1416, - "text": "M13-rest-e: Unexpected fake Discord REST capture counts: ${fake_rest_capture}", - "polarity": "fail", - 
"normalized_id": "m13.rest.e.unexpected.fake.discord.rest.capture.counts.fake.rest.capture", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1423, - "text": "M13b: Hermetic fake Discord Gateway started on host port ${FAKE_DISCORD_GATEWAY_PORT}", - "polarity": "pass", - "normalized_id": "m13b.hermetic.fake.discord.gateway.started.on.host.port.fake.discord.gateway.port", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1425, - "text": "M13b: Failed to start hermetic fake Discord Gateway", - "polarity": "fail", - "normalized_id": "m13b.failed.to.start.hermetic.fake.discord.gateway", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1430, - "text": "M13c: Applied native WebSocket policy with credential rewrite for fake Discord Gateway", - "polarity": "pass", - "normalized_id": "m13c.applied.native.websocket.policy.with.credential.rewrite.for.fake.discord.gateway", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1432, - "text": "M13c: Failed to apply fake Discord Gateway policy: $(tail -20 /tmp/nemoclaw-fake-discord-policy.log 2>/dev/null | tr '\\n' ' ' | cut -c1-300)", - "polarity": "fail", - "normalized_id": "m13c.failed.to.apply.fake.discord.gateway.policy.tail.20.tmp.nemoclaw.fake.discord.policy.log.2.dev.null.tr.n.cut.c1.300", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1442, - "text": "M13d: Native WebSocket upgrade reached fake Discord Gateway through OpenShell", - "polarity": "pass", - "normalized_id": "m13d.native.websocket.upgrade.reached.fake.discord.gateway.through.openshell", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1444, - "text": "M13d: Native WebSocket upgrade failed: ${dc_ws_native:0:300}", - "polarity": "fail", - 
"normalized_id": "m13d.native.websocket.upgrade.failed.dc.ws.native.0.300", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1451, - "text": "M13e: Discord HELLO, placeholder IDENTIFY, READY, and heartbeat ACK completed", - "polarity": "pass", - "normalized_id": "m13e.discord.hello.placeholder.identify.ready.and.heartbeat.ack.completed", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1453, - "text": "M13e: Discord Gateway protocol proof incomplete: ${dc_ws_native:0:400}", - "polarity": "fail", - "normalized_id": "m13e.discord.gateway.protocol.proof.incomplete.dc.ws.native.0.400", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1459, - "text": "M13f: Fake Gateway received host-side Discord token; sandbox-visible IDENTIFY used only the placeholder", - "polarity": "pass", - "normalized_id": "m13f.fake.gateway.received.host.side.discord.token.sandbox.visible.identify.used.only.the.placeholder", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1464, - "text": "M13f: Fake Gateway did not prove placeholder-to-token rewrite at the relay boundary", - "polarity": "fail", - "normalized_id": "m13f.fake.gateway.did.not.prove.placeholder.to.token.rewrite.at.the.relay.boundary", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1480, - "text": "M13g: Unregistered Discord WebSocket placeholder is rejected before upstream token exposure", - "polarity": "pass", - "normalized_id": "m13g.unregistered.discord.websocket.placeholder.is.rejected.before.upstream.token.exposure", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1482, - "text": "M13g: Unregistered Discord WebSocket placeholder reached READY or leaked upstream", - "polarity": "fail", - "normalized_id": 
"m13g.unregistered.discord.websocket.placeholder.reached.ready.or.leaked.upstream", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1488, - "text": "M14: curl to api.telegram.org blocked (binary restriction enforced)", - "polarity": "pass", - "normalized_id": "m14.curl.to.api.telegram.org.blocked.binary.restriction.enforced", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1490, - "text": "M14: curl returned empty (likely blocked by policy)", - "polarity": "pass", - "normalized_id": "m14.curl.returned.empty.likely.blocked.by.policy", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1494, - "text": "M14: curl not available in sandbox (defense in depth)", - "polarity": "pass", - "normalized_id": "m14.curl.not.available.in.sandbox.defense.in.depth", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1528, - "text": "M15: Telegram getMe returned 200 — real token verified!", - "polarity": "pass", - "normalized_id": "m15.telegram.getme.returned.200.real.token.verified", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1533, - "text": "M15: Telegram getMe returned $tg_status — L7 proxy rewrote placeholder (fake token rejected by API)", - "polarity": "pass", - "normalized_id": "m15.telegram.getme.returned.tg.status.l7.proxy.rewrote.placeholder.fake.token.rejected.by.api", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1534, - "text": "M16: Full chain verified: sandbox → proxy → token rewrite → Telegram API", - "polarity": "pass", - "normalized_id": "m16.full.chain.verified.sandbox.proxy.token.rewrite.telegram.api", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1540, - "text": "M15: Telegram API call failed 
with error: ${tg_api:0:200}", - "polarity": "fail", - "normalized_id": "m15.telegram.api.call.failed.with.error.tg.api.0.200", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1542, - "text": "M15: Unexpected Telegram response (status=$tg_status): ${tg_api:0:200}", - "polarity": "fail", - "normalized_id": "m15.unexpected.telegram.response.status.tg.status.tg.api.0.200", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1569, - "text": "M17: Discord users/@me returned 200 — real token verified!", - "polarity": "pass", - "normalized_id": "m17.discord.users.me.returned.200.real.token.verified", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1571, - "text": "M17: Discord users/@me returned 401 — L7 proxy rewrote placeholder (fake token rejected by API)", - "polarity": "pass", - "normalized_id": "m17.discord.users.me.returned.401.l7.proxy.rewrote.placeholder.fake.token.rejected.by.api", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1575, - "text": "M17: Discord API call failed with error: ${dc_api:0:200}", - "polarity": "fail", - "normalized_id": "m17.discord.api.call.failed.with.error.dc.api.0.200", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1577, - "text": "M17: Unexpected Discord response (status=$dc_status): ${dc_api:0:200}", - "polarity": "fail", - "normalized_id": "m17.unexpected.discord.response.status.dc.status.dc.api.0.200", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1589, - "text": "M-S14a: Hermetic fake Slack API started on host port ${FAKE_SLACK_API_PORT}", - "polarity": "pass", - "normalized_id": "m.s14a.hermetic.fake.slack.api.started.on.host.port.fake.slack.api.port", - "mapping_status": "deferred" - }, - { - "script": 
"test/e2e/test-messaging-providers.sh", - "line": 1591, - "text": "M-S14a: Failed to start hermetic fake Slack API", - "polarity": "fail", - "normalized_id": "m.s14a.failed.to.start.hermetic.fake.slack.api", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1596, - "text": "M-S14b: Applied REST policy for hermetic fake Slack API", - "polarity": "pass", - "normalized_id": "m.s14b.applied.rest.policy.for.hermetic.fake.slack.api", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1598, - "text": "M-S14b: Failed to apply fake Slack API policy: $(tail -20 /tmp/nemoclaw-fake-slack-policy.log 2>/dev/null | tr '\\n' ' ' | cut -c1-300)", - "polarity": "fail", - "normalized_id": "m.s14b.failed.to.apply.fake.slack.api.policy.tail.20.tmp.nemoclaw.fake.slack.policy.log.2.dev.null.tr.n.cut.c1.300", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1680, - "text": "M-S15: Slack auth.test returned ok:true — real token round-trip verified!", - "polarity": "pass", - "normalized_id": "m.s15.slack.auth.test.returned.ok.true.real.token.round.trip.verified", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1682, - "text": "M-S15: Slack auth.test returned invalid_auth — full chain verified (OpenShell alias rewrite → fake Slack)", - "polarity": "pass", - "normalized_id": "m.s15.slack.auth.test.returned.invalid.auth.full.chain.verified.openshell.alias.rewrite.fake.slack", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1685, - "text": "M-S15a: fake Slack saw host-side bot token in header and urlencoded body", - "polarity": "pass", - "normalized_id": "m.s15a.fake.slack.saw.host.side.bot.token.in.header.and.urlencoded.body", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1687, - 
"text": "M-S15a: fake Slack capture did not prove bot header/body rewrite: ${sl_capture:0:300}", - "polarity": "fail", - "normalized_id": "m.s15a.fake.slack.capture.did.not.prove.bot.header.body.rewrite.sl.capture.0.300", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1692, - "text": "M-S15: Slack API call failed with error: ${sl_api:0:200}", - "polarity": "fail", - "normalized_id": "m.s15.slack.api.call.failed.with.error.sl.api.0.200", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1694, - "text": "M-S15: OpenShell did not resolve the Bolt-shape alias", - "polarity": "fail", - "normalized_id": "m.s15.openshell.did.not.resolve.the.bolt.shape.alias", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1696, - "text": "M-S15: L7 proxy did not substitute the canonical placeholder — substitution chain broken", - "polarity": "fail", - "normalized_id": "m.s15.l7.proxy.did.not.substitute.the.canonical.placeholder.substitution.chain.broken", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1698, - "text": "M-S15: Unexpected Slack response (status=$sl_status): ${sl_api:0:200}", - "polarity": "fail", - "normalized_id": "m.s15.unexpected.slack.response.status.sl.status.sl.api.0.200", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1719, - "text": "M-S15b: L7 proxy substitutes openshell:resolve:env:SLACK_BOT_TOKEN at egress (parallels Telegram M15 / Discord M17)", - "polarity": "pass", - "normalized_id": "m.s15b.l7.proxy.substitutes.openshell.resolve.env.slack.bot.token.at.egress.parallels.telegram.m15.discord.m17", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1723, - "text": "M-S15b: L7 proxy passed canonical placeholder through unchanged — substitution not 
happening for SLACK_BOT_TOKEN", - "polarity": "fail", - "normalized_id": "m.s15b.l7.proxy.passed.canonical.placeholder.through.unchanged.substitution.not.happening.for.slack.bot.token", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1725, - "text": "M-S15b: Unexpected response (status=$sl_canon_status): ${sl_canonical:0:200}", - "polarity": "fail", - "normalized_id": "m.s15b.unexpected.response.status.sl.canon.status.sl.canonical.0.200", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1746, - "text": "M-S15c: unset-var failed closed before upstream exposure", - "polarity": "pass", - "normalized_id": "m.s15c.unset.var.failed.closed.before.upstream.exposure", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1748, - "text": "M-S15c: unset-var triggered connection-level failure — proxy refuses to forward unsubstituted placeholder", - "polarity": "pass", - "normalized_id": "m.s15c.unset.var.triggered.connection.level.failure.proxy.refuses.to.forward.unsubstituted.placeholder", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1750, - "text": "M-S15c: unset-var returned HTTP 200 — proxy passed canonical placeholder through unchanged for unset env (substitution may be a no-op)", - "polarity": "fail", - "normalized_id": "m.s15c.unset.var.returned.http.200.proxy.passed.canonical.placeholder.through.unchanged.for.unset.env.substitution.may.be.a.no.op", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1752, - "text": "M-S15c: unset-var request reached fake Slack — unresolved placeholder escaped the proxy boundary", - "polarity": "fail", - "normalized_id": "m.s15c.unset.var.request.reached.fake.slack.unresolved.placeholder.escaped.the.proxy.boundary", - "mapping_status": "retired" - }, - { - "script": 
"test/e2e/test-messaging-providers.sh", - "line": 1773, - "text": "M-S16: apps.connections.open returned ok:true — real xapp token round-trip verified!", - "polarity": "pass", - "normalized_id": "m.s16.apps.connections.open.returned.ok.true.real.xapp.token.round.trip.verified", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1775, - "text": "M-S16: apps.connections.open auth-rejected — Socket Mode HTTPS leg verified (OpenShell alias rewrite → fake Slack)", - "polarity": "pass", - "normalized_id": "m.s16.apps.connections.open.auth.rejected.socket.mode.https.leg.verified.openshell.alias.rewrite.fake.slack", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1778, - "text": "M-S16a: fake Slack saw host-side app token in header and urlencoded body", - "polarity": "pass", - "normalized_id": "m.s16a.fake.slack.saw.host.side.app.token.in.header.and.urlencoded.body", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1780, - "text": "M-S16a: fake Slack capture did not prove app header/body rewrite: ${sl_app_capture:0:300}", - "polarity": "fail", - "normalized_id": "m.s16a.fake.slack.capture.did.not.prove.app.header.body.rewrite.sl.app.capture.0.300", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1785, - "text": "M-S16: OpenShell did not resolve the xapp- alias for Socket Mode path", - "polarity": "fail", - "normalized_id": "m.s16.openshell.did.not.resolve.the.xapp.alias.for.socket.mode.path", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1787, - "text": "M-S16: Unexpected apps.connections.open response (status=$sl_app_status): ${sl_app_api:0:200}", - "polarity": "fail", - "normalized_id": "m.s16.unexpected.apps.connections.open.response.status.sl.app.status.sl.app.api.0.200", - "mapping_status": "retired" - }, 
- { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1811, - "text": "M-S16b: unset app-token failed closed before upstream exposure", - "polarity": "pass", - "normalized_id": "m.s16b.unset.app.token.failed.closed.before.upstream.exposure", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1813, - "text": "M-S16b: L7 proxy substitutes openshell:resolve:env:SLACK_APP_TOKEN at egress (unset-var control diverged)", - "polarity": "pass", - "normalized_id": "m.s16b.l7.proxy.substitutes.openshell.resolve.env.slack.app.token.at.egress.unset.var.control.diverged", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1815, - "text": "M-S16b: unset app-token env returned HTTP 200 — proxy may be passing canonical placeholders through unchanged", - "polarity": "fail", - "normalized_id": "m.s16b.unset.app.token.env.returned.http.200.proxy.may.be.passing.canonical.placeholders.through.unchanged", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1817, - "text": "M-S16b: unset app-token request reached fake Slack — unresolved placeholder escaped the proxy boundary", - "polarity": "fail", - "normalized_id": "m.s16b.unset.app.token.request.reached.fake.slack.unresolved.placeholder.escaped.the.proxy.boundary", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1826, - "text": "M-S16b: L7 proxy passed canonical placeholder through unchanged for SLACK_APP_TOKEN", - "polarity": "fail", - "normalized_id": "m.s16b.l7.proxy.passed.canonical.placeholder.through.unchanged.for.slack.app.token", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1828, - "text": "M-S16b: Unexpected response (status=$sl_app_canon_status): ${sl_app_canonical:0:200}", - "polarity": "fail", - "normalized_id": 
"m.s16b.unexpected.response.status.sl.app.canon.status.sl.app.canonical.0.200", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1847, - "text": "M-S17: Slack channel @mention allowlist accepts configured user and denies another user", - "polarity": "pass", - "normalized_id": "m.s17.slack.channel.mention.allowlist.accepts.configured.user.and.denies.another.user", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1850, - "text": "M-S17a: fake Slack saw host-side bot token for channel reply", - "polarity": "pass", - "normalized_id": "m.s17a.fake.slack.saw.host.side.bot.token.for.channel.reply", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1852, - "text": "M-S17a: fake Slack capture did not prove channel reply token rewrite: ${sl_post_capture:0:300}", - "polarity": "fail", - "normalized_id": "m.s17a.fake.slack.capture.did.not.prove.channel.reply.token.rewrite.sl.post.capture.0.300", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1856, - "text": "M-S17b: fake Slack captured non-secret channel/text metadata for channel reply", - "polarity": "pass", - "normalized_id": "m.s17b.fake.slack.captured.non.secret.channel.text.metadata.for.channel.reply", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1858, - "text": "M-S17b: fake Slack did not capture expected channel reply metadata: ${sl_message_capture:0:300}", - "polarity": "fail", - "normalized_id": "m.s17b.fake.slack.did.not.capture.expected.channel.reply.metadata.sl.message.capture.0.300", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1865, - "text": "M-S17: Slack channel @mention proof failed: ${sl_channel_proof:0:500}", - "polarity": "fail", - "normalized_id": 
"m.s17.slack.channel.mention.proof.failed.sl.channel.proof.0.500", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1880, - "text": "M18: Telegram getMe returned 200 with real token", - "polarity": "pass", - "normalized_id": "m18.telegram.getme.returned.200.with.real.token", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1882, - "text": "M18b: Telegram response contains ok:true", - "polarity": "pass", - "normalized_id": "m18b.telegram.response.contains.ok.true", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1885, - "text": "M18: Expected Telegram getMe 200 with real token, got: $tg_status", - "polarity": "fail", - "normalized_id": "m18.expected.telegram.getme.200.with.real.token.got.tg.status", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1915, - "text": "M19: Telegram sendMessage succeeded", - "polarity": "pass", - "normalized_id": "m19.telegram.sendmessage.succeeded", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1917, - "text": "M19: Telegram sendMessage failed: ${send_result:0:200}", - "polarity": "fail", - "normalized_id": "m19.telegram.sendmessage.failed.send.result.0.200", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1929, - "text": "M20: Discord users/@me returned 200 with real token", - "polarity": "pass", - "normalized_id": "m20.discord.users.me.returned.200.with.real.token", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1931, - "text": "M20: Expected Discord users/@me 200 with real token, got: $dc_status", - "polarity": "fail", - "normalized_id": "m20.expected.discord.users.me.200.with.real.token.got.dc.status", - "mapping_status": "deferred" - }, - { - "script": 
"test/e2e/test-messaging-providers.sh", - "line": 1963, - "text": "S1: Gateway is serving on port 18789 — Slack auth failure did not crash it", - "polarity": "pass", - "normalized_id": "s1.gateway.is.serving.on.port.18789.slack.auth.failure.did.not.crash.it", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1965, - "text": "S1: Gateway is not serving on port 18789 (${gw_port:0:200})", - "polarity": "fail", - "normalized_id": "s1.gateway.is.not.serving.on.port.18789.gw.port.0.200", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 1991, - "text": "S2: Gateway log shows Slack rejection was caught by channel guard", - "polarity": "pass", - "normalized_id": "s2.gateway.log.shows.slack.rejection.was.caught.by.channel.guard", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 2016, - "text": "Cleanup: Sandbox '$SANDBOX_NAME' intentionally kept", - "polarity": "pass", - "normalized_id": "cleanup.sandbox.sandbox.name.intentionally.kept", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 2018, - "text": "Cleanup: Sandbox '$SANDBOX_NAME' still present after cleanup", - "polarity": "fail", - "normalized_id": "cleanup.sandbox.sandbox.name.still.present.after.cleanup", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-messaging-providers.sh", - "line": 2020, - "text": "Cleanup: Sandbox '$SANDBOX_NAME' removed", - "polarity": "pass", - "normalized_id": "cleanup.sandbox.sandbox.name.removed", - "mapping_status": "retired" - } - ] - }, - { - "script": "test/e2e/test-model-router-provider-routed-inference.sh", - "assertions": [ - { - "script": "test/e2e/test-model-router-provider-routed-inference.sh", - "line": 94, - "text": "Docker is running", - "polarity": "pass", - "normalized_id": "docker.is.running", - "mapping_status": "deferred" - }, - { - "script": 
"test/e2e/test-model-router-provider-routed-inference.sh", - "line": 96, - "text": "Docker is not running", - "polarity": "fail", - "normalized_id": "docker.is.not.running", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-model-router-provider-routed-inference.sh", - "line": 101, - "text": "NVIDIA_API_KEY is set", - "polarity": "pass", - "normalized_id": "nvidia.api.key.is.set", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-model-router-provider-routed-inference.sh", - "line": 103, - "text": "NVIDIA_API_KEY is required and must start with nvapi-", - "polarity": "fail", - "normalized_id": "nvidia.api.key.is.required.and.must.start.with.nvapi", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-model-router-provider-routed-inference.sh", - "line": 116, - "text": "nemoclaw is available: $(nemoclaw --version 2>/dev/null || echo unknown)", - "polarity": "pass", - "normalized_id": "nemoclaw.is.available.nemoclaw.version.2.dev.null.echo.unknown", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-model-router-provider-routed-inference.sh", - "line": 118, - "text": "nemoclaw not found after install", - "polarity": "fail", - "normalized_id": "nemoclaw.not.found.after.install", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-model-router-provider-routed-inference.sh", - "line": 139, - "text": "Model Router onboard completed", - "polarity": "pass", - "normalized_id": "model.router.onboard.completed", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-model-router-provider-routed-inference.sh", - "line": 141, - "text": "Model Router onboard failed (exit ${onboard_rc}); see ${ONBOARD_LOG}", - "polarity": "fail", - "normalized_id": "model.router.onboard.failed.exit.onboard.rc.see.onboard.log", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-model-router-provider-routed-inference.sh", - "line": 152, - "text": "model-router reports at least one healthy 
endpoint", - "polarity": "pass", - "normalized_id": "model.router.reports.at.least.one.healthy.endpoint", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-model-router-provider-routed-inference.sh", - "line": 158, - "text": "model-router has no healthy endpoints; expected #3255 main-equivalent failure", - "polarity": "fail", - "normalized_id": "model.router.has.no.healthy.endpoints.expected.3255.main.equivalent.failure", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-model-router-provider-routed-inference.sh", - "line": 174, - "text": "inference.local returned a routed Model Router completion", - "polarity": "pass", - "normalized_id": "inference.local.returned.a.routed.model.router.completion", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-model-router-provider-routed-inference.sh", - "line": 186, - "text": "Model Router inference.local did not return a routed completion; expected #3255 main-equivalent failure", - "polarity": "fail", - "normalized_id": "model.router.inference.local.did.not.return.a.routed.completion.expected.3255.main.equivalent.failure", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-model-router-provider-routed-inference.sh", - "line": 193, - "text": "Model Router provider-routed inference guard passed", - "polarity": "pass", - "normalized_id": "model.router.provider.routed.inference.guard.passed", - "mapping_status": "deferred" - } - ] - }, - { - "script": "test/e2e/test-network-policy.sh", - "assertions": [ - { - "script": "test/e2e/test-network-policy.sh", - "line": 241, - "text": "TC-NET-01: Non-whitelisted URL blocked ($response)", - "polarity": "pass", - "normalized_id": "tc.net.01.non.whitelisted.url.blocked.response", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-network-policy.sh", - "line": 243, - "text": "TC-NET-01: Deny default", - "polarity": "fail", - "normalized_id": "tc.net.01.deny.default", - "mapping_status": "deferred" - }, - { - 
"script": "test/e2e/test-network-policy.sh", - "line": 245, - "text": "TC-NET-01: Deny default", - "polarity": "fail", - "normalized_id": "tc.net.01.deny.default", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-network-policy.sh", - "line": 257, - "text": "TC-NET-02: Setup", - "polarity": "fail", - "normalized_id": "tc.net.02.setup", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-network-policy.sh", - "line": 269, - "text": "TC-NET-02: PyPI reachable via pip after preset applied", - "polarity": "pass", - "normalized_id": "tc.net.02.pypi.reachable.via.pip.after.preset.applied", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-network-policy.sh", - "line": 271, - "text": "TC-NET-02: PyPI reachable via pip (download started)", - "polarity": "pass", - "normalized_id": "tc.net.02.pypi.reachable.via.pip.download.started", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-network-policy.sh", - "line": 273, - "text": "TC-NET-02: Whitelist", - "polarity": "fail", - "normalized_id": "tc.net.02.whitelist", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-network-policy.sh", - "line": 305, - "text": "TC-NET-03: Setup", - "polarity": "fail", - "normalized_id": "tc.net.03.setup", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-network-policy.sh", - "line": 309, - "text": "TC-NET-03: Interactive policy-add", - "polarity": "fail", - "normalized_id": "tc.net.03.interactive.policy.add", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-network-policy.sh", - "line": 325, - "text": "TC-NET-03: Endpoint reachable after live policy-add ($after)", - "polarity": "pass", - "normalized_id": "tc.net.03.endpoint.reachable.after.live.policy.add.after", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-network-policy.sh", - "line": 327, - "text": "TC-NET-03: Live policy-add", - "polarity": "fail", - "normalized_id": "tc.net.03.live.policy.add", - 
"mapping_status": "deferred" - }, - { - "script": "test/e2e/test-network-policy.sh", - "line": 329, - "text": "TC-NET-03: Live policy-add", - "polarity": "fail", - "normalized_id": "tc.net.03.live.policy.add", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-network-policy.sh", - "line": 356, - "text": "TC-NET-04: Dry-run printed endpoint info", - "polarity": "pass", - "normalized_id": "tc.net.04.dry.run.printed.endpoint.info", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-network-policy.sh", - "line": 358, - "text": "TC-NET-04: Dry-run output", - "polarity": "fail", - "normalized_id": "tc.net.04.dry.run.output", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-network-policy.sh", - "line": 371, - "text": "TC-NET-04: Policy unchanged after dry-run (blocked: $after)", - "polarity": "pass", - "normalized_id": "tc.net.04.policy.unchanged.after.dry.run.blocked.after", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-network-policy.sh", - "line": 373, - "text": "TC-NET-04: Dry-run side effect", - "polarity": "fail", - "normalized_id": "tc.net.04.dry.run.side.effect", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-network-policy.sh", - "line": 375, - "text": "TC-NET-04: Dry-run verification", - "polarity": "fail", - "normalized_id": "tc.net.04.dry.run.verification", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-network-policy.sh", - "line": 397, - "text": "TC-NET-07: Inference via inference.local succeeded", - "polarity": "pass", - "normalized_id": "tc.net.07.inference.via.inference.local.succeeded", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-network-policy.sh", - "line": 399, - "text": "TC-NET-07: Inference", - "polarity": "fail", - "normalized_id": "tc.net.07.inference", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-network-policy.sh", - "line": 414, - "text": "TC-NET-07: Direct provider access blocked 
($direct_response)", - "polarity": "pass", - "normalized_id": "tc.net.07.direct.provider.access.blocked.direct.response", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-network-policy.sh", - "line": 416, - "text": "TC-NET-07: Direct provider", - "polarity": "fail", - "normalized_id": "tc.net.07.direct.provider", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-network-policy.sh", - "line": 418, - "text": "TC-NET-07: Direct provider", - "polarity": "fail", - "normalized_id": "tc.net.07.direct.provider", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-network-policy.sh", - "line": 435, - "text": "TC-NET-05: Setup", - "polarity": "fail", - "normalized_id": "tc.net.05.setup", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-network-policy.sh", - "line": 445, - "text": "TC-NET-05: Sandbox start time unchanged after policy-add (no restart)", - "polarity": "pass", - "normalized_id": "tc.net.05.sandbox.start.time.unchanged.after.policy.add.no.restart", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-network-policy.sh", - "line": 449, - "text": "TC-NET-05: Hot-reload", - "polarity": "fail", - "normalized_id": "tc.net.05.hot.reload", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-network-policy.sh", - "line": 471, - "text": "TC-NET-06: Setup", - "polarity": "fail", - "normalized_id": "tc.net.06.setup", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-network-policy.sh", - "line": 482, - "text": "TC-NET-06: npm reachable under permissive policy", - "polarity": "pass", - "normalized_id": "tc.net.06.npm.reachable.under.permissive.policy", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-network-policy.sh", - "line": 484, - "text": "TC-NET-06: Permissive", - "polarity": "fail", - "normalized_id": "tc.net.06.permissive", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-network-policy.sh", - "line": 502, - "text": 
"+ ip +", - "polarity": "fail", - "normalized_id": "ip", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-network-policy.sh", - "line": 505, - "text": "+ ip +", - "polarity": "fail", - "normalized_id": "ip", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-network-policy.sh", - "line": 513, - "text": "TC-NET-09: SSRF validation correctly blocks dangerous IPs", - "polarity": "pass", - "normalized_id": "tc.net.09.ssrf.validation.correctly.blocks.dangerous.ips", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-network-policy.sh", - "line": 515, - "text": "TC-NET-09: SSRF", - "polarity": "fail", - "normalized_id": "tc.net.09.ssrf", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-network-policy.sh", - "line": 537, - "text": "$PASS${NC}", - "polarity": "pass", - "normalized_id": "pass.nc", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-network-policy.sh", - "line": 538, - "text": "$FAIL${NC}", - "polarity": "fail", - "normalized_id": "fail.nc", - "mapping_status": "deferred" - } - ] - }, - { - "script": "test/e2e/test-ollama-auth-proxy-e2e.sh", - "assertions": [ - { - "script": "test/e2e/test-ollama-auth-proxy-e2e.sh", - "line": 78, - "text": "Node.js not found", - "polarity": "fail", - "normalized_id": "node.js.not.found", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-ollama-auth-proxy-e2e.sh", - "line": 81, - "text": "Node.js available: $(node --version)", - "polarity": "pass", - "normalized_id": "node.js.available.node.version", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-ollama-auth-proxy-e2e.sh", - "line": 84, - "text": "curl not found", - "polarity": "fail", - "normalized_id": "curl.not.found", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-ollama-auth-proxy-e2e.sh", - "line": 87, - "text": "curl available", - "polarity": "pass", - "normalized_id": "curl.available", - "mapping_status": "deferred" - }, - { - 
"script": "test/e2e/test-ollama-auth-proxy-e2e.sh", - "line": 90, - "text": "Proxy script not found at $PROXY_SCRIPT", - "polarity": "fail", - "normalized_id": "proxy.script.not.found.at.proxy.script", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-ollama-auth-proxy-e2e.sh", - "line": 93, - "text": "Proxy script exists", - "polarity": "pass", - "normalized_id": "proxy.script.exists", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-ollama-auth-proxy-e2e.sh", - "line": 101, - "text": "Ollama already installed: $(ollama --version 2>/dev/null || echo unknown)", - "polarity": "pass", - "normalized_id": "ollama.already.installed.ollama.version.2.dev.null.echo.unknown", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-ollama-auth-proxy-e2e.sh", - "line": 105, - "text": "Ollama installed", - "polarity": "pass", - "normalized_id": "ollama.installed", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-ollama-auth-proxy-e2e.sh", - "line": 107, - "text": "Ollama install failed", - "polarity": "fail", - "normalized_id": "ollama.install.failed", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-ollama-auth-proxy-e2e.sh", - "line": 125, - "text": "Ollama running on 127.0.0.1:${OLLAMA_PORT}", - "polarity": "pass", - "normalized_id": "ollama.running.on.127.0.0.1.ollama.port", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-ollama-auth-proxy-e2e.sh", - "line": 127, - "text": "Ollama failed to start on 127.0.0.1:${OLLAMA_PORT}", - "polarity": "fail", - "normalized_id": "ollama.failed.to.start.on.127.0.0.1.ollama.port", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-ollama-auth-proxy-e2e.sh", - "line": 134, - "text": "Model $MODEL pulled", - "polarity": "pass", - "normalized_id": "model.model.pulled", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-ollama-auth-proxy-e2e.sh", - "line": 136, - "text": "Failed to pull $MODEL", - "polarity": 
"fail", - "normalized_id": "failed.to.pull.model", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-ollama-auth-proxy-e2e.sh", - "line": 142, - "text": "Model $MODEL available in Ollama", - "polarity": "pass", - "normalized_id": "model.model.available.in.ollama", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-ollama-auth-proxy-e2e.sh", - "line": 144, - "text": "Model $MODEL not found in /api/tags", - "polarity": "fail", - "normalized_id": "model.model.not.found.in.api.tags", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-ollama-auth-proxy-e2e.sh", - "line": 173, - "text": "Auth proxy running on 0.0.0.0:${PROXY_PORT} (HTTP $STATUS)", - "polarity": "pass", - "normalized_id": "auth.proxy.running.on.0.0.0.0.proxy.port.http.status", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-ollama-auth-proxy-e2e.sh", - "line": 175, - "text": "Auth proxy failed to start (no HTTP response: '$STATUS')", - "polarity": "fail", - "normalized_id": "auth.proxy.failed.to.start.no.http.response.status", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-ollama-auth-proxy-e2e.sh", - "line": 188, - "text": "Unauthenticated POST /api/generate → 401", - "polarity": "pass", - "normalized_id": "unauthenticated.post.api.generate.401", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-ollama-auth-proxy-e2e.sh", - "line": 190, - "text": "Expected 401 for unauthenticated POST, got $STATUS", - "polarity": "fail", - "normalized_id": "expected.401.for.unauthenticated.post.got.status", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-ollama-auth-proxy-e2e.sh", - "line": 199, - "text": "Wrong token POST /api/generate → 401", - "polarity": "pass", - "normalized_id": "wrong.token.post.api.generate.401", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-ollama-auth-proxy-e2e.sh", - "line": 201, - "text": "Expected 401 for wrong token, got $STATUS", - "polarity": 
"fail", - "normalized_id": "expected.401.for.wrong.token.got.status", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-ollama-auth-proxy-e2e.sh", - "line": 210, - "text": "Correct token GET /api/tags → 200", - "polarity": "pass", - "normalized_id": "correct.token.get.api.tags.200", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-ollama-auth-proxy-e2e.sh", - "line": 212, - "text": "Expected 200 for correct token, got $STATUS", - "polarity": "fail", - "normalized_id": "expected.200.for.correct.token.got.status", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-ollama-auth-proxy-e2e.sh", - "line": 219, - "text": "Unauthenticated GET /api/tags → 401", - "polarity": "pass", - "normalized_id": "unauthenticated.get.api.tags.401", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-ollama-auth-proxy-e2e.sh", - "line": 221, - "text": "Expected 401 for unauthenticated GET /api/tags, got $STATUS", - "polarity": "fail", - "normalized_id": "expected.401.for.unauthenticated.get.api.tags.got.status", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-ollama-auth-proxy-e2e.sh", - "line": 228, - "text": "Unauthenticated POST /api/tags → 401", - "polarity": "pass", - "normalized_id": "unauthenticated.post.api.tags.401", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-ollama-auth-proxy-e2e.sh", - "line": 230, - "text": "Expected 401 for unauthenticated POST /api/tags, got $STATUS", - "polarity": "fail", - "normalized_id": "expected.401.for.unauthenticated.post.api.tags.got.status", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-ollama-auth-proxy-e2e.sh", - "line": 238, - "text": "Proxy strips auth header — Ollama responds normally", - "polarity": "pass", - "normalized_id": "proxy.strips.auth.header.ollama.responds.normally", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-ollama-auth-proxy-e2e.sh", - "line": 240, - "text": "Proxy may not be 
stripping auth header correctly", - "polarity": "fail", - "normalized_id": "proxy.may.not.be.stripping.auth.header.correctly", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-ollama-auth-proxy-e2e.sh", - "line": 269, - "text": "Inference through proxy: got chat completion response", - "polarity": "pass", - "normalized_id": "inference.through.proxy.got.chat.completion.response", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-ollama-auth-proxy-e2e.sh", - "line": 271, - "text": "Inference through proxy: invalid response structure", - "polarity": "fail", - "normalized_id": "inference.through.proxy.invalid.response.structure", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-ollama-auth-proxy-e2e.sh", - "line": 275, - "text": "Inference through proxy: empty response", - "polarity": "fail", - "normalized_id": "inference.through.proxy.empty.response", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-ollama-auth-proxy-e2e.sh", - "line": 297, - "text": "Inference through proxy: got /api/generate response", - "polarity": "pass", - "normalized_id": "inference.through.proxy.got.api.generate.response", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-ollama-auth-proxy-e2e.sh", - "line": 299, - "text": "Inference through proxy: invalid /api/generate response", - "polarity": "fail", - "normalized_id": "inference.through.proxy.invalid.api.generate.response", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-ollama-auth-proxy-e2e.sh", - "line": 303, - "text": "Inference through proxy: empty /api/generate response", - "polarity": "fail", - "normalized_id": "inference.through.proxy.empty.api.generate.response", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-ollama-auth-proxy-e2e.sh", - "line": 315, - "text": "Inference without token → 401 (not forwarded to Ollama)", - "polarity": "pass", - "normalized_id": 
"inference.without.token.401.not.forwarded.to.ollama", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-ollama-auth-proxy-e2e.sh", - "line": 317, - "text": "Expected 401 for unauthenticated inference, got $STATUS", - "polarity": "fail", - "normalized_id": "expected.401.for.unauthenticated.inference.got.status", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-ollama-auth-proxy-e2e.sh", - "line": 327, - "text": "Token file exists at $TOKEN_FILE", - "polarity": "pass", - "normalized_id": "token.file.exists.at.token.file", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-ollama-auth-proxy-e2e.sh", - "line": 329, - "text": "Token file missing", - "polarity": "fail", - "normalized_id": "token.file.missing", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-ollama-auth-proxy-e2e.sh", - "line": 335, - "text": "Token file permissions: 600", - "polarity": "pass", - "normalized_id": "token.file.permissions.600", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-ollama-auth-proxy-e2e.sh", - "line": 337, - "text": "Token file permissions: expected 600, got $PERMS", - "polarity": "fail", - "normalized_id": "token.file.permissions.expected.600.got.perms", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-ollama-auth-proxy-e2e.sh", - "line": 343, - "text": "Token file content matches generated token", - "polarity": "pass", - "normalized_id": "token.file.content.matches.generated.token", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-ollama-auth-proxy-e2e.sh", - "line": 345, - "text": "Token file content mismatch", - "polarity": "fail", - "normalized_id": "token.file.content.mismatch", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-ollama-auth-proxy-e2e.sh", - "line": 363, - "text": "Proxy confirmed dead after kill", - "polarity": "pass", - "normalized_id": "proxy.confirmed.dead.after.kill", - "mapping_status": "deferred" - }, - { - 
"script": "test/e2e/test-ollama-auth-proxy-e2e.sh", - "line": 365, - "text": "Proxy still responding after kill (status: $STATUS)", - "polarity": "fail", - "normalized_id": "proxy.still.responding.after.kill.status.status", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-ollama-auth-proxy-e2e.sh", - "line": 382, - "text": "Proxy restarted from persisted token (HTTP $STATUS)", - "polarity": "pass", - "normalized_id": "proxy.restarted.from.persisted.token.http.status", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-ollama-auth-proxy-e2e.sh", - "line": 384, - "text": "Proxy failed to restart (no HTTP response: '$STATUS')", - "polarity": "fail", - "normalized_id": "proxy.failed.to.restart.no.http.response.status", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-ollama-auth-proxy-e2e.sh", - "line": 404, - "text": "Inference works after proxy restart with persisted token", - "polarity": "pass", - "normalized_id": "inference.works.after.proxy.restart.with.persisted.token", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-ollama-auth-proxy-e2e.sh", - "line": 406, - "text": "Inference failed after proxy restart", - "polarity": "fail", - "normalized_id": "inference.failed.after.proxy.restart", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-ollama-auth-proxy-e2e.sh", - "line": 411, - "text": "Persisted token matches original — no token rotation on restart", - "polarity": "pass", - "normalized_id": "persisted.token.matches.original.no.token.rotation.on.restart", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-ollama-auth-proxy-e2e.sh", - "line": 413, - "text": "Token changed on restart (should be the same persisted token)", - "polarity": "fail", - "normalized_id": "token.changed.on.restart.should.be.the.same.persisted.token", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-ollama-auth-proxy-e2e.sh", - "line": 437, - "text": "Container can reach 
proxy at host.openshell.internal:${PROXY_PORT} (HTTP $CONTAINER_STATUS)", - "polarity": "pass", - "normalized_id": "container.can.reach.proxy.at.host.openshell.internal.proxy.port.http.container.status", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-ollama-auth-proxy-e2e.sh", - "line": 439, - "text": "Container cannot reach proxy — reachability check would fail during onboard", - "polarity": "fail", - "normalized_id": "container.cannot.reach.proxy.reachability.check.would.fail.during.onboard", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-ollama-auth-proxy-e2e.sh", - "line": 450, - "text": "Container CANNOT reach Ollama directly on ${OLLAMA_PORT} (localhost-only binding works)", - "polarity": "pass", - "normalized_id": "container.cannot.reach.ollama.directly.on.ollama.port.localhost.only.binding.works", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-ollama-auth-proxy-e2e.sh", - "line": 452, - "text": "Container CAN reach Ollama on ${OLLAMA_PORT} — Ollama may be on 0.0.0.0", - "polarity": "fail", - "normalized_id": "container.can.reach.ollama.on.ollama.port.ollama.may.be.on.0.0.0.0", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-ollama-auth-proxy-e2e.sh", - "line": 456, - "text": "Container reachability: skipped (no Docker)", - "polarity": "pass", - "normalized_id": "container.reachability.skipped.no.docker", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-ollama-auth-proxy-e2e.sh", - "line": 487, - "text": "Confirmed: proxy running with old token, rejects new token (divergence exists)", - "polarity": "pass", - "normalized_id": "confirmed.proxy.running.with.old.token.rejects.new.token.divergence.exists", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-ollama-auth-proxy-e2e.sh", - "line": 489, - "text": "Divergence not reproduced (old=$OLD_TOKEN_OK new=$NEW_TOKEN_OK) — aborting test", - "polarity": "fail", - "normalized_id": 
"divergence.not.reproduced.old.old.token.ok.new.new.token.ok.aborting.test", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-ollama-auth-proxy-e2e.sh", - "line": 527, - "text": "After ensureOllamaAuthProxy: proxy accepts the file token (divergence fixed)", - "polarity": "pass", - "normalized_id": "after.ensureollamaauthproxy.proxy.accepts.the.file.token.divergence.fixed", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-ollama-auth-proxy-e2e.sh", - "line": 529, - "text": "After ensureOllamaAuthProxy: proxy still rejects file token (divergence NOT fixed)", - "polarity": "fail", - "normalized_id": "after.ensureollamaauthproxy.proxy.still.rejects.file.token.divergence.not.fixed", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-ollama-auth-proxy-e2e.sh", - "line": 536, - "text": "Token divergence: skipped (no prior token)", - "polarity": "pass", - "normalized_id": "token.divergence.skipped.no.prior.token", - "mapping_status": "deferred" - } - ] - }, - { - "script": "test/e2e/test-onboard-inference-smoke.sh", - "assertions": [ - { - "script": "test/e2e/test-onboard-inference-smoke.sh", - "line": 156, - "text": "setupInference() accepted a configured route without proving the chat/completions path; onboard would later print Installation complete while the first real request returns HTTP 503 (#3253)", - "polarity": "fail", - "normalized_id": "setupinference.accepted.a.configured.route.without.proving.the.chat.completions.path.onboard.would.later.print.installation.complete.while.the.first.real.request.returns.http.503.3253", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-inference-smoke.sh", - "line": 158, - "text": "setupInference() did not accept a runtime-broken inference route", - "polarity": "pass", - "normalized_id": "setupinference.did.not.accept.a.runtime.broken.inference.route", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-inference-smoke.sh", - 
"line": 161, - "text": "onboard did not surface actionable inference smoke diagnostics (expected provider/model/api_base/credential env/upstream 503)", - "polarity": "fail", - "normalized_id": "onboard.did.not.surface.actionable.inference.smoke.diagnostics.expected.provider.model.api.base.credential.env.upstream.503", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-inference-smoke.sh", - "line": 163, - "text": "onboard surfaced actionable inference smoke diagnostics for the broken route", - "polarity": "pass", - "normalized_id": "onboard.surfaced.actionable.inference.smoke.diagnostics.for.the.broken.route", - "mapping_status": "deferred" - } - ] - }, - { - "script": "test/e2e/test-onboard-repair.sh", - "assertions": [ - { - "script": "test/e2e/test-onboard-repair.sh", - "line": 123, - "text": "Pre-cleanup complete", - "polarity": "pass", - "normalized_id": "pre.cleanup.complete", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-repair.sh", - "line": 131, - "text": "Docker is running", - "polarity": "pass", - "normalized_id": "docker.is.running", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-onboard-repair.sh", - "line": 133, - "text": "Docker is not running — cannot continue", - "polarity": "fail", - "normalized_id": "docker.is.not.running.cannot.continue", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-repair.sh", - "line": 138, - "text": "openshell CLI installed", - "polarity": "pass", - "normalized_id": "openshell.cli.installed", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-repair.sh", - "line": 140, - "text": "openshell CLI not found — cannot continue", - "polarity": "fail", - "normalized_id": "openshell.cli.not.found.cannot.continue", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-repair.sh", - "line": 145, - "text": "Node.js available", - "polarity": "pass", - "normalized_id": "node.js.available", - 
"mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-repair.sh", - "line": 147, - "text": "Node.js not found — cannot continue", - "polarity": "fail", - "normalized_id": "node.js.not.found.cannot.continue", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-repair.sh", - "line": 152, - "text": "NVIDIA_API_KEY is set (starts with nvapi-)", - "polarity": "pass", - "normalized_id": "nvidia.api.key.is.set.starts.with.nvapi", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-repair.sh", - "line": 154, - "text": "NVIDIA_API_KEY not set or invalid — required for resume completion", - "polarity": "fail", - "normalized_id": "nvidia.api.key.not.set.or.invalid.required.for.resume.completion", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-repair.sh", - "line": 159, - "text": "Exported NVIDIA_API_KEY for the repair run (host writes nothing to disk; OpenShell gateway is the system of record)", - "polarity": "pass", - "normalized_id": "exported.nvidia.api.key.for.the.repair.run.host.writes.nothing.to.disk.openshell.gateway.is.the.system.of.record", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-repair.sh", - "line": 187, - "text": "First onboard exited 1 (expected interrupted run)", - "polarity": "pass", - "normalized_id": "first.onboard.exited.1.expected.interrupted.run", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-repair.sh", - "line": 189, - "text": "First onboard exited $first_exit (expected 1)", - "polarity": "fail", - "normalized_id": "first.onboard.exited.first.exit.expected.1", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-repair.sh", - "line": 195, - "text": "Onboard session file created", - "polarity": "pass", - "normalized_id": "onboard.session.file.created", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-repair.sh", - "line": 197, - "text": "Onboard session 
file missing after interrupted run", - "polarity": "fail", - "normalized_id": "onboard.session.file.missing.after.interrupted.run", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-repair.sh", - "line": 201, - "text": "First run failed at policy setup as intended", - "polarity": "pass", - "normalized_id": "first.run.failed.at.policy.setup.as.intended", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-repair.sh", - "line": 203, - "text": "First run did not fail at the expected policy step", - "polarity": "fail", - "normalized_id": "first.run.did.not.fail.at.the.expected.policy.step", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-repair.sh", - "line": 207, - "text": "Sandbox '$SANDBOX_NAME' exists after interrupted run", - "polarity": "pass", - "normalized_id": "sandbox.sandbox.name.exists.after.interrupted.run", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-repair.sh", - "line": 209, - "text": "Sandbox '$SANDBOX_NAME' not found after interrupted run", - "polarity": "fail", - "normalized_id": "sandbox.sandbox.name.not.found.after.interrupted.run", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-repair.sh", - "line": 222, - "text": "Sandbox '$SANDBOX_NAME' removed to simulate stale recorded state", - "polarity": "pass", - "normalized_id": "sandbox.sandbox.name.removed.to.simulate.stale.recorded.state", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-onboard-repair.sh", - "line": 224, - "text": "Sandbox '$SANDBOX_NAME' still exists after forced deletion", - "polarity": "fail", - "normalized_id": "sandbox.sandbox.name.still.exists.after.forced.deletion", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-repair.sh", - "line": 239, - "text": "Resume completed after repairing missing sandbox", - "polarity": "pass", - "normalized_id": "resume.completed.after.repairing.missing.sandbox", - 
"mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-repair.sh", - "line": 241, - "text": "Resume exited $repair_exit during missing-sandbox repair", - "polarity": "fail", - "normalized_id": "resume.exited.repair.exit.during.missing.sandbox.repair", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-repair.sh", - "line": 247, - "text": "Repair resume skipped preflight", - "polarity": "pass", - "normalized_id": "repair.resume.skipped.preflight", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-repair.sh", - "line": 249, - "text": "Repair resume did not skip preflight", - "polarity": "fail", - "normalized_id": "repair.resume.did.not.skip.preflight", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-repair.sh", - "line": 253, - "text": "Repair resume skipped gateway", - "polarity": "pass", - "normalized_id": "repair.resume.skipped.gateway", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-repair.sh", - "line": 255, - "text": "Repair resume did not skip gateway", - "polarity": "fail", - "normalized_id": "repair.resume.did.not.skip.gateway", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-repair.sh", - "line": 259, - "text": "Repair resume detected missing sandbox", - "polarity": "pass", - "normalized_id": "repair.resume.detected.missing.sandbox", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-repair.sh", - "line": 261, - "text": "Repair resume did not report missing sandbox recreation", - "polarity": "fail", - "normalized_id": "repair.resume.did.not.report.missing.sandbox.recreation", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-repair.sh", - "line": 266, - "text": "Repair resume recreated sandbox", - "polarity": "pass", - "normalized_id": "repair.resume.recreated.sandbox", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-repair.sh", - 
"line": 268, - "text": "Repair resume did not rerun sandbox creation", - "polarity": "fail", - "normalized_id": "repair.resume.did.not.rerun.sandbox.creation", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-repair.sh", - "line": 272, - "text": "Repaired sandbox '$SANDBOX_NAME' is manageable", - "polarity": "pass", - "normalized_id": "repaired.sandbox.sandbox.name.is.manageable", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-repair.sh", - "line": 274, - "text": "Repaired sandbox '$SANDBOX_NAME' status failed", - "polarity": "fail", - "normalized_id": "repaired.sandbox.sandbox.name.status.failed", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-repair.sh", - "line": 295, - "text": "Re-created interrupted session for conflict tests", - "polarity": "pass", - "normalized_id": "re.created.interrupted.session.for.conflict.tests", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-repair.sh", - "line": 311, - "text": "Resume rejected conflicting sandbox name", - "polarity": "pass", - "normalized_id": "resume.rejected.conflicting.sandbox.name", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-repair.sh", - "line": 313, - "text": "Resume exited $sandbox_conflict_exit for conflicting sandbox (expected 1)", - "polarity": "fail", - "normalized_id": "resume.exited.sandbox.conflict.exit.for.conflicting.sandbox.expected.1", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-repair.sh", - "line": 317, - "text": "Conflicting sandbox message is explicit", - "polarity": "pass", - "normalized_id": "conflicting.sandbox.message.is.explicit", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-repair.sh", - "line": 319, - "text": "Conflicting sandbox message missing or incorrect", - "polarity": "fail", - "normalized_id": "conflicting.sandbox.message.missing.or.incorrect", - "mapping_status": "deferred" - }, - { 
- "script": "test/e2e/test-onboard-repair.sh", - "line": 342, - "text": "Resume rejected conflicting provider/model", - "polarity": "pass", - "normalized_id": "resume.rejected.conflicting.provider.model", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-repair.sh", - "line": 344, - "text": "Resume exited $provider_conflict_exit for conflicting provider/model (expected 1)", - "polarity": "fail", - "normalized_id": "resume.exited.provider.conflict.exit.for.conflicting.provider.model.expected.1", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-repair.sh", - "line": 348, - "text": "Conflicting provider message is explicit", - "polarity": "pass", - "normalized_id": "conflicting.provider.message.is.explicit", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-repair.sh", - "line": 350, - "text": "Conflicting provider message missing or incorrect", - "polarity": "fail", - "normalized_id": "conflicting.provider.message.missing.or.incorrect", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-repair.sh", - "line": 354, - "text": "Conflicting model message is explicit", - "polarity": "pass", - "normalized_id": "conflicting.model.message.is.explicit", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-repair.sh", - "line": 356, - "text": "Conflicting model message missing or incorrect", - "polarity": "fail", - "normalized_id": "conflicting.model.message.missing.or.incorrect", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-repair.sh", - "line": 375, - "text": "Sandbox '$SANDBOX_NAME' still exists after cleanup", - "polarity": "fail", - "normalized_id": "sandbox.sandbox.name.still.exists.after.cleanup", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-repair.sh", - "line": 377, - "text": "Sandbox '$SANDBOX_NAME' cleaned up", - "polarity": "pass", - "normalized_id": "sandbox.sandbox.name.cleaned.up", - 
"mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-repair.sh", - "line": 381, - "text": "Onboard session file still exists after cleanup", - "polarity": "fail", - "normalized_id": "onboard.session.file.still.exists.after.cleanup", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-repair.sh", - "line": 383, - "text": "Onboard session file cleaned up", - "polarity": "pass", - "normalized_id": "onboard.session.file.cleaned.up", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-repair.sh", - "line": 386, - "text": "Final cleanup complete", - "polarity": "pass", - "normalized_id": "final.cleanup.complete", - "mapping_status": "deferred" - } - ] - }, - { - "script": "test/e2e/test-onboard-resume.sh", - "assertions": [ - { - "script": "test/e2e/test-onboard-resume.sh", - "line": 96, - "text": "Pre-cleanup complete", - "polarity": "pass", - "normalized_id": "pre.cleanup.complete", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-resume.sh", - "line": 104, - "text": "Docker is running", - "polarity": "pass", - "normalized_id": "docker.is.running", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-onboard-resume.sh", - "line": 106, - "text": "Docker is not running — cannot continue", - "polarity": "fail", - "normalized_id": "docker.is.not.running.cannot.continue", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-resume.sh", - "line": 111, - "text": "openshell CLI installed", - "polarity": "pass", - "normalized_id": "openshell.cli.installed", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-resume.sh", - "line": 113, - "text": "openshell CLI not found — cannot continue", - "polarity": "fail", - "normalized_id": "openshell.cli.not.found.cannot.continue", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-resume.sh", - "line": 118, - "text": "Node.js available", - "polarity": "pass", - 
"normalized_id": "node.js.available", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-resume.sh", - "line": 120, - "text": "Node.js not found — cannot continue", - "polarity": "fail", - "normalized_id": "node.js.not.found.cannot.continue", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-resume.sh", - "line": 125, - "text": "NVIDIA_API_KEY is set (starts with nvapi-)", - "polarity": "pass", - "normalized_id": "nvidia.api.key.is.set.starts.with.nvapi", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-resume.sh", - "line": 127, - "text": "NVIDIA_API_KEY not set or invalid — required for resume completion", - "polarity": "fail", - "normalized_id": "nvidia.api.key.not.set.or.invalid.required.for.resume.completion", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-resume.sh", - "line": 132, - "text": "Network access to integrate.api.nvidia.com", - "polarity": "pass", - "normalized_id": "network.access.to.integrate.api.nvidia.com", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-resume.sh", - "line": 134, - "text": "Cannot reach integrate.api.nvidia.com", - "polarity": "fail", - "normalized_id": "cannot.reach.integrate.api.nvidia.com", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-resume.sh", - "line": 139, - "text": "Exported NVIDIA_API_KEY for the resume run (host writes nothing to disk; OpenShell gateway is the system of record)", - "polarity": "pass", - "normalized_id": "exported.nvidia.api.key.for.the.resume.run.host.writes.nothing.to.disk.openshell.gateway.is.the.system.of.record", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-resume.sh", - "line": 167, - "text": "First onboard exited 1 (expected interrupted run)", - "polarity": "pass", - "normalized_id": "first.onboard.exited.1.expected.interrupted.run", - "mapping_status": "deferred" - }, - { - "script": 
"test/e2e/test-onboard-resume.sh", - "line": 169, - "text": "First onboard exited $first_exit (expected 1)", - "polarity": "fail", - "normalized_id": "first.onboard.exited.first.exit.expected.1", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-resume.sh", - "line": 175, - "text": "Sandbox '$SANDBOX_NAME' created before interruption", - "polarity": "pass", - "normalized_id": "sandbox.sandbox.name.created.before.interruption", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-resume.sh", - "line": 177, - "text": "Sandbox creation not confirmed in first run output", - "polarity": "fail", - "normalized_id": "sandbox.creation.not.confirmed.in.first.run.output", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-resume.sh", - "line": 181, - "text": "First run failed at policy setup as intended", - "polarity": "pass", - "normalized_id": "first.run.failed.at.policy.setup.as.intended", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-resume.sh", - "line": 183, - "text": "First run did not fail at the expected policy step", - "polarity": "fail", - "normalized_id": "first.run.did.not.fail.at.the.expected.policy.step", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-resume.sh", - "line": 187, - "text": "Sandbox '$SANDBOX_NAME' exists after interrupted run", - "polarity": "pass", - "normalized_id": "sandbox.sandbox.name.exists.after.interrupted.run", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-resume.sh", - "line": 189, - "text": "Sandbox '$SANDBOX_NAME' not found after interrupted run", - "polarity": "fail", - "normalized_id": "sandbox.sandbox.name.not.found.after.interrupted.run", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-resume.sh", - "line": 193, - "text": "Onboard session file created", - "polarity": "pass", - "normalized_id": "onboard.session.file.created", - "mapping_status": 
"deferred" - }, - { - "script": "test/e2e/test-onboard-resume.sh", - "line": 195, - "text": "Onboard session file missing after interrupted run", - "polarity": "fail", - "normalized_id": "onboard.session.file.missing.after.interrupted.run", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-resume.sh", - "line": 207, - "text": "Session file recorded openclaw completion and policy failure", - "polarity": "pass", - "normalized_id": "session.file.recorded.openclaw.completion.and.policy.failure", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-resume.sh", - "line": 208, - "text": "Session file did not record the expected interrupted state", - "polarity": "fail", - "normalized_id": "session.file.did.not.record.the.expected.interrupted.state", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-resume.sh", - "line": 229, - "text": "Resume completed successfully", - "polarity": "pass", - "normalized_id": "resume.completed.successfully", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-resume.sh", - "line": 231, - "text": "Resume exited $resume_exit (expected 0)", - "polarity": "fail", - "normalized_id": "resume.exited.resume.exit.expected.0", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-resume.sh", - "line": 237, - "text": "Resume skipped preflight", - "polarity": "pass", - "normalized_id": "resume.skipped.preflight", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-resume.sh", - "line": 239, - "text": "Resume did not skip preflight", - "polarity": "fail", - "normalized_id": "resume.did.not.skip.preflight", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-resume.sh", - "line": 243, - "text": "Resume skipped gateway", - "polarity": "pass", - "normalized_id": "resume.skipped.gateway", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-resume.sh", - "line": 245, - "text": 
"Resume did not skip gateway", - "polarity": "fail", - "normalized_id": "resume.did.not.skip.gateway", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-resume.sh", - "line": 249, - "text": "Resume skipped sandbox", - "polarity": "pass", - "normalized_id": "resume.skipped.sandbox", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-resume.sh", - "line": 251, - "text": "Resume did not skip sandbox", - "polarity": "fail", - "normalized_id": "resume.did.not.skip.sandbox", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-resume.sh", - "line": 255, - "text": "Resume reran preflight unexpectedly", - "polarity": "fail", - "normalized_id": "resume.reran.preflight.unexpectedly", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-onboard-resume.sh", - "line": 257, - "text": "Resume did not rerun preflight", - "polarity": "pass", - "normalized_id": "resume.did.not.rerun.preflight", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-resume.sh", - "line": 261, - "text": "Resume reran gateway startup unexpectedly", - "polarity": "fail", - "normalized_id": "resume.reran.gateway.startup.unexpectedly", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-onboard-resume.sh", - "line": 263, - "text": "Resume did not rerun gateway startup", - "polarity": "pass", - "normalized_id": "resume.did.not.rerun.gateway.startup", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-resume.sh", - "line": 267, - "text": "Resume reran sandbox creation unexpectedly", - "polarity": "fail", - "normalized_id": "resume.reran.sandbox.creation.unexpectedly", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-onboard-resume.sh", - "line": 269, - "text": "Resume did not rerun sandbox creation", - "polarity": "pass", - "normalized_id": "resume.did.not.rerun.sandbox.creation", - "mapping_status": "deferred" - }, - { - "script": 
"test/e2e/test-onboard-resume.sh", - "line": 276, - "text": "Resume re-ran inference setup", - "polarity": "pass", - "normalized_id": "resume.re.ran.inference.setup", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-resume.sh", - "line": 278, - "text": "Resume skipped inference (already configured)", - "polarity": "pass", - "normalized_id": "resume.skipped.inference.already.configured", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-resume.sh", - "line": 280, - "text": "Resume neither ran nor skipped inference setup", - "polarity": "fail", - "normalized_id": "resume.neither.ran.nor.skipped.inference.setup", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-resume.sh", - "line": 284, - "text": "Sandbox '$SANDBOX_NAME' is manageable after resume", - "polarity": "pass", - "normalized_id": "sandbox.sandbox.name.is.manageable.after.resume", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-resume.sh", - "line": 286, - "text": "Sandbox '$SANDBOX_NAME' status failed after resume", - "polarity": "fail", - "normalized_id": "sandbox.sandbox.name.status.failed.after.resume", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-resume.sh", - "line": 304, - "text": "Session file recorded full completion after resume", - "polarity": "pass", - "normalized_id": "session.file.recorded.full.completion.after.resume", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-resume.sh", - "line": 305, - "text": "Session file did not record the expected completed state after resume", - "polarity": "fail", - "normalized_id": "session.file.did.not.record.the.expected.completed.state.after.resume", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-resume.sh", - "line": 309, - "text": "Registry contains resumed sandbox entry", - "polarity": "pass", - "normalized_id": "registry.contains.resumed.sandbox.entry", - 
"mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-resume.sh", - "line": 311, - "text": "Registry does not contain resumed sandbox entry", - "polarity": "fail", - "normalized_id": "registry.does.not.contain.resumed.sandbox.entry", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-resume.sh", - "line": 326, - "text": "Sandbox '$SANDBOX_NAME' still exists after cleanup", - "polarity": "fail", - "normalized_id": "sandbox.sandbox.name.still.exists.after.cleanup", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-resume.sh", - "line": 328, - "text": "Sandbox '$SANDBOX_NAME' cleaned up", - "polarity": "pass", - "normalized_id": "sandbox.sandbox.name.cleaned.up", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-resume.sh", - "line": 332, - "text": "Onboard session file still exists after cleanup", - "polarity": "fail", - "normalized_id": "onboard.session.file.still.exists.after.cleanup", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-resume.sh", - "line": 334, - "text": "Onboard session file cleaned up", - "polarity": "pass", - "normalized_id": "onboard.session.file.cleaned.up", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-onboard-resume.sh", - "line": 337, - "text": "Final cleanup complete", - "polarity": "pass", - "normalized_id": "final.cleanup.complete", - "mapping_status": "deferred" - } - ] - }, - { - "script": "test/e2e/test-openclaw-inference-switch.sh", - "assertions": [ - { - "script": "test/e2e/test-openclaw-inference-switch.sh", - "line": 96, - "text": "OpenShell inference get failed: ${output:0:240}", - "polarity": "fail", - "normalized_id": "openshell.inference.get.failed.output.0.240", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openclaw-inference-switch.sh", - "line": 103, - "text": "OpenShell route points at ${SWITCH_PROVIDER} / ${SWITCH_MODEL}", - "polarity": "pass", - "normalized_id": 
"openshell.route.points.at.switch.provider.switch.model", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openclaw-inference-switch.sh", - "line": 105, - "text": "OpenShell route did not switch to ${SWITCH_PROVIDER} / ${SWITCH_MODEL}: ${plain_output:0:400}", - "polarity": "fail", - "normalized_id": "openshell.route.did.not.switch.to.switch.provider.switch.model.plain.output.0.400", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openclaw-inference-switch.sh", - "line": 163, - "text": "Registry/session were not updated for switch: ${probe:0:400}", - "polarity": "fail", - "normalized_id": "registry.session.were.not.updated.for.switch.probe.0.400", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openclaw-inference-switch.sh", - "line": 166, - "text": "Registry and onboard session record the switched provider/model", - "polarity": "pass", - "normalized_id": "registry.and.onboard.session.record.the.switched.provider.model", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openclaw-inference-switch.sh", - "line": 172, - "text": "Could not read /sandbox/.openclaw/openclaw.json: ${config:0:240}", - "polarity": "fail", - "normalized_id": "could.not.read.sandbox.openclaw.openclaw.json.config.0.240", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openclaw-inference-switch.sh", - "line": 202, - "text": "OpenClaw config was not patched correctly: ${probe:0:400}", - "polarity": "fail", - "normalized_id": "openclaw.config.was.not.patched.correctly.probe.0.400", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openclaw-inference-switch.sh", - "line": 205, - "text": "OpenClaw config uses inference/${SWITCH_MODEL}", - "polarity": "pass", - "normalized_id": "openclaw.config.uses.inference.switch.model", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openclaw-inference-switch.sh", - "line": 210, - "text": "OpenClaw config hash matches openclaw.json", - 
"polarity": "pass", - "normalized_id": "openclaw.config.hash.matches.openclaw.json", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openclaw-inference-switch.sh", - "line": 212, - "text": "OpenClaw config hash check failed: ${hash_check:0:240}", - "polarity": "fail", - "normalized_id": "openclaw.config.hash.check.failed.hash.check.0.240", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openclaw-inference-switch.sh", - "line": 241, - "text": "Sandbox inference.local returned PONG with ${SWITCH_MODEL}", - "polarity": "pass", - "normalized_id": "sandbox.inference.local.returned.pong.with.switch.model", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-openclaw-inference-switch.sh", - "line": 253, - "text": "Sandbox inference.local did not work after switch: ${last_fail}", - "polarity": "fail", - "normalized_id": "sandbox.inference.local.did.not.work.after.switch.last.fail", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-openclaw-inference-switch.sh", - "line": 261, - "text": "Could not get SSH config for OpenClaw agent turn", - "polarity": "fail", - "normalized_id": "could.not.get.ssh.config.for.openclaw.agent.turn", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openclaw-inference-switch.sh", - "line": 293, - "text": "OpenClaw agent answered through the switched inference route", - "polarity": "pass", - "normalized_id": "openclaw.agent.answered.through.the.switched.inference.route", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openclaw-inference-switch.sh", - "line": 295, - "text": "OpenClaw agent turn failed after switch (exit ${rc}); reply='${reply:0:200}', raw='${raw:0:200}'", - "polarity": "fail", - "normalized_id": "openclaw.agent.turn.failed.after.switch.exit.rc.reply.reply.0.200.raw.raw.0.200", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openclaw-inference-switch.sh", - "line": 328, - "text": "Pre-cleanup complete", - 
"polarity": "pass", - "normalized_id": "pre.cleanup.complete", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openclaw-inference-switch.sh", - "line": 332, - "text": "Docker is running", - "polarity": "pass", - "normalized_id": "docker.is.running", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-openclaw-inference-switch.sh", - "line": 334, - "text": "Docker is not running", - "polarity": "fail", - "normalized_id": "docker.is.not.running", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openclaw-inference-switch.sh", - "line": 339, - "text": "NVIDIA_API_KEY is set", - "polarity": "pass", - "normalized_id": "nvidia.api.key.is.set", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openclaw-inference-switch.sh", - "line": 341, - "text": "NVIDIA_API_KEY not set or invalid", - "polarity": "fail", - "normalized_id": "nvidia.api.key.not.set.or.invalid", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openclaw-inference-switch.sh", - "line": 346, - "text": "NEMOCLAW_NON_INTERACTIVE=1", - "polarity": "pass", - "normalized_id": "nemoclaw.non.interactive.1", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openclaw-inference-switch.sh", - "line": 348, - "text": "NEMOCLAW_NON_INTERACTIVE=1 is required", - "polarity": "fail", - "normalized_id": "nemoclaw.non.interactive.1.is.required", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openclaw-inference-switch.sh", - "line": 353, - "text": "Third-party software acceptance is set", - "polarity": "pass", - "normalized_id": "third.party.software.acceptance.is.set", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openclaw-inference-switch.sh", - "line": 355, - "text": "NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 is required", - "polarity": "fail", - "normalized_id": "nemoclaw.accept.third.party.software.1.is.required", - "mapping_status": "deferred" - }, - { - "script": 
"test/e2e/test-openclaw-inference-switch.sh", - "line": 361, - "text": "Could not cd to repo root: $REPO", - "polarity": "fail", - "normalized_id": "could.not.cd.to.repo.root.repo", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openclaw-inference-switch.sh", - "line": 385, - "text": "install.sh completed", - "polarity": "pass", - "normalized_id": "install.sh.completed", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openclaw-inference-switch.sh", - "line": 387, - "text": "install.sh failed (exit ${install_exit})", - "polarity": "fail", - "normalized_id": "install.sh.failed.exit.install.exit", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openclaw-inference-switch.sh", - "line": 393, - "text": "nemoclaw not found on PATH", - "polarity": "fail", - "normalized_id": "nemoclaw.not.found.on.path", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openclaw-inference-switch.sh", - "line": 397, - "text": "openshell not found on PATH", - "polarity": "fail", - "normalized_id": "openshell.not.found.on.path", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openclaw-inference-switch.sh", - "line": 400, - "text": "nemoclaw and openshell are on PATH", - "polarity": "pass", - "normalized_id": "nemoclaw.and.openshell.are.on.path", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openclaw-inference-switch.sh", - "line": 408, - "text": "nemoclaw inference set completed", - "polarity": "pass", - "normalized_id": "nemoclaw.inference.set.completed", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openclaw-inference-switch.sh", - "line": 410, - "text": "nemoclaw inference set failed (exit ${switch_rc}): ${switch_output:0:500}", - "polarity": "fail", - "normalized_id": "nemoclaw.inference.set.failed.exit.switch.rc.switch.output.0.500", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openclaw-inference-switch.sh", - "line": 417, - "text": 
"OpenClaw gateway process stayed running during switch", - "polarity": "pass", - "normalized_id": "openclaw.gateway.process.stayed.running.during.switch", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openclaw-inference-switch.sh", - "line": 419, - "text": "OpenClaw gateway process changed during switch (${pid_before} -> ${pid_after})", - "polarity": "fail", - "normalized_id": "openclaw.gateway.process.changed.during.switch.pid.before.pid.after", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openclaw-inference-switch.sh", - "line": 440, - "text": "Sandbox ${SANDBOX_NAME} still in registry after destroy", - "polarity": "fail", - "normalized_id": "sandbox.sandbox.name.still.in.registry.after.destroy", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openclaw-inference-switch.sh", - "line": 442, - "text": "Sandbox ${SANDBOX_NAME} removed", - "polarity": "pass", - "normalized_id": "sandbox.sandbox.name.removed", - "mapping_status": "retired" - } - ] - }, - { - "script": "test/e2e/test-openclaw-plugin-runtime-exdev.sh", - "assertions": [ - { - "script": "test/e2e/test-openclaw-plugin-runtime-exdev.sh", - "line": 68, - "text": "Docker is running", - "polarity": "pass", - "normalized_id": "docker.is.running", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openclaw-plugin-runtime-exdev.sh", - "line": 70, - "text": "Docker is not running", - "polarity": "fail", - "normalized_id": "docker.is.not.running", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openclaw-plugin-runtime-exdev.sh", - "line": 75, - "text": "NVIDIA_API_KEY is set", - "polarity": "pass", - "normalized_id": "nvidia.api.key.is.set", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openclaw-plugin-runtime-exdev.sh", - "line": 77, - "text": "NVIDIA_API_KEY is required and must start with nvapi-", - "polarity": "fail", - "normalized_id": "nvidia.api.key.is.required.and.must.start.with.nvapi", - 
"mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openclaw-plugin-runtime-exdev.sh", - "line": 90, - "text": "nemoclaw is available: $(nemoclaw --version 2>/dev/null || echo unknown)", - "polarity": "pass", - "normalized_id": "nemoclaw.is.available.nemoclaw.version.2.dev.null.echo.unknown", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openclaw-plugin-runtime-exdev.sh", - "line": 92, - "text": "nemoclaw not found after install", - "polarity": "fail", - "normalized_id": "nemoclaw.not.found.after.install", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openclaw-plugin-runtime-exdev.sh", - "line": 134, - "text": "fresh sandbox onboard completed", - "polarity": "pass", - "normalized_id": "fresh.sandbox.onboard.completed", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openclaw-plugin-runtime-exdev.sh", - "line": 136, - "text": "fresh sandbox onboard failed (exit ${onboard_rc}); see ${ONBOARD_LOG}", - "polarity": "fail", - "normalized_id": "fresh.sandbox.onboard.failed.exit.onboard.rc.see.onboard.log", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openclaw-plugin-runtime-exdev.sh", - "line": 187, - "text": "OpenClaw-style plugin runtime deps replacement hit #3513 EXDEV failure", - "polarity": "fail", - "normalized_id": "openclaw.style.plugin.runtime.deps.replacement.hit.3513.exdev.failure", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openclaw-plugin-runtime-exdev.sh", - "line": 193, - "text": "runtime deps replacement exited ${agent_rc}; see ${AGENT_LOG}", - "polarity": "fail", - "normalized_id": "runtime.deps.replacement.exited.agent.rc.see.agent.log", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openclaw-plugin-runtime-exdev.sh", - "line": 198, - "text": "OpenClaw-style plugin runtime-deps replacement completed across filesystems", - "polarity": "pass", - "normalized_id": 
"openclaw.style.plugin.runtime.deps.replacement.completed.across.filesystems", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openclaw-plugin-runtime-exdev.sh", - "line": 200, - "text": "runtime deps replacement exited 0 but success marker was missing; see ${AGENT_LOG}", - "polarity": "fail", - "normalized_id": "runtime.deps.replacement.exited.0.but.success.marker.was.missing.see.agent.log", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openclaw-plugin-runtime-exdev.sh", - "line": 206, - "text": "OpenClaw plugin runtime-deps EXDEV guard passed", - "polarity": "pass", - "normalized_id": "openclaw.plugin.runtime.deps.exdev.guard.passed", - "mapping_status": "deferred" - } - ] - }, - { - "script": "test/e2e/test-openshell-gateway-upgrade.sh", - "assertions": [ - { - "script": "test/e2e/test-openshell-gateway-upgrade.sh", - "line": 185, - "text": "macOS incomplete OpenShell install unexpectedly succeeded with fake payloads", - "polarity": "fail", - "normalized_id": "macos.incomplete.openshell.install.unexpectedly.succeeded.with.fake.payloads", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-openshell-gateway-upgrade.sh", - "line": 194, - "text": "macOS installer did not detect missing openshell-gateway", - "polarity": "fail", - "normalized_id": "macos.installer.did.not.detect.missing.openshell.gateway", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openshell-gateway-upgrade.sh", - "line": 201, - "text": "macOS installer did not request the Darwin openshell-gateway asset", - "polarity": "fail", - "normalized_id": "macos.installer.did.not.request.the.darwin.openshell.gateway.asset", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openshell-gateway-upgrade.sh", - "line": 207, - "text": "macOS installer still requested the Darwin openshell-driver-vm asset", - "polarity": "fail", - "normalized_id": "macos.installer.still.requested.the.darwin.openshell.driver.vm.asset", - 
"mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openshell-gateway-upgrade.sh", - "line": 211, - "text": "macOS OpenShell ${CURRENT_OPENSHELL_VERSION} incomplete install fetches Darwin gateway asset", - "polarity": "pass", - "normalized_id": "macos.openshell.current.openshell.version.incomplete.install.fetches.darwin.gateway.asset", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openshell-gateway-upgrade.sh", - "line": 280, - "text": "macOS installer still required openshell-driver-vm Hypervisor entitlement", - "polarity": "fail", - "normalized_id": "macos.installer.still.required.openshell.driver.vm.hypervisor.entitlement", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openshell-gateway-upgrade.sh", - "line": 287, - "text": "macOS installer still codesigned openshell-driver-vm", - "polarity": "fail", - "normalized_id": "macos.installer.still.codesigned.openshell.driver.vm", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openshell-gateway-upgrade.sh", - "line": 294, - "text": "macOS installer reinstalled instead of repairing an otherwise complete OpenShell install", - "polarity": "fail", - "normalized_id": "macos.installer.reinstalled.instead.of.repairing.an.otherwise.complete.openshell.install", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openshell-gateway-upgrade.sh", - "line": 298, - "text": "macOS OpenShell ${CURRENT_OPENSHELL_VERSION} installer does not require VM driver Hypervisor entitlement", - "polarity": "pass", - "normalized_id": "macos.openshell.current.openshell.version.installer.does.not.require.vm.driver.hypervisor.entitlement", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openshell-gateway-upgrade.sh", - "line": 303, - "text": "Dockerfile is missing the macOS VM rootfs compatibility ARG", - "polarity": "fail", - "normalized_id": "dockerfile.is.missing.the.macos.vm.rootfs.compatibility.arg", - "mapping_status": "deferred" - }, - { 
- "script": "test/e2e/test-openshell-gateway-upgrade.sh", - "line": 305, - "text": "Dockerfile patch helper does not patch the macOS VM rootfs compatibility ARG", - "polarity": "fail", - "normalized_id": "dockerfile.patch.helper.does.not.patch.the.macos.vm.rootfs.compatibility.arg", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openshell-gateway-upgrade.sh", - "line": 307, - "text": "onboard does not keep macOS Docker sandbox builds out of the VM rootfs compatibility path", - "polarity": "fail", - "normalized_id": "onboard.does.not.keep.macos.docker.sandbox.builds.out.of.the.vm.rootfs.compatibility.path", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openshell-gateway-upgrade.sh", - "line": 309, - "text": "Dockerfile does not relax OpenClaw state permissions for macOS VM rootfs remapping", - "polarity": "fail", - "normalized_id": "dockerfile.does.not.relax.openclaw.state.permissions.for.macos.vm.rootfs.remapping", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openshell-gateway-upgrade.sh", - "line": 311, - "text": "Hermes Dockerfile is missing the macOS VM rootfs compatibility ARG", - "polarity": "fail", - "normalized_id": "hermes.dockerfile.is.missing.the.macos.vm.rootfs.compatibility.arg", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openshell-gateway-upgrade.sh", - "line": 313, - "text": "Hermes Dockerfile does not relax Hermes state permissions for macOS VM rootfs remapping", - "polarity": "fail", - "normalized_id": "hermes.dockerfile.does.not.relax.hermes.state.permissions.for.macos.vm.rootfs.remapping", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openshell-gateway-upgrade.sh", - "line": 315, - "text": "Hermes Dockerfile does not relax trusted rc files for macOS VM ownership repair", - "polarity": "fail", - "normalized_id": "hermes.dockerfile.does.not.relax.trusted.rc.files.for.macos.vm.ownership.repair", - "mapping_status": "deferred" - }, - { - 
"script": "test/e2e/test-openshell-gateway-upgrade.sh", - "line": 316, - "text": "macOS Docker sandbox builds keep VM rootfs compatibility disabled", - "polarity": "pass", - "normalized_id": "macos.docker.sandbox.builds.keep.vm.rootfs.compatibility.disabled", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openshell-gateway-upgrade.sh", - "line": 407, - "text": "Compatible endpoint mock is listening at ${FAKE_BASE_URL}", - "polarity": "pass", - "normalized_id": "compatible.endpoint.mock.is.listening.at.fake.base.url", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openshell-gateway-upgrade.sh", - "line": 414, - "text": "compatible endpoint mock did not start", - "polarity": "fail", - "normalized_id": "compatible.endpoint.mock.did.not.start", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openshell-gateway-upgrade.sh", - "line": 440, - "text": "${label} NemoClaw installer failed", - "polarity": "fail", - "normalized_id": "label.nemoclaw.installer.failed", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openshell-gateway-upgrade.sh", - "line": 460, - "text": "old NemoClaw install did not leave OpenShell ${OLD_OPENSHELL_VERSION}: $(openshell --version 2>&1 || true)", - "polarity": "fail", - "normalized_id": "old.nemoclaw.install.did.not.leave.openshell.old.openshell.version.openshell.version.2.1.true", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-openshell-gateway-upgrade.sh", - "line": 462, - "text": "Old NemoClaw install selected $(openshell --version)", - "polarity": "pass", - "normalized_id": "old.nemoclaw.install.selected.openshell.version", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-openshell-gateway-upgrade.sh", - "line": 469, - "text": "old installer source is ${old_head:-unknown}, expected ${expected_head:-$OLD_NEMOCLAW_REF}", - "polarity": "fail", - "normalized_id": 
"old.installer.source.is.old.head.unknown.expected.expected.head.old.nemoclaw.ref", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-openshell-gateway-upgrade.sh", - "line": 470, - "text": "Old NemoClaw source is ${OLD_NEMOCLAW_REF} (${old_head:0:12})", - "polarity": "pass", - "normalized_id": "old.nemoclaw.source.is.old.nemoclaw.ref.old.head.0.12", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-openshell-gateway-upgrade.sh", - "line": 473, - "text": "survivor sandbox did not become Ready before gateway upgrade", - "polarity": "fail", - "normalized_id": "survivor.sandbox.did.not.become.ready.before.gateway.upgrade", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openshell-gateway-upgrade.sh", - "line": 475, - "text": "Old NemoClaw install registered survivor claw ${SURVIVOR_SANDBOX}", - "polarity": "pass", - "normalized_id": "old.nemoclaw.install.registered.survivor.claw.survivor.sandbox", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-openshell-gateway-upgrade.sh", - "line": 477, - "text": "old NemoClaw install did not register survivor claw ${SURVIVOR_SANDBOX}", - "polarity": "fail", - "normalized_id": "old.nemoclaw.install.did.not.register.survivor.claw.survivor.sandbox", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-openshell-gateway-upgrade.sh", - "line": 485, - "text": "failed to write survivor marker before gateway upgrade", - "polarity": "fail", - "normalized_id": "failed.to.write.survivor.marker.before.gateway.upgrade", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openshell-gateway-upgrade.sh", - "line": 509, - "text": "failed to start survivor agent before gateway upgrade", - "polarity": "fail", - "normalized_id": "failed.to.start.survivor.agent.before.gateway.upgrade", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openshell-gateway-upgrade.sh", - "line": 510, - "text": "survivor agent did not become healthy before 
gateway upgrade", - "polarity": "fail", - "normalized_id": "survivor.agent.did.not.become.healthy.before.gateway.upgrade", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openshell-gateway-upgrade.sh", - "line": 512, - "text": "survivor agent pid was empty before gateway upgrade", - "polarity": "fail", - "normalized_id": "survivor.agent.pid.was.empty.before.gateway.upgrade", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openshell-gateway-upgrade.sh", - "line": 514, - "text": "Old NemoClaw claw has live agent activity (pid ${SURVIVOR_AGENT_PID}) before gateway upgrade", - "polarity": "pass", - "normalized_id": "old.nemoclaw.claw.has.live.agent.activity.pid.survivor.agent.pid.before.gateway.upgrade", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-openshell-gateway-upgrade.sh", - "line": 522, - "text": "current installer did not exercise the experimental OpenShell gateway upgrade acceptance path", - "polarity": "fail", - "normalized_id": "current.installer.did.not.exercise.the.experimental.openshell.gateway.upgrade.acceptance.path", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openshell-gateway-upgrade.sh", - "line": 525, - "text": "current NemoClaw install did not upgrade OpenShell to ${CURRENT_OPENSHELL_VERSION}: $(openshell --version 2>&1 || true)", - "polarity": "fail", - "normalized_id": "current.nemoclaw.install.did.not.upgrade.openshell.to.current.openshell.version.openshell.version.2.1.true", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openshell-gateway-upgrade.sh", - "line": 527, - "text": "Current NemoClaw install selected $(openshell --version)", - "polarity": "pass", - "normalized_id": "current.nemoclaw.install.selected.openshell.version", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openshell-gateway-upgrade.sh", - "line": 534, - "text": "gateway server did not report OpenShell ${CURRENT_OPENSHELL_VERSION} after upgrade", - 
"polarity": "fail", - "normalized_id": "gateway.server.did.not.report.openshell.current.openshell.version.after.upgrade", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openshell-gateway-upgrade.sh", - "line": 536, - "text": "Gateway server reports OpenShell ${CURRENT_OPENSHELL_VERSION} after upgrade", - "polarity": "pass", - "normalized_id": "gateway.server.reports.openshell.current.openshell.version.after.upgrade", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openshell-gateway-upgrade.sh", - "line": 539, - "text": "Current installer backed up the old running claw before replacing OpenShell", - "polarity": "pass", - "normalized_id": "current.installer.backed.up.the.old.running.claw.before.replacing.openshell", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-openshell-gateway-upgrade.sh", - "line": 543, - "text": "current installer did not back up the old running claw before replacing OpenShell", - "polarity": "fail", - "normalized_id": "current.installer.did.not.back.up.the.old.running.claw.before.replacing.openshell", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-openshell-gateway-upgrade.sh", - "line": 550, - "text": "survivor sandbox is not Ready after gateway upgrade", - "polarity": "fail", - "normalized_id": "survivor.sandbox.is.not.ready.after.gateway.upgrade", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openshell-gateway-upgrade.sh", - "line": 557, - "text": "survivor marker changed after gateway upgrade: got '${marker}'", - "polarity": "fail", - "normalized_id": "survivor.marker.changed.after.gateway.upgrade.got.marker", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openshell-gateway-upgrade.sh", - "line": 558, - "text": "Durable OpenClaw workspace state was restored after gateway upgrade", - "polarity": "pass", - "normalized_id": "durable.openclaw.workspace.state.was.restored.after.gateway.upgrade", - "mapping_status": "deferred" - 
}, - { - "script": "test/e2e/test-openshell-gateway-upgrade.sh", - "line": 565, - "text": "OpenClaw agent is not installed/configured after gateway upgrade", - "polarity": "fail", - "normalized_id": "openclaw.agent.is.not.installed.configured.after.gateway.upgrade", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openshell-gateway-upgrade.sh", - "line": 566, - "text": "OpenClaw agent is installed and configured after gateway upgrade", - "polarity": "pass", - "normalized_id": "openclaw.agent.is.installed.and.configured.after.gateway.upgrade", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openshell-gateway-upgrade.sh", - "line": 569, - "text": "NemoClaw registry retained survivor sandbox after gateway upgrade", - "polarity": "pass", - "normalized_id": "nemoclaw.registry.retained.survivor.sandbox.after.gateway.upgrade", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openshell-gateway-upgrade.sh", - "line": 571, - "text": "NemoClaw registry lost survivor sandbox after gateway upgrade", - "polarity": "fail", - "normalized_id": "nemoclaw.registry.lost.survivor.sandbox.after.gateway.upgrade", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openshell-gateway-upgrade.sh", - "line": 576, - "text": "nemoclaw list still shows survivor sandbox after gateway upgrade", - "polarity": "pass", - "normalized_id": "nemoclaw.list.still.shows.survivor.sandbox.after.gateway.upgrade", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openshell-gateway-upgrade.sh", - "line": 578, - "text": "nemoclaw list does not show survivor sandbox after gateway upgrade: ${list_output:0:200}", - "polarity": "fail", - "normalized_id": "nemoclaw.list.does.not.show.survivor.sandbox.after.gateway.upgrade.list.output.0.200", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openshell-gateway-upgrade.sh", - "line": 581, - "text": "Survivor claw state remained reachable after OpenShell gateway 
upgrade", - "polarity": "pass", - "normalized_id": "survivor.claw.state.remained.reachable.after.openshell.gateway.upgrade", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openshell-gateway-upgrade.sh", - "line": 591, - "text": "Skipping live Docker-driver gateway restart regression on non-Linux host", - "polarity": "pass", - "normalized_id": "skipping.live.docker.driver.gateway.restart.regression.on.non.linux.host", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-openshell-gateway-upgrade.sh", - "line": 604, - "text": "Current NemoClaw installer upgraded old ${OLD_NEMOCLAW_REF} claw, restored state, and kept OpenClaw running on OpenShell ${CURRENT_OPENSHELL_VERSION}", - "polarity": "pass", - "normalized_id": "current.nemoclaw.installer.upgraded.old.old.nemoclaw.ref.claw.restored.state.and.kept.openclaw.running.on.openshell.current.openshell.version", - "mapping_status": "retired" - } - ] - }, - { - "script": "test/e2e/test-openshell-version-pin.sh", - "assertions": [ - { - "script": "test/e2e/test-openshell-version-pin.sh", - "line": 215, - "text": "Installer hard-failed on sticky OpenShell 0.0.45 instead of reinstalling pinned 0.0.44 (#3474)", - "polarity": "fail", - "normalized_id": "installer.hard.failed.on.sticky.openshell.0.0.45.instead.of.reinstalling.pinned.0.0.44.3474", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-openshell-version-pin.sh", - "line": 217, - "text": "install-openshell.sh failed before proving sticky-version recovery (exit ${install_rc})", - "polarity": "fail", - "normalized_id": "install.openshell.sh.failed.before.proving.sticky.version.recovery.exit.install.rc", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-openshell-version-pin.sh", - "line": 219, - "text": "install-openshell.sh completed", - "polarity": "pass", - "normalized_id": "install.openshell.sh.completed", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-openshell-version-pin.sh", - 
"line": 222, - "text": "Expected installer to download pinned OpenShell v0.0.44", - "polarity": "fail", - "normalized_id": "expected.installer.to.download.pinned.openshell.v0.0.44", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-openshell-version-pin.sh", - "line": 224, - "text": "Installer downloaded pinned OpenShell v0.0.44", - "polarity": "pass", - "normalized_id": "installer.downloaded.pinned.openshell.v0.0.44", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-openshell-version-pin.sh", - "line": 227, - "text": "Installer downloaded OpenShell v0.0.45 despite NemoClaw max 0.0.44", - "polarity": "fail", - "normalized_id": "installer.downloaded.openshell.v0.0.45.despite.nemoclaw.max.0.0.44", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-openshell-version-pin.sh", - "line": 229, - "text": "Installer did not download too-new OpenShell v0.0.45", - "polarity": "pass", - "normalized_id": "installer.did.not.download.too.new.openshell.v0.0.45", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-openshell-version-pin.sh", - "line": 232, - "text": "openshell binary was not replaced with pinned 0.0.44", - "polarity": "fail", - "normalized_id": "openshell.binary.was.not.replaced.with.pinned.0.0.44", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-openshell-version-pin.sh", - "line": 234, - "text": "Sticky openshell 0.0.45 was replaced with pinned 0.0.44", - "polarity": "pass", - "normalized_id": "sticky.openshell.0.0.45.was.replaced.with.pinned.0.0.44", - "mapping_status": "mapped" - } - ] - }, - { - "script": "test/e2e/test-overlayfs-autofix.sh", - "assertions": [ - { - "script": "test/e2e/test-overlayfs-autofix.sh", - "line": 169, - "text": "Docker is running", - "polarity": "pass", - "normalized_id": "docker.is.running", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-overlayfs-autofix.sh", - "line": 171, - "text": "Docker is not running — cannot continue", - 
"polarity": "fail", - "normalized_id": "docker.is.not.running.cannot.continue", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-overlayfs-autofix.sh", - "line": 176, - "text": "NVIDIA_API_KEY is set", - "polarity": "pass", - "normalized_id": "nvidia.api.key.is.set", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-overlayfs-autofix.sh", - "line": 178, - "text": "NVIDIA_API_KEY not set or invalid", - "polarity": "fail", - "normalized_id": "nvidia.api.key.not.set.or.invalid", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-overlayfs-autofix.sh", - "line": 183, - "text": "NEMOCLAW_NON_INTERACTIVE=1 is required", - "polarity": "fail", - "normalized_id": "nemoclaw.non.interactive.1.is.required", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-overlayfs-autofix.sh", - "line": 188, - "text": "NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 is required", - "polarity": "fail", - "normalized_id": "nemoclaw.accept.third.party.software.1.is.required", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-overlayfs-autofix.sh", - "line": 193, - "text": "Passwordless sudo available", - "polarity": "pass", - "normalized_id": "passwordless.sudo.available", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-overlayfs-autofix.sh", - "line": 195, - "text": "Passwordless sudo required to edit $DAEMON_JSON", - "polarity": "fail", - "normalized_id": "passwordless.sudo.required.to.edit.daemon.json", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-overlayfs-autofix.sh", - "line": 200, - "text": "Cannot find install.sh at $REPO_ROOT/install.sh", - "polarity": "fail", - "normalized_id": "cannot.find.install.sh.at.repo.root.install.sh", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-overlayfs-autofix.sh", - "line": 203, - "text": "Repo root found: $REPO_ROOT", - "polarity": "pass", - "normalized_id": "repo.root.found.repo.root", - "mapping_status": "deferred" 
- }, - { - "script": "test/e2e/test-overlayfs-autofix.sh", - "line": 249, - "text": "Failed to restart Docker after daemon.json change", - "polarity": "fail", - "normalized_id": "failed.to.restart.docker.after.daemon.json.change", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-overlayfs-autofix.sh", - "line": 260, - "text": "Docker did not come back up after restart", - "polarity": "fail", - "normalized_id": "docker.did.not.come.back.up.after.restart", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-overlayfs-autofix.sh", - "line": 267, - "text": "Docker storage Driver is now overlayfs", - "polarity": "pass", - "normalized_id": "docker.storage.driver.is.now.overlayfs", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-overlayfs-autofix.sh", - "line": 280, - "text": "DriverStatus reports io.containerd.snapshotter.v1 (the bug-triggering config)", - "polarity": "pass", - "normalized_id": "driverstatus.reports.io.containerd.snapshotter.v1.the.bug.triggering.config", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-overlayfs-autofix.sh", - "line": 310, - "text": "Pre-cleanup complete", - "polarity": "pass", - "normalized_id": "pre.cleanup.complete", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-overlayfs-autofix.sh", - "line": 318, - "text": "Could not cd to repo root: $REPO_ROOT", - "polarity": "fail", - "normalized_id": "could.not.cd.to.repo.root.repo.root", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-overlayfs-autofix.sh", - "line": 355, - "text": "install.sh + onboard completed (exit 0)", - "polarity": "pass", - "normalized_id": "install.sh.onboard.completed.exit.0", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-overlayfs-autofix.sh", - "line": 357, - "text": "install.sh + onboard failed (exit $install_exit)", - "polarity": "fail", - "normalized_id": "install.sh.onboard.failed.exit.install.exit", - "mapping_status": "deferred" - 
}, - { - "script": "test/e2e/test-overlayfs-autofix.sh", - "line": 367, - "text": "Onboard log contains the auto-fix detection message", - "polarity": "pass", - "normalized_id": "onboard.log.contains.the.auto.fix.detection.message", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-overlayfs-autofix.sh", - "line": 369, - "text": "Onboard log missing 'Detected Docker 26+ containerd-snapshotter overlayfs'", - "polarity": "fail", - "normalized_id": "onboard.log.missing.detected.docker.26.containerd.snapshotter.overlayfs", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-overlayfs-autofix.sh", - "line": 374, - "text": "Patched cluster image present: $patched_tag", - "polarity": "pass", - "normalized_id": "patched.cluster.image.present.patched.tag", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-overlayfs-autofix.sh", - "line": 376, - "text": "No nemoclaw-cluster:*-fuse-overlayfs-* image found after onboard", - "polarity": "fail", - "normalized_id": "no.nemoclaw.cluster.fuse.overlayfs.image.found.after.onboard", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-overlayfs-autofix.sh", - "line": 386, - "text": "Gateway container is running the patched image", - "polarity": "pass", - "normalized_id": "gateway.container.is.running.the.patched.image", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-overlayfs-autofix.sh", - "line": 388, - "text": "Gateway image '$gateway_image' does not match patched tag '$patched_tag'", - "polarity": "fail", - "normalized_id": "gateway.image.gateway.image.does.not.match.patched.tag.patched.tag", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-overlayfs-autofix.sh", - "line": 394, - "text": "Cluster log still contains the nested-overlay error after auto-fix", - "polarity": "fail", - "normalized_id": "cluster.log.still.contains.the.nested.overlay.error.after.auto.fix", - "mapping_status": "deferred" - }, - { - "script": 
"test/e2e/test-overlayfs-autofix.sh", - "line": 396, - "text": "Cluster log clean of the nested-overlay error", - "polarity": "pass", - "normalized_id": "cluster.log.clean.of.the.nested.overlay.error", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-overlayfs-autofix.sh", - "line": 439, - "text": "ensurePatchedClusterImage returned the same tag on second invocation: $second_tag", - "polarity": "pass", - "normalized_id": "ensurepatchedclusterimage.returned.the.same.tag.on.second.invocation.second.tag", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-overlayfs-autofix.sh", - "line": 441, - "text": "ensurePatchedClusterImage tag mismatch (first=$patched_tag second=$second_tag)", - "polarity": "fail", - "normalized_id": "ensurepatchedclusterimage.tag.mismatch.first.patched.tag.second.second.tag", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-overlayfs-autofix.sh", - "line": 445, - "text": "Patched image was reused (Created timestamp unchanged: $before_created)", - "polarity": "pass", - "normalized_id": "patched.image.was.reused.created.timestamp.unchanged.before.created", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-overlayfs-autofix.sh", - "line": 447, - "text": "Patched image was rebuilt unexpectedly (before=$before_created after=$after_created)", - "polarity": "fail", - "normalized_id": "patched.image.was.rebuilt.unexpectedly.before.before.created.after.after.created", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-overlayfs-autofix.sh", - "line": 481, - "text": "Onboard with auto-fix disabled exited non-zero (exit $negative_exit) within $NEGATIVE_TIMEOUT s", - "polarity": "pass", - "normalized_id": "onboard.with.auto.fix.disabled.exited.non.zero.exit.negative.exit.within.negative.timeout.s", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-overlayfs-autofix.sh", - "line": 483, - "text": "Onboard unexpectedly succeeded with 
NEMOCLAW_DISABLE_OVERLAY_FIX=1", - "polarity": "fail", - "normalized_id": "onboard.unexpectedly.succeeded.with.nemoclaw.disable.overlay.fix.1", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-overlayfs-autofix.sh", - "line": 534, - "text": "Cluster/install logs surface a nested-overlay failure signature ($overlay_evidence)", - "polarity": "pass", - "normalized_id": "cluster.install.logs.surface.a.nested.overlay.failure.signature.overlay.evidence", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-overlayfs-autofix.sh", - "line": 538, - "text": "Negative phase exited $negative_exit (not our timeout, no overlay signature) — likely unrelated flake", - "polarity": "fail", - "normalized_id": "negative.phase.exited.negative.exit.not.our.timeout.no.overlay.signature.likely.unrelated.flake", - "mapping_status": "deferred" - } - ] - }, - { - "script": "test/e2e/test-rebuild-hermes.sh", - "assertions": [ - { - "script": "test/e2e/test-rebuild-hermes.sh", - "line": 96, - "text": "NVIDIA_API_KEY is required", - "polarity": "fail", - "normalized_id": "nvidia.api.key.is.required", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-rebuild-hermes.sh", - "line": 97, - "text": "NEMOCLAW_NON_INTERACTIVE=1 is required", - "polarity": "fail", - "normalized_id": "nemoclaw.non.interactive.1.is.required", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-rebuild-hermes.sh", - "line": 102, - "text": "Could not parse expected Hermes version from manifest", - "polarity": "fail", - "normalized_id": "could.not.parse.expected.hermes.version.from.manifest", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-rebuild-hermes.sh", - "line": 138, - "text": "nemoclaw not found on PATH after install", - "polarity": "fail", - "normalized_id": "nemoclaw.not.found.on.path.after.install", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-rebuild-hermes.sh", - "line": 139, - "text": "openshell not found on 
PATH after install", - "polarity": "fail", - "normalized_id": "openshell.not.found.on.path.after.install", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-rebuild-hermes.sh", - "line": 140, - "text": "NemoClaw installed", - "polarity": "pass", - "normalized_id": "nemoclaw.installed", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-rebuild-hermes.sh", - "line": 159, - "text": "Failed to build old Hermes base image", - "polarity": "fail", - "normalized_id": "failed.to.build.old.hermes.base.image", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-rebuild-hermes.sh", - "line": 161, - "text": "Old Hermes base image built (${OLD_HERMES_VERSION})", - "polarity": "pass", - "normalized_id": "old.hermes.base.image.built.old.hermes.version", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-rebuild-hermes.sh", - "line": 165, - "text": "Cached Hermes base tag now points at old version", - "polarity": "pass", - "normalized_id": "cached.hermes.base.tag.now.points.at.old.version", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-rebuild-hermes.sh", - "line": 222, - "text": "Sandbox did not become Ready", - "polarity": "fail", - "normalized_id": "sandbox.did.not.become.ready", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-rebuild-hermes.sh", - "line": 224, - "text": "Old Hermes sandbox created", - "polarity": "pass", - "normalized_id": "old.hermes.sandbox.created", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-rebuild-hermes.sh", - "line": 231, - "text": "Failed to write marker file", - "polarity": "fail", - "normalized_id": "failed.to.write.marker.file", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-rebuild-hermes.sh", - "line": 234, - "text": "Marker verification failed", - "polarity": "fail", - "normalized_id": "marker.verification.failed", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-rebuild-hermes.sh", - 
"line": 237, - "text": "Pre-rebuild Hermes .env missing Discord placeholder", - "polarity": "fail", - "normalized_id": "pre.rebuild.hermes.env.missing.discord.placeholder", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-rebuild-hermes.sh", - "line": 240, - "text": "Pre-rebuild Hermes config.yaml missing platforms.discord", - "polarity": "fail", - "normalized_id": "pre.rebuild.hermes.config.yaml.missing.platforms.discord", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-rebuild-hermes.sh", - "line": 278, - "text": "Markers written, sandbox registered", - "polarity": "pass", - "normalized_id": "markers.written.sandbox.registered", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-rebuild-hermes.sh", - "line": 291, - "text": "Failed to build current Hermes base image", - "polarity": "fail", - "normalized_id": "failed.to.build.current.hermes.base.image", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-rebuild-hermes.sh", - "line": 293, - "text": "Current Hermes base image built", - "polarity": "pass", - "normalized_id": "current.hermes.base.image.built", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-rebuild-hermes.sh", - "line": 307, - "text": "Rebuild failed", - "polarity": "fail", - "normalized_id": "rebuild.failed", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-rebuild-hermes.sh", - "line": 309, - "text": "Rebuild completed", - "polarity": "pass", - "normalized_id": "rebuild.completed", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-rebuild-hermes.sh", - "line": 317, - "text": "Marker file survived rebuild", - "polarity": "pass", - "normalized_id": "marker.file.survived.rebuild", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-rebuild-hermes.sh", - "line": 319, - "text": "Marker file lost: got '${RESTORED}', expected '${MARKER_CONTENT}'", - "polarity": "fail", - "normalized_id": 
"marker.file.lost.got.restored.expected.marker.content", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-rebuild-hermes.sh", - "line": 326, - "text": "Hermes binary still reports old version ${OLD_HERMES_REGISTRY_VERSION}", - "polarity": "fail", - "normalized_id": "hermes.binary.still.reports.old.version.old.hermes.registry.version", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-rebuild-hermes.sh", - "line": 329, - "text": "Hermes binary reports expected version ${EXPECTED_HERMES_VERSION}", - "polarity": "pass", - "normalized_id": "hermes.binary.reports.expected.version.expected.hermes.version", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-rebuild-hermes.sh", - "line": 331, - "text": "Hermes binary version mismatch: expected output to contain '${EXPECTED_HERMES_VERSION}'", - "polarity": "fail", - "normalized_id": "hermes.binary.version.mismatch.expected.output.to.contain.expected.hermes.version", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-rebuild-hermes.sh", - "line": 338, - "text": "Hermes .env preserved Discord token placeholder", - "polarity": "pass", - "normalized_id": "hermes.env.preserved.discord.token.placeholder", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-rebuild-hermes.sh", - "line": 340, - "text": "Hermes .env lost Discord placeholder after rebuild: ${RESTORED_ENV}", - "polarity": "fail", - "normalized_id": "hermes.env.lost.discord.placeholder.after.rebuild.restored.env", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-rebuild-hermes.sh", - "line": 345, - "text": "Hermes config.yaml preserved platforms.discord", - "polarity": "pass", - "normalized_id": "hermes.config.yaml.preserved.platforms.discord", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-rebuild-hermes.sh", - "line": 347, - "text": "Hermes config.yaml lost platforms.discord after rebuild: ${RESTORED_CONFIG}", - "polarity": "fail", - "normalized_id": 
"hermes.config.yaml.lost.platforms.discord.after.rebuild.restored.config", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-rebuild-hermes.sh", - "line": 358, - "text": "Inference works after rebuild (NVIDIA API key + provider chain intact)", - "polarity": "pass", - "normalized_id": "inference.works.after.rebuild.nvidia.api.key.provider.chain.intact", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-rebuild-hermes.sh", - "line": 373, - "text": "Registry agentVersion updated to ${REGISTRY_VERSION}", - "polarity": "pass", - "normalized_id": "registry.agentversion.updated.to.registry.version", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-rebuild-hermes.sh", - "line": 375, - "text": "Registry agentVersion not updated: got '${REGISTRY_VERSION}', expected != '${OLD_HERMES_REGISTRY_VERSION}'", - "polarity": "fail", - "normalized_id": "registry.agentversion.not.updated.got.registry.version.expected.old.hermes.registry.version", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-rebuild-hermes.sh", - "line": 383, - "text": "No credentials in backup", - "polarity": "pass", - "normalized_id": "no.credentials.in.backup", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-rebuild-hermes.sh", - "line": 385, - "text": "Credentials found: $CRED_LEAKS", - "polarity": "fail", - "normalized_id": "credentials.found.cred.leaks", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-rebuild-hermes.sh", - "line": 388, - "text": "Backup directory missing: $BACKUP_DIR", - "polarity": "fail", - "normalized_id": "backup.directory.missing.backup.dir", - "mapping_status": "deferred" - } - ] - }, - { - "script": "test/e2e/test-rebuild-openclaw.sh", - "assertions": [ - { - "script": "test/e2e/test-rebuild-openclaw.sh", - "line": 66, - "text": "NVIDIA_API_KEY is required", - "polarity": "fail", - "normalized_id": "nvidia.api.key.is.required", - "mapping_status": "deferred" - }, - { - "script": 
"test/e2e/test-rebuild-openclaw.sh", - "line": 67, - "text": "NEMOCLAW_NON_INTERACTIVE=1 is required", - "polarity": "fail", - "normalized_id": "nemoclaw.non.interactive.1.is.required", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-rebuild-openclaw.sh", - "line": 101, - "text": "nemoclaw not found on PATH after install", - "polarity": "fail", - "normalized_id": "nemoclaw.not.found.on.path.after.install", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-rebuild-openclaw.sh", - "line": 102, - "text": "openshell not found on PATH after install", - "polarity": "fail", - "normalized_id": "openshell.not.found.on.path.after.install", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-rebuild-openclaw.sh", - "line": 103, - "text": "NemoClaw installed", - "polarity": "pass", - "normalized_id": "nemoclaw.installed", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-rebuild-openclaw.sh", - "line": 132, - "text": "Failed to build old base image", - "polarity": "fail", - "normalized_id": "failed.to.build.old.base.image", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-rebuild-openclaw.sh", - "line": 134, - "text": "Old base image built (OpenClaw ${OLD_OPENCLAW_VERSION})", - "polarity": "pass", - "normalized_id": "old.base.image.built.openclaw.old.openclaw.version", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-rebuild-openclaw.sh", - "line": 159, - "text": "Sandbox did not become Ready", - "polarity": "fail", - "normalized_id": "sandbox.did.not.become.ready", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-rebuild-openclaw.sh", - "line": 165, - "text": "Old sandbox created (OpenClaw ${OLD_OPENCLAW_VERSION})", - "polarity": "pass", - "normalized_id": "old.sandbox.created.openclaw.old.openclaw.version", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-rebuild-openclaw.sh", - "line": 172, - "text": "Failed to write marker file", - 
"polarity": "fail", - "normalized_id": "failed.to.write.marker.file", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-rebuild-openclaw.sh", - "line": 176, - "text": "Marker verification failed: got '${VERIFY}'", - "polarity": "fail", - "normalized_id": "marker.verification.failed.got.verify", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-rebuild-openclaw.sh", - "line": 228, - "text": "Markers written, sandbox registered", - "polarity": "pass", - "normalized_id": "markers.written.sandbox.registered", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-rebuild-openclaw.sh", - "line": 263, - "text": "Cannot locate nemoclaw module directory", - "polarity": "fail", - "normalized_id": "cannot.locate.nemoclaw.module.directory", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-rebuild-openclaw.sh", - "line": 272, - "text": "Failed to apply preset: ${preset}", - "polarity": "fail", - "normalized_id": "failed.to.apply.preset.preset", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-rebuild-openclaw.sh", - "line": 278, - "text": "npm preset active in gateway policy", - "polarity": "pass", - "normalized_id": "npm.preset.active.in.gateway.policy", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-rebuild-openclaw.sh", - "line": 280, - "text": "npm preset not found in live gateway policy before rebuild", - "polarity": "fail", - "normalized_id": "npm.preset.not.found.in.live.gateway.policy.before.rebuild", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-rebuild-openclaw.sh", - "line": 283, - "text": "pypi preset active in gateway policy", - "polarity": "pass", - "normalized_id": "pypi.preset.active.in.gateway.policy", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-rebuild-openclaw.sh", - "line": 285, - "text": "pypi preset not found in live gateway policy before rebuild", - "polarity": "fail", - "normalized_id": 
"pypi.preset.not.found.in.live.gateway.policy.before.rebuild", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-rebuild-openclaw.sh", - "line": 298, - "text": "Policy presets applied and verified", - "polarity": "pass", - "normalized_id": "policy.presets.applied.and.verified", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-rebuild-openclaw.sh", - "line": 314, - "text": "Failed to build current base image", - "polarity": "fail", - "normalized_id": "failed.to.build.current.base.image", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-rebuild-openclaw.sh", - "line": 316, - "text": "Current base image restored", - "polarity": "pass", - "normalized_id": "current.base.image.restored", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-rebuild-openclaw.sh", - "line": 322, - "text": "Rebuild failed", - "polarity": "fail", - "normalized_id": "rebuild.failed", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-rebuild-openclaw.sh", - "line": 324, - "text": "Rebuild completed", - "polarity": "pass", - "normalized_id": "rebuild.completed", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-rebuild-openclaw.sh", - "line": 332, - "text": "Marker file survived rebuild", - "polarity": "pass", - "normalized_id": "marker.file.survived.rebuild", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-rebuild-openclaw.sh", - "line": 334, - "text": "Marker file lost: got '${RESTORED}', expected '${MARKER_CONTENT}'", - "polarity": "fail", - "normalized_id": "marker.file.lost.got.restored.expected.marker.content", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-rebuild-openclaw.sh", - "line": 340, - "text": "Could not get OpenClaw version from sandbox (empty output)", - "polarity": "fail", - "normalized_id": "could.not.get.openclaw.version.from.sandbox.empty.output", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-rebuild-openclaw.sh", - 
"line": 342, - "text": "Version still old after rebuild: ${NEW_VERSION}", - "polarity": "fail", - "normalized_id": "version.still.old.after.rebuild.new.version", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-rebuild-openclaw.sh", - "line": 344, - "text": "OpenClaw version upgraded: ${NEW_VERSION}", - "polarity": "pass", - "normalized_id": "openclaw.version.upgraded.new.version", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-rebuild-openclaw.sh", - "line": 356, - "text": "Registry agentVersion updated to ${REGISTRY_VERSION}", - "polarity": "pass", - "normalized_id": "registry.agentversion.updated.to.registry.version", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-rebuild-openclaw.sh", - "line": 358, - "text": "Registry agentVersion not updated: got '${REGISTRY_VERSION}', expected != '${OLD_OPENCLAW_VERSION}'", - "polarity": "fail", - "normalized_id": "registry.agentversion.not.updated.got.registry.version.expected.old.openclaw.version", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-rebuild-openclaw.sh", - "line": 369, - "text": "Inference works after rebuild (NVIDIA API key + provider chain intact)", - "polarity": "pass", - "normalized_id": "inference.works.after.rebuild.nvidia.api.key.provider.chain.intact", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-rebuild-openclaw.sh", - "line": 385, - "text": "No credentials in backup", - "polarity": "pass", - "normalized_id": "no.credentials.in.backup", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-rebuild-openclaw.sh", - "line": 387, - "text": "Credentials found: $CRED_LEAKS", - "polarity": "fail", - "normalized_id": "credentials.found.cred.leaks", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-rebuild-openclaw.sh", - "line": 390, - "text": "Backup directory missing: $BACKUP_DIR", - "polarity": "fail", - "normalized_id": "backup.directory.missing.backup.dir", - "mapping_status": 
"deferred" - }, - { - "script": "test/e2e/test-rebuild-openclaw.sh", - "line": 407, - "text": "npm preset survived rebuild (in registry)", - "polarity": "pass", - "normalized_id": "npm.preset.survived.rebuild.in.registry", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-rebuild-openclaw.sh", - "line": 409, - "text": "npm preset LOST after rebuild — issue #1952", - "polarity": "fail", - "normalized_id": "npm.preset.lost.after.rebuild.issue.1952", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-rebuild-openclaw.sh", - "line": 412, - "text": "pypi preset survived rebuild (in registry)", - "polarity": "pass", - "normalized_id": "pypi.preset.survived.rebuild.in.registry", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-rebuild-openclaw.sh", - "line": 414, - "text": "pypi preset LOST after rebuild — issue #1952", - "polarity": "fail", - "normalized_id": "pypi.preset.lost.after.rebuild.issue.1952", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-rebuild-openclaw.sh", - "line": 420, - "text": "npm preset active in gateway policy after rebuild", - "polarity": "pass", - "normalized_id": "npm.preset.active.in.gateway.policy.after.rebuild", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-rebuild-openclaw.sh", - "line": 422, - "text": "npm preset not in live gateway policy after rebuild — issue #1952", - "polarity": "fail", - "normalized_id": "npm.preset.not.in.live.gateway.policy.after.rebuild.issue.1952", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-rebuild-openclaw.sh", - "line": 425, - "text": "pypi preset active in gateway policy after rebuild", - "polarity": "pass", - "normalized_id": "pypi.preset.active.in.gateway.policy.after.rebuild", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-rebuild-openclaw.sh", - "line": 427, - "text": "pypi preset not in live gateway policy after rebuild — issue #1952", - "polarity": "fail", - 
"normalized_id": "pypi.preset.not.in.live.gateway.policy.after.rebuild.issue.1952", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-rebuild-openclaw.sh", - "line": 446, - "text": "Backup manifest contains policyPresets: ${MANIFEST_PRESETS}", - "polarity": "pass", - "normalized_id": "backup.manifest.contains.policypresets.manifest.presets", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-rebuild-openclaw.sh", - "line": 448, - "text": "Backup manifest missing expected policyPresets (npm,pypi): got '${MANIFEST_PRESETS}' — issue #1952", - "polarity": "fail", - "normalized_id": "backup.manifest.missing.expected.policypresets.npm.pypi.got.manifest.presets.issue.1952", - "mapping_status": "deferred" - } - ] - }, - { - "script": "test/e2e/test-runtime-overrides.sh", - "assertions": [ - { - "script": "test/e2e/test-runtime-overrides.sh", - "line": 86, - "text": "baseline container failed before config capture", - "polarity": "fail", - "normalized_id": "baseline.container.failed.before.config.capture", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-runtime-overrides.sh", - "line": 104, - "text": "baseline config hash valid", - "polarity": "pass", - "normalized_id": "baseline.config.hash.valid", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-runtime-overrides.sh", - "line": 106, - "text": "baseline config hash invalid", - "polarity": "fail", - "normalized_id": "baseline.config.hash.invalid", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-runtime-overrides.sh", - "line": 116, - "text": "model overridden to $OVERRIDE_MODEL", - "polarity": "pass", - "normalized_id": "model.overridden.to.override.model", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-runtime-overrides.sh", - "line": 118, - "text": "expected model=$OVERRIDE_MODEL, got $ACTUAL", - "polarity": "fail", - "normalized_id": "expected.model.override.model.got.actual", - "mapping_status": "deferred" - }, - 
{ - "script": "test/e2e/test-runtime-overrides.sh", - "line": 125, - "text": "config hash valid after model override", - "polarity": "pass", - "normalized_id": "config.hash.valid.after.model.override", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-runtime-overrides.sh", - "line": 127, - "text": "config hash invalid after model override", - "polarity": "fail", - "normalized_id": "config.hash.invalid.after.model.override", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-runtime-overrides.sh", - "line": 138, - "text": "contextWindow overridden to 32768", - "polarity": "pass", - "normalized_id": "contextwindow.overridden.to.32768", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-runtime-overrides.sh", - "line": 140, - "text": "expected contextWindow=32768, got $ACTUAL", - "polarity": "fail", - "normalized_id": "expected.contextwindow.32768.got.actual", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-runtime-overrides.sh", - "line": 149, - "text": "maxTokens overridden to 16384", - "polarity": "pass", - "normalized_id": "maxtokens.overridden.to.16384", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-runtime-overrides.sh", - "line": 151, - "text": "expected maxTokens=16384, got $ACTUAL", - "polarity": "fail", - "normalized_id": "expected.maxtokens.16384.got.actual", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-runtime-overrides.sh", - "line": 160, - "text": "reasoning overridden to true", - "polarity": "pass", - "normalized_id": "reasoning.overridden.to.true", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-runtime-overrides.sh", - "line": 162, - "text": "expected reasoning=true, got $ACTUAL", - "polarity": "fail", - "normalized_id": "expected.reasoning.true.got.actual", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-runtime-overrides.sh", - "line": 173, - "text": "CORS origin added: $CORS", - "polarity": "pass", - 
"normalized_id": "cors.origin.added.cors", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-runtime-overrides.sh", - "line": 176, - "text": "CORS origin not found in allowedOrigins: ${ORIGINS}", - "polarity": "fail", - "normalized_id": "cors.origin.not.found.in.allowedorigins.origins", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-runtime-overrides.sh", - "line": 196, - "text": "all 5 overrides applied correctly", - "polarity": "pass", - "normalized_id": "all.5.overrides.applied.correctly", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-runtime-overrides.sh", - "line": 198, - "text": "combined override mismatch: model=$M ctx=$C max=$T reasoning=$R cors=$O", - "polarity": "fail", - "normalized_id": "combined.override.mismatch.model.m.ctx.c.max.t.reasoning.r.cors.o", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-runtime-overrides.sh", - "line": 206, - "text": "model override with control chars rejected", - "polarity": "pass", - "normalized_id": "model.override.with.control.chars.rejected", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-runtime-overrides.sh", - "line": 208, - "text": "model override with control chars was not rejected", - "polarity": "fail", - "normalized_id": "model.override.with.control.chars.was.not.rejected", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-runtime-overrides.sh", - "line": 214, - "text": "non-integer context window rejected", - "polarity": "pass", - "normalized_id": "non.integer.context.window.rejected", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-runtime-overrides.sh", - "line": 216, - "text": "non-integer context window was not rejected", - "polarity": "fail", - "normalized_id": "non.integer.context.window.was.not.rejected", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-runtime-overrides.sh", - "line": 222, - "text": "non-integer max tokens rejected", - "polarity": 
"pass", - "normalized_id": "non.integer.max.tokens.rejected", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-runtime-overrides.sh", - "line": 224, - "text": "non-integer max tokens was not rejected", - "polarity": "fail", - "normalized_id": "non.integer.max.tokens.was.not.rejected", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-runtime-overrides.sh", - "line": 230, - "text": "invalid reasoning value rejected", - "polarity": "pass", - "normalized_id": "invalid.reasoning.value.rejected", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-runtime-overrides.sh", - "line": 232, - "text": "invalid reasoning value was not rejected", - "polarity": "fail", - "normalized_id": "invalid.reasoning.value.was.not.rejected", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-runtime-overrides.sh", - "line": 238, - "text": "non-http CORS origin rejected", - "polarity": "pass", - "normalized_id": "non.http.cors.origin.rejected", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-runtime-overrides.sh", - "line": 240, - "text": "non-http CORS origin was not rejected", - "polarity": "fail", - "normalized_id": "non.http.cors.origin.was.not.rejected", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-runtime-overrides.sh", - "line": 246, - "text": "invalid inference API type rejected", - "polarity": "pass", - "normalized_id": "invalid.inference.api.type.rejected", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-runtime-overrides.sh", - "line": 248, - "text": "invalid inference API type was not rejected", - "polarity": "fail", - "normalized_id": "invalid.inference.api.type.was.not.rejected", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-runtime-overrides.sh", - "line": 258, - "text": "config unchanged after rejected override", - "polarity": "pass", - "normalized_id": "config.unchanged.after.rejected.override", - "mapping_status": "deferred" - }, 
- { - "script": "test/e2e/test-runtime-overrides.sh", - "line": 260, - "text": "config was modified despite rejected override: model=$ACTUAL_MODEL ctx=$ACTUAL_CTX (expected model=$BASELINE_MODEL ctx=$BASELINE_CTX)", - "polarity": "fail", - "normalized_id": "config.was.modified.despite.rejected.override.model.actual.model.ctx.actual.ctx.expected.model.baseline.model.ctx.baseline.ctx", - "mapping_status": "deferred" - } - ] - }, - { - "script": "test/e2e/test-sandbox-operations.sh", - "assertions": [ - { - "script": "test/e2e/test-sandbox-operations.sh", - "line": 338, - "text": "TC-SBX-01: nemoclaw list shows '$SANDBOX_A'", - "polarity": "pass", - "normalized_id": "tc.sbx.01.nemoclaw.list.shows.sandbox.a", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-operations.sh", - "line": 340, - "text": "TC-SBX-01: List Sandboxes", - "polarity": "fail", - "normalized_id": "tc.sbx.01.list.sandboxes", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-operations.sh", - "line": 375, - "text": "TC-SBX-02: Connect & Chat", - "polarity": "fail", - "normalized_id": "tc.sbx.02.connect.chat", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-operations.sh", - "line": 402, - "text": "TC-SBX-02: Agent computed 6×7=42 through openclaw → inference.local", - "polarity": "pass", - "normalized_id": "tc.sbx.02.agent.computed.6.7.42.through.openclaw.inference.local", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-sandbox-operations.sh", - "line": 404, - "text": "TC-SBX-02: Connect & Chat", - "polarity": "fail", - "normalized_id": "tc.sbx.02.connect.chat", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-operations.sh", - "line": 427, - "text": "TC-SBX-03: Status output contains all expected fields", - "polarity": "pass", - "normalized_id": "tc.sbx.03.status.output.contains.all.expected.fields", - "mapping_status": "deferred" - }, - { - "script": 
"test/e2e/test-sandbox-operations.sh", - "line": 429, - "text": "TC-SBX-03: Status Fields", - "polarity": "fail", - "normalized_id": "tc.sbx.03.status.fields", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-operations.sh", - "line": 442, - "text": "TC-SBX-04: Log Streaming", - "polarity": "fail", - "normalized_id": "tc.sbx.04.log.streaming", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-operations.sh", - "line": 444, - "text": "TC-SBX-04: Log streaming produced output ($(echo ", - "polarity": "pass", - "normalized_id": "tc.sbx.04.log.streaming.produced.output.echo", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-operations.sh", - "line": 446, - "text": "TC-SBX-04: Log Streaming", - "polarity": "fail", - "normalized_id": "tc.sbx.04.log.streaming", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-operations.sh", - "line": 454, - "text": "TC-SBX-04: Log --follow", - "polarity": "fail", - "normalized_id": "tc.sbx.04.log.follow", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-operations.sh", - "line": 459, - "text": "TC-SBX-04: Log --follow cleanup", - "polarity": "fail", - "normalized_id": "tc.sbx.04.log.follow.cleanup", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-operations.sh", - "line": 461, - "text": "TC-SBX-04: Log --follow exited cleanly after kill", - "polarity": "pass", - "normalized_id": "tc.sbx.04.log.follow.exited.cleanly.after.kill", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-operations.sh", - "line": 489, - "text": "TC-SBX-07: Registry rebuilt — '$SANDBOX_A' found after deletion", - "polarity": "pass", - "normalized_id": "tc.sbx.07.registry.rebuilt.sandbox.a.found.after.deletion", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-operations.sh", - "line": 492, - "text": "TC-SBX-07: Registry Rebuild", - "polarity": "fail", - 
"normalized_id": "tc.sbx.07.registry.rebuild", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-operations.sh", - "line": 518, - "text": "TC-SBX-08: Process Recovery (status)", - "polarity": "fail", - "normalized_id": "tc.sbx.08.process.recovery.status", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-operations.sh", - "line": 520, - "text": "TC-SBX-08: Status detected and recovered dead OpenClaw process", - "polarity": "pass", - "normalized_id": "tc.sbx.08.status.detected.and.recovered.dead.openclaw.process", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-operations.sh", - "line": 522, - "text": "TC-SBX-08: Process Recovery (status)", - "polarity": "fail", - "normalized_id": "tc.sbx.08.process.recovery.status", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-operations.sh", - "line": 529, - "text": "TC-SBX-08: SSH works after process recovery", - "polarity": "pass", - "normalized_id": "tc.sbx.08.ssh.works.after.process.recovery", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-operations.sh", - "line": 531, - "text": "TC-SBX-08: Process Recovery (SSH)", - "polarity": "fail", - "normalized_id": "tc.sbx.08.process.recovery.ssh", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-operations.sh", - "line": 550, - "text": "TC-SBX-05: Destroy ($target)", - "polarity": "fail", - "normalized_id": "tc.sbx.05.destroy.target", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-operations.sh", - "line": 554, - "text": "TC-SBX-05: Destroy ($target)", - "polarity": "fail", - "normalized_id": "tc.sbx.05.destroy.target", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-operations.sh", - "line": 556, - "text": "TC-SBX-05: '$target' removed from nemoclaw list", - "polarity": "pass", - "normalized_id": "tc.sbx.05.target.removed.from.nemoclaw.list", - "mapping_status": "retired" 
- }, - { - "script": "test/e2e/test-sandbox-operations.sh", - "line": 560, - "text": "TC-SBX-05: Destroy ($target)", - "polarity": "fail", - "normalized_id": "tc.sbx.05.destroy.target", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-operations.sh", - "line": 562, - "text": "TC-SBX-05: '$target' removed from openshell sandbox list", - "polarity": "pass", - "normalized_id": "tc.sbx.05.target.removed.from.openshell.sandbox.list", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-sandbox-operations.sh", - "line": 630, - "text": "TC-SBX-06: Gateway recovered after docker kill", - "polarity": "pass", - "normalized_id": "tc.sbx.06.gateway.recovered.after.docker.kill", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-operations.sh", - "line": 634, - "text": "TC-SBX-06: Gateway Recovery", - "polarity": "fail", - "normalized_id": "tc.sbx.06.gateway.recovery", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-operations.sh", - "line": 648, - "text": "TC-SBX-10: Multi-Sandbox", - "polarity": "fail", - "normalized_id": "tc.sbx.10.multi.sandbox", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-operations.sh", - "line": 660, - "text": "TC-SBX-10: Both sandboxes visible in nemoclaw list", - "polarity": "pass", - "normalized_id": "tc.sbx.10.both.sandboxes.visible.in.nemoclaw.list", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-operations.sh", - "line": 662, - "text": "TC-SBX-10: Multi-Sandbox", - "polarity": "fail", - "normalized_id": "tc.sbx.10.multi.sandbox", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-operations.sh", - "line": 687, - "text": "TC-SBX-10: Both sandboxes have non-empty metadata", - "polarity": "pass", - "normalized_id": "tc.sbx.10.both.sandboxes.have.non.empty.metadata", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-operations.sh", - "line": 689, - "text": 
"TC-SBX-10: Multi-Sandbox Metadata", - "polarity": "fail", - "normalized_id": "tc.sbx.10.multi.sandbox.metadata", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-operations.sh", - "line": 715, - "text": "TC-SBX-11: Isolation (A→B)", - "polarity": "fail", - "normalized_id": "tc.sbx.11.isolation.a.b", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-operations.sh", - "line": 717, - "text": "TC-SBX-11: Sandbox A cannot reach sandbox B ($(echo ", - "polarity": "pass", - "normalized_id": "tc.sbx.11.sandbox.a.cannot.reach.sandbox.b.echo", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-operations.sh", - "line": 719, - "text": "TC-SBX-11: Isolation (A→B)", - "polarity": "fail", - "normalized_id": "tc.sbx.11.isolation.a.b", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-operations.sh", - "line": 721, - "text": "TC-SBX-11: Isolation (A→B)", - "polarity": "fail", - "normalized_id": "tc.sbx.11.isolation.a.b", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-operations.sh", - "line": 737, - "text": "TC-SBX-11: Isolation (B→A)", - "polarity": "fail", - "normalized_id": "tc.sbx.11.isolation.b.a", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-operations.sh", - "line": 739, - "text": "TC-SBX-11: Sandbox B cannot reach sandbox A ($(echo ", - "polarity": "pass", - "normalized_id": "tc.sbx.11.sandbox.b.cannot.reach.sandbox.a.echo", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-operations.sh", - "line": 741, - "text": "TC-SBX-11: Isolation (B→A)", - "polarity": "fail", - "normalized_id": "tc.sbx.11.isolation.b.a", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-operations.sh", - "line": 743, - "text": "TC-SBX-11: Isolation (B→A)", - "polarity": "fail", - "normalized_id": "tc.sbx.11.isolation.b.a", - "mapping_status": "deferred" - }, - { - "script": 
"test/e2e/test-sandbox-operations.sh", - "line": 774, - "text": "$PASS${NC}", - "polarity": "pass", - "normalized_id": "pass.nc", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-operations.sh", - "line": 775, - "text": "$FAIL${NC}", - "polarity": "fail", - "normalized_id": "fail.nc", - "mapping_status": "deferred" - } - ] - }, - { - "script": "test/e2e/test-sandbox-rebuild.sh", - "assertions": [ - { - "script": "test/e2e/test-sandbox-rebuild.sh", - "line": 60, - "text": "NVIDIA_API_KEY is required", - "polarity": "fail", - "normalized_id": "nvidia.api.key.is.required", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-rebuild.sh", - "line": 61, - "text": "NEMOCLAW_NON_INTERACTIVE=1 is required", - "polarity": "fail", - "normalized_id": "nemoclaw.non.interactive.1.is.required", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-rebuild.sh", - "line": 86, - "text": "Onboard failed", - "polarity": "fail", - "normalized_id": "onboard.failed", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-rebuild.sh", - "line": 88, - "text": "Sandbox created", - "polarity": "pass", - "normalized_id": "sandbox.created", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-sandbox-rebuild.sh", - "line": 95, - "text": "Version detection: agent version visible in status", - "polarity": "pass", - "normalized_id": "version.detection.agent.version.visible.in.status", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-rebuild.sh", - "line": 106, - "text": "Failed to write marker file", - "polarity": "fail", - "normalized_id": "failed.to.write.marker.file", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-rebuild.sh", - "line": 110, - "text": "Marker file verification failed: got '$VERIFY'", - "polarity": "fail", - "normalized_id": "marker.file.verification.failed.got.verify", - "mapping_status": "deferred" - }, - { - "script": 
"test/e2e/test-sandbox-rebuild.sh", - "line": 112, - "text": "Marker file written and verified", - "polarity": "pass", - "normalized_id": "marker.file.written.and.verified", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-rebuild.sh", - "line": 135, - "text": "Staleness warning appears on connect", - "polarity": "pass", - "normalized_id": "staleness.warning.appears.on.connect", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-rebuild.sh", - "line": 145, - "text": "Rebuild failed", - "polarity": "fail", - "normalized_id": "rebuild.failed", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-rebuild.sh", - "line": 147, - "text": "Rebuild completed", - "polarity": "pass", - "normalized_id": "rebuild.completed", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-rebuild.sh", - "line": 154, - "text": "Marker file survived rebuild", - "polarity": "pass", - "normalized_id": "marker.file.survived.rebuild", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-rebuild.sh", - "line": 156, - "text": "Marker file missing or changed after rebuild: got '$RESTORED', expected '$MARKER_CONTENT'", - "polarity": "fail", - "normalized_id": "marker.file.missing.or.changed.after.rebuild.got.restored.expected.marker.content", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-rebuild.sh", - "line": 171, - "text": "Registry agentVersion updated to $REGISTRY_VERSION", - "polarity": "pass", - "normalized_id": "registry.agentversion.updated.to.registry.version", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-rebuild.sh", - "line": 173, - "text": "Registry agentVersion not updated: got '$REGISTRY_VERSION'", - "polarity": "fail", - "normalized_id": "registry.agentversion.not.updated.got.registry.version", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-rebuild.sh", - "line": 184, - "text": "No 
credentials found in backup directory", - "polarity": "pass", - "normalized_id": "no.credentials.found.in.backup.directory", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-sandbox-rebuild.sh", - "line": 186, - "text": "Credentials found in backup files: $CRED_LEAKS", - "polarity": "fail", - "normalized_id": "credentials.found.in.backup.files.cred.leaks", - "mapping_status": "mapped" - } - ] - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "assertions": [ - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 182, - "text": "Gateway recovered through NemoClaw status", - "polarity": "pass", - "normalized_id": "gateway.recovered.through.nemoclaw.status", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 192, - "text": "Gateway start command succeeded", - "polarity": "pass", - "normalized_id": "gateway.start.command.succeeded", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 204, - "text": "Docker is running", - "polarity": "pass", - "normalized_id": "docker.is.running", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 206, - "text": "Docker is not running — cannot continue", - "polarity": "fail", - "normalized_id": "docker.is.not.running.cannot.continue", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 211, - "text": "NVIDIA_API_KEY is set (starts with nvapi-)", - "polarity": "pass", - "normalized_id": "nvidia.api.key.is.set.starts.with.nvapi", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 213, - "text": "NVIDIA_API_KEY not set or invalid — required for live inference", - "polarity": "fail", - "normalized_id": "nvidia.api.key.not.set.or.invalid.required.for.live.inference", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 218, - 
"text": "Network access to integrate.api.nvidia.com", - "polarity": "pass", - "normalized_id": "network.access.to.integrate.api.nvidia.com", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 220, - "text": "Cannot reach integrate.api.nvidia.com", - "polarity": "fail", - "normalized_id": "cannot.reach.integrate.api.nvidia.com", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 225, - "text": "NEMOCLAW_NON_INTERACTIVE=1 is required", - "polarity": "fail", - "normalized_id": "nemoclaw.non.interactive.1.is.required", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 230, - "text": "NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 is required", - "polarity": "fail", - "normalized_id": "nemoclaw.accept.third.party.software.1.is.required", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 235, - "text": "Cannot find install.sh at $REPO_ROOT/install.sh", - "polarity": "fail", - "normalized_id": "cannot.find.install.sh.at.repo.root.install.sh", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 238, - "text": "Repo root found: $REPO_ROOT", - "polarity": "pass", - "normalized_id": "repo.root.found.repo.root", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 255, - "text": "Pre-cleanup complete", - "polarity": "pass", - "normalized_id": "pre.cleanup.complete", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 265, - "text": "Could not cd to repo root: $REPO_ROOT", - "polarity": "fail", - "normalized_id": "could.not.cd.to.repo.root.repo.root", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 300, - "text": "install.sh completed (exit 0)", - "polarity": "pass", - "normalized_id": 
"install.sh.completed.exit.0", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 302, - "text": "install.sh failed (exit $install_exit)", - "polarity": "fail", - "normalized_id": "install.sh.failed.exit.install.exit", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 308, - "text": "nemoclaw on PATH: $(command -v nemoclaw)", - "polarity": "pass", - "normalized_id": "nemoclaw.on.path.command.v.nemoclaw", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 310, - "text": "nemoclaw not found on PATH after install", - "polarity": "fail", - "normalized_id": "nemoclaw.not.found.on.path.after.install", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 316, - "text": "openshell not found on PATH after install", - "polarity": "fail", - "normalized_id": "openshell.not.found.on.path.after.install", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 322, - "text": "openshell $OPENSHELL_VERSION >= $MIN_OPENSHELL (gateway resume + SSH secret + state persistence)", - "polarity": "pass", - "normalized_id": "openshell.openshell.version.min.openshell.gateway.resume.ssh.secret.state.persistence", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 324, - "text": "openshell $OPENSHELL_VERSION < $MIN_OPENSHELL — sandbox survival requires $MIN_OPENSHELL+", - "polarity": "fail", - "normalized_id": "openshell.openshell.version.min.openshell.sandbox.survival.requires.min.openshell", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 335, - "text": "NemoClaw registry contains '$SANDBOX_NAME'", - "polarity": "pass", - "normalized_id": "nemoclaw.registry.contains.sandbox.name", - "mapping_status": "deferred" - }, - { - "script": 
"test/e2e/test-sandbox-survival.sh", - "line": 337, - "text": "NemoClaw registry missing '$SANDBOX_NAME' — onboard may have failed", - "polarity": "fail", - "normalized_id": "nemoclaw.registry.missing.sandbox.name.onboard.may.have.failed", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 343, - "text": "nemoclaw list shows '$SANDBOX_NAME'", - "polarity": "pass", - "normalized_id": "nemoclaw.list.shows.sandbox.name", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 345, - "text": "nemoclaw list doesn't show '$SANDBOX_NAME': ${list_output:0:200}", - "polarity": "fail", - "normalized_id": "nemoclaw.list.doesn.t.show.sandbox.name.list.output.0.200", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 351, - "text": "openshell sandbox list shows '$SANDBOX_NAME'", - "polarity": "pass", - "normalized_id": "openshell.sandbox.list.shows.sandbox.name", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 353, - "text": "openshell sandbox list doesn't show '$SANDBOX_NAME': ${os_list:0:200}", - "polarity": "fail", - "normalized_id": "openshell.sandbox.list.doesn.t.show.sandbox.name.os.list.0.200", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 359, - "text": "nemoclaw $SANDBOX_NAME status exits 0", - "polarity": "pass", - "normalized_id": "nemoclaw.sandbox.name.status.exits.0", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 361, - "text": "nemoclaw $SANDBOX_NAME status failed: ${status_output:0:200}", - "polarity": "fail", - "normalized_id": "nemoclaw.sandbox.name.status.failed.status.output.0.200", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 370, - "text": "Could not get SSH config for sandbox", - "polarity": "fail", - 
"normalized_id": "could.not.get.ssh.config.for.sandbox", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 373, - "text": "SSH config obtained", - "polarity": "pass", - "normalized_id": "ssh.config.obtained", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 377, - "text": "SSH into sandbox works (baseline)", - "polarity": "pass", - "normalized_id": "ssh.into.sandbox.works.baseline", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 379, - "text": "SSH into sandbox failed (baseline) — cannot continue", - "polarity": "fail", - "normalized_id": "ssh.into.sandbox.failed.baseline.cannot.continue", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 417, - "text": "[LIVE] Baseline: model responded with PONG through sandbox", - "polarity": "pass", - "normalized_id": "live.baseline.model.responded.with.pong.through.sandbox", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 419, - "text": "[LIVE] Baseline: expected PONG after 3 attempts, got: ${baseline_content:0:200}", - "polarity": "fail", - "normalized_id": "live.baseline.expected.pong.after.3.attempts.got.baseline.content.0.200", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 438, - "text": "Planted workspace marker: /sandbox/.openclaw/.survival-marker-workspace", - "polarity": "pass", - "normalized_id": "planted.workspace.marker.sandbox.openclaw.survival.marker.workspace", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 440, - "text": "Could not plant workspace marker", - "polarity": "fail", - "normalized_id": "could.not.plant.workspace.marker", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 446, - "text": "Workspace marker 
verified before restart", - "polarity": "pass", - "normalized_id": "workspace.marker.verified.before.restart", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 448, - "text": "Workspace marker read-back mismatch: expected '$MARKER_VALUE', got '$readback'", - "polarity": "fail", - "normalized_id": "workspace.marker.read.back.mismatch.expected.marker.value.got.readback", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 460, - "text": "Planted agent data marker: /sandbox/.openclaw/.survival-marker", - "polarity": "pass", - "normalized_id": "planted.agent.data.marker.sandbox.openclaw.survival.marker", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 462, - "text": "Could not plant agent data marker", - "polarity": "fail", - "normalized_id": "could.not.plant.agent.data.marker", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 484, - "text": "Planted nested marker: /sandbox/.openclaw/test-data/nested-marker.txt", - "polarity": "pass", - "normalized_id": "planted.nested.marker.sandbox.openclaw.test.data.nested.marker.txt", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 486, - "text": "Could not plant nested workspace marker", - "polarity": "fail", - "normalized_id": "could.not.plant.nested.workspace.marker", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 503, - "text": "Gateway runtime stopped", - "polarity": "pass", - "normalized_id": "gateway.runtime.stopped", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 505, - "text": "Gateway runtime still appears to be running after stop", - "polarity": "fail", - "normalized_id": "gateway.runtime.still.appears.to.be.running.after.stop", - "mapping_status": "deferred" - }, - { - 
"script": "test/e2e/test-sandbox-survival.sh", - "line": 515, - "text": "Docker container confirmed stopped", - "polarity": "pass", - "normalized_id": "docker.container.confirmed.stopped", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 518, - "text": "Docker container not running", - "polarity": "pass", - "normalized_id": "docker.container.not.running", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 520, - "text": "Docker container still running: state=$container_state", - "polarity": "fail", - "normalized_id": "docker.container.still.running.state.container.state", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 523, - "text": "Docker-driver gateway process is not running", - "polarity": "pass", - "normalized_id": "docker.driver.gateway.process.is.not.running", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 545, - "text": "Gateway healthy after restart (attempt $attempt)", - "polarity": "pass", - "normalized_id": "gateway.healthy.after.restart.attempt.attempt", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 547, - "text": "Gateway did not become healthy within 300 seconds", - "polarity": "fail", - "normalized_id": "gateway.did.not.become.healthy.within.300.seconds", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 559, - "text": "openshell sandbox list shows '$SANDBOX_NAME' after restart", - "polarity": "pass", - "normalized_id": "openshell.sandbox.list.shows.sandbox.name.after.restart", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 561, - "text": "openshell sandbox list: '$SANDBOX_NAME' NOT FOUND after restart (#486)", - "polarity": "fail", - "normalized_id": 
"openshell.sandbox.list.sandbox.name.not.found.after.restart.486", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 576, - "text": "Sandbox pod is '$sandbox_phase' after restart", - "polarity": "pass", - "normalized_id": "sandbox.pod.is.sandbox.phase.after.restart", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 578, - "text": "Sandbox pod did not reach Running/Ready after restart", - "polarity": "fail", - "normalized_id": "sandbox.pod.did.not.reach.running.ready.after.restart", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 584, - "text": "NemoClaw registry still contains '$SANDBOX_NAME' after restart", - "polarity": "pass", - "normalized_id": "nemoclaw.registry.still.contains.sandbox.name.after.restart", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 586, - "text": "NemoClaw registry lost '$SANDBOX_NAME' after restart (#486)", - "polarity": "fail", - "normalized_id": "nemoclaw.registry.lost.sandbox.name.after.restart.486", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 591, - "text": "nemoclaw list shows '$SANDBOX_NAME' after restart", - "polarity": "pass", - "normalized_id": "nemoclaw.list.shows.sandbox.name.after.restart", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 593, - "text": "nemoclaw list doesn't show '$SANDBOX_NAME' after restart: ${list_output:0:200}", - "polarity": "fail", - "normalized_id": "nemoclaw.list.doesn.t.show.sandbox.name.after.restart.list.output.0.200", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 611, - "text": "nemoclaw $SANDBOX_NAME status exits 0 after restart (no re-onboard needed)", - "polarity": "pass", - "normalized_id": 
"nemoclaw.sandbox.name.status.exits.0.after.restart.no.re.onboard.needed", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 613, - "text": "nemoclaw $SANDBOX_NAME status TIMED OUT after restart (port forward or SSH recovery hung)", - "polarity": "fail", - "normalized_id": "nemoclaw.sandbox.name.status.timed.out.after.restart.port.forward.or.ssh.recovery.hung", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 615, - "text": "nemoclaw $SANDBOX_NAME status failed after restart (exit $status_exit): ${status_output:0:200}", - "polarity": "fail", - "normalized_id": "nemoclaw.sandbox.name.status.failed.after.restart.exit.status.exit.status.output.0.200", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 624, - "text": "Could not get SSH config after restart (#888 handshake failure?)", - "polarity": "fail", - "normalized_id": "could.not.get.ssh.config.after.restart.888.handshake.failure", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 645, - "text": "SSH config available after restart", - "polarity": "pass", - "normalized_id": "ssh.config.available.after.restart", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 661, - "text": "SSH into sandbox works after restart (attempt $ssh_attempt, no handshake failure — #888/#1086)", - "polarity": "pass", - "normalized_id": "ssh.into.sandbox.works.after.restart.attempt.ssh.attempt.no.handshake.failure.888.1086", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 663, - "text": "SSH into sandbox FAILED after restart — handshake verification likely failed (#888/#1086)", - "polarity": "fail", - "normalized_id": "ssh.into.sandbox.failed.after.restart.handshake.verification.likely.failed.888.1086", - "mapping_status": "deferred" - }, - { - 
"script": "test/e2e/test-sandbox-survival.sh", - "line": 678, - "text": "Workspace marker survived restart: $MARKER_VALUE", - "polarity": "pass", - "normalized_id": "workspace.marker.survived.restart.marker.value", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 680, - "text": "Workspace marker LOST: expected '$MARKER_VALUE', got '${post_restart_marker:-}' (#1086 state loss)", - "polarity": "fail", - "normalized_id": "workspace.marker.lost.expected.marker.value.got.post.restart.marker.empty.1086.state.loss", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 687, - "text": "Agent data marker survived restart", - "polarity": "pass", - "normalized_id": "agent.data.marker.survived.restart", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 689, - "text": "Agent data marker LOST: expected '$MARKER_VALUE', got '${agent_marker:-}' (agent state destroyed)", - "polarity": "fail", - "normalized_id": "agent.data.marker.lost.expected.marker.value.got.agent.marker.empty.agent.state.destroyed", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 696, - "text": "Nested workspace marker survived restart", - "polarity": "pass", - "normalized_id": "nested.workspace.marker.survived.restart", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 698, - "text": "Nested workspace marker LOST: expected '$MARKER_VALUE', got '${nested_marker:-}'", - "polarity": "fail", - "normalized_id": "nested.workspace.marker.lost.expected.marker.value.got.nested.marker.empty", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 710, - "text": "Agent data directory still populated after restart", - "polarity": "pass", - "normalized_id": "agent.data.directory.still.populated.after.restart", - "mapping_status": "deferred" 
- }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 712, - "text": "Agent data directory is empty after restart (@Koneisto overlay wipe)", - "polarity": "fail", - "normalized_id": "agent.data.directory.is.empty.after.restart.koneisto.overlay.wipe", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 752, - "text": "[LIVE] Post-restart: model responded with PONG through sandbox", - "polarity": "pass", - "normalized_id": "live.post.restart.model.responded.with.pong.through.sandbox", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 756, - "text": "[LIVE] Post-restart: expected PONG after 3 attempts, got: ${post_content:0:200}", - "polarity": "fail", - "normalized_id": "live.post.restart.expected.pong.after.3.attempts.got.post.content.0.200", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 771, - "text": "Sandbox '$SANDBOX_NAME' still in registry after destroy", - "polarity": "fail", - "normalized_id": "sandbox.sandbox.name.still.in.registry.after.destroy", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-sandbox-survival.sh", - "line": 773, - "text": "Sandbox '$SANDBOX_NAME' cleaned up", - "polarity": "pass", - "normalized_id": "sandbox.sandbox.name.cleaned.up", - "mapping_status": "deferred" - } - ] - }, - { - "script": "test/e2e/test-shields-config.sh", - "assertions": [ - { - "script": "test/e2e/test-shields-config.sh", - "line": 75, - "text": "Docker is running", - "polarity": "pass", - "normalized_id": "docker.is.running", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 77, - "text": "Docker is not running — cannot continue", - "polarity": "fail", - "normalized_id": "docker.is.not.running.cannot.continue", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 82, - "text": "NVIDIA_API_KEY is 
set", - "polarity": "pass", - "normalized_id": "nvidia.api.key.is.set", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 84, - "text": "NVIDIA_API_KEY not set or invalid", - "polarity": "fail", - "normalized_id": "nvidia.api.key.not.set.or.invalid", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 89, - "text": "NEMOCLAW_NON_INTERACTIVE=1 is required", - "polarity": "fail", - "normalized_id": "nemoclaw.non.interactive.1.is.required", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 94, - "text": "NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 is required", - "polarity": "fail", - "normalized_id": "nemoclaw.accept.third.party.software.1.is.required", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 98, - "text": "Prerequisites OK", - "polarity": "pass", - "normalized_id": "prerequisites.ok", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 126, - "text": "install.sh failed (see $INSTALL_LOG)", - "polarity": "fail", - "normalized_id": "install.sh.failed.see.install.log", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 145, - "text": "nemoclaw not on PATH", - "polarity": "fail", - "normalized_id": "nemoclaw.not.on.path", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 149, - "text": "openshell not on PATH", - "polarity": "fail", - "normalized_id": "openshell.not.on.path", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 152, - "text": "NemoClaw installed (sandbox: $SANDBOX_NAME)", - "polarity": "pass", - "normalized_id": "nemoclaw.installed.sandbox.sandbox.name", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 166, - "text": "Config file mode is 660 
(mutable default)", - "polarity": "pass", - "normalized_id": "config.file.mode.is.660.mutable.default", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 168, - "text": "Config file should start as mode 660: ${PERMS}", - "polarity": "fail", - "normalized_id": "config.file.should.start.as.mode.660.perms", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 172, - "text": "Config file owned by sandbox:sandbox (mutable default)", - "polarity": "pass", - "normalized_id": "config.file.owned.by.sandbox.sandbox.mutable.default", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 174, - "text": "Config file should be owned by sandbox:sandbox: ${PERMS}", - "polarity": "fail", - "normalized_id": "config.file.should.be.owned.by.sandbox.sandbox.perms", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 182, - "text": "Config directory mode is 2770 (mutable default)", - "polarity": "pass", - "normalized_id": "config.directory.mode.is.2770.mutable.default", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 184, - "text": "Config directory should be mode 2770: ${DIR_PERMS}", - "polarity": "fail", - "normalized_id": "config.directory.should.be.mode.2770.dir.perms", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 188, - "text": "Config directory owned by sandbox:sandbox (mutable default)", - "polarity": "pass", - "normalized_id": "config.directory.owned.by.sandbox.sandbox.mutable.default", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 190, - "text": "Config directory should be owned by sandbox:sandbox: ${DIR_PERMS}", - "polarity": "fail", - "normalized_id": "config.directory.should.be.owned.by.sandbox.sandbox.dir.perms", - "mapping_status": "deferred" - }, 
- { - "script": "test/e2e/test-shields-config.sh", - "line": 196, - "text": "Fresh sandbox status reports default mutable state", - "polarity": "pass", - "normalized_id": "fresh.sandbox.status.reports.default.mutable.state", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 198, - "text": "Fresh sandbox status should report NOT CONFIGURED mutable default: ${STATUS_DEFAULT}", - "polarity": "fail", - "normalized_id": "fresh.sandbox.status.should.report.not.configured.mutable.default.status.default", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 207, - "text": "Unified .openclaw layout has no .openclaw-data mirror or symlink bridge", - "polarity": "pass", - "normalized_id": "unified.openclaw.layout.has.no.openclaw.data.mirror.or.symlink.bridge", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 209, - "text": "Legacy .openclaw-data layout should not exist: ${LAYOUT_CHECK}", - "polarity": "fail", - "normalized_id": "legacy.openclaw.data.layout.should.not.exist.layout.check", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 221, - "text": "shields up succeeded", - "polarity": "pass", - "normalized_id": "shields.up.succeeded", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 223, - "text": "shields up did not report success: ${SHIELDS_UP_OUTPUT}", - "polarity": "fail", - "normalized_id": "shields.up.did.not.report.success.shields.up.output", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 232, - "text": "Config file has restrictive permissions after shields up (${PERMS_UP})", - "polarity": "pass", - "normalized_id": "config.file.has.restrictive.permissions.after.shields.up.perms.up", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 234, - 
"text": "Config file should be locked after shields up: ${PERMS_UP}", - "polarity": "fail", - "normalized_id": "config.file.should.be.locked.after.shields.up.perms.up", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 239, - "text": "Config file ownership changed to root:root", - "polarity": "pass", - "normalized_id": "config.file.ownership.changed.to.root.root", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 241, - "text": "Config file ownership not changed to root:root: ${OWNER_UP}", - "polarity": "fail", - "normalized_id": "config.file.ownership.not.changed.to.root.root.owner.up", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 249, - "text": "Config file is read-only for sandbox user (shields UP)", - "polarity": "pass", - "normalized_id": "config.file.is.read.only.for.sandbox.user.shields.up", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 251, - "text": "Config file write rejected by OS (shields UP)", - "polarity": "pass", - "normalized_id": "config.file.write.rejected.by.os.shields.up", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 253, - "text": "Config file should be immutable but sandbox could write: ${WRITE_RESULT}", - "polarity": "fail", - "normalized_id": "config.file.should.be.immutable.but.sandbox.could.write.write.result", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 260, - "text": "Workspace state is read-only for sandbox user (shields UP)", - "polarity": "pass", - "normalized_id": "workspace.state.is.read.only.for.sandbox.user.shields.up", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 262, - "text": "Workspace write rejected by OS (shields UP)", - "polarity": "pass", - "normalized_id": 
"workspace.write.rejected.by.os.shields.up", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 264, - "text": "Workspace should be locked after shields up: ${WORKSPACE_WRITE_RESULT}", - "polarity": "fail", - "normalized_id": "workspace.should.be.locked.after.shields.up.workspace.write.result", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 275, - "text": "config get returns JSON", - "polarity": "pass", - "normalized_id": "config.get.returns.json", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 277, - "text": "config get did not return JSON: ${CONFIG_GET_OUTPUT}", - "polarity": "fail", - "normalized_id": "config.get.did.not.return.json.config.get.output", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 282, - "text": "config get leaks credentials", - "polarity": "fail", - "normalized_id": "config.get.leaks.credentials", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 284, - "text": "config get output has no credential leaks", - "polarity": "pass", - "normalized_id": "config.get.output.has.no.credential.leaks", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 289, - "text": "config get should strip gateway section", - "polarity": "fail", - "normalized_id": "config.get.should.strip.gateway.section", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 291, - "text": "config get strips gateway section", - "polarity": "pass", - "normalized_id": "config.get.strips.gateway.section", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 297, - "text": "config get --key dotpath works", - "polarity": "pass", - "normalized_id": "config.get.key.dotpath.works", - "mapping_status": "deferred" - }, - { - 
"script": "test/e2e/test-shields-config.sh", - "line": 311, - "text": "shields status reports UP", - "polarity": "pass", - "normalized_id": "shields.status.reports.up", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 313, - "text": "shields status should show UP: ${STATUS_OUTPUT}", - "polarity": "fail", - "normalized_id": "shields.status.should.show.up.status.output", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 326, - "text": "shields down succeeded", - "polarity": "pass", - "normalized_id": "shields.down.succeeded", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 328, - "text": "shields down did not report success: ${SHIELDS_DOWN_OUTPUT}", - "polarity": "fail", - "normalized_id": "shields.down.did.not.report.success.shields.down.output", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 338, - "text": "Config file mode is 660 (restored to mutable default)", - "polarity": "pass", - "normalized_id": "config.file.mode.is.660.restored.to.mutable.default", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 340, - "text": "Config file should be mode 660 after shields down: ${PERMS_DOWN}", - "polarity": "fail", - "normalized_id": "config.file.should.be.mode.660.after.shields.down.perms.down", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 344, - "text": "Config file owned by sandbox:sandbox after shields down", - "polarity": "pass", - "normalized_id": "config.file.owned.by.sandbox.sandbox.after.shields.down", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 346, - "text": "Config file should be owned by sandbox:sandbox: ${PERMS_DOWN}", - "polarity": "fail", - "normalized_id": "config.file.should.be.owned.by.sandbox.sandbox.perms.down", 
- "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 354, - "text": "Config directory mode is 2770 (restored to mutable default)", - "polarity": "pass", - "normalized_id": "config.directory.mode.is.2770.restored.to.mutable.default", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 356, - "text": "Config directory should be mode 2770 after shields down: ${DIR_PERMS_DOWN}", - "polarity": "fail", - "normalized_id": "config.directory.should.be.mode.2770.after.shields.down.dir.perms.down", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 360, - "text": "Config directory owned by sandbox:sandbox after shields down", - "polarity": "pass", - "normalized_id": "config.directory.owned.by.sandbox.sandbox.after.shields.down", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 362, - "text": "Config directory should be owned by sandbox:sandbox: ${DIR_PERMS_DOWN}", - "polarity": "fail", - "normalized_id": "config.directory.should.be.owned.by.sandbox.sandbox.dir.perms.down", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 368, - "text": "Workspace state is writable again after shields down", - "polarity": "pass", - "normalized_id": "workspace.state.is.writable.again.after.shields.down", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 370, - "text": "Workspace should be writable after shields down: ${WORKSPACE_DOWN_RESULT}", - "polarity": "fail", - "normalized_id": "workspace.should.be.writable.after.shields.down.workspace.down.result", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 382, - "text": "shields status reports DOWN", - "polarity": "pass", - "normalized_id": "shields.status.reports.down", - "mapping_status": "deferred" - }, - { - "script": 
"test/e2e/test-shields-config.sh", - "line": 384, - "text": "shields status should show DOWN: ${STATUS_DOWN}", - "polarity": "fail", - "normalized_id": "shields.status.should.show.down.status.down", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 388, - "text": "shields status shows reason", - "polarity": "pass", - "normalized_id": "shields.status.shows.reason", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 390, - "text": "shields status should show reason: ${STATUS_DOWN}", - "polarity": "fail", - "normalized_id": "shields.status.should.show.reason.status.down", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 394, - "text": "shields status shows timeout remaining", - "polarity": "pass", - "normalized_id": "shields.status.shows.timeout.remaining", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 402, - "text": "shields up restored for audit trail test", - "polarity": "pass", - "normalized_id": "shields.up.restored.for.audit.trail.test", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 405, - "text": "Failed to restore shields up before audit phase: ${RESTORE_UP_OUTPUT}", - "polarity": "fail", - "normalized_id": "failed.to.restore.shields.up.before.audit.phase.restore.up.output", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 422, - "text": "Audit has ≥2 shields_up entries (got ${UP_COUNT})", - "polarity": "pass", - "normalized_id": "audit.has.2.shields.up.entries.got.up.count", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 424, - "text": "Expected ≥2 shields_up audit entries, got ${UP_COUNT}", - "polarity": "fail", - "normalized_id": "expected.2.shields.up.audit.entries.got.up.count", - "mapping_status": "deferred" - }, - { - 
"script": "test/e2e/test-shields-config.sh", - "line": 428, - "text": "Audit has ≥1 shields_down entries (got ${DOWN_COUNT})", - "polarity": "pass", - "normalized_id": "audit.has.1.shields.down.entries.got.down.count", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 430, - "text": "Expected ≥1 shields_down audit entries, got ${DOWN_COUNT}", - "polarity": "fail", - "normalized_id": "expected.1.shields.down.audit.entries.got.down.count", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 435, - "text": "Audit trail contains credentials", - "polarity": "fail", - "normalized_id": "audit.trail.contains.credentials", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 437, - "text": "Audit trail is credential-free", - "polarity": "pass", - "normalized_id": "audit.trail.is.credential.free", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 449, - "text": "All audit entries are valid JSON", - "polarity": "pass", - "normalized_id": "all.audit.entries.are.valid.json", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 451, - "text": "${INVALID_JSON} audit entries are invalid JSON", - "polarity": "fail", - "normalized_id": "invalid.json.audit.entries.are.invalid.json", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 454, - "text": "Audit file not found: $AUDIT_FILE", - "polarity": "fail", - "normalized_id": "audit.file.not.found.audit.file", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 469, - "text": "shields down with 10s timeout", - "polarity": "pass", - "normalized_id": "shields.down.with.10s.timeout", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 471, - "text": "shields should be DOWN: 
${STATUS_TIMER}", - "polarity": "fail", - "normalized_id": "shields.should.be.down.status.timer", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 486, - "text": "Auto-restore timer re-locked config after timeout", - "polarity": "pass", - "normalized_id": "auto.restore.timer.re.locked.config.after.timeout", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 490, - "text": "Auto-restore timer did not re-lock within 60s", - "polarity": "fail", - "normalized_id": "auto.restore.timer.did.not.re.lock.within.60s", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 497, - "text": "Config locked after auto-restore (${PERMS_TIMER})", - "polarity": "pass", - "normalized_id": "config.locked.after.auto.restore.perms.timer", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 499, - "text": "Config should be locked after auto-restore, got: ${PERMS_TIMER}", - "polarity": "fail", - "normalized_id": "config.should.be.locked.after.auto.restore.got.perms.timer", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 511, - "text": "Double shields-up rejected", - "polarity": "pass", - "normalized_id": "double.shields.up.rejected", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 513, - "text": "Double shields-up should be rejected: ${DOUBLE_UP}", - "polarity": "fail", - "normalized_id": "double.shields.up.should.be.rejected.double.up", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 517, - "text": "Cleanup: shields down", - "polarity": "pass", - "normalized_id": "cleanup.shields.down", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 527, - "text": "Double shields-down rejected", - "polarity": "pass", - 
"normalized_id": "double.shields.down.rejected", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 529, - "text": "Double shields-down should be rejected: ${DOUBLE_DOWN}", - "polarity": "fail", - "normalized_id": "double.shields.down.should.be.rejected.double.down", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-shields-config.sh", - "line": 538, - "text": "Sandbox destroyed", - "polarity": "pass", - "normalized_id": "sandbox.destroyed", - "mapping_status": "deferred" - } - ] - }, - { - "script": "test/e2e/test-skill-agent-e2e.sh", - "assertions": [ - { - "script": "test/e2e/test-skill-agent-e2e.sh", - "line": 92, - "text": "Docker is not running", - "polarity": "fail", - "normalized_id": "docker.is.not.running", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-skill-agent-e2e.sh", - "line": 95, - "text": "Docker is running", - "polarity": "pass", - "normalized_id": "docker.is.running", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-skill-agent-e2e.sh", - "line": 98, - "text": "NVIDIA_API_KEY not set or invalid", - "polarity": "fail", - "normalized_id": "nvidia.api.key.not.set.or.invalid", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-skill-agent-e2e.sh", - "line": 101, - "text": "NVIDIA_API_KEY is set", - "polarity": "pass", - "normalized_id": "nvidia.api.key.is.set", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-skill-agent-e2e.sh", - "line": 104, - "text": "Could not cd to repo root", - "polarity": "fail", - "normalized_id": "could.not.cd.to.repo.root", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-skill-agent-e2e.sh", - "line": 133, - "text": "install.sh failed (exit $install_exit)", - "polarity": "fail", - "normalized_id": "install.sh.failed.exit.install.exit", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-skill-agent-e2e.sh", - "line": 137, - "text": "NemoClaw installed", - 
"polarity": "pass", - "normalized_id": "nemoclaw.installed", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-skill-agent-e2e.sh", - "line": 140, - "text": "nemoclaw not on PATH", - "polarity": "fail", - "normalized_id": "nemoclaw.not.on.path", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-skill-agent-e2e.sh", - "line": 144, - "text": "openshell not on PATH", - "polarity": "fail", - "normalized_id": "openshell.not.on.path", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-skill-agent-e2e.sh", - "line": 147, - "text": "CLIs on PATH", - "polarity": "pass", - "normalized_id": "clis.on.path", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-skill-agent-e2e.sh", - "line": 159, - "text": "Failed to inject ${SKILL_ID}", - "polarity": "fail", - "normalized_id": "failed.to.inject.skill.id", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-skill-agent-e2e.sh", - "line": 162, - "text": "${SKILL_ID} injected and queryable", - "polarity": "pass", - "normalized_id": "skill.id.injected.and.queryable", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-skill-agent-e2e.sh", - "line": 190, - "text": "Agent returned ${VERIFY_PHRASE} (attempt ${attempt}/${MAX_ATTEMPTS})", - "polarity": "pass", - "normalized_id": "agent.returned.verify.phrase.attempt.attempt.max.attempts", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-skill-agent-e2e.sh", - "line": 206, - "text": "Agent returned ${VERIFY_PHRASE} via fuzzy match (attempt ${attempt}/${MAX_ATTEMPTS})", - "polarity": "pass", - "normalized_id": "agent.returned.verify.phrase.via.fuzzy.match.attempt.attempt.max.attempts", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-skill-agent-e2e.sh", - "line": 224, - "text": "$last_fail", - "polarity": "fail", - "normalized_id": "last.fail", - "mapping_status": "deferred" - } - ] - }, - { - "script": "test/e2e/test-snapshot-commands.sh", - "assertions": [ - { 
- "script": "test/e2e/test-snapshot-commands.sh", - "line": 83, - "text": "NVIDIA_API_KEY is required", - "polarity": "fail", - "normalized_id": "nvidia.api.key.is.required", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-snapshot-commands.sh", - "line": 84, - "text": "NEMOCLAW_NON_INTERACTIVE=1 is required", - "polarity": "fail", - "normalized_id": "nemoclaw.non.interactive.1.is.required", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-snapshot-commands.sh", - "line": 118, - "text": "nemoclaw not found on PATH after install", - "polarity": "fail", - "normalized_id": "nemoclaw.not.found.on.path.after.install", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-snapshot-commands.sh", - "line": 119, - "text": "openshell not found on PATH after install", - "polarity": "fail", - "normalized_id": "openshell.not.found.on.path.after.install", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-snapshot-commands.sh", - "line": 120, - "text": "NemoClaw installed", - "polarity": "pass", - "normalized_id": "nemoclaw.installed", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-snapshot-commands.sh", - "line": 127, - "text": "Failed to write marker file", - "polarity": "fail", - "normalized_id": "failed.to.write.marker.file", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-snapshot-commands.sh", - "line": 130, - "text": "Marker verification failed: got '${VERIFY}'", - "polarity": "fail", - "normalized_id": "marker.verification.failed.got.verify", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-snapshot-commands.sh", - "line": 132, - "text": "Marker file written", - "polarity": "pass", - "normalized_id": "marker.file.written", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-snapshot-commands.sh", - "line": 149, - "text": "snapshot create exited with code $_CAPTURE_RC: ${SNAPSHOT_OUTPUT}", - "polarity": "fail", - "normalized_id": 
"snapshot.create.exited.with.code.capture.rc.snapshot.output", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-snapshot-commands.sh", - "line": 156, - "text": "snapshot create succeeded", - "polarity": "pass", - "normalized_id": "snapshot.create.succeeded", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-snapshot-commands.sh", - "line": 158, - "text": "snapshot create did not report success: ${SNAPSHOT_OUTPUT}", - "polarity": "fail", - "normalized_id": "snapshot.create.did.not.report.success.snapshot.output", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-snapshot-commands.sh", - "line": 172, - "text": "snapshot list exited with code $_CAPTURE_RC: ${LIST_OUTPUT}", - "polarity": "fail", - "normalized_id": "snapshot.list.exited.with.code.capture.rc.list.output", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-snapshot-commands.sh", - "line": 176, - "text": "snapshot list shows snapshots", - "polarity": "pass", - "normalized_id": "snapshot.list.shows.snapshots", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-snapshot-commands.sh", - "line": 178, - "text": "snapshot list shows no snapshots: ${LIST_OUTPUT}", - "polarity": "fail", - "normalized_id": "snapshot.list.shows.no.snapshots.list.output", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-snapshot-commands.sh", - "line": 183, - "text": "Failed to parse a snapshot timestamp from list output: ${LIST_OUTPUT}", - "polarity": "fail", - "normalized_id": "failed.to.parse.a.snapshot.timestamp.from.list.output.list.output", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-snapshot-commands.sh", - "line": 191, - "text": "Failed to modify sandbox state", - "polarity": "fail", - "normalized_id": "failed.to.modify.sandbox.state", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-snapshot-commands.sh", - "line": 195, - "text": "First marker should be deleted but got: ${GONE}", - 
"polarity": "fail", - "normalized_id": "first.marker.should.be.deleted.but.got.gone", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-snapshot-commands.sh", - "line": 199, - "text": "Second snapshot create failed (code $_CAPTURE_RC): ${_SECOND_SNAP}", - "polarity": "fail", - "normalized_id": "second.snapshot.create.failed.code.capture.rc.second.snap", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-snapshot-commands.sh", - "line": 201, - "text": "State modified, second snapshot created", - "polarity": "pass", - "normalized_id": "state.modified.second.snapshot.created", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-snapshot-commands.sh", - "line": 206, - "text": "Failed to perturb sandbox before latest restore", - "polarity": "fail", - "normalized_id": "failed.to.perturb.sandbox.before.latest.restore", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-snapshot-commands.sh", - "line": 215, - "text": "snapshot restore exited with code $_CAPTURE_RC: ${RESTORE_OUTPUT}", - "polarity": "fail", - "normalized_id": "snapshot.restore.exited.with.code.capture.rc.restore.output", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-snapshot-commands.sh", - "line": 219, - "text": "snapshot restore did not report success: ${RESTORE_OUTPUT}", - "polarity": "fail", - "normalized_id": "snapshot.restore.did.not.report.success.restore.output", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-snapshot-commands.sh", - "line": 223, - "text": "Latest restore did not recover the second marker: ${SECOND_CHECK}", - "polarity": "fail", - "normalized_id": "latest.restore.did.not.recover.the.second.marker.second.check", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-snapshot-commands.sh", - "line": 224, - "text": "Latest snapshot restored expected state", - "polarity": "pass", - "normalized_id": "latest.snapshot.restored.expected.state", - "mapping_status": "deferred" 
- }, - { - "script": "test/e2e/test-snapshot-commands.sh", - "line": 233, - "text": "targeted snapshot restore exited with code $_CAPTURE_RC: ${TARGETED_OUTPUT}", - "polarity": "fail", - "normalized_id": "targeted.snapshot.restore.exited.with.code.capture.rc.targeted.output", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-snapshot-commands.sh", - "line": 237, - "text": "targeted snapshot restore did not report success: ${TARGETED_OUTPUT}", - "polarity": "fail", - "normalized_id": "targeted.snapshot.restore.did.not.report.success.targeted.output", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-snapshot-commands.sh", - "line": 241, - "text": "First snapshot did not restore the original marker: ${FIRST_CHECK}", - "polarity": "fail", - "normalized_id": "first.snapshot.did.not.restore.the.original.marker.first.check", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-snapshot-commands.sh", - "line": 243, - "text": "First snapshot should not contain the second marker", - "polarity": "fail", - "normalized_id": "first.snapshot.should.not.contain.the.second.marker", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-snapshot-commands.sh", - "line": 244, - "text": "First snapshot restored expected state", - "polarity": "pass", - "normalized_id": "first.snapshot.restored.expected.state", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-snapshot-commands.sh", - "line": 260, - "text": "No credentials in snapshot directories", - "polarity": "pass", - "normalized_id": "no.credentials.in.snapshot.directories", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-snapshot-commands.sh", - "line": 262, - "text": "Credentials found: $CRED_LEAKS", - "polarity": "fail", - "normalized_id": "credentials.found.cred.leaks", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-snapshot-commands.sh", - "line": 265, - "text": "Backup directory missing: $BACKUP_DIR", - "polarity": 
"fail", - "normalized_id": "backup.directory.missing.backup.dir", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-snapshot-commands.sh", - "line": 273, - "text": "snapshot help exited with code $_CAPTURE_RC: ${HELP_OUTPUT}", - "polarity": "fail", - "normalized_id": "snapshot.help.exited.with.code.capture.rc.help.output", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-snapshot-commands.sh", - "line": 278, - "text": "snapshot help shows create/list/restore", - "polarity": "pass", - "normalized_id": "snapshot.help.shows.create.list.restore", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-snapshot-commands.sh", - "line": 280, - "text": "snapshot help incomplete: ${HELP_OUTPUT}", - "polarity": "fail", - "normalized_id": "snapshot.help.incomplete.help.output", - "mapping_status": "deferred" - } - ] - }, - { - "script": "test/e2e/test-spark-install.sh", - "assertions": [ - { - "script": "test/e2e/test-spark-install.sh", - "line": 59, - "text": "Running on Linux", - "polarity": "pass", - "normalized_id": "running.on.linux", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-spark-install.sh", - "line": 61, - "text": "This script is for DGX Spark (Linux). 
On other OS use Vitest: NEMOCLAW_E2E_SPARK_INSTALL=1 --project spark-install-cli (skipped there on non-Linux).", - "polarity": "fail", - "normalized_id": "this.script.is.for.dgx.spark.linux.on.other.os.use.vitest.nemoclaw.e2e.spark.install.1.project.spark.install.cli.skipped.there.on.non.linux", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-spark-install.sh", - "line": 67, - "text": "Docker is running", - "polarity": "pass", - "normalized_id": "docker.is.running", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-spark-install.sh", - "line": 69, - "text": "Docker is not running", - "polarity": "fail", - "normalized_id": "docker.is.not.running", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-spark-install.sh", - "line": 74, - "text": "NEMOCLAW_NON_INTERACTIVE=1", - "polarity": "pass", - "normalized_id": "nemoclaw.non.interactive.1", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-spark-install.sh", - "line": 76, - "text": "NEMOCLAW_NON_INTERACTIVE=1 is required", - "polarity": "fail", - "normalized_id": "nemoclaw.non.interactive.1.is.required", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-spark-install.sh", - "line": 81, - "text": "NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1", - "polarity": "pass", - "normalized_id": "nemoclaw.accept.third.party.software.1", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-spark-install.sh", - "line": 83, - "text": "NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 is required for non-interactive install", - "polarity": "fail", - "normalized_id": "nemoclaw.accept.third.party.software.1.is.required.for.non.interactive.install", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-spark-install.sh", - "line": 89, - "text": "cd to repo: $REPO", - "polarity": "fail", - "normalized_id": "cd.to.repo.repo", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-spark-install.sh", - "line": 93, - "text": "Using 
generic installer flow without Spark-specific setup", - "polarity": "pass", - "normalized_id": "using.generic.installer.flow.without.spark.specific.setup", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-spark-install.sh", - "line": 114, - "text": "install failed (exit $install_exit); last 80 lines of log:", - "polarity": "fail", - "normalized_id": "install.failed.exit.install.exit.last.80.lines.of.log", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-spark-install.sh", - "line": 118, - "text": "install completed (exit 0)", - "polarity": "pass", - "normalized_id": "install.completed.exit.0", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-spark-install.sh", - "line": 135, - "text": "nemoclaw on PATH ($(command -v nemoclaw))", - "polarity": "pass", - "normalized_id": "nemoclaw.on.path.command.v.nemoclaw", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-spark-install.sh", - "line": 137, - "text": "nemoclaw not on PATH", - "polarity": "fail", - "normalized_id": "nemoclaw.not.on.path", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-spark-install.sh", - "line": 142, - "text": "openshell on PATH", - "polarity": "pass", - "normalized_id": "openshell.on.path", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-spark-install.sh", - "line": 144, - "text": "openshell not on PATH", - "polarity": "fail", - "normalized_id": "openshell.not.on.path", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-spark-install.sh", - "line": 149, - "text": "nemoclaw --help exits 0", - "polarity": "pass", - "normalized_id": "nemoclaw.help.exits.0", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-spark-install.sh", - "line": 151, - "text": "nemoclaw --help failed", - "polarity": "fail", - "normalized_id": "nemoclaw.help.failed", - "mapping_status": "deferred" - } - ] - }, - { - "script": "test/e2e/test-state-backup-restore.sh", - "assertions": [ - { - 
"script": "test/e2e/test-state-backup-restore.sh", - "line": 186, - "text": "TC-STATE-01: Setup", - "polarity": "fail", - "normalized_id": "tc.state.01.setup", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-state-backup-restore.sh", - "line": 197, - "text": "TC-STATE-01: Backup completed successfully", - "polarity": "pass", - "normalized_id": "tc.state.01.backup.completed.successfully", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-state-backup-restore.sh", - "line": 199, - "text": "TC-STATE-01: Backup", - "polarity": "fail", - "normalized_id": "tc.state.01.backup", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-state-backup-restore.sh", - "line": 207, - "text": "TC-STATE-01: Backup dir", - "polarity": "fail", - "normalized_id": "tc.state.01.backup.dir", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-state-backup-restore.sh", - "line": 225, - "text": "TC-STATE-01: BackupCaptureFiles", - "polarity": "fail", - "normalized_id": "tc.state.01.backupcapturefiles", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-state-backup-restore.sh", - "line": 228, - "text": "TC-STATE-01: BackupCaptureFiles — 5/5 .md files captured in host backup", - "polarity": "pass", - "normalized_id": "tc.state.01.backupcapturefiles.5.5.md.files.captured.in.host.backup", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-state-backup-restore.sh", - "line": 232, - "text": "TC-STATE-01: BackupCaptureDir", - "polarity": "fail", - "normalized_id": "tc.state.01.backupcapturedir", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-state-backup-restore.sh", - "line": 236, - "text": "TC-STATE-01: BackupCaptureDir", - "polarity": "fail", - "normalized_id": "tc.state.01.backupcapturedir", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-state-backup-restore.sh", - "line": 239, - "text": "TC-STATE-01: BackupCaptureDir — memory directory captured in host 
backup", - "polarity": "pass", - "normalized_id": "tc.state.01.backupcapturedir.memory.directory.captured.in.host.backup", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-state-backup-restore.sh", - "line": 262, - "text": "TC-STATE-01: Destroy", - "polarity": "fail", - "normalized_id": "tc.state.01.destroy", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-state-backup-restore.sh", - "line": 265, - "text": "TC-STATE-01: Sandbox destroyed", - "polarity": "pass", - "normalized_id": "tc.state.01.sandbox.destroyed", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-state-backup-restore.sh", - "line": 269, - "text": "TC-STATE-01: Re-onboard", - "polarity": "fail", - "normalized_id": "tc.state.01.re.onboard", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-state-backup-restore.sh", - "line": 272, - "text": "TC-STATE-01: Sandbox re-onboarded", - "polarity": "pass", - "normalized_id": "tc.state.01.sandbox.re.onboarded", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-state-backup-restore.sh", - "line": 280, - "text": "TC-STATE-01: Restore completed successfully", - "polarity": "pass", - "normalized_id": "tc.state.01.restore.completed.successfully", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-state-backup-restore.sh", - "line": 282, - "text": "TC-STATE-01: Restore", - "polarity": "fail", - "normalized_id": "tc.state.01.restore", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-state-backup-restore.sh", - "line": 299, - "text": "TC-STATE-01: FilesRestore — ${files_restored}/5 workspace files restored correctly", - "polarity": "pass", - "normalized_id": "tc.state.01.filesrestore.files.restored.5.workspace.files.restored.correctly", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-state-backup-restore.sh", - "line": 301, - "text": "TC-STATE-01: FilesRestore", - "polarity": "fail", - "normalized_id": "tc.state.01.filesrestore", - 
"mapping_status": "deferred" - }, - { - "script": "test/e2e/test-state-backup-restore.sh", - "line": 311, - "text": "TC-STATE-01: MemoryDirRestore — memory directory contents restored correctly", - "polarity": "pass", - "normalized_id": "tc.state.01.memorydirrestore.memory.directory.contents.restored.correctly", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-state-backup-restore.sh", - "line": 314, - "text": "TC-STATE-01: MemoryDirRestore", - "polarity": "fail", - "normalized_id": "tc.state.01.memorydirrestore", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-state-backup-restore.sh", - "line": 318, - "text": "TC-STATE-01: MemoryDirRestore", - "polarity": "fail", - "normalized_id": "tc.state.01.memorydirrestore", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-state-backup-restore.sh", - "line": 339, - "text": "$PASS${NC}", - "polarity": "pass", - "normalized_id": "pass.nc", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-state-backup-restore.sh", - "line": 340, - "text": "$FAIL${NC}", - "polarity": "fail", - "normalized_id": "fail.nc", - "mapping_status": "deferred" - } - ] - }, - { - "script": "test/e2e/test-telegram-injection.sh", - "assertions": [ - { - "script": "test/e2e/test-telegram-injection.sh", - "line": 149, - "text": "NVIDIA_API_KEY not set", - "polarity": "fail", - "normalized_id": "nvidia.api.key.not.set", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-telegram-injection.sh", - "line": 152, - "text": "NVIDIA_API_KEY is set", - "polarity": "pass", - "normalized_id": "nvidia.api.key.is.set", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-telegram-injection.sh", - "line": 155, - "text": "openshell not found on PATH", - "polarity": "fail", - "normalized_id": "openshell.not.found.on.path", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-telegram-injection.sh", - "line": 158, - "text": "openshell found", - "polarity": 
"pass", - "normalized_id": "openshell.found", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-telegram-injection.sh", - "line": 161, - "text": "nemoclaw not found on PATH", - "polarity": "fail", - "normalized_id": "nemoclaw.not.found.on.path", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-telegram-injection.sh", - "line": 164, - "text": "nemoclaw found", - "polarity": "pass", - "normalized_id": "nemoclaw.found", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-telegram-injection.sh", - "line": 168, - "text": "Sandbox '${SANDBOX_NAME}' is running", - "polarity": "pass", - "normalized_id": "sandbox.sandbox.name.is.running", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-telegram-injection.sh", - "line": 170, - "text": "Sandbox '${SANDBOX_NAME}' not running — run test-full-e2e.sh first", - "polarity": "fail", - "normalized_id": "sandbox.sandbox.name.not.running.run.test.full.e2e.sh.first", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-telegram-injection.sh", - "line": 211, - "text": "T1: \\$(command) substitution was NOT executed", - "polarity": "pass", - "normalized_id": "t1.command.substitution.was.not.executed", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-telegram-injection.sh", - "line": 213, - "text": "T1: \\$(command) substitution was EXECUTED — injection successful!", - "polarity": "fail", - "normalized_id": "t1.command.substitution.was.executed.injection.successful", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-telegram-injection.sh", - "line": 235, - "text": "T2: Backtick command substitution was NOT executed", - "polarity": "pass", - "normalized_id": "t2.backtick.command.substitution.was.not.executed", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-telegram-injection.sh", - "line": 237, - "text": "T2: Backtick command substitution was EXECUTED — injection successful!", - "polarity": "fail", - 
"normalized_id": "t2.backtick.command.substitution.was.executed.injection.successful", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-telegram-injection.sh", - "line": 264, - "text": "T3: Single-quote breakout was NOT exploitable", - "polarity": "pass", - "normalized_id": "t3.single.quote.breakout.was.not.exploitable", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-telegram-injection.sh", - "line": 266, - "text": "T3: Single-quote breakout was EXECUTED — injection successful!", - "polarity": "fail", - "normalized_id": "t3.single.quote.breakout.was.executed.injection.successful", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-telegram-injection.sh", - "line": 292, - "text": "T4: \\${NVIDIA_API_KEY} expanded to actual key value — secret leaked!", - "polarity": "fail", - "normalized_id": "t4.nvidia.api.key.expanded.to.actual.key.value.secret.leaked", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-telegram-injection.sh", - "line": 294, - "text": "T4: \\${NVIDIA_API_KEY} treated as literal string (not expanded)", - "polarity": "pass", - "normalized_id": "t4.nvidia.api.key.treated.as.literal.string.not.expanded", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-telegram-injection.sh", - "line": 297, - "text": "T4: \\${NVIDIA_API_KEY} did not expand to key value (result: ${t4_result:0:100})", - "polarity": "pass", - "normalized_id": "t4.nvidia.api.key.did.not.expand.to.key.value.result.t4.result.0.100", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-telegram-injection.sh", - "line": 334, - "text": "T5: NVIDIA_API_KEY found in HOST process table", - "polarity": "fail", - "normalized_id": "t5.nvidia.api.key.found.in.host.process.table", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-telegram-injection.sh", - "line": 336, - "text": "T5: NVIDIA_API_KEY found in SANDBOX process table", - "polarity": "fail", - "normalized_id": 
"t5.nvidia.api.key.found.in.sandbox.process.table", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-telegram-injection.sh", - "line": 338, - "text": "T5: API key not visible in process tables (host or sandbox)", - "polarity": "pass", - "normalized_id": "t5.api.key.not.visible.in.process.tables.host.or.sandbox", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-telegram-injection.sh", - "line": 363, - "text": "T6: SANDBOX_NAME 'foo;rm -rf /' rejected by validateName()", - "polarity": "pass", - "normalized_id": "t6.sandbox.name.foo.rm.rf.rejected.by.validatename", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-telegram-injection.sh", - "line": 365, - "text": "T6: SANDBOX_NAME 'foo;rm -rf /' was ACCEPTED — validation bypass!", - "polarity": "fail", - "normalized_id": "t6.sandbox.name.foo.rm.rf.was.accepted.validation.bypass", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-telegram-injection.sh", - "line": 382, - "text": "T7: SANDBOX_NAME '--help' rejected (option injection prevented)", - "polarity": "pass", - "normalized_id": "t7.sandbox.name.help.rejected.option.injection.prevented", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-telegram-injection.sh", - "line": 384, - "text": "T7: SANDBOX_NAME '--help' was ACCEPTED — option injection possible!", - "polarity": "fail", - "normalized_id": "t7.sandbox.name.help.was.accepted.option.injection.possible", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-telegram-injection.sh", - "line": 401, - "text": "T6/T7 extra: SANDBOX_NAME '${invalid_name}' correctly rejected", - "polarity": "pass", - "normalized_id": "t6.t7.extra.sandbox.name.invalid.name.correctly.rejected", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-telegram-injection.sh", - "line": 403, - "text": "T6/T7 extra: SANDBOX_NAME '${invalid_name}' was ACCEPTED", - "polarity": "fail", - "normalized_id": 
"t6.t7.extra.sandbox.name.invalid.name.was.accepted", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-telegram-injection.sh", - "line": 429, - "text": "T8: Normal message passed through correctly", - "polarity": "pass", - "normalized_id": "t8.normal.message.passed.through.correctly", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-telegram-injection.sh", - "line": 431, - "text": "T8: Normal message was not echoed back correctly (got: ${t8_result:0:200})", - "polarity": "fail", - "normalized_id": "t8.normal.message.was.not.echoed.back.correctly.got.t8.result.0.200", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-telegram-injection.sh", - "line": 453, - "text": "T8b: Message with special characters processed without error", - "polarity": "pass", - "normalized_id": "t8b.message.with.special.characters.processed.without.error", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-telegram-injection.sh", - "line": 455, - "text": "T8b: Message with special characters caused empty/error response", - "polarity": "fail", - "normalized_id": "t8b.message.with.special.characters.caused.empty.error.response", - "mapping_status": "deferred" - } - ] - }, - { - "script": "test/e2e/test-token-rotation.sh", - "assertions": [ - { - "script": "test/e2e/test-token-rotation.sh", - "line": 196, - "text": "install.sh completed (exit 0)", - "polarity": "pass", - "normalized_id": "install.sh.completed.exit.0", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 203, - "text": "install.sh failed (exit $install_exit)", - "polarity": "fail", - "normalized_id": "install.sh.failed.exit.install.exit", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 212, - "text": "openshell not found on PATH after install", - "polarity": "fail", - "normalized_id": "openshell.not.found.on.path.after.install", - "mapping_status": "deferred" - }, - { - 
"script": "test/e2e/test-token-rotation.sh", - "line": 215, - "text": "openshell installed ($(openshell --version 2>&1 || echo unknown))", - "polarity": "pass", - "normalized_id": "openshell.installed.openshell.version.2.1.echo.unknown", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 218, - "text": "nemoclaw not found on PATH after install", - "polarity": "fail", - "normalized_id": "nemoclaw.not.found.on.path.after.install", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 221, - "text": "nemoclaw installed at $(command -v nemoclaw)", - "polarity": "pass", - "normalized_id": "nemoclaw.installed.at.command.v.nemoclaw", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 239, - "text": "Sandbox $SANDBOX_NAME created and running", - "polarity": "pass", - "normalized_id": "sandbox.sandbox.name.created.and.running", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 241, - "text": "Sandbox $SANDBOX_NAME not running after first onboard", - "polarity": "fail", - "normalized_id": "sandbox.sandbox.name.not.running.after.first.onboard", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 245, - "text": "Provider ${SANDBOX_NAME}-telegram-bridge exists", - "polarity": "pass", - "normalized_id": "provider.sandbox.name.telegram.bridge.exists", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 247, - "text": "Provider ${SANDBOX_NAME}-telegram-bridge not found", - "polarity": "fail", - "normalized_id": "provider.sandbox.name.telegram.bridge.not.found", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 251, - "text": "Provider ${SANDBOX_NAME}-discord-bridge exists", - "polarity": "pass", - "normalized_id": "provider.sandbox.name.discord.bridge.exists", - 
"mapping_status": "deferred" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 253, - "text": "Provider ${SANDBOX_NAME}-discord-bridge not found", - "polarity": "fail", - "normalized_id": "provider.sandbox.name.discord.bridge.not.found", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 257, - "text": "Provider ${SANDBOX_NAME}-slack-bridge exists", - "polarity": "pass", - "normalized_id": "provider.sandbox.name.slack.bridge.exists", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 259, - "text": "Provider ${SANDBOX_NAME}-slack-bridge not found", - "polarity": "fail", - "normalized_id": "provider.sandbox.name.slack.bridge.not.found", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 263, - "text": "Provider ${SANDBOX_NAME}-slack-app exists", - "polarity": "pass", - "normalized_id": "provider.sandbox.name.slack.app.exists", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 265, - "text": "Provider ${SANDBOX_NAME}-slack-app not found", - "polarity": "fail", - "normalized_id": "provider.sandbox.name.slack.app.not.found", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 274, - "text": "Telegram credential hash stored for $SANDBOX_NAME", - "polarity": "pass", - "normalized_id": "telegram.credential.hash.stored.for.sandbox.name", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 276, - "text": "Telegram credential hash not found for $SANDBOX_NAME in registry", - "polarity": "fail", - "normalized_id": "telegram.credential.hash.not.found.for.sandbox.name.in.registry", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 284, - "text": "Discord credential hash stored for $SANDBOX_NAME", - "polarity": "pass", - "normalized_id": 
"discord.credential.hash.stored.for.sandbox.name", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 286, - "text": "Discord credential hash not found for $SANDBOX_NAME in registry", - "polarity": "fail", - "normalized_id": "discord.credential.hash.not.found.for.sandbox.name.in.registry", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 294, - "text": "Slack bot credential hash stored for $SANDBOX_NAME", - "polarity": "pass", - "normalized_id": "slack.bot.credential.hash.stored.for.sandbox.name", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 296, - "text": "Slack bot credential hash not found for $SANDBOX_NAME in registry", - "polarity": "fail", - "normalized_id": "slack.bot.credential.hash.not.found.for.sandbox.name.in.registry", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 304, - "text": "Slack app credential hash stored for $SANDBOX_NAME", - "polarity": "pass", - "normalized_id": "slack.app.credential.hash.stored.for.sandbox.name", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 306, - "text": "Slack app credential hash not found for $SANDBOX_NAME in registry", - "polarity": "fail", - "normalized_id": "slack.app.credential.hash.not.found.for.sandbox.name.in.registry", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 323, - "text": "Phase 2 onboard failed (exit $onboard_exit)", - "polarity": "fail", - "normalized_id": "phase.2.onboard.failed.exit.onboard.exit", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 328, - "text": "Credential rotation detected", - "polarity": "pass", - "normalized_id": "credential.rotation.detected", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 330, 
- "text": "Credential rotation not detected in onboard output", - "polarity": "fail", - "normalized_id": "credential.rotation.not.detected.in.onboard.output", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 339, - "text": "Rotation message identifies telegram-bridge", - "polarity": "pass", - "normalized_id": "rotation.message.identifies.telegram.bridge", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 341, - "text": "Rotation message did not identify telegram-bridge", - "polarity": "fail", - "normalized_id": "rotation.message.did.not.identify.telegram.bridge", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 347, - "text": "Rotation message unexpectedly named discord-bridge (Discord token did not change)", - "polarity": "fail", - "normalized_id": "rotation.message.unexpectedly.named.discord.bridge.discord.token.did.not.change", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 351, - "text": "Rotation message did not name discord-bridge (Discord unchanged)", - "polarity": "pass", - "normalized_id": "rotation.message.did.not.name.discord.bridge.discord.unchanged", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 355, - "text": "Rotation message unexpectedly named slack-bridge/slack-app (Slack tokens did not change)", - "polarity": "fail", - "normalized_id": "rotation.message.unexpectedly.named.slack.bridge.slack.app.slack.tokens.did.not.change", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 359, - "text": "Rotation message did not name slack-bridge or slack-app (Slack unchanged)", - "polarity": "pass", - "normalized_id": "rotation.message.did.not.name.slack.bridge.or.slack.app.slack.unchanged", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-token-rotation.sh", - 
"line": 363, - "text": "Sandbox rebuild triggered by rotation", - "polarity": "pass", - "normalized_id": "sandbox.rebuild.triggered.by.rotation", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 365, - "text": "Sandbox rebuild not triggered", - "polarity": "fail", - "normalized_id": "sandbox.rebuild.not.triggered", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 371, - "text": "Sandbox running after Telegram rotation", - "polarity": "pass", - "normalized_id": "sandbox.running.after.telegram.rotation", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 373, - "text": "Sandbox not running after Telegram rotation", - "polarity": "fail", - "normalized_id": "sandbox.not.running.after.telegram.rotation", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 384, - "text": "Phase 3 onboard failed (exit $onboard_exit)", - "polarity": "fail", - "normalized_id": "phase.3.onboard.failed.exit.onboard.exit", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 389, - "text": "Sandbox reused when tokens unchanged", - "polarity": "pass", - "normalized_id": "sandbox.reused.when.tokens.unchanged", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 391, - "text": "Sandbox was not reused (unexpected rebuild)", - "polarity": "fail", - "normalized_id": "sandbox.was.not.reused.unexpected.rebuild", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 409, - "text": "Phase 4 onboard failed (exit $onboard_exit)", - "polarity": "fail", - "normalized_id": "phase.4.onboard.failed.exit.onboard.exit", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 414, - "text": "Credential rotation detected", - "polarity": "pass", - "normalized_id": 
"credential.rotation.detected", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 416, - "text": "Credential rotation not detected in onboard output", - "polarity": "fail", - "normalized_id": "credential.rotation.not.detected.in.onboard.output", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 423, - "text": "Rotation message identifies discord-bridge", - "polarity": "pass", - "normalized_id": "rotation.message.identifies.discord.bridge", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 425, - "text": "Rotation message did not identify discord-bridge", - "polarity": "fail", - "normalized_id": "rotation.message.did.not.identify.discord.bridge", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 431, - "text": "Rotation message unexpectedly named telegram-bridge (Telegram token did not change)", - "polarity": "fail", - "normalized_id": "rotation.message.unexpectedly.named.telegram.bridge.telegram.token.did.not.change", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 435, - "text": "Rotation message did not name telegram-bridge (Telegram unchanged)", - "polarity": "pass", - "normalized_id": "rotation.message.did.not.name.telegram.bridge.telegram.unchanged", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 439, - "text": "Rotation message unexpectedly named slack-bridge/slack-app (Slack tokens did not change)", - "polarity": "fail", - "normalized_id": "rotation.message.unexpectedly.named.slack.bridge.slack.app.slack.tokens.did.not.change", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 443, - "text": "Rotation message did not name slack-bridge or slack-app (Slack unchanged)", - "polarity": "pass", - "normalized_id": 
"rotation.message.did.not.name.slack.bridge.or.slack.app.slack.unchanged", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 447, - "text": "Sandbox rebuild triggered by rotation", - "polarity": "pass", - "normalized_id": "sandbox.rebuild.triggered.by.rotation", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 449, - "text": "Sandbox rebuild not triggered", - "polarity": "fail", - "normalized_id": "sandbox.rebuild.not.triggered", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 455, - "text": "Sandbox running after Discord rotation", - "polarity": "pass", - "normalized_id": "sandbox.running.after.discord.rotation", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 457, - "text": "Sandbox not running after Discord rotation", - "polarity": "fail", - "normalized_id": "sandbox.not.running.after.discord.rotation", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 468, - "text": "Phase 5 onboard failed (exit $onboard_exit)", - "polarity": "fail", - "normalized_id": "phase.5.onboard.failed.exit.onboard.exit", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 473, - "text": "Sandbox reused when tokens unchanged", - "polarity": "pass", - "normalized_id": "sandbox.reused.when.tokens.unchanged", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 475, - "text": "Sandbox was not reused (unexpected rebuild)", - "polarity": "fail", - "normalized_id": "sandbox.was.not.reused.unexpected.rebuild", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 493, - "text": "Phase 6 onboard failed (exit $onboard_exit)", - "polarity": "fail", - "normalized_id": "phase.6.onboard.failed.exit.onboard.exit", - "mapping_status": 
"deferred" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 498, - "text": "Credential rotation detected", - "polarity": "pass", - "normalized_id": "credential.rotation.detected", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 500, - "text": "Credential rotation not detected in onboard output", - "polarity": "fail", - "normalized_id": "credential.rotation.not.detected.in.onboard.output", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 507, - "text": "Rotation message identifies slack-bridge", - "polarity": "pass", - "normalized_id": "rotation.message.identifies.slack.bridge", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 509, - "text": "Rotation message did not identify slack-bridge", - "polarity": "fail", - "normalized_id": "rotation.message.did.not.identify.slack.bridge", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 515, - "text": "Rotation message identifies slack-app", - "polarity": "pass", - "normalized_id": "rotation.message.identifies.slack.app", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 517, - "text": "Rotation message did not identify slack-app", - "polarity": "fail", - "normalized_id": "rotation.message.did.not.identify.slack.app", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 523, - "text": "Rotation message unexpectedly named telegram-bridge (Telegram token did not change)", - "polarity": "fail", - "normalized_id": "rotation.message.unexpectedly.named.telegram.bridge.telegram.token.did.not.change", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 527, - "text": "Rotation message did not name telegram-bridge (Telegram unchanged)", - "polarity": "pass", - "normalized_id": 
"rotation.message.did.not.name.telegram.bridge.telegram.unchanged", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 531, - "text": "Rotation message unexpectedly named discord-bridge (Discord token did not change)", - "polarity": "fail", - "normalized_id": "rotation.message.unexpectedly.named.discord.bridge.discord.token.did.not.change", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 535, - "text": "Rotation message did not name discord-bridge (Discord unchanged)", - "polarity": "pass", - "normalized_id": "rotation.message.did.not.name.discord.bridge.discord.unchanged", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 539, - "text": "Sandbox rebuild triggered by Slack rotation", - "polarity": "pass", - "normalized_id": "sandbox.rebuild.triggered.by.slack.rotation", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 541, - "text": "Sandbox rebuild not triggered", - "polarity": "fail", - "normalized_id": "sandbox.rebuild.not.triggered", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 547, - "text": "Sandbox running after Slack rotation", - "polarity": "pass", - "normalized_id": "sandbox.running.after.slack.rotation", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 549, - "text": "Sandbox not running after Slack rotation", - "polarity": "fail", - "normalized_id": "sandbox.not.running.after.slack.rotation", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 560, - "text": "Phase 7 onboard failed (exit $onboard_exit)", - "polarity": "fail", - "normalized_id": "phase.7.onboard.failed.exit.onboard.exit", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 565, - "text": "Sandbox reused when tokens 
unchanged", - "polarity": "pass", - "normalized_id": "sandbox.reused.when.tokens.unchanged", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-token-rotation.sh", - "line": 567, - "text": "Sandbox was not reused (unexpected rebuild)", - "polarity": "fail", - "normalized_id": "sandbox.was.not.reused.unexpected.rebuild", - "mapping_status": "retired" - } - ] - }, - { - "script": "test/e2e/test-tunnel-lifecycle.sh", - "assertions": [ - { - "script": "test/e2e/test-tunnel-lifecycle.sh", - "line": 244, - "text": "TC-DEPLOY-01a / TC-DEPLOY-01b / TC-DEPLOY-01c", - "polarity": "fail", - "normalized_id": "tc.deploy.01a.tc.deploy.01b.tc.deploy.01c", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-tunnel-lifecycle.sh", - "line": 260, - "text": "TC-DEPLOY-01a: LocalReadiness", - "polarity": "fail", - "normalized_id": "tc.deploy.01a.localreadiness", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-tunnel-lifecycle.sh", - "line": 264, - "text": "TC-DEPLOY-01a: Local dashboard reachable (pre-check passed)", - "polarity": "pass", - "normalized_id": "tc.deploy.01a.local.dashboard.reachable.pre.check.passed", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-tunnel-lifecycle.sh", - "line": 275, - "text": "TC-DEPLOY-01a: Start", - "polarity": "fail", - "normalized_id": "tc.deploy.01a.start", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-tunnel-lifecycle.sh", - "line": 289, - "text": "TC-DEPLOY-01a: Tunnel URL found in status ($tunnel_url)", - "polarity": "pass", - "normalized_id": "tc.deploy.01a.tunnel.url.found.in.status.tunnel.url", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-tunnel-lifecycle.sh", - "line": 298, - "text": "TC-DEPLOY-01a: NoSpawn", - "polarity": "fail", - "normalized_id": "tc.deploy.01a.nospawn", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-tunnel-lifecycle.sh", - "line": 302, - "text": "TC-DEPLOY-01a: CaptureBug", - "polarity": 
"fail", - "normalized_id": "tc.deploy.01a.capturebug", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-tunnel-lifecycle.sh", - "line": 306, - "text": "TC-DEPLOY-01a: LocalOrigin", - "polarity": "fail", - "normalized_id": "tc.deploy.01a.localorigin", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-tunnel-lifecycle.sh", - "line": 310, - "text": "TC-DEPLOY-01a: CloudflareRegister", - "polarity": "fail", - "normalized_id": "tc.deploy.01a.cloudflareregister", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-tunnel-lifecycle.sh", - "line": 314, - "text": "TC-DEPLOY-01a: Start", - "polarity": "fail", - "normalized_id": "tc.deploy.01a.start", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-tunnel-lifecycle.sh", - "line": 344, - "text": "TC-DEPLOY-01b: LocalRegression", - "polarity": "fail", - "normalized_id": "tc.deploy.01b.localregression", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-tunnel-lifecycle.sh", - "line": 358, - "text": "TC-DEPLOY-01b: Tunnel serves OpenClaw dashboard (HTTP 200, marker matched)", - "polarity": "pass", - "normalized_id": "tc.deploy.01b.tunnel.serves.openclaw.dashboard.http.200.marker.matched", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-tunnel-lifecycle.sh", - "line": 360, - "text": "TC-DEPLOY-01b", - "polarity": "fail", - "normalized_id": "tc.deploy.01b", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-tunnel-lifecycle.sh", - "line": 365, - "text": "TC-DEPLOY-01b: CloudflareEdge", - "polarity": "fail", - "normalized_id": "tc.deploy.01b.cloudflareedge", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-tunnel-lifecycle.sh", - "line": 379, - "text": "TC-DEPLOY-01c: Stop command", - "polarity": "fail", - "normalized_id": "tc.deploy.01c.stop.command", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-tunnel-lifecycle.sh", - "line": 403, - "text": "TC-DEPLOY-01c: Stop", - 
"polarity": "fail", - "normalized_id": "tc.deploy.01c.stop", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-tunnel-lifecycle.sh", - "line": 405, - "text": "TC-DEPLOY-01c: Tunnel URL absent after stop", - "polarity": "pass", - "normalized_id": "tc.deploy.01c.tunnel.url.absent.after.stop", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-tunnel-lifecycle.sh", - "line": 407, - "text": "TC-DEPLOY-01c: Stop", - "polarity": "fail", - "normalized_id": "tc.deploy.01c.stop", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-tunnel-lifecycle.sh", - "line": 429, - "text": "$PASS${NC}", - "polarity": "pass", - "normalized_id": "pass.nc", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-tunnel-lifecycle.sh", - "line": 430, - "text": "$FAIL${NC}", - "polarity": "fail", - "normalized_id": "fail.nc", - "mapping_status": "deferred" - } - ] - }, - { - "script": "test/e2e/test-upgrade-stale-sandbox.sh", - "assertions": [ - { - "script": "test/e2e/test-upgrade-stale-sandbox.sh", - "line": 54, - "text": "NVIDIA_API_KEY is required", - "polarity": "fail", - "normalized_id": "nvidia.api.key.is.required", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-upgrade-stale-sandbox.sh", - "line": 55, - "text": "NEMOCLAW_NON_INTERACTIVE=1 is required", - "polarity": "fail", - "normalized_id": "nemoclaw.non.interactive.1.is.required", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-upgrade-stale-sandbox.sh", - "line": 91, - "text": "nemoclaw not found on PATH after install", - "polarity": "fail", - "normalized_id": "nemoclaw.not.found.on.path.after.install", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-upgrade-stale-sandbox.sh", - "line": 92, - "text": "openshell not found on PATH after install", - "polarity": "fail", - "normalized_id": "openshell.not.found.on.path.after.install", - "mapping_status": "deferred" - }, - { - "script": 
"test/e2e/test-upgrade-stale-sandbox.sh", - "line": 93, - "text": "NemoClaw installed", - "polarity": "pass", - "normalized_id": "nemoclaw.installed", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-upgrade-stale-sandbox.sh", - "line": 119, - "text": "Failed to build old base image", - "polarity": "fail", - "normalized_id": "failed.to.build.old.base.image", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-upgrade-stale-sandbox.sh", - "line": 121, - "text": "Old base image built (OpenClaw ${OLD_OPENCLAW_VERSION})", - "polarity": "pass", - "normalized_id": "old.base.image.built.openclaw.old.openclaw.version", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-upgrade-stale-sandbox.sh", - "line": 146, - "text": "Sandbox did not become Ready", - "polarity": "fail", - "normalized_id": "sandbox.did.not.become.ready", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-upgrade-stale-sandbox.sh", - "line": 149, - "text": "Failed to read OpenClaw version from old sandbox", - "polarity": "fail", - "normalized_id": "failed.to.read.openclaw.version.from.old.sandbox", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-upgrade-stale-sandbox.sh", - "line": 152, - "text": "Old sandbox created (OpenClaw ${OLD_OPENCLAW_VERSION})", - "polarity": "pass", - "normalized_id": "old.sandbox.created.openclaw.old.openclaw.version", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-upgrade-stale-sandbox.sh", - "line": 186, - "text": "Sandbox registered with agentVersion=${OLD_OPENCLAW_VERSION}", - "polarity": "pass", - "normalized_id": "sandbox.registered.with.agentversion.old.openclaw.version", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-upgrade-stale-sandbox.sh", - "line": 195, - "text": "Phase 5: upgrade-sandboxes --check detected stale sandbox", - "polarity": "pass", - "normalized_id": "phase.5.upgrade.sandboxes.check.detected.stale.sandbox", - "mapping_status": "deferred" - 
}, - { - "script": "test/e2e/test-upgrade-stale-sandbox.sh", - "line": 197, - "text": "upgrade-sandboxes --check says all up to date — stale sandbox NOT detected (#1904)", - "polarity": "fail", - "normalized_id": "upgrade.sandboxes.check.says.all.up.to.date.stale.sandbox.not.detected.1904", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-upgrade-stale-sandbox.sh", - "line": 199, - "text": "upgrade-sandboxes --check produced unexpected output", - "polarity": "fail", - "normalized_id": "upgrade.sandboxes.check.produced.unexpected.output", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-upgrade-stale-sandbox.sh", - "line": 205, - "text": "Sandbox rebuild failed", - "polarity": "fail", - "normalized_id": "sandbox.rebuild.failed", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-upgrade-stale-sandbox.sh", - "line": 215, - "text": "Failed to read OpenClaw version after rebuild", - "polarity": "fail", - "normalized_id": "failed.to.read.openclaw.version.after.rebuild", - "mapping_status": "deferred" - }, - { - "script": "test/e2e/test-upgrade-stale-sandbox.sh", - "line": 219, - "text": "Sandbox still running old OpenClaw ${OLD_OPENCLAW_VERSION} after rebuild — #1904 NOT fixed", - "polarity": "fail", - "normalized_id": "sandbox.still.running.old.openclaw.old.openclaw.version.after.rebuild.1904.not.fixed", - "mapping_status": "mapped" - }, - { - "script": "test/e2e/test-upgrade-stale-sandbox.sh", - "line": 222, - "text": "Phase 6: Sandbox upgraded from OpenClaw ${OLD_OPENCLAW_VERSION} to ${NEW_OPENCLAW_VERSION}", - "polarity": "pass", - "normalized_id": "phase.6.sandbox.upgraded.from.openclaw.old.openclaw.version.to.new.openclaw.version", - "mapping_status": "retired" - }, - { - "script": "test/e2e/test-upgrade-stale-sandbox.sh", - "line": 231, - "text": "Phase 7: All sandboxes up to date after rebuild", - "polarity": "pass", - "normalized_id": "phase.7.all.sandboxes.up.to.date.after.rebuild", - "mapping_status": 
"deferred" - }, - { - "script": "test/e2e/test-upgrade-stale-sandbox.sh", - "line": 233, - "text": "Phase 7: upgrade-sandboxes --check did not report 'up to date' after rebuild", - "polarity": "fail", - "normalized_id": "phase.7.upgrade.sandboxes.check.did.not.report.up.to.date.after.rebuild", - "mapping_status": "deferred" - } - ] - } - ], - "totals": { - "scripts": 54, - "assertions": 2101, - "zero_assertion_scripts": 2 - } -} From 0fbf595064152ebcb76ddda9f58464a63f3c1257 Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Wed, 10 Jun 2026 02:38:02 -0700 Subject: [PATCH 03/17] docs(e2e): clarify Vitest fixture layer terminology --- docs/about/release-notes.mdx | 2 +- test/e2e-scenario/docs/MIGRATION.md | 2 ++ test/e2e-scenario/docs/README.md | 7 +++++-- .../framework-tests/e2e-migration-inventory.test.ts | 1 + test/pr-workflow-contract.test.ts | 6 +++--- vitest.config.ts | 2 ++ 6 files changed, 14 insertions(+), 6 deletions(-) diff --git a/docs/about/release-notes.mdx b/docs/about/release-notes.mdx index 65fc31b98b..bc23b2ce97 100644 --- a/docs/about/release-notes.mdx +++ b/docs/about/release-notes.mdx @@ -185,7 +185,7 @@ NemoClaw v0.0.48 improves onboarding, sandbox builds, local inference, messaging NemoClaw v0.0.47 focused on release hardening and validation coverage: -- The scenario E2E framework gained baseline onboarding coverage for CLI setup, OpenShell gateway creation, sandbox state, inference routing, and smoke tests. +- The Vitest E2E fixture layer gained baseline onboarding coverage for CLI setup, OpenShell gateway creation, sandbox state, inference routing, and smoke tests. - Messaging provider scenarios now validate provider attachment, placeholder configuration, secret-leak prevention, bridge reachability, Discord gateway routing, Slack provider state, Telegram injection safety, and token-rotation isolation. 
- CLI command registration was refactored so public display defaults stay consistent across sandbox channel, host, log, policy, skill, and snapshot commands. - PR review advisor automation was added for maintainers, with deterministic GitHub context gathering and structured review comments. diff --git a/test/e2e-scenario/docs/MIGRATION.md b/test/e2e-scenario/docs/MIGRATION.md index 7f7bcd8032..fd26a8929c 100644 --- a/test/e2e-scenario/docs/MIGRATION.md +++ b/test/e2e-scenario/docs/MIGRATION.md @@ -44,6 +44,8 @@ The durable E2E system has one execution path: - NemoClaw fixtures own setup, onboarding, lifecycle mutations, expected-state probes, assertion helpers, expected-failure evidence, cleanup, artifacts, and secret redaction. +- The historical `test/e2e-scenario/framework/` path is fixture/support code, + not a separate E2E framework or runner. - Typed scenario definitions and matrix helpers describe stable scenario IDs and supported combinations without becoming a second runner. - Product-facing manifests describe desired setup/onboarding state, not test diff --git a/test/e2e-scenario/docs/README.md b/test/e2e-scenario/docs/README.md index 27d961a154..c7be5657c8 100644 --- a/test/e2e-scenario/docs/README.md +++ b/test/e2e-scenario/docs/README.md @@ -52,6 +52,9 @@ Live execution happens through Vitest fixtures: - `artifacts`, `secrets`, `cleanup`, and `shellProbe` provide shared fixture services. +The historical `test/e2e-scenario/framework/` path is a fixture/support layer, +not a separate E2E framework or runner. Vitest remains the only test harness. + `suiteIds` remain metadata for reporting and migration planning. They do not dispatch shell validation suites. @@ -99,8 +102,8 @@ test/e2e-scenario/ - Existing workflows such as `nightly-e2e.yaml`, `e2e-branch-validation.yaml`, `macos-e2e.yaml`, `wsl-e2e.yaml`, `ollama-proxy-e2e.yaml`, and `regression-e2e.yaml` still run direct legacy E2E scripts during migration. 
-- `vitest.config.ts` contains `e2e-scenario-framework` for fast framework tests - and `e2e-scenarios-live` for opt-in live scenario execution. +- `vitest.config.ts` contains `e2e-scenario-framework` for fast fixture/support + tests and `e2e-scenarios-live` for opt-in live scenario execution. ## Migration Tracking diff --git a/test/e2e-scenario/framework-tests/e2e-migration-inventory.test.ts b/test/e2e-scenario/framework-tests/e2e-migration-inventory.test.ts index f10b1a244d..4e6aa8fa5d 100644 --- a/test/e2e-scenario/framework-tests/e2e-migration-inventory.test.ts +++ b/test/e2e-scenario/framework-tests/e2e-migration-inventory.test.ts @@ -38,6 +38,7 @@ describe("E2E migration tracking policy", () => { expect(docs).toContain("replacement Vitest coverage"); expect(docs).toContain("retirement rationale"); expect(docs).toContain("generated legacy assertion inventories"); + expect(docs).toContain("not a separate E2E framework or runner"); }); it("keeps durable taxonomy out of the repo-local migration docs", () => { diff --git a/test/pr-workflow-contract.test.ts b/test/pr-workflow-contract.test.ts index dab223f340..9cca0063aa 100644 --- a/test/pr-workflow-contract.test.ts +++ b/test/pr-workflow-contract.test.ts @@ -452,9 +452,9 @@ describe("pull request and main workflow contracts", () => { ); expect(vitestConfig).toContain('name: "installer-integration"'); - // The e2e scenario framework remains part of the sharded CLI project: - // its tests live under test/e2e-scenario, while the CLI project only - // excludes the legacy test/e2e tree and installer-integration tests. + // E2E fixture/support tests remain part of the sharded CLI project: + // they live under test/e2e-scenario, while the CLI project only excludes + // the legacy test/e2e tree and installer-integration tests. 
expect(cliShardRuns).toContain("npx vitest run --project cli"); expect(vitestConfig).toContain('name: "e2e-scenario-framework"'); expect(vitestConfig).toContain('include: ["test/**/*.test.{js,ts}", "src/**/*.test.ts"]'); diff --git a/vitest.config.ts b/vitest.config.ts index 2270f9afc2..2bdb7df644 100644 --- a/vitest.config.ts +++ b/vitest.config.ts @@ -62,6 +62,8 @@ export default defineConfig({ }, { test: { + // Fast tests for the E2E fixture/support layer. The project name is + // retained for CI compatibility; Vitest remains the only harness. name: "e2e-scenario-framework", testTimeout: testTimeout(), include: ["test/e2e-scenario/framework-tests/**/*.test.ts"], From 68e31573ae6008f59eb912ed8ae7fce2ad72d613 Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Wed, 10 Jun 2026 02:41:19 -0700 Subject: [PATCH 04/17] docs(e2e): align scenario advisor wording with Vitest --- test/e2e-scenario-advisor.test.ts | 14 ++++++++++---- tools/e2e-advisor/scenario-comment.mts | 10 +++++----- tools/e2e-advisor/scenarios-schema.json | 2 +- tools/e2e-advisor/scenarios.mts | 22 +++++++++++----------- 4 files changed, 27 insertions(+), 21 deletions(-) diff --git a/test/e2e-scenario-advisor.test.ts b/test/e2e-scenario-advisor.test.ts index 88c9b4cb79..863706927e 100644 --- a/test/e2e-scenario-advisor.test.ts +++ b/test/e2e-scenario-advisor.test.ts @@ -33,7 +33,7 @@ function metadata( }; } -describe("E2E scenario advisor — prompt construction", () => { +describe("Vitest E2E scenario advisor — prompt construction", () => { it("user prompt embeds the metadata fields the advisor must echo back", () => { const prompt = buildPrompt({ baseRef: "origin/main", @@ -59,6 +59,8 @@ describe("E2E scenario advisor — prompt construction", () => { expect(systemPrompt).toContain(VITEST_SCENARIO_WORKFLOW); expect(systemPrompt).toContain("trusted advisor checkout"); expect(systemPrompt).toContain("recommend the `e2e-scenarios-all` fan-out"); + expect(systemPrompt).toContain("single NemoClaw E2E 
system"); + expect(systemPrompt).not.toContain("non-scenario E2E"); expect(systemPrompt).not.toContain("e2e-scenarios-all.yaml"); expect(systemPrompt).not.toContain("e2e-scenarios.yaml"); }); @@ -71,7 +73,7 @@ describe("E2E scenario advisor — prompt construction", () => { }); }); -describe("E2E scenario advisor — normalization contract", () => { +describe("Vitest E2E scenario advisor — normalization contract", () => { it("preserves valid recommendations and canonicalizes the dispatch command", () => { const raw = { version: 1, @@ -364,7 +366,7 @@ describe("E2E scenario advisor — normalization contract", () => { { required: [], optional: [], confidence: "low" }, metadata({ changedFiles: ["docs/foo.md"] }), ); - expect(normalized.noScenarioE2eReason).toMatch(/no scenario E2E impact/i); + expect(normalized.noScenarioE2eReason).toMatch(/no Vitest E2E scenario impact/i); }); it("rejects non-object advisor output", () => { @@ -373,7 +375,7 @@ describe("E2E scenario advisor — normalization contract", () => { }); }); -describe("E2E scenario advisor — summary and comment rendering", () => { +describe("Vitest E2E scenario advisor — summary and comment rendering", () => { function sampleResult(): ScenarioAdvisorResult { return { version: 1, @@ -398,6 +400,8 @@ describe("E2E scenario advisor — summary and comment rendering", () => { it("renders a summary that surfaces required scenarios with their dispatch line", () => { const summary = renderScenarioSummary(sampleResult()); + expect(summary).toContain("# Vitest E2E Scenario Advisor"); + expect(summary).toContain("Required Vitest E2E scenarios"); expect(summary).toContain("e2e-scenarios-all"); expect(summary).toContain( canonicalDispatchCommand(VITEST_SCENARIO_WORKFLOW, "e2e-scenarios-all"), @@ -413,6 +417,8 @@ describe("E2E scenario advisor — summary and comment rendering", () => { runUrl: "https://example.invalid/run", }); expect(comment).toContain(""); + expect(comment).toContain("## Vitest E2E Scenario Recommendation"); + 
expect(comment).toContain("Dispatch required Vitest E2E scenarios"); expect(comment).toContain("https://example.invalid/run"); }); }); diff --git a/tools/e2e-advisor/scenario-comment.mts b/tools/e2e-advisor/scenario-comment.mts index 0b6fa054fa..309a112913 100644 --- a/tools/e2e-advisor/scenario-comment.mts +++ b/tools/e2e-advisor/scenario-comment.mts @@ -88,18 +88,18 @@ export function buildScenarioComment({ const optionalLine = recommendationLine(optional); const dispatch = required.length > 0 - ? `\n\n**Dispatch required scenario E2E:**\n${required.map((item) => `- \`${item.dispatchCommand}\``).join("\n")}` + ? `\n\n**Dispatch required Vitest E2E scenarios:**\n${required.map((item) => `- \`${item.dispatchCommand}\``).join("\n")}` : ""; const run = runUrl ? `\n\n[Workflow run](${runUrl})` : ""; return `${marker} -## E2E Scenario Advisor Recommendation +## Vitest E2E Scenario Recommendation -**Required scenario E2E:** ${requiredLine} -**Optional scenario E2E:** ${optionalLine}${dispatch}${run} +**Required Vitest E2E scenarios:** ${requiredLine} +**Optional Vitest E2E scenarios:** ${optionalLine}${dispatch}${run}
-Full scenario advisor summary +Full Vitest E2E advisor summary ${summary.trim()} diff --git a/tools/e2e-advisor/scenarios-schema.json b/tools/e2e-advisor/scenarios-schema.json index edf9fbe9e9..f7a8a3c139 100644 --- a/tools/e2e-advisor/scenarios-schema.json +++ b/tools/e2e-advisor/scenarios-schema.json @@ -1,7 +1,7 @@ { "$schema": "https://json-schema.org/draft/2020-12/schema", "$id": "https://github.com/NVIDIA/NemoClaw/tools/e2e-advisor/scenarios-schema.json", - "title": "NemoClaw E2E Scenario Advisor Result", + "title": "NemoClaw Vitest E2E Scenario Advisor Result", "type": "object", "required": [ "version", diff --git a/tools/e2e-advisor/scenarios.mts b/tools/e2e-advisor/scenarios.mts index 3a155250d6..f78d0a38c3 100755 --- a/tools/e2e-advisor/scenarios.mts +++ b/tools/e2e-advisor/scenarios.mts @@ -209,7 +209,7 @@ function writeUnavailableArtifacts( writeJson(paths.finalResult, result); fs.writeFileSync( paths.summary, - `# E2E Scenario Advisor\n\n${failed ? "Failed" : "Skipped"}: ${reason}\n`, + `# Vitest E2E Scenario Advisor\n\n${failed ? "Failed" : "Skipped"}: ${reason}\n`, ); if (failed) { console.error(`Scenario advisor analysis failed: ${reason}`); @@ -222,11 +222,11 @@ function logProgress(message: string): void { export function buildSystemPrompt(schema: AdvisorSchema): string { return [ - "You are the NemoClaw E2E Scenario advisor for CI.", + "You are the NemoClaw Vitest E2E scenario advisor for CI.", "", - "Your job is to recommend which **Vitest scenario E2E** jobs should run for a PR. Scenario E2E is the layered scenario suite under `test/e2e-scenario/`, dispatched via `.github/workflows/e2e-vitest-scenarios.yaml`.", + "Your job is to recommend which Vitest-backed E2E scenario dispatches should run for a PR. They are part of the single NemoClaw E2E system, dispatched via `.github/workflows/e2e-vitest-scenarios.yaml`.", "", - "You are a separate advisor from the general E2E recommendation advisor. 
Do not opine on legacy `test/e2e/` workflows or non-scenario E2E jobs; those are owned by the general advisor.", + "Limit recommendations to the Vitest scenario workflow. Broader direct legacy `test/e2e/` workflows are owned by the general E2E advisor until they migrate; do not describe them as a separate kind of E2E.", "", "Authoritative sources to inspect with your read-only tools:", "- `.github/workflows/e2e-vitest-scenarios.yaml` — canonical Vitest live scenario workflow.", @@ -236,10 +236,10 @@ export function buildSystemPrompt(schema: AdvisorSchema): string { "- `test/e2e-scenario/framework/` and `test/e2e-scenario/framework-tests/` — shared Vitest fixtures, clients, and phase helpers.", "", "Decision policy:", - "- Required (all scenarios): changes to scenario runtime/runner code, scenario catalog metadata, expected-state metadata, live support classification, shared fixtures, or the Vitest scenario workflow itself. Recommend the `e2e-scenarios-all` fan-out through `e2e-vitest-scenarios.yaml`.", + "- Required (all scenarios): changes to scenario registry, matrix emission, expected-state metadata, live support classification, shared fixtures, or the Vitest scenario workflow itself. Recommend the `e2e-scenarios-all` fan-out through `e2e-vitest-scenarios.yaml`.", "- Required (targeted): fixture, live test, manifest, runtime-support, or scenario changes that affect a specific subset. Recommend the smallest set of live-supported typed scenario IDs that exercises the changed surface.", "- Optional: adjacent scenarios that exercise the same suite on a different platform/onboarding (e.g. macOS, WSL, GPU) but are not the primary target. Special-runner scenarios (`gpu-`, `macos-`, `wsl-`, `brev-`) should usually be optional unless they are the only path that exercises the change.", - "- None: docs-only, comment-only, tests-only outside `test/e2e-scenario/`, or changes that cannot affect scenario E2E behavior. 
Set `noScenarioE2eReason` and return empty `required`/`optional` arrays.", + "- None: docs-only, comment-only, tests-only outside `test/e2e-scenario/`, or changes that cannot affect Vitest scenario behavior. Set `noScenarioE2eReason` and return empty `required`/`optional` arrays.", "", "Hard rules:", "- Only recommend live-supported typed scenario IDs that exist in the registry or the synthetic fan-out id `e2e-scenarios-all`. Do not invent IDs.", @@ -268,7 +268,7 @@ export function buildPrompt({ changedFiles: string[]; diff: string; }): string { - return `Return a scenario E2E recommendation for this PR. + return `Return a Vitest E2E scenario recommendation for this PR. Set these fields exactly: - version: 1 @@ -303,7 +303,7 @@ export function normalizeScenarioAdvisorResult( ? reasonField.trim() : reasonField === null || reasonField === undefined ? required.length === 0 && optional.length === 0 - ? "Advisor reported no scenario E2E impact." + ? "Advisor reported no Vitest E2E scenario impact." 
: null : null; @@ -374,13 +374,13 @@ function stringArrayWithinChanged(value: unknown, changedFiles: string[]): strin export function renderScenarioSummary(result: ScenarioAdvisorResult): string { const lines: string[] = []; - lines.push("# E2E Scenario Advisor"); + lines.push("# Vitest E2E Scenario Advisor"); lines.push(""); lines.push(`Base: \`${result.baseRef}\` `); lines.push(`Head: \`${result.headRef}\` `); lines.push(`Confidence: **${result.confidence}**`); lines.push(""); - lines.push("## Required scenario E2E"); + lines.push("## Required Vitest E2E scenarios"); if (result.required.length === 0) { lines.push(`- _None._ ${result.noScenarioE2eReason || ""}`.trim()); } else { @@ -390,7 +390,7 @@ export function renderScenarioSummary(result: ScenarioAdvisorResult): string { } } lines.push(""); - lines.push("## Optional scenario E2E"); + lines.push("## Optional Vitest E2E scenarios"); if (result.optional.length === 0) { lines.push("- _None._"); } else { From 2d8ac4e993510f1cdd02de94f7ff040e1d2ba73c Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Wed, 10 Jun 2026 02:45:27 -0700 Subject: [PATCH 05/17] test(e2e): rename migration policy checks --- ...nventory.test.ts => e2e-migration-policy.test.ts} | 8 ++++---- ...test.ts => e2e-migration-source-of-truth.test.ts} | 12 ++++++++---- 2 files changed, 12 insertions(+), 8 deletions(-) rename test/e2e-scenario/framework-tests/{e2e-migration-inventory.test.ts => e2e-migration-policy.test.ts} (86%) rename test/e2e-scenario/framework-tests/{e2e-migration-inventory-lock.test.ts => e2e-migration-source-of-truth.test.ts} (85%) diff --git a/test/e2e-scenario/framework-tests/e2e-migration-inventory.test.ts b/test/e2e-scenario/framework-tests/e2e-migration-policy.test.ts similarity index 86% rename from test/e2e-scenario/framework-tests/e2e-migration-inventory.test.ts rename to test/e2e-scenario/framework-tests/e2e-migration-policy.test.ts index 4e6aa8fa5d..6eb28553c4 100644 --- 
a/test/e2e-scenario/framework-tests/e2e-migration-inventory.test.ts +++ b/test/e2e-scenario/framework-tests/e2e-migration-policy.test.ts @@ -11,8 +11,8 @@ const SCENARIO_SUITE_DIR = path.join(REPO_ROOT, "test/e2e-scenario"); const MIGRATION_DOC = path.join(SCENARIO_SUITE_DIR, "docs", "MIGRATION.md"); const README_DOC = path.join(SCENARIO_SUITE_DIR, "docs", "README.md"); const RETIREMENT_DOC = path.join(SCENARIO_SUITE_DIR, "docs", "RETIREMENT.md"); -const LEGACY_INVENTORY = path.join(SCENARIO_SUITE_DIR, "migration", "legacy-inventory.json"); -const LEGACY_ASSERTION_INVENTORY = path.join( +const FORBIDDEN_LEGACY_LEDGER = path.join(SCENARIO_SUITE_DIR, "migration", "legacy-inventory.json"); +const FORBIDDEN_LEGACY_ASSERTION_LEDGER = path.join( REPO_ROOT, "test", "e2e", @@ -26,8 +26,8 @@ function read(filePath: string): string { describe("E2E migration tracking policy", () => { it("does not use a repo-local JSON ledger as durable migration state", () => { - expect(fs.existsSync(LEGACY_INVENTORY)).toBe(false); - expect(fs.existsSync(LEGACY_ASSERTION_INVENTORY)).toBe(false); + expect(fs.existsSync(FORBIDDEN_LEGACY_LEDGER)).toBe(false); + expect(fs.existsSync(FORBIDDEN_LEGACY_ASSERTION_LEDGER)).toBe(false); }); it("documents GitHub issues and PRs as the migration source of truth", () => { diff --git a/test/e2e-scenario/framework-tests/e2e-migration-inventory-lock.test.ts b/test/e2e-scenario/framework-tests/e2e-migration-source-of-truth.test.ts similarity index 85% rename from test/e2e-scenario/framework-tests/e2e-migration-inventory-lock.test.ts rename to test/e2e-scenario/framework-tests/e2e-migration-source-of-truth.test.ts index c051bdd088..ab74c72a22 100644 --- a/test/e2e-scenario/framework-tests/e2e-migration-inventory-lock.test.ts +++ b/test/e2e-scenario/framework-tests/e2e-migration-source-of-truth.test.ts @@ -1,23 +1,27 @@ // SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
// SPDX-License-Identifier: Apache-2.0 -import { describe, expect, it } from "vitest"; import fs from "node:fs"; import path from "node:path"; +import { describe, expect, it } from "vitest"; const REPO_ROOT = path.resolve(import.meta.dirname, "../../.."); const SCENARIO_SUITE_DIR = path.join(REPO_ROOT, "test/e2e-scenario"); const MIGRATION_DOC = path.join(SCENARIO_SUITE_DIR, "docs", "MIGRATION.md"); const README_DOC = path.join(SCENARIO_SUITE_DIR, "docs", "README.md"); -const MIGRATION_INVENTORY = path.join(SCENARIO_SUITE_DIR, "scenarios", "migration-inventory.ts"); +const FORBIDDEN_MUTABLE_MIGRATION_MODULE = path.join( + SCENARIO_SUITE_DIR, + "scenarios", + "migration-inventory.ts", +); function read(filePath: string): string { return fs.readFileSync(filePath, "utf8"); } -describe("E2E migration tracking hygiene", () => { +describe("E2E migration source-of-truth hygiene", () => { it("keeps mutable migration status out of the scenario source tree", () => { - expect(fs.existsSync(MIGRATION_INVENTORY)).toBe(false); + expect(fs.existsSync(FORBIDDEN_MUTABLE_MIGRATION_MODULE)).toBe(false); }); it("documents that migration state lives in issues and PRs", () => { From d473ba6afa90a8a7bdbb9fdd9ef112f56ff5c581 Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Wed, 10 Jun 2026 02:49:25 -0700 Subject: [PATCH 06/17] test(e2e): rename Vitest support project --- test/e2e-scenario/docs/MIGRATION.md | 4 ++-- test/e2e-scenario/docs/README.md | 10 +++++----- test/pr-workflow-contract.test.ts | 2 +- vitest.config.ts | 6 +++--- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/test/e2e-scenario/docs/MIGRATION.md b/test/e2e-scenario/docs/MIGRATION.md index fd26a8929c..28b9c01798 100644 --- a/test/e2e-scenario/docs/MIGRATION.md +++ b/test/e2e-scenario/docs/MIGRATION.md @@ -92,8 +92,8 @@ npx tsx test/e2e-scenario/scenarios/run.ts --list npx tsx test/e2e-scenario/scenarios/run.ts --emit-live-matrix npx tsx test/e2e-scenario/scenarios/run.ts --emit-live-matrix --scenarios 
ubuntu-repo-cloud-openclaw -# Framework tests -npx vitest run --project e2e-scenario-framework --silent=false --reporter=default +# Fixture/support tests +npx vitest run --project e2e-vitest-support --silent=false --reporter=default # Opt-in live Vitest scenarios npm run build:cli diff --git a/test/e2e-scenario/docs/README.md b/test/e2e-scenario/docs/README.md index c7be5657c8..ea3a5e0836 100644 --- a/test/e2e-scenario/docs/README.md +++ b/test/e2e-scenario/docs/README.md @@ -70,8 +70,8 @@ npx tsx test/e2e-scenario/scenarios/run.ts --emit-live-matrix # Emit the matrix for selected scenario ids npx tsx test/e2e-scenario/scenarios/run.ts --emit-live-matrix --scenarios ubuntu-repo-cloud-openclaw -# Framework tests -npx vitest run --project e2e-scenario-framework --silent=false --reporter=default +# Fixture/support tests +npx vitest run --project e2e-vitest-support --silent=false --reporter=default # Opt-in live Vitest scenarios npm run build:cli @@ -85,9 +85,9 @@ paths must not be reintroduced. ```text test/e2e-scenario/ - docs/ # Framework guide, migration notes, retirement record + docs/ # Fixture guide, migration notes, retirement record framework/ # Vitest fixtures, clients, redaction, artifacts, cleanup - framework-tests/ # Fast framework and metadata tests + framework-tests/ # Fast fixture/support and metadata tests live/ # Opt-in live Vitest scenario tests manifests/ # Product-facing NemoClawInstance desired state scenarios/ # Typed registry, matrix helpers, expected states @@ -102,7 +102,7 @@ test/e2e-scenario/ - Existing workflows such as `nightly-e2e.yaml`, `e2e-branch-validation.yaml`, `macos-e2e.yaml`, `wsl-e2e.yaml`, `ollama-proxy-e2e.yaml`, and `regression-e2e.yaml` still run direct legacy E2E scripts during migration. -- `vitest.config.ts` contains `e2e-scenario-framework` for fast fixture/support +- `vitest.config.ts` contains `e2e-vitest-support` for fast fixture/support tests and `e2e-scenarios-live` for opt-in live scenario execution. 
## Migration Tracking diff --git a/test/pr-workflow-contract.test.ts b/test/pr-workflow-contract.test.ts index 9cca0063aa..12327c8122 100644 --- a/test/pr-workflow-contract.test.ts +++ b/test/pr-workflow-contract.test.ts @@ -456,7 +456,7 @@ describe("pull request and main workflow contracts", () => { // they live under test/e2e-scenario, while the CLI project only excludes // the legacy test/e2e tree and installer-integration tests. expect(cliShardRuns).toContain("npx vitest run --project cli"); - expect(vitestConfig).toContain('name: "e2e-scenario-framework"'); + expect(vitestConfig).toContain('name: "e2e-vitest-support"'); expect(vitestConfig).toContain('include: ["test/**/*.test.{js,ts}", "src/**/*.test.ts"]'); expect(vitestConfig).toContain('"test/e2e/**"'); expect(vitestConfig).toContain('"test/install-preflight.test.ts"'); diff --git a/vitest.config.ts b/vitest.config.ts index 2bdb7df644..a90bc4795e 100644 --- a/vitest.config.ts +++ b/vitest.config.ts @@ -62,9 +62,9 @@ export default defineConfig({ }, { test: { - // Fast tests for the E2E fixture/support layer. The project name is - // retained for CI compatibility; Vitest remains the only harness. - name: "e2e-scenario-framework", + // Fast tests for the E2E fixture/support layer. Vitest remains the + // only harness; this project does not define a separate runner. 
+ name: "e2e-vitest-support", testTimeout: testTimeout(), include: ["test/e2e-scenario/framework-tests/**/*.test.ts"], }, From 56c70714dda290b3d3ff7350abe836242ba0a1e1 Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Wed, 10 Jun 2026 02:51:51 -0700 Subject: [PATCH 07/17] docs(e2e): avoid runner wording in release notes --- docs/about/release-notes.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/about/release-notes.mdx b/docs/about/release-notes.mdx index bc23b2ce97..146f33b869 100644 --- a/docs/about/release-notes.mdx +++ b/docs/about/release-notes.mdx @@ -22,7 +22,7 @@ NemoClaw v0.0.61 improves sandbox network visibility, onboarding recovery, Herme - Onboarding and rebuild paths recover more reliably across host and provider drift. ARM64 image-tar upload failures receive a clear classification with an image-reference workaround, rebuild detaches sandbox providers before delete, rebuilt resume snapshots keep session state, and messaging selector key sequences work during onboarding. For more information, refer to [NemoClaw CLI Commands Reference](../reference/commands). - Local inference and Hermes setup cover more restart and configuration edge cases. Managed inference hostnames bypass host proxies, managed vLLM restarts after host reboot, DGX Station managed vLLM defaults to `Qwen/Qwen3.6-27B-FP8`, Hermes rejects dashboard port collisions during configuration, and Hermes recovery enforces the environment-secret boundary. For more information, refer to [Use a Local Inference Server](../inference/use-local-inference). - Messaging setup gives clearer feedback and stores more deterministic state. Slack now notifies the sender when a channel `@mention` is denied, operator-supplied placeholder keys can be registered during onboarding, `messagingPlan` persists into resume state, and channel conflict detection now uses the manifest-plan architecture. For more information, refer to [Messaging Channels](../manage-sandboxes/messaging-channels). 
-- Release validation now uses real shell assertions in the e2e scenario runner, includes an opt-in live scenario project, shards CLI coverage, adds a docs-only PR fast path, and trims slow CLI subprocess coverage. +- Release validation now runs real shell-boundary assertions through Vitest E2E support, includes an opt-in live scenario project, shards CLI coverage, adds a docs-only PR fast path, and trims slow CLI subprocess coverage. ## v0.0.60 From 0d1c4cb087164dea5ab647fc02c0ed8939e4e4c2 Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Wed, 10 Jun 2026 02:56:26 -0700 Subject: [PATCH 08/17] test(e2e): rename Vitest support tests directory --- test/e2e-scenario/docs/README.md | 2 +- test/e2e-scenario/framework/redaction.ts | 10 +++++----- .../e2e-clients.test.ts | 0 .../e2e-expected-state.test.ts | 0 .../e2e-fixture-context.test.ts | 0 .../e2e-live-project-config.test.ts | 0 .../e2e-live-registry-discovery.test.ts | 0 .../e2e-live-skip-name-contract.test.ts | 0 .../e2e-manifests.test.ts | 0 .../e2e-migration-policy.test.ts | 0 .../e2e-migration-source-of-truth.test.ts | 0 .../e2e-phase-environment.test.ts | 0 .../e2e-phase-lifecycle.test.ts | 0 .../e2e-phase-onboarding.test.ts | 0 .../e2e-phase-runtime.test.ts | 0 .../e2e-phase-state-validation.test.ts | 0 .../e2e-redaction-entry.test.ts | 0 .../e2e-redaction-parity.test.ts | 0 .../e2e-scenario-matrix.test.ts | 0 .../e2e-scenario-registry.test.ts | 0 .../e2e-scenarios-workflow.test.ts | 0 .../e2e-shell-supervisor.test.ts | 2 +- tools/e2e-advisor/scenarios.mts | 2 +- vitest.config.ts | 2 +- 24 files changed, 9 insertions(+), 9 deletions(-) rename test/e2e-scenario/{framework-tests => support-tests}/e2e-clients.test.ts (100%) rename test/e2e-scenario/{framework-tests => support-tests}/e2e-expected-state.test.ts (100%) rename test/e2e-scenario/{framework-tests => support-tests}/e2e-fixture-context.test.ts (100%) rename test/e2e-scenario/{framework-tests => support-tests}/e2e-live-project-config.test.ts (100%) 
rename test/e2e-scenario/{framework-tests => support-tests}/e2e-live-registry-discovery.test.ts (100%) rename test/e2e-scenario/{framework-tests => support-tests}/e2e-live-skip-name-contract.test.ts (100%) rename test/e2e-scenario/{framework-tests => support-tests}/e2e-manifests.test.ts (100%) rename test/e2e-scenario/{framework-tests => support-tests}/e2e-migration-policy.test.ts (100%) rename test/e2e-scenario/{framework-tests => support-tests}/e2e-migration-source-of-truth.test.ts (100%) rename test/e2e-scenario/{framework-tests => support-tests}/e2e-phase-environment.test.ts (100%) rename test/e2e-scenario/{framework-tests => support-tests}/e2e-phase-lifecycle.test.ts (100%) rename test/e2e-scenario/{framework-tests => support-tests}/e2e-phase-onboarding.test.ts (100%) rename test/e2e-scenario/{framework-tests => support-tests}/e2e-phase-runtime.test.ts (100%) rename test/e2e-scenario/{framework-tests => support-tests}/e2e-phase-state-validation.test.ts (100%) rename test/e2e-scenario/{framework-tests => support-tests}/e2e-redaction-entry.test.ts (100%) rename test/e2e-scenario/{framework-tests => support-tests}/e2e-redaction-parity.test.ts (100%) rename test/e2e-scenario/{framework-tests => support-tests}/e2e-scenario-matrix.test.ts (100%) rename test/e2e-scenario/{framework-tests => support-tests}/e2e-scenario-registry.test.ts (100%) rename test/e2e-scenario/{framework-tests => support-tests}/e2e-scenarios-workflow.test.ts (100%) rename test/e2e-scenario/{framework-tests => support-tests}/e2e-shell-supervisor.test.ts (99%) diff --git a/test/e2e-scenario/docs/README.md b/test/e2e-scenario/docs/README.md index ea3a5e0836..529daea534 100644 --- a/test/e2e-scenario/docs/README.md +++ b/test/e2e-scenario/docs/README.md @@ -87,10 +87,10 @@ paths must not be reintroduced. 
test/e2e-scenario/ docs/ # Fixture guide, migration notes, retirement record framework/ # Vitest fixtures, clients, redaction, artifacts, cleanup - framework-tests/ # Fast fixture/support and metadata tests live/ # Opt-in live Vitest scenario tests manifests/ # Product-facing NemoClawInstance desired state scenarios/ # Typed registry, matrix helpers, expected states + support-tests/ # Fast fixture/support and metadata tests ``` ## CI Entry Points diff --git a/test/e2e-scenario/framework/redaction.ts b/test/e2e-scenario/framework/redaction.ts index 6a9916a6ef..8a56d1a509 100644 --- a/test/e2e-scenario/framework/redaction.ts +++ b/test/e2e-scenario/framework/redaction.ts @@ -21,9 +21,9 @@ * coupling the framework to product runtime modules. * * Tests: - * test/e2e-scenario/framework-tests/e2e-redaction-entry.test.ts - * test/e2e-scenario/framework-tests/e2e-redaction-parity.test.ts - * test/e2e-scenario/framework-tests/e2e-phase-environment.test.ts + * test/e2e-scenario/support-tests/e2e-redaction-entry.test.ts + * test/e2e-scenario/support-tests/e2e-redaction-parity.test.ts + * test/e2e-scenario/support-tests/e2e-phase-environment.test.ts * - canonical token redaction parity with product runtime patterns * - explicit per-test redaction values * - child-env allowlist filtering for framework probes @@ -38,12 +38,12 @@ const EXPLICIT_REDACTED = "[REDACTED]"; // framework deliberately does not import from src/lib/security/ so it // stays decoupled from product runtime modules and the cross-tsconfig // boundary. A parity test -// (test/e2e-scenario/framework-tests/e2e-redaction-parity.test.ts) +// (test/e2e-scenario/support-tests/e2e-redaction-parity.test.ts) // asserts these regex sources stay in lockstep with the canonical // product source so adding a token shape there keeps both layers // honest at once. 
// Exported only so the parity test -// (test/e2e-scenario/framework-tests/e2e-redaction-parity.test.ts) can +// (test/e2e-scenario/support-tests/e2e-redaction-parity.test.ts) can // import the actual RegExp values rather than parsing source text. // Production code in this module continues to use them via the local // binding; nothing in the framework runtime imports these. diff --git a/test/e2e-scenario/framework-tests/e2e-clients.test.ts b/test/e2e-scenario/support-tests/e2e-clients.test.ts similarity index 100% rename from test/e2e-scenario/framework-tests/e2e-clients.test.ts rename to test/e2e-scenario/support-tests/e2e-clients.test.ts diff --git a/test/e2e-scenario/framework-tests/e2e-expected-state.test.ts b/test/e2e-scenario/support-tests/e2e-expected-state.test.ts similarity index 100% rename from test/e2e-scenario/framework-tests/e2e-expected-state.test.ts rename to test/e2e-scenario/support-tests/e2e-expected-state.test.ts diff --git a/test/e2e-scenario/framework-tests/e2e-fixture-context.test.ts b/test/e2e-scenario/support-tests/e2e-fixture-context.test.ts similarity index 100% rename from test/e2e-scenario/framework-tests/e2e-fixture-context.test.ts rename to test/e2e-scenario/support-tests/e2e-fixture-context.test.ts diff --git a/test/e2e-scenario/framework-tests/e2e-live-project-config.test.ts b/test/e2e-scenario/support-tests/e2e-live-project-config.test.ts similarity index 100% rename from test/e2e-scenario/framework-tests/e2e-live-project-config.test.ts rename to test/e2e-scenario/support-tests/e2e-live-project-config.test.ts diff --git a/test/e2e-scenario/framework-tests/e2e-live-registry-discovery.test.ts b/test/e2e-scenario/support-tests/e2e-live-registry-discovery.test.ts similarity index 100% rename from test/e2e-scenario/framework-tests/e2e-live-registry-discovery.test.ts rename to test/e2e-scenario/support-tests/e2e-live-registry-discovery.test.ts diff --git a/test/e2e-scenario/framework-tests/e2e-live-skip-name-contract.test.ts 
b/test/e2e-scenario/support-tests/e2e-live-skip-name-contract.test.ts similarity index 100% rename from test/e2e-scenario/framework-tests/e2e-live-skip-name-contract.test.ts rename to test/e2e-scenario/support-tests/e2e-live-skip-name-contract.test.ts diff --git a/test/e2e-scenario/framework-tests/e2e-manifests.test.ts b/test/e2e-scenario/support-tests/e2e-manifests.test.ts similarity index 100% rename from test/e2e-scenario/framework-tests/e2e-manifests.test.ts rename to test/e2e-scenario/support-tests/e2e-manifests.test.ts diff --git a/test/e2e-scenario/framework-tests/e2e-migration-policy.test.ts b/test/e2e-scenario/support-tests/e2e-migration-policy.test.ts similarity index 100% rename from test/e2e-scenario/framework-tests/e2e-migration-policy.test.ts rename to test/e2e-scenario/support-tests/e2e-migration-policy.test.ts diff --git a/test/e2e-scenario/framework-tests/e2e-migration-source-of-truth.test.ts b/test/e2e-scenario/support-tests/e2e-migration-source-of-truth.test.ts similarity index 100% rename from test/e2e-scenario/framework-tests/e2e-migration-source-of-truth.test.ts rename to test/e2e-scenario/support-tests/e2e-migration-source-of-truth.test.ts diff --git a/test/e2e-scenario/framework-tests/e2e-phase-environment.test.ts b/test/e2e-scenario/support-tests/e2e-phase-environment.test.ts similarity index 100% rename from test/e2e-scenario/framework-tests/e2e-phase-environment.test.ts rename to test/e2e-scenario/support-tests/e2e-phase-environment.test.ts diff --git a/test/e2e-scenario/framework-tests/e2e-phase-lifecycle.test.ts b/test/e2e-scenario/support-tests/e2e-phase-lifecycle.test.ts similarity index 100% rename from test/e2e-scenario/framework-tests/e2e-phase-lifecycle.test.ts rename to test/e2e-scenario/support-tests/e2e-phase-lifecycle.test.ts diff --git a/test/e2e-scenario/framework-tests/e2e-phase-onboarding.test.ts b/test/e2e-scenario/support-tests/e2e-phase-onboarding.test.ts similarity index 100% rename from 
test/e2e-scenario/framework-tests/e2e-phase-onboarding.test.ts rename to test/e2e-scenario/support-tests/e2e-phase-onboarding.test.ts diff --git a/test/e2e-scenario/framework-tests/e2e-phase-runtime.test.ts b/test/e2e-scenario/support-tests/e2e-phase-runtime.test.ts similarity index 100% rename from test/e2e-scenario/framework-tests/e2e-phase-runtime.test.ts rename to test/e2e-scenario/support-tests/e2e-phase-runtime.test.ts diff --git a/test/e2e-scenario/framework-tests/e2e-phase-state-validation.test.ts b/test/e2e-scenario/support-tests/e2e-phase-state-validation.test.ts similarity index 100% rename from test/e2e-scenario/framework-tests/e2e-phase-state-validation.test.ts rename to test/e2e-scenario/support-tests/e2e-phase-state-validation.test.ts diff --git a/test/e2e-scenario/framework-tests/e2e-redaction-entry.test.ts b/test/e2e-scenario/support-tests/e2e-redaction-entry.test.ts similarity index 100% rename from test/e2e-scenario/framework-tests/e2e-redaction-entry.test.ts rename to test/e2e-scenario/support-tests/e2e-redaction-entry.test.ts diff --git a/test/e2e-scenario/framework-tests/e2e-redaction-parity.test.ts b/test/e2e-scenario/support-tests/e2e-redaction-parity.test.ts similarity index 100% rename from test/e2e-scenario/framework-tests/e2e-redaction-parity.test.ts rename to test/e2e-scenario/support-tests/e2e-redaction-parity.test.ts diff --git a/test/e2e-scenario/framework-tests/e2e-scenario-matrix.test.ts b/test/e2e-scenario/support-tests/e2e-scenario-matrix.test.ts similarity index 100% rename from test/e2e-scenario/framework-tests/e2e-scenario-matrix.test.ts rename to test/e2e-scenario/support-tests/e2e-scenario-matrix.test.ts diff --git a/test/e2e-scenario/framework-tests/e2e-scenario-registry.test.ts b/test/e2e-scenario/support-tests/e2e-scenario-registry.test.ts similarity index 100% rename from test/e2e-scenario/framework-tests/e2e-scenario-registry.test.ts rename to test/e2e-scenario/support-tests/e2e-scenario-registry.test.ts diff --git 
a/test/e2e-scenario/framework-tests/e2e-scenarios-workflow.test.ts b/test/e2e-scenario/support-tests/e2e-scenarios-workflow.test.ts similarity index 100% rename from test/e2e-scenario/framework-tests/e2e-scenarios-workflow.test.ts rename to test/e2e-scenario/support-tests/e2e-scenarios-workflow.test.ts diff --git a/test/e2e-scenario/framework-tests/e2e-shell-supervisor.test.ts b/test/e2e-scenario/support-tests/e2e-shell-supervisor.test.ts similarity index 99% rename from test/e2e-scenario/framework-tests/e2e-shell-supervisor.test.ts rename to test/e2e-scenario/support-tests/e2e-shell-supervisor.test.ts index 48ca8e3070..06e04ab1af 100644 --- a/test/e2e-scenario/framework-tests/e2e-shell-supervisor.test.ts +++ b/test/e2e-scenario/support-tests/e2e-shell-supervisor.test.ts @@ -13,7 +13,7 @@ * Both come from the leaf modules under framework/shell/, so the * assertions live here at the leaf level. The end-to-end behaviour * (orchestrator log redaction, fixture artifact persistence, probe - * outcome mapping) stays covered by the existing framework-tests + * outcome mapping) stays covered by the existing support-tests * (e2e-phase-orchestrators, e2e-fixture-context). 
*/ diff --git a/tools/e2e-advisor/scenarios.mts b/tools/e2e-advisor/scenarios.mts index f78d0a38c3..e515612836 100755 --- a/tools/e2e-advisor/scenarios.mts +++ b/tools/e2e-advisor/scenarios.mts @@ -233,7 +233,7 @@ export function buildSystemPrompt(schema: AdvisorSchema): string { "- `test/e2e-scenario/scenarios/registry.ts` and `test/e2e-scenario/scenarios/scenarios/` — typed scenario IDs and metadata.", "- `test/e2e-scenario/scenarios/runtime-support.ts` — which typed scenarios are wired for live Vitest execution.", "- `test/e2e-scenario/live/registry-scenarios.test.ts` — live Vitest registry scenario entry point.", - "- `test/e2e-scenario/framework/` and `test/e2e-scenario/framework-tests/` — shared Vitest fixtures, clients, and phase helpers.", + "- `test/e2e-scenario/framework/` and `test/e2e-scenario/support-tests/` — shared Vitest fixtures, clients, and phase helpers.", "", "Decision policy:", "- Required (all scenarios): changes to scenario registry, matrix emission, expected-state metadata, live support classification, shared fixtures, or the Vitest scenario workflow itself. Recommend the `e2e-scenarios-all` fan-out through `e2e-vitest-scenarios.yaml`.", diff --git a/vitest.config.ts b/vitest.config.ts index a90bc4795e..f510748dcd 100644 --- a/vitest.config.ts +++ b/vitest.config.ts @@ -66,7 +66,7 @@ export default defineConfig({ // only harness; this project does not define a separate runner. 
name: "e2e-vitest-support", testTimeout: testTimeout(), - include: ["test/e2e-scenario/framework-tests/**/*.test.ts"], + include: ["test/e2e-scenario/support-tests/**/*.test.ts"], }, }, { From 10cda3b29caac33304a3d67f986cc2f78fdea668 Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Wed, 10 Jun 2026 04:49:03 -0700 Subject: [PATCH 09/17] test(e2e): migrate strict tool-call probe to Vitest --- .github/workflows/regression-e2e.yaml | 26 +- .../live/strict-tool-call-probe.test.ts | 491 ++++++++++++++++++ test/e2e/test-strict-tool-call-probe.sh | 377 -------------- test/regression-e2e-workflow.test.ts | 24 + 4 files changed, 533 insertions(+), 385 deletions(-) create mode 100644 test/e2e-scenario/live/strict-tool-call-probe.test.ts delete mode 100755 test/e2e/test-strict-tool-call-probe.sh diff --git a/.github/workflows/regression-e2e.yaml b/.github/workflows/regression-e2e.yaml index 400ded2bae..3185bee903 100644 --- a/.github/workflows/regression-e2e.yaml +++ b/.github/workflows/regression-e2e.yaml @@ -284,21 +284,31 @@ jobs: uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6 with: node-version: "22" + cache: npm - - name: Run strict tool-call probe E2E test + - name: Install root dependencies + run: npm ci --ignore-scripts + + - name: Run strict tool-call probe Vitest E2E test env: NEMOCLAW_TEST_NO_SLEEP: "1" - run: bash test/e2e/test-strict-tool-call-probe.sh + NEMOCLAW_RUN_E2E_SCENARIOS: "1" + E2E_ARTIFACT_DIR: ${{ github.workspace }}/e2e-artifacts/vitest/strict-tool-call-probe + run: | + set -euo pipefail + npx vitest run --project e2e-scenarios-live \ + test/e2e-scenario/live/strict-tool-call-probe.test.ts \ + --silent=false --reporter=default - - name: Upload strict tool-call probe logs on failure - if: failure() + - name: Upload strict tool-call probe artifacts + if: always() uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 with: - name: strict-tool-call-probe-logs - path: | - 
/tmp/nemoclaw-e2e-strict-tool-call-probe.log - /tmp/nemoclaw-e2e-strict-tool-call-probe-node.log + name: strict-tool-call-probe-artifacts + path: e2e-artifacts/vitest/strict-tool-call-probe/ + include-hidden-files: false if-no-files-found: ignore + retention-days: 14 # ── Gateway drift preflight E2E ───────────────────────────── # Coverage guard for #3399 / #3423. A stale OpenShell gateway image can diff --git a/test/e2e-scenario/live/strict-tool-call-probe.test.ts b/test/e2e-scenario/live/strict-tool-call-probe.test.ts new file mode 100644 index 0000000000..8f02c7d5f5 --- /dev/null +++ b/test/e2e-scenario/live/strict-tool-call-probe.test.ts @@ -0,0 +1,491 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import assert from "node:assert/strict"; +import { type ChildProcessByStdio, spawn, spawnSync } from "node:child_process"; +import fs from "node:fs"; +import fsp from "node:fs/promises"; +import { createRequire } from "node:module"; +import os from "node:os"; +import path from "node:path"; +import type { Readable } from "node:stream"; + +import type { ArtifactSink } from "../framework/artifacts.ts"; +import { expect, test } from "../framework/e2e-test.ts"; +import { shouldRunLiveE2EScenarios } from "../framework/live-project-gate.ts"; + +// Migrated from test/e2e/test-strict-tool-call-probe.sh. This hermetic +// regression guard for #4537 exercises the Local Ollama strict Chat +// Completions tool-call validation path against local OpenAI-compatible mocks. + +const REPO_ROOT = path.resolve(import.meta.dirname, "../../.."); +const BUILD_TIMEOUT_MS = 120_000; +const PROBE_TIMEOUT_MS = 60_000; +const requireFromHere = createRequire(import.meta.url); +const runStrictToolCallProbeTest = shouldRunLiveE2EScenarios() ? 
test : test.skip; + +type JsonObject = Record; +type ValidationHelpers = { + validateOpenAiLikeSelection: ( + label: string, + endpoint: string, + model: string, + credentialEnv: string | null, + recoveryPrompt: string, + apiKey: string | null, + options: unknown, + ) => Promise; +}; +type ValidationModule = { + createInferenceSelectionValidationHelpers: (options: { + isNonInteractive: () => boolean; + agentProductName: () => string; + promptValidationRecovery: (_label: string, recovery: unknown) => Promise; + }) => ValidationHelpers; +}; +type LocalInferenceModule = { + buildOllamaProbeOptions: (skipVerify: boolean) => { + skipResponsesProbe?: unknown; + requireChatCompletionsToolCalling?: unknown; + }; +}; +type BuiltModules = { + validation: ValidationModule; + localInference: LocalInferenceModule; +}; +type MockEndpoint = { + endpoint: string; + readRequests: () => Array<{ method: string; url: string; body: JsonObject }>; + stop: () => Promise; +}; + +let builtModules: BuiltModules | null = null; + +function requireDist(...parts: string[]): T { + return requireFromHere(path.join(REPO_ROOT, "dist", "lib", ...parts)) as T; +} + +function loadBuiltModules(): BuiltModules { + builtModules ??= { + validation: requireDist("onboard", "inference-selection-validation"), + localInference: requireDist("inference", "local"), + }; + return builtModules; +} + +function assertObject(value: unknown, label: string): JsonObject { + assert.equal(typeof value, "object", `${label} must be an object`); + assert.notEqual(value, null, `${label} must not be null`); + assert.equal(Array.isArray(value), false, `${label} must not be an array`); + return value as JsonObject; +} + +function assertStrictPayload(payload: JsonObject): void { + assert.equal(payload.model, "mock-tool-model"); + assert.equal(payload.tool_choice, "required"); + assert.equal(payload.max_tokens, 256); + assert.equal(payload.stream, false); + assert.equal(payload.temperature, 0); + 
assert.ok(Array.isArray(payload.messages), "messages must be present"); + assert.ok(Array.isArray(payload.tools), "tools must be present"); + const tools = payload.tools as unknown[]; + assert.ok( + tools.some((tool) => { + const toolObject = assertObject(tool, "tool"); + const functionObject = assertObject(toolObject.function, "tool.function"); + return functionObject.name === "sessions_send"; + }), + "sessions_send tool must be present", + ); +} + +function makeValidationHelpers(recoveryCalls: unknown[]): ValidationHelpers { + return loadBuiltModules().validation.createInferenceSelectionValidationHelpers({ + isNonInteractive: () => false, + agentProductName: () => "NemoClaw", + promptValidationRecovery: async (_label, recovery) => { + recoveryCalls.push(recovery); + return "retry"; + }, + }); +} + +function strictOllamaProbeOptions(): unknown { + const options = loadBuiltModules().localInference.buildOllamaProbeOptions(false); + assert.equal(options.skipResponsesProbe, true); + assert.equal(options.requireChatCompletionsToolCalling, true); + return options; +} + +async function validate(endpoint: string, recoveryCalls: unknown[] = []): Promise { + const helpers = makeValidationHelpers(recoveryCalls); + return helpers.validateOpenAiLikeSelection( + "Local Ollama", + endpoint, + "mock-tool-model", + null, + "Choose a different Ollama model or select Other.", + null, + strictOllamaProbeOptions(), + ); +} + +function serverSource(): string { + return String.raw` +const fs = require("node:fs"); +const http = require("node:http"); + +const mode = process.env.MOCK_MODE; +const requestsFile = process.env.REQUESTS_FILE; +let count = 0; + +function toolCallResponse() { + return { + choices: [ + { + message: { + role: "assistant", + content: "", + tool_calls: [ + { + type: "function", + function: { + name: "sessions_send", + arguments: JSON.stringify({ message: "hello" }), + }, + }, + ], + }, + }, + ], + }; +} + +function plainTextResponse() { + return { choices: [{ 
message: { role: "assistant", content: "OK" } }] }; +} + +function responseForRequest() { + if (mode === "success") return { status: 200, body: toolCallResponse() }; + if (mode === "transient-502") { + return count === 1 + ? { status: 502, body: { error: { message: "transient upstream failure" } } } + : { status: 200, body: toolCallResponse() }; + } + if (mode === "plain-text") return { status: 200, body: plainTextResponse() }; + return { status: 500, body: { error: { message: "unknown mock mode" } } }; +} + +const server = http.createServer((req, res) => { + const chunks = []; + req.on("data", (chunk) => chunks.push(Buffer.from(chunk))); + req.on("end", () => { + count += 1; + const rawBody = Buffer.concat(chunks).toString("utf8"); + let parsedBody = null; + try { + parsedBody = rawBody ? JSON.parse(rawBody) : null; + } catch (error) { + parsedBody = { parseError: error.message, rawBody }; + } + fs.appendFileSync( + requestsFile, + JSON.stringify({ count, method: req.method, url: req.url, body: parsedBody }) + "\n", + ); + const response = responseForRequest(); + res.writeHead(response.status, { "Content-Type": "application/json" }); + res.end(JSON.stringify(response.body)); + }); +}); + +server.listen(0, "127.0.0.1", () => { + process.stdout.write(JSON.stringify({ port: server.address().port }) + "\n"); +}); +process.on("SIGTERM", () => server.close(() => process.exit(0))); +`; +} + +async function waitForMockPort( + child: ChildProcessByStdio, + mode: string, + stderr: () => string, +): Promise { + return new Promise((resolve, reject) => { + let stdout = ""; + const timeout = setTimeout(() => { + reject(new Error(`mock ${mode} did not report a port; stderr=${stderr()}`)); + }, 5000); + child.on("error", (error) => { + clearTimeout(timeout); + reject(error); + }); + child.on("exit", (code) => { + clearTimeout(timeout); + reject( + new Error(`mock ${mode} exited before ready with ${String(code)}; stderr=${stderr()}`), + ); + }); + child.stdout.on("data", (chunk) 
=> { + stdout += chunk.toString("utf8"); + const line = stdout.split(/\r?\n/).find(Boolean); + if (!line) return; + clearTimeout(timeout); + try { + resolve(JSON.parse(line).port as number); + } catch (error) { + reject(error); + } + }); + }); +} + +async function startMockEndpoint(mode: string): Promise { + const dir = fs.mkdtempSync(path.join(os.tmpdir(), `nemoclaw-strict-probe-${mode}-`)); + const requestsFile = path.join(dir, "requests.jsonl"); + fs.writeFileSync(requestsFile, ""); + const child = spawn(process.execPath, ["-e", serverSource()], { + env: { ...process.env, MOCK_MODE: mode, REQUESTS_FILE: requestsFile }, + stdio: ["ignore", "pipe", "pipe"], + }); + + let stderr = ""; + child.stderr.on("data", (chunk) => { + stderr += chunk.toString("utf8"); + }); + + const port = await waitForMockPort(child, mode, () => stderr); + + return { + endpoint: `http://127.0.0.1:${String(port)}/v1`, + readRequests() { + const raw = fs.readFileSync(requestsFile, "utf8").trim(); + return raw ? raw.split(/\r?\n/).map((line) => JSON.parse(line)) : []; + }, + async stop() { + if (child.exitCode === null) { + child.kill("SIGTERM"); + await new Promise((resolve) => child.once("exit", resolve)); + } + fs.rmSync(dir, { recursive: true, force: true }); + }, + }; +} + +async function withMockEndpoint( + artifacts: ArtifactSink, + mode: string, + label: string, + exercise: (endpoint: string, readRequests: MockEndpoint["readRequests"]) => Promise, +): Promise { + const mock = await startMockEndpoint(mode); + try { + await exercise(mock.endpoint, () => mock.readRequests()); + } finally { + await artifacts.writeJson(`requests/${label}.json`, mock.readRequests()).catch(() => undefined); + await mock.stop(); + } +} + +function onboardingCallerScript(): string { + return String.raw` +const assert = require("node:assert/strict"); +const path = require("node:path"); + +function fromDist(...parts) { + return require(path.join(process.cwd(), "dist", "lib", ...parts)); +} + 
+process.env.NEMOCLAW_NON_INTERACTIVE = "1"; +process.env.NEMOCLAW_PROVIDER = "ollama"; +process.env.NEMOCLAW_MODEL = "mock-tool-model"; +process.env.NEMOCLAW_TEST_NO_SLEEP = "1"; + +const runner = fromDist("runner"); +runner.run = () => ({ status: 0 }); +runner.runShell = () => ({ status: 0 }); +runner.runCapture = (command) => { + const cmd = Array.isArray(command) ? command.join(" ") : String(command); + if (cmd.includes("command -v") && cmd.includes("ollama")) return ""; + if (cmd.includes("/api/tags")) { + return JSON.stringify({ models: [{ name: "mock-tool-model" }] }); + } + if (cmd.includes("/api/show")) { + return JSON.stringify({ capabilities: ["completion", "tools"] }); + } + if (cmd.includes("/api/ps")) { + return JSON.stringify({ models: [{ name: "mock-tool-model", context_length: 4096 }] }); + } + if (cmd.includes("127.0.0.1:8000/v1/models")) return ""; + return ""; +}; +runner.runCaptureEx = (command) => { + const cmd = Array.isArray(command) ? command.join(" ") : String(command); + if (cmd.includes("/api/generate")) { + return { stdout: JSON.stringify({ response: "hello" }), stderr: "", exitCode: 0, timedOut: false }; + } + return { stdout: "", stderr: "", exitCode: 0, timedOut: false }; +}; + +fromDist("onboard", "ollama-systemd").ensureOllamaLoopbackSystemdOverride = () => "ready"; +fromDist("onboard", "local-inference-topology").shouldFrontOllamaWithProxy = () => false; + +const credentials = fromDist("credentials", "store"); +credentials.prompt = async (message) => { + throw new Error("Unexpected prompt during non-interactive Ollama onboarding: " + message); +}; +credentials.ensureApiKey = async () => { + throw new Error("Unexpected API key request during Local Ollama onboarding"); +}; + +const lines = []; +const originalLog = console.log; +const originalError = console.error; +console.log = (...args) => lines.push(args.join(" ")); +console.error = (...args) => lines.push(args.join(" ")); + +(async () => { + try { + const { setupNim } = 
fromDist("onboard"); + const result = await setupNim(null, null); + originalLog(JSON.stringify({ result, lines })); + } catch (error) { + originalError(lines.join("\n")); + originalError(error && error.stack ? error.stack : error); + process.exit(1); + } finally { + console.log = originalLog; + console.error = originalError; + } +})(); +`; +} + +function runOnboardingCallerAgainstMock(endpoint: string): void { + const port = new URL(endpoint).port; + const result = spawnSync(process.execPath, ["-e", onboardingCallerScript()], { + cwd: REPO_ROOT, + encoding: "utf8", + env: { ...process.env, NEMOCLAW_OLLAMA_PORT: port }, + timeout: 15_000, + }); + assert.equal(result.status, 0, result.stderr || result.stdout); + const payload = JSON.parse(result.stdout.trim().split(/\r?\n/).pop() ?? "{}") as { + result?: { provider?: string; model?: string; preferredInferenceApi?: string }; + }; + assert.equal(payload.result?.provider, "ollama-local"); + assert.equal(payload.result?.model, "mock-tool-model"); + assert.equal(payload.result?.preferredInferenceApi, "openai-completions"); +} + +runStrictToolCallProbeTest( + "strict Chat Completions tool-call probe uses bounded payloads and fails closed", + { + timeout: BUILD_TIMEOUT_MS + PROBE_TIMEOUT_MS, + }, + async ({ artifacts, host }) => { + await artifacts.writeJson("scenario.json", { + id: "strict-tool-call-probe", + runner: "vitest", + boundary: "host-openai-compatible-mock", + migratedFrom: "test/e2e/test-strict-tool-call-probe.sh", + }); + + const previousEnv = { + NEMOCLAW_TEST_NO_SLEEP: process.env.NEMOCLAW_TEST_NO_SLEEP, + NO_PROXY: process.env.NO_PROXY, + no_proxy: process.env.no_proxy, + }; + process.env.NEMOCLAW_TEST_NO_SLEEP = "1"; + process.env.NO_PROXY = [process.env.NO_PROXY, "127.0.0.1", "localhost"] + .filter(Boolean) + .join(","); + process.env.no_proxy = [process.env.no_proxy, "127.0.0.1", "localhost"] + .filter(Boolean) + .join(","); + + try { + const build = await host.command("npm", ["run", "build:cli"], { + 
artifactName: "strict-tool-call-probe-build-cli", + cwd: REPO_ROOT, + inheritEnv: true, + timeoutMs: BUILD_TIMEOUT_MS, + }); + expect(build.exitCode, `build failed\n${build.stderr}`).toBe(0); + + await withMockEndpoint( + artifacts, + "success", + "strict-success", + async (endpoint, readRequests) => { + const result = await validate(endpoint); + expect(result).toEqual({ ok: true, api: "openai-completions" }); + const requests = readRequests(); + assert.equal(requests.length, 1); + assert.equal(requests[0].method, "POST"); + assert.equal(requests[0].url, "/v1/chat/completions"); + assertStrictPayload(requests[0].body); + }, + ); + + await withMockEndpoint( + artifacts, + "success", + "onboarding-caller", + async (endpoint, readRequests) => { + runOnboardingCallerAgainstMock(endpoint); + const requests = readRequests(); + assert.equal(requests.length, 1); + assert.equal(requests[0].method, "POST"); + assert.equal(requests[0].url, "/v1/chat/completions"); + assertStrictPayload(requests[0].body); + }, + ); + + await withMockEndpoint( + artifacts, + "transient-502", + "transient-502", + async (endpoint, readRequests) => { + const result = await validate(endpoint); + expect(result).toEqual({ ok: true, api: "openai-completions" }); + const requests = readRequests(); + assert.equal(requests.length, 2); + assertStrictPayload(requests[0].body); + assertStrictPayload(requests[1].body); + }, + ); + + await withMockEndpoint( + artifacts, + "plain-text", + "plain-text-fails-closed", + async (endpoint, readRequests) => { + const recoveryCalls: unknown[] = []; + const result = await validate(endpoint, recoveryCalls); + expect(result).toEqual({ ok: false, retry: "retry" }); + const requests = readRequests(); + assert.equal(requests.length, 1); + assertStrictPayload(requests[0].body); + assert.equal(recoveryCalls.length, 1); + }, + ); + } finally { + if (previousEnv.NEMOCLAW_TEST_NO_SLEEP === undefined) { + delete process.env.NEMOCLAW_TEST_NO_SLEEP; + } else { + 
process.env.NEMOCLAW_TEST_NO_SLEEP = previousEnv.NEMOCLAW_TEST_NO_SLEEP; + } + if (previousEnv.NO_PROXY === undefined) { + delete process.env.NO_PROXY; + } else { + process.env.NO_PROXY = previousEnv.NO_PROXY; + } + if (previousEnv.no_proxy === undefined) { + delete process.env.no_proxy; + } else { + process.env.no_proxy = previousEnv.no_proxy; + } + await fsp.rm(artifacts.pathFor("tmp"), { recursive: true, force: true }); + } + }, +); diff --git a/test/e2e/test-strict-tool-call-probe.sh b/test/e2e/test-strict-tool-call-probe.sh deleted file mode 100755 index 09c8cb8c71..0000000000 --- a/test/e2e/test-strict-tool-call-probe.sh +++ /dev/null @@ -1,377 +0,0 @@ -#!/usr/bin/env bash -# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Coverage guard for #4537. The Local Ollama onboarding path is the only -# current caller that requires strict Chat Completions tool calls. This -# hermetic E2E exercises that validation path against an OpenAI-compatible -# mock endpoint so payload-shape and retry regressions do not require a GPU -# Ollama runner to catch. - -set -euo pipefail - -LOG_FILE="/tmp/nemoclaw-e2e-strict-tool-call-probe.log" -exec > >(tee "$LOG_FILE") 2>&1 - -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -NC='\033[0m' - -pass() { echo -e "${GREEN}[PASS]${NC} $1"; } -info() { echo -e "${YELLOW}[INFO]${NC} $1"; } -diag() { echo -e "${YELLOW}[DIAG]${NC} $1"; } -fail() { - echo -e "${RED}[FAIL]${NC} $1" >&2 - diag "strict tool-call probe log tail:" - tail -120 "$LOG_FILE" 2>/dev/null || true - exit 1 -} - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" -REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)" -cd "$REPO_ROOT" - -info "Preparing CLI build" -if [ ! 
-d node_modules ]; then - npm ci --ignore-scripts -fi -npm run build:cli - -info "Running strict Chat Completions tool-call probe against a hermetic mock" -set +e -NEMOCLAW_TEST_NO_SLEEP=1 node <<'NODE' 2>&1 | tee /tmp/nemoclaw-e2e-strict-tool-call-probe-node.log -const assert = require("node:assert/strict"); -const { spawn, spawnSync } = require("node:child_process"); -const fs = require("node:fs"); -const os = require("node:os"); -const path = require("node:path"); - -process.env.NEMOCLAW_TEST_NO_SLEEP = "1"; -process.env.NO_PROXY = [process.env.NO_PROXY, "127.0.0.1", "localhost"].filter(Boolean).join(","); -process.env.no_proxy = [process.env.no_proxy, "127.0.0.1", "localhost"].filter(Boolean).join(","); - -const { - createInferenceSelectionValidationHelpers, -} = require("./dist/lib/onboard/inference-selection-validation"); -const localInference = require("./dist/lib/inference/local"); - -function assertStrictPayload(payload) { - assert.equal(payload.model, "mock-tool-model"); - assert.equal(payload.tool_choice, "required"); - assert.equal(payload.max_tokens, 256); - assert.equal(payload.stream, false); - assert.equal(payload.temperature, 0); - assert.ok(Array.isArray(payload.messages), "messages must be present"); - assert.ok(Array.isArray(payload.tools), "tools must be present"); - assert.ok( - payload.tools.some((tool) => tool?.function?.name === "sessions_send"), - "sessions_send tool must be present", - ); -} - -function makeValidationHelpers(recoveryCalls) { - return createInferenceSelectionValidationHelpers({ - isNonInteractive: () => false, - agentProductName: () => "NemoClaw", - promptValidationRecovery: async (_label, recovery) => { - recoveryCalls.push(recovery); - return "retry"; - }, - }); -} - -function strictOllamaProbeOptions() { - const options = localInference.buildOllamaProbeOptions(false); - assert.equal(options.skipResponsesProbe, true); - assert.equal(options.requireChatCompletionsToolCalling, true); - return options; -} - -async function 
validate(endpoint, recoveryCalls = []) { - const helpers = makeValidationHelpers(recoveryCalls); - return helpers.validateOpenAiLikeSelection( - "Local Ollama", - endpoint, - "mock-tool-model", - null, - "Choose a different Ollama model or select Other.", - null, - strictOllamaProbeOptions(), - ); -} - -function serverSource() { - return String.raw` -const fs = require("node:fs"); -const http = require("node:http"); - -const mode = process.env.MOCK_MODE; -const requestsFile = process.env.REQUESTS_FILE; -let count = 0; - -function toolCallResponse() { - return { - choices: [ - { - message: { - role: "assistant", - content: "", - tool_calls: [ - { - type: "function", - function: { - name: "sessions_send", - arguments: JSON.stringify({ message: "hello" }), - }, - }, - ], - }, - }, - ], - }; -} - -function plainTextResponse() { - return { choices: [{ message: { role: "assistant", content: "OK" } }] }; -} - -function responseForRequest() { - if (mode === "success") return { status: 200, body: toolCallResponse() }; - if (mode === "transient-502") { - return count === 1 - ? { status: 502, body: { error: { message: "transient upstream failure" } } } - : { status: 200, body: toolCallResponse() }; - } - if (mode === "plain-text") return { status: 200, body: plainTextResponse() }; - return { status: 500, body: { error: { message: "unknown mock mode" } } }; -} - -const server = http.createServer((req, res) => { - const chunks = []; - req.on("data", (chunk) => chunks.push(Buffer.from(chunk))); - req.on("end", () => { - count += 1; - const rawBody = Buffer.concat(chunks).toString("utf8"); - let parsedBody = null; - try { - parsedBody = rawBody ? 
JSON.parse(rawBody) : null; - } catch (error) { - parsedBody = { parseError: error.message, rawBody }; - } - fs.appendFileSync( - requestsFile, - JSON.stringify({ count, method: req.method, url: req.url, body: parsedBody }) + "\n", - ); - const response = responseForRequest(); - res.writeHead(response.status, { "Content-Type": "application/json" }); - res.end(JSON.stringify(response.body)); - }); -}); - -server.listen(0, "127.0.0.1", () => { - process.stdout.write(JSON.stringify({ port: server.address().port }) + "\n"); -}); -process.on("SIGTERM", () => server.close(() => process.exit(0))); -`; -} - -async function startMockEndpoint(mode) { - const dir = fs.mkdtempSync(path.join(os.tmpdir(), `nemoclaw-strict-probe-${mode}-`)); - const requestsFile = path.join(dir, "requests.jsonl"); - fs.writeFileSync(requestsFile, ""); - const child = spawn(process.execPath, ["-e", serverSource()], { - env: { ...process.env, MOCK_MODE: mode, REQUESTS_FILE: requestsFile }, - stdio: ["ignore", "pipe", "pipe"], - }); - - let stderr = ""; - child.stderr.on("data", (chunk) => { - stderr += chunk.toString("utf8"); - process.stderr.write(`[mock ${mode}] ${chunk}`); - }); - - const port = await new Promise((resolve, reject) => { - let stdout = ""; - const timeout = setTimeout(() => { - reject(new Error(`mock ${mode} did not report a port; stderr=${stderr}`)); - }, 5000); - child.on("exit", (code) => { - clearTimeout(timeout); - reject(new Error(`mock ${mode} exited before ready with ${code}; stderr=${stderr}`)); - }); - child.stdout.on("data", (chunk) => { - stdout += chunk.toString("utf8"); - const line = stdout.split(/\r?\n/).find(Boolean); - if (!line) return; - clearTimeout(timeout); - try { - resolve(JSON.parse(line).port); - } catch (error) { - reject(error); - } - }); - }); - - return { - endpoint: `http://127.0.0.1:${port}/v1`, - readRequests() { - const raw = fs.readFileSync(requestsFile, "utf8").trim(); - return raw ? 
raw.split(/\r?\n/).map((line) => JSON.parse(line)) : []; - }, - async stop() { - if (child.exitCode === null) { - child.kill("SIGTERM"); - await new Promise((resolve) => child.once("exit", resolve)); - } - fs.rmSync(dir, { recursive: true, force: true }); - }, - }; -} - -async function withMockEndpoint(mode, exercise) { - const mock = await startMockEndpoint(mode); - try { - await exercise(mock.endpoint, () => mock.readRequests()); - } finally { - await mock.stop(); - } -} - -function runOnboardingCallerAgainstMock(endpoint) { - const port = new URL(endpoint).port; - const childScript = String.raw` -const assert = require("node:assert/strict"); - -process.env.NEMOCLAW_NON_INTERACTIVE = "1"; -process.env.NEMOCLAW_PROVIDER = "ollama"; -process.env.NEMOCLAW_MODEL = "mock-tool-model"; -process.env.NEMOCLAW_TEST_NO_SLEEP = "1"; - -const runner = require("./dist/lib/runner"); -runner.run = () => ({ status: 0 }); -runner.runShell = () => ({ status: 0 }); -runner.runCapture = (command) => { - const cmd = Array.isArray(command) ? command.join(" ") : String(command); - if (cmd.includes("command -v") && cmd.includes("ollama")) return ""; - if (cmd.includes("/api/tags")) { - return JSON.stringify({ models: [{ name: "mock-tool-model" }] }); - } - if (cmd.includes("/api/show")) { - return JSON.stringify({ capabilities: ["completion", "tools"] }); - } - if (cmd.includes("/api/ps")) { - return JSON.stringify({ models: [{ name: "mock-tool-model", context_length: 4096 }] }); - } - if (cmd.includes("127.0.0.1:8000/v1/models")) return ""; - return ""; -}; -runner.runCaptureEx = (command) => { - const cmd = Array.isArray(command) ? 
command.join(" ") : String(command); - if (cmd.includes("/api/generate")) { - return { stdout: JSON.stringify({ response: "hello" }), stderr: "", exitCode: 0, timedOut: false }; - } - return { stdout: "", stderr: "", exitCode: 0, timedOut: false }; -}; - -require("./dist/lib/onboard/ollama-systemd").ensureOllamaLoopbackSystemdOverride = () => "ready"; -require("./dist/lib/onboard/local-inference-topology").shouldFrontOllamaWithProxy = () => false; - -const credentials = require("./dist/lib/credentials/store"); -credentials.prompt = async (message) => { - throw new Error("Unexpected prompt during non-interactive Ollama onboarding: " + message); -}; -credentials.ensureApiKey = async () => { - throw new Error("Unexpected API key request during Local Ollama onboarding"); -}; - -const lines = []; -const originalLog = console.log; -const originalError = console.error; -console.log = (...args) => lines.push(args.join(" ")); -console.error = (...args) => lines.push(args.join(" ")); - -(async () => { - try { - const { setupNim } = require("./dist/lib/onboard"); - const result = await setupNim(null, null); - originalLog(JSON.stringify({ result, lines })); - } catch (error) { - originalError(lines.join("\n")); - originalError(error && error.stack ? 
error.stack : error); - process.exit(1); - } finally { - console.log = originalLog; - console.error = originalError; - } -})(); -`; - - const result = spawnSync(process.execPath, ["-e", childScript], { - cwd: process.cwd(), - encoding: "utf8", - env: { ...process.env, NEMOCLAW_OLLAMA_PORT: port }, - timeout: 15000, - }); - assert.equal(result.status, 0, result.stderr || result.stdout); - const payload = JSON.parse(result.stdout.trim().split(/\r?\n/).pop()); - assert.equal(payload.result.provider, "ollama-local"); - assert.equal(payload.result.model, "mock-tool-model"); - assert.equal(payload.result.preferredInferenceApi, "openai-completions"); -} - -(async () => { - await withMockEndpoint("success", async (endpoint, readRequests) => { - const result = await validate(endpoint); - assert.deepEqual(result, { ok: true, api: "openai-completions" }); - const requests = readRequests(); - assert.equal(requests.length, 1); - assert.equal(requests[0].method, "POST"); - assert.equal(requests[0].url, "/v1/chat/completions"); - assertStrictPayload(requests[0].body); - console.log("[PASS] strict validation succeeds with structured tool_calls"); - }); - - await withMockEndpoint("success", async (endpoint, readRequests) => { - runOnboardingCallerAgainstMock(endpoint); - const requests = readRequests(); - assert.equal(requests.length, 1); - assert.equal(requests[0].method, "POST"); - assert.equal(requests[0].url, "/v1/chat/completions"); - assertStrictPayload(requests[0].body); - console.log("[PASS] Local Ollama onboarding caller enforces strict Chat Completions validation"); - }); - - await withMockEndpoint("transient-502", async (endpoint, readRequests) => { - const result = await validate(endpoint); - assert.deepEqual(result, { ok: true, api: "openai-completions" }); - const requests = readRequests(); - assert.equal(requests.length, 2); - assertStrictPayload(requests[0].body); - assertStrictPayload(requests[1].body); - console.log("[PASS] strict validation retries a transient 
502 and keeps bounded payloads"); - }); - - await withMockEndpoint("plain-text", async (endpoint, readRequests) => { - const recoveryCalls = []; - const result = await validate(endpoint, recoveryCalls); - assert.deepEqual(result, { ok: false, retry: "retry" }); - const requests = readRequests(); - assert.equal(requests.length, 1); - assertStrictPayload(requests[0].body); - assert.equal(recoveryCalls.length, 1); - console.log("[PASS] strict validation fails closed when no structured tool_call is returned"); - }); -})().catch((error) => { - console.error(error && error.stack ? error.stack : error); - process.exit(1); -}); -NODE -NODE_EXIT=$? -set -e - -if [ "$NODE_EXIT" -ne 0 ]; then - fail "strict Chat Completions tool-call probe harness failed" -fi - -pass "strict Chat Completions tool-call probe E2E passed" diff --git a/test/regression-e2e-workflow.test.ts b/test/regression-e2e-workflow.test.ts index 8a932973d2..1cfc066d9d 100644 --- a/test/regression-e2e-workflow.test.ts +++ b/test/regression-e2e-workflow.test.ts @@ -36,4 +36,28 @@ describe("Regression E2E workflow contract", () => { expect(selectorScript).not.toContain("docker-unreachable-gateway-start-e2e"); expect(selectorScript).not.toContain("docker_unreachable_gateway_start"); }); + + it("runs strict tool-call probe through Vitest artifacts", () => { + const job = workflow.jobs?.["strict-tool-call-probe-e2e"]; + const checkoutStep = job?.steps?.find((step) => + String(step.uses ?? 
"").startsWith("actions/checkout@"), + ); + const runStep = job?.steps?.find( + (step) => step.name === "Run strict tool-call probe Vitest E2E test", + ); + const uploadStep = job?.steps?.find( + (step) => step.name === "Upload strict tool-call probe artifacts", + ); + + expect(checkoutStep?.with?.["persist-credentials"]).toBe(false); + expect(runStep?.run).toContain("npx vitest run --project e2e-scenarios-live"); + expect(runStep?.run).toContain("test/e2e-scenario/live/strict-tool-call-probe.test.ts"); + expect(runStep?.run).not.toContain("test/e2e/test-strict-tool-call-probe.sh"); + expect(runStep?.env?.NEMOCLAW_RUN_E2E_SCENARIOS).toBe("1"); + expect(runStep?.env?.E2E_ARTIFACT_DIR).toBe( + "${{ github.workspace }}/e2e-artifacts/vitest/strict-tool-call-probe", + ); + expect(uploadStep?.with?.path).toBe("e2e-artifacts/vitest/strict-tool-call-probe/"); + expect(uploadStep?.with?.["include-hidden-files"]).toBe(false); + }); }); From f9422bcf885a20489284a84e2d4d7f88973ac0cd Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Wed, 10 Jun 2026 07:46:45 -0700 Subject: [PATCH 10/17] test(e2e): guard legacy shell deletion evidence --- test/e2e-scenario/docs/MIGRATION.md | 14 ++- test/e2e-scenario/docs/README.md | 5 +- .../e2e-migration-policy.test.ts | 2 + test/pr-review-advisor.test.ts | 81 +++++++++++- tools/pr-review-advisor/analyze.mts | 116 +++++++++++++++++- 5 files changed, 208 insertions(+), 10 deletions(-) diff --git a/test/e2e-scenario/docs/MIGRATION.md b/test/e2e-scenario/docs/MIGRATION.md index 28b9c01798..e1d4e8a0e5 100644 --- a/test/e2e-scenario/docs/MIGRATION.md +++ b/test/e2e-scenario/docs/MIGRATION.md @@ -64,9 +64,17 @@ The useful deletion invariant is smaller: > A PR that deletes a legacy E2E script must show the replacement Vitest > coverage or explain the retirement rationale. -Record that evidence in the PR body and linked issue. 
The evidence should name -the legacy contract, the replacement Vitest coverage, any intentionally retired -behavior, and the verification that preserves fidelity. +Record that evidence in the PR body and linked issue. For each deleted script, +include a `Legacy E2E deletion evidence` block with: + +- `Script:` the deleted `test/e2e/test-*.sh` path. +- `Legacy contract:` the observable behavior the shell script protected. +- `Replacement Vitest coverage:` a `.test.ts` path, or `Retirement rationale:` + when the behavior is intentionally retired instead of replaced. +- `Intentionally retired behavior:` any assertions, probes, or workflow hooks + that are deliberately not preserved. +- `Fidelity verification:` the command, CI check, or review evidence proving the + Vitest coverage keeps the same contract value. ## Migration Pattern diff --git a/test/e2e-scenario/docs/README.md b/test/e2e-scenario/docs/README.md index 529daea534..cd1d4295e9 100644 --- a/test/e2e-scenario/docs/README.md +++ b/test/e2e-scenario/docs/README.md @@ -121,8 +121,9 @@ GitHub issues and PRs own changing migration status. The key issues are: The former repo-local `legacy-inventory.json` ledger and generated legacy assertion inventories are removed because they duplicated live GitHub state and drifted quickly. A PR that deletes a legacy E2E script must show the replacement -Vitest coverage or explain the retirement rationale in the PR body and linked -issue. +Vitest coverage or explain the retirement rationale in a per-script PR-body +`Legacy E2E deletion evidence` block, with the legacy contract, intentionally +retired behavior, and fidelity verification kept next to the deletion. Prefer new E2E coverage in Vitest fixtures. 
When shell, installer, process, platform, or full user-flow behavior is the contract, invoke that real boundary diff --git a/test/e2e-scenario/support-tests/e2e-migration-policy.test.ts b/test/e2e-scenario/support-tests/e2e-migration-policy.test.ts index 6eb28553c4..def7f1bd54 100644 --- a/test/e2e-scenario/support-tests/e2e-migration-policy.test.ts +++ b/test/e2e-scenario/support-tests/e2e-migration-policy.test.ts @@ -37,6 +37,8 @@ describe("E2E migration tracking policy", () => { expect(docs).toContain("source of truth"); expect(docs).toContain("replacement Vitest coverage"); expect(docs).toContain("retirement rationale"); + expect(docs).toContain("Legacy E2E deletion evidence"); + expect(docs).toContain("Fidelity verification"); expect(docs).toContain("generated legacy assertion inventories"); expect(docs).toContain("not a separate E2E framework or runner"); }); diff --git a/test/pr-review-advisor.test.ts b/test/pr-review-advisor.test.ts index 129db6ba74..86413efbac 100644 --- a/test/pr-review-advisor.test.ts +++ b/test/pr-review-advisor.test.ts @@ -5,21 +5,22 @@ import fs from "node:fs"; import path from "node:path"; import Ajv2020 from "ajv/dist/2020.js"; import { afterEach, describe, expect, it, vi } from "vitest"; - -import { buildComment } from "../tools/pr-review-advisor/comment.mts"; +import { githubGraphql } from "../tools/advisors/github.mts"; import { + assessLegacyE2eShellDeletionEvidence, buildPromptTurns, buildSystemPrompt, classifyMonolithDelta, classifyTestDepth, detectLocalizedPatchSignals, + findDeletedLegacyE2eShellScripts, normalizeReviewResult, readTrustedSecurityReviewSkill, renderDetailedReview, renderSummary, writePromptArtifacts, } from "../tools/pr-review-advisor/analyze.mts"; -import { githubGraphql } from "../tools/advisors/github.mts"; +import { buildComment } from "../tools/pr-review-advisor/comment.mts"; import { validatePrReviewAdvisorWorkflowBoundary } from "../tools/pr-review-advisor/workflow-boundary.mts"; const ROOT = 
path.resolve(import.meta.dirname, ".."); @@ -39,6 +40,7 @@ function metadata(overrides: Partial = {}): ReviewMetadata { previousAdvisorReview: null, workflowSignals: [], localizedPatchSignals: [], + legacyE2eShellDeletionEvidence: [], monolithDeltas: [], driftEvidence: [], github: null, @@ -223,6 +225,8 @@ describe("PR review advisor", () => { expect(prompt).toContain( "Any sourceOfTruthReview item with status=missing or status=needs_followup must also be represented as a finding", ); + expect(prompt).toContain("Legacy E2E deletion governance"); + expect(prompt).toContain("replacement Vitest coverage path or retirement rationale"); expect(prompt).toContain("multi-turn conversation"); expect(prompt).toContain( "In the final synthesis turn, return JSON only matching the schema provided in that turn", @@ -350,6 +354,77 @@ describe("PR review advisor", () => { expect(signals[0]?.reviewRule).toContain("invalid state"); }); + it("detects deleted legacy E2E shell scripts and complete PR-body evidence", () => { + const diff = `diff --git a/test/e2e/test-example.sh b/test/e2e/test-example.sh +deleted file mode 100755 +index 1234567..0000000 +--- a/test/e2e/test-example.sh ++++ /dev/null +@@ -1,2 +0,0 @@ +-#!/usr/bin/env bash +-echo ok +`; + const prBody = ` +## Legacy E2E deletion evidence + +- Script: \`test/e2e/test-example.sh\` + - Legacy contract: validates the example CLI path against a real shell. + - Replacement Vitest coverage: \`test/e2e-scenario/live/example.test.ts\` + - Intentionally retired behavior: none. 
+ - Fidelity verification: \`npx vitest run --project e2e-scenarios-live test/e2e-scenario/live/example.test.ts\` +`; + + expect(findDeletedLegacyE2eShellScripts(diff)).toEqual(["test/e2e/test-example.sh"]); + expect(assessLegacyE2eShellDeletionEvidence(diff, prBody)).toEqual([ + expect.objectContaining({ + script: "test/e2e/test-example.sh", + hasScriptEvidenceBlock: true, + hasLegacyContract: true, + hasReplacementVitestCoverage: true, + hasRetirementRationale: false, + hasIntentionallyRetiredBehavior: true, + hasFidelityVerification: true, + missing: [], + }), + ]); + }); + + it("adds a blocker finding when a legacy E2E deletion lacks PR-body evidence", () => { + const diff = `diff --git a/test/e2e/test-example.sh b/test/e2e/test-example.sh +deleted file mode 100755 +--- a/test/e2e/test-example.sh ++++ /dev/null +@@ -1 +0,0 @@ +-echo ok +`; + const deletionEvidence = assessLegacyE2eShellDeletionEvidence( + diff, + "- Script: `test/e2e/test-example.sh`\n - Legacy contract: validates the example CLI path.\n", + ); + const result = normalizeReviewResult( + validResult({ findings: [], sourceOfTruthReview: [] }), + metadata({ + deterministic: { + ...metadata().deterministic, + legacyE2eShellDeletionEvidence: deletionEvidence, + }, + }), + ); + + expect(deletionEvidence[0]?.missing).toEqual([ + "replacement Vitest coverage path or retirement rationale", + "intentionally retired behavior", + "fidelity verification", + ]); + expect(result.findings[0]).toMatchObject({ + severity: "blocker", + category: "tests", + file: "test/e2e/test-example.sh", + title: "Legacy E2E deletion evidence is missing", + }); + expect(result.findings[0]?.evidence).toContain("replacement Vitest coverage path"); + }); + it("adds a finding when source-of-truth review is missing follow-up", () => { const result = normalizeReviewResult( validResult({ diff --git a/tools/pr-review-advisor/analyze.mts b/tools/pr-review-advisor/analyze.mts index 7c1388d151..57c0b7d26a 100755 --- 
a/tools/pr-review-advisor/analyze.mts +++ b/tools/pr-review-advisor/analyze.mts @@ -163,6 +163,7 @@ type DeterministicReviewContext = { testDepth: ReviewAdvisorResult["testDepth"]; workflowSignals: string[]; localizedPatchSignals: LocalizedPatchSignal[]; + legacyE2eShellDeletionEvidence: LegacyE2eShellDeletionEvidence[]; monolithDeltas: MonolithDelta[]; driftEvidence: DriftEvidence[]; previousAdvisorReview: PreviousAdvisorReview | null; @@ -194,6 +195,17 @@ type DriftEvidence = { renameHints: string[]; }; +type LegacyE2eShellDeletionEvidence = { + script: string; + hasScriptEvidenceBlock: boolean; + hasLegacyContract: boolean; + hasReplacementVitestCoverage: boolean; + hasRetirementRationale: boolean; + hasIntentionallyRetiredBehavior: boolean; + hasFidelityVerification: boolean; + missing: string[]; +}; + type OpenPrOverlap = { number: number; title: string; @@ -349,6 +361,10 @@ async function collectDeterministicContext(options: { const github = await collectGitHubContext(); const riskyAreas = detectRiskyAreas(options.changedFiles); const testDepth = classifyTestDepth(options.changedFiles, options.diff); + const legacyE2eShellDeletionEvidence = assessLegacyE2eShellDeletionEvidence( + options.diff, + pullRequestBodyText(github?.pullRequest), + ); return { diffStat: getDiffStat(options.baseRef, options.headRef), commits: getCommits(options.baseRef, options.headRef), @@ -357,6 +373,7 @@ async function collectDeterministicContext(options: { previousAdvisorReview: github?.previousAdvisorReview || null, workflowSignals: detectWorkflowSignals(options.changedFiles, options.diff), localizedPatchSignals: detectLocalizedPatchSignals(options.diff), + legacyE2eShellDeletionEvidence, monolithDeltas: computeMonolithDeltas(options.baseRef, options.changedFiles), driftEvidence: collectDriftEvidence(options.baseRef, options.changedFiles), github, @@ -443,6 +460,74 @@ function detectWorkflowSignals(changedFiles: string[], diff: string): string[] { return signals; } +export function 
findDeletedLegacyE2eShellScripts(diff: string): string[] { + const scripts = new Set(); + for (const block of diff.split(/\ndiff --git /)) { + const header = block.startsWith("diff --git ") ? block : `diff --git ${block}`; + const match = header.match(/^diff --git a\/(test\/e2e\/test-[^\s]+\.sh) b\/\1/m); + if (!match?.[1]) continue; + if (/^deleted file mode\b/m.test(header) || /^\+\+\+ \/dev\/null$/m.test(header)) { + scripts.add(match[1]); + } + } + return [...scripts].sort(); +} + +export function assessLegacyE2eShellDeletionEvidence( + diff: string, + prBody: string, +): LegacyE2eShellDeletionEvidence[] { + return findDeletedLegacyE2eShellScripts(diff).map((script) => { + const evidenceBlock = findDeletionEvidenceBlock(prBody, script); + const hasLegacyContract = /\blegacy contract\s*:/i.test(evidenceBlock); + const hasReplacementVitestCoverage = + /\breplacement vitest coverage\s*:/i.test(evidenceBlock) && + /\b(?:test|nemoclaw\/src)\/[^\s`)"']+\.test\.ts\b/.test(evidenceBlock); + const hasRetirementRationale = /\bretirement rationale\s*:/i.test(evidenceBlock); + const hasIntentionallyRetiredBehavior = /\bintentionally retired behavior\s*:/i.test(evidenceBlock); + const hasFidelityVerification = /\bfidelity verification\s*:/i.test(evidenceBlock); + const missing = [ + [evidenceBlock ? 
"" : "script evidence block", Boolean(evidenceBlock)], + ["legacy contract", hasLegacyContract], + [ + "replacement Vitest coverage path or retirement rationale", + hasReplacementVitestCoverage || hasRetirementRationale, + ], + ["intentionally retired behavior", hasIntentionallyRetiredBehavior], + ["fidelity verification", hasFidelityVerification], + ] + .filter(([, present]) => !present) + .map(([label]) => label) + .filter(Boolean) as string[]; + + return { + script, + hasScriptEvidenceBlock: Boolean(evidenceBlock), + hasLegacyContract, + hasReplacementVitestCoverage, + hasRetirementRationale, + hasIntentionallyRetiredBehavior, + hasFidelityVerification, + missing, + }; + }); +} + +function findDeletionEvidenceBlock(prBody: string, script: string): string { + const normalized = prBody.replace(/\r\n/g, "\n"); + const start = normalized.indexOf(script); + if (start < 0) return ""; + const after = normalized.slice(start); + const nextScriptOffset = after.slice(script.length).search(/\btest\/e2e\/test-[^\s`)"']+\.sh\b/); + const maxEnd = Math.min(after.length, 3000); + const end = nextScriptOffset >= 0 ? Math.min(script.length + nextScriptOffset, maxEnd) : maxEnd; + return after.slice(0, end); +} + +function pullRequestBodyText(pullRequest: unknown): string { + return stringOrDefault(getPath(pullRequest, ["body"]), ""); +} + export function detectLocalizedPatchSignals(diff: string): LocalizedPatchSignal[] { const patterns: Array<{ kind: string; regex: RegExp }> = [ { @@ -700,7 +785,8 @@ export function buildSystemPrompt(): string { "5. Correctness: bug-path tests, negative tests, branch coverage, refactor-vs-behavior drift, mocking purity, caller/callee contract verification. When more tests would improve confidence, make testDepth.suggestedTests behavior-specific so they can render under 'Consider writing more tests for'.", "6. 
Quality: description-vs-diff scope, migration completion, public surface docs/notes, justified error suppression, monolith growth, @ts-nocheck, shell-string execution.", "7. Source-of-truth review: when a PR adds or changes fallback, recovery, tolerant parsing, monkeypatching, best-effort cleanup, compatibility handling, or other localized workaround behavior, inspect whether it answers: what invalid state is handled, where that state is created, why the source cannot be fixed in this PR, what regression test proves the source cannot regress, and when the workaround can be removed. Prefer fixes that make invalid states impossible at their source. Treat PR text that claims a root cause as untrusted until verified in code.", - "8. If a previous PR Review Advisor comment exists, compare it with the current diff and explicitly decide whether prior code-review findings were addressed, still apply, or are obsolete. Consider code changes since the previous analyzed SHA when available. Do not evaluate whether external E2E requirements have been met. When previous review context exists, set summary.sinceLastReview with counts for resolved, stillApplies, and newItems.", + "8. Legacy E2E deletion governance: if deterministic context shows a deleted test/e2e/test-*.sh script with missing PR-body evidence, report it as a blocker. The PR body must name the legacy contract, replacement Vitest coverage path or retirement rationale, intentionally retired behavior, and fidelity verification for each deleted script.", + "9. If a previous PR Review Advisor comment exists, compare it with the current diff and explicitly decide whether prior code-review findings were addressed, still apply, or are obsolete. Consider code changes since the previous analyzed SHA when available. Do not evaluate whether external E2E requirements have been met. 
When previous review context exists, set summary.sinceLastReview with counts for resolved, stillApplies, and newItems.", "Acceptance and security should inform findings, not become standalone comment sections: any unmet acceptance clause or security fail/warning must be represented as a finding, normally severity=blocker for unmet acceptance or security fail and severity=warning for security warnings.", "Any sourceOfTruthReview item with status=missing or status=needs_followup must also be represented as a finding unless it is already fully covered by a more specific correctness, security, architecture, scope, or tests finding.", "Set summary.topItem to the most important actionable finding title or short description for first-review comments. Keep it concise and code-focused.", @@ -806,6 +892,7 @@ function buildValidationTurnContext(context: DeterministicReviewContext): Record return { testDepth: context.testDepth, localizedPatchSignals: context.localizedPatchSignals, + legacyE2eShellDeletionEvidence: context.legacyE2eShellDeletionEvidence, previousAdvisorReview: context.previousAdvisorReview, pullRequest: context.github?.pullRequest ?? null, linkedIssues: context.github?.linkedIssues ?? 
[], @@ -853,6 +940,10 @@ export function normalizeReviewResult(result: unknown, metadata: ReviewMetadata) if (!isRecord(result)) throw new Error("PR review advisor returned a non-object result"); const object = result as Record; const sourceOfTruthReview = sanitizeSourceOfTruthReview(object.sourceOfTruthReview); + const findings = addDeterministicFindings( + addSourceOfTruthFindings(sanitizeFindings(object.findings), sourceOfTruthReview), + metadata, + ); return { version: 1, baseRef: metadata.baseRef, @@ -860,7 +951,7 @@ export function normalizeReviewResult(result: unknown, metadata: ReviewMetadata) headSha: metadata.headSha, changedFiles: metadata.changedFiles, summary: sanitizeSummary(object.summary), - findings: addSourceOfTruthFindings(sanitizeFindings(object.findings), sourceOfTruthReview), + findings, acceptanceCoverage: sanitizeAcceptanceCoverage(object.acceptanceCoverage), securityCategories: sanitizeSecurityCategories(object.securityCategories), sourceOfTruthReview, @@ -965,6 +1056,27 @@ function addSourceOfTruthFindings(findings: Finding[], sourceOfTruthReview: Sour return [...injected, ...findings.slice(0, originalSlots)]; } +function addDeterministicFindings(findings: Finding[], metadata: ReviewMetadata): Finding[] { + const deletionEvidence = metadata.deterministic.legacyE2eShellDeletionEvidence ?? 
[]; + const injected: Finding[] = []; + for (const evidence of deletionEvidence) { + if (evidence.missing.length === 0) continue; + injected.push({ + severity: "blocker", + category: "tests", + file: evidence.script, + line: null, + title: "Legacy E2E deletion evidence is missing", + description: `This PR deletes ${evidence.script} without complete PR-body evidence that preserves or retires the legacy contract.`, + recommendation: + "Add a per-script PR-body evidence block naming the legacy contract, replacement Vitest coverage path or retirement rationale, intentionally retired behavior, and fidelity verification.", + evidence: `Missing: ${evidence.missing.join(", ")}.`, + }); + } + const originalSlots = Math.max(0, 50 - injected.length); + return [...injected, ...findings.slice(0, originalSlots)]; +} + function sanitizeTestDepth(value: unknown, fallback: ReviewAdvisorResult["testDepth"]): ReviewAdvisorResult["testDepth"] { const object = isRecord(value) ? value : {}; return { From debe9586b9ee4f29f03fc1dc0097eb77f00dade4 Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Wed, 10 Jun 2026 08:00:43 -0700 Subject: [PATCH 11/17] test(e2e): validate legacy deletion replacement paths --- test/e2e-scenario/docs/MIGRATION.md | 9 +++-- test/e2e-scenario/docs/README.md | 3 +- .../e2e-migration-policy.test.ts | 2 + test/pr-review-advisor.test.ts | 37 ++++++++++++++++--- tools/pr-review-advisor/analyze.mts | 25 ++++++++++--- 5 files changed, 60 insertions(+), 16 deletions(-) diff --git a/test/e2e-scenario/docs/MIGRATION.md b/test/e2e-scenario/docs/MIGRATION.md index e1d4e8a0e5..d97f391499 100644 --- a/test/e2e-scenario/docs/MIGRATION.md +++ b/test/e2e-scenario/docs/MIGRATION.md @@ -64,13 +64,14 @@ The useful deletion invariant is smaller: > A PR that deletes a legacy E2E script must show the replacement Vitest > coverage or explain the retirement rationale. -Record that evidence in the PR body and linked issue. 
For each deleted script, -include a `Legacy E2E deletion evidence` block with: +Record that evidence in the PR body, which is the machine-checkable boundary for +the deletion. Link or summarize that PR evidence from the issue when useful. For +each deleted script, include a `Legacy E2E deletion evidence` block with: - `Script:` the deleted `test/e2e/test-*.sh` path. - `Legacy contract:` the observable behavior the shell script protected. -- `Replacement Vitest coverage:` a `.test.ts` path, or `Retirement rationale:` - when the behavior is intentionally retired instead of replaced. +- `Replacement Vitest coverage:` an existing `.test.ts` path, or `Retirement + rationale:` when the behavior is intentionally retired instead of replaced. - `Intentionally retired behavior:` any assertions, probes, or workflow hooks that are deliberately not preserved. - `Fidelity verification:` the command, CI check, or review evidence proving the diff --git a/test/e2e-scenario/docs/README.md b/test/e2e-scenario/docs/README.md index cd1d4295e9..a424b91f69 100644 --- a/test/e2e-scenario/docs/README.md +++ b/test/e2e-scenario/docs/README.md @@ -123,7 +123,8 @@ assertion inventories are removed because they duplicated live GitHub state and drifted quickly. A PR that deletes a legacy E2E script must show the replacement Vitest coverage or explain the retirement rationale in a per-script PR-body `Legacy E2E deletion evidence` block, with the legacy contract, intentionally -retired behavior, and fidelity verification kept next to the deletion. +retired behavior, and fidelity verification kept next to the deletion. The +replacement coverage path must point at an existing `.test.ts` file. Prefer new E2E coverage in Vitest fixtures. 
When shell, installer, process, platform, or full user-flow behavior is the contract, invoke that real boundary diff --git a/test/e2e-scenario/support-tests/e2e-migration-policy.test.ts b/test/e2e-scenario/support-tests/e2e-migration-policy.test.ts index def7f1bd54..0c5ffe1c9b 100644 --- a/test/e2e-scenario/support-tests/e2e-migration-policy.test.ts +++ b/test/e2e-scenario/support-tests/e2e-migration-policy.test.ts @@ -39,6 +39,8 @@ describe("E2E migration tracking policy", () => { expect(docs).toContain("retirement rationale"); expect(docs).toContain("Legacy E2E deletion evidence"); expect(docs).toContain("Fidelity verification"); + expect(docs).toContain("machine-checkable boundary"); + expect(docs).toContain("existing `.test.ts` file"); expect(docs).toContain("generated legacy assertion inventories"); expect(docs).toContain("not a separate E2E framework or runner"); }); diff --git a/test/pr-review-advisor.test.ts b/test/pr-review-advisor.test.ts index 86413efbac..d7f7694dc1 100644 --- a/test/pr-review-advisor.test.ts +++ b/test/pr-review-advisor.test.ts @@ -226,7 +226,7 @@ describe("PR review advisor", () => { "Any sourceOfTruthReview item with status=missing or status=needs_followup must also be represented as a finding", ); expect(prompt).toContain("Legacy E2E deletion governance"); - expect(prompt).toContain("replacement Vitest coverage path or retirement rationale"); + expect(prompt).toContain("existing replacement Vitest coverage path or retirement rationale"); expect(prompt).toContain("multi-turn conversation"); expect(prompt).toContain( "In the final synthesis turn, return JSON only matching the schema provided in that turn", @@ -369,9 +369,9 @@ index 1234567..0000000 - Script: \`test/e2e/test-example.sh\` - Legacy contract: validates the example CLI path against a real shell. 
- - Replacement Vitest coverage: \`test/e2e-scenario/live/example.test.ts\` + - Replacement Vitest coverage: \`test/e2e-scenario/live/openshell-version-pin.test.ts\` - Intentionally retired behavior: none. - - Fidelity verification: \`npx vitest run --project e2e-scenarios-live test/e2e-scenario/live/example.test.ts\` + - Fidelity verification: \`npx vitest run --project e2e-scenarios-live test/e2e-scenario/live/openshell-version-pin.test.ts\` `; expect(findDeletedLegacyE2eShellScripts(diff)).toEqual(["test/e2e/test-example.sh"]); @@ -381,6 +381,7 @@ index 1234567..0000000 hasScriptEvidenceBlock: true, hasLegacyContract: true, hasReplacementVitestCoverage: true, + replacementVitestCoveragePath: "test/e2e-scenario/live/openshell-version-pin.test.ts", hasRetirementRationale: false, hasIntentionallyRetiredBehavior: true, hasFidelityVerification: true, @@ -412,7 +413,7 @@ deleted file mode 100755 ); expect(deletionEvidence[0]?.missing).toEqual([ - "replacement Vitest coverage path or retirement rationale", + "existing replacement Vitest coverage path or retirement rationale", "intentionally retired behavior", "fidelity verification", ]); @@ -422,7 +423,33 @@ deleted file mode 100755 file: "test/e2e/test-example.sh", title: "Legacy E2E deletion evidence is missing", }); - expect(result.findings[0]?.evidence).toContain("replacement Vitest coverage path"); + expect(result.findings[0]?.evidence).toContain("existing replacement Vitest coverage path"); + }); + + it("rejects replacement Vitest paths that do not exist in the checkout", () => { + const diff = `diff --git a/test/e2e/test-example.sh b/test/e2e/test-example.sh +deleted file mode 100755 +--- a/test/e2e/test-example.sh ++++ /dev/null +@@ -1 +0,0 @@ +-echo ok +`; + const [evidence] = assessLegacyE2eShellDeletionEvidence( + diff, + ` +- Script: \`test/e2e/test-example.sh\` + - Legacy contract: validates the example CLI path. 
+ - Replacement Vitest coverage: \`test/e2e-scenario/live/does-not-exist.test.ts\` + - Intentionally retired behavior: none. + - Fidelity verification: focused Vitest run. +`, + ); + + expect(evidence).toMatchObject({ + replacementVitestCoveragePath: "test/e2e-scenario/live/does-not-exist.test.ts", + hasReplacementVitestCoverage: false, + missing: ["existing replacement Vitest coverage path or retirement rationale"], + }); }); it("adds a finding when source-of-truth review is missing follow-up", () => { diff --git a/tools/pr-review-advisor/analyze.mts b/tools/pr-review-advisor/analyze.mts index 57c0b7d26a..d1647bb4a9 100755 --- a/tools/pr-review-advisor/analyze.mts +++ b/tools/pr-review-advisor/analyze.mts @@ -200,6 +200,7 @@ type LegacyE2eShellDeletionEvidence = { hasScriptEvidenceBlock: boolean; hasLegacyContract: boolean; hasReplacementVitestCoverage: boolean; + replacementVitestCoveragePath: string | null; hasRetirementRationale: boolean; hasIntentionallyRetiredBehavior: boolean; hasFidelityVerification: boolean; @@ -480,9 +481,8 @@ export function assessLegacyE2eShellDeletionEvidence( return findDeletedLegacyE2eShellScripts(diff).map((script) => { const evidenceBlock = findDeletionEvidenceBlock(prBody, script); const hasLegacyContract = /\blegacy contract\s*:/i.test(evidenceBlock); - const hasReplacementVitestCoverage = - /\breplacement vitest coverage\s*:/i.test(evidenceBlock) && - /\b(?:test|nemoclaw\/src)\/[^\s`)"']+\.test\.ts\b/.test(evidenceBlock); + const replacementVitestCoveragePath = extractReplacementVitestCoveragePath(evidenceBlock); + const hasReplacementVitestCoverage = replacementVitestCoveragePath !== null && repoFileExists(replacementVitestCoveragePath); const hasRetirementRationale = /\bretirement rationale\s*:/i.test(evidenceBlock); const hasIntentionallyRetiredBehavior = /\bintentionally retired behavior\s*:/i.test(evidenceBlock); const hasFidelityVerification = /\bfidelity verification\s*:/i.test(evidenceBlock); @@ -490,7 +490,7 @@ export 
function assessLegacyE2eShellDeletionEvidence( [evidenceBlock ? "" : "script evidence block", Boolean(evidenceBlock)], ["legacy contract", hasLegacyContract], [ - "replacement Vitest coverage path or retirement rationale", + "existing replacement Vitest coverage path or retirement rationale", hasReplacementVitestCoverage || hasRetirementRationale, ], ["intentionally retired behavior", hasIntentionallyRetiredBehavior], @@ -505,6 +505,7 @@ export function assessLegacyE2eShellDeletionEvidence( hasScriptEvidenceBlock: Boolean(evidenceBlock), hasLegacyContract, hasReplacementVitestCoverage, + replacementVitestCoveragePath, hasRetirementRationale, hasIntentionallyRetiredBehavior, hasFidelityVerification, @@ -513,6 +514,18 @@ export function assessLegacyE2eShellDeletionEvidence( }); } +function extractReplacementVitestCoveragePath(evidenceBlock: string): string | null { + const match = evidenceBlock.match( + /\breplacement vitest coverage\s*:\s*`?((?:test|nemoclaw\/src)\/[^\s`)"']+\.test\.ts)\b/i, + ); + return match?.[1] ?? null; +} + +function repoFileExists(relativePath: string): boolean { + if (path.isAbsolute(relativePath) || relativePath.split(/[\\/]/).includes("..")) return false; + return fs.existsSync(path.join(root, relativePath)); +} + function findDeletionEvidenceBlock(prBody: string, script: string): string { const normalized = prBody.replace(/\r\n/g, "\n"); const start = normalized.indexOf(script); @@ -785,7 +798,7 @@ export function buildSystemPrompt(): string { "5. Correctness: bug-path tests, negative tests, branch coverage, refactor-vs-behavior drift, mocking purity, caller/callee contract verification. When more tests would improve confidence, make testDepth.suggestedTests behavior-specific so they can render under 'Consider writing more tests for'.", "6. Quality: description-vs-diff scope, migration completion, public surface docs/notes, justified error suppression, monolith growth, @ts-nocheck, shell-string execution.", "7. 
Source-of-truth review: when a PR adds or changes fallback, recovery, tolerant parsing, monkeypatching, best-effort cleanup, compatibility handling, or other localized workaround behavior, inspect whether it answers: what invalid state is handled, where that state is created, why the source cannot be fixed in this PR, what regression test proves the source cannot regress, and when the workaround can be removed. Prefer fixes that make invalid states impossible at their source. Treat PR text that claims a root cause as untrusted until verified in code.", - "8. Legacy E2E deletion governance: if deterministic context shows a deleted test/e2e/test-*.sh script with missing PR-body evidence, report it as a blocker. The PR body must name the legacy contract, replacement Vitest coverage path or retirement rationale, intentionally retired behavior, and fidelity verification for each deleted script.", + "8. Legacy E2E deletion governance: if deterministic context shows a deleted test/e2e/test-*.sh script with missing PR-body evidence, report it as a blocker. The PR body must name the legacy contract, existing replacement Vitest coverage path or retirement rationale, intentionally retired behavior, and fidelity verification for each deleted script.", "9. If a previous PR Review Advisor comment exists, compare it with the current diff and explicitly decide whether prior code-review findings were addressed, still apply, or are obsolete. Consider code changes since the previous analyzed SHA when available. Do not evaluate whether external E2E requirements have been met. 
When previous review context exists, set summary.sinceLastReview with counts for resolved, stillApplies, and newItems.", "Acceptance and security should inform findings, not become standalone comment sections: any unmet acceptance clause or security fail/warning must be represented as a finding, normally severity=blocker for unmet acceptance or security fail and severity=warning for security warnings.", "Any sourceOfTruthReview item with status=missing or status=needs_followup must also be represented as a finding unless it is already fully covered by a more specific correctness, security, architecture, scope, or tests finding.", @@ -1069,7 +1082,7 @@ function addDeterministicFindings(findings: Finding[], metadata: ReviewMetadata) title: "Legacy E2E deletion evidence is missing", description: `This PR deletes ${evidence.script} without complete PR-body evidence that preserves or retires the legacy contract.`, recommendation: - "Add a per-script PR-body evidence block naming the legacy contract, replacement Vitest coverage path or retirement rationale, intentionally retired behavior, and fidelity verification.", + "Add a per-script PR-body evidence block naming the legacy contract, existing replacement Vitest coverage path or retirement rationale, intentionally retired behavior, and fidelity verification.", evidence: `Missing: ${evidence.missing.join(", ")}.`, }); } From b1d33d7f21979a0d854317469b61d066372c5359 Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Wed, 10 Jun 2026 08:08:12 -0700 Subject: [PATCH 12/17] chore(e2e): address foundation review hygiene --- test/e2e-scenario/docs/README.md | 2 +- tools/e2e-advisor/scenarios-schema.json | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/test/e2e-scenario/docs/README.md b/test/e2e-scenario/docs/README.md index a424b91f69..8082a50806 100644 --- a/test/e2e-scenario/docs/README.md +++ b/test/e2e-scenario/docs/README.md @@ -3,7 +3,7 @@ # NemoClaw E2E Vitest Fixtures -NemoClaw E2E now has one 
target execution model: **Vitest as the harness** and +NemoClaw E2E now has one target execution model, Vitest as the harness and GitHub Actions as the matrix. Vitest owns discovery, filtering, timeouts, reporters, fixture lifecycle, skips, and CI integration. NemoClaw owns the domain layer: scenario metadata, phase fixtures, product clients, evidence diff --git a/tools/e2e-advisor/scenarios-schema.json b/tools/e2e-advisor/scenarios-schema.json index f7a8a3c139..64a7f74808 100644 --- a/tools/e2e-advisor/scenarios-schema.json +++ b/tools/e2e-advisor/scenarios-schema.json @@ -1,4 +1,6 @@ { + "SPDX-FileCopyrightText": "Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.", + "SPDX-License-Identifier": "Apache-2.0", "$schema": "https://json-schema.org/draft/2020-12/schema", "$id": "https://github.com/NVIDIA/NemoClaw/tools/e2e-advisor/scenarios-schema.json", "title": "NemoClaw Vitest E2E Scenario Advisor Result", From 6062f531123b44a67fc467d0bfd172ae93d2e27e Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Wed, 10 Jun 2026 08:35:48 -0700 Subject: [PATCH 13/17] test(e2e): block retired ledger reintroduction --- test/pr-review-advisor.test.ts | 54 ++++++++++++++++++++++++++++- tools/pr-review-advisor/analyze.mts | 46 +++++++++++++++++++++++- 2 files changed, 98 insertions(+), 2 deletions(-) diff --git a/test/pr-review-advisor.test.ts b/test/pr-review-advisor.test.ts index d7f7694dc1..d458c58618 100644 --- a/test/pr-review-advisor.test.ts +++ b/test/pr-review-advisor.test.ts @@ -14,6 +14,7 @@ import { classifyTestDepth, detectLocalizedPatchSignals, findDeletedLegacyE2eShellScripts, + findRetiredE2eMigrationLedgerChanges, normalizeReviewResult, readTrustedSecurityReviewSkill, renderDetailedReview, @@ -41,6 +42,7 @@ function metadata(overrides: Partial = {}): ReviewMetadata { workflowSignals: [], localizedPatchSignals: [], legacyE2eShellDeletionEvidence: [], + retiredE2eMigrationLedgerChanges: [], monolithDeltas: [], driftEvidence: [], github: null, @@ 
-225,7 +227,8 @@ describe("PR review advisor", () => { expect(prompt).toContain( "Any sourceOfTruthReview item with status=missing or status=needs_followup must also be represented as a finding", ); - expect(prompt).toContain("Legacy E2E deletion governance"); + expect(prompt).toContain("Legacy E2E migration governance"); + expect(prompt).toContain("retired repo-local E2E migration ledger"); expect(prompt).toContain("existing replacement Vitest coverage path or retirement rationale"); expect(prompt).toContain("multi-turn conversation"); expect(prompt).toContain( @@ -452,6 +455,55 @@ deleted file mode 100755 }); }); + it("detects retired E2E migration ledgers only when added or modified", () => { + const diff = `diff --git a/test/e2e-scenario/migration/legacy-inventory.json b/test/e2e-scenario/migration/legacy-inventory.json +index 1111111..2222222 100644 +--- a/test/e2e-scenario/migration/legacy-inventory.json ++++ b/test/e2e-scenario/migration/legacy-inventory.json +@@ -1 +1 @@ +-{"status":"old"} ++{"status":"bridge-probe"} +diff --git a/test/e2e/docs/parity-inventory.generated.json b/test/e2e/docs/parity-inventory.generated.json +deleted file mode 100644 +--- a/test/e2e/docs/parity-inventory.generated.json ++++ /dev/null +@@ -1 +0,0 @@ +-{} +`; + + expect(findRetiredE2eMigrationLedgerChanges(diff)).toEqual([ + { + file: "test/e2e-scenario/migration/legacy-inventory.json", + change: "modified", + }, + ]); + }); + + it("adds a blocker finding when a retired E2E migration ledger is reintroduced", () => { + const result = normalizeReviewResult( + validResult({ findings: [], sourceOfTruthReview: [] }), + metadata({ + deterministic: { + ...metadata().deterministic, + retiredE2eMigrationLedgerChanges: [ + { + file: "test/e2e-scenario/migration/legacy-inventory.json", + change: "added", + }, + ], + }, + }), + ); + + expect(result.findings[0]).toMatchObject({ + severity: "blocker", + category: "tests", + file: "test/e2e-scenario/migration/legacy-inventory.json", + title: 
"Retired E2E migration ledger is being reintroduced", + }); + expect(result.findings[0]?.recommendation).toContain("Remove repo-local migration ledger"); + }); + it("adds a finding when source-of-truth review is missing follow-up", () => { const result = normalizeReviewResult( validResult({ diff --git a/tools/pr-review-advisor/analyze.mts b/tools/pr-review-advisor/analyze.mts index d1647bb4a9..b80e5d1eab 100755 --- a/tools/pr-review-advisor/analyze.mts +++ b/tools/pr-review-advisor/analyze.mts @@ -164,6 +164,7 @@ type DeterministicReviewContext = { workflowSignals: string[]; localizedPatchSignals: LocalizedPatchSignal[]; legacyE2eShellDeletionEvidence: LegacyE2eShellDeletionEvidence[]; + retiredE2eMigrationLedgerChanges: RetiredE2eMigrationLedgerChange[]; monolithDeltas: MonolithDelta[]; driftEvidence: DriftEvidence[]; previousAdvisorReview: PreviousAdvisorReview | null; @@ -207,6 +208,11 @@ type LegacyE2eShellDeletionEvidence = { missing: string[]; }; +type RetiredE2eMigrationLedgerChange = { + file: string; + change: "added" | "modified"; +}; + type OpenPrOverlap = { number: number; title: string; @@ -366,6 +372,7 @@ async function collectDeterministicContext(options: { options.diff, pullRequestBodyText(github?.pullRequest), ); + const retiredE2eMigrationLedgerChanges = findRetiredE2eMigrationLedgerChanges(options.diff); return { diffStat: getDiffStat(options.baseRef, options.headRef), commits: getCommits(options.baseRef, options.headRef), @@ -375,6 +382,7 @@ async function collectDeterministicContext(options: { workflowSignals: detectWorkflowSignals(options.changedFiles, options.diff), localizedPatchSignals: detectLocalizedPatchSignals(options.diff), legacyE2eShellDeletionEvidence, + retiredE2eMigrationLedgerChanges, monolithDeltas: computeMonolithDeltas(options.baseRef, options.changedFiles), driftEvidence: collectDriftEvidence(options.baseRef, options.changedFiles), github, @@ -474,6 +482,28 @@ export function findDeletedLegacyE2eShellScripts(diff: string): 
string[] { return [...scripts].sort(); } +export function findRetiredE2eMigrationLedgerChanges(diff: string): RetiredE2eMigrationLedgerChange[] { + const retiredLedgers = new Set([ + "test/e2e-scenario/migration/legacy-inventory.json", + "test/e2e/docs/parity-inventory.generated.json", + ]); + const changes = new Map(); + for (const block of diff.split(/\ndiff --git /)) { + const header = block.startsWith("diff --git ") ? block : `diff --git ${block}`; + const match = header.match(/^diff --git a\/(.+?) b\/(.+)$/m); + const before = match?.[1] ?? ""; + const after = match?.[2] ?? ""; + const file = retiredLedgers.has(after) ? after : retiredLedgers.has(before) ? before : ""; + if (!file) continue; + if (/^deleted file mode\b/m.test(header) || /^\+\+\+ \/dev\/null$/m.test(header)) continue; + changes.set(file, { + file, + change: /^new file mode\b/m.test(header) || /^--- \/dev\/null$/m.test(header) ? "added" : "modified", + }); + } + return [...changes.values()].sort((a, b) => a.file.localeCompare(b.file)); +} + export function assessLegacyE2eShellDeletionEvidence( diff: string, prBody: string, @@ -798,7 +828,7 @@ export function buildSystemPrompt(): string { "5. Correctness: bug-path tests, negative tests, branch coverage, refactor-vs-behavior drift, mocking purity, caller/callee contract verification. When more tests would improve confidence, make testDepth.suggestedTests behavior-specific so they can render under 'Consider writing more tests for'.", "6. Quality: description-vs-diff scope, migration completion, public surface docs/notes, justified error suppression, monolith growth, @ts-nocheck, shell-string execution.", "7. 
Source-of-truth review: when a PR adds or changes fallback, recovery, tolerant parsing, monkeypatching, best-effort cleanup, compatibility handling, or other localized workaround behavior, inspect whether it answers: what invalid state is handled, where that state is created, why the source cannot be fixed in this PR, what regression test proves the source cannot regress, and when the workaround can be removed. Prefer fixes that make invalid states impossible at their source. Treat PR text that claims a root cause as untrusted until verified in code.", - "8. Legacy E2E deletion governance: if deterministic context shows a deleted test/e2e/test-*.sh script with missing PR-body evidence, report it as a blocker. The PR body must name the legacy contract, existing replacement Vitest coverage path or retirement rationale, intentionally retired behavior, and fidelity verification for each deleted script.", + "8. Legacy E2E migration governance: if deterministic context shows a retired repo-local E2E migration ledger being added or modified, report it as a blocker. If deterministic context shows a deleted test/e2e/test-*.sh script with missing PR-body evidence, report it as a blocker. The PR body must name the legacy contract, existing replacement Vitest coverage path or retirement rationale, intentionally retired behavior, and fidelity verification for each deleted script.", "9. If a previous PR Review Advisor comment exists, compare it with the current diff and explicitly decide whether prior code-review findings were addressed, still apply, or are obsolete. Consider code changes since the previous analyzed SHA when available. Do not evaluate whether external E2E requirements have been met. 
When previous review context exists, set summary.sinceLastReview with counts for resolved, stillApplies, and newItems.", "Acceptance and security should inform findings, not become standalone comment sections: any unmet acceptance clause or security fail/warning must be represented as a finding, normally severity=blocker for unmet acceptance or security fail and severity=warning for security warnings.", "Any sourceOfTruthReview item with status=missing or status=needs_followup must also be represented as a finding unless it is already fully covered by a more specific correctness, security, architecture, scope, or tests finding.", @@ -906,6 +936,7 @@ function buildValidationTurnContext(context: DeterministicReviewContext): Record testDepth: context.testDepth, localizedPatchSignals: context.localizedPatchSignals, legacyE2eShellDeletionEvidence: context.legacyE2eShellDeletionEvidence, + retiredE2eMigrationLedgerChanges: context.retiredE2eMigrationLedgerChanges, previousAdvisorReview: context.previousAdvisorReview, pullRequest: context.github?.pullRequest ?? null, linkedIssues: context.github?.linkedIssues ?? [], @@ -1072,6 +1103,19 @@ function addSourceOfTruthFindings(findings: Finding[], sourceOfTruthReview: Sour function addDeterministicFindings(findings: Finding[], metadata: ReviewMetadata): Finding[] { const deletionEvidence = metadata.deterministic.legacyE2eShellDeletionEvidence ?? []; const injected: Finding[] = []; + for (const ledger of metadata.deterministic.retiredE2eMigrationLedgerChanges ?? []) { + injected.push({ + severity: "blocker", + category: "tests", + file: ledger.file, + line: null, + title: "Retired E2E migration ledger is being reintroduced", + description: `This PR ${ledger.change === "added" ? 
"adds" : "modifies"} ${ledger.file}, which is retired migration state.`, + recommendation: + "Remove repo-local migration ledger changes and record migration status, convergence evidence, and deletion rationale in the relevant GitHub issue or PR body instead.", + evidence: `${ledger.file} is a retired durable tracking ledger; #5126 makes GitHub issues and PRs the migration source of truth.`, + }); + } for (const evidence of deletionEvidence) { if (evidence.missing.length === 0) continue; injected.push({ From c2113a82002f2e49adf8a67350f2589b96bd17dc Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Wed, 10 Jun 2026 09:11:59 -0700 Subject: [PATCH 14/17] test(release): disable signing in tag fixtures --- test/release-latest-tag.test.ts | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/test/release-latest-tag.test.ts b/test/release-latest-tag.test.ts index b23be5005b..0771114ff1 100644 --- a/test/release-latest-tag.test.ts +++ b/test/release-latest-tag.test.ts @@ -31,9 +31,11 @@ function testEnv(extra: NodeJS.ProcessEnv = {}): NodeJS.ProcessEnv { GIT_AUTHOR_EMAIL: "release-test@example.com", GIT_COMMITTER_NAME: "Release Test", GIT_COMMITTER_EMAIL: "release-test@example.com", - GIT_CONFIG_COUNT: "1", + GIT_CONFIG_COUNT: "2", GIT_CONFIG_KEY_0: "tag.gpgSign", GIT_CONFIG_VALUE_0: "false", + GIT_CONFIG_KEY_1: "commit.gpgSign", + GIT_CONFIG_VALUE_1: "false", ...extra, }); } @@ -122,11 +124,13 @@ function runReleaseLatestWithoutIdentity( fs.mkdirSync(home); fs.mkdirSync(xdgConfigHome); const env = baseEnv({ - GIT_CONFIG_COUNT: "2", + GIT_CONFIG_COUNT: "3", GIT_CONFIG_KEY_0: "user.useConfigOnly", GIT_CONFIG_VALUE_0: "true", GIT_CONFIG_KEY_1: "tag.gpgSign", GIT_CONFIG_VALUE_1: "false", + GIT_CONFIG_KEY_2: "commit.gpgSign", + GIT_CONFIG_VALUE_2: "false", GITHUB_STEP_SUMMARY: fixture.summary, HOME: home, RELEASE_TAG: releaseTag, From 3c1ffccf8abd9c7f45640d181c98ea97fb7abcad Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Wed, 10 Jun 2026 09:13:15 
-0700 Subject: [PATCH 15/17] test(e2e): rename scenario fixtures surface --- test/e2e-scenario-advisor.test.ts | 4 +- test/e2e-scenario/docs/MIGRATION.md | 6 +- test/e2e-scenario/docs/README.md | 8 +-- test/e2e-scenario/docs/RETIREMENT.md | 2 +- .../{framework => fixtures}/artifacts.ts | 0 .../availability-env.ts | 4 +- .../{framework => fixtures}/cleanup.ts | 0 .../clients/command.ts | 0 .../clients/gateway.ts | 0 .../{framework => fixtures}/clients/host.ts | 0 .../{framework => fixtures}/clients/index.ts | 0 .../clients/provider.ts | 2 +- .../clients/sandbox.ts | 0 .../{framework => fixtures}/clients/state.ts | 0 .../{framework => fixtures}/e2e-test.ts | 0 .../live-project-gate.ts | 0 .../phases/environment.ts | 0 .../{framework => fixtures}/phases/index.ts | 0 .../phases/lifecycle.ts | 2 +- .../phases/onboarding.ts | 0 .../{framework => fixtures}/phases/runtime.ts | 0 .../phases/state-validation.ts | 2 +- .../{framework => fixtures}/redaction.ts | 58 +++++++++---------- .../{framework => fixtures}/secrets.ts | 2 +- .../{framework => fixtures}/shell-probe.ts | 4 +- .../shell/supervisor.ts | 2 +- .../shell/trusted-command.ts | 2 +- .../live/openshell-version-pin.test.ts | 4 +- .../live/registry-scenarios.test.ts | 4 +- .../live/ubuntu-repo-cli-smoke.test.ts | 2 +- test/e2e-scenario/scenarios/types.ts | 12 ++-- .../support-tests/e2e-clients.test.ts | 6 +- .../support-tests/e2e-fixture-context.test.ts | 10 ++-- .../e2e-live-project-config.test.ts | 2 +- .../e2e-migration-policy.test.ts | 2 +- .../e2e-phase-environment.test.ts | 10 ++-- .../support-tests/e2e-phase-lifecycle.test.ts | 10 ++-- .../e2e-phase-onboarding.test.ts | 12 ++-- .../support-tests/e2e-phase-runtime.test.ts | 8 +-- .../e2e-phase-state-validation.test.ts | 10 ++-- .../support-tests/e2e-redaction-entry.test.ts | 10 ++-- .../e2e-redaction-parity.test.ts | 34 +++++------ .../e2e-shell-supervisor.test.ts | 10 ++-- tools/e2e-advisor/scenarios.mts | 2 +- vitest.config.ts | 2 +- 45 files changed, 124 
insertions(+), 124 deletions(-) rename test/e2e-scenario/{framework => fixtures}/artifacts.ts (100%) rename test/e2e-scenario/{framework => fixtures}/availability-env.ts (89%) rename test/e2e-scenario/{framework => fixtures}/cleanup.ts (100%) rename test/e2e-scenario/{framework => fixtures}/clients/command.ts (100%) rename test/e2e-scenario/{framework => fixtures}/clients/gateway.ts (100%) rename test/e2e-scenario/{framework => fixtures}/clients/host.ts (100%) rename test/e2e-scenario/{framework => fixtures}/clients/index.ts (100%) rename test/e2e-scenario/{framework => fixtures}/clients/provider.ts (99%) rename test/e2e-scenario/{framework => fixtures}/clients/sandbox.ts (100%) rename test/e2e-scenario/{framework => fixtures}/clients/state.ts (100%) rename test/e2e-scenario/{framework => fixtures}/e2e-test.ts (100%) rename test/e2e-scenario/{framework => fixtures}/live-project-gate.ts (100%) rename test/e2e-scenario/{framework => fixtures}/phases/environment.ts (100%) rename test/e2e-scenario/{framework => fixtures}/phases/index.ts (100%) rename test/e2e-scenario/{framework => fixtures}/phases/lifecycle.ts (99%) rename test/e2e-scenario/{framework => fixtures}/phases/onboarding.ts (100%) rename test/e2e-scenario/{framework => fixtures}/phases/runtime.ts (100%) rename test/e2e-scenario/{framework => fixtures}/phases/state-validation.ts (99%) rename test/e2e-scenario/{framework => fixtures}/redaction.ts (81%) rename test/e2e-scenario/{framework => fixtures}/secrets.ts (96%) rename test/e2e-scenario/{framework => fixtures}/shell-probe.ts (97%) rename test/e2e-scenario/{framework => fixtures}/shell/supervisor.ts (98%) rename test/e2e-scenario/{framework => fixtures}/shell/trusted-command.ts (96%) diff --git a/test/e2e-scenario-advisor.test.ts b/test/e2e-scenario-advisor.test.ts index 863706927e..c728627fa7 100644 --- a/test/e2e-scenario-advisor.test.ts +++ b/test/e2e-scenario-advisor.test.ts @@ -38,13 +38,13 @@ describe("Vitest E2E scenario advisor — prompt 
construction", () => { const prompt = buildPrompt({ baseRef: "origin/main", headRef: "HEAD", - changedFiles: ["test/e2e-scenario/framework/phases/onboarding.ts"], + changedFiles: ["test/e2e-scenario/fixtures/phases/onboarding.ts"], diff: "+ echo ok", }); // Caller of normalizeScenarioAdvisorResult re-injects metadata, but the // prompt must still surface enough context for the model to reason. expect(prompt).toContain("origin/main"); - expect(prompt).toContain("test/e2e-scenario/framework/phases/onboarding.ts"); + expect(prompt).toContain("test/e2e-scenario/fixtures/phases/onboarding.ts"); expect(prompt).toContain("+ echo ok"); }); diff --git a/test/e2e-scenario/docs/MIGRATION.md b/test/e2e-scenario/docs/MIGRATION.md index d97f391499..4757f63312 100644 --- a/test/e2e-scenario/docs/MIGRATION.md +++ b/test/e2e-scenario/docs/MIGRATION.md @@ -21,7 +21,7 @@ The scenario runner cutover is complete: - `e2e-vitest-scenarios.yaml` is the scenario workflow. - `test/e2e-scenario/live/registry-scenarios.test.ts` is the registry-driven live scenario entrypoint. -- `test/e2e-scenario/framework/` owns phase fixtures, clients, artifact +- `test/e2e-scenario/fixtures/` owns phase fixtures, clients, artifact capture, redaction, cleanup, and shell-probe bridges. - `test/e2e-scenario/scenarios/run.ts` only lists scenarios and emits the live Vitest matrix. @@ -44,8 +44,8 @@ The durable E2E system has one execution path: - NemoClaw fixtures own setup, onboarding, lifecycle mutations, expected-state probes, assertion helpers, expected-failure evidence, cleanup, artifacts, and secret redaction. -- The historical `test/e2e-scenario/framework/` path is fixture/support code, - not a separate E2E framework or runner. +- `test/e2e-scenario/fixtures/` is fixture/support code, not a test harness + or runner. - Typed scenario definitions and matrix helpers describe stable scenario IDs and supported combinations without becoming a second runner. 
- Product-facing manifests describe desired setup/onboarding state, not test diff --git a/test/e2e-scenario/docs/README.md b/test/e2e-scenario/docs/README.md index 8082a50806..563819bfbb 100644 --- a/test/e2e-scenario/docs/README.md +++ b/test/e2e-scenario/docs/README.md @@ -25,7 +25,7 @@ runner cutover; migrate them by contract using the rules in `MIGRATION.md`. | Live scenario IDs and metadata | `test/e2e-scenario/scenarios/registry.ts`, `test/e2e-scenario/scenarios/scenarios/baseline.ts` | | GitHub Actions matrix emission | `test/e2e-scenario/scenarios/run.ts --emit-live-matrix` | | Live scenario execution | `test/e2e-scenario/live/registry-scenarios.test.ts` | -| Phase fixtures and clients | `test/e2e-scenario/framework/` | +| Phase fixtures and clients | `test/e2e-scenario/fixtures/` | | Expected-state probes | `test/e2e-scenario/scenarios/expected-states.ts` | | Product-facing setup/onboarding state | `test/e2e-scenario/manifests/*.yaml` | | Legacy direct E2E coverage | `test/e2e/test-*.sh` and their workflows | @@ -52,8 +52,8 @@ Live execution happens through Vitest fixtures: - `artifacts`, `secrets`, `cleanup`, and `shellProbe` provide shared fixture services. -The historical `test/e2e-scenario/framework/` path is a fixture/support layer, -not a separate E2E framework or runner. Vitest remains the only test harness. +The `test/e2e-scenario/fixtures/` path is fixture/support code, not a test +harness or runner. Vitest remains the only test harness. `suiteIds` remain metadata for reporting and migration planning. They do not dispatch shell validation suites. @@ -86,7 +86,7 @@ paths must not be reintroduced. 
```text test/e2e-scenario/ docs/ # Fixture guide, migration notes, retirement record - framework/ # Vitest fixtures, clients, redaction, artifacts, cleanup + fixtures/ # Vitest fixtures, clients, redaction, artifacts, cleanup live/ # Opt-in live Vitest scenario tests manifests/ # Product-facing NemoClawInstance desired state scenarios/ # Typed registry, matrix helpers, expected states diff --git a/test/e2e-scenario/docs/RETIREMENT.md b/test/e2e-scenario/docs/RETIREMENT.md index 03e55d1743..644298f592 100644 --- a/test/e2e-scenario/docs/RETIREMENT.md +++ b/test/e2e-scenario/docs/RETIREMENT.md @@ -45,7 +45,7 @@ and artifact shape operators needed from the retired workflows: - `.github/workflows/e2e-vitest-scenarios.yaml` runs the live matrix. - `test/e2e-scenario/live/registry-scenarios.test.ts` executes supported registry scenarios through Vitest. -- `test/e2e-scenario/framework/` owns fixtures, clients, shell-probe bridges, +- `test/e2e-scenario/fixtures/` owns fixtures, clients, shell-probe bridges, artifact writing, cleanup, and redaction. ## What Was Not Removed diff --git a/test/e2e-scenario/framework/artifacts.ts b/test/e2e-scenario/fixtures/artifacts.ts similarity index 100% rename from test/e2e-scenario/framework/artifacts.ts rename to test/e2e-scenario/fixtures/artifacts.ts diff --git a/test/e2e-scenario/framework/availability-env.ts b/test/e2e-scenario/fixtures/availability-env.ts similarity index 89% rename from test/e2e-scenario/framework/availability-env.ts rename to test/e2e-scenario/fixtures/availability-env.ts index 55b2ce6bd7..b05dfa2986 100644 --- a/test/e2e-scenario/framework/availability-env.ts +++ b/test/e2e-scenario/fixtures/availability-env.ts @@ -18,9 +18,9 @@ export function buildAvailabilityProbeEnv( ): NodeJS.ProcessEnv { // Availability probes run outside live scenario phases, but they need // the same child-env and PATH policy. Add only Docker - // discovery knobs on top of the shared framework boundary. 
+ // discovery knobs on top of the shared fixture boundary. return buildChildEnv(base, { additionalAllowedEnv: AVAILABILITY_PROBE_EXTRA_ENV_KEYS, - frameworkOverlay: {}, + fixtureOverlay: {}, }); } diff --git a/test/e2e-scenario/framework/cleanup.ts b/test/e2e-scenario/fixtures/cleanup.ts similarity index 100% rename from test/e2e-scenario/framework/cleanup.ts rename to test/e2e-scenario/fixtures/cleanup.ts diff --git a/test/e2e-scenario/framework/clients/command.ts b/test/e2e-scenario/fixtures/clients/command.ts similarity index 100% rename from test/e2e-scenario/framework/clients/command.ts rename to test/e2e-scenario/fixtures/clients/command.ts diff --git a/test/e2e-scenario/framework/clients/gateway.ts b/test/e2e-scenario/fixtures/clients/gateway.ts similarity index 100% rename from test/e2e-scenario/framework/clients/gateway.ts rename to test/e2e-scenario/fixtures/clients/gateway.ts diff --git a/test/e2e-scenario/framework/clients/host.ts b/test/e2e-scenario/fixtures/clients/host.ts similarity index 100% rename from test/e2e-scenario/framework/clients/host.ts rename to test/e2e-scenario/fixtures/clients/host.ts diff --git a/test/e2e-scenario/framework/clients/index.ts b/test/e2e-scenario/fixtures/clients/index.ts similarity index 100% rename from test/e2e-scenario/framework/clients/index.ts rename to test/e2e-scenario/fixtures/clients/index.ts diff --git a/test/e2e-scenario/framework/clients/provider.ts b/test/e2e-scenario/fixtures/clients/provider.ts similarity index 99% rename from test/e2e-scenario/framework/clients/provider.ts rename to test/e2e-scenario/fixtures/clients/provider.ts index 257c7c8b6a..e6bf00fb13 100644 --- a/test/e2e-scenario/framework/clients/provider.ts +++ b/test/e2e-scenario/fixtures/clients/provider.ts @@ -19,7 +19,7 @@ export interface TrustedProviderEndpoint { export interface TrustedProviderEndpointOptions { /** - * Static framework-owned trust configuration for external HTTPS provider + * Static fixture-owned trust configuration 
for external HTTPS provider * endpoints. Do not populate this from scenario manifests or user input. */ allowedHosts?: readonly string[]; diff --git a/test/e2e-scenario/framework/clients/sandbox.ts b/test/e2e-scenario/fixtures/clients/sandbox.ts similarity index 100% rename from test/e2e-scenario/framework/clients/sandbox.ts rename to test/e2e-scenario/fixtures/clients/sandbox.ts diff --git a/test/e2e-scenario/framework/clients/state.ts b/test/e2e-scenario/fixtures/clients/state.ts similarity index 100% rename from test/e2e-scenario/framework/clients/state.ts rename to test/e2e-scenario/fixtures/clients/state.ts diff --git a/test/e2e-scenario/framework/e2e-test.ts b/test/e2e-scenario/fixtures/e2e-test.ts similarity index 100% rename from test/e2e-scenario/framework/e2e-test.ts rename to test/e2e-scenario/fixtures/e2e-test.ts diff --git a/test/e2e-scenario/framework/live-project-gate.ts b/test/e2e-scenario/fixtures/live-project-gate.ts similarity index 100% rename from test/e2e-scenario/framework/live-project-gate.ts rename to test/e2e-scenario/fixtures/live-project-gate.ts diff --git a/test/e2e-scenario/framework/phases/environment.ts b/test/e2e-scenario/fixtures/phases/environment.ts similarity index 100% rename from test/e2e-scenario/framework/phases/environment.ts rename to test/e2e-scenario/fixtures/phases/environment.ts diff --git a/test/e2e-scenario/framework/phases/index.ts b/test/e2e-scenario/fixtures/phases/index.ts similarity index 100% rename from test/e2e-scenario/framework/phases/index.ts rename to test/e2e-scenario/fixtures/phases/index.ts diff --git a/test/e2e-scenario/framework/phases/lifecycle.ts b/test/e2e-scenario/fixtures/phases/lifecycle.ts similarity index 99% rename from test/e2e-scenario/framework/phases/lifecycle.ts rename to test/e2e-scenario/fixtures/phases/lifecycle.ts index 6617bddd1e..263f7c6f96 100644 --- a/test/e2e-scenario/framework/phases/lifecycle.ts +++ b/test/e2e-scenario/fixtures/phases/lifecycle.ts @@ -165,7 +165,7 @@ export 
class LifecyclePhaseFixture { // Final step: drive the user-visible action that exposed #4423. // We invoke status through the host CLI client so artifacts are // captured and the command goes through the same - // shellProbe/redaction layer the rest of the framework uses. + // shellProbe/redaction layer the rest of the fixture code uses. // Status is allowed to fail (exit non-zero) because on unfixed // code it intentionally fails after destroying state — the // post-action invariants are checked by state-validation. diff --git a/test/e2e-scenario/framework/phases/onboarding.ts b/test/e2e-scenario/fixtures/phases/onboarding.ts similarity index 100% rename from test/e2e-scenario/framework/phases/onboarding.ts rename to test/e2e-scenario/fixtures/phases/onboarding.ts diff --git a/test/e2e-scenario/framework/phases/runtime.ts b/test/e2e-scenario/fixtures/phases/runtime.ts similarity index 100% rename from test/e2e-scenario/framework/phases/runtime.ts rename to test/e2e-scenario/fixtures/phases/runtime.ts diff --git a/test/e2e-scenario/framework/phases/state-validation.ts b/test/e2e-scenario/fixtures/phases/state-validation.ts similarity index 99% rename from test/e2e-scenario/framework/phases/state-validation.ts rename to test/e2e-scenario/fixtures/phases/state-validation.ts index ea09977afd..47651635f2 100644 --- a/test/e2e-scenario/framework/phases/state-validation.ts +++ b/test/e2e-scenario/fixtures/phases/state-validation.ts @@ -19,7 +19,7 @@ import type { ExpectedState, StateProbeId } from "../../scenarios/types.ts"; import type { NemoClawInstance } from "./onboarding.ts"; // Mirror of `src/lib/state/registry.ts::REGISTRY_FILE`. The fixture -// owns its own copy because the framework code must not import from +// owns its own copy because the fixture code must not import from // `src/lib/**` (CLI source) — that boundary keeps the live runner // honest about probing only host-observable state. 
const NEMOCLAW_REGISTRY_RELPATH = [".nemoclaw", "sandboxes.json"] as const; diff --git a/test/e2e-scenario/framework/redaction.ts b/test/e2e-scenario/fixtures/redaction.ts similarity index 81% rename from test/e2e-scenario/framework/redaction.ts rename to test/e2e-scenario/fixtures/redaction.ts index 8a56d1a509..965f4205d9 100644 --- a/test/e2e-scenario/framework/redaction.ts +++ b/test/e2e-scenario/fixtures/redaction.ts @@ -2,12 +2,12 @@ // SPDX-License-Identifier: Apache-2.0 /** - * Framework-owned secret hygiene at the spawn boundary. + * Fixture-owned secret hygiene at the spawn boundary. * - * Spec ownership: redaction and child-env minimization are FRAMEWORK + * Spec ownership: redaction and child-env minimization are FIXTURE * INFRASTRUCTURE, not a per-action / per-script / per-workflow concern. - * Children spawned by framework command boundaries must (a) receive a minimal, - * typed env (framework allowlist + per-action declared `secretEnv` + * Children spawned by fixture command boundaries must (a) receive a minimal, + * typed env (fixture allowlist + per-action declared `secretEnv` * passthrough only), and (b) have their stdout/stderr passed through * redaction before any byte reaches an evidence log or * PhaseResult.message. There is no opt-out flag, no env switch, no @@ -16,9 +16,9 @@ * rest of this PR. * * Pattern source-of-truth: src/lib/security/secret-patterns.ts. We - * import the canonical regex sets and apply them here so framework + * import the canonical regex sets and apply them here so fixture-layer * redaction stays in lockstep with product-runtime redaction without - * coupling the framework to product runtime modules. + * coupling the fixture layer to product runtime modules. 
* * Tests: * test/e2e-scenario/support-tests/e2e-redaction-entry.test.ts @@ -26,7 +26,7 @@ * test/e2e-scenario/support-tests/e2e-phase-environment.test.ts * - canonical token redaction parity with product runtime patterns * - explicit per-test redaction values - * - child-env allowlist filtering for framework probes + * - child-env allowlist filtering for fixture probes */ import type { Readable, Writable } from "node:stream"; @@ -34,8 +34,8 @@ import type { Readable, Writable } from "node:stream"; const REDACTED = ""; const EXPLICIT_REDACTED = "[REDACTED]"; -// Framework-local mirror of src/lib/security/secret-patterns.ts. The -// framework deliberately does not import from src/lib/security/ so it +// Fixture-local mirror of src/lib/security/secret-patterns.ts. The +// fixture layer deliberately does not import from src/lib/security/ so it // stays decoupled from product runtime modules and the cross-tsconfig // boundary. A parity test // (test/e2e-scenario/support-tests/e2e-redaction-parity.test.ts) @@ -46,7 +46,7 @@ const EXPLICIT_REDACTED = "[REDACTED]"; // (test/e2e-scenario/support-tests/e2e-redaction-parity.test.ts) can // import the actual RegExp values rather than parsing source text. // Production code in this module continues to use them via the local -// binding; nothing in the framework runtime imports these. +// binding; nothing in the fixture runtime imports these. export const TOKEN_PREFIX_PATTERNS: RegExp[] = [ /nvapi-[A-Za-z0-9_-]{10,}/g, /nvcf-[A-Za-z0-9_-]{10,}/g, @@ -110,11 +110,11 @@ export function redactString(text: string, explicitValues?: Iterable): s return out; } -// Env keys the framework guarantees children may always see. Anything -// outside this set, outside FRAMEWORK_ENV_PREFIXES, and not declared +// Env keys the fixture layer guarantees children may always see. Anything +// outside this set, outside FIXTURE_ENV_PREFIXES, and not declared // in PhaseAction.secretEnv / AssertionStep.secretEnv is dropped before // the child spawns. 
-const FRAMEWORK_ENV_ALLOWLIST: ReadonlySet = new Set([ +const FIXTURE_ENV_ALLOWLIST: ReadonlySet = new Set([ "PATH", "HOME", "SHELL", @@ -134,12 +134,12 @@ const FRAMEWORK_ENV_ALLOWLIST: ReadonlySet = new Set([ "NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE", ]); -const FRAMEWORK_ENV_PREFIXES: readonly string[] = ["E2E_", "NEMOCLAW_LOG_"]; +const FIXTURE_ENV_PREFIXES: readonly string[] = ["E2E_", "NEMOCLAW_LOG_"]; // Shape required of any declared secretEnv key — must look like a // secret-bearing variable. Prevents accidental allowlisting of // non-secret values via the secretEnv channel and keeps the -// "framework-allowlist vs declared-secret" distinction honest. +// "fixture-allowlist vs declared-secret" distinction honest. const SECRET_ENV_KEY_SHAPE = /^[A-Z][A-Z0-9_]*(?:API[_]?KEY|TOKEN|SECRET|PASSWORD|CREDENTIAL|PASSPHRASE|PRIVATE[_]?KEY|ACCESS[_]?KEY)$/; @@ -150,20 +150,20 @@ export function isValidSecretEnvKey(key: string): boolean { export interface BuildChildEnvOptions { /** Per-action / per-step declared secret-bearing env keys to pass through. */ secretEnv?: readonly string[]; - /** Additional non-secret env keys required by a framework-owned spawn helper. */ + /** Additional non-secret env keys required by a fixture-owned spawn helper. */ additionalAllowedEnv?: readonly string[]; - /** Framework-controlled overlay (E2E_CONTEXT_DIR, E2E_PHASE, E2E_*_ID). */ - frameworkOverlay: NodeJS.ProcessEnv; + /** Fixture-controlled overlay (E2E_CONTEXT_DIR, E2E_PHASE, E2E_*_ID). */ + fixtureOverlay: NodeJS.ProcessEnv; } /** * Build the child's env from `base` (typically `process.env`) by * keeping only: - * 1. keys in FRAMEWORK_ENV_ALLOWLIST - * 2. keys starting with one of FRAMEWORK_ENV_PREFIXES + * 1. keys in FIXTURE_ENV_ALLOWLIST + * 2. keys starting with one of FIXTURE_ENV_PREFIXES * 3. non-secret keys explicitly declared in `opts.additionalAllowedEnv` * 4. keys explicitly declared in `opts.secretEnv` (validated shape) - * then layering `opts.frameworkOverlay` on top. 
+ * then layering `opts.fixtureOverlay` on top. * * Throws if a `secretEnv` entry doesn't match the secret-key shape; * better to fail loudly at compile/runtime than silently leak a @@ -176,11 +176,11 @@ export function buildChildEnv( const out: NodeJS.ProcessEnv = {}; for (const [key, value] of Object.entries(base)) { if (value === undefined) continue; - if (FRAMEWORK_ENV_ALLOWLIST.has(key)) { + if (FIXTURE_ENV_ALLOWLIST.has(key)) { out[key] = value; continue; } - if (FRAMEWORK_ENV_PREFIXES.some((prefix) => key.startsWith(prefix))) { + if (FIXTURE_ENV_PREFIXES.some((prefix) => key.startsWith(prefix))) { out[key] = value; continue; } @@ -208,14 +208,14 @@ export function buildChildEnv( out[key] = base[key]; } } - Object.assign(out, opts.frameworkOverlay); + Object.assign(out, opts.fixtureOverlay); // The install action drops nemoclaw / openshell shims under // ~/.local/bin (the historical repo-current install location). // On Ubuntu GH runners ~/.local/bin is on the default PATH; on // self-hosted GPU runners and inside WSL it often is not, so the // onboarding action's child runs without nemoclaw on PATH and // dies with 'nemoclaw: command not found'. Add ~/.local/bin to - // every child's PATH at the framework boundary so the install + // every child's PATH at the fixture boundary so the install // location is consistent across phases. Idempotent equivalent of // the install-path-refresh.sh nemoclaw_ensure_local_bin_on_path // helper, applied centrally instead of per-script. @@ -250,15 +250,15 @@ export function pipeRedacted( } /** - * Compact array of all framework env keys the child sees by default. + * Compact array of all fixture env keys the child sees by default. * Exported for tests/diagnostics; do not use to bypass the boundary. 
*/ -export function frameworkEnvAllowlistSnapshot(): { +export function fixtureEnvAllowlistSnapshot(): { keys: string[]; prefixes: string[]; } { return { - keys: [...FRAMEWORK_ENV_ALLOWLIST].sort(), - prefixes: [...FRAMEWORK_ENV_PREFIXES], + keys: [...FIXTURE_ENV_ALLOWLIST].sort(), + prefixes: [...FIXTURE_ENV_PREFIXES], }; } diff --git a/test/e2e-scenario/framework/secrets.ts b/test/e2e-scenario/fixtures/secrets.ts similarity index 96% rename from test/e2e-scenario/framework/secrets.ts rename to test/e2e-scenario/fixtures/secrets.ts index 6f3d59e6d9..ff18bc607f 100644 --- a/test/e2e-scenario/framework/secrets.ts +++ b/test/e2e-scenario/fixtures/secrets.ts @@ -10,7 +10,7 @@ const SENSITIVE_NAME_PATTERN = /(api[_-]?key|token|secret|password|credential)/i * * Holds the per-test view of `process.env` and lets fixtures discover * sensitive values by name. Redaction itself is owned by the canonical - * entry point in framework/redaction.ts; this class only + * entry point in fixtures/redaction.ts; this class only * supplies the explicit values it knows about and delegates. There is * no separate fixture redaction pattern source. */ diff --git a/test/e2e-scenario/framework/shell-probe.ts b/test/e2e-scenario/fixtures/shell-probe.ts similarity index 97% rename from test/e2e-scenario/framework/shell-probe.ts rename to test/e2e-scenario/fixtures/shell-probe.ts index 2fd3dc8114..f78582a89f 100644 --- a/test/e2e-scenario/framework/shell-probe.ts +++ b/test/e2e-scenario/fixtures/shell-probe.ts @@ -12,9 +12,9 @@ import type { TrustedShellCommand } from "./shell/trusted-command.ts"; * * The lifecycle boundary (detached process-group cleanup, SIGTERM -> * SIGKILL escalation, timeout, AbortSignal) is owned by - * framework/shell/supervisor.ts and shared with the phase orchestrator + * fixtures/shell/supervisor.ts and shared with the phase orchestrator * and probe helpers. The trusted-command brand + NUL-byte guard live - * in framework/shell/trusted-command.ts. 
This file layers the + * in fixtures/shell/trusted-command.ts. This file layers the * fixture-specific policy on top: redaction at the canonical entry * point, artefact persistence, and explicit-env-by-default. */ diff --git a/test/e2e-scenario/framework/shell/supervisor.ts b/test/e2e-scenario/fixtures/shell/supervisor.ts similarity index 98% rename from test/e2e-scenario/framework/shell/supervisor.ts rename to test/e2e-scenario/fixtures/shell/supervisor.ts index 1da9845776..492c6e5b62 100644 --- a/test/e2e-scenario/framework/shell/supervisor.ts +++ b/test/e2e-scenario/fixtures/shell/supervisor.ts @@ -9,7 +9,7 @@ import type { ChildProcess } from "node:child_process"; * * Spec ownership: detached process-group cleanup, SIGTERM -> SIGKILL * escalation, timeout enforcement, and AbortSignal handling are - * FRAMEWORK INFRASTRUCTURE. Every TS spawn site delegates here so the + * FIXTURE INFRASTRUCTURE. Every TS spawn site delegates here so the * cleanup contract stays in one place. Callers keep their own spawn() * call so per-site argv contracts (literal `bash -c` scripts with * positional argv, host-CLI argv arrays, trusted-command descriptors) diff --git a/test/e2e-scenario/framework/shell/trusted-command.ts b/test/e2e-scenario/fixtures/shell/trusted-command.ts similarity index 96% rename from test/e2e-scenario/framework/shell/trusted-command.ts rename to test/e2e-scenario/fixtures/shell/trusted-command.ts index 1c211c8a59..54aa0fc93a 100644 --- a/test/e2e-scenario/framework/shell/trusted-command.ts +++ b/test/e2e-scenario/fixtures/shell/trusted-command.ts @@ -5,7 +5,7 @@ * Trusted command descriptor + NUL-byte guard shared by every E2E * TypeScript spawn site. * - * Spec ownership: command shape validation is FRAMEWORK INFRASTRUCTURE, + * Spec ownership: command shape validation is FIXTURE INFRASTRUCTURE, * not a per-helper concern. 
Whether the spawn site is the fixture layer * (ShellProbe), the phase orchestrator (PhaseOrchestrator.runAction / * runShellStep), or the probe helpers (spawnBash, runHostCmd, docs / diff --git a/test/e2e-scenario/live/openshell-version-pin.test.ts b/test/e2e-scenario/live/openshell-version-pin.test.ts index b20db553dd..84a77066c9 100644 --- a/test/e2e-scenario/live/openshell-version-pin.test.ts +++ b/test/e2e-scenario/live/openshell-version-pin.test.ts @@ -6,8 +6,8 @@ import fs from "node:fs"; import os from "node:os"; import path from "node:path"; -import { type ArtifactSink } from "../framework/artifacts.ts"; -import { expect, test } from "../framework/e2e-test.ts"; +import { type ArtifactSink } from "../fixtures/artifacts.ts"; +import { expect, test } from "../fixtures/e2e-test.ts"; // Migrated from test/e2e/test-openshell-version-pin.sh (regression guard for // #3474). The legacy bash script is a hermetic installer-script behavioral diff --git a/test/e2e-scenario/live/registry-scenarios.test.ts b/test/e2e-scenario/live/registry-scenarios.test.ts index 7391f20dcd..0e94f7e157 100644 --- a/test/e2e-scenario/live/registry-scenarios.test.ts +++ b/test/e2e-scenario/live/registry-scenarios.test.ts @@ -4,8 +4,8 @@ import fs from "node:fs"; import path from "node:path"; -import { expect, test } from "../framework/e2e-test.ts"; -import type { LifecycleProfile } from "../framework/phases/index.ts"; +import { expect, test } from "../fixtures/e2e-test.ts"; +import type { LifecycleProfile } from "../fixtures/phases/index.ts"; import { listScenarios } from "../scenarios/registry.ts"; import { liveScenarioSupport, liveScenarioTestName } from "../scenarios/runtime-support.ts"; import { buildLiveScenarioRunPlan } from "./run-plan.ts"; diff --git a/test/e2e-scenario/live/ubuntu-repo-cli-smoke.test.ts b/test/e2e-scenario/live/ubuntu-repo-cli-smoke.test.ts index c818650741..d9ef8613cb 100644 --- a/test/e2e-scenario/live/ubuntu-repo-cli-smoke.test.ts +++ 
b/test/e2e-scenario/live/ubuntu-repo-cli-smoke.test.ts @@ -4,7 +4,7 @@ import fs from "node:fs"; import path from "node:path"; -import { expect, test } from "../framework/e2e-test.ts"; +import { expect, test } from "../fixtures/e2e-test.ts"; const REPO_ROOT = path.resolve(import.meta.dirname, "../../.."); const CLI_DIST_ENTRYPOINT = path.join(REPO_ROOT, "dist", "nemoclaw.js"); diff --git a/test/e2e-scenario/scenarios/types.ts b/test/e2e-scenario/scenarios/types.ts index 0310c34404..a95f928f97 100644 --- a/test/e2e-scenario/scenarios/types.ts +++ b/test/e2e-scenario/scenarios/types.ts @@ -150,10 +150,10 @@ export interface AssertionStep { }; evidencePath?: string; reliability?: AssertionStepReliability; - // Declared parent-env keys this step requires beyond the framework's + // Declared parent-env keys this step requires beyond the fixture layer's // allowlist. Anything not allowlisted and not declared here is - // dropped before spawn. See framework/redaction.ts. Each entry - // must match the secret-key shape; the framework rejects non-secret + // dropped before spawn. See fixtures/redaction.ts. Each entry + // must match the secret-key shape; the fixture layer rejects non-secret // names to keep the allowlist-vs-declared-secret boundary honest. secretEnv?: readonly string[]; // When true, a probe/pending step that resolves as "skipped" is @@ -234,10 +234,10 @@ export interface PhaseAction { // keep working without coupling them to the action's stable id. aliasPath?: string; // Declared parent-env keys this action requires beyond the - // framework's allowlist (PATH, HOME, E2E_*, NEMOCLAW_*, ...). + // fixture layer's allowlist (PATH, HOME, E2E_*, NEMOCLAW_*, ...). // Anything not allowlisted and not declared here is dropped before - // spawn. See framework/redaction.ts. Each entry must match the - // secret-key shape; the framework rejects non-secret names so the + // spawn. See fixtures/redaction.ts. 
Each entry must match the + // secret-key shape; the fixture layer rejects non-secret names so the // allowlist-vs-declared-secret boundary stays honest. Cloud install // declares ["NVIDIA_API_KEY"]; slack onboarding declares the slack // tokens it actually needs; etc. diff --git a/test/e2e-scenario/support-tests/e2e-clients.test.ts b/test/e2e-scenario/support-tests/e2e-clients.test.ts index 77e8824fe2..05be65beaa 100644 --- a/test/e2e-scenario/support-tests/e2e-clients.test.ts +++ b/test/e2e-scenario/support-tests/e2e-clients.test.ts @@ -7,7 +7,7 @@ import path from "node:path"; import { describe, expect, it } from "vitest"; -import { assertExitZero, type CommandRunner } from "../framework/clients/index.ts"; +import { assertExitZero, type CommandRunner } from "../fixtures/clients/index.ts"; import { GatewayClient, HostCliClient, @@ -15,12 +15,12 @@ import { SandboxClient, StateClient, trustedProviderEndpoint, -} from "../framework/clients/index.ts"; +} from "../fixtures/clients/index.ts"; import type { ShellProbeResult, ShellProbeRunOptions, TrustedShellCommand, -} from "../framework/shell-probe.ts"; +} from "../fixtures/shell-probe.ts"; interface RunnerCall { command: string; diff --git a/test/e2e-scenario/support-tests/e2e-fixture-context.test.ts b/test/e2e-scenario/support-tests/e2e-fixture-context.test.ts index c1f19c8e0b..1e5cbc003d 100644 --- a/test/e2e-scenario/support-tests/e2e-fixture-context.test.ts +++ b/test/e2e-scenario/support-tests/e2e-fixture-context.test.ts @@ -7,15 +7,15 @@ import path from "node:path"; import { describe, expect, expectTypeOf, it } from "vitest"; -import { ArtifactSink, createArtifactSink } from "../framework/artifacts.ts"; -import { assertCleanupPassed, CleanupRegistry } from "../framework/cleanup.ts"; -import { test as e2eTest } from "../framework/e2e-test.ts"; -import { SecretStore } from "../framework/secrets.ts"; +import { ArtifactSink, createArtifactSink } from "../fixtures/artifacts.ts"; +import { assertCleanupPassed, 
CleanupRegistry } from "../fixtures/cleanup.ts"; +import { test as e2eTest } from "../fixtures/e2e-test.ts"; +import { SecretStore } from "../fixtures/secrets.ts"; import { ShellProbe, trustedShellCommand, type TrustedShellCommand, -} from "../framework/shell-probe.ts"; +} from "../fixtures/shell-probe.ts"; const delay = (ms: number): Promise<void> => new Promise((resolve) => setTimeout(resolve, ms)); diff --git a/test/e2e-scenario/support-tests/e2e-live-project-config.test.ts b/test/e2e-scenario/support-tests/e2e-live-project-config.test.ts index 208cab1a43..4679999a8d 100644 --- a/test/e2e-scenario/support-tests/e2e-live-project-config.test.ts +++ b/test/e2e-scenario/support-tests/e2e-live-project-config.test.ts @@ -7,7 +7,7 @@ import { shouldRunBranchValidationE2E, shouldRunInstallerIntegration, shouldRunLiveE2EScenarios, -} from "../framework/live-project-gate.ts"; +} from "../fixtures/live-project-gate.ts"; import config from "../../../vitest.config.ts"; import { readYaml, type WorkflowStep } from "../../helpers/e2e-workflow-contract.ts"; diff --git a/test/e2e-scenario/support-tests/e2e-migration-policy.test.ts b/test/e2e-scenario/support-tests/e2e-migration-policy.test.ts index 0c5ffe1c9b..c809e9880f 100644 --- a/test/e2e-scenario/support-tests/e2e-migration-policy.test.ts +++ b/test/e2e-scenario/support-tests/e2e-migration-policy.test.ts @@ -42,7 +42,7 @@ describe("E2E migration tracking policy", () => { expect(docs).toContain("machine-checkable boundary"); expect(docs).toContain("existing `.test.ts` file"); expect(docs).toContain("generated legacy assertion inventories"); - expect(docs).toContain("not a separate E2E framework or runner"); + expect(docs).toMatch(/not a test\s+harness or runner/); }); it("keeps durable taxonomy out of the repo-local migration docs", () => { diff --git a/test/e2e-scenario/support-tests/e2e-phase-environment.test.ts b/test/e2e-scenario/support-tests/e2e-phase-environment.test.ts index 59e1049735..acf00a359d 100644 --- 
a/test/e2e-scenario/support-tests/e2e-phase-environment.test.ts +++ b/test/e2e-scenario/support-tests/e2e-phase-environment.test.ts @@ -7,15 +7,15 @@ import path from "node:path"; import { describe, expect, expectTypeOf, it } from "vitest"; -import { ArtifactSink } from "../framework/artifacts.ts"; -import { HostCliClient, type CommandRunner } from "../framework/clients/index.ts"; -import type { E2EScenarioFixtures } from "../framework/e2e-test.ts"; -import { EnvironmentPhaseFixture, type DockerRuntimeReady } from "../framework/phases/index.ts"; +import { ArtifactSink } from "../fixtures/artifacts.ts"; +import { HostCliClient, type CommandRunner } from "../fixtures/clients/index.ts"; +import type { E2EScenarioFixtures } from "../fixtures/e2e-test.ts"; +import { EnvironmentPhaseFixture, type DockerRuntimeReady } from "../fixtures/phases/index.ts"; import type { ShellProbeResult, ShellProbeRunOptions, TrustedShellCommand, -} from "../framework/shell-probe.ts"; +} from "../fixtures/shell-probe.ts"; import type { ScenarioEnvironment } from "../scenarios/types.ts"; interface RunnerCall { diff --git a/test/e2e-scenario/support-tests/e2e-phase-lifecycle.test.ts b/test/e2e-scenario/support-tests/e2e-phase-lifecycle.test.ts index 16407719c6..a960495412 100644 --- a/test/e2e-scenario/support-tests/e2e-phase-lifecycle.test.ts +++ b/test/e2e-scenario/support-tests/e2e-phase-lifecycle.test.ts @@ -3,19 +3,19 @@ import { describe, expect, expectTypeOf, it } from "vitest"; -import { HostCliClient, SandboxClient, type CommandRunner } from "../framework/clients/index.ts"; -import type { E2EScenarioFixtures } from "../framework/e2e-test.ts"; +import { HostCliClient, SandboxClient, type CommandRunner } from "../fixtures/clients/index.ts"; +import type { E2EScenarioFixtures } from "../fixtures/e2e-test.ts"; import { buildBackupContainerName, LifecyclePhaseFixture, type LifecycleCleanup, -} from "../framework/phases/lifecycle.ts"; -import type { NemoClawInstance } from 
"../framework/phases/index.ts"; +} from "../fixtures/phases/lifecycle.ts"; +import type { NemoClawInstance } from "../fixtures/phases/index.ts"; import type { ShellProbeResult, ShellProbeRunOptions, TrustedShellCommand, -} from "../framework/shell-probe.ts"; +} from "../fixtures/shell-probe.ts"; interface RunnerCall { command: string; diff --git a/test/e2e-scenario/support-tests/e2e-phase-onboarding.test.ts b/test/e2e-scenario/support-tests/e2e-phase-onboarding.test.ts index 7add981d6c..7475fd0a9f 100644 --- a/test/e2e-scenario/support-tests/e2e-phase-onboarding.test.ts +++ b/test/e2e-scenario/support-tests/e2e-phase-onboarding.test.ts @@ -6,16 +6,16 @@ import fs from "node:fs"; import os from "node:os"; import path from "node:path"; -import { ArtifactSink } from "../framework/artifacts.ts"; -import { HostCliClient, type CommandRunner } from "../framework/clients/index.ts"; -import type { E2EScenarioFixtures } from "../framework/e2e-test.ts"; -import { OnboardingPhaseFixture, type OnboardingSecrets } from "../framework/phases/index.ts"; -import type { EnvironmentReady } from "../framework/phases/index.ts"; +import { ArtifactSink } from "../fixtures/artifacts.ts"; +import { HostCliClient, type CommandRunner } from "../fixtures/clients/index.ts"; +import type { E2EScenarioFixtures } from "../fixtures/e2e-test.ts"; +import { OnboardingPhaseFixture, type OnboardingSecrets } from "../fixtures/phases/index.ts"; +import type { EnvironmentReady } from "../fixtures/phases/index.ts"; import type { ShellProbeResult, ShellProbeRunOptions, TrustedShellCommand, -} from "../framework/shell-probe.ts"; +} from "../fixtures/shell-probe.ts"; interface RunnerCall { command: string; diff --git a/test/e2e-scenario/support-tests/e2e-phase-runtime.test.ts b/test/e2e-scenario/support-tests/e2e-phase-runtime.test.ts index 926c54b5fe..4df3452ead 100644 --- a/test/e2e-scenario/support-tests/e2e-phase-runtime.test.ts +++ b/test/e2e-scenario/support-tests/e2e-phase-runtime.test.ts @@ -8,18 
+8,18 @@ import { SandboxClient, trustedProviderEndpoint, type CommandRunner, -} from "../framework/clients/index.ts"; -import type { E2EScenarioFixtures } from "../framework/e2e-test.ts"; +} from "../fixtures/clients/index.ts"; +import type { E2EScenarioFixtures } from "../fixtures/e2e-test.ts"; import { inferenceRouteUrl, RuntimePhaseFixture, type NemoClawInstance, -} from "../framework/phases/index.ts"; +} from "../fixtures/phases/index.ts"; import type { ShellProbeResult, ShellProbeRunOptions, TrustedShellCommand, -} from "../framework/shell-probe.ts"; +} from "../fixtures/shell-probe.ts"; interface RunnerCall { command: string; diff --git a/test/e2e-scenario/support-tests/e2e-phase-state-validation.test.ts b/test/e2e-scenario/support-tests/e2e-phase-state-validation.test.ts index ca69fc221b..2cd437d0b9 100644 --- a/test/e2e-scenario/support-tests/e2e-phase-state-validation.test.ts +++ b/test/e2e-scenario/support-tests/e2e-phase-state-validation.test.ts @@ -7,20 +7,20 @@ import path from "node:path"; import { describe, expect, expectTypeOf, it } from "vitest"; -import { ArtifactSink } from "../framework/artifacts.ts"; +import { ArtifactSink } from "../fixtures/artifacts.ts"; import { GatewayClient, HostCliClient, SandboxClient, type CommandRunner, -} from "../framework/clients/index.ts"; -import type { E2EScenarioFixtures } from "../framework/e2e-test.ts"; -import { StateValidationPhaseFixture, type NemoClawInstance } from "../framework/phases/index.ts"; +} from "../fixtures/clients/index.ts"; +import type { E2EScenarioFixtures } from "../fixtures/e2e-test.ts"; +import { StateValidationPhaseFixture, type NemoClawInstance } from "../fixtures/phases/index.ts"; import type { ShellProbeResult, ShellProbeRunOptions, TrustedShellCommand, -} from "../framework/shell-probe.ts"; +} from "../fixtures/shell-probe.ts"; interface RunnerCall { command: string; diff --git a/test/e2e-scenario/support-tests/e2e-redaction-entry.test.ts 
b/test/e2e-scenario/support-tests/e2e-redaction-entry.test.ts index 2850553199..fd7fb9f5d8 100644 --- a/test/e2e-scenario/support-tests/e2e-redaction-entry.test.ts +++ b/test/e2e-scenario/support-tests/e2e-redaction-entry.test.ts @@ -2,10 +2,10 @@ // SPDX-License-Identifier: Apache-2.0 /** - * Single-entry contract for the framework redactor. + * Single-entry contract for the fixture redactor. * * Both per-test explicit secret values and canonical secret-shape - * matches must flow through `redactString` so the framework has one + * matches must flow through `redactString` so the fixture layer has one * redaction entry point. This file asserts the contract so any future * helper that wants to add an explicit-value path stays inside the * canonical entry rather than introducing a parallel one. @@ -17,10 +17,10 @@ import { describe, expect, it } from "vitest"; -import { SecretStore } from "../framework/secrets.ts"; -import { redactString } from "../framework/redaction.ts"; +import { SecretStore } from "../fixtures/secrets.ts"; +import { redactString } from "../fixtures/redaction.ts"; -describe("framework redaction entry point", () => { +describe("fixture redaction entry point", () => { it("redacts explicit values with [REDACTED] and canonical shapes with ", () => { const explicit = "test-secret-aBcD"; const canonical = `nvapi-${"x".repeat(24)}`; diff --git a/test/e2e-scenario/support-tests/e2e-redaction-parity.test.ts b/test/e2e-scenario/support-tests/e2e-redaction-parity.test.ts index 8c90f31352..a42cb5643e 100644 --- a/test/e2e-scenario/support-tests/e2e-redaction-parity.test.ts +++ b/test/e2e-scenario/support-tests/e2e-redaction-parity.test.ts @@ -2,19 +2,19 @@ // SPDX-License-Identifier: Apache-2.0 /** - * Parity test: the framework's local secret-pattern set - * (test/e2e-scenario/framework/redaction.ts) must stay in + * Parity test: the fixture layer's local secret-pattern set + * (test/e2e-scenario/fixtures/redaction.ts) must stay in * lockstep with the 
canonical product source * (src/lib/security/secret-patterns.ts). * - * The framework deliberately mirrors rather than imports — see the - * "Framework-local mirror" comment in redaction.ts for why — but the + * The fixture layer deliberately mirrors rather than imports — see the + * "Fixture-local mirror" comment in redaction.ts for why — but the * mirror is only safe if it is actually a mirror. This test imports * the RegExp arrays from both modules and compares them by behavior * (`.source` + `.flags`) rather than by source-text shape, so the * source-shape budget (ci/source-shape-test-budget.json) stays at 0. * - * The framework-runtime decoupling is preserved: redaction.ts itself + * The fixture-runtime decoupling is preserved: redaction.ts itself * does not import from src/lib/security/. Only this test crosses the * boundary, which is the entire point of a parity test. */ @@ -22,9 +22,9 @@ import { describe, expect, it } from "vitest"; import { - CONTEXT_PATTERNS as FRAMEWORK_CONTEXT_PATTERNS, - TOKEN_PREFIX_PATTERNS as FRAMEWORK_TOKEN_PREFIX_PATTERNS, -} from "../framework/redaction.ts"; + CONTEXT_PATTERNS as FIXTURE_CONTEXT_PATTERNS, + TOKEN_PREFIX_PATTERNS as FIXTURE_TOKEN_PREFIX_PATTERNS, +} from "../fixtures/redaction.ts"; import { CONTEXT_PATTERNS as PRODUCT_CONTEXT_PATTERNS, TOKEN_PREFIX_PATTERNS as PRODUCT_TOKEN_PREFIX_PATTERNS, @@ -34,20 +34,20 @@ function fingerprint(patterns: readonly RegExp[]): string[] { return patterns.map((re) => `${re.source}::${re.flags}`); } -describe("framework redaction parity with product source-of-truth", () => { - it("framework token prefix patterns match product token prefix patterns", () => { - const framework = fingerprint(FRAMEWORK_TOKEN_PREFIX_PATTERNS); +describe("fixture redaction parity with product source-of-truth", () => { + it("fixture token prefix patterns match product token prefix patterns", () => { + const fixture = fingerprint(FIXTURE_TOKEN_PREFIX_PATTERNS); const product = 
fingerprint(PRODUCT_TOKEN_PREFIX_PATTERNS); - expect(framework.length).toBeGreaterThan(0); + expect(fixture.length).toBeGreaterThan(0); expect(product.length).toBeGreaterThan(0); - expect(framework).toEqual(product); + expect(fixture).toEqual(product); }); - it("framework context patterns match product context patterns", () => { - const framework = fingerprint(FRAMEWORK_CONTEXT_PATTERNS); + it("fixture context patterns match product context patterns", () => { + const fixture = fingerprint(FIXTURE_CONTEXT_PATTERNS); const product = fingerprint(PRODUCT_CONTEXT_PATTERNS); - expect(framework.length).toBeGreaterThan(0); + expect(fixture.length).toBeGreaterThan(0); expect(product.length).toBeGreaterThan(0); - expect(framework).toEqual(product); + expect(fixture).toEqual(product); }); }); diff --git a/test/e2e-scenario/support-tests/e2e-shell-supervisor.test.ts b/test/e2e-scenario/support-tests/e2e-shell-supervisor.test.ts index 06e04ab1af..35c031d1c7 100644 --- a/test/e2e-scenario/support-tests/e2e-shell-supervisor.test.ts +++ b/test/e2e-scenario/support-tests/e2e-shell-supervisor.test.ts @@ -10,7 +10,7 @@ * with SIGTERM -> SIGKILL escalation, so a bash child that * ignores SIGTERM (e.g. `trap "" TERM`) still dies on timeout. * - * Both come from the leaf modules under framework/shell/, so the + * Both come from the leaf modules under fixtures/shell/, so the * assertions live here at the leaf level. 
The end-to-end behaviour * (orchestrator log redaction, fixture artifact persistence, probe * outcome mapping) stays covered by the existing support-tests @@ -20,12 +20,12 @@ import { spawn } from "node:child_process"; import { describe, expect, it } from "vitest"; -import { superviseChild } from "../framework/shell/supervisor.ts"; -import { trustedShellCommand, validateShellToken } from "../framework/shell/trusted-command.ts"; +import { superviseChild } from "../fixtures/shell/supervisor.ts"; +import { trustedShellCommand, validateShellToken } from "../fixtures/shell/trusted-command.ts"; const NUL = String.fromCharCode(0); -describe("framework/shell/trusted-command", () => { +describe("fixtures/shell/trusted-command", () => { it("validateShellToken rejects NUL bytes with a labelled error", () => { expect(() => validateShellToken(`a${NUL}b`, "argv[0]")).toThrowError( /argv\[0\] cannot contain NUL bytes/, @@ -68,7 +68,7 @@ describe("framework/shell/trusted-command", () => { }); }); -describe("framework/shell/supervisor", () => { +describe("fixtures/shell/supervisor", () => { it("returns exitCode 0 when the child exits cleanly", async () => { const child = spawn("bash", ["-c", "exit 0"], { detached: true, diff --git a/tools/e2e-advisor/scenarios.mts b/tools/e2e-advisor/scenarios.mts index e515612836..0e20ae94d4 100755 --- a/tools/e2e-advisor/scenarios.mts +++ b/tools/e2e-advisor/scenarios.mts @@ -233,7 +233,7 @@ export function buildSystemPrompt(schema: AdvisorSchema): string { "- `test/e2e-scenario/scenarios/registry.ts` and `test/e2e-scenario/scenarios/scenarios/` — typed scenario IDs and metadata.", "- `test/e2e-scenario/scenarios/runtime-support.ts` — which typed scenarios are wired for live Vitest execution.", "- `test/e2e-scenario/live/registry-scenarios.test.ts` — live Vitest registry scenario entry point.", - "- `test/e2e-scenario/framework/` and `test/e2e-scenario/support-tests/` — shared Vitest fixtures, clients, and phase helpers.", + "- 
`test/e2e-scenario/fixtures/` and `test/e2e-scenario/support-tests/` — shared Vitest fixtures, clients, and phase helpers.", "", "Decision policy:", "- Required (all scenarios): changes to scenario registry, matrix emission, expected-state metadata, live support classification, shared fixtures, or the Vitest scenario workflow itself. Recommend the `e2e-scenarios-all` fan-out through `e2e-vitest-scenarios.yaml`.", diff --git a/vitest.config.ts b/vitest.config.ts index f510748dcd..05f57627ed 100644 --- a/vitest.config.ts +++ b/vitest.config.ts @@ -7,7 +7,7 @@ import { shouldRunBranchValidationE2E, shouldRunInstallerIntegration, shouldRunLiveE2EScenarios, -} from "./test/e2e-scenario/framework/live-project-gate.ts"; +} from "./test/e2e-scenario/fixtures/live-project-gate.ts"; import { testTimeout } from "./test/helpers/timeouts"; const isGithubActions = process.env.GITHUB_ACTIONS === "true"; From ddb9fb856e6f29b0f9641d0726a19019fec03906 Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Wed, 10 Jun 2026 04:49:03 -0700 Subject: [PATCH 16/17] test(e2e): migrate strict tool-call probe to Vitest --- .github/workflows/regression-e2e.yaml | 26 +- .../live/strict-tool-call-probe.test.ts | 491 ++++++++++++++++++ test/e2e/test-strict-tool-call-probe.sh | 377 -------------- test/regression-e2e-workflow.test.ts | 24 + 4 files changed, 533 insertions(+), 385 deletions(-) create mode 100644 test/e2e-scenario/live/strict-tool-call-probe.test.ts delete mode 100755 test/e2e/test-strict-tool-call-probe.sh diff --git a/.github/workflows/regression-e2e.yaml b/.github/workflows/regression-e2e.yaml index 400ded2bae..3185bee903 100644 --- a/.github/workflows/regression-e2e.yaml +++ b/.github/workflows/regression-e2e.yaml @@ -284,21 +284,31 @@ jobs: uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6 with: node-version: "22" + cache: npm - - name: Run strict tool-call probe E2E test + - name: Install root dependencies + run: npm ci --ignore-scripts + + - name: Run strict 
tool-call probe Vitest E2E test env: NEMOCLAW_TEST_NO_SLEEP: "1" - run: bash test/e2e/test-strict-tool-call-probe.sh + NEMOCLAW_RUN_E2E_SCENARIOS: "1" + E2E_ARTIFACT_DIR: ${{ github.workspace }}/e2e-artifacts/vitest/strict-tool-call-probe + run: | + set -euo pipefail + npx vitest run --project e2e-scenarios-live \ + test/e2e-scenario/live/strict-tool-call-probe.test.ts \ + --silent=false --reporter=default - - name: Upload strict tool-call probe logs on failure - if: failure() + - name: Upload strict tool-call probe artifacts + if: always() uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 with: - name: strict-tool-call-probe-logs - path: | - /tmp/nemoclaw-e2e-strict-tool-call-probe.log - /tmp/nemoclaw-e2e-strict-tool-call-probe-node.log + name: strict-tool-call-probe-artifacts + path: e2e-artifacts/vitest/strict-tool-call-probe/ + include-hidden-files: false if-no-files-found: ignore + retention-days: 14 # ── Gateway drift preflight E2E ───────────────────────────── # Coverage guard for #3399 / #3423. A stale OpenShell gateway image can diff --git a/test/e2e-scenario/live/strict-tool-call-probe.test.ts b/test/e2e-scenario/live/strict-tool-call-probe.test.ts new file mode 100644 index 0000000000..8f02c7d5f5 --- /dev/null +++ b/test/e2e-scenario/live/strict-tool-call-probe.test.ts @@ -0,0 +1,491 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +import assert from "node:assert/strict"; +import { type ChildProcessByStdio, spawn, spawnSync } from "node:child_process"; +import fs from "node:fs"; +import fsp from "node:fs/promises"; +import { createRequire } from "node:module"; +import os from "node:os"; +import path from "node:path"; +import type { Readable } from "node:stream"; + +import type { ArtifactSink } from "../fixtures/artifacts.ts"; +import { expect, test } from "../fixtures/e2e-test.ts"; +import { shouldRunLiveE2EScenarios } from "../fixtures/live-project-gate.ts"; + +// Migrated from test/e2e/test-strict-tool-call-probe.sh. This hermetic +// regression guard for #4537 exercises the Local Ollama strict Chat +// Completions tool-call validation path against local OpenAI-compatible mocks. + +const REPO_ROOT = path.resolve(import.meta.dirname, "../../.."); +const BUILD_TIMEOUT_MS = 120_000; +const PROBE_TIMEOUT_MS = 60_000; +const requireFromHere = createRequire(import.meta.url); +const runStrictToolCallProbeTest = shouldRunLiveE2EScenarios() ? 
test : test.skip; + +type JsonObject = Record; +type ValidationHelpers = { + validateOpenAiLikeSelection: ( + label: string, + endpoint: string, + model: string, + credentialEnv: string | null, + recoveryPrompt: string, + apiKey: string | null, + options: unknown, + ) => Promise; +}; +type ValidationModule = { + createInferenceSelectionValidationHelpers: (options: { + isNonInteractive: () => boolean; + agentProductName: () => string; + promptValidationRecovery: (_label: string, recovery: unknown) => Promise; + }) => ValidationHelpers; +}; +type LocalInferenceModule = { + buildOllamaProbeOptions: (skipVerify: boolean) => { + skipResponsesProbe?: unknown; + requireChatCompletionsToolCalling?: unknown; + }; +}; +type BuiltModules = { + validation: ValidationModule; + localInference: LocalInferenceModule; +}; +type MockEndpoint = { + endpoint: string; + readRequests: () => Array<{ method: string; url: string; body: JsonObject }>; + stop: () => Promise; +}; + +let builtModules: BuiltModules | null = null; + +function requireDist(...parts: string[]): T { + return requireFromHere(path.join(REPO_ROOT, "dist", "lib", ...parts)) as T; +} + +function loadBuiltModules(): BuiltModules { + builtModules ??= { + validation: requireDist("onboard", "inference-selection-validation"), + localInference: requireDist("inference", "local"), + }; + return builtModules; +} + +function assertObject(value: unknown, label: string): JsonObject { + assert.equal(typeof value, "object", `${label} must be an object`); + assert.notEqual(value, null, `${label} must not be null`); + assert.equal(Array.isArray(value), false, `${label} must not be an array`); + return value as JsonObject; +} + +function assertStrictPayload(payload: JsonObject): void { + assert.equal(payload.model, "mock-tool-model"); + assert.equal(payload.tool_choice, "required"); + assert.equal(payload.max_tokens, 256); + assert.equal(payload.stream, false); + assert.equal(payload.temperature, 0); + 
assert.ok(Array.isArray(payload.messages), "messages must be present"); + assert.ok(Array.isArray(payload.tools), "tools must be present"); + const tools = payload.tools as unknown[]; + assert.ok( + tools.some((tool) => { + const toolObject = assertObject(tool, "tool"); + const functionObject = assertObject(toolObject.function, "tool.function"); + return functionObject.name === "sessions_send"; + }), + "sessions_send tool must be present", + ); +} + +function makeValidationHelpers(recoveryCalls: unknown[]): ValidationHelpers { + return loadBuiltModules().validation.createInferenceSelectionValidationHelpers({ + isNonInteractive: () => false, + agentProductName: () => "NemoClaw", + promptValidationRecovery: async (_label, recovery) => { + recoveryCalls.push(recovery); + return "retry"; + }, + }); +} + +function strictOllamaProbeOptions(): unknown { + const options = loadBuiltModules().localInference.buildOllamaProbeOptions(false); + assert.equal(options.skipResponsesProbe, true); + assert.equal(options.requireChatCompletionsToolCalling, true); + return options; +} + +async function validate(endpoint: string, recoveryCalls: unknown[] = []): Promise { + const helpers = makeValidationHelpers(recoveryCalls); + return helpers.validateOpenAiLikeSelection( + "Local Ollama", + endpoint, + "mock-tool-model", + null, + "Choose a different Ollama model or select Other.", + null, + strictOllamaProbeOptions(), + ); +} + +function serverSource(): string { + return String.raw` +const fs = require("node:fs"); +const http = require("node:http"); + +const mode = process.env.MOCK_MODE; +const requestsFile = process.env.REQUESTS_FILE; +let count = 0; + +function toolCallResponse() { + return { + choices: [ + { + message: { + role: "assistant", + content: "", + tool_calls: [ + { + type: "function", + function: { + name: "sessions_send", + arguments: JSON.stringify({ message: "hello" }), + }, + }, + ], + }, + }, + ], + }; +} + +function plainTextResponse() { + return { choices: [{ 
message: { role: "assistant", content: "OK" } }] }; +} + +function responseForRequest() { + if (mode === "success") return { status: 200, body: toolCallResponse() }; + if (mode === "transient-502") { + return count === 1 + ? { status: 502, body: { error: { message: "transient upstream failure" } } } + : { status: 200, body: toolCallResponse() }; + } + if (mode === "plain-text") return { status: 200, body: plainTextResponse() }; + return { status: 500, body: { error: { message: "unknown mock mode" } } }; +} + +const server = http.createServer((req, res) => { + const chunks = []; + req.on("data", (chunk) => chunks.push(Buffer.from(chunk))); + req.on("end", () => { + count += 1; + const rawBody = Buffer.concat(chunks).toString("utf8"); + let parsedBody = null; + try { + parsedBody = rawBody ? JSON.parse(rawBody) : null; + } catch (error) { + parsedBody = { parseError: error.message, rawBody }; + } + fs.appendFileSync( + requestsFile, + JSON.stringify({ count, method: req.method, url: req.url, body: parsedBody }) + "\n", + ); + const response = responseForRequest(); + res.writeHead(response.status, { "Content-Type": "application/json" }); + res.end(JSON.stringify(response.body)); + }); +}); + +server.listen(0, "127.0.0.1", () => { + process.stdout.write(JSON.stringify({ port: server.address().port }) + "\n"); +}); +process.on("SIGTERM", () => server.close(() => process.exit(0))); +`; +} + +async function waitForMockPort( + child: ChildProcessByStdio, + mode: string, + stderr: () => string, +): Promise { + return new Promise((resolve, reject) => { + let stdout = ""; + const timeout = setTimeout(() => { + reject(new Error(`mock ${mode} did not report a port; stderr=${stderr()}`)); + }, 5000); + child.on("error", (error) => { + clearTimeout(timeout); + reject(error); + }); + child.on("exit", (code) => { + clearTimeout(timeout); + reject( + new Error(`mock ${mode} exited before ready with ${String(code)}; stderr=${stderr()}`), + ); + }); + child.stdout.on("data", (chunk) 
=> { + stdout += chunk.toString("utf8"); + const line = stdout.split(/\r?\n/).find(Boolean); + if (!line) return; + clearTimeout(timeout); + try { + resolve(JSON.parse(line).port as number); + } catch (error) { + reject(error); + } + }); + }); +} + +async function startMockEndpoint(mode: string): Promise { + const dir = fs.mkdtempSync(path.join(os.tmpdir(), `nemoclaw-strict-probe-${mode}-`)); + const requestsFile = path.join(dir, "requests.jsonl"); + fs.writeFileSync(requestsFile, ""); + const child = spawn(process.execPath, ["-e", serverSource()], { + env: { ...process.env, MOCK_MODE: mode, REQUESTS_FILE: requestsFile }, + stdio: ["ignore", "pipe", "pipe"], + }); + + let stderr = ""; + child.stderr.on("data", (chunk) => { + stderr += chunk.toString("utf8"); + }); + + const port = await waitForMockPort(child, mode, () => stderr); + + return { + endpoint: `http://127.0.0.1:${String(port)}/v1`, + readRequests() { + const raw = fs.readFileSync(requestsFile, "utf8").trim(); + return raw ? raw.split(/\r?\n/).map((line) => JSON.parse(line)) : []; + }, + async stop() { + if (child.exitCode === null) { + child.kill("SIGTERM"); + await new Promise((resolve) => child.once("exit", resolve)); + } + fs.rmSync(dir, { recursive: true, force: true }); + }, + }; +} + +async function withMockEndpoint( + artifacts: ArtifactSink, + mode: string, + label: string, + exercise: (endpoint: string, readRequests: MockEndpoint["readRequests"]) => Promise, +): Promise { + const mock = await startMockEndpoint(mode); + try { + await exercise(mock.endpoint, () => mock.readRequests()); + } finally { + await artifacts.writeJson(`requests/${label}.json`, mock.readRequests()).catch(() => undefined); + await mock.stop(); + } +} + +function onboardingCallerScript(): string { + return String.raw` +const assert = require("node:assert/strict"); +const path = require("node:path"); + +function fromDist(...parts) { + return require(path.join(process.cwd(), "dist", "lib", ...parts)); +} + 
+process.env.NEMOCLAW_NON_INTERACTIVE = "1"; +process.env.NEMOCLAW_PROVIDER = "ollama"; +process.env.NEMOCLAW_MODEL = "mock-tool-model"; +process.env.NEMOCLAW_TEST_NO_SLEEP = "1"; + +const runner = fromDist("runner"); +runner.run = () => ({ status: 0 }); +runner.runShell = () => ({ status: 0 }); +runner.runCapture = (command) => { + const cmd = Array.isArray(command) ? command.join(" ") : String(command); + if (cmd.includes("command -v") && cmd.includes("ollama")) return ""; + if (cmd.includes("/api/tags")) { + return JSON.stringify({ models: [{ name: "mock-tool-model" }] }); + } + if (cmd.includes("/api/show")) { + return JSON.stringify({ capabilities: ["completion", "tools"] }); + } + if (cmd.includes("/api/ps")) { + return JSON.stringify({ models: [{ name: "mock-tool-model", context_length: 4096 }] }); + } + if (cmd.includes("127.0.0.1:8000/v1/models")) return ""; + return ""; +}; +runner.runCaptureEx = (command) => { + const cmd = Array.isArray(command) ? command.join(" ") : String(command); + if (cmd.includes("/api/generate")) { + return { stdout: JSON.stringify({ response: "hello" }), stderr: "", exitCode: 0, timedOut: false }; + } + return { stdout: "", stderr: "", exitCode: 0, timedOut: false }; +}; + +fromDist("onboard", "ollama-systemd").ensureOllamaLoopbackSystemdOverride = () => "ready"; +fromDist("onboard", "local-inference-topology").shouldFrontOllamaWithProxy = () => false; + +const credentials = fromDist("credentials", "store"); +credentials.prompt = async (message) => { + throw new Error("Unexpected prompt during non-interactive Ollama onboarding: " + message); +}; +credentials.ensureApiKey = async () => { + throw new Error("Unexpected API key request during Local Ollama onboarding"); +}; + +const lines = []; +const originalLog = console.log; +const originalError = console.error; +console.log = (...args) => lines.push(args.join(" ")); +console.error = (...args) => lines.push(args.join(" ")); + +(async () => { + try { + const { setupNim } = 
fromDist("onboard"); + const result = await setupNim(null, null); + originalLog(JSON.stringify({ result, lines })); + } catch (error) { + originalError(lines.join("\n")); + originalError(error && error.stack ? error.stack : error); + process.exit(1); + } finally { + console.log = originalLog; + console.error = originalError; + } +})(); +`; +} + +function runOnboardingCallerAgainstMock(endpoint: string): void { + const port = new URL(endpoint).port; + const result = spawnSync(process.execPath, ["-e", onboardingCallerScript()], { + cwd: REPO_ROOT, + encoding: "utf8", + env: { ...process.env, NEMOCLAW_OLLAMA_PORT: port }, + timeout: 15_000, + }); + assert.equal(result.status, 0, result.stderr || result.stdout); + const payload = JSON.parse(result.stdout.trim().split(/\r?\n/).pop() ?? "{}") as { + result?: { provider?: string; model?: string; preferredInferenceApi?: string }; + }; + assert.equal(payload.result?.provider, "ollama-local"); + assert.equal(payload.result?.model, "mock-tool-model"); + assert.equal(payload.result?.preferredInferenceApi, "openai-completions"); +} + +runStrictToolCallProbeTest( + "strict Chat Completions tool-call probe uses bounded payloads and fails closed", + { + timeout: BUILD_TIMEOUT_MS + PROBE_TIMEOUT_MS, + }, + async ({ artifacts, host }) => { + await artifacts.writeJson("scenario.json", { + id: "strict-tool-call-probe", + runner: "vitest", + boundary: "host-openai-compatible-mock", + migratedFrom: "test/e2e/test-strict-tool-call-probe.sh", + }); + + const previousEnv = { + NEMOCLAW_TEST_NO_SLEEP: process.env.NEMOCLAW_TEST_NO_SLEEP, + NO_PROXY: process.env.NO_PROXY, + no_proxy: process.env.no_proxy, + }; + process.env.NEMOCLAW_TEST_NO_SLEEP = "1"; + process.env.NO_PROXY = [process.env.NO_PROXY, "127.0.0.1", "localhost"] + .filter(Boolean) + .join(","); + process.env.no_proxy = [process.env.no_proxy, "127.0.0.1", "localhost"] + .filter(Boolean) + .join(","); + + try { + const build = await host.command("npm", ["run", "build:cli"], { + 
artifactName: "strict-tool-call-probe-build-cli", + cwd: REPO_ROOT, + inheritEnv: true, + timeoutMs: BUILD_TIMEOUT_MS, + }); + expect(build.exitCode, `build failed\n${build.stderr}`).toBe(0); + + await withMockEndpoint( + artifacts, + "success", + "strict-success", + async (endpoint, readRequests) => { + const result = await validate(endpoint); + expect(result).toEqual({ ok: true, api: "openai-completions" }); + const requests = readRequests(); + assert.equal(requests.length, 1); + assert.equal(requests[0].method, "POST"); + assert.equal(requests[0].url, "/v1/chat/completions"); + assertStrictPayload(requests[0].body); + }, + ); + + await withMockEndpoint( + artifacts, + "success", + "onboarding-caller", + async (endpoint, readRequests) => { + runOnboardingCallerAgainstMock(endpoint); + const requests = readRequests(); + assert.equal(requests.length, 1); + assert.equal(requests[0].method, "POST"); + assert.equal(requests[0].url, "/v1/chat/completions"); + assertStrictPayload(requests[0].body); + }, + ); + + await withMockEndpoint( + artifacts, + "transient-502", + "transient-502", + async (endpoint, readRequests) => { + const result = await validate(endpoint); + expect(result).toEqual({ ok: true, api: "openai-completions" }); + const requests = readRequests(); + assert.equal(requests.length, 2); + assertStrictPayload(requests[0].body); + assertStrictPayload(requests[1].body); + }, + ); + + await withMockEndpoint( + artifacts, + "plain-text", + "plain-text-fails-closed", + async (endpoint, readRequests) => { + const recoveryCalls: unknown[] = []; + const result = await validate(endpoint, recoveryCalls); + expect(result).toEqual({ ok: false, retry: "retry" }); + const requests = readRequests(); + assert.equal(requests.length, 1); + assertStrictPayload(requests[0].body); + assert.equal(recoveryCalls.length, 1); + }, + ); + } finally { + if (previousEnv.NEMOCLAW_TEST_NO_SLEEP === undefined) { + delete process.env.NEMOCLAW_TEST_NO_SLEEP; + } else { + 
process.env.NEMOCLAW_TEST_NO_SLEEP = previousEnv.NEMOCLAW_TEST_NO_SLEEP; + } + if (previousEnv.NO_PROXY === undefined) { + delete process.env.NO_PROXY; + } else { + process.env.NO_PROXY = previousEnv.NO_PROXY; + } + if (previousEnv.no_proxy === undefined) { + delete process.env.no_proxy; + } else { + process.env.no_proxy = previousEnv.no_proxy; + } + await fsp.rm(artifacts.pathFor("tmp"), { recursive: true, force: true }); + } + }, +); diff --git a/test/e2e/test-strict-tool-call-probe.sh b/test/e2e/test-strict-tool-call-probe.sh deleted file mode 100755 index 09c8cb8c71..0000000000 --- a/test/e2e/test-strict-tool-call-probe.sh +++ /dev/null @@ -1,377 +0,0 @@ -#!/usr/bin/env bash -# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Coverage guard for #4537. The Local Ollama onboarding path is the only -# current caller that requires strict Chat Completions tool calls. This -# hermetic E2E exercises that validation path against an OpenAI-compatible -# mock endpoint so payload-shape and retry regressions do not require a GPU -# Ollama runner to catch. - -set -euo pipefail - -LOG_FILE="/tmp/nemoclaw-e2e-strict-tool-call-probe.log" -exec > >(tee "$LOG_FILE") 2>&1 - -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -NC='\033[0m' - -pass() { echo -e "${GREEN}[PASS]${NC} $1"; } -info() { echo -e "${YELLOW}[INFO]${NC} $1"; } -diag() { echo -e "${YELLOW}[DIAG]${NC} $1"; } -fail() { - echo -e "${RED}[FAIL]${NC} $1" >&2 - diag "strict tool-call probe log tail:" - tail -120 "$LOG_FILE" 2>/dev/null || true - exit 1 -} - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" -REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)" -cd "$REPO_ROOT" - -info "Preparing CLI build" -if [ ! 
-d node_modules ]; then - npm ci --ignore-scripts -fi -npm run build:cli - -info "Running strict Chat Completions tool-call probe against a hermetic mock" -set +e -NEMOCLAW_TEST_NO_SLEEP=1 node <<'NODE' 2>&1 | tee /tmp/nemoclaw-e2e-strict-tool-call-probe-node.log -const assert = require("node:assert/strict"); -const { spawn, spawnSync } = require("node:child_process"); -const fs = require("node:fs"); -const os = require("node:os"); -const path = require("node:path"); - -process.env.NEMOCLAW_TEST_NO_SLEEP = "1"; -process.env.NO_PROXY = [process.env.NO_PROXY, "127.0.0.1", "localhost"].filter(Boolean).join(","); -process.env.no_proxy = [process.env.no_proxy, "127.0.0.1", "localhost"].filter(Boolean).join(","); - -const { - createInferenceSelectionValidationHelpers, -} = require("./dist/lib/onboard/inference-selection-validation"); -const localInference = require("./dist/lib/inference/local"); - -function assertStrictPayload(payload) { - assert.equal(payload.model, "mock-tool-model"); - assert.equal(payload.tool_choice, "required"); - assert.equal(payload.max_tokens, 256); - assert.equal(payload.stream, false); - assert.equal(payload.temperature, 0); - assert.ok(Array.isArray(payload.messages), "messages must be present"); - assert.ok(Array.isArray(payload.tools), "tools must be present"); - assert.ok( - payload.tools.some((tool) => tool?.function?.name === "sessions_send"), - "sessions_send tool must be present", - ); -} - -function makeValidationHelpers(recoveryCalls) { - return createInferenceSelectionValidationHelpers({ - isNonInteractive: () => false, - agentProductName: () => "NemoClaw", - promptValidationRecovery: async (_label, recovery) => { - recoveryCalls.push(recovery); - return "retry"; - }, - }); -} - -function strictOllamaProbeOptions() { - const options = localInference.buildOllamaProbeOptions(false); - assert.equal(options.skipResponsesProbe, true); - assert.equal(options.requireChatCompletionsToolCalling, true); - return options; -} - -async function 
validate(endpoint, recoveryCalls = []) { - const helpers = makeValidationHelpers(recoveryCalls); - return helpers.validateOpenAiLikeSelection( - "Local Ollama", - endpoint, - "mock-tool-model", - null, - "Choose a different Ollama model or select Other.", - null, - strictOllamaProbeOptions(), - ); -} - -function serverSource() { - return String.raw` -const fs = require("node:fs"); -const http = require("node:http"); - -const mode = process.env.MOCK_MODE; -const requestsFile = process.env.REQUESTS_FILE; -let count = 0; - -function toolCallResponse() { - return { - choices: [ - { - message: { - role: "assistant", - content: "", - tool_calls: [ - { - type: "function", - function: { - name: "sessions_send", - arguments: JSON.stringify({ message: "hello" }), - }, - }, - ], - }, - }, - ], - }; -} - -function plainTextResponse() { - return { choices: [{ message: { role: "assistant", content: "OK" } }] }; -} - -function responseForRequest() { - if (mode === "success") return { status: 200, body: toolCallResponse() }; - if (mode === "transient-502") { - return count === 1 - ? { status: 502, body: { error: { message: "transient upstream failure" } } } - : { status: 200, body: toolCallResponse() }; - } - if (mode === "plain-text") return { status: 200, body: plainTextResponse() }; - return { status: 500, body: { error: { message: "unknown mock mode" } } }; -} - -const server = http.createServer((req, res) => { - const chunks = []; - req.on("data", (chunk) => chunks.push(Buffer.from(chunk))); - req.on("end", () => { - count += 1; - const rawBody = Buffer.concat(chunks).toString("utf8"); - let parsedBody = null; - try { - parsedBody = rawBody ? 
JSON.parse(rawBody) : null; - } catch (error) { - parsedBody = { parseError: error.message, rawBody }; - } - fs.appendFileSync( - requestsFile, - JSON.stringify({ count, method: req.method, url: req.url, body: parsedBody }) + "\n", - ); - const response = responseForRequest(); - res.writeHead(response.status, { "Content-Type": "application/json" }); - res.end(JSON.stringify(response.body)); - }); -}); - -server.listen(0, "127.0.0.1", () => { - process.stdout.write(JSON.stringify({ port: server.address().port }) + "\n"); -}); -process.on("SIGTERM", () => server.close(() => process.exit(0))); -`; -} - -async function startMockEndpoint(mode) { - const dir = fs.mkdtempSync(path.join(os.tmpdir(), `nemoclaw-strict-probe-${mode}-`)); - const requestsFile = path.join(dir, "requests.jsonl"); - fs.writeFileSync(requestsFile, ""); - const child = spawn(process.execPath, ["-e", serverSource()], { - env: { ...process.env, MOCK_MODE: mode, REQUESTS_FILE: requestsFile }, - stdio: ["ignore", "pipe", "pipe"], - }); - - let stderr = ""; - child.stderr.on("data", (chunk) => { - stderr += chunk.toString("utf8"); - process.stderr.write(`[mock ${mode}] ${chunk}`); - }); - - const port = await new Promise((resolve, reject) => { - let stdout = ""; - const timeout = setTimeout(() => { - reject(new Error(`mock ${mode} did not report a port; stderr=${stderr}`)); - }, 5000); - child.on("exit", (code) => { - clearTimeout(timeout); - reject(new Error(`mock ${mode} exited before ready with ${code}; stderr=${stderr}`)); - }); - child.stdout.on("data", (chunk) => { - stdout += chunk.toString("utf8"); - const line = stdout.split(/\r?\n/).find(Boolean); - if (!line) return; - clearTimeout(timeout); - try { - resolve(JSON.parse(line).port); - } catch (error) { - reject(error); - } - }); - }); - - return { - endpoint: `http://127.0.0.1:${port}/v1`, - readRequests() { - const raw = fs.readFileSync(requestsFile, "utf8").trim(); - return raw ? 
raw.split(/\r?\n/).map((line) => JSON.parse(line)) : []; - }, - async stop() { - if (child.exitCode === null) { - child.kill("SIGTERM"); - await new Promise((resolve) => child.once("exit", resolve)); - } - fs.rmSync(dir, { recursive: true, force: true }); - }, - }; -} - -async function withMockEndpoint(mode, exercise) { - const mock = await startMockEndpoint(mode); - try { - await exercise(mock.endpoint, () => mock.readRequests()); - } finally { - await mock.stop(); - } -} - -function runOnboardingCallerAgainstMock(endpoint) { - const port = new URL(endpoint).port; - const childScript = String.raw` -const assert = require("node:assert/strict"); - -process.env.NEMOCLAW_NON_INTERACTIVE = "1"; -process.env.NEMOCLAW_PROVIDER = "ollama"; -process.env.NEMOCLAW_MODEL = "mock-tool-model"; -process.env.NEMOCLAW_TEST_NO_SLEEP = "1"; - -const runner = require("./dist/lib/runner"); -runner.run = () => ({ status: 0 }); -runner.runShell = () => ({ status: 0 }); -runner.runCapture = (command) => { - const cmd = Array.isArray(command) ? command.join(" ") : String(command); - if (cmd.includes("command -v") && cmd.includes("ollama")) return ""; - if (cmd.includes("/api/tags")) { - return JSON.stringify({ models: [{ name: "mock-tool-model" }] }); - } - if (cmd.includes("/api/show")) { - return JSON.stringify({ capabilities: ["completion", "tools"] }); - } - if (cmd.includes("/api/ps")) { - return JSON.stringify({ models: [{ name: "mock-tool-model", context_length: 4096 }] }); - } - if (cmd.includes("127.0.0.1:8000/v1/models")) return ""; - return ""; -}; -runner.runCaptureEx = (command) => { - const cmd = Array.isArray(command) ? 
command.join(" ") : String(command); - if (cmd.includes("/api/generate")) { - return { stdout: JSON.stringify({ response: "hello" }), stderr: "", exitCode: 0, timedOut: false }; - } - return { stdout: "", stderr: "", exitCode: 0, timedOut: false }; -}; - -require("./dist/lib/onboard/ollama-systemd").ensureOllamaLoopbackSystemdOverride = () => "ready"; -require("./dist/lib/onboard/local-inference-topology").shouldFrontOllamaWithProxy = () => false; - -const credentials = require("./dist/lib/credentials/store"); -credentials.prompt = async (message) => { - throw new Error("Unexpected prompt during non-interactive Ollama onboarding: " + message); -}; -credentials.ensureApiKey = async () => { - throw new Error("Unexpected API key request during Local Ollama onboarding"); -}; - -const lines = []; -const originalLog = console.log; -const originalError = console.error; -console.log = (...args) => lines.push(args.join(" ")); -console.error = (...args) => lines.push(args.join(" ")); - -(async () => { - try { - const { setupNim } = require("./dist/lib/onboard"); - const result = await setupNim(null, null); - originalLog(JSON.stringify({ result, lines })); - } catch (error) { - originalError(lines.join("\n")); - originalError(error && error.stack ? 
error.stack : error); - process.exit(1); - } finally { - console.log = originalLog; - console.error = originalError; - } -})(); -`; - - const result = spawnSync(process.execPath, ["-e", childScript], { - cwd: process.cwd(), - encoding: "utf8", - env: { ...process.env, NEMOCLAW_OLLAMA_PORT: port }, - timeout: 15000, - }); - assert.equal(result.status, 0, result.stderr || result.stdout); - const payload = JSON.parse(result.stdout.trim().split(/\r?\n/).pop()); - assert.equal(payload.result.provider, "ollama-local"); - assert.equal(payload.result.model, "mock-tool-model"); - assert.equal(payload.result.preferredInferenceApi, "openai-completions"); -} - -(async () => { - await withMockEndpoint("success", async (endpoint, readRequests) => { - const result = await validate(endpoint); - assert.deepEqual(result, { ok: true, api: "openai-completions" }); - const requests = readRequests(); - assert.equal(requests.length, 1); - assert.equal(requests[0].method, "POST"); - assert.equal(requests[0].url, "/v1/chat/completions"); - assertStrictPayload(requests[0].body); - console.log("[PASS] strict validation succeeds with structured tool_calls"); - }); - - await withMockEndpoint("success", async (endpoint, readRequests) => { - runOnboardingCallerAgainstMock(endpoint); - const requests = readRequests(); - assert.equal(requests.length, 1); - assert.equal(requests[0].method, "POST"); - assert.equal(requests[0].url, "/v1/chat/completions"); - assertStrictPayload(requests[0].body); - console.log("[PASS] Local Ollama onboarding caller enforces strict Chat Completions validation"); - }); - - await withMockEndpoint("transient-502", async (endpoint, readRequests) => { - const result = await validate(endpoint); - assert.deepEqual(result, { ok: true, api: "openai-completions" }); - const requests = readRequests(); - assert.equal(requests.length, 2); - assertStrictPayload(requests[0].body); - assertStrictPayload(requests[1].body); - console.log("[PASS] strict validation retries a transient 
502 and keeps bounded payloads"); - }); - - await withMockEndpoint("plain-text", async (endpoint, readRequests) => { - const recoveryCalls = []; - const result = await validate(endpoint, recoveryCalls); - assert.deepEqual(result, { ok: false, retry: "retry" }); - const requests = readRequests(); - assert.equal(requests.length, 1); - assertStrictPayload(requests[0].body); - assert.equal(recoveryCalls.length, 1); - console.log("[PASS] strict validation fails closed when no structured tool_call is returned"); - }); -})().catch((error) => { - console.error(error && error.stack ? error.stack : error); - process.exit(1); -}); -NODE -NODE_EXIT=$? -set -e - -if [ "$NODE_EXIT" -ne 0 ]; then - fail "strict Chat Completions tool-call probe harness failed" -fi - -pass "strict Chat Completions tool-call probe E2E passed" diff --git a/test/regression-e2e-workflow.test.ts b/test/regression-e2e-workflow.test.ts index 8a932973d2..1cfc066d9d 100644 --- a/test/regression-e2e-workflow.test.ts +++ b/test/regression-e2e-workflow.test.ts @@ -36,4 +36,28 @@ describe("Regression E2E workflow contract", () => { expect(selectorScript).not.toContain("docker-unreachable-gateway-start-e2e"); expect(selectorScript).not.toContain("docker_unreachable_gateway_start"); }); + + it("runs strict tool-call probe through Vitest artifacts", () => { + const job = workflow.jobs?.["strict-tool-call-probe-e2e"]; + const checkoutStep = job?.steps?.find((step) => + String(step.uses ?? 
"").startsWith("actions/checkout@"), + ); + const runStep = job?.steps?.find( + (step) => step.name === "Run strict tool-call probe Vitest E2E test", + ); + const uploadStep = job?.steps?.find( + (step) => step.name === "Upload strict tool-call probe artifacts", + ); + + expect(checkoutStep?.with?.["persist-credentials"]).toBe(false); + expect(runStep?.run).toContain("npx vitest run --project e2e-scenarios-live"); + expect(runStep?.run).toContain("test/e2e-scenario/live/strict-tool-call-probe.test.ts"); + expect(runStep?.run).not.toContain("test/e2e/test-strict-tool-call-probe.sh"); + expect(runStep?.env?.NEMOCLAW_RUN_E2E_SCENARIOS).toBe("1"); + expect(runStep?.env?.E2E_ARTIFACT_DIR).toBe( + "${{ github.workspace }}/e2e-artifacts/vitest/strict-tool-call-probe", + ); + expect(uploadStep?.with?.path).toBe("e2e-artifacts/vitest/strict-tool-call-probe/"); + expect(uploadStep?.with?.["include-hidden-files"]).toBe(false); + }); }); From 334ea0debf5ae4d34155da6c55e6180310cbc0d1 Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Wed, 10 Jun 2026 09:19:02 -0700 Subject: [PATCH 17/17] test(e2e): refresh fixture imports --- test/e2e-scenario/live/strict-tool-call-probe.test.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/e2e-scenario/live/strict-tool-call-probe.test.ts b/test/e2e-scenario/live/strict-tool-call-probe.test.ts index 8f02c7d5f5..17b522451d 100644 --- a/test/e2e-scenario/live/strict-tool-call-probe.test.ts +++ b/test/e2e-scenario/live/strict-tool-call-probe.test.ts @@ -10,9 +10,9 @@ import os from "node:os"; import path from "node:path"; import type { Readable } from "node:stream"; -import type { ArtifactSink } from "../framework/artifacts.ts"; -import { expect, test } from "../framework/e2e-test.ts"; -import { shouldRunLiveE2EScenarios } from "../framework/live-project-gate.ts"; +import type { ArtifactSink } from "../fixtures/artifacts.ts"; +import { expect, test } from "../fixtures/e2e-test.ts"; +import { 
shouldRunLiveE2EScenarios } from "../fixtures/live-project-gate.ts"; // Migrated from test/e2e/test-strict-tool-call-probe.sh. This hermetic // regression guard for #4537 exercises the Local Ollama strict Chat