diff --git a/.github/workflows/carl-install-smoke.yml b/.github/workflows/carl-install-smoke.yml new file mode 100644 index 000000000..0a08c6092 --- /dev/null +++ b/.github/workflows/carl-install-smoke.yml @@ -0,0 +1,99 @@ +# Carl-install smoke — runs the EXACT install command Carl runs, then +# verifies the page Carl opens after install actually serves usable HTML. +# +# Closes the gap that let #950 merge with the Mac install path doing a +# hidden 5-15min Rust source build despite the README claiming "Docker- +# first: no compilation needed." Existing CI gates (verify-architectures, +# verify-after-rebuild, validate, install-and-run-gate) all passed because +# they validate image presence + revision label + service health on a +# CI-only docker compose. They never exercised `curl install.sh | bash`. +# +# Status: ADVISORY for the first week of operation (per docs/CARL-CI-PLAN.md +# rollout section). Once we have <2% false-fail rate over 1 week, flip to +# REQUIRED via the PrimaryBranches ruleset PUT. Until then, this workflow +# runs but doesn't block merge — letting us tune the smoke without locking +# the merge button on flakes. + +name: Carl Install Smoke + +on: + pull_request: + branches: [canary, main] + paths: + # Run when anything that affects Carl's install path changes. + # No need to re-run on TS-only widget changes that don't touch + # install/docker; those are covered by other gates. + - 'install.sh' + - 'install.ps1' + - 'setup.sh' + - 'bootstrap.sh' + - 'src/scripts/install*.sh' + - 'src/scripts/lib/install-common.sh' + - 'docker/**' + - 'docker-compose*.yml' + - 'src/.dockerignore' + - 'src/workers/.dockerignore' + - 'scripts/ci/carl-install-smoke.sh' + - '.github/workflows/carl-install-smoke.yml' + push: + branches: [canary, main] + # Manual trigger so anyone can validate Carl's path against any branch + # without opening a throwaway PR. 
+ workflow_dispatch: + inputs: + install_ref: + description: 'Git ref to fetch install.sh from (sha / branch / tag)' + required: false + default: '' + +jobs: + carl-install-smoke-amd64: + name: carl-install-smoke (linux/amd64) + runs-on: ubuntu-latest + timeout-minutes: 40 # must exceed install cap (25 min) + health wait (5 min) below, with headroom for setup/teardown + permissions: + contents: read + packages: read + steps: + - uses: actions/checkout@v4 + with: + # PR HEAD, not the synthetic merge commit. Otherwise github.sha + # is the merge commit and the install.sh we'd fetch from + # raw.githubusercontent.com wouldn't be the one in this PR. Same + # rationale as docker-images.yml's ref pattern. + ref: ${{ github.event.pull_request.head.sha || github.sha }} + # Smoke uses the local script directly; no need for full history. + fetch-depth: 1 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Login to ghcr.io (so install.sh can pull pre-built images) + run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u "${{ github.actor }}" --password-stdin + + - name: Run carl-install smoke + env: + # Pass the PR HEAD sha so the smoke fetches the install.sh from + # THIS PR (not main). Falls back to manual workflow_dispatch input + # when not in a PR context. + CARL_INSTALL_REF: ${{ github.event.pull_request.head.sha || inputs.install_ref || github.sha }} + # 25-min cap on the docker-only install. Hybrid (Mac source-build) + # path would exceed this — by design, that's the gate firing on + # the README/install mismatch. + CARL_INSTALL_TIMEOUT_SEC: '1500' + # Generous health wait — model-init can take 3-5min on cold pull. + CARL_HEALTH_TIMEOUT_SEC: '300' + # CI shouldn't leave docker compose stacks running.
+ SKIP_TEARDOWN: '0' + run: bash scripts/ci/carl-install-smoke.sh + + - name: Upload install + page artifacts on failure + if: failure() + uses: actions/upload-artifact@v4 + with: + name: carl-install-debug-${{ github.event.pull_request.head.sha || github.sha }} + path: | + /tmp/carl-smoke-*.install.log + /tmp/carl-smoke-*.page.html + retention-days: 7 + if-no-files-found: ignore diff --git a/bootstrap.sh b/bootstrap.sh index c99a7ff45..7b3e71d4e 100755 --- a/bootstrap.sh +++ b/bootstrap.sh @@ -127,13 +127,13 @@ echo -e "${GREEN}━━━━━━━━━━━━━━━━━━━━━ echo "" case "$MODE" in browser) - echo -e " UI: ${GREEN}http://localhost:9000${NC}" + echo -e " UI: ${GREEN}http://localhost:9003${NC}" ;; cli) echo -e " CLI: ${GREEN}./jtag${NC}" ;; headless) - echo -e " Server: ${GREEN}http://localhost:9000${NC} (API only)" + echo -e " Server: ${GREEN}http://localhost:9003${NC} (API only)" ;; esac echo -e " Stop: ${GREEN}cd $INSTALL_DIR/src && npm stop${NC}" diff --git a/docs/CARL-CI-PLAN.md b/docs/CARL-CI-PLAN.md new file mode 100644 index 000000000..8d3c1746b --- /dev/null +++ b/docs/CARL-CI-PLAN.md @@ -0,0 +1,230 @@ +# Carl-Grade CI: closing the broken-merge gap + +**Status:** plan / in-progress on `fix/install-carl-mac-windows` +**Owner:** anvil (mac), green-022a (windows), bigmama-wsl (linux/cuda) +**Driver:** anvil + +## The problem we're solving + +#950 merged with the install path on Mac doing a hidden 5-15min Rust source +build despite the README claiming "Docker-first: pulls pre-built images, no +compilation needed." The CI gates that exist today (verify-architectures, +verify-after-rebuild, validate, install-and-run-gate) caught: + +- Multi-arch presence at `:pr-N` ✅ +- Per-arch revision label matches HEAD SHA ✅ +- TS/Rust compile clean ✅ +- docker-compose-up + widget-server health responds ✅ + +What they did NOT catch: + +- **Carl's actual install command** (`curl install.sh | bash`) was never + exercised by CI. 
+- **README claim** (no compilation needed) vs **install.sh behavior** + (5-15min Rust build on Mac) was never reconciled. +- **First chat message** the user would send was never validated to produce + a clean response (no `<tool_use>` XML, no vision hallucination). +- **Browser-loaded UI** was never verified to actually render and accept + user input through the same path Carl would use. + +So #950 went green on its CI gates but Carl's install experience is +materially different from the README's promise. That's the gap this work +closes. + +## Design principles + +1. **Test the user's path, not a CI-only path.** The same `install.sh` that + Carl invokes from `curl ... | bash` runs in CI. No CI-only smoke + substitutes. + +2. **Test the user's first action, not just service health.** After install + succeeds, CI sends a chat message + an image, and asserts the response + reads like a non-broken product (no XML leak, no hallucination markers, + real Vision description). + +3. **Cross-platform from day one.** amd64-linux is mandatory; arm64-mac is + high-priority via self-hosted runner OR developer-pre-push gate; Windows + (via WSL2 or PowerShell) is third tier but not optional. + +4. **Conservative-by-default required-checks.** New gates added as REQUIRED + in the PrimaryBranches ruleset only after they demonstrate <2% false-fail + rate over 1 week. False positives erode trust faster than they protect. + +5. **Same script for CI and humans.** Per Joel 2026-04-23: "make your own + testing easy." Every gate is a one-line shell invocation any of us can + run locally in 30 seconds. + +## What lands in THIS PR + +### A. Carl-install validation in CI (the headline) + +A new CI job `carl-install-and-chat-smoke` that: + +1. On a fresh ubuntu-latest GHA runner (amd64), does: + ``` + CONTINUUM_DIR=/tmp/carl-probe \ + bash <(curl -fsSL https://raw.githubusercontent.com/CambrianTech/continuum/$GITHUB_SHA/install.sh) + ``` + The actual install path Carl runs. + +2.
Times the install (target: <15 min for the Carl-mode docker-only path). + +3. After install completes, hits `http://localhost:9003/health` (existing + health check, kept) PLUS a new `chat-smoke` script: + - POSTs a chat message ("hello, who are you?") via the REST API + - Waits up to 60s for a response + - Asserts response: no `<tool_use>` XML, no `:` prefix, + >100 chars, doesn't claim it cannot do something it actually can + +4. POSTs a chat message with an image attachment (test fixture + `test-data/images/image-2.jpg` — small, public CC0): + - Asserts Vision AI's response describes the actual image content + - Asserts non-vision personas EITHER skip the response OR honestly say + they cannot see images (no hallucinated content) + +5. Tears down. Captures docker logs on failure to GHA artifacts so we can + diagnose without re-running. + +**Required check:** `carl-install-and-chat-smoke` becomes required for +canary→main promotion (after 1 week of <2% false-fail rate to confirm +stability). For PR→canary promotion, it's required from day one — canary +is where we discover regressions, that's its job. + +### B. Mac-mode install rationalization + +**Update 2026-04-25 (anvil, after reading install.sh:118-123):** B.1 is +not a choice we have. Apple's hypervisor blocks GPU passthrough to +containers (confirmed by Docker Feb 2026, comment in install.sh). Mac +NEEDS to run continuum-core natively for Metal acceleration. The 5-15min +Rust build is architectural, not a bug. Going with B.2. + +**B.2 (current plan):** README updated to admit the hybrid split: +- Linux: docker-first, no compilation (matches the existing README claim) +- Mac: docker for support services + native continuum-core for Metal + (~10min first build, incremental after; happens automatically as part + of `curl install.sh | bash` — no separate command, no env flag) + +Implementation: +- README's headline install section gets a small per-platform table or + inline note explaining the wall-clock difference.
+- install.sh prints an upfront banner on Mac estimating build time + (so Carl knows to expect ~10min, not ~3min). +- `--quiet` mode keeps existing behavior; just clearer messaging. + +(Considered B.3: ship TWO install commands — install-mac.sh vs install.sh. +Rejected: more docs surface, more drift risk, fragments the support story. +One entry point with honest messaging beats two entry points with shorter +average time.) + +### C. Browser smoke test (puppeteer) + +Within the same CI job, after install + chat-smoke pass: + +1. Launch headless Chrome via puppeteer +2. Navigate to `http://localhost:9003/` +3. Assert page loads (no chrome-error://) +4. Type "hello" into the chat input +5. Assert response renders within 30s +6. Capture screenshot for the GHA artifact (so we have visual evidence) + +Catches the chrome-error trap class of bug — when widget-server isn't ready +fast enough, browser stays in a recoverable state. + +### D. install.sh idempotence and friendly retry + +When install.sh is interrupted partway (Carl Ctrl+C's, network drops), +re-running should resume from where it left off, not retry from scratch. +Specifically: + +- Skip `git clone` if repo already at $CONTINUUM_DIR with correct origin +- Skip `docker compose pull` if all images present locally with current tags +- Skip prereq install steps that already report installed +- ONLY repeat the failed step + everything after it + +Most of this is already in install.sh's check-then-install pattern; verify +end-to-end and document the resume behavior in the README. + +### E. Browser pre-open delay + +install.sh currently opens the browser after compose-up returns. compose-up +returns when containers START, not when widget-server is HEALTHY. Result: +chrome-error trap when browser hits localhost:9003 0.5 sec before the +server is listening. + +Fix: install.sh polls widget-server `/health` with a 60s timeout BEFORE +running `open http://localhost:9003/`. 
If health doesn't come up, print a +human-readable timeout message + log dump command instead of opening the +browser to an error. + +### F. Friendlier first-fail messaging + +When install.sh fails (any phase), the error output should: +- Name the phase (`Phase 4/8: Python ML environment`) +- Show the actual failing command + its stderr +- Print 1-line guidance for that specific failure ("If pip install timed + out, retry: `python -m pip install --retries 5 ...`") +- Capture full log to a clipboardable path (`/tmp/continuum-install-*.log`) + +Carl shouldn't have to read the script source to understand what broke. + +## What does NOT land in this PR (deferred to follow-ups) + +- **Self-hosted GPU runner** (bigmama's box as a GHA runner) — bigger + infra lift, do once Carl-install-and-chat-smoke is stable on amd64. +- **Persona-airc bridge** (#967) — separate value stream. +- **(d) tool_use XML parser fix** (#76) — the `chat-smoke` step in this PR + ASSERTS clean output, so #76 is now a hard prerequisite for the smoke + to pass. Decide: fix #76 first then ship this PR's smoke as required, or + ship the smoke as advisory until #76 lands. +- **Recipe substrate** (#71/#73) and **Phase C paging** — independent + workstreams, queued. + +## Rollout + +1. **This PR adds the smoke + the Mac-mode rationalization** to canary. +2. CI runs the new smoke as ADVISORY (not blocking) for 1 week to gather + false-positive rate data. +3. After 1 week of <2% false-fail, flip to REQUIRED via the PrimaryBranches + ruleset (gh api PUT). +4. Canary→main promotion is gated on the smoke passing. +5. New install regressions become impossible to merge without explicit + `--no-verify` (which the team's standing rule forbids per Joel). + +## Per-platform validation + +| Platform | Validator | Notes | +|---|---|---| +| linux/amd64 | GHA runner (`ubuntu-latest`) | Always-on. Carl's dominant platform per HF data. 
| +| linux/amd64 + GPU | bigmama-wsl box, eventually self-hosted runner | Real Carl path; covers vision/persona functionality | +| darwin/arm64 | anvil mac (manual probe), eventually puppeteer-on-mac in CI | Dev's dominant platform | +| windows + WSL2 | green-022a (manual probe), bigmama-wsl secondary | Carl's secondary platform | +| windows native (powershell) | green-022a (manual probe via install.ps1) | New platform — rely on green's dogfood | + +Each push to canary should have at least the linux/amd64 smoke green before +promotion. The other tiers are progressively-tightening. + +## Success criteria + +- [ ] Carl-install-and-chat-smoke runs on every PR; passes for unchanged- + install diffs in <15 min. +- [ ] README's "Docker-first: no compilation needed" claim is true on all + platforms (Carl mode default). +- [ ] Browser smoke catches the chrome-error trap class. +- [ ] After 1 week, smoke is REQUIRED in the PrimaryBranches ruleset. +- [ ] No future PR can land that breaks Carl's install without explicit + bypass (which the team's discipline forbids). + +## Coordination + +- **anvil:** drives the plan, implements A (Carl-install smoke), B + (Mac-mode), E (browser pre-open delay), F (friendlier failures). +- **green-022a:** drives the install.ps1 / Windows-native parity with the + shared logic in `src/scripts/lib/install-common.sh`. Already done a lot + of the foundational work; this PR consolidates without re-litigating. +- **bigmama-wsl:** Linux/CUDA Carl probe (manual, for ground truth before + self-hosted runner lands), reviews + maintains the Linux side of + install-common.sh. Eventually owns the self-hosted GPU runner. +- **joel-mac-dm:** out of scope unless airc-side identity work surfaces a + conflict; airc PR #70 already shipped what we need for #967 anyway. +- **joel:** approves the README-vs-behavior reconciliation choice (B.1 vs + B.2) and the timing of "advisory → required" transition for the smoke. 
diff --git a/install.ps1 b/install.ps1 index f4e82d96e..c0d34d5e3 100644 --- a/install.ps1 +++ b/install.ps1 @@ -214,9 +214,9 @@ if ($bootstrapExit -eq 0) { Write-Ok 'Continuum is up.' Write-Host '' switch ($Mode) { - 'browser' { Write-Host ' UI: http://localhost:9000' } + 'browser' { Write-Host ' UI: http://localhost:9003' } 'cli' { Write-Host ' CLI: continuum (from any new shell)' } - 'headless' { Write-Host ' Server: http://localhost:9000 (API only)' } + 'headless' { Write-Host ' Server: http://localhost:9003 (API only)' } } Write-Host ' Verify: continuum doctor' Write-Host '' diff --git a/install.sh b/install.sh index 51d6a57b6..17398eac8 100755 --- a/install.sh +++ b/install.sh @@ -21,13 +21,62 @@ REPO="https://github.com/CambrianTech/continuum.git" INSTALL_DIR="${CONTINUUM_DIR:-$HOME/continuum}" CONTINUUM_DATA="$HOME/.continuum" +# ── Friendly-failure infrastructure ───────────────────────── +# When install.sh fails partway, Carl needs to know WHICH phase died, +# not just what bash printed. PHASE gets updated as we enter each +# section; the ERR trap reads it + maps to phase-specific guidance. +# Empirically (2026-04-25): existing failures dump bash's last line +# of stderr with no context. Carl can't tell if it's a Docker thing, +# a Tailscale thing, a model-download thing, or a Rust build thing +# without reading install.sh source. 
+PHASE="(starting up)" +INSTALL_LOG="${INSTALL_LOG:-/tmp/continuum-install-$$.log}" +exec > >(tee -a "$INSTALL_LOG") 2>&1 # from here on, all stdout+stderr is mirrored into $INSTALL_LOG + +phase_guidance() { + case "$PHASE" in + *"detect environment"*) echo "Verify uname -s + uname -m return expected values; check disk space (df -h /).";; + *"pre-clone bootstrap"*) echo "Install git + docker first; on Mac, ensure Docker Desktop is running.";; + *"clone"*|*"update repo"*) echo "Check network: ping github.com; verify INSTALL_DIR ($INSTALL_DIR) is writable.";; + *"shared modules"*) echo "Re-clone may be incomplete; rm -rf $INSTALL_DIR && re-run installer.";; + *"configuration"*) echo "Check $CONTINUUM_DATA exists + is writable; mkdir -p $CONTINUUM_DATA && chmod 700 $CONTINUUM_DATA.";; + *"TLS certs"*) echo "Tailscale + cert step is optional; export CONTINUUM_NO_TLS=1 and re-run.";; + *"compose files"*) echo "Verify docker-compose.yml exists in $INSTALL_DIR; the install repo may be incomplete.";; + *"pull"*|*"images"*) echo "Network or GHCR auth issue; docker login ghcr.io and retry.";; + *"start support services"*|*"bring up"*) echo "Check Docker Desktop has enough RAM (≥30GB). docker compose -f $INSTALL_DIR/docker-compose.yml logs --tail=100";; + *"widget-server health"*) echo "Compose came up but widget-server isn't serving. docker compose -f $INSTALL_DIR/docker-compose.yml logs widget-server --tail=100";; + *) echo "Capture full log + open an issue: gh issue create -t 'install fail @ $PHASE' -F '$INSTALL_LOG'";; + esac +} + +on_install_fail() { + local rc=$? # must stay first: status of the command that tripped the trap + # Trap fires on any non-zero exit (set -e). Avoid recursing if the + # ERR trap itself trips a sub-shell.
+ trap - ERR EXIT + echo "" + echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" + echo " ❌ Install failed during phase: $PHASE (exit $rc)" + echo "" + echo " Suggestion: $(phase_guidance)" + echo "" + echo " Full log: $INSTALL_LOG" + echo " Last 30 lines:" + tail -30 "$INSTALL_LOG" | sed 's/^/ /' + echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" + exit "$rc" +} +trap on_install_fail ERR + echo "" echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" echo " Continuum Installer" echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" +echo " Log: $INSTALL_LOG" echo "" # ── 1. Detect environment ─────────────────────────────────── +PHASE="detect environment" info "Detecting environment..." OS="$(uname -s)" @@ -49,6 +98,7 @@ case "$OS" in esac # ── 2. Pre-clone bootstrap: git + minimal Docker presence check ──── +PHASE="pre-clone bootstrap" # We can't source the canonical module library yet (lives in the repo). # Just verify prerequisites so the clone can happen. Deeper checks live # in the canonical modules that run after the clone. @@ -532,6 +582,7 @@ case "$OS" in esac # ── 3. Clone / update repo ───────────────────────────────── +PHASE="clone / update repo" if [ -d "$INSTALL_DIR/.git" ]; then info "Updating existing installation..." cd "$INSTALL_DIR" @@ -543,6 +594,7 @@ else fi # ── 4. Shared modules (same code that Dev runs via npm start) ──── +PHASE="shared modules" # docs/infrastructure/INSTALL-ARCHITECTURE.md §Module-shape: the canonical # module library at src/scripts/lib/install-common.sh defines # mod_submodules_init + mod_docker_wsl_integration + log/sudo primitives. @@ -577,6 +629,7 @@ ok "Source: $INSTALL_DIR" mod_continuum_bin_link "$INSTALL_DIR/bin/continuum" # ── 4. Configuration ─────────────────────────────────────── +PHASE="configuration" mkdir -p "$CONTINUUM_DATA" CONFIG_FILE="$CONTINUUM_DATA/config.env" @@ -600,6 +653,7 @@ else fi # ── 5. 
TLS certs (Tailscale) ────────────────────────────── +PHASE="TLS certs (optional)" TS_HOSTNAME="" if command -v tailscale &>/dev/null; then TS_HOSTNAME=$(tailscale status --json 2>/dev/null | python3 -c "import sys,json; print(json.load(sys.stdin).get('Self',{}).get('DNSName','').rstrip('.'))" 2>/dev/null || echo "") @@ -624,6 +678,7 @@ else fi # ── 6. Pick compose files + profile ─────────────────────── +PHASE="compose files" # Base file is always loaded. On GPU hosts, layer docker-compose.gpu.yml # so continuum-core picks up the cuda image override (otherwise compose # silently uses the CPU image and inference falls back to CPU). The same @@ -654,6 +709,7 @@ elif [[ "$HAS_GPU" == "true" ]]; then fi # ── 7. Pull support-service images ───────────────────────── +PHASE="pull images" # Image tag resolution: compose files honor ${CONTINUUM_IMAGE_TAG:-latest}. # Main-branch installs (Carl's default) use :latest. Reviewers validating # a PR before merge can pin the PR's staged image set: @@ -669,6 +725,7 @@ info "Pulling container images (tag: ${CONTINUUM_IMAGE_TAG:-latest})..." $CONTAINER_CMD compose $COMPOSE_FILES $COMPOSE_ARGS pull 2>/dev/null || warn "Some images not published yet — will build locally" # ── 8. Start support services ────────────────────────────── +PHASE="start support services" # Inverse of parallel-start.sh's cross-mode detection: if native Dev-mode # processes (continuum-core-server, tsx orchestrator) are running, docker # compose up will collide on ports 9001/9100/7880-82/9003/5432. Warn so @@ -717,33 +774,71 @@ if [[ "$OS" == "Darwin" ]]; then warn "npm start failed — check logs at ~/.continuum/jtag/logs/system/continuum-core.log" fi -# ── 8. Wait for health ───────────────────────────────────── -info "Waiting for services..." -for i in {1..30}; do - if curl -sf http://localhost:9003 &>/dev/null || curl -sf https://localhost:9003 -k &>/dev/null; then +# ── 8. 
Wait for widget-server health ─────────────────────── +PHASE="widget-server health" +# Carl's experience hinges on this gate: if we open the browser before +# widget-server is actually serving, Chrome lands on the failed URL, +# replaces the location bar with chrome-error://chromewebdata/, and any +# subsequent reload tries to navigate from chrome-error back to http: — +# which the browser blocks as a cross-scheme navigation. Carl is then +# stuck on an error page with no clean recovery. Empirically: 2026-04-25 +# joel hit "Unsafe attempt to load URL http://localhost:9003/ from frame +# with URL chrome-error://chromewebdata/" exactly because of this race. +# +# Two changes vs the prior 'curl -sf' wait: +# 1. Hit /health specifically (widget-server's health endpoint at +# JTAGEndpoints.HEALTH = '/health'). A 200 here means widget-server +# is actually serving HTTP, not just that the port is open. +# 2. If we never get a 200 in HEALTH_TIMEOUT_SEC, DO NOT open the +# browser. Print actionable diagnostic + a manual-open command for +# Carl to use after he checks the logs. Opening to a not-yet-ready +# server is the bug; refusing to open is the correct behavior. +info "Waiting for widget-server health (timeout ${HEALTH_TIMEOUT_SEC:=120}s)..." +HEALTH_OK=0 +for i in $(seq 1 "$HEALTH_TIMEOUT_SEC"); do + # --fail returns non-zero on 4xx/5xx; --max-time keeps each probe snappy + # so the loop stays close to a 1s cadence even when the server hangs. + if curl -sf --max-time 2 http://localhost:9003/health >/dev/null 2>&1 \ + || curl -sfk --max-time 2 https://localhost:9003/health >/dev/null 2>&1; then + HEALTH_OK=1 + ok "widget-server healthy after ${i}s" break fi - [ $i -eq 30 ] && warn "Services still starting — check: $CONTAINER_CMD compose logs" - sleep 2 + sleep 1 done -# ── 9. Determine URL + open browser ──────────────────────── +# ── 9. 
Determine URL + open browser (only if healthy) ────── +PHASE="open browser" if [ -n "$TS_HOSTNAME" ] && [ -f "$CONTINUUM_DATA/$TS_HOSTNAME.crt" ]; then URL="https://$TS_HOSTNAME:9003" else URL="http://localhost:9003" fi -case "$OS" in - Darwin) open "$URL" 2>/dev/null || true ;; - Linux) - if grep -qi microsoft /proc/version 2>/dev/null; then - cmd.exe /c start "" "$URL" 2>/dev/null || true - else - xdg-open "$URL" 2>/dev/null || true - fi - ;; -esac +if [ "$HEALTH_OK" -eq 1 ]; then + case "$OS" in + Darwin) open "$URL" 2>/dev/null || true ;; + Linux) + if grep -qi microsoft /proc/version 2>/dev/null; then + cmd.exe /c start "" "$URL" 2>/dev/null || true + else + xdg-open "$URL" 2>/dev/null || true + fi + ;; + esac +else + warn "widget-server not healthy after ${HEALTH_TIMEOUT_SEC}s — NOT opening browser." + warn " Opening Chrome to a not-yet-ready URL traps you on a chrome-error page" + warn " that cannot cleanly recover. Diagnose + retry instead:" + echo "" + echo " Logs: $CONTAINER_CMD compose -f $INSTALL_DIR/docker-compose.yml logs --tail=200" + echo " Status: $CONTAINER_CMD compose -f $INSTALL_DIR/docker-compose.yml ps" + echo " Retry: curl -v http://localhost:9003/health" + echo "" + echo " Once the health endpoint returns 200, open the URL manually:" + echo " $URL" + echo "" +fi # ── Done ──────────────────────────────────────────────────── echo "" diff --git a/scripts/ci/carl-install-smoke.sh b/scripts/ci/carl-install-smoke.sh new file mode 100755 index 000000000..4293aaf37 --- /dev/null +++ b/scripts/ci/carl-install-smoke.sh @@ -0,0 +1,169 @@ +#!/usr/bin/env bash +# carl-install-smoke.sh — run the EXACT install command Carl runs, then +# assert the user-facing surface actually serves usable content. +# +# Why this gate: existing install-and-run-gate.sh validates the docker +# compose stack itself (images present, services healthy on :9003). 
It does +# NOT validate that `curl install.sh | bash` — Carl's actual entry point — +# completes cleanly, or that the page Carl opens after install renders +# something usable instead of chrome-error / empty. +# +# This gate closes that gap. Same one-line invocation works for CI and +# humans (per Joel's "make your own testing easy" rule): +# +# bash scripts/ci/carl-install-smoke.sh +# +# Optional env: +# CARL_INSTALL_TIMEOUT_SEC=900 full install timeout (default 15min) +# CARL_HEALTH_TIMEOUT_SEC=180 widget-server /health wait (default 3min) +# CARL_INSTALL_DIR=/tmp/carl-N install location (default /tmp/carl-smoke-<pid>) +# CARL_INSTALL_REF=<git-ref> ref to fetch install.sh from (default: $GITHUB_SHA, else main) +# SKIP_TEARDOWN=1 keep stack running after probe (debug) +# +# Exit codes: +# 0 — install completed AND page rendered usable HTML +# 1 — install.sh failed or timed out +# 2 — install.sh succeeded but widget-server never returned 200 on /health +# 3 — /health returned 200 but the root page is non-2xx or its body looks broken +# (under 100 bytes / not HTML / contains a failure marker such as chrome-error) + +set -uo pipefail + +CARL_INSTALL_TIMEOUT_SEC="${CARL_INSTALL_TIMEOUT_SEC:-900}" +CARL_HEALTH_TIMEOUT_SEC="${CARL_HEALTH_TIMEOUT_SEC:-180}" +CARL_INSTALL_DIR="${CARL_INSTALL_DIR:-/tmp/carl-smoke-$$}" +CARL_INSTALL_REF="${CARL_INSTALL_REF:-${GITHUB_SHA:-main}}" +SKIP_TEARDOWN="${SKIP_TEARDOWN:-0}" + +INSTALL_LOG="${CARL_INSTALL_DIR}.install.log" # sibling of the install dir, so it survives teardown's rm -rf +PAGE_BODY="${CARL_INSTALL_DIR}.page.html" + +echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" +echo " carl-install-smoke" +echo " CARL_INSTALL_DIR=$CARL_INSTALL_DIR" +echo " CARL_INSTALL_REF=$CARL_INSTALL_REF" +echo " CARL_INSTALL_TIMEOUT_SEC=$CARL_INSTALL_TIMEOUT_SEC" +echo " CARL_HEALTH_TIMEOUT_SEC=$CARL_HEALTH_TIMEOUT_SEC" +echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" + +teardown() { + local rc=$? # capture the script's exit status before the cleanup commands overwrite it
+ if [ "$SKIP_TEARDOWN" != "1" ] && [ -d "$CARL_INSTALL_DIR" ]; then + echo "" + echo "━━━ tearing down $CARL_INSTALL_DIR ━━━" + if [ -f "$CARL_INSTALL_DIR/docker-compose.yml" ]; then + ( cd "$CARL_INSTALL_DIR" && docker compose down -v 2>&1 | tail -3 ) || true + fi + rm -rf "$CARL_INSTALL_DIR" + fi + exit "$rc" +} +trap teardown EXIT; trap 'exit 130' INT; trap 'exit 143' TERM # signals exit non-zero; the EXIT trap then runs teardown once + +# ── 1. Run Carl's exact install command ─────────────────────── +echo "" +echo "━━━ running install.sh from $CARL_INSTALL_REF ━━━" +echo " log: $INSTALL_LOG" + +# Carl runs: curl -fsSL <install-url> | bash +# We run the same script pinned to the ref under test (GITHUB_SHA in CI), but download +# it first: `bash <(curl ...)` exits 0 on an empty script when curl fails, masking a bad ref. +INSTALL_URL="https://raw.githubusercontent.com/CambrianTech/continuum/${CARL_INSTALL_REF}/install.sh" + +# Time the install. 15-min timeout for the docker-only path (Carl's expected +# experience). Hybrid Mac path (with Rust source build) will exceed this on +# a fresh runner — that's fine, it'll fail the gate, which is the design +# (the README claims docker-only; install should match). +INSTALL_START=$(date +%s) +if ! timeout "$CARL_INSTALL_TIMEOUT_SEC" bash -c \ + "s=\$(mktemp) && curl -fsSL '$INSTALL_URL' -o \"\$s\" && CONTINUUM_DIR='$CARL_INSTALL_DIR' bash \"\$s\"; rc=\$?; rm -f \"\$s\"; exit \$rc" \ + >"$INSTALL_LOG" 2>&1; then + INSTALL_DUR=$(( $(date +%s) - INSTALL_START )) + echo "❌ install.sh failed or timed out after ${INSTALL_DUR}s" + echo "" + echo " Last 50 lines of install log:" + tail -50 "$INSTALL_LOG" | sed 's/^/ /' + exit 1 +fi +INSTALL_DUR=$(( $(date +%s) - INSTALL_START )) +echo "✅ install.sh completed in ${INSTALL_DUR}s" + +# ── 2. Wait for widget-server /health ───────────────────────── +# install.sh has its own health-wait now (piece E in this PR), but we +# re-check here in case the user used SKIP_HEALTH=1 or ran an older +# install.sh without the wait. Belt + suspenders.
+echo "" +echo "━━━ waiting up to ${CARL_HEALTH_TIMEOUT_SEC}s for widget-server /health ━━━" +HEALTH_OK=0 +for i in $(seq 1 "$CARL_HEALTH_TIMEOUT_SEC"); do + if curl -sf --max-time 2 http://localhost:9003/health >/dev/null 2>&1; then + HEALTH_OK=1 + echo " /health 200 after ${i}s" + break + fi + sleep 1 +done + +if [ "$HEALTH_OK" -ne 1 ]; then + echo "❌ widget-server never returned 200 on /health within ${CARL_HEALTH_TIMEOUT_SEC}s" + echo "" + if [ -f "$CARL_INSTALL_DIR/docker-compose.yml" ]; then + echo " docker compose ps:" + ( cd "$CARL_INSTALL_DIR" && docker compose ps 2>&1 | sed 's/^/ /' ) || true + echo "" + echo " Last 30 lines of widget-server logs:" + ( cd "$CARL_INSTALL_DIR" && docker compose logs --tail=30 widget-server 2>&1 | sed 's/^/ /' ) || true + fi + exit 2 +fi + +# ── 3. Validate the page Carl will open ─────────────────────── +# /health says "server is alive" but doesn't say "the page Carl opens +# renders usable HTML." A naked health endpoint can return 200 while the +# main page returns a stack trace or empty body. Probe the actual root. +echo "" +echo "━━━ probing root page Carl opens (http://localhost:9003/) ━━━" +ROOT_CODE=$(curl -sS -o "$PAGE_BODY" -w "%{http_code}" http://localhost:9003/ 2>/dev/null || echo "000") +ROOT_BYTES=$(wc -c < "$PAGE_BODY" 2>/dev/null || echo 0) +echo " HTTP status: $ROOT_CODE" +echo " Body bytes: $ROOT_BYTES" + +if [[ ! "$ROOT_CODE" =~ ^2 ]]; then + echo "❌ root page returned non-2xx ($ROOT_CODE)" + exit 3 +fi + +if [ "$ROOT_BYTES" -lt 100 ]; then + echo "❌ root page body is suspiciously small ($ROOT_BYTES bytes); Carl would see a blank page." + echo " First 500 bytes:" + head -c 500 "$PAGE_BODY" | sed 's/^/ /' + exit 3 +fi + +# Sanity: page should look like HTML, not a stack trace or compose error. +if ! grep -qiE "<(html|head|body|continuum)" "$PAGE_BODY" 2>/dev/null; then + echo "❌ root page body doesn't look like HTML; Carl would see something broken." 
+ echo " First 500 bytes:" + head -c 500 "$PAGE_BODY" | sed 's/^/ /' + exit 3 +fi + +# Negative checks: any of these in the body = broken-feeling page. +for marker in "chrome-error" "container exited" "ECONNREFUSED" "Cannot GET /" "Internal Server Error"; do + if grep -qF "$marker" "$PAGE_BODY"; then + echo "❌ root page contains failure marker: '$marker'" + echo " Context:" + grep -F "$marker" "$PAGE_BODY" | head -3 | sed 's/^/ /' + exit 3 + fi +done + +echo "✅ root page looks like real HTML (${ROOT_BYTES} bytes, no failure markers)" + +# ── Done ────────────────────────────────────────────────────── +echo "" +echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" +echo " ✅ carl-install-smoke PASSED" +echo " Install duration: ${INSTALL_DUR}s" +echo " Health latency: $(( $(date +%s) - INSTALL_START - INSTALL_DUR ))s after install" # wall time since install finished: health wait plus the root-page probe +echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" diff --git a/src/scripts/parallel-start.sh b/src/scripts/parallel-start.sh index d6f5e9c2c..14cf8f25e 100755 --- a/src/scripts/parallel-start.sh +++ b/src/scripts/parallel-start.sh @@ -204,20 +204,47 @@ if [ ! -f "target/release/continuum-core-server" ]; then echo -e " [Rust] ${YELLOW}First build detected — this takes 5-15 minutes. Showing progress...${NC}" CARGO_QUIET="" fi + +# Wrapper around `cargo build --release -p <pkg>`. On incremental builds (CARGO_QUIET +# non-empty) we capture-then-display, which keeps the log clean. On first +# builds (CARGO_QUIET empty) we tee so cargo's "Compiling crate vX.Y.Z" +# lines stream live to the terminal — without this, the user saw the +# "First build detected — Showing progress..." banner then total silence +# for 5-15 minutes because $(cargo ...) blocks until cargo exits. We still +# capture into $OUT for preflight_check_cargo_xcode + the failure path.
+build_pkg() { + local pkg="$1"; shift # remaining args are passed straight through to cargo + if [ -z "$CARGO_QUIET" ]; then # verbose mode: stream cargo output live and keep a copy + local capture status + capture=$(mktemp) + cargo build --release -p "$pkg" "$@" 2>&1 | tee "$capture" + status=${PIPESTATUS[0]} # cargo's exit code, not tee's + OUT=$(<"$capture") + rm -f "$capture" + if [ "$status" -ne 0 ]; then + BUILD_OUTPUT+="$OUT" + RESULT=1 + fi + else # quiet mode: capture silently, record output only on failure + OUT=$(cargo build --release -p "$pkg" "$@" --quiet 2>&1) \ + || { BUILD_OUTPUT+="$OUT"; RESULT=1; } + fi +} + for pkg in archive-worker jtag-mcp; do - OUT=$(cargo build --release -p $pkg $CARGO_QUIET 2>&1) || { BUILD_OUTPUT+="$OUT"; RESULT=1; } + build_pkg "$pkg" done # continuum-core: all GPU features (metal+accelerate on macOS, cuda on Linux) if [ -n "$GPU_FEAT" ]; then - OUT=$(cargo build --release -p continuum-core --features "$GPU_FEAT" $CARGO_QUIET 2>&1) || { BUILD_OUTPUT+="$OUT"; RESULT=1; } + build_pkg continuum-core --features "$GPU_FEAT" else - OUT=$(cargo build --release -p continuum-core $CARGO_QUIET 2>&1) || { BUILD_OUTPUT+="$OUT"; RESULT=1; } + build_pkg continuum-core fi # inference-grpc: GPU backend only (metal or cuda, no accelerate) if [ -n "$GPU_BACKEND" ]; then - OUT=$(cargo build --release -p inference-grpc --features "$GPU_BACKEND" $CARGO_QUIET 2>&1) || { BUILD_OUTPUT+="$OUT"; RESULT=1; } + build_pkg inference-grpc --features "$GPU_BACKEND" else - OUT=$(cargo build --release -p inference-grpc $CARGO_QUIET 2>&1) || { BUILD_OUTPUT+="$OUT"; RESULT=1; } + build_pkg inference-grpc fi # Filter ts-rs noise and display echo "$BUILD_OUTPUT" | grep -v -E "ts-rs failed to parse|failed to parse serde|= note:|skip_serializing_if|^\s*\|?\s*$|^$" | sed 's/^/ [Rust] /'