diff --git a/.changeset/README.md b/.changeset/README.md new file mode 100644 index 0000000..4e3df29 --- /dev/null +++ b/.changeset/README.md @@ -0,0 +1,29 @@ +# Changesets + +This directory holds [Changesets](https://github.com/changesets/changesets) used +to coordinate versioning and publishing of the public packages in this monorepo: + +- `@finalrun/common` +- `@finalrun/report-web` +- `@finalrun/finalrun-agent` + +`@finalrun/goal-executor` and `@finalrun/device-node` are intentionally ignored +(they ship only as `bundleDependencies` inside `@finalrun/finalrun-agent`). + +## Workflow + +1. After making a change in a public package, run: + ``` + npx changeset + ``` + Pick the packages and the bump type (patch/minor/major) and write a short + summary. A markdown file appears in this directory. + +2. Commit the changeset file alongside the code change in the same PR. + +3. On `main`, to cut a release: + ``` + npx changeset version # bumps package.json versions, updates deps, writes CHANGELOG + git commit -am "chore: release" + npx changeset publish # publishes bumped packages to npm + ``` diff --git a/.changeset/config.json b/.changeset/config.json new file mode 100644 index 0000000..f900740 --- /dev/null +++ b/.changeset/config.json @@ -0,0 +1,11 @@ +{ + "$schema": "https://unpkg.com/@changesets/config@3.0.0/schema.json", + "changelog": "@changesets/cli/changelog", + "commit": false, + "fixed": [], + "linked": [], + "access": "public", + "baseBranch": "main", + "updateInternalDependencies": "patch", + "ignore": ["@finalrun/goal-executor", "@finalrun/device-node"] +} diff --git a/.changeset/publish-common-report-web.md b/.changeset/publish-common-report-web.md new file mode 100644 index 0000000..509b9e9 --- /dev/null +++ b/.changeset/publish-common-report-web.md @@ -0,0 +1,13 @@ +--- +'@finalrun/common': minor +'@finalrun/report-web': minor +--- + +Publish `@finalrun/common` and `@finalrun/report-web` to npm for the first +time. 
Previously common shipped only as a `bundleDependency` of +`@finalrun/finalrun-agent`, and report-web was unpublished (consumed only +via yalc / local tarballs). + +`@finalrun/report-web` is also fully migrated from a Next.js App Router +shell to a Vite SPA; the library exports (`/ui`, `/ui/styles.css`, +`/routes`) are unchanged. diff --git a/.github/release-notes-template.md b/.github/release-notes-template.md new file mode 100644 index 0000000..09bc0f6 --- /dev/null +++ b/.github/release-notes-template.md @@ -0,0 +1,51 @@ +## Install + +### macOS / Linux + +One command. No Node.js, no npm, nothing else required. + +```sh +curl -fsSL https://raw.githubusercontent.com/final-run/finalrun-agent/main/scripts/install.sh | bash +``` + +For CI / non-interactive environments (binary only, no runtime tarball, no prompts): + +```sh +curl -fsSL https://raw.githubusercontent.com/final-run/finalrun-agent/main/scripts/install.sh | bash -s -- --ci +``` + +CI environments (`CI=1`) get this behavior automatically even without the flag. + +### Windows (PowerShell) + +```powershell +irm https://raw.githubusercontent.com/final-run/finalrun-agent/main/scripts/install.ps1 | iex +``` + +For CI / non-interactive environments: + +```powershell +$env:FINALRUN_NON_INTERACTIVE=1; irm https://raw.githubusercontent.com/final-run/finalrun-agent/main/scripts/install.ps1 | iex +``` + +Windows hosts support **Android local execution and all cloud commands.** iOS local execution requires macOS (xcodebuild) — use `finalrun cloud` for iOS testing from Windows. First run may show a SmartScreen warning since the binary is not yet code-signed; click "More info → Run anyway". 
+ +## Artifacts + +| Platform | Binary | Runtime tarball | +|---|---|---| +| macOS Apple Silicon | `finalrun-darwin-arm64` | `finalrun-runtime-<version>-darwin-arm64.tar.gz` | +| macOS Intel | `finalrun-darwin-x64` | `finalrun-runtime-<version>-darwin-x64.tar.gz` | +| Linux x86_64 | `finalrun-linux-x64` | `finalrun-runtime-<version>-linux-x64.tar.gz` | +| Linux ARM64 | `finalrun-linux-arm64` | `finalrun-runtime-<version>-linux-arm64.tar.gz` | +| Windows x86_64 | `finalrun-windows-x64.exe` | `finalrun-runtime-<version>-windows-x64.tar.gz` | + +Each artifact ships with a matching `.sha256` sidecar. + +## Upgrading from a previous version + +```sh +finalrun upgrade +``` + +The CLI re-runs the install script with sensible defaults (auto-detects whether you previously installed the runtime tarball, preserves your install directory). diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..328be24 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,257 @@ +name: Release + +# Manual-only trigger. Click "Run workflow" in the Actions tab. +# +# Flow when triggered from main (or any branch): +# 1. Reads version from packages/cli/package.json on the chosen branch. +# 2. Fails fast if the matching tag (v<version>) already exists. +# 3. Builds all 5 Bun binaries (darwin-arm64/x64, linux-x64/arm64, windows-x64). +# 4. Builds all 5 runtime tarballs (per-platform). +# 5. Smoke-tests the Windows binary on a real windows-latest runner. +# 6. Tags the exact build SHA as v<version> and pushes the tag. +# 7. Creates a GitHub Release at that tag, attaches all artifacts + +# their .sha256 sidecars, marks it as "latest". +# +# To cut a release: +# 1. Bump packages/cli/package.json version on a branch (default main). +# 2. Commit + push to the branch. +# 3. Actions tab → Release → Run workflow → choose branch → Run. 
+on: + workflow_dispatch: + inputs: + branch: + description: 'Branch to release from' + required: false + default: 'main' + +permissions: + contents: write # create tag + GitHub Release + +# Serialize releases so two clicks of "Run workflow" can't race on tag +# creation, both build the full artifact set, and then duel on `git push`. +# cancel-in-progress: false → queue rather than abort the in-flight run. +concurrency: + group: release-${{ github.event.inputs.branch }} + cancel-in-progress: false + +jobs: + build: + runs-on: ubuntu-24.04 + outputs: + version: ${{ steps.version.outputs.version }} + tag: ${{ steps.version.outputs.tag }} + sha: ${{ steps.version.outputs.sha }} + is_latest: ${{ steps.version.outputs.is_latest }} + steps: + - uses: actions/checkout@v4 + with: + ref: ${{ github.event.inputs.branch }} + fetch-depth: 0 # need history + tags so we can validate uniqueness + + - name: Resolve & validate version + id: version + run: | + set -euo pipefail + VERSION=$(node -p "require('./packages/cli/package.json').version") + TAG="v${VERSION}" + SHA=$(git rev-parse HEAD) + BRANCH="${{ github.event.inputs.branch }}" + + # Strict semver: 0.1.7, 1.2.3, 0.2.0-rc.1, 0.2.0-beta.1, etc. + # Rejects empty, non-numeric, or trailing-junk versions. + if ! [[ "$VERSION" =~ ^[0-9]+\.[0-9]+\.[0-9]+(-[0-9A-Za-z.-]+)?(\+[0-9A-Za-z.-]+)?$ ]]; then + echo "::error::Invalid semver in packages/cli/package.json: '$VERSION'" + exit 1 + fi + + # Refuse if the tag already exists locally OR on origin (the local + # check alone misses tags pushed by other contributors / CI runs). + if git rev-parse --verify --quiet "refs/tags/$TAG" >/dev/null; then + echo "::error::Tag $TAG already exists locally. Bump packages/cli/package.json or delete the existing tag." + exit 1 + fi + if git ls-remote --tags --exit-code origin "refs/tags/$TAG" >/dev/null 2>&1; then + echo "::error::Tag $TAG already exists on origin. Bump packages/cli/package.json." + exit 1 + fi + + # Pre-release tags (e.g. 
0.2.0-rc.1) should NOT clobber the + # "latest" pointer; only stable releases from main do that. + IS_LATEST="false" + if [[ "$BRANCH" == "main" && ! "$VERSION" =~ - ]]; then + IS_LATEST="true" + fi + + echo "version=$VERSION" >> "$GITHUB_OUTPUT" + echo "tag=$TAG" >> "$GITHUB_OUTPUT" + echo "sha=$SHA" >> "$GITHUB_OUTPUT" + echo "is_latest=$IS_LATEST" >> "$GITHUB_OUTPUT" + echo "Releasing $TAG from $BRANCH commit $SHA (is-latest: $IS_LATEST)" + + - uses: actions/setup-node@v4 + with: + node-version: '20.19' + # No cache: 'npm' — this repo intentionally does not commit + # package-lock.json (.gitignore line 4), so setup-node's npm + # cache feature has nothing to key on. + # Same reason `npm install` instead of `npm ci`: ci requires a lockfile. + - run: npm install + + - uses: oven-sh/setup-bun@v2 + with: + bun-version: '1.3.11' + + - name: Build all workspace dist files + run: | + npm run build --workspace=@finalrun/common + npm run build --workspace=@finalrun/cloud-core + npm run build --workspace=@finalrun/device-node + npm run build --workspace=@finalrun/goal-executor + npm run build --workspace=@finalrun/report-web + npm run build --workspace=@finalrun/finalrun-agent + + - name: Build binaries (all 5 targets via Bun cross-compile) + run: ./scripts/build-binary.sh + + - name: Build runtime tarballs (all 5 targets) + run: | + for t in darwin-arm64 darwin-x64 linux-x64 linux-arm64 windows-x64; do + npm run build:tarball --workspace=@finalrun/local-runtime -- --target="$t" + done + + - name: Stage release artifacts + run: | + mkdir -p release-artifacts + cp dist/binaries/finalrun-* release-artifacts/ + cp packages/local-runtime/dist/finalrun-runtime-*.tar.gz* release-artifacts/ + ls -la release-artifacts/ + + - uses: actions/upload-artifact@v4 + with: + name: release-artifacts + path: release-artifacts/ + retention-days: 7 + + # Validate the Windows .exe actually executes on a real Windows host before + # we tag and publish. 
Bun cross-compiles the binary from ubuntu-24.04, so + # without this gate a Windows-specific runtime breakage (missing DLL, bad + # PE header, embedded-runtime mismatch) would only surface after release. + # Cheap insurance: ~30s of windows-latest runtime per release. + smoke-windows: + needs: build + runs-on: windows-latest + steps: + - uses: actions/download-artifact@v4 + with: + name: release-artifacts + path: release-artifacts/ + + - name: Smoke-test the Windows binary + shell: pwsh + run: | + $exe = "release-artifacts/finalrun-windows-x64.exe" + if (-not (Test-Path $exe)) { + Write-Error "Expected $exe in downloaded artifacts but did not find it." + exit 1 + } + Write-Host "Running: $exe --version" + & $exe --version + if ($LASTEXITCODE -ne 0) { + Write-Error "finalrun.exe --version exited $LASTEXITCODE" + exit 1 + } + Write-Host "Running: $exe --help" + & $exe --help + if ($LASTEXITCODE -ne 0) { + Write-Error "finalrun.exe --help exited $LASTEXITCODE" + exit 1 + } + + release: + needs: [build, smoke-windows] + runs-on: ubuntu-24.04 + steps: + - uses: actions/checkout@v4 + with: + ref: ${{ needs.build.outputs.sha }} + fetch-depth: 0 + + - uses: actions/download-artifact@v4 + with: + name: release-artifacts + path: release-artifacts/ + + - name: Build release notes from template + CHANGELOG section + env: + VERSION: ${{ needs.build.outputs.version }} + run: | + set -euo pipefail + + # Extract the matching `## [VERSION]` section from CHANGELOG.md. 
+ # Maintainers must update CHANGELOG.md (manually, or via Claude) + # in the release PR — the workflow refuses to publish without it. Runs + # BEFORE the tag is pushed, so a missing section leaves no orphan tag. + CHANGELOG_SECTION=$(awk -v marker="## [${VERSION}]" ' + index($0, marker) == 1 { capture = 1; print; next } + capture && /^## \[/ { exit } + capture { print } + ' CHANGELOG.md) + + if [ -z "$CHANGELOG_SECTION" ]; then + echo "::error::CHANGELOG.md has no '## [${VERSION}]' section." + echo "::error::Add an entry under that heading describing what changed in this release, then re-run the workflow." + exit 1 + fi + + # Combine the static install instructions (from the template) + # with the version-specific changelog section. The template is + # the same across releases; the changelog section changes. + { + cat .github/release-notes-template.md + echo "" + echo "---" + echo "" + echo "## What's changed in this release" + echo "" + # Skip the first line (`## [VERSION] - DATE`) since we just + # printed our own heading; keep the rest verbatim. + echo "$CHANGELOG_SECTION" | tail -n +2 + } > /tmp/release-notes.md + + echo "::group::Final release notes" + cat /tmp/release-notes.md + echo "::endgroup::" + + - name: Tag the build commit and push the tag + env: + TAG: ${{ needs.build.outputs.tag }} + SHA: ${{ needs.build.outputs.sha }} + run: | + set -euo pipefail + git config user.name "github-actions[bot]" + git config user.email "41898282+github-actions[bot]@users.noreply.github.com" + git tag -a "$TAG" "$SHA" -m "Release $TAG" + git push origin "$TAG" + + - name: Create GitHub Release with all artifacts + env: + GH_TOKEN: ${{ github.token }} + TAG: ${{ needs.build.outputs.tag }} + VERSION: ${{ needs.build.outputs.version }} + IS_LATEST: ${{ needs.build.outputs.is_latest }} + run: | + set -euo pipefail + # Pre-releases (e.g. 0.2.0-rc.1) and releases from non-main + # branches don't update the "latest" pointer that install.sh and + # the README badges resolve against. 
+ extra_flags=() + if [ "$IS_LATEST" = "true" ]; then + extra_flags+=(--latest) + else + extra_flags+=(--prerelease) + fi + gh release create "$TAG" \ + --title "FinalRun $VERSION" \ + --notes-file /tmp/release-notes.md \ + "${extra_flags[@]}" \ + release-artifacts/* diff --git a/.gitignore b/.gitignore index d4defd2..c3b57ce 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ # Dependencies node_modules/ packages/*/node_modules/ +package-lock.json # TypeScript build output packages/*/dist/ @@ -23,6 +24,7 @@ packages/*/coverage/ # npm pack artifacts /finalrun-agent-*.tgz +packages/*/*.tgz # Local environment files .env @@ -59,3 +61,4 @@ pnpm-debug.log* /.codex /.gemini +dist/ diff --git a/CHANGELOG.md b/CHANGELOG.md index 312fda1..3617f65 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,126 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). ## [Unreleased] +## [0.1.10] - 2026-04-26 + +### Added — Windows x86_64 support + +FinalRun now ships a `finalrun-windows-x64.exe` binary and a matching `finalrun-runtime-0.1.10-windows-x64.tar.gz` runtime bundle on every release. + +End users on Windows install via PowerShell: + +```powershell +irm https://raw.githubusercontent.com/final-run/finalrun-agent/main/scripts/install.ps1 | iex +``` + +The Windows installer (`scripts/install.ps1`) mirrors the bash installer's structure: downloads the binary, extracts the runtime tarball, wires up the per-user PATH, and walks the user through Android tooling setup (Android Studio detection + `scrcpy` install via `winget` with a `choco` fallback). The iOS prompt is intentionally absent — iOS local execution requires `xcodebuild` (macOS-only). Cloud commands work the same on Windows for both Android and iOS. + +`finalrun upgrade` now branches on platform: Windows hosts re-run `install.ps1` via `powershell.exe -Command "irm | iex"`; macOS/Linux hosts continue to re-run `install.sh` via `bash -c "curl | bash"`. Both honor the `--ci` flag. 
+ +Built via Bun's `bun-windows-x64` cross-compile target on the existing Linux-based release runner; no new CI infrastructure or code signing pipeline yet (Windows users will see a SmartScreen warning on first run — click "More info → Run anyway"). A new `smoke-windows` job in the release workflow runs the cross-compiled `.exe` on a real `windows-latest` runner before tagging, blocking the release if the binary fails to execute. + +### Notes + +- Windows ARM64 is not supported — Bun does not currently provide a `bun-windows-arm64` cross-compile target. +- `scripts/install.sh` continues to reject Windows hosts (Cygwin / MinGW / MSYS / Git Bash). Windows installs go through `install.ps1`, not the bash installer. +- The Windows runtime tarball is byte-equivalent to the Linux x64 tarball (Android-only payload); the existing `isDarwin` gate in `buildRuntimeTarball.mjs` correctly excludes iOS bundles for non-darwin targets. + +## [0.1.9] - 2026-04-26 + +### Changed + +- `finalrun upgrade` flag space mirrors the v0.1.8 installer: `--cloud-only` and `--full-setup` are removed in favor of `--ci`. When neither is passed, the upgrade mode is inferred from whether the local runtime tarball is currently installed (binary-only if not, full setup if yes). + +### Fixed + +- Cloud submissions (`cloud test`, `cloud upload`) drop the env file when the active environment comes from `.finalrun/config.yaml`'s `env:` default. The zip now ships `.finalrun/env/.yaml` whether the env is set via `--env` or resolved from config defaults. Regression introduced in v0.1.8 — the server-side check would 500 with `Environment "" was requested, but .finalrun/env does not exist`. +- `LocalRuntimeMissingError` recovery hint no longer references the removed `install.sh --full-setup` flag — it now points at plain `curl … | bash` (full setup is the default). +- `install.sh` platform-prompt-exhausted warning no longer suggests "re-run without --ci" (that path is unreachable from `--ci` mode). 
It now suggests re-running the installer or running `finalrun doctor` to diagnose host tooling. + +## [0.1.8] - 2026-04-25 + +### Changed — distribution model + +The CLI is no longer published to npm. It now ships as a self-contained Bun-compiled binary plus a per-platform runtime tarball, both uploaded to GitHub Releases. End users install via: + +```sh +# Full local-dev setup (binary + runtime tarball + platform tools + skills) +curl -fsSL https://raw.githubusercontent.com/final-run/finalrun-agent/main/scripts/install.sh | bash + +# CI / non-interactive (binary only) +curl -fsSL https://raw.githubusercontent.com/final-run/finalrun-agent/main/scripts/install.sh | bash -s -- --ci +``` + +No Node.js required. CI environments (auto-detected via `CI=1`) skip the interactive setup automatically even without the `--ci` flag. + +### Breaking — installer flags + +The first cut of v0.1.8 shipped with `--cloud-only` and `--full-setup` flags plus TTY-detection that interacted badly with `curl | bash` (the script could hang mid-install when `exec -.tar.gz` (driver bundles, gRPC proto, Vite SPA dist) that ships alongside the binary on GitHub Releases +- `finalrun upgrade` subcommand: re-runs the install script with sensible defaults (auto-detects whether the runtime tarball was previously installed; preserves `FINALRUN_DIR`) +- `--cloud-only` / `--full-setup` flags on `install.sh` for explicit override of the TTY auto-detection +- 30-minute fetch timeout on cloud submissions and app uploads (overridable via `FINALRUN_SUBMIT_TIMEOUT_MS` and `FINALRUN_UPLOAD_TIMEOUT_MS`); stalled connections surface as a clear "connection stalled" message instead of hung spinners +- `FINALRUN_REPORT_APP_DIR` and `FINALRUN_RUNTIME_ROOT` env vars for pointing the CLI at custom asset locations +- GitHub Actions `Release` workflow (manual `workflow_dispatch` trigger) that builds all 4 binaries + 4 runtime tarballs, tags the build commit, and creates the GitHub Release with all 16 artifacts +- 
`RELEASING.md` runbook documenting the manual release flow, pre-release tags, re-running failed jobs, local dry-run, and rollback + +### Changed + +- `FINALRUN_CLOUD_URL` default switched from `cloud-dev.finalrun.app` to `cloud.finalrun.app`. Override the env var to re-target dev infra. +- Cloud `cloud test` and `cloud upload` now stream the app file to the multipart body via `fs.openAsBlob` instead of `fs.readFileSync` — large APKs/IPAs no longer materialize into a single Buffer in memory +- Cloud `cloud test` ships only the env file matching `--env`, not every YAML under `.finalrun/env/` (was leaking other environments' bindings) +- `install.sh` rewritten: downloads the binary first, TTY-detects, runs interactive setup (platform prompt, brew installs, doctor verification, skills install) only when on a real terminal. Prompts have 30-second read timeouts that fall through to the conservative path. Brew install failures now correctly fail the setup step (previously short-circuited via `&& ok`). +- The CLI's `bin/finalrun.ts` now lazy-loads the heavy modules (`testRunner`, `doctorRunner`, `reportServer`, `reportServerManager`) so cloud commands don't pull them at startup. Local commands fail fast with `LocalRuntimeMissingError` and a recovery URL when the runtime tarball isn't installed. +- Test runner is now a portable Node script (`packages/cli/scripts/runTests.mjs`) walking `dist/` instead of `node --test "dist/**/*.test.js"` (which needs Node 21+ for native glob; we declare `>= 20.19`) + +### Removed + +- `npm install -g @finalrun/finalrun-agent` — no longer published. `packages/cli` is `private: true`. Existing npm-installed copies keep working until users `finalrun upgrade` or re-run the install URL. 
+- `packages/cli/scripts/installAssets.mjs`, `preparePackage.mjs`, `cleanupPackage.mjs` — npm-publication scripts no longer needed +- `packages/cli/package.json` no longer has `postinstall`, `prepack`, `postpack`, `bundleDependencies`, or `publishConfig` +- Client-side APK/IPA inspection in cloud submissions — server validates platform / packageName / simulator-compatibility authoritatively + +### Fixed + +- Bun-compiled binary's `__dirname` is the build-machine source path; resolving `package.json` via filesystem walk-up failed on every machine other than the one that built the binary. The CLI version is now read via `require('../package.json')` at module load (compiled to a CJS require by tsc and inlined into the bundle by Bun). +- Runtime tarball location now honors `$FINALRUN_DIR` (default `~/.finalrun`), matching the install script's convention. Previously the binary's resolver only checked `$HOME/.finalrun` regardless of where the install script extracted the tarball. +- Cloud submit/upload spinners no longer remain spinning after an unparseable JSON body or a server-side rejection — both paths now `spinner.fail` before rethrowing +- `install.sh` rejects `--cloud-only` and `--full-setup` together; refuses Windows hosts up front (Cygwin / MinGW / MSYS) instead of 404-ing on a non-existent `finalrun-windows-x64.exe`; validates the GitHub `/releases/latest` redirect target shape before parsing the tag +- Release workflow gained a `concurrency:` block (two simultaneous "Run workflow" clicks now queue rather than race on tag creation), strict semver regex on the version, origin tag-existence check (not just local), and `--latest` only when releasing from `main` with a stable (non-pre-release) version + +## [0.1.7] - 2026-04-20 + +### Added + +- Per-feature model and reasoning effort selection via workspace YAML +- Mintlify community docs site with restructured Get Started (Intro, Installation, Quickstart) and a hero landing page +- `docs.finalrun.app` link 
surfaced in the README top nav and Documentation section + +### Changed + +- Slimmer planner hierarchy via Dart-aligned planner/grounder split for lower token cost +- Hardened per-feature model resolution and run-context capture +- Documented supported config shapes and per-provider reasoning levels +- Removed deprecated `toPromptElements` and unused hierarchy helpers + +### Fixed + +- Route OpenAI through the Responses API so `reasoningEffort` actually takes effect +- Pin Anthropic to `outputFormat` structured-output mode and enforce it via zod schema +- Drop the outer `output` wrapper and `.int()` from Anthropic schemas to satisfy the tool-schema validator +- Resolve per-feature provider/model in the post-merge summary logs + ## [0.1.6] - 2026-04-15 ### Added diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 360b753..30d5fc6 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -10,12 +10,14 @@ cd finalrun-agent npm ci ``` -Published npm package: +End-user install (binary, not npm): ```sh -npm install -g @finalrun/finalrun-agent +curl -fsSL https://raw.githubusercontent.com/final-run/finalrun-agent/main/scripts/install.sh | bash ``` +The CLI is no longer published to npm — it ships as a Bun-compiled binary plus a per-platform runtime tarball uploaded to GitHub Releases. See [RELEASING.md](./RELEASING.md) for how a release is cut. 
+ Build the workspace packages: ```sh @@ -40,7 +42,9 @@ npm run format:check - `packages/common`: shared models, types, and utilities - `packages/device-node`: device detection, gRPC communication, and platform-specific device runtime logic - `packages/goal-executor`: AI planning and action execution -- `packages/cli`: published CLI package +- `packages/cli`: CLI source — Bun-compiled into the distributed binary +- `packages/cloud-core`: pure cloud-submission logic shared by the CLI binary +- `packages/local-runtime`: builder for the per-platform runtime tarball (driver bundles, gRPC proto, Vite SPA dist) that ships alongside the binary on GitHub Releases - `packages/report-web`: local report UI - `drivers/android`: Android driver sources - `drivers/ios`: iOS simulator driver sources diff --git a/README.md b/README.md index 510c074..509befb 100644 --- a/README.md +++ b/README.md @@ -5,8 +5,8 @@

- - npm version + + GitHub release License: Apache-2.0 @@ -16,6 +16,8 @@

finalrun.app  •  + Docs +  •  Blog  •  Cloud Device Waitlist @@ -49,11 +51,19 @@ ## Install +**macOS / Linux** + ```sh curl -fsSL https://raw.githubusercontent.com/final-run/finalrun-agent/main/scripts/install.sh | bash ``` -Sets up Node.js, the CLI, AI coding agent skills, and platform tools. Run `finalrun doctor` to verify host readiness. +**Windows (PowerShell)** + +```powershell +irm https://raw.githubusercontent.com/final-run/finalrun-agent/main/scripts/install.ps1 | iex +``` + +iOS local testing requires macOS (`xcodebuild`). On Windows, use `finalrun cloud` for iOS — Android works locally. CI flags, env overrides, and the artifact list live in the [latest release notes](https://github.com/final-run/finalrun-agent/releases/latest). ## Write and Run Your First Test Using AI Agents @@ -133,6 +143,8 @@ finalrun suite auth_smoke.yaml --platform android --model google/gemini-3-flash- ## Documentation +Full docs: **[docs.finalrun.app](https://docs.finalrun.app/)** + - [Autotrigger FinalRun tests (AI agents)](docs/autotrigger-finalrun.md) — when coding agents should generate, validate, and run tests after UI work - [YAML Tests](docs/yaml-tests.md) — test format, fields, suites, and environment placeholders - [CLI Reference](docs/cli-reference.md) — all commands, flags, and report tools diff --git a/RELEASING.md b/RELEASING.md new file mode 100644 index 0000000..20ebd13 --- /dev/null +++ b/RELEASING.md @@ -0,0 +1,198 @@ +# Releasing finalrun-agent + +This is the runbook for cutting a new release. The normal path uses GitHub Actions and takes one click. There's also a manual fallback for when CI is unavailable or you want to release from your laptop. + +## What a release contains + +Every release ships **10 download files** plus a checksum file for each (20 files total) on the GitHub Releases page: + +- A small `finalrun` program for each of: macOS Apple Silicon, macOS Intel, Linux x86_64, Linux ARM64, Windows x86_64. 
+- A "runtime bundle" (`.tar.gz`) for each of those five platforms — this contains the extra files local-test execution needs (driver app builds, gRPC schema, the report-server web UI). The Windows runtime is Android-only; iOS local execution requires macOS. + +Users install the `finalrun` program by running: + +```sh +curl -fsSL https://raw.githubusercontent.com/final-run/finalrun-agent/main/scripts/install.sh | bash +``` + +The installer downloads the right binary for their machine, and (in interactive mode) the matching runtime bundle. + +There is **no npm publication.** The CLI is a binary, not an npm package. + +--- + +## How to cut a release (the normal way) + +You do three things. Steps 1 and 2 are a small PR. Step 3 is one click. + +### 1. Open a release PR + +Make a branch and bump the version: + +```sh +git checkout -b release/vX.Y.Z +npm version X.Y.Z -w @finalrun/finalrun-agent --no-git-tag-version +``` + +Then edit [`CHANGELOG.md`](./CHANGELOG.md) and add a section for your new version under `## [Unreleased]`. Use this format: + +```markdown +## [X.Y.Z] - YYYY-MM-DD + +### Added +- (new things) + +### Changed +- (behavior changes) + +### Fixed +- (bug fixes) +``` + +This is the **only** place you write release notes. The release process pulls this section from `CHANGELOG.md` and puts it on the GitHub Releases page automatically — you never edit the GitHub Releases page directly. + +Commit and push: + +```sh +git add packages/cli/package.json package.json CHANGELOG.md +git commit -m "Release vX.Y.Z" +git push -u origin release/vX.Y.Z +``` + +Open a PR from this branch to `main`, get review, merge. + +### 2. Trigger the release + +After the PR is merged, in your browser: + +1. Go to the repo's **Actions** tab on GitHub +2. Click **Release** in the left sidebar +3. 
Click **Run workflow** on the right, pick `main`, click the green **Run workflow** button + +Or from your terminal: + +```sh +gh workflow run release.yml -f branch=main +gh run watch # follow progress live +``` + +### 3. Verify it shipped + +The workflow takes about 4 minutes. When it's done: + +```sh +gh release view vX.Y.Z # see the release page contents +``` + +Or open `https://github.com/final-run/finalrun-agent/releases/tag/vX.Y.Z` in a browser. + +You should see: + +- 20 downloadable files (10 binaries/tarballs + 10 checksum files) +- A release body that includes install instructions and your CHANGELOG section + +That's it — `finalrun upgrade` on user machines, and fresh `curl ... | bash` runs, will now pull your new version. + +--- + +## What the workflow checks before publishing + +The workflow refuses to release if any of these fail. This is your safety net. + +- The version in `packages/cli/package.json` must look like a valid version (e.g. `1.2.3` or `0.2.0-rc.1`). +- A tag named `vX.Y.Z` must not already exist on origin (so you can't accidentally overwrite a previous release). +- `CHANGELOG.md` must have a `## [X.Y.Z]` section. **No release notes, no release.** + +If any of these fail, the workflow exits early with a message telling you exactly what to fix. Nothing ships. + +--- + +## Manual fallback (no CI needed) + +Use this when GitHub Actions is down, you don't have access to it, or you want to release straight from your laptop. The result is identical — same files, same release page. + +You'll need: + +- `bun` installed: `curl -fsSL https://bun.sh/install | bash` (one time) +- `gh` CLI logged in: `gh auth login` (one time) +- About 5 minutes + +Steps: + +```sh +# 1. Be on the merged release commit on main +git checkout main && git pull + +# 2. Set the version you're releasing +VERSION=X.Y.Z + +# 3. 
Build all 10 release files +./scripts/build-binary.sh +for t in darwin-arm64 darwin-x64 linux-x64 linux-arm64 windows-x64; do + npm run build:tarball --workspace=@finalrun/local-runtime -- --target=$t +done + +# 4. Tag the commit and push the tag +git tag -a "v$VERSION" -m "Release v$VERSION" +git push origin "v$VERSION" + +# 5. Build the release notes (combines the static install instructions with +# your CHANGELOG section — same logic the workflow uses) +awk -v marker="## [${VERSION}]" ' + index($0, marker) == 1 { c=1; print; next } + c && /^## \[/ { exit } + c { print } +' CHANGELOG.md > /tmp/version-notes.md + +{ + cat .github/release-notes-template.md + echo "" + echo "---" + echo "" + echo "## What's changed in this release" + echo "" + tail -n +2 /tmp/version-notes.md +} > /tmp/release-body.md + +# 6. Create the release with all artifacts attached +gh release create "v$VERSION" \ + --title "FinalRun $VERSION" \ + --notes-file /tmp/release-body.md \ + --latest \ + dist/binaries/finalrun-* \ + packages/local-runtime/dist/finalrun-runtime-*.tar.gz* +``` + +For a pre-release (e.g. `0.2.0-rc.1`), swap `--latest` for `--prerelease` so it doesn't displace the current "latest" pointer. + +--- + +## If the workflow fails partway through + +The workflow is designed so the tag isn't created until the build has succeeded. So if it fails before that point, just **fix the issue and re-run** — there's no leftover state to clean up. + +If it fails AFTER the tag is created (rare — only happens if the GitHub Releases upload itself flakes), do this cleanup before retrying: + +```sh +git push origin :refs/tags/vX.Y.Z # delete the tag from GitHub +git tag -d vX.Y.Z # delete it locally too +gh release delete vX.Y.Z --yes # delete the partial release if any +``` + +Then re-trigger the workflow. 
+ +--- + +## Rolling back a release that shipped broken + +If a release goes out and turns out to be broken: + +```sh +gh release delete vX.Y.Z --yes # this rolls back the "latest" pointer +git push origin :refs/tags/vX.Y.Z # delete the tag +git tag -d vX.Y.Z # locally too +``` + +Now fix the issue on a new PR, then cut a fresh release (either re-using `vX.Y.Z` or moving to the next patch version, `vX.Y.(Z+1)` — your call). + +Note: anyone who already installed the broken version still has it on their disk. They'll get the new version when they run `finalrun upgrade` or re-run the curl install command. The public install URL goes through "latest", so deleting the broken release immediately stops new users from getting it. diff --git a/docs/cli-reference.md b/docs/cli-reference.md index b42355e..6041397 100644 --- a/docs/cli-reference.md +++ b/docs/cli-reference.md @@ -26,12 +26,14 @@ Flags for `test` and `suite`: | `--model ` | AI model (e.g. `google/gemini-3-flash-preview`). Falls back to `.finalrun/config.yaml`. | | `--env ` | Environment name (matches `.finalrun/env/.yaml`). Falls back to config. | | `--app ` | Path to `.apk` or `.app` binary. Overrides the app identity in config. See [configuration.md](configuration.md) for details. | -| `--api-key ` | Override the provider API key. | +| `--api-key ` | Override the provider API key. Only valid when a single provider is in use across all features; use env vars when features target multiple providers. | | `--debug` | Enable debug logging. | | `--max-iterations ` | Limit AI action iterations per step. | CLI flags always take precedence over `.finalrun/config.yaml`. +For workspace-level `model`, `reasoning`, and per-feature `features:` overrides (including mixed-provider setups), see [configuration.md](configuration.md#supported-configurations). 
+ ### Examples ```sh diff --git a/docs/codebase-walkthrough.md b/docs/codebase-walkthrough.md index b849990..009772d 100644 --- a/docs/codebase-walkthrough.md +++ b/docs/codebase-walkthrough.md @@ -500,6 +500,8 @@ Standard Grounder (hierarchy-based) **Why Vercel AI SDK?** It provides a unified interface across providers, so the goal executor doesn't need provider-specific code for each LLM. +Model and reasoning effort are configurable per feature (planner, grounder, and the specialized grounders) via the `features:` block in `.finalrun/config.yaml`. See [configuration.md](configuration.md) for the YAML shape. + --- ## 8. The Device Layer (Physical Actions) diff --git a/docs/configuration.md b/docs/configuration.md index 0128f28..13a5a60 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -31,9 +31,14 @@ The workspace config defines defaults used by the CLI when flags are omitted. | `app.bundleId` | iOS bundle identifier (e.g. `com.example.myapp`) | | `env` | Default environment name (used when `--env` is omitted) | | `model` | Default AI model in `provider/model` format (used when `--model` is omitted) | +| `reasoning` | Default reasoning effort for all features: `minimal`, `low`, `medium`, or `high`. `minimal` is OpenAI-only. | +| `features.<name>.model` | Per-feature model override in `provider/model` format. | +| `features.<name>.reasoning` | Per-feature reasoning effort override. | At least one of `app.packageName` or `app.bundleId` is required. +Valid feature names: `planner`, `grounder`, `visual-grounder`, `scroll-index-grounder`, `input-focus-grounder`, `launch-app-grounder`, `set-location-grounder`. + ### Example ```yaml @@ -43,8 +48,99 @@ app: bundleId: com.example.myapp env: dev model: google/gemini-3-flash-preview +reasoning: medium + +# Optional — unlisted features inherit the default model and reasoning. 
+features: + planner: + model: anthropic/claude-opus-4-7 + reasoning: high + scroll-index-grounder: + reasoning: low +``` + +### Supported Providers + +Use any of these prefixes in the `provider/model` format: + +- `openai/` (e.g. `openai/gpt-5.4-mini`) +- `google/` (e.g. `google/gemini-3-flash-preview`) +- `anthropic/` (e.g. `anthropic/claude-opus-4-7`) + +Model names are passed straight to the provider — consult the provider's docs for which models accept reasoning effort. + +### Reasoning Levels by Provider + +| Provider | Accepted `reasoning` values | +|---|---| +| `openai` | `minimal`, `low`, `medium`, `high` | +| `google` | `low`, `medium`, `high` | +| `anthropic` | `low`, `medium`, `high` | + +Setting `reasoning: minimal` on a Google- or Anthropic-routed feature fails at run time with a message naming the offending feature. + +When neither workspace `reasoning:` nor a per-feature `reasoning:` is set, FinalRun applies built-in fallbacks: + +- `planner` → `medium` +- every grounder (`grounder`, `visual-grounder`, `scroll-index-grounder`, `input-focus-grounder`, `launch-app-grounder`, `set-location-grounder`) → `low` + +### Anthropic Model Compatibility + +`anthropic/...` models must be Claude 4.5 or later (Sonnet 4.5+, Opus 4.5+, Haiku 4.5+, including Sonnet 4.6, Opus 4.6, and Opus 4.7). FinalRun uses Anthropic's native structured-output API (`output_config.format`) for guaranteed JSON, and only Claude 4.5+ supports it. Older Anthropic models will return HTTP 400 from the API. OpenAI and Google paths have no equivalent restriction. + +### Supported Configurations + +Three shapes are supported. Pick the simplest one that fits. + +**1. One model, one reasoning level (simplest).** Every feature uses the same model and effort: + +```yaml +model: openai/gpt-5.4-mini +reasoning: low +``` + +**2. 
Same provider, per-feature reasoning tuning.** One API key, one provider, but effort tuned per feature: + +```yaml +model: openai/gpt-5.4-mini +reasoning: low + +features: + planner: + reasoning: high # planner only — keeps the workspace model + scroll-index-grounder: + reasoning: minimal # cheap fast grounding + # unlisted features inherit model + reasoning from the top +``` + +**3. Mixed providers across features.** Different providers for different features: + +```yaml +model: google/gemini-3-flash-preview # default for anything unlisted +reasoning: medium + +features: + planner: + model: anthropic/claude-opus-4-7 + reasoning: high + grounder: + model: openai/gpt-5.4-mini + reasoning: minimal ``` +Mixed-provider mode requires **every** referenced provider's env var to be set (`OPENAI_API_KEY`, `GOOGLE_API_KEY`, `ANTHROPIC_API_KEY` — see [environment.md](environment.md)). The `--api-key` CLI flag is rejected in this mode. + +### Per-Feature Overrides + +The `features:` block lets you tune each LLM call independently. Each feature drives a distinct prompt: + +- `planner` — decides the next user action from the current screen. +- `grounder` — picks the UI element for an action. +- `visual-grounder` — visual fallback when text grounding fails. +- `scroll-index-grounder`, `input-focus-grounder`, `launch-app-grounder`, `set-location-grounder` — specialized grounders for their respective actions. + +Both `model` and `reasoning` are optional per feature. Any unset field falls back to the workspace-level default (`model:` / `reasoning:`), and any unlisted feature inherits both defaults. + ## App Identity FinalRun needs to know which app to launch on the device. 
The app identity is resolved in this order: diff --git a/docs/environment.md b/docs/environment.md index 1d7eabb..24daf8c 100644 --- a/docs/environment.md +++ b/docs/environment.md @@ -63,6 +63,8 @@ FinalRun resolves API keys by provider prefix: Keys are read from `process.env` and from workspace-root `.env` / `.env.`. You can also pass `--api-key` to override. +If `.finalrun/config.yaml` uses different providers across features (via the `features:` block in [configuration.md](configuration.md)), set the env var for each provider you reference. `--api-key` is only accepted when a single provider is in play. + ## Git: Keep Secrets Out of the Repo **Do not commit** `.env` files. Add the following to your app repository's `.gitignore`: diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md index 9d7b311..a38f810 100644 --- a/docs/troubleshooting.md +++ b/docs/troubleshooting.md @@ -6,6 +6,12 @@ FinalRun looks for `.finalrun/` by walking up from your current directory. Make **`Error: API key not configured`** Set the matching environment variable for your model provider. For `google/...`, set `GOOGLE_API_KEY` in your `.env` or shell. See [environment.md](environment.md#ai-provider-api-keys). +**`Error: --api-key is only valid when a single provider is active`** +Your `.finalrun/config.yaml` targets multiple providers via the `features:` block (see [configuration.md](configuration.md)). Drop `--api-key` and set each provider's env var instead: `OPENAI_API_KEY`, `GOOGLE_API_KEY`, `ANTHROPIC_API_KEY`. + +**`Error: Reasoning level "minimal" is only supported for OpenAI`** +The `minimal` reasoning level only exists on OpenAI. Change the workspace or feature override to `low`, `medium`, or `high` — or route that feature to an OpenAI model. + **`Error: No Android emulator running`** Start an emulator with `emulator -avd ` or launch one from Android Studio. Run `finalrun doctor --platform android` to verify. 
diff --git a/mintlify-docs/.atlas-analysis.json b/mintlify-docs/.atlas-analysis.json new file mode 100644 index 0000000..f050d12 --- /dev/null +++ b/mintlify-docs/.atlas-analysis.json @@ -0,0 +1,84 @@ +{ + "projectType": "cli-tool", + "projectName": "FinalRun", + "projectDescription": "An AI-driven CLI that tests Android and iOS apps using natural language YAML specs, executing steps on real devices or emulators with Gemini, GPT, or Claude.", + "theme": "luma", + "primaryColor": "#3f4fe8", + "lightColor": "#d0a7f7", + "darkColor": "#6c4cfc", + "navigation": { + "tabs": [ + { + "tab": "Docs", + "groups": [ + { + "group": "Get Started", + "pages": [ + "introduction", + "quickstart", + "installation" + ] + }, + { + "group": "Writing Tests", + "pages": [ + "tests/yaml-format", + "tests/suites", + "tests/placeholders" + ] + }, + { + "group": "Configuration", + "pages": [ + "configuration/workspace", + "configuration/environments", + "configuration/ai-providers" + ] + }, + { + "group": "Running Tests", + "pages": [ + "running/cli-reference", + "running/ai-agent-skills", + "running/reports" + ] + }, + { + "group": "Help", + "pages": [ + "troubleshooting", + "faq" + ] + } + ] + } + ] + }, + "keyFeatures": [ + "Natural language YAML test specs for Android and iOS", + "AI-powered test execution using Gemini, GPT, or Claude", + "Three-phase test model: setup, steps, expected_state", + "AI agent skills for generating, running, and fixing tests", + "Local report viewer with video, screenshots, and device logs", + "BYOK (Bring Your Own Key) — use your own AI provider API key", + "Multi-environment support with secrets and variable bindings", + "One-command install and host readiness check via finalrun doctor" + ], + "publicApiSurface": [ + "finalrun test ", + "finalrun suite ", + "finalrun check [selectors...]", + "finalrun doctor", + "finalrun runs", + "finalrun start-server", + "finalrun stop-server", + "finalrun server-status", + "/finalrun-generate-test skill", + 
"/finalrun-use-cli skill", + "/finalrun-test-and-fix skill", + ".finalrun/config.yaml", + ".finalrun/tests//.yaml", + ".finalrun/suites/.yaml", + ".finalrun/env/.yaml" + ] +} diff --git a/mintlify-docs/.mintignore b/mintlify-docs/.mintignore new file mode 100644 index 0000000..9922f06 --- /dev/null +++ b/mintlify-docs/.mintignore @@ -0,0 +1,7 @@ +# Mintlify automatically ignores these files and directories: +# .git, .github, .claude, .agents, .idea, node_modules, +# README.md, LICENSE.md, CHANGELOG.md, CONTRIBUTING.md + +# Draft content +drafts/ +*.draft.mdx diff --git a/mintlify-docs/AGENTS.md b/mintlify-docs/AGENTS.md new file mode 100644 index 0000000..cebd973 --- /dev/null +++ b/mintlify-docs/AGENTS.md @@ -0,0 +1,33 @@ +> **First-time setup**: Customize this file for your project. Prompt the user to customize this file for their project. +> For Mintlify product knowledge (components, configuration, writing standards), +> install the Mintlify skill: `npx skills add https://mintlify.com/docs` + +# Documentation project instructions + +## About this project + +- This is a documentation site built on [Mintlify](https://mintlify.com) +- Pages are MDX files with YAML frontmatter +- Configuration lives in `docs.json` +- Run `mint dev` to preview locally +- Run `mint broken-links` to check links + +## Terminology + +{/* Add product-specific terms and preferred usage */} +{/* Example: Use "workspace" not "project", "member" not "user" */} + +## Style preferences + +{/* Add any project-specific style rules below */} + +- Use active voice and second person ("you") +- Keep sentences concise — one idea per sentence +- Use sentence case for headings +- Bold for UI elements: Click **Settings** +- Code formatting for file names, commands, paths, and code references + +## Content boundaries + +{/* Define what should and shouldn't be documented */} +{/* Example: Don't document internal admin features */} diff --git a/mintlify-docs/CONTRIBUTING.md b/mintlify-docs/CONTRIBUTING.md 
new file mode 100644 index 0000000..8863ee4 --- /dev/null +++ b/mintlify-docs/CONTRIBUTING.md @@ -0,0 +1,34 @@ +> **Customize this file**: Tailor this template to your project by noting specific contribution types you're looking for, adding a Code of Conduct, or adjusting the writing guidelines to match your style. + +# Contribute to the documentation + +Thank you for your interest in contributing to our documentation! This guide will help you get started. + +## How to contribute + +### Option 1: Edit directly on GitHub + +1. Navigate to the page you want to edit +2. Click the "Edit this file" button (the pencil icon) +3. Make your changes and submit a pull request + +### Option 2: Local development + +1. Fork and clone this repository +2. Install the Mintlify CLI: `npm i -g mint` +3. Create a branch for your changes +4. Make changes +5. Navigate to the docs directory and run `mint dev` +6. Preview your changes at `http://localhost:3000` +7. Commit your changes and submit a pull request + +For more details on local development, see our [development guide](development.mdx). 
+ +## Writing guidelines + +- **Use active voice**: "Run the command" not "The command should be run" +- **Address the reader directly**: Use "you" instead of "the user" +- **Keep sentences concise**: Aim for one idea per sentence +- **Lead with the goal**: Start instructions with what the user wants to accomplish +- **Use consistent terminology**: Don't alternate between synonyms for the same concept +- **Include examples**: Show, don't just tell diff --git a/mintlify-docs/LICENSE b/mintlify-docs/LICENSE new file mode 100644 index 0000000..5411374 --- /dev/null +++ b/mintlify-docs/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2023 Mintlify + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file diff --git a/mintlify-docs/README.md b/mintlify-docs/README.md new file mode 100644 index 0000000..4552fbc --- /dev/null +++ b/mintlify-docs/README.md @@ -0,0 +1,55 @@ +# Mintlify Starter Kit + +Use the starter kit to get your docs deployed and ready to customize. 
+ +Click the green **Use this template** button at the top of this repo to copy the Mintlify starter kit. The starter kit contains examples with + +- Guide pages +- Navigation +- Customizations +- API reference pages +- Use of popular components + +**[Follow the full quickstart guide](https://starter.mintlify.com/quickstart)** + +## AI-assisted writing + +Set up your AI coding tool to work with Mintlify: + +```bash +npx skills add https://mintlify.com/docs +``` + +This command installs Mintlify's documentation skill for your configured AI tools like Claude Code, Cursor, Windsurf, and others. The skill includes component reference, writing standards, and workflow guidance. + +See the [AI tools guides](/ai-tools) for tool-specific setup. + +## Development + +Install the [Mintlify CLI](https://www.npmjs.com/package/mint) to preview your documentation changes locally. To install, use the following command: + +``` +npm i -g mint +``` + +Run the following command at the root of your documentation, where your `docs.json` is located: + +``` +mint dev +``` + +View your local preview at `http://localhost:3000`. + +## Publishing changes + +Install our GitHub app from your [dashboard](https://dashboard.mintlify.com/settings/organization/github-app) to propagate changes from your repo to your deployment. Changes are deployed to production automatically after pushing to the default branch. + +## Need help? + +### Troubleshooting + +- If your dev environment isn't running: Run `mint update` to ensure you have the most recent version of the CLI. +- If a page loads as a 404: Make sure you are running in a folder with a valid `docs.json`. 
+ +### Resources +- [Mintlify documentation](https://mintlify.com/docs) diff --git a/mintlify-docs/configuration/ai-providers.mdx b/mintlify-docs/configuration/ai-providers.mdx new file mode 100644 index 0000000..de22a23 --- /dev/null +++ b/mintlify-docs/configuration/ai-providers.mdx @@ -0,0 +1,92 @@ +--- +title: "Connect FinalRun to Google, OpenAI, or Anthropic AI" +sidebarTitle: "AI Providers" +description: "Bring your own API key to FinalRun and configure Google Gemini, OpenAI GPT, or Anthropic Claude as the AI model that drives your Android and iOS test runs." +--- + +FinalRun uses a bring-your-own-key (BYOK) model — it does not proxy AI requests through its own infrastructure. When you run a test, the CLI calls your chosen AI provider directly using the API key you supply. This gives you full visibility into token usage and billing, and lets you use whichever model tier your team has access to. + + + Prefer managed AI without provisioning a provider account? See [Cloud API Key](/configuration/cloud-api-key) for the FinalRun Cloud option. + + +## Supported providers + +| Provider prefix | Environment variable | Recommended model family | +|---|---|---| +| `google/...` | `GOOGLE_API_KEY` | Gemini 3 family and above | +| `openai/...` | `OPENAI_API_KEY` | GPT-5 family and above | +| `anthropic/...` | `ANTHROPIC_API_KEY` | Claude 4.5 family and above (Sonnet 4.5+, Opus 4.5+, Haiku 4.5+) | + +The provider is inferred from the prefix of the `--model` value or the `model` field in `.finalrun/config.yaml`. + +## Setting your API key + +You can supply an API key in three ways: + + + + Add the key to a `.env` file at your workspace root. This is the recommended approach for local development. + + ```bash + echo "GOOGLE_API_KEY=your-key-here" >> .env + ``` + + The file is read automatically on every run. See [Managing environments and secrets](/configuration/environments) for dotenv load order details. 
+ + + Export the variable in your shell session or CI environment: + + ```bash + export GOOGLE_API_KEY=your-key-here + ``` + + Shell environment variables take the highest priority in FinalRun's load order. + + + Pass the key directly as a CLI flag for a one-off run without modifying any file: + + ```bash + finalrun test smoke.yaml --api-key your-key-here --model google/gemini-3-flash-preview + ``` + + +## Setting a default model + +Add a `model` field to `.finalrun/config.yaml` so you don't need to pass `--model` on every command: + +```yaml .finalrun/config.yaml +model: google/gemini-3-flash-preview +``` + +The model value must use `provider/model-name` format. Examples: `google/gemini-3-flash-preview`, `anthropic/claude-sonnet-4-6`, `openai/gpt-5`. + + + Set `model` in `.finalrun/config.yaml` once during workspace setup. After that, commands like `finalrun test smoke.yaml --platform android` work without an explicit `--model` flag. + + +## Provider setup examples + + + +```bash Google Gemini +echo "GOOGLE_API_KEY=your-key-here" >> .env +finalrun test smoke.yaml --platform android --model google/gemini-3-flash-preview +``` + +```bash OpenAI GPT +echo "OPENAI_API_KEY=your-key-here" >> .env +finalrun test smoke.yaml --platform android --model openai/gpt-5 +``` + +```bash Anthropic Claude +echo "ANTHROPIC_API_KEY=your-key-here" >> .env +finalrun test smoke.yaml --platform ios --model anthropic/claude-sonnet-4-6 +``` + + + + + Test runs consume AI provider tokens. Standard API billing from your provider applies — FinalRun does not add any markup or usage fees on top of provider costs. 
+ diff --git a/mintlify-docs/configuration/cloud-api-key.mdx b/mintlify-docs/configuration/cloud-api-key.mdx new file mode 100644 index 0000000..10fde34 --- /dev/null +++ b/mintlify-docs/configuration/cloud-api-key.mdx @@ -0,0 +1,67 @@ +--- +title: "Get your FinalRun Cloud API key" +sidebarTitle: "Cloud API Key" +description: "Sign up for FinalRun Cloud, generate an API key, and set FINALRUN_API_KEY for the CLI." +--- + +FinalRun Cloud runs tests on FinalRun's hosted infrastructure with managed AI access, so you don't need to bring your own provider key. New accounts include $5 of credits. + + + +## 1. Sign up + +Create an account at [https://cloud.finalrun.app](https://cloud.finalrun.app). + +## 2. Generate an API key + +In the dashboard, open the **API keys** section and create a new key. Copy the value — it is shown only once. + +## 3. Set `FINALRUN_API_KEY` + +The CLI reads `FINALRUN_API_KEY` from the same sources as provider keys. + + + + Add the key to a `.env` file at your workspace root: + + ```bash + echo "FINALRUN_API_KEY=your-key-here" >> .env + ``` + + See [Managing environments and secrets](/configuration/environments) for dotenv load order details. + + + Export the variable in your shell session or CI environment: + + ```bash + export FINALRUN_API_KEY=your-key-here + ``` + + Shell variables take the highest priority in FinalRun's load order. + + + Pass the key directly as a CLI flag: + + ```bash + finalrun cloud test smoke.yaml --api-key your-key-here + ``` + + + +## Run a cloud test + +```bash +finalrun cloud test smoke.yaml --platform android +``` + + + Prefer to use your own AI provider account instead? See [AI Providers](/configuration/ai-providers) for the bring-your-own-key setup. 
+ diff --git a/mintlify-docs/configuration/environments.mdx b/mintlify-docs/configuration/environments.mdx new file mode 100644 index 0000000..19caa99 --- /dev/null +++ b/mintlify-docs/configuration/environments.mdx @@ -0,0 +1,101 @@ +--- +title: "FinalRun environments: secrets, variables, and overrides" +sidebarTitle: "Environments" +description: "Use named environment files and dotenv files to manage variable bindings, secret placeholders, and per-environment app identity overrides in FinalRun." +--- + +Environments are named profiles that group variable bindings, secret placeholders, and optional app identity overrides. You define an environment by creating a YAML file at `.finalrun/env/.yaml`, then activate it with `--env ` or by setting `env: ` in `.finalrun/config.yaml`. Separating environments lets you run the same test specs against development, staging, and production configurations without touching the tests themselves. + +## Environment file structure + +Each environment file lives at `.finalrun/env/.yaml` and can contain three top-level blocks: + + + Per-environment app identity override. Values here take priority over the workspace default in `config.yaml`. Useful when your staging or debug build uses a different package name or bundle ID. + + + + Placeholder bindings for sensitive values. Each value uses `${SHELL_ENV_VAR}` syntax. The CLI resolves each placeholder from the shell environment or from workspace-root dotenv files at runtime. Do not put real secrets in this file. + + + + Plain, non-sensitive values such as locale strings, feature flags, or base URLs. Safe to commit. + + +### Full example + +```yaml .finalrun/env/dev.yaml +app: + packageName: com.example.myapp.debug + bundleId: com.example.myapp.debug + +secrets: + email: ${TEST_USER_EMAIL} + password: ${TEST_USER_PASSWORD} + +variables: + locale: en-US +``` + +In your test specs, you can reference these values as `${secrets.email}` and `${variables.locale}`. 
+ +## Dotenv files for secret values + +Real secret values — API keys, passwords, tokens — belong in dotenv files at the workspace root, not in the YAML binding file. + +| File | Purpose | +|---|---| +| `.env` | Shared defaults loaded for all runs | +| `.env.` | Environment-specific values loaded when `--env ` is active (e.g. `.env.dev` for `--env dev`) | + +FinalRun finds the workspace root by walking up from your shell's current directory, so dotenv paths are always anchored to the folder that contains `.finalrun/`, regardless of where you run the CLI from. + +### Load order + +For an active environment named `N`, the CLI loads values in this order: + + + + Environment-specific dotenv file is read first. Keys set here take precedence over the shared file. + + + Shared dotenv file fills in any keys not already set by `.env.N`. + + + Shell environment variables win if the same key exists in both a dotenv file and the current shell session. + + + +This same load order applies to both `secrets` placeholder resolution and AI provider API key resolution. + +## Using environments with the CLI + +Pass `--env` to activate a named environment for any command: + +```bash +# Run a test against the dev environment on Android +finalrun test smoke.yaml --env dev --platform android + +# Validate workspace configuration for staging +finalrun check --env staging +``` + +When you set `env: dev` in `.finalrun/config.yaml`, `--env` becomes optional and the CLI uses `dev` by default. + +## Keeping secrets out of version control + + + Never commit `.env` files. Add the following to your repository's `.gitignore`: + + ```text .gitignore + .env + .env.* + !.env.example + ``` + + This ignores `.env`, `.env.dev`, `.env.staging`, and any similar files while keeping `.env.example` tracked. + + + + Create a `.env.example` file that lists every required variable with placeholder values. Commit it so that team members know exactly which variables to set in their local `.env` file. 
+ diff --git a/mintlify-docs/configuration/workspace.mdx b/mintlify-docs/configuration/workspace.mdx new file mode 100644 index 0000000..b012304 --- /dev/null +++ b/mintlify-docs/configuration/workspace.mdx @@ -0,0 +1,112 @@ +--- +title: "FinalRun workspace config: app identity and defaults" +sidebarTitle: "Workspace" +description: "Set up .finalrun/config.yaml with app identity, default environment, and AI model fields to configure your FinalRun workspace for Android and iOS testing." +--- + +Every FinalRun project is anchored to a workspace root — the directory that contains the `.finalrun/` folder. The workspace holds your configuration file, test specs, optional suite manifests, and per-environment binding files. Understanding the layout and the fields in `config.yaml` lets the CLI resolve the right app, environment, and AI model without you needing to pass flags on every run. + +## Workspace layout + +```text +my-app/ # workspace root + .env # optional + .env.dev # optional + .finalrun/ + config.yaml # workspace configuration + tests/ # YAML test specs (required) + smoke.yaml + auth/ + login.yaml + suites/ # suite manifests (optional) + auth_smoke.yaml + env/ # environment bindings (optional) + dev.yaml +``` + +The `tests/` directory is the only required subdirectory. `suites/` and `env/` are optional and only needed when you run suites or use named environments. + +## `.finalrun/config.yaml` fields + +The workspace config defines defaults that the CLI uses when flags are omitted. Place this file at `.finalrun/config.yaml` in your workspace root. + + + Human-readable name for the app. Optional — used only for display purposes. + + + + Android package identifier (e.g. `com.example.myapp`). Required if you run Android tests and do not pass `--app`. + + + + iOS bundle identifier (e.g. `com.example.myapp`). Required if you run iOS tests and do not pass `--app`. + + + + Default environment name. Used when you omit the `--env` flag. 
Must match a file under `.finalrun/env/.yaml` if one exists. + + + + Default AI model in `provider/model` format (e.g. `google/gemini-3-flash-preview`). Used when you omit `--model`. + + + + At least one of `app.packageName` or `app.bundleId` is required unless you always pass `--app` on the command line. + + +### Example config + +```yaml .finalrun/config.yaml +app: + name: MyApp + packageName: com.example.myapp + bundleId: com.example.myapp +env: dev +model: google/gemini-3-flash-preview +``` + +## App identity resolution + +FinalRun resolves which app to launch on the device using the following priority order: + + + + When you pass `--app `, FinalRun uses that binary directly. It extracts the package name (Android) or bundle ID (iOS) from the binary and ignores any `app` block in config files. + + + If an active environment file at `.finalrun/env/.yaml` contains an `app` block, those values override the workspace defaults. + + + If neither of the above applies, FinalRun falls back to the `app` block in `.finalrun/config.yaml`. + + + +### Using the `--app` flag + +Pass a local binary to run a specific build without changing any config file: + +```bash +finalrun test smoke.yaml --platform android --app path/to/your.apk +finalrun test smoke.yaml --platform ios --app path/to/YourApp.app +``` + +The CLI: +- Extracts the package name (Android) or bundle ID (iOS) from the binary +- Infers the platform from the file extension (`.apk` → Android, `.app` → iOS) +- Validates that the binary matches the `--platform` flag if both are provided + + + CLI flags always override values in `config.yaml`. You can use flags for one-off runs without modifying your workspace config. 
+ + +### Per-environment app overrides + +If your app uses different identifiers per environment — for example, a `.staging` suffix — set the override in the corresponding env file instead of changing `config.yaml`: + +```yaml .finalrun/env/staging.yaml +app: + packageName: com.example.myapp.staging + bundleId: com.example.myapp.staging +``` + +Any environment that does not define its own `app` block falls back to the workspace default in `.finalrun/config.yaml`. diff --git a/mintlify-docs/docs.json b/mintlify-docs/docs.json new file mode 100644 index 0000000..de91abc --- /dev/null +++ b/mintlify-docs/docs.json @@ -0,0 +1,70 @@ +{ + "$schema": "https://mintlify.com/docs.json", + "name": "FinalRun", + "theme": "luma", + "colors": { + "primary": "#3f4fe8", + "light": "#d0a7f7", + "dark": "#6c4cfc" + }, + "logo": { + "light": "/logo/finalrun-logo.png", + "dark": "/logo/finalrun-logo-dark-theme.png" + }, + "favicon": "https://media.brand.dev/31164f48-397c-4036-929b-8153e11d15c1.jpg", + "navbar": { + "primary": { + "type": "github", + "href": "https://github.com/final-run/finalrun-agent" + } + }, + "navigation": { + "tabs": [ + { + "tab": "Docs", + "groups": [ + { + "group": "Get Started", + "pages": [ + "index", + "installation", + "quickstart" + ] + }, + { + "group": "Writing Tests", + "pages": [ + "tests/yaml-format", + "tests/suites", + "tests/placeholders" + ] + }, + { + "group": "Configuration", + "pages": [ + "configuration/workspace", + "configuration/environments", + "configuration/ai-providers", + "configuration/cloud-api-key" + ] + }, + { + "group": "Running Tests", + "pages": [ + "running/cli-reference", + "running/ai-agent-skills", + "running/reports" + ] + }, + { + "group": "Help", + "pages": [ + "troubleshooting", + "faq" + ] + } + ] + } + ] + } +} diff --git a/mintlify-docs/faq.mdx b/mintlify-docs/faq.mdx new file mode 100644 index 0000000..fefcb01 --- /dev/null +++ b/mintlify-docs/faq.mdx @@ -0,0 +1,103 @@ +--- +title: "FinalRun FAQ: pricing, AI 
providers, and CI/CD usage" +sidebarTitle: "FAQ" +description: "Answers to common FinalRun questions: pricing, supported AI providers, platform compatibility, CI/CD usage, three-phase test model, and artifact storage." +--- + +FinalRun is an open-source, CLI-based tool — no account required, no platform lock-in. The questions below cover the most common things people ask about how FinalRun works, what it costs, and how to get the most out of it. + + + + No. FinalRun is open-source and runs entirely from the command line. Install the CLI, set your AI provider API key, and start running tests. There is no account, signup, or FinalRun subscription required. + + + + FinalRun supports three AI providers. You bring your own key (BYOK) — costs are billed directly by your provider at standard API rates. + + | Provider | Supported models | Environment variable | + |---|---|---| + | Google | Gemini 3+ | `GOOGLE_API_KEY` | + | OpenAI | GPT-5+ | `OPENAI_API_KEY` | + | Anthropic | Claude Sonnet 4 / Opus 4+ | `ANTHROPIC_API_KEY` | + + Set the key in your shell or in a `.env` file at your workspace root. You can also override it for a single run with the `--api-key` flag. + + + + Currently FinalRun targets Android emulators (AVDs) and iOS simulators for local runs. Support for cloud devices and physical hardware is on the roadmap. + + If you want early access to cloud device support, [join the waitlist](https://docs.google.com/forms/d/e/1FAIpQLScOTaNWjvxIG8Ywn6THHYJuqBM-b86Y-Fx39YVoBVhHuBDZ2w/viewform?usp=publish-editor). + + + + - **Android** — any macOS, Linux, or Windows machine with the Android SDK tools (`adb`, `emulator`) and `scrcpy` (installed separately) available, plus a running Android Virtual Device. + - **iOS** — macOS only. Requires Xcode command line tools with `xcrun simctl`. + + Run `finalrun doctor` to check that all required dependencies are present on your machine before running tests. + + + + FinalRun itself is free and open-source. 
You pay your AI provider — Google, OpenAI, or Anthropic — for the tokens consumed during test execution. The cost depends on the model you choose and how long the test takes to complete. There are no additional charges from FinalRun. + + + + Yes. Install the CLI in your CI environment and set the required environment variables — your AI provider API key, `ANDROID_HOME` (for Android), and any secrets your test specs reference. Then call `finalrun test` or `finalrun suite` as a step in your pipeline. + + ```sh + finalrun test smoke.yaml --platform android --model google/gemini-3-flash-preview + ``` + + Run `finalrun check` before your test step to catch workspace configuration errors early, before you consume any API tokens. + + + + Every FinalRun test has three phases: + + - **`setup`** — optional actions that prepare a clean state before the test starts (for example, clearing app data). + - **`steps`** — the ordered, plain-English instructions the AI executes on your device screen. + - **`expected_state`** — the UI conditions the AI verifies once all steps have completed. + + A test passes only when all three phases succeed. If any phase fails, FinalRun stops the run and records the failure with the current screenshot, video, and device log. + + + + `finalrun check` validates your entire workspace before you run any tests. It checks: + + - Selector definitions + - Suite manifests + - Environment bindings (secrets and variables) + - App overrides + + Running `finalrun check` before a test run catches configuration errors early, so you are not spending API tokens on a run that will fail at startup. + + ```sh + finalrun check --env dev --platform android + ``` + + + + FinalRun ships a set of agent skills that let you generate tests, run them, and fix failures — all from your AI coding agent chat. 
Install the skills with: + + ```sh + npx skills add final-run/finalrun-agent + ``` + + Once installed, three slash commands are available in your AI coding agent: + + | Command | What it does | + |---|---| + | `/finalrun-generate-test` | Reads your source code, infers app identity, and generates complete YAML test specs | + | `/finalrun-use-cli` | Validates and runs your tests using the CLI | + | `/finalrun-test-and-fix` | Runs the full generate → run → diagnose → fix loop until the test is green | + + + + Artifacts for each run — including video, screenshots, and device logs — are stored at: + + ``` + ~/.finalrun/workspaces/<workspace-hash>/artifacts/ + ``` + + Use `finalrun runs` to list all recorded runs for your current workspace, and `finalrun start-server` to open the visual report UI where you can browse results interactively. + + diff --git a/mintlify-docs/index.mdx b/mintlify-docs/index.mdx new file mode 100644 index 0000000..09d4315 --- /dev/null +++ b/mintlify-docs/index.mdx @@ -0,0 +1,42 @@ +--- +title: "FinalRun: AI-powered Android and iOS test automation" +sidebarTitle: "Introduction" +description: "FinalRun lets you write plain-English test specs in YAML and run them on Android and iOS using AI models like Gemini, GPT, or Claude." +--- + +FinalRun is an AI-powered CLI for testing Android and iOS apps with natural language. +You write test scenarios in YAML, describing actions the way a person would. +FinalRun launches your app on an Android emulator or iOS simulator and uses an AI model to see the screen and perform each action — tapping, swiping, typing, and verifying results. +When the run finishes, you get a pass/fail report complete with video, screenshots, and device logs. + +