coder · johnstcn · Feb 19, 2026
diff --git a/e2e/cast/README.md b/e2e/cast/README.md
@@ -0,0 +1,70 @@
+# Cast-Based E2E Testing
+
+This directory contains a cast-based end-to-end (E2E) test for the AgentAPI project. The framework simulates realistic agent interactions by replaying asciicast v2 recordings.
+
+## TL;DR
+
+```shell
+go test ./e2e/cast
+```
+
+## How it Works
+
+The testing framework (`cast_test.go`) does the following:
+- Starts the AgentAPI server with a fake agent (`cmd/cast_agent.go`).
+- The fake agent replays a `.cast` file (asciicast v2 format), writing terminal output to stdout and validating stdin against the recorded input events.
+- The testing framework sends messages to the fake agent via the AgentAPI and validates the responses.
+
+## Adding or Updating Fixtures
+
+### Step 1: Record with asciinema
+
+The recording captures both agentapi's output (what it sends to the agent) and the agent's responses.
+Use `asciinema` to record agentapi wrapping the agent. Below is an example for Claude:
+
+```shell
+# Set terminal to minimum size Claude supports (80 columns)
+stty cols 80 rows 1000
+
+# Run agentapi (build it first, or use an existing known-good version) wrapping asciinema
+agentapi server -t claude -- asciinema rec --stdin testdata/claude.cast --command 'echo hello | claude'
+
+# Then interact with Claude via AgentAPI (either web UI or API):
+# 1. Wait for Claude to respond to the initial prompt
+# 2. Type a test message (e.g., "This is just a test.")
+# 3. Wait for Claude's reply
+# 4. Press Ctrl+C to exit
+```
+
+**Important notes:**
+- The terminal dimensions (80x1000) must match the `--term-width` and `--term-height` flags in the test (see `defaultCmdFn` in `cast_test.go`).
+- Recording agentapi (not Claude directly) captures the exact byte sequences that agentapi sends, including bracketed paste mode escape sequences.
+- The `--stdin` flag captures your input, which the test uses to validate that agentapi sends the correct bytes.
+- To overwrite an existing fixture, add `--overwrite` to the `asciinema` invocation.
+
+### Step 2: Create the sidecar script file
+
+Create a sidecar file with the same base name as the recording (e.g. `testdata/claude.txt` for `testdata/claude.cast`) that lists the expected conversation in order, one entry per line:
+
+```
+user	hello
+agent	Hello! How can I help you today?
+user	This is just a test.
+agent	Got it! Let me know if you need anything.
+```
+
+Each line is `<role><TAB><message>`. Valid roles are `user` and `agent`. Blank lines are ignored.
+
+**Flow explanation:**
+- Line 1: The initial prompt passed to the agent at startup
+- Line 2: The agent's reply to the initial prompt
+- Line 3: The test message sent via the AgentAPI
+- Line 4: The agent's reply to the test message
+
+Note: The AgentAPI merges the startup sequence (welcome screen + initial prompt + initial reply) into the first agent message. The test expects 3 messages from the API, even though the script file has 4 entries.
+
+### Step 3: Review and update the test
+
+> **Caution:** Review the recording before committing. Remove or redact any API keys, tokens, or other sensitive data that may appear in the terminal output (stdout events), recorded keystrokes (`"i"` events from `--stdin`), or environment variables captured in the cast header.
+
+To use a new fixture, update the `castFile` and `scriptFile` constants in `cast_test.go` to reference the new fixture files, then update the assertions in `TestE2E` to match the new conversation.
diff --git a/e2e/cast/cast_test.go b/e2e/cast/cast_test.go
@@ -0,0 +1,259 @@
+package cast_test
+
+import (
+	"bufio"
+	"bytes"
+	"context"
+	"fmt"
+	"io"
+	"net"
+	"net/http"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"strings"
+	"sync"
+	"testing"
+	"time"
+
+	agentapisdk "github.com/coder/agentapi-sdk-go"
+	"github.com/stretchr/testify/require"
+)
+
+// Timeouts bounding the test as a whole and its individual waits.
+const (
+	// castTestTimeout caps the lifetime of the context TestE2E runs under.
+	castTestTimeout = 30 * time.Second
+	// castOperationTimeout is handed to waitAgentAPIStable for each wait step.
+	castOperationTimeout = 10 * time.Second
+	// castHealthCheckTimeout is handed to waitForServer while the server starts.
+	castHealthCheckTimeout = 10 * time.Second
+)
+
+// Fixture paths, resolved relative to the test's working directory.
+const (
+	// castFile is the recording passed to the fake agent on its command line.
+	castFile = "testdata/claude.cast"
+	// scriptFile is the sidecar conversation script parsed by loadScript.
+	scriptFile = "testdata/claude.txt"
+)
+
+// scriptEntry is a single turn of the expected conversation, parsed from one
+// line of the sidecar script file.
+type scriptEntry struct {
+	// Role identifies the speaker: "user" or "agent".
+	Role string
+	// Message is the text that follows the first tab on the line.
+	Message string
+}
+
+// loadScript parses the tab-separated sidecar script at path into ordered
+// conversation entries. Every non-empty line must have the form
+// "<role>\t<message>" with role "user" or "agent"; a read error, a line
+// without a tab, or an unknown role fails the test immediately.
+func loadScript(t testing.TB, path string) []scriptEntry {
+	t.Helper()
+
+	raw, err := os.ReadFile(path)
+	require.NoError(t, err)
+
+	var script []scriptEntry
+	lines := bufio.NewScanner(bytes.NewReader(raw))
+	for lines.Scan() {
+		text := lines.Text()
+		if len(text) == 0 {
+			// Blank lines are permitted and produce no entry.
+			continue
+		}
+		// Split on the first tab only, so the message itself may contain tabs.
+		speaker, body, found := strings.Cut(text, "\t")
+		require.True(t, found, "malformed script line: %q", text)
+		require.Contains(t, []string{"user", "agent"}, speaker,
+			"unexpected role %q in script line: %q", speaker, text)
+		script = append(script, scriptEntry{Role: speaker, Message: body})
+	}
+	require.NoError(t, lines.Err())
+	return script
+}
+
+func TestE2E(t *testing.T) {
+	if testing.Short() {
+		t.Skip("Skipping integration test in short mode")
+	}
+
+	script := loadScript(t, scriptFile)
+	require.GreaterOrEqual(t, len(script), 4, "claude.txt must have at least 4 entries")
+	require.Equal(t, "user", script[0].Role)  // initial prompt (startup)
+	require.Equal(t, "agent", script[1].Role) // reply to initial prompt
+	require.Equal(t, "user", script[2].Role)  // test message sent via API
+	require.Equal(t, "agent", script[3].Role) // reply to test message
+	// Note: The API merges the startup sequence (initial prompt + initial reply)
+	// into the first agent message, so we expect 3 messages from the API, not 4.
+	initialPromptReply := script[1].Message
+	userMessage := script[2].Message
+	agentReply := script[3].Message
+
+	ctx, cancel := context.WithTimeout(context.Background(), castTestTimeout)
+	defer cancel()
+
+	apiClient := setup(ctx, t)
+
+	// Agent should be running while processing the initial greeting.
+	statusResp, err := apiClient.GetStatus(ctx)
+	require.NoError(t, err)
+	require.Equal(t, agentapisdk.StatusRunning, statusResp.Status)
+
+	require.NoError(t, waitAgentAPIStable(ctx, t, apiClient, castOperationTimeout, "initial stable"))
+
+	_, err = apiClient.PostMessage(ctx, agentapisdk.PostMessageParams{
+		Content: userMessage,
+		Type:    agentapisdk.MessageTypeUser,
+	})
+	require.NoError(t, err, "failed to send message")
+
+	// Agent should be running while processing the reply.
+	statusResp, err = apiClient.GetStatus(ctx)
+	require.NoError(t, err)
+	require.Equal(t, agentapisdk.StatusRunning, statusResp.Status)
+
+	require.NoError(t, waitAgentAPIStable(ctx, t, apiClient, castOperationTimeout, "post message"))
+
+	msgResp, err := apiClient.GetMessages(ctx)
+	require.NoError(t, err, "failed to get messages")
+	require.Len(t, msgResp.Messages, 3)
+	// First message is agent greeting (contains startup screen + initial prompt reply)
+	require.Contains(t, msgResp.Messages[0].Content, initialPromptReply)
+	require.Contains(t, msgResp.Messages[1].Content, userMessage)
+	require.Contains(t, msgResp.Messages[2].Content, agentReply)
+}
+
+// defaultCmdFn returns the executable and argument list used to start the
+// AgentAPI server on serverPort, wrapping the fake agent (run via "go run"
+// from cwd/cmd/cast_agent.go) with castFile as its argument. The ctx and t
+// parameters are accepted but not used.
+func defaultCmdFn(ctx context.Context, t testing.TB, serverPort int, binaryPath, cwd string) (string, []string) {
+	agentSource := filepath.Join(cwd, "cmd", "cast_agent.go")
+	args := []string{
+		"server",
+		fmt.Sprintf("--port=%d", serverPort),
+		// Terminal dimensions must match the cast file (80x1000) for correct
+		// ANSI escape sequence positioning.
+		"--term-width=80",
+		"--term-height=1000",
+		"--",
+		"go", "run", agentSource, castFile,
+	}
+	return binaryPath, args
+}
+
+// setup locates or builds the agentapi binary, starts it as a server wrapping
+// the fake cast agent on a free port, and returns an SDK client pointed at it
+// once the server answers HTTP requests.
+//
+// If AGENTAPI_BINARY_PATH is set, that binary is used as-is; otherwise the
+// binary is built from the repository root into out/agentapi. The server's
+// stdout and stderr are forwarded to the test log, and a t.Cleanup hook kills
+// the process and waits for the log forwarders to finish. Any failure aborts
+// the test.
+func setup(ctx context.Context, t testing.TB) *agentapisdk.Client {
+	t.Helper()
+
+	cwd, err := os.Getwd()
+	require.NoError(t, err, "Failed to get current working directory")
+
+	binaryPath := os.Getenv("AGENTAPI_BINARY_PATH")
+	if binaryPath == "" {
+		// We're in e2e/cast, so go up two levels to reach the repo root
+		repoRoot := filepath.Join(cwd, "..", "..")
+		binaryPath = filepath.Join(repoRoot, "out", "agentapi")
+		t.Logf("Building binary at %s", binaryPath)
+		buildCmd := exec.CommandContext(ctx, "go", "build", "-o", binaryPath, ".")
+		buildCmd.Dir = repoRoot
+		t.Logf("run: %s", buildCmd.String())
+		// Capture compiler output so a build failure is diagnosable from the
+		// test log instead of showing only the exit status.
+		buildOutput, err := buildCmd.CombinedOutput()
+		require.NoError(t, err, "Failed to build binary: %s", buildOutput)
+	}
+
+	serverPort, err := getFreePort()
+	require.NoError(t, err, "Failed to get free port for server")
+
+	bin, args := defaultCmdFn(ctx, t, serverPort, binaryPath, cwd)
+	t.Logf("Running command: %s %s", bin, strings.Join(args, " "))
+	cmd := exec.CommandContext(ctx, bin, args...)
+
+	stdout, err := cmd.StdoutPipe()
+	require.NoError(t, err, "Failed to create stdout pipe")
+
+	stderr, err := cmd.StderrPipe()
+	require.NoError(t, err, "Failed to create stderr pipe")
+
+	err = cmd.Start()
+	require.NoError(t, err, "Failed to start agentapi server")
+
+	// Forward both output streams to the test log until the pipes close.
+	var wg sync.WaitGroup
+	wg.Add(2)
+
+	go func() {
+		defer wg.Done()
+		logOutput(t, "SERVER-STDOUT", stdout)
+	}()
+
+	go func() {
+		defer wg.Done()
+		logOutput(t, "SERVER-STDERR", stderr)
+	}()
+
+	// Registered before the readiness check so the server is torn down even
+	// when that check fails the test.
+	t.Cleanup(func() {
+		if cmd.Process != nil {
+			// Best-effort teardown: the process may already have exited, so
+			// the Kill and Wait errors are intentionally ignored.
+			_ = cmd.Process.Kill()
+			_ = cmd.Wait()
+		}
+		// The log goroutines call t.Logf, so they must finish before the
+		// test is allowed to complete.
+		wg.Wait()
+	})
+
+	serverURL := fmt.Sprintf("http://localhost:%d", serverPort)
+	require.NoError(t, waitForServer(ctx, t, serverURL, castHealthCheckTimeout), "Server not ready")
+	apiClient, err := agentapisdk.NewClient(serverURL)
+	require.NoError(t, err, "Failed to create agentapi SDK client")
+
+	return apiClient
+}
+
+// logOutput copies r to the test log line by line, prefixing each line with
+// "[prefix] ". It returns when r reaches EOF or a read fails.
+//
+// Lines up to 1 MiB are accepted. If scanning stops with an error (for
+// example an even longer line), the error is logged and the rest of r is
+// discarded so that a process writing to the other end of a pipe does not
+// block on a full buffer.
+func logOutput(t testing.TB, prefix string, r io.Reader) {
+	t.Helper()
+	scanner := bufio.NewScanner(r)
+	// The default 64 KiB token limit is easily exceeded by terminal output
+	// that contains long escape-sequence runs; raise it.
+	scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024)
+	for scanner.Scan() {
+		t.Logf("[%s] %s", prefix, scanner.Text())
+	}
+	if err := scanner.Err(); err != nil {
+		t.Logf("[%s] stopped reading output: %v", prefix, err)
+		// Keep draining; the copy error is irrelevant because nothing is kept.
+		_, _ = io.Copy(io.Discard, r)
+	}
+}
+
+// waitForServer polls url with HTTP GET every 100ms until a request succeeds
+// or timeout elapses. Any HTTP response, regardless of status code, counts as
+// "ready".
+//
+// It returns nil once the server responds. If the timeout expires or ctx is
+// cancelled first, it returns an error wrapping the context error, leaving it
+// to the caller to fail the test.
+func waitForServer(ctx context.Context, t testing.TB, url string, timeout time.Duration) error {
+	t.Helper()
+	client := &http.Client{Timeout: time.Second}
+	healthCtx, cancel := context.WithTimeout(ctx, timeout)
+	defer cancel()
+
+	ticker := time.NewTicker(100 * time.Millisecond)
+	defer ticker.Stop()
+
+	for {
+		select {
+		case <-healthCtx.Done():
+			return fmt.Errorf("server at %s not ready within timeout: %w", url, healthCtx.Err())
+		case <-ticker.C:
+			// Bind the probe to healthCtx so an in-flight request is
+			// abandoned as soon as the deadline passes.
+			req, err := http.NewRequestWithContext(healthCtx, http.MethodGet, url, nil)
+			if err != nil {
+				return fmt.Errorf("build health check request for %s: %w", url, err)
+			}
+			resp, err := client.Do(req)
+			if err != nil {
+				t.Logf("Server not ready yet: %s", err)
+				continue
+			}
+			_ = resp.Body.Close()
+			return nil
+		}
+	}
+}
+
+// waitAgentAPIStable subscribes to the AgentAPI event stream and blocks until
+// a status-change event reports StatusStable, or until waitFor elapses.
+//
+// It returns nil once the agent is stable. It returns the context error on
+// timeout or cancellation, and a "read events" error if the stream reports an
+// error or ends early. A failure to subscribe aborts the test. msg labels the
+// log line written on exit. When the wait ends in an error or the test has
+// already failed, the last message seen is dumped to stderr to aid debugging.
+func waitAgentAPIStable(ctx context.Context, t testing.TB, apiClient *agentapisdk.Client, waitFor time.Duration, msg string) (retErr error) {
+	t.Helper()
+	waitCtx, waitCancel := context.WithTimeout(ctx, waitFor)
+	defer waitCancel()
+
+	start := time.Now()
+	var currStatus agentapisdk.AgentStatus
+	var lastMessage string
+	defer func() {
+		elapsed := time.Since(start)
+		t.Logf("%s: agent API status: %s (elapsed: %s)", msg, currStatus, elapsed.Round(100*time.Millisecond))
+		// A returned error has not marked the test as failed yet (the caller
+		// does that), so check it explicitly in addition to t.Failed().
+		if (retErr != nil || t.Failed()) && lastMessage != "" {
+			fmt.Fprintf(os.Stderr, "\n=== Last agent message ===\n%s\n=== End last agent message ===\n", lastMessage)
+		}
+	}()
+
+	// Subscribe with waitCtx so the subscription is cancelled when this
+	// function returns instead of lingering for the rest of the test.
+	evts, errs, err := apiClient.SubscribeEvents(waitCtx)
+	require.NoError(t, err, "failed to subscribe to events")
+	for {
+		select {
+		case <-waitCtx.Done():
+			return waitCtx.Err()
+		case evt, ok := <-evts:
+			if !ok {
+				// A closed channel would otherwise yield zero values forever.
+				return fmt.Errorf("read events: stream closed before agent became stable")
+			}
+			switch e := evt.(type) {
+			case agentapisdk.EventStatusChange:
+				currStatus = e.Status
+				if currStatus == agentapisdk.StatusStable {
+					return nil
+				}
+			case agentapisdk.EventMessageUpdate:
+				lastMessage = e.Message
+				t.Logf("Got message event: id=%d role=%s len=%d", e.Id, e.Role, len(e.Message))
+			}
+		case streamErr, ok := <-errs:
+			if !ok {
+				// Closed without an error: stop selecting on it and keep
+				// waiting for events or the deadline.
+				errs = nil
+				continue
+			}
+			return fmt.Errorf("read events: %w", streamErr)
+		}
+	}
+}
+
+// getFreePort asks the OS for an unused TCP port on localhost by binding to
+// port 0, reads back the assigned port, and releases the listener. The port
+// is not reserved after return, so another process could claim it before the
+// caller binds to it.
+func getFreePort() (int, error) {
+	loopback, err := net.ResolveTCPAddr("tcp", "localhost:0")
+	if err != nil {
+		return 0, err
+	}
+
+	listener, err := net.ListenTCP("tcp", loopback)
+	if err != nil {
+		return 0, err
+	}
+
+	port := listener.Addr().(*net.TCPAddr).Port
+	// Only the port number is needed; a close error changes nothing.
+	_ = listener.Close()
+	return port, nil
+}