Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 70 additions & 0 deletions e2e/cast/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
# Cast-Based E2E Testing

This directory contains a cast-based end-to-end (E2E) test for the AgentAPI project. The framework simulates realistic agent interactions by replaying asciicast v2 recordings.

## TL;DR

```shell
go test ./e2e/cast
```

## How it Works

The testing framework (`cast_test.go`) does the following:
- Starts the AgentAPI server with a fake agent (`cmd/cast_agent.go`).
- The fake agent replays a `.cast` file (asciicast v2 format), writing terminal output to stdout and validating stdin against the recorded input events.
- The testing framework sends messages to the fake agent via the AgentAPI and validates the responses.

## Adding or Updating Fixtures

### Step 1: Record with asciinema

The recording captures both agentapi's output (what it sends to the agent) and the agent's responses.
Use `asciinema` to record agentapi wrapping the agent. Below is an example for Claude:

```shell
# Set terminal to minimum size Claude supports (80 columns)
stty cols 80 rows 1000

# Run agentapi (build it first, or use an existing known-good version) so that it wraps the asciinema recording session
agentapi server -t claude -- asciinema rec --stdin testdata/claude.cast --command 'echo hello | claude'

# Then interact with Claude via AgentAPI (either web UI or API):
# 1. Wait for Claude to respond to the initial prompt
# 2. Type a test message (e.g., "This is just a test.")
# 3. Wait for Claude's reply
# 4. Press Ctrl+C to exit
```

**Important notes:**
- The terminal dimensions (80x1000) must match the `--term-width` and `--term-height` flags in the test (see `defaultCmdFn` in `cast_test.go`).
- Recording agentapi (not Claude directly) captures the exact byte sequences that agentapi sends, including bracketed paste mode escape sequences.
- The `--stdin` flag captures your input, which the test uses to validate that agentapi sends the correct bytes.
- To overwrite an existing fixture, add `--overwrite` to the `asciinema` invocation.

### Step 2: Create the sidecar script file

Create a matching sidecar file `testdata/my-fixture.txt` that lists the expected conversation in order, one entry per line:

```
user hello
agent Hello! How can I help you today?
user This is just a test.
agent Got it! Let me know if you need anything.
```

Each line is `<role><TAB><message>`: the role and the message are separated by a single tab character, and empty lines are ignored. Valid roles are `agent` and `user`.

**Flow explanation:**
- Line 1: The initial prompt passed to the agent at startup
- Line 2: The agent's reply to the initial prompt
- Line 3: The test message sent via the AgentAPI
- Line 4: The agent's reply to the test message

Note: The AgentAPI merges the startup sequence (welcome screen + initial prompt + initial reply) into the first agent message. The test therefore expects 3 messages from the API (the merged startup message, the test message, and the agent's reply to it), even though the script file has 4 entries.

### Step 3: Review and update the test

> **Caution:** Review the recording before committing. Remove or redact any API keys, tokens, or other sensitive data that may appear in the terminal output (stdout events), recorded keystrokes (`"i"` events from `--stdin`), or environment variables captured in the cast header.

To use a new fixture, update the `castFile` and `scriptFile` constants in `cast_test.go` to reference the new fixture files, then update the assertions in `TestE2E` to match the new conversation.
259 changes: 259 additions & 0 deletions e2e/cast/cast_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,259 @@
package cast_test

import (
"bufio"
"bytes"
"context"
"fmt"
"io"
"net"
"net/http"
"os"
"os/exec"
"path/filepath"
"strings"
"sync"
"testing"
"time"

agentapisdk "github.com/coder/agentapi-sdk-go"
"github.com/stretchr/testify/require"
)

// Time budgets used by the cast-based E2E test.
const (
	// castTestTimeout bounds the context that TestE2E shares across every step.
	castTestTimeout = 30 * time.Second
	// castOperationTimeout is how long a single wait for the agent to become
	// stable may take.
	castOperationTimeout = 10 * time.Second
	// castHealthCheckTimeout is how long setup waits for the server to answer
	// its first HTTP request.
	castHealthCheckTimeout = 10 * time.Second
)

// Fixture locations, relative to this package's directory.
const (
	// castFile is the asciicast recording replayed by the fake agent.
	castFile = "testdata/claude.cast"
	// scriptFile is the sidecar script listing the expected conversation.
	scriptFile = "testdata/claude.txt"
)

// scriptEntry is a single line of the sidecar script file: the speaker and
// the text attributed to that speaker.
type scriptEntry struct {
	// Role names the speaker; the loader accepts "user" or "agent".
	Role string
	// Message is everything after the first tab on the script line.
	Message string
}

// loadScript reads the sidecar script at path and returns its entries in file
// order.
//
// Each non-empty line must have the form "<role>\t<message>", where role is
// "user" or "agent"; the line is split at the first tab. Empty lines are
// ignored. Any read error, malformed line, or unknown role fails the test
// immediately via require.
func loadScript(t testing.TB, path string) []scriptEntry {
	t.Helper()

	raw, err := os.ReadFile(path)
	require.NoError(t, err)

	var script []scriptEntry
	lines := bufio.NewScanner(bytes.NewReader(raw))
	for lines.Scan() {
		text := lines.Text()
		if text != "" {
			speaker, body, found := strings.Cut(text, "\t")
			require.True(t, found, "malformed script line: %q", text)
			require.Contains(t, []string{"user", "agent"}, speaker,
				"unexpected role %q in script line: %q", speaker, text)
			script = append(script, scriptEntry{Role: speaker, Message: body})
		}
	}
	// The scanner stops silently on a read error or an over-long line, so the
	// error has to be checked once the loop is done.
	require.NoError(t, lines.Err())
	return script
}

// TestE2E drives a real agentapi server wrapped around the fake cast agent and
// checks the conversation reported by the API against the sidecar script.
//
// The script must start with user, agent, user, agent. Entry 0 is the initial
// prompt given at startup, entry 1 the reply to it, entry 2 the message this
// test posts through the API, and entry 3 the reply to that message.
func TestE2E(t *testing.T) {
	if testing.Short() {
		t.Skip("Skipping integration test in short mode")
	}

	script := loadScript(t, scriptFile)
	require.GreaterOrEqual(t, len(script), 4, "claude.txt must have at least 4 entries")
	for i, wantRole := range [...]string{"user", "agent", "user", "agent"} {
		require.Equal(t, wantRole, script[i].Role)
	}
	// Note: The API merges the startup sequence (initial prompt + initial reply)
	// into the first agent message, so we expect 3 messages from the API, not 4.
	var (
		initialPromptReply = script[1].Message
		userMessage        = script[2].Message
		agentReply         = script[3].Message
	)

	ctx, cancel := context.WithTimeout(context.Background(), castTestTimeout)
	defer cancel()

	apiClient := setup(ctx, t)

	// requireRunning asserts that the agent currently reports the running status.
	requireRunning := func() {
		status, err := apiClient.GetStatus(ctx)
		require.NoError(t, err)
		require.Equal(t, agentapisdk.StatusRunning, status.Status)
	}

	// Agent should be running while processing the initial greeting.
	requireRunning()
	require.NoError(t, waitAgentAPIStable(ctx, t, apiClient, castOperationTimeout, "initial stable"))

	_, err := apiClient.PostMessage(ctx, agentapisdk.PostMessageParams{
		Content: userMessage,
		Type:    agentapisdk.MessageTypeUser,
	})
	require.NoError(t, err, "failed to send message")

	// Agent should be running while processing the reply.
	requireRunning()
	require.NoError(t, waitAgentAPIStable(ctx, t, apiClient, castOperationTimeout, "post message"))

	msgResp, err := apiClient.GetMessages(ctx)
	require.NoError(t, err, "failed to get messages")
	require.Len(t, msgResp.Messages, 3)
	// First message is agent greeting (contains startup screen + initial prompt reply),
	// followed by the posted user message and the agent's reply to it.
	for i, want := range []string{initialPromptReply, userMessage, agentReply} {
		require.Contains(t, msgResp.Messages[i].Content, want)
	}
}

// defaultCmdFn returns the executable and argument list used to launch the
// agentapi server around the fake cast agent.
//
// The server listens on serverPort and runs `go run <cwd>/cmd/cast_agent.go`
// with castFile as its only argument. ctx and t are accepted but not used.
func defaultCmdFn(ctx context.Context, t testing.TB, serverPort int, binaryPath, cwd string) (string, []string) {
	fakeAgent := filepath.Join(cwd, "cmd", "cast_agent.go")
	serverArgs := []string{
		"server",
		fmt.Sprintf("--port=%d", serverPort),
		// Terminal dimensions must match the cast file (80x1000) for correct
		// ANSI escape sequence positioning.
		"--term-width=80",
		"--term-height=1000",
		"--",
		"go", "run", fakeAgent, castFile,
	}
	return binaryPath, serverArgs
}

// setup starts an agentapi server for the test and returns an SDK client
// pointed at it.
//
// The binary is taken from AGENTAPI_BINARY_PATH when that variable is set;
// otherwise it is built from the repository root (two levels above the current
// directory) into out/agentapi. The server's stdout and stderr are forwarded
// to the test log. A cleanup is registered that kills the server and waits for
// the log forwarders to finish. Any failure aborts the test via require.
func setup(ctx context.Context, t testing.TB) *agentapisdk.Client {
	t.Helper()

	cwd, err := os.Getwd()
	require.NoError(t, err, "Failed to get current working directory")

	binaryPath := os.Getenv("AGENTAPI_BINARY_PATH")
	if binaryPath == "" {
		// We're in e2e/cast, so go up two levels to reach the repo root
		repoRoot := filepath.Join(cwd, "..", "..")
		binaryPath = filepath.Join(repoRoot, "out", "agentapi")
		t.Logf("Building binary at %s", binaryPath)
		buildCmd := exec.CommandContext(ctx, "go", "build", "-o", binaryPath, ".")
		buildCmd.Dir = repoRoot
		t.Logf("run: %s", buildCmd.String())
		// Capture the compiler output so a failed build reports why it failed,
		// not just the exit status.
		buildOutput, buildErr := buildCmd.CombinedOutput()
		require.NoError(t, buildErr, "Failed to build binary:\n%s", buildOutput)
	}

	serverPort, err := getFreePort()
	require.NoError(t, err, "Failed to get free port for server")

	bin, args := defaultCmdFn(ctx, t, serverPort, binaryPath, cwd)
	t.Logf("Running command: %s %s", bin, strings.Join(args, " "))
	cmd := exec.CommandContext(ctx, bin, args...)

	stdout, err := cmd.StdoutPipe()
	require.NoError(t, err, "Failed to create stdout pipe")

	stderr, err := cmd.StderrPipe()
	require.NoError(t, err, "Failed to create stderr pipe")

	err = cmd.Start()
	require.NoError(t, err, "Failed to start agentapi server")

	// One forwarding goroutine per stream; the cleanup below waits for both so
	// that nothing writes to the test log after the test has finished.
	var wg sync.WaitGroup
	wg.Add(2)

	go func() {
		defer wg.Done()
		logOutput(t, "SERVER-STDOUT", stdout)
	}()

	go func() {
		defer wg.Done()
		logOutput(t, "SERVER-STDERR", stderr)
	}()

	t.Cleanup(func() {
		if cmd.Process != nil {
			// Best-effort teardown: the process may already have exited, and
			// Wait reports the kill as an error, so both results are ignored.
			_ = cmd.Process.Kill()
			_ = cmd.Wait()
		}
		wg.Wait()
	})

	serverURL := fmt.Sprintf("http://localhost:%d", serverPort)
	require.NoError(t, waitForServer(ctx, t, serverURL, castHealthCheckTimeout), "Server not ready")
	apiClient, err := agentapisdk.NewClient(serverURL)
	require.NoError(t, err, "Failed to create agentapi SDK client")

	return apiClient
}

// logOutput copies r to the test log line by line, prefixing every line with
// "[prefix] ". It returns once r is exhausted or fails.
//
// Lines of up to 1 MiB are supported. If reading stops early (an even longer
// line, or a read error) the reason is logged and the rest of r is drained, so
// a process writing to the other end of a pipe is not left blocked.
func logOutput(t testing.TB, prefix string, r io.Reader) {
	t.Helper()

	// bufio.Scanner gives up on lines above 64 KiB by default; terminal dumps
	// can exceed that, so allow the buffer to grow further.
	const maxLineBytes = 1 << 20

	scanner := bufio.NewScanner(r)
	scanner.Buffer(make([]byte, 0, 64*1024), maxLineBytes)
	for scanner.Scan() {
		t.Logf("[%s] %s", prefix, scanner.Text())
	}
	if err := scanner.Err(); err != nil {
		t.Logf("[%s] stopped reading output: %v", prefix, err)
		// Keep consuming so the writer never stalls on a full pipe. The result
		// is ignored because there is nothing left to do with a failing reader.
		_, _ = io.Copy(io.Discard, r)
	}
}

// waitForServer polls url with HTTP GET every 100ms until a request completes
// or timeout elapses.
//
// Any HTTP response, whatever its status code, counts as ready. It returns nil
// once the server answers. Otherwise it returns an error wrapping the context
// error, so callers can tell a timeout (context.DeadlineExceeded) from
// cancellation of ctx (context.Canceled). Failed attempts are written to the
// test log.
func waitForServer(ctx context.Context, t testing.TB, url string, timeout time.Duration) error {
	t.Helper()
	client := &http.Client{Timeout: time.Second}
	healthCtx, cancel := context.WithTimeout(ctx, timeout)
	defer cancel()

	ticker := time.NewTicker(100 * time.Millisecond)
	defer ticker.Stop()

	for {
		select {
		case <-healthCtx.Done():
			return fmt.Errorf("server at %s not ready within timeout: %w", url, healthCtx.Err())
		case <-ticker.C:
			// Bind the probe to healthCtx so an in-flight request is abandoned
			// as soon as the deadline passes or the caller cancels.
			req, err := http.NewRequestWithContext(healthCtx, http.MethodGet, url, nil)
			if err != nil {
				return fmt.Errorf("build health check request for %s: %w", url, err)
			}
			resp, err := client.Do(req)
			if err != nil {
				t.Logf("Server not ready yet: %s", err)
				continue
			}
			_ = resp.Body.Close()
			return nil
		}
	}
}

// waitAgentAPIStable subscribes to the agent's event stream and blocks until a
// status-change event reports the stable status.
//
// It returns nil once the agent is stable. It returns the context error if
// waitFor elapses or ctx is cancelled first, and a "read events" error if the
// stream reports an error or ends early. Failure to subscribe aborts the test
// via require. msg labels the log line emitted on return. If the test has
// already failed by then, the last agent message seen is dumped to stderr to
// help debugging.
func waitAgentAPIStable(ctx context.Context, t testing.TB, apiClient *agentapisdk.Client, waitFor time.Duration, msg string) error {
	t.Helper()
	waitCtx, waitCancel := context.WithTimeout(ctx, waitFor)
	defer waitCancel()

	start := time.Now()
	var currStatus agentapisdk.AgentStatus
	var lastMessage string
	defer func() {
		elapsed := time.Since(start)
		t.Logf("%s: agent API status: %s (elapsed: %s)", msg, currStatus, elapsed.Round(100*time.Millisecond))
		if t.Failed() && lastMessage != "" {
			fmt.Fprintf(os.Stderr, "\n=== Last agent message ===\n%s\n=== End last agent message ===\n", lastMessage)
		}
	}()

	// Subscribe with waitCtx, not ctx, so the subscription is cancelled when
	// this function returns and does not stay open for the rest of the test.
	evts, errs, err := apiClient.SubscribeEvents(waitCtx)
	require.NoError(t, err, "failed to subscribe to events")
	for {
		select {
		case <-waitCtx.Done():
			return waitCtx.Err()
		case evt, ok := <-evts:
			if !ok {
				// A closed channel would deliver nil events in a tight loop.
				// Stop selecting on it and let the error channel or the
				// deadline decide the outcome.
				evts = nil
				if errs == nil {
					return fmt.Errorf("read events: event stream closed before agent became stable")
				}
				continue
			}
			switch e := evt.(type) {
			case agentapisdk.EventStatusChange:
				currStatus = e.Status
				if currStatus == agentapisdk.StatusStable {
					return nil
				}
			case agentapisdk.EventMessageUpdate:
				lastMessage = e.Message
				t.Logf("Got message event: id=%d role=%s len=%d", e.Id, e.Role, len(e.Message))
			}
		case err, ok := <-errs:
			if !ok || err == nil {
				// The error channel closed without reporting a failure.
				errs = nil
				if evts == nil {
					return fmt.Errorf("read events: event stream closed before agent became stable")
				}
				continue
			}
			return fmt.Errorf("read events: %w", err)
		}
	}
}

// getFreePort returns a TCP port on localhost that was free at the time of the
// call.
//
// It binds a listener to port 0, reads back the port that was assigned, and
// closes the listener before returning. The port is not reserved afterwards,
// so another process may claim it before the caller does.
func getFreePort() (int, error) {
	anyPort, err := net.ResolveTCPAddr("tcp", "localhost:0")
	if err != nil {
		return 0, err
	}

	listener, err := net.ListenTCP("tcp", anyPort)
	if err != nil {
		return 0, err
	}

	port := listener.Addr().(*net.TCPAddr).Port
	// Only the port number is needed; a close error changes nothing for the caller.
	_ = listener.Close()
	return port, nil
}
Loading
Loading