diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..b16dc08 --- /dev/null +++ b/.env.example @@ -0,0 +1,12 @@ +# Agent scenario tester + Claude Code — copy to `.env` in this repo root and fill in values. +# See tools/agent-scenario-tester/README.md + +# Required for: optional LLM-as-judge (`--judge` / RUN_LLM_JUDGE); often also used by Claude CLI for API access. +ANTHROPIC_API_KEY= + +# Optional: enable LLM judge without passing `--judge` each time (1 / true / yes). +# RUN_LLM_JUDGE=0 + +# Optional: model for the judge (defaults to claude-sonnet-4-20250514). +# JUDGE_MODEL= +# EVAL_SCORE_MODEL= diff --git a/.github/workflows/test-examples.yml b/.github/workflows/test-examples.yml index 9c762ad..c2da44e 100644 --- a/.github/workflows/test-examples.yml +++ b/.github/workflows/test-examples.yml @@ -70,10 +70,29 @@ jobs: working-directory: skills/event-gateway/examples/fastapi run: pytest test_webhook.py -v + test-outpost-saas: + name: Outpost SaaS examples (nextjs-saas + fastapi-saas) + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Setup Node.js + uses: actions/setup-node@v4 + with: + node-version: "20" + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - name: Run test-examples.sh outpost + run: ./scripts/test-examples.sh outpost + summary: name: Test Summary runs-on: ubuntu-latest - needs: [test-express, test-nextjs, test-fastapi] + needs: [test-express, test-nextjs, test-fastapi, test-outpost-saas] if: always() steps: - name: Check test results @@ -83,7 +102,8 @@ jobs: if [ "${{ needs.test-express.result }}" == "failure" ] || \ [ "${{ needs.test-nextjs.result }}" == "failure" ] || \ - [ "${{ needs.test-fastapi.result }}" == "failure" ]; then + [ "${{ needs.test-fastapi.result }}" == "failure" ] || \ + [ "${{ needs.test-outpost-saas.result }}" == "failure" ]; then echo "**Result:** Some tests failed" >> $GITHUB_STEP_SUMMARY exit 1 fi diff --git a/.gitignore b/.gitignore 
index 736a875..9b672e0 100644 --- a/.gitignore +++ b/.gitignore @@ -20,6 +20,7 @@ build/ *.egg-info/ .next/ out/ +.turbo/ # IDE and editor files .idea/ @@ -45,6 +46,9 @@ htmlcov/ # Package lock files (not tracked for example projects) package-lock.json yarn.lock +pnpm-lock.yaml +uv.lock +bun.lock # Agent scenario tester test-results/ diff --git a/AGENTS.md b/AGENTS.md index f4a0a15..20f9a8e 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -34,7 +34,7 @@ All skills MUST conform to the [Agent Skills specification](https://agentskills. - **Hookdeck** = the company brand, carried by the repo name (`hookdeck/agent-skills`) - **Event Gateway** ("Hookdeck Event Gateway") = the inbound product. Receives, routes, processes, and delivers webhooks/events. [Docs](https://hookdeck.com/docs/). -- **Outpost** ("Hookdeck Outpost") = the outbound product. Open-source infrastructure for sending webhooks and events to user-preferred destinations (HTTP, SQS, RabbitMQ, Pub/Sub, EventBridge, Kafka). [Docs](https://outpost.hookdeck.com/docs/). [GitHub](https://github.com/hookdeck/outpost). +- **Outpost** ("Hookdeck Outpost") = the outbound product. Open-source infrastructure for sending webhooks and events to user-preferred destinations (HTTP, SQS, RabbitMQ, Pub/Sub, and more—see docs for the current matrix). [Docs](https://hookdeck.com/docs/outpost). [GitHub](https://github.com/hookdeck/outpost). Skills are prefixed by product name: `event-gateway` or `outpost`. The company brand is not repeated in skill names because it's carried by the repo. 
@@ -93,6 +93,15 @@ hookdeck/agent-skills/ fastapi/ # Hookdeck signature verification handler outpost/ # Outpost skill (separate product) SKILL.md + references/ + outpost-quickstarts.md # Canonical quickstart + llms.txt links + outpost-scope.md # Scope ladder, topic reconciliation, BFF pointers + outpost-verify.md # Trimmed “before you stop” checklist + nextjs-saas-integration-map.md + fastapi-saas-integration-map.md + examples/ + nextjs-saas/ + fastapi-saas/ AGENTS.md # This file CLAUDE.md CONTRIBUTING.md @@ -265,6 +274,7 @@ Rules provide concise, always-on guidance. Keep them minimal; link to skills for - **Cursor first:** Lead with Cursor plugin install (`/add-plugin hookdeck`). What the plugin does. - **Retain generic skills:** Keep `npx skills add hookdeck/agent-skills` and full Agent Skills usage for Claude, ChatGPT, and other agents. Do not remove the generic install path. +- **Human-facing doc links in README.md:** Do not link to Hookdeck documentation URLs that end in `.md` (for example `https://hookdeck.com/docs/cli/mcp.md`). Those `.md` endpoints exist mainly for agents and tools fetching markdown. In the repo root README, use pages without `.md` (for example [MCP & Skills](https://hookdeck.com/docs/mcp), [CLI](https://hookdeck.com/docs/cli)) and describe where to find a subsection (e.g. MCP in the CLI doc’s Event Gateway operations table) when there is no dedicated HTML path. 
### Plugin description and keywords diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index e7d4b6e..08470d7 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -31,8 +31,16 @@ cd agent-skills cd skills/event-gateway/examples/express && npm install && npm test cd skills/event-gateway/examples/nextjs && npm install && npm test cd skills/event-gateway/examples/fastapi && pip install -r requirements.txt && pytest test_webhook.py -v + +# Outpost SaaS reference (Next.js + Outpost SDK; large tree — npm install may take a minute) +cd skills/outpost/examples/nextjs-saas && npm install && npm test + +# Outpost FastAPI SaaS (backend only — pip install in backend/) +cd skills/outpost/examples/fastapi-saas/backend && python3 -m venv venv && source venv/bin/activate && pip install pytest httpx 'fastapi>=0.114' && pytest test_outpost_wire.py -q ``` +When you bump dependencies in **Outpost** SaaS examples (`skills/outpost/examples/nextjs-saas` or `skills/outpost/examples/fastapi-saas`), update the **Example stack snapshot** table in `skills/outpost/SKILL.md` to match the new pins, and run `./scripts/test-examples.sh outpost` before opening a PR. + **Agent scenario tests** (end-to-end: install skills, run Claude, score report): see [TESTING.md](TESTING.md#agent-scenario-testing-two-layers). From repo root: `./scripts/test-agent-scenario.sh run receive-webhooks express` or `./scripts/test-agent-scenario.sh list`. ## Repository Structure diff --git a/README.md b/README.md index 001592b..43c2b8b 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Hookdeck Agent Skills -Equip your AI coding agent with webhook and event-driven architecture expertise. Receive, queue, route, and deliver webhooks with [Hookdeck Event Gateway](https://hookdeck.com), test webhooks locally with the Hookdeck CLI, and build outbound webhook delivery with [Outpost](https://outpost.hookdeck.com). +Equip your AI coding agent with webhook and event-driven architecture expertise. 
Receive, queue, route, and deliver webhooks with [Hookdeck Event Gateway](https://hookdeck.com), test webhooks locally with the Hookdeck CLI, and build outbound webhook delivery with [Hookdeck Outpost](https://hookdeck.com/docs/outpost). statement-breakpoint +CREATE TABLE IF NOT EXISTS "invitations" ( + "id" serial PRIMARY KEY NOT NULL, + "team_id" integer NOT NULL, + "email" varchar(255) NOT NULL, + "role" varchar(50) NOT NULL, + "invited_by" integer NOT NULL, + "invited_at" timestamp DEFAULT now() NOT NULL, + "status" varchar(20) DEFAULT 'pending' NOT NULL +); +--> statement-breakpoint +CREATE TABLE IF NOT EXISTS "team_members" ( + "id" serial PRIMARY KEY NOT NULL, + "user_id" integer NOT NULL, + "team_id" integer NOT NULL, + "role" varchar(50) NOT NULL, + "joined_at" timestamp DEFAULT now() NOT NULL +); +--> statement-breakpoint +CREATE TABLE IF NOT EXISTS "teams" ( + "id" serial PRIMARY KEY NOT NULL, + "name" varchar(100) NOT NULL, + "created_at" timestamp DEFAULT now() NOT NULL, + "updated_at" timestamp DEFAULT now() NOT NULL, + "stripe_customer_id" text, + "stripe_subscription_id" text, + "stripe_product_id" text, + "plan_name" varchar(50), + "subscription_status" varchar(20), + CONSTRAINT "teams_stripe_customer_id_unique" UNIQUE("stripe_customer_id"), + CONSTRAINT "teams_stripe_subscription_id_unique" UNIQUE("stripe_subscription_id") +); +--> statement-breakpoint +CREATE TABLE IF NOT EXISTS "users" ( + "id" serial PRIMARY KEY NOT NULL, + "name" varchar(100), + "email" varchar(255) NOT NULL, + "password_hash" text NOT NULL, + "role" varchar(20) DEFAULT 'member' NOT NULL, + "created_at" timestamp DEFAULT now() NOT NULL, + "updated_at" timestamp DEFAULT now() NOT NULL, + "deleted_at" timestamp, + CONSTRAINT "users_email_unique" UNIQUE("email") +); +--> statement-breakpoint +DO $$ BEGIN + ALTER TABLE "activity_logs" ADD CONSTRAINT "activity_logs_team_id_teams_id_fk" FOREIGN KEY ("team_id") REFERENCES "public"."teams"("id") ON DELETE no action ON UPDATE no 
action; +EXCEPTION + WHEN duplicate_object THEN null; +END $$; +--> statement-breakpoint +DO $$ BEGIN + ALTER TABLE "activity_logs" ADD CONSTRAINT "activity_logs_user_id_users_id_fk" FOREIGN KEY ("user_id") REFERENCES "public"."users"("id") ON DELETE no action ON UPDATE no action; +EXCEPTION + WHEN duplicate_object THEN null; +END $$; +--> statement-breakpoint +DO $$ BEGIN + ALTER TABLE "invitations" ADD CONSTRAINT "invitations_team_id_teams_id_fk" FOREIGN KEY ("team_id") REFERENCES "public"."teams"("id") ON DELETE no action ON UPDATE no action; +EXCEPTION + WHEN duplicate_object THEN null; +END $$; +--> statement-breakpoint +DO $$ BEGIN + ALTER TABLE "invitations" ADD CONSTRAINT "invitations_invited_by_users_id_fk" FOREIGN KEY ("invited_by") REFERENCES "public"."users"("id") ON DELETE no action ON UPDATE no action; +EXCEPTION + WHEN duplicate_object THEN null; +END $$; +--> statement-breakpoint +DO $$ BEGIN + ALTER TABLE "team_members" ADD CONSTRAINT "team_members_user_id_users_id_fk" FOREIGN KEY ("user_id") REFERENCES "public"."users"("id") ON DELETE no action ON UPDATE no action; +EXCEPTION + WHEN duplicate_object THEN null; +END $$; +--> statement-breakpoint +DO $$ BEGIN + ALTER TABLE "team_members" ADD CONSTRAINT "team_members_team_id_teams_id_fk" FOREIGN KEY ("team_id") REFERENCES "public"."teams"("id") ON DELETE no action ON UPDATE no action; +EXCEPTION + WHEN duplicate_object THEN null; +END $$; diff --git a/skills/outpost/examples/nextjs-saas/lib/db/migrations/meta/0000_snapshot.json b/skills/outpost/examples/nextjs-saas/lib/db/migrations/meta/0000_snapshot.json new file mode 100644 index 0000000..622eb97 --- /dev/null +++ b/skills/outpost/examples/nextjs-saas/lib/db/migrations/meta/0000_snapshot.json @@ -0,0 +1,389 @@ +{ + "id": "261fd993-fb2c-43e7-89d6-cd58786c5f58", + "prevId": "00000000-0000-0000-0000-000000000000", + "version": "7", + "dialect": "postgresql", + "tables": { + "public.activity_logs": { + "name": "activity_logs", + "schema": "", + 
"columns": { + "id": { + "name": "id", + "type": "serial", + "primaryKey": true, + "notNull": true + }, + "team_id": { + "name": "team_id", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "user_id": { + "name": "user_id", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "action": { + "name": "action", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "timestamp": { + "name": "timestamp", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "ip_address": { + "name": "ip_address", + "type": "varchar(45)", + "primaryKey": false, + "notNull": false + } + }, + "indexes": {}, + "foreignKeys": { + "activity_logs_team_id_teams_id_fk": { + "name": "activity_logs_team_id_teams_id_fk", + "tableFrom": "activity_logs", + "tableTo": "teams", + "columnsFrom": [ + "team_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + }, + "activity_logs_user_id_users_id_fk": { + "name": "activity_logs_user_id_users_id_fk", + "tableFrom": "activity_logs", + "tableTo": "users", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {} + }, + "public.invitations": { + "name": "invitations", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "serial", + "primaryKey": true, + "notNull": true + }, + "team_id": { + "name": "team_id", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "email": { + "name": "email", + "type": "varchar(255)", + "primaryKey": false, + "notNull": true + }, + "role": { + "name": "role", + "type": "varchar(50)", + "primaryKey": false, + "notNull": true + }, + "invited_by": { + "name": "invited_by", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "invited_at": { + "name": "invited_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": 
"now()" + }, + "status": { + "name": "status", + "type": "varchar(20)", + "primaryKey": false, + "notNull": true, + "default": "'pending'" + } + }, + "indexes": {}, + "foreignKeys": { + "invitations_team_id_teams_id_fk": { + "name": "invitations_team_id_teams_id_fk", + "tableFrom": "invitations", + "tableTo": "teams", + "columnsFrom": [ + "team_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + }, + "invitations_invited_by_users_id_fk": { + "name": "invitations_invited_by_users_id_fk", + "tableFrom": "invitations", + "tableTo": "users", + "columnsFrom": [ + "invited_by" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {} + }, + "public.team_members": { + "name": "team_members", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "serial", + "primaryKey": true, + "notNull": true + }, + "user_id": { + "name": "user_id", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "team_id": { + "name": "team_id", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "role": { + "name": "role", + "type": "varchar(50)", + "primaryKey": false, + "notNull": true + }, + "joined_at": { + "name": "joined_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": { + "team_members_user_id_users_id_fk": { + "name": "team_members_user_id_users_id_fk", + "tableFrom": "team_members", + "tableTo": "users", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + }, + "team_members_team_id_teams_id_fk": { + "name": "team_members_team_id_teams_id_fk", + "tableFrom": "team_members", + "tableTo": "teams", + "columnsFrom": [ + "team_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + 
"uniqueConstraints": {} + }, + "public.teams": { + "name": "teams", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "serial", + "primaryKey": true, + "notNull": true + }, + "name": { + "name": "name", + "type": "varchar(100)", + "primaryKey": false, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "stripe_customer_id": { + "name": "stripe_customer_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "stripe_subscription_id": { + "name": "stripe_subscription_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "stripe_product_id": { + "name": "stripe_product_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "plan_name": { + "name": "plan_name", + "type": "varchar(50)", + "primaryKey": false, + "notNull": false + }, + "subscription_status": { + "name": "subscription_status", + "type": "varchar(20)", + "primaryKey": false, + "notNull": false + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": { + "teams_stripe_customer_id_unique": { + "name": "teams_stripe_customer_id_unique", + "nullsNotDistinct": false, + "columns": [ + "stripe_customer_id" + ] + }, + "teams_stripe_subscription_id_unique": { + "name": "teams_stripe_subscription_id_unique", + "nullsNotDistinct": false, + "columns": [ + "stripe_subscription_id" + ] + } + } + }, + "public.users": { + "name": "users", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "serial", + "primaryKey": true, + "notNull": true + }, + "name": { + "name": "name", + "type": "varchar(100)", + "primaryKey": false, + "notNull": false + }, + "email": { + "name": "email", + "type": "varchar(255)", + "primaryKey": false, + "notNull": true + }, + 
"password_hash": { + "name": "password_hash", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "role": { + "name": "role", + "type": "varchar(20)", + "primaryKey": false, + "notNull": true, + "default": "'member'" + }, + "created_at": { + "name": "created_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "deleted_at": { + "name": "deleted_at", + "type": "timestamp", + "primaryKey": false, + "notNull": false + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": { + "users_email_unique": { + "name": "users_email_unique", + "nullsNotDistinct": false, + "columns": [ + "email" + ] + } + } + } + }, + "enums": {}, + "schemas": {}, + "sequences": {}, + "_meta": { + "columns": {}, + "schemas": {}, + "tables": {} + } +} \ No newline at end of file diff --git a/skills/outpost/examples/nextjs-saas/lib/db/migrations/meta/_journal.json b/skills/outpost/examples/nextjs-saas/lib/db/migrations/meta/_journal.json new file mode 100644 index 0000000..fd44474 --- /dev/null +++ b/skills/outpost/examples/nextjs-saas/lib/db/migrations/meta/_journal.json @@ -0,0 +1,13 @@ +{ + "version": "7", + "dialect": "postgresql", + "entries": [ + { + "idx": 0, + "version": "7", + "when": 1726443359662, + "tag": "0000_soft_the_anarchist", + "breakpoints": true + } + ] +} \ No newline at end of file diff --git a/skills/outpost/examples/nextjs-saas/lib/db/queries.ts b/skills/outpost/examples/nextjs-saas/lib/db/queries.ts new file mode 100644 index 0000000..1bcdf64 --- /dev/null +++ b/skills/outpost/examples/nextjs-saas/lib/db/queries.ts @@ -0,0 +1,130 @@ +import { desc, and, eq, isNull } from 'drizzle-orm'; +import { db } from './drizzle'; +import { activityLogs, teamMembers, teams, users } from './schema'; +import { cookies } from 'next/headers'; +import { 
verifyToken } from '@/lib/auth/session'; + +export async function getUser() { + const sessionCookie = (await cookies()).get('session'); + if (!sessionCookie || !sessionCookie.value) { + return null; + } + + const sessionData = await verifyToken(sessionCookie.value); + if ( + !sessionData || + !sessionData.user || + typeof sessionData.user.id !== 'number' + ) { + return null; + } + + if (new Date(sessionData.expires) < new Date()) { + return null; + } + + const user = await db + .select() + .from(users) + .where(and(eq(users.id, sessionData.user.id), isNull(users.deletedAt))) + .limit(1); + + if (user.length === 0) { + return null; + } + + return user[0]; +} + +export async function getTeamByStripeCustomerId(customerId: string) { + const result = await db + .select() + .from(teams) + .where(eq(teams.stripeCustomerId, customerId)) + .limit(1); + + return result.length > 0 ? result[0] : null; +} + +export async function updateTeamSubscription( + teamId: number, + subscriptionData: { + stripeSubscriptionId: string | null; + stripeProductId: string | null; + planName: string | null; + subscriptionStatus: string; + } +) { + await db + .update(teams) + .set({ + ...subscriptionData, + updatedAt: new Date() + }) + .where(eq(teams.id, teamId)); +} + +export async function getUserWithTeam(userId: number) { + const result = await db + .select({ + user: users, + teamId: teamMembers.teamId + }) + .from(users) + .leftJoin(teamMembers, eq(users.id, teamMembers.userId)) + .where(eq(users.id, userId)) + .limit(1); + + return result[0]; +} + +export async function getActivityLogs() { + const user = await getUser(); + if (!user) { + throw new Error('User not authenticated'); + } + + return await db + .select({ + id: activityLogs.id, + action: activityLogs.action, + timestamp: activityLogs.timestamp, + ipAddress: activityLogs.ipAddress, + userName: users.name + }) + .from(activityLogs) + .leftJoin(users, eq(activityLogs.userId, users.id)) + .where(eq(activityLogs.userId, user.id)) + 
.orderBy(desc(activityLogs.timestamp)) + .limit(10); +} + +export async function getTeamForUser() { + const user = await getUser(); + if (!user) { + return null; + } + + const result = await db.query.teamMembers.findFirst({ + where: eq(teamMembers.userId, user.id), + with: { + team: { + with: { + teamMembers: { + with: { + user: { + columns: { + id: true, + name: true, + email: true + } + } + } + } + } + } + } + }); + + return result?.team || null; +} diff --git a/skills/outpost/examples/nextjs-saas/lib/db/schema.ts b/skills/outpost/examples/nextjs-saas/lib/db/schema.ts new file mode 100644 index 0000000..1d047ce --- /dev/null +++ b/skills/outpost/examples/nextjs-saas/lib/db/schema.ts @@ -0,0 +1,142 @@ +import { + pgTable, + serial, + varchar, + text, + timestamp, + integer, +} from 'drizzle-orm/pg-core'; +import { relations } from 'drizzle-orm'; + +export const users = pgTable('users', { + id: serial('id').primaryKey(), + name: varchar('name', { length: 100 }), + email: varchar('email', { length: 255 }).notNull().unique(), + passwordHash: text('password_hash').notNull(), + role: varchar('role', { length: 20 }).notNull().default('member'), + createdAt: timestamp('created_at').notNull().defaultNow(), + updatedAt: timestamp('updated_at').notNull().defaultNow(), + deletedAt: timestamp('deleted_at'), +}); + +export const teams = pgTable('teams', { + id: serial('id').primaryKey(), + name: varchar('name', { length: 100 }).notNull(), + createdAt: timestamp('created_at').notNull().defaultNow(), + updatedAt: timestamp('updated_at').notNull().defaultNow(), + stripeCustomerId: text('stripe_customer_id').unique(), + stripeSubscriptionId: text('stripe_subscription_id').unique(), + stripeProductId: text('stripe_product_id'), + planName: varchar('plan_name', { length: 50 }), + subscriptionStatus: varchar('subscription_status', { length: 20 }), +}); + +export const teamMembers = pgTable('team_members', { + id: serial('id').primaryKey(), + userId: integer('user_id') + .notNull() + 
.references(() => users.id), + teamId: integer('team_id') + .notNull() + .references(() => teams.id), + role: varchar('role', { length: 50 }).notNull(), + joinedAt: timestamp('joined_at').notNull().defaultNow(), +}); + +export const activityLogs = pgTable('activity_logs', { + id: serial('id').primaryKey(), + teamId: integer('team_id') + .notNull() + .references(() => teams.id), + userId: integer('user_id').references(() => users.id), + action: text('action').notNull(), + timestamp: timestamp('timestamp').notNull().defaultNow(), + ipAddress: varchar('ip_address', { length: 45 }), +}); + +export const invitations = pgTable('invitations', { + id: serial('id').primaryKey(), + teamId: integer('team_id') + .notNull() + .references(() => teams.id), + email: varchar('email', { length: 255 }).notNull(), + role: varchar('role', { length: 50 }).notNull(), + invitedBy: integer('invited_by') + .notNull() + .references(() => users.id), + invitedAt: timestamp('invited_at').notNull().defaultNow(), + status: varchar('status', { length: 20 }).notNull().default('pending'), +}); + +export const teamsRelations = relations(teams, ({ many }) => ({ + teamMembers: many(teamMembers), + activityLogs: many(activityLogs), + invitations: many(invitations), +})); + +export const usersRelations = relations(users, ({ many }) => ({ + teamMembers: many(teamMembers), + invitationsSent: many(invitations), +})); + +export const invitationsRelations = relations(invitations, ({ one }) => ({ + team: one(teams, { + fields: [invitations.teamId], + references: [teams.id], + }), + invitedBy: one(users, { + fields: [invitations.invitedBy], + references: [users.id], + }), +})); + +export const teamMembersRelations = relations(teamMembers, ({ one }) => ({ + user: one(users, { + fields: [teamMembers.userId], + references: [users.id], + }), + team: one(teams, { + fields: [teamMembers.teamId], + references: [teams.id], + }), +})); + +export const activityLogsRelations = relations(activityLogs, ({ one }) => ({ + 
team: one(teams, { + fields: [activityLogs.teamId], + references: [teams.id], + }), + user: one(users, { + fields: [activityLogs.userId], + references: [users.id], + }), +})); + +export type User = typeof users.$inferSelect; +export type NewUser = typeof users.$inferInsert; +export type Team = typeof teams.$inferSelect; +export type NewTeam = typeof teams.$inferInsert; +export type TeamMember = typeof teamMembers.$inferSelect; +export type NewTeamMember = typeof teamMembers.$inferInsert; +export type ActivityLog = typeof activityLogs.$inferSelect; +export type NewActivityLog = typeof activityLogs.$inferInsert; +export type Invitation = typeof invitations.$inferSelect; +export type NewInvitation = typeof invitations.$inferInsert; +export type TeamDataWithMembers = Team & { + teamMembers: (TeamMember & { + user: Pick; + })[]; +}; + +export enum ActivityType { + SIGN_UP = 'SIGN_UP', + SIGN_IN = 'SIGN_IN', + SIGN_OUT = 'SIGN_OUT', + UPDATE_PASSWORD = 'UPDATE_PASSWORD', + DELETE_ACCOUNT = 'DELETE_ACCOUNT', + UPDATE_ACCOUNT = 'UPDATE_ACCOUNT', + CREATE_TEAM = 'CREATE_TEAM', + REMOVE_TEAM_MEMBER = 'REMOVE_TEAM_MEMBER', + INVITE_TEAM_MEMBER = 'INVITE_TEAM_MEMBER', + ACCEPT_INVITATION = 'ACCEPT_INVITATION', +} diff --git a/skills/outpost/examples/nextjs-saas/lib/db/seed.ts b/skills/outpost/examples/nextjs-saas/lib/db/seed.ts new file mode 100644 index 0000000..08f08a4 --- /dev/null +++ b/skills/outpost/examples/nextjs-saas/lib/db/seed.ts @@ -0,0 +1,89 @@ +import Stripe from 'stripe'; +import { hashPassword } from '@/lib/auth/session'; +import { db } from './drizzle'; +import { users, teams, teamMembers } from './schema'; + +/** Seed runs under `tsx`; avoid importing `lib/payments/stripe.ts` (pulls Outpost + `server-only`). 
*/ +const stripe = new Stripe(process.env.STRIPE_SECRET_KEY!, { + apiVersion: '2025-08-27.basil', +}); + +async function createStripeProducts() { + console.log('Creating Stripe products and prices...'); + + const baseProduct = await stripe.products.create({ + name: 'Base', + description: 'Base subscription plan', + }); + + await stripe.prices.create({ + product: baseProduct.id, + unit_amount: 800, // $8 in cents + currency: 'usd', + recurring: { + interval: 'month', + trial_period_days: 7, + }, + }); + + const plusProduct = await stripe.products.create({ + name: 'Plus', + description: 'Plus subscription plan', + }); + + await stripe.prices.create({ + product: plusProduct.id, + unit_amount: 1200, // $12 in cents + currency: 'usd', + recurring: { + interval: 'month', + trial_period_days: 7, + }, + }); + + console.log('Stripe products and prices created successfully.'); +} + +async function seed() { + const email = 'test@test.com'; + const password = 'admin123'; + const passwordHash = await hashPassword(password); + + const [user] = await db + .insert(users) + .values([ + { + email: email, + passwordHash: passwordHash, + role: "owner", + }, + ]) + .returning(); + + console.log('Initial user created.'); + + const [team] = await db + .insert(teams) + .values({ + name: 'Test Team', + }) + .returning(); + + await db.insert(teamMembers).values({ + teamId: team.id, + userId: user.id, + role: 'owner', + }); + + await createStripeProducts(); +} + +seed() + .catch((error) => { + console.error('Seed process failed:', error); + process.exit(1); + }) + .finally(() => { + console.log('Seed process finished. 
Exiting...'); + process.exit(0); + }); diff --git a/skills/outpost/examples/nextjs-saas/lib/db/setup.ts b/skills/outpost/examples/nextjs-saas/lib/db/setup.ts new file mode 100644 index 0000000..c23d7d4 --- /dev/null +++ b/skills/outpost/examples/nextjs-saas/lib/db/setup.ts @@ -0,0 +1,216 @@ +import { exec } from 'node:child_process'; +import { promises as fs } from 'node:fs'; +import { promisify } from 'node:util'; +import readline from 'node:readline'; +import crypto from 'node:crypto'; +import path from 'node:path'; +import os from 'node:os'; + +const execAsync = promisify(exec); + +function question(query: string): Promise { + const rl = readline.createInterface({ + input: process.stdin, + output: process.stdout, + }); + + return new Promise((resolve) => + rl.question(query, (ans) => { + rl.close(); + resolve(ans); + }) + ); +} + +async function checkStripeCLI() { + console.log( + 'Step 1: Checking if Stripe CLI is installed and authenticated...' + ); + try { + await execAsync('stripe --version'); + console.log('Stripe CLI is installed.'); + + // Check if Stripe CLI is authenticated + try { + await execAsync('stripe config --list'); + console.log('Stripe CLI is authenticated.'); + } catch (error) { + console.log( + 'Stripe CLI is not authenticated or the authentication has expired.' + ); + console.log('Please run: stripe login'); + const answer = await question( + 'Have you completed the authentication? (y/n): ' + ); + if (answer.toLowerCase() !== 'y') { + console.log( + 'Please authenticate with Stripe CLI and run this script again.' + ); + process.exit(1); + } + + // Verify authentication after user confirms login + try { + await execAsync('stripe config --list'); + console.log('Stripe CLI authentication confirmed.'); + } catch (error) { + console.error( + 'Failed to verify Stripe CLI authentication. Please try again.' + ); + process.exit(1); + } + } + } catch (error) { + console.error( + 'Stripe CLI is not installed. Please install it and try again.' 
/**
 * Step 2: ask where Postgres should run and return the connection string.
 *
 * Answering "L" provisions a local Docker container via `setupLocalPostgres`
 * and returns its fixed URL; any other answer prompts for a remote URL.
 */
async function getPostgresURL(): Promise<string> {
  console.log('Step 2: Setting up Postgres');
  const dbChoice = await question(
    'Do you want to use a local Postgres instance with Docker (L) or a remote Postgres instance (R)? (L/R): '
  );

  // Trim so an answer such as "L " still selects the local path.
  if (dbChoice.trim().toLowerCase() === 'l') {
    console.log('Setting up local Postgres instance with Docker...');
    await setupLocalPostgres();
    return 'postgres://postgres:postgres@localhost:54322/postgres';
  }

  console.log(
    'You can find Postgres databases at: https://vercel.com/marketplace?category=databases'
  );
  // Stray whitespace would otherwise be written verbatim into .env.
  return (await question('Enter your POSTGRES_URL: ')).trim();
}

/**
 * Verify Docker is available, write a `docker-compose.yml` into the current
 * working directory (overwriting any existing file) and start the container.
 * Exits the process with code 1 when Docker is missing or fails to start.
 */
async function setupLocalPostgres(): Promise<void> {
  console.log('Checking if Docker is installed...');
  try {
    await execAsync('docker --version');
    console.log('Docker is installed.');
  } catch {
    console.error(
      'Docker is not installed. Please install Docker and try again.'
    );
    console.log(
      'To install Docker, visit: https://docs.docker.com/get-docker/'
    );
    process.exit(1);
  }

  console.log('Creating docker-compose.yml file...');
  const dockerComposeContent = `
services:
  postgres:
    image: postgres:16.4-alpine
    container_name: next_saas_starter_postgres
    environment:
      POSTGRES_DB: postgres
      POSTGRES_USER: postgres
      POSTGRES_PASSWORD: postgres
    ports:
      - "54322:5432"
    volumes:
      - postgres_data:/var/lib/postgresql/data

volumes:
  postgres_data:
`;

  await fs.writeFile(
    path.join(process.cwd(), 'docker-compose.yml'),
    dockerComposeContent
  );
  console.log('docker-compose.yml file created.');

  console.log('Starting Docker container with `docker compose up -d`...');
  try {
    await execAsync('docker compose up -d');
    console.log('Docker container started successfully.');
  } catch (error) {
    console.error(
      'Failed to start Docker container. Please check your Docker installation and try again.'
    );
    // Surface the underlying cause (port already in use, daemon not running, ...).
    console.error(error);
    process.exit(1);
  }
}

/** Step 3: prompt for the Stripe secret key. */
async function getStripeSecretKey(): Promise<string> {
  console.log('Step 3: Getting Stripe Secret Key');
  console.log(
    'You can find your Stripe Secret Key at: https://dashboard.stripe.com/test/apikeys'
  );
  return (await question('Enter your Stripe Secret Key: ')).trim();
}

/**
 * Step 4: obtain a webhook signing secret from the Stripe CLI.
 *
 * @returns The first `whsec_…` token printed by `stripe listen --print-secret`.
 * @throws Re-throws the CLI failure, or an Error when no secret is found in the output.
 */
async function createStripeWebhook(): Promise<string> {
  console.log('Step 4: Creating Stripe webhook...');
  try {
    const { stdout } = await execAsync('stripe listen --print-secret');
    const match = stdout.match(/whsec_[a-zA-Z0-9]+/);
    if (!match) {
      throw new Error('Failed to extract Stripe webhook secret');
    }
    console.log('Stripe webhook created.');
    return match[0];
  } catch (error) {
    console.error(
      'Failed to create Stripe webhook. Check your Stripe CLI installation and permissions.'
    );
    if (os.platform() === 'win32') {
      console.log(
        'Note: On Windows, you may need to run this script as an administrator.'
      );
    }
    throw error;
  }
}

/** Step 5: generate a random 32-byte secret, hex-encoded (64 characters). */
function generateAuthSecret(): string {
  console.log('Step 5: Generating AUTH_SECRET...');
  return crypto.randomBytes(32).toString('hex');
}

/**
 * Step 6: write `KEY=value` lines to `.env` in the current working directory,
 * replacing any existing file. Values are written as-is (no quoting).
 */
async function writeEnvFile(envVars: Record<string, string>): Promise<void> {
  console.log('Step 6: Writing environment variables to .env');
  const envContent = Object.entries(envVars)
    .map(([key, value]) => `${key}=${value}`)
    .join('\n');

  await fs.writeFile(path.join(process.cwd(), '.env'), envContent);
  console.log('.env file created with the necessary variables.');
}

/** Run every setup step in order and persist the collected values to `.env`. */
async function main(): Promise<void> {
  await checkStripeCLI();

  const POSTGRES_URL = await getPostgresURL();
  const STRIPE_SECRET_KEY = await getStripeSecretKey();
  const STRIPE_WEBHOOK_SECRET = await createStripeWebhook();
  const BASE_URL = 'http://localhost:3000';
  const AUTH_SECRET = generateAuthSecret();

  await writeEnvFile({
    POSTGRES_URL,
    STRIPE_SECRET_KEY,
    STRIPE_WEBHOOK_SECRET,
    BASE_URL,
    AUTH_SECRET,
  });

  console.log('🎉 Setup completed successfully!');
}

main().catch((error) => {
  console.error(error);
  // Report failure to the calling shell / CI instead of exiting with 0.
  process.exitCode = 1;
});
/**
 * Resolves the signed-in user's Outpost tenant ID from their team membership.
 *
 * @returns The tenant ID for the user's team, or `null` when no user is
 *   signed in or the user has no team.
 */
export async function getOutpostTenantId(): Promise<string | null> {
  const user = await getUser();
  if (!user) return null;

  const userWithTeam = await getUserWithTeam(user.id);
  if (!userWithTeam?.teamId) return null;

  return toTenantId(userWithTeam.teamId);
}

/**
 * Map an internal team ID to an Outpost tenant ID.
 *
 * The tenant ID is the decimal string form of the team ID. No prefix is
 * added, so tenant IDs are only as unique as team IDs are; every call site
 * that talks to Outpost for a team must go through this function so the
 * mapping stays identical everywhere.
 */
export function toTenantId(teamId: number): string {
  return String(teamId);
}
/** One config/credential schema field of a destination type, as used by the dashboard. */
export type DestinationTypeFieldWire = {
  key?: string;
  type: string;
  label?: string;
  description?: string;
  required: boolean;
  sensitive?: boolean;
  default?: string;
  minlength?: number;
  maxlength?: number;
  pattern?: string;
  options?: Array<{ label: string; value: string }>;
};

/** Destination type in the camelCase shape the dashboard expects. */
export type DestinationTypeWire = {
  type?: string;
  label?: string;
  description?: string;
  icon?: string;
  instructions?: string;
  remoteSetupUrl?: string;
  configFields?: DestinationTypeFieldWire[];
  credentialFields?: DestinationTypeFieldWire[];
};

// Field objects use the same property names on the wire and in the dashboard
// shape, so the raw type is an alias rather than a second copy of the fields.
type RawField = DestinationTypeFieldWire;

/** Destination type as returned by the API (snake_case keys). */
type RawDestinationType = {
  type?: string;
  label?: string;
  description?: string;
  icon?: string;
  instructions?: string;
  remote_setup_url?: string;
  config_fields?: RawField[];
  credential_fields?: RawField[];
};

/** True for non-null, non-array objects. */
function isPlainObject(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null && !Array.isArray(value);
}

/** Copy the known field properties (including `key`), dropping anything else. */
function mapField(f: RawField): DestinationTypeFieldWire {
  return {
    key: f.key,
    type: f.type,
    label: f.label,
    description: f.description,
    required: f.required,
    sensitive: f.sensitive,
    default: f.default,
    minlength: f.minlength,
    maxlength: f.maxlength,
    pattern: f.pattern,
    options: f.options,
  };
}

/**
 * Map a raw field list. Returns `undefined` when the list is absent or not an
 * array; entries that are not objects are skipped instead of throwing.
 */
function mapFields(fields: unknown): DestinationTypeFieldWire[] | undefined {
  if (!Array.isArray(fields)) {
    return undefined;
  }
  return fields.filter(isPlainObject).map((f) => mapField(f as RawField));
}

/**
 * Normalize a destination-types JSON payload into the camelCase dashboard shape.
 *
 * @param raw Parsed JSON of unknown shape.
 * @returns One entry per object in `raw`; an empty array when `raw` is not an
 *   array. Non-object entries (null, strings, nested arrays) are skipped so a
 *   malformed element cannot crash the caller.
 */
export function normalizeDestinationTypesPayload(
  raw: unknown,
): DestinationTypeWire[] {
  if (!Array.isArray(raw)) {
    return [];
  }
  return raw.filter(isPlainObject).map((item): DestinationTypeWire => {
    const t = item as RawDestinationType;
    return {
      type: t.type,
      label: t.label,
      description: t.description,
      icon: t.icon,
      instructions: t.instructions,
      remoteSetupUrl: t.remote_setup_url,
      configFields: mapFields(t.config_fields),
      credentialFields: mapFields(t.credential_fields),
    };
  });
}
/**
 * Ensure an Outpost tenant exists for the given team.
 *
 * Best-effort: a failure is logged and swallowed so the calling flow
 * (e.g. team creation) is never blocked by Outpost being unavailable.
 */
export async function upsertTenant(teamId: number): Promise<void> {
  try {
    await outpost.tenants.upsert(toTenantId(teamId));
  } catch (err) {
    console.error('[outpost] Failed to upsert tenant', teamId, err);
    // Non-fatal: don't block the main flow
  }
}

/**
 * Publish an event for a team's tenant with retries enabled.
 *
 * Best-effort: a failure is logged and swallowed, so callers cannot tell
 * from the return value whether the event was accepted.
 *
 * @param teamId Internal team ID; mapped to the tenant ID via `toTenantId`.
 * @param topic  Topic name; it must already exist in the Outpost project.
 * @param data   Event payload.
 */
export async function publishEvent(
  teamId: number,
  topic: string,
  data: Record<string, unknown>
): Promise<void> {
  try {
    await outpost.publish.event({
      tenantId: toTenantId(teamId),
      topic,
      eligibleForRetry: true,
      data,
    });
  } catch (err) {
    console.error('[outpost] Failed to publish event', topic, teamId, err);
    // Non-fatal: don't block the main flow
  }
}
/** Explicit opt-out of Stripe API (pricing uses placeholders; checkout/webhooks disabled). */
export function isStripeMockMode(): boolean {
  return process.env.STRIPE_MOCK === '1';
}

/**
 * Use placeholder catalog when Stripe is explicitly mocked, or in development
 * when no secret key is set (so `next dev` works without a Stripe account).
 * Production builds still require a real key or STRIPE_MOCK=1 for `/pricing`.
 */
export function useStripeCatalogMock(): boolean {
  if (isStripeMockMode()) return true;
  if (process.env.STRIPE_SECRET_KEY?.trim()) return false;
  return process.env.NODE_ENV !== 'production';
}

// Created lazily on first use so importing this module never requires a key.
let stripeClient: Stripe | null = null;

/**
 * Return the shared Stripe client, creating it on first call.
 *
 * @throws Error when STRIPE_MOCK=1 or when STRIPE_SECRET_KEY is missing/blank.
 */
export function getStripe(): Stripe {
  if (isStripeMockMode()) {
    throw new Error(
      'Stripe API is disabled (STRIPE_MOCK=1). Unset STRIPE_MOCK and set STRIPE_SECRET_KEY for checkout and webhooks.'
    );
  }
  const key = process.env.STRIPE_SECRET_KEY?.trim();
  if (!key) {
    throw new Error(
      'STRIPE_SECRET_KEY is required for checkout and webhooks. For local UI without Stripe, set STRIPE_MOCK=1 or rely on dev catalog mock (see README).'
    );
  }
  if (!stripeClient) {
    stripeClient = new Stripe(key, {
      apiVersion: '2025-08-27.basil'
    });
  }
  return stripeClient;
}

type StripeProductSummary = {
  id: string;
  name: string;
  description: string | null;
  defaultPriceId: string | undefined;
};

type StripePriceSummary = {
  id: string;
  productId: string;
  unitAmount: number | null;
  currency: string;
  interval: string | undefined;
  trialPeriodDays: number | null | undefined;
};

/** Placeholder products shown when the catalog is mocked. */
function mockStripeProducts(): StripeProductSummary[] {
  return [
    {
      id: 'prod_mock_base',
      name: 'Base',
      description: 'Mock product (STRIPE_MOCK or dev without key)',
      defaultPriceId: 'price_mock_base'
    },
    {
      id: 'prod_mock_plus',
      name: 'Plus',
      description: 'Mock product (STRIPE_MOCK or dev without key)',
      defaultPriceId: 'price_mock_plus'
    }
  ];
}

/** Placeholder prices matching `mockStripeProducts` (amounts in cents). */
function mockStripePrices(): StripePriceSummary[] {
  return [
    {
      id: 'price_mock_base',
      productId: 'prod_mock_base',
      unitAmount: 800,
      currency: 'usd',
      interval: 'month',
      trialPeriodDays: 7
    },
    {
      id: 'price_mock_plus',
      productId: 'prod_mock_plus',
      unitAmount: 1200,
      currency: 'usd',
      interval: 'month',
      trialPeriodDays: 7
    }
  ];
}

/**
 * Start a Stripe Checkout subscription session and redirect to it.
 *
 * Redirects to the dashboard when Stripe is disabled, and to sign-up (keeping
 * the chosen price) when there is no signed-in user or team.
 */
export async function createCheckoutSession({
  team,
  priceId
}: {
  team: Team | null;
  priceId: string;
}) {
  if (isStripeMockMode() || !process.env.STRIPE_SECRET_KEY?.trim()) {
    redirect('/dashboard?notice=stripe-disabled');
  }

  const user = await getUser();
  const stripe = getStripe();

  if (!team || !user) {
    // priceId originates from form data; encode it so it cannot inject
    // additional query parameters into the redirect URL.
    redirect(
      `/sign-up?redirect=checkout&priceId=${encodeURIComponent(priceId)}`
    );
  }

  const session = await stripe.checkout.sessions.create({
    payment_method_types: ['card'],
    line_items: [
      {
        price: priceId,
        quantity: 1
      }
    ],
    mode: 'subscription',
    success_url: `${process.env.BASE_URL}/api/stripe/checkout?session_id={CHECKOUT_SESSION_ID}`,
    cancel_url: `${process.env.BASE_URL}/pricing`,
    customer: team.stripeCustomerId || undefined,
    client_reference_id: user.id.toString(),
    allow_promotion_codes: true,
    subscription_data: {
      trial_period_days: 14
    }
  });

  redirect(session.url!);
}

/**
 * Create a Stripe billing-portal session for the team.
 *
 * Reuses the first existing portal configuration, or creates one scoped to
 * the team's current product and its active prices.
 *
 * @throws Error when the team's product is inactive or has no active prices.
 */
export async function createCustomerPortalSession(team: Team) {
  if (isStripeMockMode() || !process.env.STRIPE_SECRET_KEY?.trim()) {
    redirect('/dashboard?notice=stripe-disabled');
  }

  const stripe = getStripe();

  if (!team.stripeCustomerId || !team.stripeProductId) {
    redirect('/pricing');
  }

  let configuration: Stripe.BillingPortal.Configuration;
  const configurations = await stripe.billingPortal.configurations.list();

  if (configurations.data.length > 0) {
    configuration = configurations.data[0];
  } else {
    const product = await stripe.products.retrieve(team.stripeProductId);
    if (!product.active) {
      throw new Error("Team's product is not active in Stripe");
    }

    const prices = await stripe.prices.list({
      product: product.id,
      active: true
    });
    if (prices.data.length === 0) {
      throw new Error("No active prices found for the team's product");
    }

    configuration = await stripe.billingPortal.configurations.create({
      business_profile: {
        headline: 'Manage your subscription'
      },
      features: {
        subscription_update: {
          enabled: true,
          default_allowed_updates: ['price', 'quantity', 'promotion_code'],
          proration_behavior: 'create_prorations',
          products: [
            {
              product: product.id,
              prices: prices.data.map((price) => price.id)
            }
          ]
        },
        subscription_cancel: {
          enabled: true,
          mode: 'at_period_end',
          cancellation_reason: {
            enabled: true,
            options: [
              'too_expensive',
              'missing_features',
              'switched_service',
              'unused',
              'other'
            ]
          }
        },
        payment_method_update: {
          enabled: true
        }
      }
    });
  }

  return stripe.billingPortal.sessions.create({
    customer: team.stripeCustomerId,
    return_url: `${process.env.BASE_URL}/dashboard`,
    configuration: configuration.id
  });
}

/**
 * Resolve a display name for a subscription's product.
 *
 * The product is an ID unless the caller expanded it; an ID is looked up via
 * the Stripe API. Lookup failures are logged and reported as `null`.
 */
async function resolveProductName(
  product: string | Stripe.Product | Stripe.DeletedProduct | null
): Promise<string | null> {
  if (!product) return null;
  if (typeof product !== 'string') {
    // Deleted products carry no name.
    return 'name' in product ? product.name : null;
  }
  try {
    const fetched = await getStripe().products.retrieve(product);
    return fetched.name ?? null;
  } catch (err) {
    console.error('Failed to load Stripe product', product, err);
    return null;
  }
}

/**
 * Sync a Stripe subscription change to the owning team and publish a
 * `subscription.updated` Outpost event.
 *
 * Active/trialing subscriptions store the subscription, product and plan
 * name; canceled/unpaid ones clear them. Other statuses leave the team row
 * untouched but still publish the event. Does nothing (beyond logging) when
 * no team matches the Stripe customer.
 */
export async function handleSubscriptionChange(
  subscription: Stripe.Subscription
) {
  // `customer` is an ID unless it was expanded by the caller.
  const customerId =
    typeof subscription.customer === 'string'
      ? subscription.customer
      : subscription.customer.id;
  const subscriptionId = subscription.id;
  const status = subscription.status;

  const team = await getTeamByStripeCustomerId(customerId);

  if (!team) {
    console.error('Team not found for Stripe customer:', customerId);
    return;
  }

  let planName: string | null = null;

  if (status === 'active' || status === 'trialing') {
    // A subscription may have no items and the product may be a bare ID, so
    // never cast: narrow instead.
    const product = subscription.items.data[0]?.plan?.product ?? null;
    const productId =
      typeof product === 'string' ? product : (product?.id ?? null);
    planName = await resolveProductName(product);
    await updateTeamSubscription(team.id, {
      stripeSubscriptionId: subscriptionId,
      stripeProductId: productId,
      planName,
      subscriptionStatus: status
    });
  } else if (status === 'canceled' || status === 'unpaid') {
    await updateTeamSubscription(team.id, {
      stripeSubscriptionId: null,
      stripeProductId: null,
      planName: null,
      subscriptionStatus: status
    });
  }

  // Notify subscribers that the team's subscription changed.
  // Topic: subscription.updated (add to Outpost project if not present)
  await publishEvent(team.id, 'subscription.updated', {
    teamId: team.id,
    status,
    planName,
    subscriptionId,
  });
}

/** List active recurring prices, or the mock catalog when Stripe is mocked. */
export async function getStripePrices(): Promise<StripePriceSummary[]> {
  if (useStripeCatalogMock()) {
    return mockStripePrices();
  }

  const stripe = getStripe();
  const prices = await stripe.prices.list({
    expand: ['data.product'],
    active: true,
    type: 'recurring'
  });

  return prices.data.map((price) => ({
    id: price.id,
    productId:
      typeof price.product === 'string' ? price.product : price.product.id,
    unitAmount: price.unit_amount,
    currency: price.currency,
    interval: price.recurring?.interval,
    trialPeriodDays: price.recurring?.trial_period_days
  }));
}

/** List active products, or the mock catalog when Stripe is mocked. */
export async function getStripeProducts(): Promise<StripeProductSummary[]> {
  if (useStripeCatalogMock()) {
    return mockStripeProducts();
  }

  const stripe = getStripe();
  const products = await stripe.products.list({
    active: true,
    expand: ['data.default_price']
  });

  return products.data.map((product) => ({
    id: product.id,
    name: product.name,
    description: product.description,
    defaultPriceId:
      typeof product.default_price === 'string'
        ? product.default_price
        : product.default_price?.id
  }));
}
const parsed = await verifyToken(sessionCookie.value); + const expiresInOneDay = new Date(Date.now() + 24 * 60 * 60 * 1000); + + res.cookies.set({ + name: 'session', + value: await signToken({ + ...parsed, + expires: expiresInOneDay.toISOString() + }), + httpOnly: true, + secure: true, + sameSite: 'lax', + expires: expiresInOneDay + }); + } catch (error) { + console.error('Error updating session:', error); + res.cookies.delete('session'); + if (isProtectedRoute) { + return NextResponse.redirect(new URL('/sign-in', request.url)); + } + } + } + + return res; +} + +export const config = { + matcher: ['/((?!api|_next/static|_next/image|favicon.ico).*)'], + runtime: 'nodejs' +}; diff --git a/skills/outpost/examples/nextjs-saas/next.config.ts b/skills/outpost/examples/nextjs-saas/next.config.ts new file mode 100644 index 0000000..e4617d5 --- /dev/null +++ b/skills/outpost/examples/nextjs-saas/next.config.ts @@ -0,0 +1,10 @@ +import type { NextConfig } from 'next'; + +const nextConfig: NextConfig = { + experimental: { + ppr: true, + clientSegmentCache: true + } +}; + +export default nextConfig; diff --git a/skills/outpost/examples/nextjs-saas/package.json b/skills/outpost/examples/nextjs-saas/package.json new file mode 100644 index 0000000..6c4162b --- /dev/null +++ b/skills/outpost/examples/nextjs-saas/package.json @@ -0,0 +1,47 @@ +{ + "private": true, + "scripts": { + "dev": "next dev --turbopack", + "build": "next build", + "start": "next start", + "db:setup": "npx tsx lib/db/setup.ts", + "db:seed": "npx tsx lib/db/seed.ts", + "db:generate": "drizzle-kit generate", + "db:migrate": "drizzle-kit migrate", + "db:studio": "drizzle-kit studio", + "test": "vitest run" + }, + "dependencies": { + "@hookdeck/outpost-sdk": "^1.0.0", + "@tailwindcss/postcss": "4.1.7", + "@types/node": "^22.15.18", + "@types/react": "19.1.4", + "@types/react-dom": "19.1.5", + "autoprefixer": "^10.4.21", + "bcryptjs": "^3.0.2", + "class-variance-authority": "^0.7.1", + "clsx": "^2.1.1", + 
"dotenv": "^16.5.0", + "drizzle-kit": "^0.31.1", + "drizzle-orm": "^0.43.1", + "jose": "^6.0.11", + "lucide-react": "^0.511.0", + "next": "15.6.0-canary.59", + "postcss": "^8.5.3", + "postgres": "^3.4.5", + "radix-ui": "^1.4.2", + "react": "19.1.0", + "react-dom": "19.1.0", + "server-only": "^0.0.1", + "stripe": "^18.1.0", + "swr": "^2.3.3", + "tailwind-merge": "^3.3.0", + "tailwindcss": "4.1.7", + "tw-animate-css": "^1.3.0", + "typescript": "^5.8.3", + "zod": "^3.24.4" + }, + "devDependencies": { + "vitest": "^1.6.0" + } +} diff --git a/skills/outpost/examples/nextjs-saas/postcss.config.mjs b/skills/outpost/examples/nextjs-saas/postcss.config.mjs new file mode 100644 index 0000000..a34a3d5 --- /dev/null +++ b/skills/outpost/examples/nextjs-saas/postcss.config.mjs @@ -0,0 +1,5 @@ +export default { + plugins: { + '@tailwindcss/postcss': {}, + }, +}; diff --git a/skills/outpost/examples/nextjs-saas/tsconfig.json b/skills/outpost/examples/nextjs-saas/tsconfig.json new file mode 100644 index 0000000..dd9a51f --- /dev/null +++ b/skills/outpost/examples/nextjs-saas/tsconfig.json @@ -0,0 +1,42 @@ +{ + "compilerOptions": { + "target": "ESNext", + "lib": [ + "dom", + "dom.iterable", + "esnext" + ], + "allowJs": true, + "skipLibCheck": true, + "strict": true, + "noEmit": true, + "esModuleInterop": true, + "module": "esnext", + "moduleResolution": "bundler", + "resolveJsonModule": true, + "isolatedModules": true, + "jsx": "react-jsx", + "incremental": true, + "baseUrl": ".", + "plugins": [ + { + "name": "next" + } + ], + "paths": { + "@/*": [ + "./*" + ] + } + }, + "include": [ + "next-env.d.ts", + "**/*.ts", + "**/*.tsx", + ".next/types/**/*.ts", + ".next/dev/types/**/*.ts" + ], + "exclude": [ + "node_modules" + ] +} diff --git a/skills/outpost/examples/nextjs-saas/vitest.config.ts b/skills/outpost/examples/nextjs-saas/vitest.config.ts new file mode 100644 index 0000000..8e730d5 --- /dev/null +++ b/skills/outpost/examples/nextjs-saas/vitest.config.ts @@ -0,0 +1,8 @@ +import 
{ defineConfig } from 'vitest/config'; + +export default defineConfig({ + test: { + globals: true, + environment: 'node', + }, +}); diff --git a/skills/outpost/references/fastapi-saas-integration-map.md b/skills/outpost/references/fastapi-saas-integration-map.md new file mode 100644 index 0000000..8ea0aa6 --- /dev/null +++ b/skills/outpost/references/fastapi-saas-integration-map.md @@ -0,0 +1,41 @@ +# FastAPI SaaS example — Outpost integration map + +Use this file **before** browsing the full [examples/fastapi-saas/](../examples/fastapi-saas/) tree. The template is a **full-stack** app (FastAPI + React + Docker). For Outpost work, focus on the **backend BFF** and **domain publish** sites below—open deeper files only when they match the task. + +**Do not paste these files into another repository.** Reproduce the same **behavior** (env vars, `outpost.py` BFF patterns, domain publish via `httpx` to `POST …/publish`) in the user’s FastAPI layout and settings system. For FastAPI, `httpx`, and `outpost_sdk` **1.x** pins, see [SKILL.md](../SKILL.md#full-stack-reference-examples-advanced) and [examples/fastapi-saas/backend/pyproject.toml](../examples/fastapi-saas/backend/pyproject.toml). + +--- + +## Mental model + +1. **`OUTPOST_API_KEY`** — Server-only (see `app/core/config.py`). +2. **Tenant** — Here, `tenant_id = str(user.id)` (one Outpost tenant per signed-up user). +3. **Browser → your API → Outpost** — Frontend calls `/api/v1/outpost/...` (via generated client / OpenAPI); FastAPI routes in `outpost.py` proxy to Hookdeck Outpost with the admin key. +4. **Domain publish** — Real events (e.g. signup) use `httpx` to `POST …/publish` in background tasks, separate from the dashboard **test-publish** endpoint. 
+ +--- + +## Start here (backend) + +| Goal | File(s) | +|------|---------| +| Env defaults (`OUTPOST_API_KEY`, `OUTPOST_API_BASE_URL`) | [backend/app/core/config.py](../examples/fastapi-saas/backend/app/core/config.py) | +| BFF routes: headers, `_base()`, error mapping, destinations, attempts, retry, test publish | [backend/app/api/routes/outpost.py](../examples/fastapi-saas/backend/app/api/routes/outpost.py) | +| Router registration | [backend/app/api/main.py](../examples/fastapi-saas/backend/app/api/main.py) | +| Signup → `user.created` publish (background task) | [backend/app/api/routes/users.py](../examples/fastapi-saas/backend/app/api/routes/users.py) (`_publish_user_created`) | +| Standalone wire tests (no DB, no live Outpost) | [backend/test_outpost_wire.py](../examples/fastapi-saas/backend/test_outpost_wire.py) — mirrors `_raise_for_outpost` in `outpost.py` | + +--- + +## Frontend (optional) + +The React app adds dashboard pages that call the OpenAPI client for `/outpost/*`. Only open the frontend when the task is UI-specific; search `outpost` or `webhooks` under [frontend/src/](../examples/fastapi-saas/frontend/src/) after reading `outpost.py`. + +--- + +## What to skip unless asked + +- Docker Compose, Traefik, Playwright E2E, generic CRUD (`items`, etc.). +- Full `alembic/` migrations unless aligning tenant identity with your own schema. + +This keeps Outpost integration scoped to a **small set of backend files** plus optional frontend routes. diff --git a/skills/outpost/references/nextjs-saas-integration-map.md b/skills/outpost/references/nextjs-saas-integration-map.md new file mode 100644 index 0000000..8767b3d --- /dev/null +++ b/skills/outpost/references/nextjs-saas-integration-map.md @@ -0,0 +1,83 @@ +# Next.js SaaS example — Outpost integration map + +Use this file **before** opening the full [examples/nextjs-saas/](../examples/nextjs-saas/) tree. The example is a **complete SaaS app** (auth, Stripe, Drizzle, dashboard UI). 
Agents should treat it as a **patterns reference**, not required reading end-to-end. + +**Do not paste these files into another repository.** Reproduce the same **behavior** (env vars, BFF routes under `app/api/outpost/*`, `lib/outpost` helpers, publish call sites) using the target app’s own layout and dependencies. For framework and SDK versions (including Next canary and `@hookdeck/outpost-sdk` **1.x**), see [SKILL.md](../SKILL.md#full-stack-reference-examples-advanced) and the canonical pins in [examples/nextjs-saas/package.json](../examples/nextjs-saas/package.json). + +**Why a map:** Keep the entry skill small—use `SKILL.md` for overview and this map for **specific files** per job; drill into the tree only when the task needs that file. + +--- + +## Mental model + +1. **Admin API key** — Only on the server (`OUTPOST_API_KEY`). Never exposed to the browser. +2. **Tenant** — One Outpost tenant per customer team (here: `String(team.id)` via `toTenantId`). +3. **Publish** — Domain code calls `publishEvent(teamId, topic, data)` after real business actions. +4. **Destinations UI** — Dashboard talks to **your** BFF routes under `app/api/outpost/*`, which call Outpost with the admin key. + +You do **not** need to read marketing pages, login forms, or generic CRUD to understand Outpost wiring. 
+ +--- + +## Start here (small set of files) + +| Goal | File(s) | Notes | +|------|---------|--------| +| SDK singleton (SDK default server URL) + `toTenantId` | [lib/outpost/client.ts](../examples/nextjs-saas/lib/outpost/client.ts) | `import 'server-only'`; `@hookdeck/outpost-sdk` | +| Upsert tenant + publish helper | [lib/outpost/index.ts](../examples/nextjs-saas/lib/outpost/index.ts) | `upsertTenant`, `publishEvent` → `outpost.publish.event` | +| Map session → tenant id for API routes | [lib/outpost/auth.ts](../examples/nextjs-saas/lib/outpost/auth.ts) | Used by all BFF routes | +| Normalize destination-type API (optional) | [lib/outpost/destination-types-wire.ts](../examples/nextjs-saas/lib/outpost/destination-types-wire.ts) | Keeps `key` on schema fields when SDK omits them | +| Vitest for wire helper | [lib/outpost/destination-types-wire.test.ts](../examples/nextjs-saas/lib/outpost/destination-types-wire.test.ts) | No live Outpost required | + +--- + +## BFF API routes (`app/api/outpost/`) + +All of these routes proxy Outpost using the server-side admin key; the browser only calls same-origin `/api/outpost/...`. 
+ +| Route area | Purpose | +|------------|---------| +| [destination-types/route.ts](../examples/nextjs-saas/app/api/outpost/destination-types/route.ts) | List destination types for the create-destination UI | +| [topics/route.ts](../examples/nextjs-saas/app/api/outpost/topics/route.ts) | Topics for checkboxes / validation | +| [destinations/route.ts](../examples/nextjs-saas/app/api/outpost/destinations/route.ts), [destinations/[id]/route.ts](../examples/nextjs-saas/app/api/outpost/destinations/[id]/route.ts) | CRUD destinations for current tenant | +| [test-publish/route.ts](../examples/nextjs-saas/app/api/outpost/test-publish/route.ts) | Synthetic publish for dashboard testing | +| [events/route.ts](../examples/nextjs-saas/app/api/outpost/events/route.ts), [events/[id]/attempts/route.ts](../examples/nextjs-saas/app/api/outpost/events/[id]/attempts/route.ts) | Activity / attempts | +| [retry/route.ts](../examples/nextjs-saas/app/api/outpost/retry/route.ts) | Manual retry | + +Open only the route that matches the user’s task (e.g. “add destinations list” → `destinations` + `auth`). + +--- + +## Where domain events are published + +| Area | File | Topics / notes | +|------|------|----------------| +| Auth / sign-up | [app/(login)/actions.ts](../examples/nextjs-saas/app/(login)/actions.ts) | `user.created`, team member invite/remove, etc. | +| Stripe webhooks | [lib/payments/stripe.ts](../examples/nextjs-saas/lib/payments/stripe.ts) | e.g. `subscription.updated` | + +Search the repo for `publishEvent(` or `outpost.publish` if you need every call site. + +--- + +## Dashboard UI (optional) + +- [app/(dashboard)/dashboard/destinations/page.tsx](../examples/nextjs-saas/app/(dashboard)/dashboard/destinations/page.tsx) — Customer-facing destinations + test publish UX. Read **after** the `lib/outpost` + `app/api/outpost` layers if the task is UI-specific. 
+ +--- + +## Env vars (see also example README) + +| Variable | Role | +|----------|------| +| `OUTPOST_API_KEY` | Admin API key (server only) | +| `OUTPOST_API_BASE_URL` | Override API base (managed vs self-hosted) | + +--- + +## What to skip unless asked + +- `app/(dashboard)/` pages other than destinations (general, security, activity) for pure “wire Outpost” tasks. +- `lib/db/*` except when aligning tenant id with your own schema. +- Stripe and seed scripts unless the task touches subscription-driven publishes. + +This keeps integration work scoped to **~10 files** instead of the whole application. diff --git a/skills/outpost/references/outpost-quickstarts.md b/skills/outpost/references/outpost-quickstarts.md new file mode 100644 index 0000000..5997366 --- /dev/null +++ b/skills/outpost/references/outpost-quickstarts.md @@ -0,0 +1,19 @@ +# Outpost quickstarts (canonical links) + +**Full doc tree (agents):** [https://hookdeck.com/docs/outpost/llms.txt](https://hookdeck.com/docs/outpost/llms.txt) — plain-text index of pages as `.md` URLs; fetch once when you need breadth. + +Base: `https://hookdeck.com/docs/outpost` (no trailing slash). Paths match the [dashboard agent prompt](https://github.com/hookdeck/outpost/blob/main/docs/agent-evaluation/hookdeck-outpost-agent-prompt.md) `{{DOCS_URL}}` pattern. 
+ +| Path | Use | +|------|-----| +| [/quickstarts/hookdeck-outpost-curl](https://hookdeck.com/docs/outpost/quickstarts/hookdeck-outpost-curl) | Smallest path: HTTP / curl only | +| [/quickstarts/hookdeck-outpost-typescript](https://hookdeck.com/docs/outpost/quickstarts/hookdeck-outpost-typescript) | TypeScript SDK (`@hookdeck/outpost-sdk`) | +| [/quickstarts/hookdeck-outpost-python](https://hookdeck.com/docs/outpost/quickstarts/hookdeck-outpost-python) | Python SDK (`outpost_sdk`) | +| [/quickstarts/hookdeck-outpost-go](https://hookdeck.com/docs/outpost/quickstarts/hookdeck-outpost-go) | Go SDK | + +For self-hosted deployment: + +- [Docker quickstart](https://hookdeck.com/docs/outpost/self-hosting/quickstarts/docker) +- [Kubernetes quickstart](https://hookdeck.com/docs/outpost/self-hosting/quickstarts/kubernetes) +- [Railway quickstart](https://hookdeck.com/docs/outpost/self-hosting/quickstarts/railway) +- [Self-hosting configuration](https://hookdeck.com/docs/outpost/self-hosting/configuration) diff --git a/skills/outpost/references/outpost-scope.md b/skills/outpost/references/outpost-scope.md new file mode 100644 index 0000000..73d0206 --- /dev/null +++ b/skills/outpost/references/outpost-scope.md @@ -0,0 +1,56 @@ +# Outpost integration scope (agent ladder) + +Condensed from the [Hookdeck Outpost agent prompt template](https://github.com/hookdeck/outpost/blob/main/docs/agent-evaluation/hookdeck-outpost-agent-prompt.md). **Placeholders** (`{{TOPICS_LIST}}`, `{{TEST_DESTINATION_URL}}`, injected API base) stay **dashboard-only**; this file is the reusable ladder for skills and chat. + +## Three paths + +1. **Quick path** — Smallest runnable artifact: one shell script (curl) or **one source file** per the official **quickstart** (`npx tsx`, `python`, `go run`, …). No app framework, no multi-route server, no dev-server “project,” unless the user clearly asked for an app. +2. 
**New minimal application** — A **new** small service or UI (pages, forms, demo in a browser). Official **server SDK** for the stack they name; stay framework-agnostic unless they specify a framework. +3. **Existing application** — Changes **inside their repo**. Same SDK-on-server rules; integrate on **real** domain paths. Use full-stack BFF + UI guidance when the product already has customer-facing settings. + +**Default when ambiguous:** Prefer **Quick path**. If they only name a language (“TypeScript example,” “try it”) and do **not** ask for an app, UI, pages, or repo integration → deliver **only** the quickstart-shaped artifact (or curl if no language). + +**Language ≠ architecture:** TypeScript / Python / Go pick **which quickstart and SDK**. They do **not** mean “build a web application.” + +## Mapping hints + +| They said | Likely path | +|-----------|-------------| +| “Example,” “quickstart,” “fastest,” “simplest,” “just show me,” or only a language with no app context | Quick path | +| “Small app,” “UI,” “page,” “form,” “demo site,” “dashboard” (greenfield) | New minimal application | +| “Our app,” “existing code,” “add to my API,” “integrate into this repo” | Existing application | + +When two paths seem possible, prefer **Quick path** unless they clearly want UI or repo integration. + +## Language → doc + +- No language + simplest → **curl quickstart** + OpenAPI. +- TypeScript / Node → **TypeScript quickstart** + `@hookdeck/outpost-sdk`. +- Python → **Python quickstart** + `outpost_sdk` (e.g. `publish.event` uses `request={...}`, not TS-style kwargs). +- Go → **Go quickstart** + official Go SDK. +- curl / HTTP only / REST without SDK → **curl quickstart** + OpenAPI. + +Do **not** mix argument styles across languages. 
+ +## Test webhook destination URLs + +For a **disposable webhook URL** in examples or READMEs, default to **[Hookdeck Console](https://console.hookdeck.com)** (create a Source; use the `https://hkdk.events/…` URL as the destination URL in curl). Do **not** use fictional Hookdeck paths like `https://hookdeck.com/webhook/create`. Avoid third-party echo sites (e.g. `webhook.site`) unless the user asked for one. + +## Topic reconciliation (domain-first) + +Derive **`topic`** strings from **real state changes**. If the project’s configured topic list is missing a name the app should emit, **do not** bend the product model to fit the list—tell the operator to **add the topic in Hookdeck** and refresh their prompt/config. Only narrow publishes when they **explicitly** ask for a minimal wiring demo. + +## SDK vs OpenAPI (BFF / dashboard UI) + +- Prefer the **official server SDK** when Hookdeck provides one for the backend language ([SDKs](https://hookdeck.com/docs/outpost/sdks)). +- **Wire JSON** matches **OpenAPI** (often **snake_case**); SDKs rename in language types (e.g. TypeScript **camelCase**). +- The **browser** should see the JSON your BFF actually returns—or **normalize** (e.g. forward raw `GET /destination-types`). +- On create/update, schema fields’ **`key`** maps into `config` / `credentials` per OpenAPI. + +Detail: [Building your own UI — authentication](https://hookdeck.com/docs/outpost/guides/building-your-own-ui#authentication) and [Wire JSON, SDK responses, and your UI](https://hookdeck.com/docs/outpost/guides/building-your-own-ui#wire-json-sdk-responses-and-your-ui). + +## Minimum depth (existing app) + +1. **Topic reconciliation** — every `topic` in `publish` exists in the project **or** the operator is told exactly which topics to add. +2. **Domain publish** — at least one **`publish` on a real state-change path**, not only a synthetic test route (unless scoped to wiring-only). +3. **Same tenant mapping** everywhere you call Outpost for that customer. 
diff --git a/skills/outpost/references/outpost-verify.md b/skills/outpost/references/outpost-verify.md new file mode 100644 index 0000000..c066cf0 --- /dev/null +++ b/skills/outpost/references/outpost-verify.md @@ -0,0 +1,21 @@ +# Before you stop (Outpost) — trimmed checklist + +From the [Hookdeck Outpost agent prompt](https://github.com/hookdeck/outpost/blob/main/docs/agent-evaluation/hookdeck-outpost-agent-prompt.md) **Before you stop (verify)**. Apply **only** items that fit the task; skip the rest (e.g. skip full-stack items for a curl-only flow). + +## Always (when you produced or changed runnable code) + +- [ ] **Test destination URL:** If README or examples need a throwaway webhook URL, point to **[Hookdeck Console](https://console.hookdeck.com)** (Source URL)—not `https://hookdeck.com/webhook/create` (invalid) and not third-party echo sites (e.g. `webhook.site`) unless the user requested one. +- [ ] **Ran** the smallest end-to-end check that fits (script once, one new API path, or smoke the UI/API flow) and saw a clear success signal (event id, HTTP 2xx, expected output). +- [ ] **Secrets:** Platform Outpost API key stays **server-side** / **environment** only — not in client bundles, not hard-coded in committed source. +- [ ] **Repeatable:** Env vars, how to run, and how to verify are stated briefly (README, comments, or chat — match task size). + +## When editing an existing application + +- [ ] **Topic reconciliation:** Every **`topic`** in `publish` is configured in the Outpost project **or** README/chat tells the operator exactly which topics to add in Hookdeck — **domain-first**; do not retarget real features to wrong topic names unless they explicitly asked for a minimal demo scope. +- [ ] **Domain publish:** At least one **`publish` on a real application path** (entity create/update, signup, …), not solely a synthetic “test event” endpoint — unless scoped to wiring-only. 
+- [ ] **Test publish (if you added one):** Separate from domain logic; does not replace domain publish. +- [ ] **Build integrity:** Lockfiles, route registries, and generated outputs stay consistent so a clean install + typecheck/build (or documented CI) would pass. + +## When you added or changed customer-facing destination UI + +- [ ] **Full-stack UI bar:** Walked **Planning and contract**, **Destinations experience**, and **Activity, attempts, and retries** in [Building your own UI — implementation checklists](https://hookdeck.com/docs/outpost/guides/building-your-own-ui#implementation-checklists): list → detail → destination-scoped activity; create/edit driven by **`GET /destination-types`** (including each field’s **`key`** in `config` / `credentials`); **separate server-side test publish** when customers manage destinations. *Skip if API-only or operator excluded activity UI—then document verification instead.* diff --git a/tools/agent-scenario-tester/README.md b/tools/agent-scenario-tester/README.md index 24c334d..d785609 100644 --- a/tools/agent-scenario-tester/README.md +++ b/tools/agent-scenario-tester/README.md @@ -4,23 +4,47 @@ A tool for evaluating whether AI agents can successfully use [Hookdeck agent ski ## What This Does -The agent scenario tester installs Hookdeck's agent skills into Claude Code, gives it a realistic developer task (like "receive webhooks from Stripe"), lets the agent run, and scores the output against a rubric. It answers the question: **can agents actually use these skills to get things done?** +The agent scenario tester installs **one** Hookdeck skill into Claude Code (see `skillUnderTest` in [`scenarios.yaml`](../../scenarios.yaml)), gives it a realistic developer task, lets the agent run, and scores the output with: + +1. **Heuristic assessor** — regex-style checks on `run.log`, `README.md`, and the generated handler code (always on). +2.
**Optional LLM-as-judge** — Anthropic Messages API scores the transcript against success criteria (`--judge` or `RUN_LLM_JUDGE=1`; same idea as [Outpost `llm-judge.ts`](https://github.com/hookdeck/outpost/blob/main/docs/agent-evaluation/src/llm-judge.ts)). This is Layer 2 of Hookdeck's agent skills testing — the evaluation layer. (Layer 1 is static quality linting via Tessl.) See [TESTING.md](../../TESTING.md) for the full picture. ## Why This Exists -When you build resources for AI agents, you lose the traditional feedback loops. Developers file support tickets and ask questions on Discord. Agents don't — they either succeed or silently move on. Evals are the feedback loop you get back. +When you build resources for AI agents, you lose the traditional feedback loops. Evals are the feedback loop you get back. + +Scenarios include Event Gateway flows, metrics discovery, provider composition, and **Outpost managed quickstart** (tenant → destination → publish). + +Each run produces a `report.md` with heuristic scores. With `--judge`, you also get `llm-score.json` and an **LLM judge** section appended to `report.md`. + +## `skillUnderTest` + +Per scenario, exactly **one** skill directory is copied into `.claude/skills/`: + +| Value | Installed skill | +|-------|-----------------| +| `event-gateway` | Default when omitted — existing scenarios. | +| `outpost` | Outbound Outpost skill (e.g. `outpost-managed-quickstart`). | +| `hookdeck` | Router umbrella skill. | -The tester runs three scenarios that test increasingly interesting agent behaviors: +## LLM judge (optional) -| Scenario | Tests | Key question | -|----------|-------|-------------| -| `receive-webhooks` | Core skill usage | Can the agent follow the skill to set up webhook receiving? | -| `receive-provider-webhooks` | Composition | Does the agent discover and install a Stripe-specific skill on its own? 
| -| `investigate-delivery-health` | Discovery | Does the agent find diagnostic tools (CLI metrics, MCP) when they aren't mentioned in the prompt? | +- **Flag:** `--judge` on `run` or `assess`. +- **Env:** `RUN_LLM_JUDGE=1` (or `true` / `yes`) to enable without passing the flag. +- **Secrets:** `ANTHROPIC_API_KEY` (required when judge runs). +- **Model:** optional `JUDGE_MODEL` or `EVAL_SCORE_MODEL`; default matches Outpost eval (`claude-sonnet-4-20250514`). -Each run produces a `report.md` scored against a rubric of 17–19 points covering: skill discovery, setup, scaffold, listen, iterate, code quality, and composition. +**Rubric source:** if the scenario defines `successCriteriaMarkdown` in YAML, that text is sent to the judge. Otherwise criteria are derived from the heuristic `evaluation.checks` list (rendered as markdown). + +**Artifacts:** `llm-score.json` (structured result) and an appended **## LLM judge** section in `report.md`. + +Judge runs **after** the heuristic report is written. It does **not** execute the agent’s shell or HTTP — it reads `run.log`, **generated text files on disk** (source, `package.json`, `.env.example`, etc., with framework entry paths first), then `README.md` when present (same signals as the heuristic assessor, including code). + +## Repo `.env` (optional) + +If a file named `.env` exists at the **agent-skills repo root** (next to `scenarios.yaml`), the CLI loads it on each command **before** subcommands run. Existing environment variables are **not** overridden. Copy [`.env.example`](../../.env.example) to `.env` and set at least `ANTHROPIC_API_KEY` when using the LLM judge (or when your Claude CLI relies on it). ## Usage @@ -28,19 +52,18 @@ Each run produces a `report.md` scored against a rubric of 17–19 points coveri ```bash ./scripts/test-agent-scenario.sh run receive-webhooks express -./scripts/test-agent-scenario.sh assess receive-provider-webhooks-express-stripe-20260212145955. 
+./scripts/test-agent-scenario.sh run receive-webhooks express --judge +./scripts/test-agent-scenario.sh run outpost-managed-quickstart express +./scripts/test-agent-scenario.sh assess receive-provider-webhooks-express-stripe-20260212145955. --judge ``` ### From this directory -Path to the entrypoint is **relative to cwd**. Use `src/index.ts` (not `tools/agent-scenario-tester/src/index.ts`): +Use `src/index.ts` (cwd must allow resolving `scenarios.yaml` up the tree, or run from repo root): ```bash -# list / run (from repo root is easier) npx tsx src/index.ts list -npx tsx src/index.ts assess receive-provider-webhooks-express-stripe-20260212145955. -# or -npm run assess -- receive-provider-webhooks-express-stripe-20260212145955. +npx tsx src/index.ts run outpost-managed-quickstart express --judge ``` `assess` infers scenario/framework/provider from the result directory name and updates `test-results//report.md`. @@ -49,18 +72,17 @@ npm run assess -- receive-provider-webhooks-express-stripe-20260212145955. Each scenario run produces a result directory under `test-results/` containing: -- `report.md` — Detailed rubric scoring with pass/fail for each criterion -- `transcript.json` — Full agent transcript showing every tool call, doc access, and decision -- Agent-generated code artifacts (the actual code the agent wrote) +- `report.md` — Heuristic rubric + scores; optional **LLM judge** section when enabled +- `run.log` — Full Claude Code CLI output +- `llm-score.json` — When judge ran: structured JSON (criteria, overall pass, summary) +- Agent-generated project files (Express / Next / FastAPI scaffold plus agent edits) ## The Iteration Loop -The workflow is: run scenarios, read the reports, improve the skills (or the tester itself), re-run, compare scores. CI runs these weekly to catch regressions. - -The A/B unit is **skill version x scenario set -> score delta**. Change a skill, re-run, see if agents perform better. 
+Run scenarios, read reports and `run.log`, improve skills or prompts, re-run, compare heuristic scores and judge summaries. ## Related -- [TESTING.md](../../TESTING.md) — Full testing strategy: both layers, scoring approaches, and context -- [Agent Skills spec](https://www.agent-skills.dev/) — The open specification for agent skills -- [Outpost agent evaluation](https://github.com/hookdeck/outpost/tree/main/docs/agent-evaluation) — The next layer up: evaluating complete onboarding journeys, not just individual skills +- [TESTING.md](../../TESTING.md) — Full testing strategy: both layers, CI, judge env vars +- [Agent Skills spec](https://www.agent-skills.dev/) +- [Outpost agent evaluation](https://github.com/hookdeck/outpost/tree/main/docs/agent-evaluation) — Onboarding / docs eval (separate product); this repo’s judge pattern is aligned with its Messages API approach diff --git a/tools/agent-scenario-tester/package.json b/tools/agent-scenario-tester/package.json index 5caa41d..695bc3b 100644 --- a/tools/agent-scenario-tester/package.json +++ b/tools/agent-scenario-tester/package.json @@ -11,13 +11,20 @@ "list": "tsx src/index.ts list", "assess": "tsx src/index.ts assess" }, - "keywords": ["testing", "agent-skills", "scenarios", "evaluation"], + "keywords": [ + "testing", + "agent-skills", + "scenarios", + "evaluation" + ], "license": "ISC", "dependencies": { "commander": "^12.0.0", + "dotenv": "^16.4.7", "js-yaml": "^4.1.0" }, "devDependencies": { + "@types/js-yaml": "^4.0.9", "@types/node": "^20.10.0", "tsx": "^4.20.6", "typescript": "^5.3.3" diff --git a/tools/agent-scenario-tester/src/assess.ts b/tools/agent-scenario-tester/src/assess.ts index 28e0f3f..dd35a30 100644 --- a/tools/agent-scenario-tester/src/assess.ts +++ b/tools/agent-scenario-tester/src/assess.ts @@ -8,6 +8,8 @@ * leaves README default, we still pass Stage 01/03. 
To reduce that: when the * project has a README that mentions Hookdeck, we require those checks to pass * from the README (so the agent must have documented in-repo). + * - Stage 01 first check: README must document Hookdeck CLI install with either a + * human-facing hookdeck.com/docs/cli URL (not .md) or an explicit install shell command. * - Skill discovery passes if log or readme mentions verification-code.md or * hookdeck listen — can be loose if the agent cites the skill in the reply. * - Stage 02 is code-based (handler content) so generally reliable. @@ -83,6 +85,16 @@ function getDocForSetupListen(combinedDoc: string, readme: string): string { return combinedDoc; } +/** Human-facing CLI install docs (README), not agent-facing .md fetch URLs. */ +function hookdeckCliInstallDocumented(doc: string): boolean { + const hasInstallCmd = + /brew\s+install\s+[^\n\r]*hookdeck|npm\s+(i|install)\s+(-g\s+|--global\s+|install\s+-g\s+|install\s+hookdeck-cli\s+-g\s+)hookdeck-cli|yarn\s+global\s+add\s+hookdeck-cli|scoop\s+install\s+hookdeck/i.test( + doc + ); + const hasHumanInstallLink = /hookdeck\.com\/docs\/cli(?!\.md)(?:[#?/:\w-]*)?/i.test(doc); + return hasInstallCmd || hasHumanInstallLink; +} + function passesCheck( check: string, stage: string, @@ -90,19 +102,52 @@ function passesCheck( combinedDoc: string, readme: string, handler: string, - provider?: string + provider: string | undefined, + scenario: ScenarioConfig ): boolean { const doc = combinedDoc; const setupListenDoc = getDocForSetupListen(combinedDoc, readme); const code = handler; + if (scenario.name === 'outpost-managed-quickstart') { + if (stage === 'Stage - Outpost discovery') { + if (index === 0) { + return /outpost|SKILL\.md|outpost-scope|outpost-verify|quickstart|docs\/outpost|hookdeck\.com\/docs\/outpost/i.test( + doc + ); + } + if (index === 1) { + const hasOutpost = /hookdeck\.com\/docs\/outpost|docs\/outpost|Hookdeck Outpost|Outpost API/i.test(doc); + const onlyInbound = + /hookdeck 
listen/i.test(doc) && + !/tenant|destination|publish|docs\/outpost|hookdeck\.com\/docs\/outpost/i.test(doc); + return hasOutpost && !onlyInbound; + } + return false; + } + if (stage === 'Stage - Outpost API path') { + if (index === 0) return /tenant|tenants|upsert/i.test(doc); + if (index === 1) return /destination|webhook|url|subscribe/i.test(doc); + if (index === 2) return /publish|ingest|\/events|POST.*event/i.test(doc); + if (index === 3) return /curl|REST|openapi|api\/v1|application\/json|x-api-key/i.test(doc); + return false; + } + if (stage === 'Stage - Outpost verify') { + if (index === 0) { + return /verif|attempt|deliver|log|activity|200|2xx|retry|Dashboard/i.test(doc); + } + if (index === 1) return /hookdeck\.com\/docs\/outpost|docs\/outpost/i.test(doc); + return false; + } + } + if (stage === 'Skill discovery') { if (index === 0) return /verification-code\.md|02-scaffold\.md|SKILL\.md|event-gateway skill/i.test(doc); if (index === 1) return /hookdeck listen|scaffold|setup|workflow/i.test(doc); return false; } if (stage === 'Stage 01 - Setup') { - if (index === 0) return /install.*hookdeck|brew.*hookdeck|npm.*hookdeck|hookdeck.*install|CLI/i.test(setupListenDoc); + if (index === 0) return hookdeckCliInstallDocumented(setupListenDoc); if (index === 1) return /hookdeck listen|hookdeck login/i.test(setupListenDoc); if (index === 2) return /Source URL|connection|Connection/i.test(setupListenDoc); return false; @@ -187,7 +232,7 @@ export function assessResult( for (const section of scenario.evaluation) { const checks: CheckResult[] = section.checks.map((check, index) => ({ check, - passed: passesCheck(check, section.stage, index, combinedDoc, readme, handler, provider), + passed: passesCheck(check, section.stage, index, combinedDoc, readme, handler, provider, scenario), })); const passedCount = checks.filter(c => c.passed).length; const score = section.checks.length > 0 ? 
Math.round((passedCount / section.checks.length) * section.points) : 0; diff --git a/tools/agent-scenario-tester/src/index.ts b/tools/agent-scenario-tester/src/index.ts index 54c8308..14049e6 100644 --- a/tools/agent-scenario-tester/src/index.ts +++ b/tools/agent-scenario-tester/src/index.ts @@ -6,18 +6,41 @@ import { Command } from 'commander'; import * as fs from 'fs'; -import os from 'os'; import path from 'path'; import { findRepoRoot, listScenarios, loadScenario, parseResultDirName } from './config.js'; +import { loadRepoDotenv } from './repo-dotenv.js'; +import { judgeEnabledFromEnv, runLlmJudgeAndAppendReport } from './llm-judge.js'; import { checkAll } from './preflight.js'; import { initializeProject } from './project.js'; import { listGeneratedFiles, writeReport } from './results.js'; import { runClaude } from './runner.js'; import { installSkills } from './skills.js'; -import type { Framework } from './types.js'; +import type { Framework, ScenarioConfig } from './types.js'; + +async function maybeRunLlmJudge( + resultDir: string, + scenario: ScenarioConfig, + framework: Framework, + dryRun: boolean, + judgeFlag: boolean +): Promise { + const want = (judgeFlag || judgeEnabledFromEnv()) && !dryRun; + if (!want) return; + const key = process.env.ANTHROPIC_API_KEY?.trim(); + if (!key) { + throw new Error('LLM judge requested (--judge or RUN_LLM_JUDGE=1) but ANTHROPIC_API_KEY is not set'); + } + console.log('\nRunning LLM judge (Anthropic Messages API)...'); + await runLlmJudgeAndAppendReport({ resultDir, scenario, framework, apiKey: key }); + console.log(`Wrote ${path.join(resultDir, 'llm-score.json')} and appended judge section to report.md`); +} const program = new Command(); +program.hook('preAction', () => { + loadRepoDotenv(process.cwd()); +}); + program .name('agent-scenario-tester') .description('End-to-end agent scenario testing for Hookdeck agent-skills') @@ -46,7 +69,8 @@ program .description( 'Re-run the assessor on an existing result directory 
and update report.md (e.g. after fixing the assessor or when handler is in src/index.js)' ) - .action((resultDirArg: string) => { + .option('--judge', 'Run LLM-as-judge after heuristics (requires ANTHROPIC_API_KEY)') + .action(async (resultDirArg: string, opts: { judge?: boolean }) => { try { const repoRoot = findRepoRoot(process.cwd()); const resultsDir = path.join(repoRoot, 'test-results'); @@ -100,6 +124,7 @@ program prompt: resolved.prompt, }; writeReport(result, reportFile); + await maybeRunLlmJudge(resultDir, resolved.config, resolved.framework, false, Boolean(opts.judge)); console.log(`Updated ${reportFile}`); console.log(` Scenario: ${resolved.config.name}, Framework: ${resolved.framework}${resolved.provider ? `, Provider: ${resolved.provider}` : ''}`); } catch (e) { @@ -115,11 +140,12 @@ program .option('--dry-run', 'Show what would be done without executing') .option('--verbose', 'Verbose Claude output') .option('--timeout ', 'Max time for Claude (default 300)', '300') + .option('--judge', 'Run LLM-as-judge after heuristics (requires ANTHROPIC_API_KEY)') .action( async ( scenarioName: string, framework: string, - opts: { provider?: string; dryRun?: boolean; verbose?: boolean; timeout?: string } + opts: { provider?: string; dryRun?: boolean; verbose?: boolean; timeout?: string; judge?: boolean } ) => { const frameworkTyped = framework as Framework; const timeoutMs = parseInt(opts.timeout ?? '300', 10) * 1000; @@ -155,6 +181,7 @@ program console.log('Installing skills...'); installSkills(resultDir, repoRoot, resolved); + console.log(`Installed skill: ${resolved.config.skillUnderTest ?? 'event-gateway'}`); console.log('Running Claude Code (this may take 2–5 minutes). 
Output below:\n'); const { durationSeconds } = await runClaude(resultDir, resolved.prompt, logFile, { @@ -176,6 +203,7 @@ program prompt: resolved.prompt, }; writeReport(result, reportFile); + await maybeRunLlmJudge(resultDir, resolved.config, frameworkTyped, Boolean(opts.dryRun), Boolean(opts.judge)); console.log('\n========================================'); console.log(' Test Complete'); @@ -184,6 +212,9 @@ program console.log(`Result directory (report + log + agent output): ${resultDir}`); console.log(` report.md evaluation checklist and scoring`); console.log(` run.log full Claude output`); + if (opts.judge || judgeEnabledFromEnv()) { + console.log(` llm-score.json optional LLM judge output (when enabled and not dry-run)`); + } console.log(` (rest) generated project files`); } catch (e) { console.error((e as Error).message); diff --git a/tools/agent-scenario-tester/src/llm-judge.ts b/tools/agent-scenario-tester/src/llm-judge.ts new file mode 100644 index 0000000..e3155da --- /dev/null +++ b/tools/agent-scenario-tester/src/llm-judge.ts @@ -0,0 +1,383 @@ +/** + * Optional LLM-as-judge scoring via Anthropic Messages API. + * Adapted from hookdeck/outpost docs/agent-evaluation/src/llm-judge.ts for agent-skills scenario runs: + * rubric = scenario success criteria + transcript (run.log + generated files on disk + optional README). + */ + +import * as fs from 'fs'; +import path from 'path'; +import { listGeneratedFiles } from './results.js'; +import type { Framework, ScenarioConfig } from './types.js'; + +const ANTHROPIC_MESSAGES_URL = 'https://api.anthropic.com/v1/messages'; +export const DEFAULT_JUDGE_MODEL = 'claude-sonnet-4-20250514'; +const MAX_TRANSCRIPT_CHARS = 180_000; +/** Max chars per generated file embedded in the judge transcript. */ +const MAX_CHARS_PER_ARTIFACT = 48_000; +const SKIP_ARTIFACT_BASENAMES = new Set(['report.md', 'run.log', 'llm-score.json']); +/** Relative paths considered text for judge context (exclude images, zips, etc.).
*/ +const TEXT_ARTIFACT_RE = /\.(js|cjs|mjs|ts|tsx|jsx|py|json|yaml|yml|toml|mdx?|css|html|sh|txt)$/i; + +export interface LlmCriterionJudgment { + readonly criterion: string; + readonly pass: boolean; + readonly evidence: string; +} + +export interface LlmJudgeReport { + readonly version: 1; + readonly model: string; + readonly scenarioName: string; + readonly overall_transcript_pass: boolean; + readonly execution_in_transcript: { + readonly pass: boolean | null; + readonly note: string; + }; + readonly criteria: readonly LlmCriterionJudgment[]; + readonly summary: string; +} + +function stripJsonFence(text: string): string { + const t = text.trim(); + const m = t.match(/^```(?:json)?\s*([\s\S]*?)```$/m); + if (m) return m[1].trim(); + return t; +} + +function parseJudgeJson(text: string): Omit & { version?: number } { + const raw = stripJsonFence(text); + const parsed = JSON.parse(raw) as Record; + const overall = Boolean(parsed.overall_transcript_pass); + const criteriaIn = parsed.criteria; + const criteria: LlmCriterionJudgment[] = []; + if (Array.isArray(criteriaIn)) { + for (const c of criteriaIn) { + if (typeof c !== 'object' || c === null) continue; + const o = c as Record; + criteria.push({ + criterion: String(o.criterion ?? o.id ?? 'unnamed'), + pass: Boolean(o.pass), + evidence: String(o.evidence ?? ''), + }); + } + } + const exec = parsed.execution_in_transcript; + let execution_in_transcript: LlmJudgeReport['execution_in_transcript'] = { + pass: null, + note: 'Not specified by judge.', + }; + if (typeof exec === 'object' && exec !== null) { + const e = exec as Record; + execution_in_transcript = { + pass: typeof e.pass === 'boolean' ? e.pass : null, + note: String(e.note ?? ''), + }; + } + return { + overall_transcript_pass: overall, + execution_in_transcript, + criteria, + summary: String(parsed.summary ?? ''), + }; +} + +const JUDGE_SYSTEM = `You are an expert evaluator for Hookdeck agent-skills scenario runs. 
+You judge whether an AI assistant's replies (and any code or README it wrote) satisfy the scenario Success criteria (markdown rubric provided by the user message). +The user message includes run.log, then generated files the agent wrote on disk (source, config), then README — use all of it when scoring. +Be strict: a criterion passes only if the transcript clearly satisfies it. +You cannot run shell or HTTP — do not claim execution passed; use execution_in_transcript.pass = null and explain in note. +Output ONLY valid JSON (no markdown fences, no commentary outside JSON) matching this shape: +{ + "overall_transcript_pass": boolean, + "execution_in_transcript": { "pass": null, "note": "string explaining you did not execute code" }, + "criteria": [ + { "criterion": "short label from checklist", "pass": boolean, "evidence": "1-3 sentences; quote or paraphrase assistant" } + ], + "summary": "2-4 sentences overall" +} +Map each major bullet/checkbox line from Success criteria to one criteria[] entry (merge tiny sub-bullets if needed).`; + +/** + * Build markdown rubric for the judge: explicit field wins; else derive from evaluation.checks. + */ +export function buildCriteriaMarkdown(scenario: ScenarioConfig): string { + const explicit = scenario.successCriteriaMarkdown?.trim(); + if (explicit) { + return explicit.startsWith('##') ? 
explicit : `## Success criteria\n\n${explicit}`; + } + const lines: string[] = ['## Success criteria', '']; + for (const section of scenario.evaluation) { + lines.push(`### ${section.stage}`); + for (const check of section.checks) { + lines.push(`- [ ] ${check}`); + } + lines.push(''); + } + return lines.join('\n').trim(); +} + +function readFileSafe(resultDir: string, ...parts: string[]): string { + try { + return fs.readFileSync(path.join(resultDir, ...parts), 'utf-8'); + } catch { + return ''; + } +} + +function frameworkPriorityRelPaths(framework: Framework): readonly string[] { + switch (framework) { + case 'express': + return ['index.js', 'src/index.js', 'server.js', 'app.js']; + case 'nextjs': + return ['app/webhooks/route.ts', 'src/app/webhooks/route.ts', 'pages/api/webhooks.ts']; + case 'fastapi': + return ['main.py', 'src/main.py']; + default: + return []; + } +} + +function truncateBlock(label: string, content: string, maxChars: number): string { + if (content.length <= maxChars) return content; + return ( + content.slice(0, maxChars) + + `\n\n[… ${label}: truncated at ${maxChars} chars; file length was ${content.length} …]\n` + ); +} + +/** + * Ordered list of generated text files to show the judge (handler paths first, then rest). + * README is attached separately; skip it here to avoid duplication. 
+ */ +function orderedJudgeArtifactPaths(resultDir: string, framework: Framework): string[] { + let rels: string[]; + try { + rels = listGeneratedFiles(resultDir); + } catch { + rels = []; + } + const set = new Set(rels); + const out: string[] = []; + const seen = new Set(); + + for (const p of frameworkPriorityRelPaths(framework)) { + if (set.has(p) && !seen.has(p)) { + seen.add(p); + out.push(p); + } + } + for (const p of rels.slice().sort()) { + if (seen.has(p)) continue; + const base = path.basename(p); + if (SKIP_ARTIFACT_BASENAMES.has(base)) continue; + if (p === 'README.md' || p === 'README') continue; + if (!TEXT_ARTIFACT_RE.test(p)) continue; + seen.add(p); + out.push(p); + } + return out; +} + +function buildGeneratedArtifactsMarkdown(resultDir: string, framework: Framework, maxTotalChars: number): string { + const paths = orderedJudgeArtifactPaths(resultDir, framework); + if (paths.length === 0) return ''; + + const header = '## Generated files (agent output on disk)\n\n'; + const chunks: string[] = []; + let used = header.length; + let filesIncluded = 0; + + for (const rel of paths) { + const abs = path.join(resultDir, rel); + let body: string; + try { + const stat = fs.statSync(abs); + if (!stat.isFile()) continue; + body = fs.readFileSync(abs, 'utf-8'); + } catch { + continue; + } + if (body.includes('\u0000')) continue; + + const capped = truncateBlock(rel, body, MAX_CHARS_PER_ARTIFACT); + const fence = '`'.repeat(3); + const block = `### ${rel}\n\n${fence}\n${capped}\n${fence}\n`; + if (used + block.length > maxTotalChars) { + const omitted = paths.length - filesIncluded; + if (omitted > 0) { + chunks.push(`[… omitted ${omitted} generated file(s) (context limit) …]\n`); + } + break; + } + chunks.push(block); + used += block.length; + filesIncluded += 1; + } + + return header + chunks.join('\n'); +} + +/** + * Transcript for judging: run.log, generated source/config on disk, then README (aligned with heuristic assess). 
+ */ +export function buildTranscriptForJudge(resultDir: string, framework: Framework): string { + const logRaw = readFileSafe(resultDir, 'run.log').trim(); + let readme = readFileSafe(resultDir, 'README.md'); + if (!readme) readme = readFileSafe(resultDir, 'README'); + const readmeTrim = readme.trim(); + + const logSection = truncateBlock('run.log', logRaw, 72_000); + const readmeSection = readmeTrim ? truncateBlock('README', readmeTrim, 56_000) : ''; + + const overhead = + 2000 + + logSection.length + + (readmeSection ? readmeSection.length + 80 : 0) + + '## Transcript (Claude Code run.log)\n\n'.length + + (readmeSection ? '## README (if written)\n\n'.length + 20 : 0); + const genBudget = Math.max(16_000, MAX_TRANSCRIPT_CHARS - overhead); + + const genSection = buildGeneratedArtifactsMarkdown(resultDir, framework, genBudget); + + const parts: string[] = ['## Transcript (Claude Code run.log)', '', logSection]; + if (genSection.trim()) { + parts.push('', '---', '', genSection.trimEnd()); + } + if (readmeSection) { + parts.push('', '---', '', '## README (if written)', '', readmeSection); + } + + let text = parts.join('\n'); + if (text.length > MAX_TRANSCRIPT_CHARS) { + text = + text.slice(0, MAX_TRANSCRIPT_CHARS) + + '\n\n[… transcript truncated for judge context …]\n'; + } + return text; +} + +export function formatLlmJudgeReportMarkdown(report: LlmJudgeReport): string { + const lines: string[] = [ + '## LLM judge', + '', + `**Model:** ${report.model}`, + `**Overall transcript pass:** ${report.overall_transcript_pass ? 'YES' : 'NO'}`, + `**Execution (transcript-only):** pass=${String(report.execution_in_transcript.pass)} — ${report.execution_in_transcript.note}`, + '', + '### Per criterion', + '', + ]; + for (const c of report.criteria) { + lines.push(`- **${c.pass ? 
'PASS' : 'FAIL'}** — ${c.criterion}`); + lines.push(` - ${c.evidence}`); + } + lines.push(''); + lines.push('### Summary'); + lines.push(''); + lines.push(report.summary); + lines.push(''); + return lines.join('\n'); +} + +export async function runLlmJudge(options: { + readonly scenario: ScenarioConfig; + readonly transcript: string; + readonly apiKey: string; + readonly model?: string; +}): Promise { + const model = + options.model?.trim() || + process.env.JUDGE_MODEL?.trim() || + process.env.EVAL_SCORE_MODEL?.trim() || + DEFAULT_JUDGE_MODEL; + const criteriaBlock = buildCriteriaMarkdown(options.scenario); + + const userContent = `## Success criteria (rubric) + +${criteriaBlock} + +--- + +## Transcript for review + +${options.transcript} + +--- + +Judge the transcript against the Success criteria. The transcript includes generated files on disk — treat their contents as the agent’s delivered code. Execution (running curl against a live API) is NOT evidenced here unless the transcript explicitly describes successful HTTP results; normally set execution_in_transcript.pass to null.`; + + const res = await fetch(ANTHROPIC_MESSAGES_URL, { + method: 'POST', + headers: { + 'content-type': 'application/json', + 'x-api-key': options.apiKey, + 'anthropic-version': '2023-06-01', + }, + body: JSON.stringify({ + model, + max_tokens: 8192, + system: JUDGE_SYSTEM, + messages: [{ role: 'user', content: userContent }], + }), + }); + + if (!res.ok) { + const errText = await res.text(); + throw new Error(`Anthropic API ${res.status}: ${errText.slice(0, 2000)}`); + } + + const body = (await res.json()) as { + content?: readonly { type?: string; text?: string }[]; + }; + const textBlock = body.content?.find((c) => c.type === 'text'); + const text = textBlock?.text ?? ''; + let judged: ReturnType; + try { + judged = parseJudgeJson(text); + } catch { + throw new Error(`Judge did not return parseable JSON. 
First 800 chars:\n${text.slice(0, 800)}`); + } + + return { + version: 1, + model, + scenarioName: options.scenario.name, + overall_transcript_pass: judged.overall_transcript_pass, + execution_in_transcript: judged.execution_in_transcript, + criteria: judged.criteria, + summary: judged.summary, + }; +} + +/** + * Writes llm-score.json and appends LLM judge section to report.md. + */ +export async function runLlmJudgeAndAppendReport(options: { + readonly resultDir: string; + readonly scenario: ScenarioConfig; + readonly framework: Framework; + readonly apiKey: string; + readonly model?: string; +}): Promise { + const transcript = buildTranscriptForJudge(options.resultDir, options.framework); + const report = await runLlmJudge({ + scenario: options.scenario, + transcript, + apiKey: options.apiKey, + model: options.model, + }); + + const jsonPath = path.join(options.resultDir, 'llm-score.json'); + fs.writeFileSync(jsonPath, JSON.stringify(report, null, 2), 'utf-8'); + + const md = formatLlmJudgeReportMarkdown(report); + const reportPath = path.join(options.resultDir, 'report.md'); + fs.appendFileSync(reportPath, '\n' + md, 'utf-8'); + + return report; +} + +export function judgeEnabledFromEnv(): boolean { + const v = process.env.RUN_LLM_JUDGE?.trim().toLowerCase(); + return v === '1' || v === 'true' || v === 'yes'; +} diff --git a/tools/agent-scenario-tester/src/repo-dotenv.ts b/tools/agent-scenario-tester/src/repo-dotenv.ts new file mode 100644 index 0000000..316a57e --- /dev/null +++ b/tools/agent-scenario-tester/src/repo-dotenv.ts @@ -0,0 +1,23 @@ +/** + * Load repo-root `.env` before commands run (does not override existing process.env). + */ + +import { config as dotenvConfig } from 'dotenv'; +import * as fs from 'fs'; +import * as path from 'path'; +import { findRepoRoot } from './config.js'; + +/** + * If `scenarios.yaml` can be resolved from cwd, load `/.env` when the file exists. 
+ */ +export function loadRepoDotenv(cwd: string = process.cwd()): void { + try { + const repoRoot = findRepoRoot(cwd); + const envPath = path.join(repoRoot, '.env'); + if (fs.existsSync(envPath)) { + dotenvConfig({ path: envPath }); + } + } catch { + // No scenarios.yaml (e.g. wrong cwd); skip silently. + } +} diff --git a/tools/agent-scenario-tester/src/skills.ts b/tools/agent-scenario-tester/src/skills.ts index 468eba6..7386505 100644 --- a/tools/agent-scenario-tester/src/skills.ts +++ b/tools/agent-scenario-tester/src/skills.ts @@ -1,40 +1,60 @@ /** - * Install only the event-gateway skill for scenario runs. - * We do not install provider skills (e.g. stripe-webhooks) from webhook-skills. - * The event-gateway skill guides the agent that those skills exist and how to - * use them (layered composition); the agent may choose to install the provider - * skill (e.g. npx skills add hookdeck/webhook-skills --skill stripe-webhooks -y -g) - * and then use it. + * Install exactly one skill for scenario runs (see skillUnderTest on scenario config). + * Provider skills (e.g. stripe-webhooks) from webhook-skills are not installed here. */ import * as fs from 'fs'; import path from 'path'; import type { ResolvedScenario } from './types.js'; +import type { SkillUnderTest } from './types.js'; -/** - * Install event-gateway skill from repo (path or hookdeck/agent-skills). - * repoRoot is the path to the agent-skills repo (where skills/ lives). 
- */ -export function installEventGatewaySkill(projectDir: string, repoRoot: string): void { +function copySkill(projectDir: string, repoRoot: string, skillDirName: string): void { const skillsDir = path.join(projectDir, '.claude', 'skills'); - const eventGatewaySrc = path.join(repoRoot, 'skills', 'event-gateway'); - if (!fs.existsSync(eventGatewaySrc)) { - throw new Error(`event-gateway skill not found at ${eventGatewaySrc}`); + const src = path.join(repoRoot, 'skills', skillDirName); + if (!fs.existsSync(src)) { + throw new Error(`Skill not found at ${src}`); } fs.mkdirSync(skillsDir, { recursive: true }); - const target = path.join(skillsDir, 'event-gateway'); + const target = path.join(skillsDir, skillDirName); if (fs.existsSync(target)) { fs.rmSync(target, { recursive: true }); } - fs.cpSync(eventGatewaySrc, target, { recursive: true }); + fs.cpSync(src, target, { recursive: true }); +} + +export function installEventGatewaySkill(projectDir: string, repoRoot: string): void { + copySkill(projectDir, repoRoot, 'event-gateway'); +} + +export function installOutpostSkill(projectDir: string, repoRoot: string): void { + copySkill(projectDir, repoRoot, 'outpost'); +} + +export function installHookdeckSkill(projectDir: string, repoRoot: string): void { + copySkill(projectDir, repoRoot, 'hookdeck'); +} + +function resolveSkillUnderTest(resolved: ResolvedScenario): SkillUnderTest { + const s = resolved.config.skillUnderTest; + if (s === 'hookdeck' || s === 'outpost' || s === 'event-gateway') return s; + return 'event-gateway'; } /** - * Install all skills required for the scenario. We only install event-gateway. - * Provider skills (stripe-webhooks, shopify-webhooks, etc.) are not installed - * here; the agent is guided by the event-gateway skill to discover and - * install them if needed. + * Install the single skill required for the scenario. 
*/ -export function installSkills(projectDir: string, repoRoot: string, _resolved: ResolvedScenario): void { - installEventGatewaySkill(projectDir, repoRoot); +export function installSkills(projectDir: string, repoRoot: string, resolved: ResolvedScenario): void { + const which = resolveSkillUnderTest(resolved); + switch (which) { + case 'hookdeck': + installHookdeckSkill(projectDir, repoRoot); + break; + case 'outpost': + installOutpostSkill(projectDir, repoRoot); + break; + case 'event-gateway': + default: + installEventGatewaySkill(projectDir, repoRoot); + break; + } } diff --git a/tools/agent-scenario-tester/src/types.ts b/tools/agent-scenario-tester/src/types.ts index 5db23f2..1d4bd92 100644 --- a/tools/agent-scenario-tester/src/types.ts +++ b/tools/agent-scenario-tester/src/types.ts @@ -6,6 +6,9 @@ export type Framework = 'express' | 'nextjs' | 'fastapi'; export type WorkflowStage = 'setup' | 'scaffold' | 'listen' | 'iterate'; +/** Exactly one skill copied into .claude/skills/ for the run. Omitted defaults to event-gateway. */ +export type SkillUnderTest = 'hookdeck' | 'event-gateway' | 'outpost'; + export interface EvaluationSection { stage: string; points: number; @@ -18,6 +21,12 @@ export interface ScenarioConfig { description: string; stages: WorkflowStage[]; prompt: string; + /** Which skill directory to install (default: event-gateway). */ + skillUnderTest?: SkillUnderTest; + /** + * Optional markdown rubric for the LLM judge. If omitted, criteria are derived from evaluation.checks. + */ + successCriteriaMarkdown?: string; providers?: string[]; providerDefault?: string; providerConfig?: Record;