From 46c4778c16992670582d6d7e7d1a27ddb0ee9ded Mon Sep 17 00:00:00 2001 From: Joshua Lansford Date: Thu, 28 May 2026 09:55:58 -0400 Subject: [PATCH 1/9] Adding word check pass through fluent-api. --- proposals/ai-tools-integration-suggestion.md | 950 +++++++++++++++++++ proposals/ai-tools-integration-summary.md | 30 + 2 files changed, 980 insertions(+) create mode 100644 proposals/ai-tools-integration-suggestion.md create mode 100644 proposals/ai-tools-integration-summary.md diff --git a/proposals/ai-tools-integration-suggestion.md b/proposals/ai-tools-integration-suggestion.md new file mode 100644 index 0000000..338c37b --- /dev/null +++ b/proposals/ai-tools-integration-suggestion.md @@ -0,0 +1,950 @@ +# AI-Tools Integration on fluent-api — Proposal + +**Status:** Draft for review. +**Scope:** Extend fluent-api to expose AI tools implemented by fluent-ai, starting with Greek-Room's *Repeated Words* check. The exposed pattern is meant to absorb every future AI tool (LLM drafting, embeddings, fine-tuning, other Greek-Room checks) without renegotiating the contract. +**Companion document:** [`fluent-api/proposals/ai-tools-integration-summary.md`](ai-tools-integration-summary.md) — short reviewer orientation. +**Predecessors on the fluent-ai side:** [`fluent-ai/greek-room-integration-summary.md`](../../fluent-ai/greek-room-integration-summary.md), [`fluent-ai/greek-room-integration-suggestion.md`](../../fluent-ai/greek-room-integration-suggestion.md), [`fluent-ai/greek-room-integration-decisions.md`](../../fluent-ai/greek-room-integration-decisions.md). + +--- + +## 1. Background + +fluent-ai is the Python/FastAPI backend dedicated to AI-tool integrations. 
It has merged its first such integration — Greek-Room's *Repeated Words* check — exposed at: + +``` +POST /api/v1/tools/greek-room/repeated-words +Header: X-API-Key: +``` + +with a `ToolJobResponse[RepeatedWordsResult]` envelope that already accommodates a future async-queue mode (`status: queued|running|completed|failed|cancelled`, `job_id`, `created_at`, `completed_at`). Today it always returns `status: "completed"` synchronously; the queue substrate is deferred until a slow tool needs it. See the predecessor documents linked above for the full architectural rationale. + +fluent-api is the Node/TypeScript backend that fronts the editor (fluent-web). It currently has no awareness of fluent-ai. This proposal describes how to put the *Repeated Words* check on the menu by routing it through fluent-api, while shaping the integration so the next AI tool drops in with minimum effort. + +The user-facing motivation is the editor: eventually each repeated word should get a corrective squiggle below it, similar to a spell-checker. That endgame is **out of scope for this PR**, but it sets the constraint that the surface fluent-api exposes must be cheap and re-callable per editor save, not stateful or session-coupled. + +### 1.1 Related repositories + +All four sibling projects live under the same GitHub org (`eten-tech-foundation`). Per fluent-platform's setup convention they are cloned side-by-side in the same parent directory. + +| Repo | Remote | Role | +|------|--------|------| +| **fluent-api** | [github.com/eten-tech-foundation/fluent-api](https://github.com/eten-tech-foundation/fluent-api) | Node/TypeScript REST API (Hono + Drizzle + BetterAuth). The subject of this proposal. | +| **fluent-ai** | [github.com/eten-tech-foundation/fluent-ai](https://github.com/eten-tech-foundation/fluent-ai) | Python/FastAPI service hosting AI-tool integrations (Greek-Room, future LLM tools). The upstream we are calling into. 
| +| **fluent-platform** | [github.com/eten-tech-foundation/fluent-platform](https://github.com/eten-tech-foundation/fluent-platform) | Container-first orchestrator. Owns the shared PostgreSQL, the unified compose stack, and helper scripts. Touched by this proposal — see §12.4. | +| **fluent-web** | [github.com/eten-tech-foundation/fluent-web](https://github.com/eten-tech-foundation/fluent-web) | React/Vite frontend (the editor). Not touched in this PR; the frontend hook is a follow-up. | + +Relative paths in this document (e.g. `../../fluent-platform/...`) assume the standard side-by-side layout that fluent-platform's setup script produces. + +--- + +## 2. Scope of this PR + +**In scope (this PR):** +1. A single new endpoint on fluent-api: `POST /ai/tools/greek-room/repeated-words`. +2. A shared utility — `callFluentAi(toolPath, body, schema)` — used by all per-tool routes to handle envelope unwrap, error translation, and (later) polling. +3. A new domain folder, `src/domains/ai-tools/`, containing routes/services/types for tool endpoints. +4. Two new env vars wired through `src/env.ts`: `FLUENT_AI_URL` and `FLUENT_AI_KEY`. +5. A new permission alias `PERMISSIONS.AI_TOOLS_USE` that maps to the same underlying value as `CONTENT_UPDATE`. +6. Tests mirroring the existing fluent-api test patterns plus one smoke test runnable from the host. + +**Explicitly deferred (future PRs):** +- Async job polling endpoint on fluent-api (`GET /ai/tools/jobs/{job_id}` or similar). Not built because fluent-ai also has not built the corresponding endpoint yet — both sides chose "lightweight now" per fluent-ai decision **D1**. +- Frontend (fluent-web) hooks and squiggle UI. Frontend is a separate session/PR. +- DB persistence of tool runs / findings. No `ai_tool_runs` or `check_results` table is introduced. +- Net-new cross-repo docker orchestration. 
The substrate already exists as [`fluent-platform`](../../fluent-platform/README.md); this PR adds two small entries (`FLUENT_AI_URL` override) to [`fluent-platform/compose.yaml`](../../fluent-platform/compose.yaml) and ships them alongside the fluent-api change. See §12 for details. +- Rate limits, request-size limits, MCP facade, SSE/WebSocket streaming, scheduled runs, multi-tenant fairness. All deferred at the fluent-ai level and inherited here. + +--- + +## 3. Architectural decisions summary + +These are the decisions captured during the spec discussion. Each is restated here so reviewers can discuss the conclusion without reading the supporting analysis. + +| # | Decision | Short rationale | +|---|----------|-----------------| +| **D1** | PR scope is "minimum proxy" — no DB persistence, no job queue exercised in this PR. | Repeated-words is fast (<1s) and re-runnable; persistence is not motivated by this tool. Defer until a slow tool justifies a `ai_tool_runs` table. | +| **D2** | URL is `POST /ai/tools/greek-room/repeated-words`. | Introduces `/ai/` as fluent-api's first top-level service-family namespace. Telegraphs "network-bound, potentially slow, possibly async" — characteristics that local CRUD endpoints don't share. Per-tool URL preserves OpenAPI type-safety. Alternatives: `/checks/repeated-words` (more in convention but hides the proxy nature), nested under `/chapter-assignments/{id}/` (requires server-side enrichment which we reject in D8). | +| **D3** | Polling lives in the *browser* via TanStack Query's `refetchInterval`, not in fluent-api. fluent-api is a thin pass-through for both kickoff and (future) polling. | Decouples slow tools from fluent-api's request budget. Aligns with fluent-web's existing TanStack Query usage. The polling code path is not exercised today because fluent-ai always returns `status: "completed"` synchronously. 
| +| **D4** | File layout: shared utility at [`fluent-api/src/lib/services/fluent-ai/fluent-ai.client.ts`](../src/lib/services/fluent-ai/fluent-ai.client.ts); per-tool routes/services in [`fluent-api/src/domains/ai-tools/`](../src/domains/ai-tools/). One route file for all tools; per-tool Zod schemas keep OpenAPI documentation fully typed. | Mirrors the existing [`fluent-api/src/lib/services/notifications/mailgun.service.ts`](../src/lib/services/notifications/mailgun.service.ts) pattern for "free functions wrapping a third-party API" and the existing [`fluent-api/src/lib/db-retry.ts`](../src/lib/db-retry.ts) pattern for "higher-order utility used by many call sites." Avoids a single one-size-fits-all dispatcher that would degrade OpenAPI schemas to `dict[str, Any]`. | +| **D5** | Service discovery / docker networking is handled by the existing [`fluent-platform`](../../fluent-platform/README.md) orchestrator. This PR adds two env vars on the fluent-api side and one `environment:` override on the fluent-platform side (`FLUENT_AI_URL: http://ai:8200`). See §12. | fluent-platform already wires `db`, `api`, `worker`, `ai`, `web` together on a shared network; we plug in to that substrate rather than invent a new one. | +| **D6** | A single shared `FLUENT_AI_KEY` is provisioned for the fluent-api → fluent-ai hop. If another consumer of fluent-ai appears later, it gets its own key. | Per-user keys give zero security benefit at this layer (everyone going through fluent-api is already authenticated to fluent-api). Single key minimizes IT complexity. | +| **D7** | Error translation specifics deferred to implementation. If conformity between the two error systems is awkward, prefer harmonizing fluent-ai toward fluent-api's patterns rather than the other way. | At the spec level there are no hard constraints; the safe defaults (5xx from fluent-ai → 502 on fluent-api with `ErrorCode.AI_SERVICE_UNAVAILABLE`) are obvious. | +| **D8** | No request enrichment. 
fluent-api forwards the request body to fluent-ai verbatim. fluent-web sends the full `RepeatedWordsRequest` shape including `lang_code`, `lang_name`, `project_id`, `project_name`, `verses[]`. | Maximum flexibility for the caller. Avoids coupling fluent-api to fluent-ai's request schema (today and tomorrow). | +| **D9** | The full `ToolJobResponse` envelope is passed through to fluent-web unchanged. No unwrap to `result` for the synchronous case. | Forward-compatible with TanStack-based polling — the same hook code consumes the envelope today (`status: completed`) and tomorrow (`status: queued` → polled to `completed`). | +| **D10** | Auth on the new endpoint: introduce `PERMISSIONS.AI_TOOLS_USE` as an *alias* with the same underlying value as `CONTENT_UPDATE`. | Cosmetically separates "can edit content" from "can invoke AI tools" without making a real distinction yet. Trivial to peel apart later. | +| **D11** | A smoke test analogous to [`fluent-ai/scripts/smoke_repeated_words.py`](../../fluent-ai/scripts/smoke_repeated_words.py) is added, runnable from the host with both services up. | Lets devs verify the cross-service plumbing without running the full vitest suite. | +| **D12** | This work ships as a **coordinated pair of PRs**: one against fluent-api (the bulk of the work) and one small PR against fluent-platform (compose env-var override + 1–2 README lines). Either order of merge is fine; both should be ready for review together. | The fluent-platform PR is small and contains no logic, so it can land first to unblock ecosystem-mode dev. Reviewers should be able to read both PRs side-by-side. | + +--- + +## 4. End-to-end picture + +### 4.1 Today (synchronous, status = "completed") + +```mermaid +sequenceDiagram + participant W as fluent-web (future hook) + participant A as fluent-api + participant I as fluent-ai + + W->>A: POST /ai/tools/greek-room/repeated-words<br/>Cookie BetterAuth session<br/>{verses, lang_code, ...} + Note over A: authenticateUser<br/>requirePermission(AI_TOOLS_USE) + A->>I: POST /api/v1/tools/greek-room/repeated-words<br/>X-API-Key FLUENT_AI_KEY<br/>(body forwarded verbatim) + Note over I: Run repeated-words<br/>(synchronous, <1s) + I-->>A: 200 ToolJobResponse {<br/>status: completed,<br/>result: {findings, summary},<br/>job_id, created_at, completed_at<br/>} + A-->>W: 200 ToolJobResponse (envelope passthrough) +``` + +### 4.2 Tomorrow (async, status = "queued" → polled) + +```mermaid +sequenceDiagram + participant W as fluent-web (TanStack useQuery) + participant A as fluent-api + participant I as fluent-ai + + W->>A: POST /ai/tools/some-slow-tool + A->>I: POST /api/v1/tools/some-slow-tool + I-->>A: 202 ToolJobResponse {status: queued, job_id} + A-->>W: 202 ToolJobResponse {status: queued, job_id} + + loop until status terminal + W->>A: GET /ai/tools/jobs/{job_id} + A->>I: GET /api/v1/tools/jobs/{job_id} + I-->>A: ToolJobResponse {status, result?} + A-->>W: ToolJobResponse {status, result?} + end + + Note over W: TanStack refetchInterval<br/>stops when status in<br/>{completed, failed, cancelled} +``` + +The interesting property: **the request/response shapes are identical** between 4.1 and 4.2. The only difference is `status` and whether `result` is populated. fluent-web's hook composes a `useMutation` for kickoff with a conditional `useQuery` that polls only while `status` is `"queued"` or `"running"`. + +--- + +## 5. URL and endpoint shape + +### 5.1 The URL + +``` +POST /ai/tools/greek-room/repeated-words +``` + +This introduces `/ai/` as fluent-api's first top-level service-family namespace. The full URL inventory survey conducted during the spec session is reproduced in [Appendix A](#appendix-a--fluent-api-url-inventory-at-time-of-writing). Today fluent-api's URLs are flat, plural-noun, unprefixed; nested URLs reflect ownership (`/projects/{id}/users`). There is no existing service-family namespace; `/usfm` *is not* a top-level prefix but a nested sub-resource under `/project-units/{id}`. + +#### Why `/ai/tools/greek-room/repeated-words` over the alternatives + +- **`/checks/repeated-words`** would be more in-convention (two segments, domain noun, hides the proxy nature). It was the leading candidate during the spec discussion and is preserved as an alternative. Its weakness is informational: the URL gives no hint about the network hop, which makes it harder to reason about timeouts, error budgets, and observability when the system grows. +- **`/tools/greek-room/repeated-words`** (mirroring fluent-ai exactly) loses the "AI service" signal but keeps the per-tool path. Same departure-from-convention cost as the chosen option, with less informational payload. +- **`/chapter-assignments/{id}/checks/repeated-words`** (nesting under the editable subject) would be the most in-convention nesting style. It is unsuitable here because pass-through input (D8) means the parent ID would not actually be consulted server-side — it would lie about the resource model. Honorable mention only. 
+ +#### Forward compatibility under `/ai/` + +The path layout makes room for the polling endpoint without name collisions: + +- `POST /ai/tools/{family}/{tool-name}` — kickoff (this PR for `greek-room/repeated-words`). +- `GET /ai/tools/jobs/{job_id}` — poll (future, when first slow tool ships). + +Note that the existing [`fluent-api/src/domains/usfm/usfm.route.ts`](../src/domains/usfm/usfm.route.ts) already owns `GET /jobs/{job_id}` for pg-boss USFM-export polling. **Keeping the AI-tools polling endpoint under `/ai/tools/jobs/{id}` avoids that collision** and lets the two job systems coexist with different response shapes (pg-boss-native vs. fluent-ai's `ToolJobResponse` envelope). + +### 5.2 OpenAPI documentation + +Each tool gets its own `createRoute({...})` call in [`fluent-api/src/domains/ai-tools/ai-tools.route.ts`](../src/domains/ai-tools/ai-tools.route.ts) with: + +- A typed `RepeatedWordsRequestSchema` (Zod schema mirroring fluent-ai's `RepeatedWordsRequest`). +- A typed `RepeatedWordsResponseSchema` wrapping the `ToolJobResponse[RepeatedWordsResult]` envelope. +- Proper 4xx/5xx response schemas using the existing `Result` → HTTP-status conventions ([`fluent-api/src/lib/types.ts`](../src/lib/types.ts)). + +This means the `/reference` Scalar docs at fluent-api's root will display the full request/response shape for each tool. No `dict[str, Any]` degradation. Adding a new tool means adding a new `createRoute(...)` block in the same file, registering it on the OpenAPIHono app — three to ten lines plus schemas. + +--- + +## 6. 
File layout + +``` +fluent-api/src/ +├── env.ts # +FLUENT_AI_URL, +FLUENT_AI_KEY +│ +├── lib/ +│ ├── permissions.ts # +PERMISSIONS.AI_TOOLS_USE (alias of CONTENT_UPDATE) +│ ├── types.ts # +ErrorCode.AI_SERVICE_UNAVAILABLE, +ErrorCode.AI_TOOL_EXECUTION_FAILED +│ └── services/ +│ └── fluent-ai/ # NEW +│ ├── fluent-ai.client.ts # callFluentAi(toolPath, body, schema): Result> +│ └── fluent-ai.types.ts # ToolJobResponse, JobStatus union, ToolJobError shape +│ +├── domains/ +│ └── ai-tools/ # NEW +│ ├── ai-tools.route.ts # POST /ai/tools/greek-room/repeated-words (per-tool routes go here) +│ ├── ai-tools.service.ts # callRepeatedWords(req): one-line wrappers per tool +│ └── ai-tools.types.ts # Per-tool Zod schemas: RepeatedWordsRequestSchema, RepeatedWordsResultSchema, ... +│ +└── server/ + └── server.ts # Register ai-tools routes (mirrors how existing domains register) +``` + +### 6.1 Why this layout + +The fluent-api codebase already has the right precedent for both pieces: + +- **`lib/services/fluent-ai/`** mirrors [`fluent-api/src/lib/services/notifications/mailgun.service.ts`](../src/lib/services/notifications/mailgun.service.ts) — free functions exported from a service file under `lib/services/{vendor}/{vendor}.service.ts`. The Mailgun file returns `Promise>` and reads its credentials directly from `process.env`. Our `callFluentAi` follows the same shape. +- **`callFluentAi` as a higher-order utility** mirrors [`fluent-api/src/lib/db-retry.ts`](../src/lib/db-retry.ts)'s `withDatabaseRetry(operation, options)` pattern. One shared utility, many call sites, no code duplication, no over-generalization. +- **`domains/ai-tools/`** as a domain folder mirrors every other domain in the codebase (`domains/projects/`, `domains/translated-verses/`, etc.). Routes/services/types separated. Hono `createRoute` per endpoint. `Result` returned from services and converted via `getHttpStatus(error)` in routes. 
+ +### 6.2 Why not a generic dispatcher + +A generic `POST /ai/dispatch` endpoint accepting `{tool: string, params: unknown}` was considered and rejected (this echoes the fluent-ai-side decision **D2** in [`fluent-ai/greek-room-integration-decisions.md`](../../fluent-ai/greek-room-integration-decisions.md)). The reasons are the same in TypeScript-land: + +- OpenAPI/Scalar docs would degrade to `unknown` payloads. +- Each new tool would lose its named, typed request/response in the docs. +- Per-tool observability (route-level logging, request-time histograms) becomes harder. +- A future MCP facade can still be layered on top of per-tool URLs without invalidating them. + +### 6.3 Why one route file for all tools instead of one per tool + +`ai-tools.route.ts` co-locates every tool endpoint so adding a new tool requires touching the same three files every time (`ai-tools.types.ts` for the Zod schemas, `ai-tools.service.ts` for the wrapper, `ai-tools.route.ts` for the route). When the route file becomes uncomfortably large (~5+ tools), a split by tool *family* — `ai-tools.greek-room.route.ts`, `ai-tools.openai.route.ts`, etc. — is the natural next step. Not warranted at one tool. + +--- + +## 7. The shared utility: `callFluentAi` + +The single piece of *new mechanism* this PR introduces is the function in [`fluent-api/src/lib/services/fluent-ai/fluent-ai.client.ts`](../src/lib/services/fluent-ai/fluent-ai.client.ts). + +### 7.1 Signature + +```ts +import { z } from '@hono/zod-openapi'; +import type { Result } from '@/lib/types'; +import type { ToolJobResponse } from './fluent-ai.types'; + +export async function callFluentAi<TReq, TResult>( + toolPath: string, // e.g. 'tools/greek-room/repeated-words' (no leading slash; no /api/v1) + body: TReq, + resultSchema: z.ZodType<TResult>, // for runtime validation of the result field on success + options?: { + signal?: AbortSignal; // honored if caller wants timeout / cancellation + timeoutMs?: number; // default 30_000 + }, +): Promise<Result<ToolJobResponse<TResult>>>; +``` + +### 7.2 What it does + +1. 
Reads `env.FLUENT_AI_URL` and `env.FLUENT_AI_KEY` (validated at boot in [`fluent-api/src/env.ts`](../src/env.ts)). +2. POSTs to `${FLUENT_AI_URL}/api/v1/${toolPath}` with: + - `Content-Type: application/json` + - `X-API-Key: ${FLUENT_AI_KEY}` + - body serialized as JSON +3. Honors the caller's `AbortSignal` if provided; otherwise applies a default 30-second timeout via a derived signal. (Tunable per-call.) +4. On HTTP-level success (2xx), parses the response body as `ToolJobResponse` and validates the `result` field against `resultSchema` *if and only if* `status === "completed"`. (When status is `queued|running`, `result` is `null` and is not validated.) +5. Returns `{ ok: true, data: envelope }` — note this is the **full envelope**, not the unwrapped result. Callers that care only about the synchronous-completed case can `if (envelope.status === "completed") return envelope.result`. Callers that want to support the future polling case can inspect `envelope.status` and `envelope.job_id`. +6. On HTTP error (4xx/5xx), network error, parse error, schema-validation error, or an envelope whose `status` is `failed` or `cancelled`, returns `{ ok: false, error: {...} }` using the error mapping in §10. + +### 7.3 What it does **not** do (in this PR) + +- It does not poll. A `pollUntilComplete: true` option, or a sibling `pollToolJob(jobId, resultSchema)` function, can be added in the future PR that ships the first slow tool. Today the polling code path is not in scope because fluent-ai has not yet shipped the polling endpoint either. +- It does not cache. Each call is independent. Per-tool caching (e.g. memoizing on `(toolPath, hash(body))`) is a future optimization for expensive idempotent tools. +- It does not retry on transport failure. `withDatabaseRetry`-style retries are intentionally not applied because most AI tool failures are *semantic*, not *transport-flaky*. If a user-facing retry policy is wanted, it belongs at the route layer or in the frontend hook, not in this utility. 
+ +### 7.4 Why this shape + +Compare it to the existing utilities it's modeled on: + +- [`withDatabaseRetry(operation, options)`](../src/lib/db-retry.ts) is a higher-order async wrapper. `callFluentAi` is also a higher-order async wrapper, parameterized by request/result types and the runtime Zod schema. +- [`sendInvitationEmail({email, ticketUrl, ...})`](../src/lib/services/notifications/mailgun.service.ts) is a free function in `lib/services/` that wraps a third-party API and returns `Promise<Result<...>>`. `callFluentAi` is a free function in `lib/services/` that wraps a third-party API and returns `Promise<Result<...>>`. + +The point of `callFluentAi` is **not** to be the only function callers ever touch. Each tool gets a typed wrapper in [`ai-tools.service.ts`](../src/domains/ai-tools/ai-tools.service.ts) that calls `callFluentAi` once. The wrapper is what the route file imports; the shared utility is a private implementation detail. + +### 7.5 Example per-tool wrapper + +```ts +// fluent-api/src/domains/ai-tools/ai-tools.service.ts + +import { callFluentAi } from '@/lib/services/fluent-ai/fluent-ai.client'; +import type { Result } from '@/lib/types'; +import type { ToolJobResponse } from '@/lib/services/fluent-ai/fluent-ai.types'; +import { + type RepeatedWordsRequest, + type RepeatedWordsResult, + RepeatedWordsResultSchema, +} from './ai-tools.types'; + +export async function callRepeatedWords( + req: RepeatedWordsRequest, +): Promise<Result<ToolJobResponse<RepeatedWordsResult>>> { + return callFluentAi( + 'tools/greek-room/repeated-words', + req, + RepeatedWordsResultSchema, + ); +} +``` + +Adding a future tool (say, `coherence-check`) is the same five-line pattern: + +```ts +export async function callCoherenceCheck( + req: CoherenceCheckRequest, +): Promise<Result<ToolJobResponse<CoherenceCheckResult>>> { + return callFluentAi( + 'tools/some-family/coherence-check', + req, + CoherenceCheckResultSchema, + ); +} +``` + +### 7.6 Module-level singleton vs. per-call config + +`callFluentAi` reads env at module scope, not per call. 
This means changing `FLUENT_AI_URL` or `FLUENT_AI_KEY` requires restarting fluent-api — same property as Mailgun, pg-boss, BetterAuth, AppInsights, all of which already work this way in fluent-api. For tests, dependency injection of a base URL is achieved by stubbing `fetch` (vitest's `vi.spyOn(global, 'fetch')`), not by passing config to `callFluentAi`. This matches the existing test conventions in fluent-api. + +--- + +## 8. Request and response shapes + +### 8.1 The forward direction (fluent-web → fluent-api → fluent-ai) + +Per **D8** (no enrichment), the request body shape on `POST /ai/tools/greek-room/repeated-words` is **identical** to fluent-ai's `RepeatedWordsRequest`. Codified in Zod in [`fluent-api/src/domains/ai-tools/ai-tools.types.ts`](../src/domains/ai-tools/ai-tools.types.ts): + +```ts +export const VerseInputSchema = z.object({ + snt_id: z.string().min(1), + text: z.string(), +}); + +export const RepeatedWordsRequestSchema = z.object({ + lang_code: z.string().min(1), + lang_name: z.string().min(1), + project_id: z.union([z.string(), z.number()]), + project_name: z.string().min(1), + verses: z.array(VerseInputSchema).min(1), +}); + +export type RepeatedWordsRequest = z.infer<typeof RepeatedWordsRequestSchema>; +``` + +Notes: +- `project_id` is intentionally permissive (`string | number`) to match fluent-ai's Pydantic model, which accepts either. fluent-api's own `project.id` is an integer. +- `verses` is required and non-empty (`.min(1)`) so we can fail fast at the route layer rather than incur a round-trip to fluent-ai for a trivially-invalid request. +- The field naming uses fluent-ai's snake_case verbatim (`lang_code`, `snt_id`). This is a deliberate departure from fluent-api's camelCase elsewhere; the alternative (rename in fluent-api, re-rename in fluent-ai) buys nothing and risks drift. The OpenAPI docs make the snake_case visible to the frontend. 
+ +### 8.2 The reverse direction (fluent-ai → fluent-api → fluent-web) + +Per **D9** (envelope pass-through), the response body is fluent-ai's `ToolJobResponse[RepeatedWordsResult]` verbatim: + +```ts +// fluent-api/src/lib/services/fluent-ai/fluent-ai.types.ts + +export type JobStatus = + | 'queued' + | 'running' + | 'completed' + | 'failed' + | 'cancelled'; + +export interface ToolJobError { + type: string; // e.g. 'TOOL_EXECUTION_ERROR' + message: string; + details?: unknown; +} + +export interface ToolJobResponse<TResult> { + job_id: string; // UUID + tool: string; // e.g. 'greek-room/repeated-words' + status: JobStatus; + result: TResult | null; + error: ToolJobError | null; + created_at: string; // ISO-8601 timestamp + completed_at: string | null; +} +``` + +```ts +// fluent-api/src/domains/ai-tools/ai-tools.types.ts (continued) + +export const RepeatedWordsFindingSchema = z.object({ + snt_id: z.string(), + repeated_word: z.string(), + surf: z.string(), + start_position: z.number().int().nonnegative(), + legitimate: z.boolean(), + severity: z.enum(['info', 'warning', 'error']), +}); + +export const RepeatedWordsSummarySchema = z.object({ + total_findings: z.number().int().nonnegative(), + verses_with_findings: z.number().int().nonnegative(), + verses_total: z.number().int().nonnegative(), +}); + +export const RepeatedWordsResultSchema = z.object({ + findings: z.array(RepeatedWordsFindingSchema), + summary: RepeatedWordsSummarySchema, +}); + +export type RepeatedWordsResult = z.infer<typeof RepeatedWordsResultSchema>; + +export const RepeatedWordsResponseSchema = z.object({ + job_id: z.string().uuid(), + tool: z.literal('greek-room/repeated-words'), + status: z.enum(['queued', 'running', 'completed', 'failed', 'cancelled']), + result: RepeatedWordsResultSchema.nullable(), + error: z + .object({ + type: z.string(), + message: z.string(), + details: z.unknown().optional(), + }) + .nullable(), + created_at: z.string().datetime({ offset: true }), + completed_at: z.string().datetime({ offset: true 
}).nullable(), +}); +``` + +The `RepeatedWordsResponseSchema` is what the Hono route declares as its 200 response, so OpenAPI docs show the full envelope. fluent-web's hook receives the envelope and inspects `status` and `result` directly. + +### 8.3 Status codes from fluent-api + +| Outcome | HTTP | Body | +|---------|------|------| +| Tool completed synchronously | `200 OK` | `ToolJobResponse` with `status: "completed"` | +| Tool started asynchronously (future) | `202 Accepted` | `ToolJobResponse` with `status: "queued"` | +| Caller not authenticated | `401 Unauthorized` | fluent-api's standard `Result` error | +| Caller authenticated but lacks `AI_TOOLS_USE` | `403 Forbidden` | fluent-api's standard `Result` error | +| Request body fails Zod validation | `400 Bad Request` | fluent-api's standard validation error | +| fluent-ai returns 4xx (bad request, auth failure, etc.) | `502 Bad Gateway` | fluent-api error with `code: AI_SERVICE_UNAVAILABLE` and the upstream message in `details` | +| fluent-ai returns 5xx | `502 Bad Gateway` | same as above | +| Network timeout / connection refused | `502 Bad Gateway` | same as above | +| Envelope `status === "failed"` from fluent-ai | `502 Bad Gateway` | fluent-api error with `code: AI_TOOL_EXECUTION_FAILED` and the envelope `error` propagated | + +The 502 choice for upstream failures mirrors what fluent-ai itself does for its own upstream tool failures (`ToolExecutionException` → 502 per fluent-ai decision **D6**). It signals "this isn't a problem with the caller's request; the dependency is misbehaving." + +--- + +## 9. 
Authentication and authorization + +### 9.1 Two distinct auth boundaries + +| Boundary | Mechanism | Established by | Established when | +|----------|-----------|---------------|------------------| +| fluent-web → fluent-api | BetterAuth session cookie | This codebase, existing | Pre-existing | +| fluent-api → fluent-ai | Single shared `X-API-Key` | This PR, env-driven | This PR | + +These boundaries do not bridge directly: there is no propagation of "user X is calling this tool" beyond fluent-api. Audit logs on the fluent-ai side will see the single shared identity. If per-user attribution is wanted later, the request envelope can carry an opaque `requested_by` claim — out of scope for this PR. + +### 9.2 The route guards + +```ts +// fluent-api/src/domains/ai-tools/ai-tools.route.ts (excerpt) + +const repeatedWordsRoute = createRoute({ + method: 'post', + path: '/ai/tools/greek-room/repeated-words', + middleware: [ + authenticateUser, + requirePermission(PERMISSIONS.AI_TOOLS_USE), + ] as const, + request: { + body: { + content: { + 'application/json': { schema: RepeatedWordsRequestSchema }, + }, + }, + }, + responses: { + 200: { + content: { 'application/json': { schema: RepeatedWordsResponseSchema } }, + description: 'Repeated-words check completed', + }, + 202: { + content: { 'application/json': { schema: RepeatedWordsResponseSchema } }, + description: 'Repeated-words check accepted; poll for result', + }, + 400: { description: 'Invalid request body' }, + 401: { description: 'Not authenticated' }, + 403: { description: 'Missing AI_TOOLS_USE permission' }, + 502: { description: 'Upstream fluent-ai error' }, + }, +}); +``` + +### 9.3 `PERMISSIONS.AI_TOOLS_USE` + +Per **D10**, this is introduced as an *alias* of `CONTENT_UPDATE`: + +```ts +// fluent-api/src/lib/permissions.ts (excerpt) + +export const PERMISSIONS = { + // ... existing permissions ... + CONTENT_UPDATE: 'content:update', + AI_TOOLS_USE: 'content:update', // intentional alias + // ... 
+} as const; +``` + +The alias has the same string value, which means `requirePermission(PERMISSIONS.AI_TOOLS_USE)` resolves to the same check as `requirePermission(PERMISSIONS.CONTENT_UPDATE)`. The semantic separation is **purely cosmetic** today — it documents intent at call sites and reserves the option to break it out into a distinct permission later (with its own DB row in the `permissions` table and its own role mappings) without touching any code that already imports `PERMISSIONS.AI_TOOLS_USE`. + +If reviewers prefer a real new permission row from day one, that's a defensible alternative; it costs a migration and seeding work and gives no immediate user-visible benefit. The alias approach was chosen because it's reversible from either direction. + +### 9.4 The `X-API-Key` for fluent-ai + +Per **D6**, fluent-api carries a single `FLUENT_AI_KEY` for *all* fluent-ai calls. The key is read once at module scope in `callFluentAi`. Rotation is "set new env, restart fluent-api"; fluent-ai supports multiple active keys per its existing `ai_api_keys` table, so old key + new key can coexist briefly during a rolling restart. + +--- + +## 10. Error translation + +Per **D7**, the exact mapping is settled at implementation time, and if conformity work surfaces we prefer to harmonize fluent-ai toward fluent-api's patterns. This section describes the *minimum viable* mapping that the implementation should ship with; reviewers should challenge anything they want changed before coding starts. + +### 10.1 New `ErrorCode` entries on fluent-api + +Two new entries are added to [`fluent-api/src/lib/types.ts`](../src/lib/types.ts)'s `ErrorCode` enum: + +```ts +export enum ErrorCode { + // ... existing entries ... + AI_SERVICE_UNAVAILABLE = 'AI_SERVICE_UNAVAILABLE', + AI_TOOL_EXECUTION_FAILED = 'AI_TOOL_EXECUTION_FAILED', +} +``` + +Both map to HTTP 502 via `ErrorHttpStatus`: + +```ts +export const ErrorHttpStatus: Record = { + // ... existing entries ... 
+ [ErrorCode.AI_SERVICE_UNAVAILABLE]: 502, + [ErrorCode.AI_TOOL_EXECUTION_FAILED]: 502, +}; +``` + +`AI_SERVICE_UNAVAILABLE` covers transport-level / availability problems (network errors, 4xx or 5xx responses from fluent-ai, JSON or schema parse errors, timeouts). `AI_TOOL_EXECUTION_FAILED` covers the case where fluent-ai successfully returned an envelope with `status: "failed"` (or `"cancelled"`) — the dependency is *up* but the tool itself rejected or abandoned the work. + +### 10.2 Mapping table + +| Source | Translates to | +|--------|---------------| +| `fetch` throws (network down, DNS, connection refused) | `Result.err({ code: AI_SERVICE_UNAVAILABLE, message: 'fluent-ai unreachable', details: { cause: error.message } })` | +| `fetch` times out (default 30s) | `Result.err({ code: AI_SERVICE_UNAVAILABLE, message: 'fluent-ai request timed out', details: { timeoutMs } })` | +| fluent-ai returns 5xx | `Result.err({ code: AI_SERVICE_UNAVAILABLE, message: '', details: { status, body } })` | +| fluent-ai returns 4xx | `Result.err({ code: AI_SERVICE_UNAVAILABLE, message: '', details: { status, body } })` — yes, also 502 on our side; 4xx from fluent-ai represents a misconfiguration or a contract drift, neither of which is the *caller's* fault, so we shield them with 502 rather than relay a 4xx that they cannot act on | +| Response body fails JSON parse or envelope schema validation | `Result.err({ code: AI_SERVICE_UNAVAILABLE, message: 'malformed response from fluent-ai', details: { cause } })` | +| Envelope `status === "failed"` (fluent-ai reachable; tool refused) | `Result.err({ code: AI_TOOL_EXECUTION_FAILED, message: envelope.error?.message ?? 'tool execution failed', details: { type: envelope.error?.type, ...
} })` | +| Envelope `status === "cancelled"` | Same as `failed` — propagate `AI_TOOL_EXECUTION_FAILED` | +| Envelope `status === "completed"` | `Result.ok(envelope)` | +| Envelope `status === "queued"` or `"running"` | `Result.ok(envelope)` — the route layer decides whether to return 200 or 202 based on `status` | + +### 10.3 Route-level translation + +The Hono route handler uses `getHttpStatus(error)` from [`fluent-api/src/lib/types.ts`](../src/lib/types.ts) exactly as every existing fluent-api route does. The new `AI_*` codes plug into the same conversion path: + +```ts +// fluent-api/src/domains/ai-tools/ai-tools.route.ts (excerpt) + +aiToolsRouter.openapi(repeatedWordsRoute, async (c) => { + const body = c.req.valid('json'); + const result = await callRepeatedWords(body); + + if (!result.ok) { + return c.json( + { error: result.error.message, code: result.error.code, details: result.error.details }, + getHttpStatus(result.error), + ); + } + + const envelope = result.data; + const status = envelope.status === 'completed' || envelope.status === 'failed' || envelope.status === 'cancelled' + ? 200 + : 202; + return c.json(envelope, status); +}); +``` + +### 10.4 What's intentionally *not* in here + +- **No automatic retries** on transport failure. The caller (or the frontend hook) decides. +- **No structured "user-facing-vs-internal" error categorization** beyond the `code + message + details` shape that fluent-api already uses everywhere. fluent-web is expected to display `error.message` directly and surface `error.details` only to logged-in admins. +- **No localization of error strings.** Errors from fluent-ai come through as English; that's an upstream concern. + +### 10.5 Possible follow-up harmonization with fluent-ai + +If during implementation the team finds fluent-ai's error envelope shape (`{type, message, details}`) is awkward to consume on the fluent-api side — e.g. 
the `type` field collides with TypeScript reserved words at certain call sites, or `details` needs a `Record` constraint that fluent-ai doesn't enforce — the path of least resistance is to file a small change against fluent-ai to align its error envelope with fluent-api's expectations. Per **D7**, we'd rather change the less-mature fluent-ai shape than introduce a translation layer. + +--- + +## 11. The job-queue protocol — forward compatibility + +This section describes what this PR sets up but does not exercise. It is deliberately detailed so reviewers can sanity-check that the contract is sound before fluent-ai (and a slow tool) actually need it. + +### 11.1 The contract today vs. tomorrow + +**Today** every response from fluent-ai is synchronous with `status: "completed"`. fluent-api hands the envelope to fluent-web as a 200 response. No polling occurs. + +**Tomorrow**, when fluent-ai introduces a slow tool, it can return `202 Accepted` with `status: "queued"` and a real `job_id` that exists in fluent-ai's job table. The protocol fluent-ai will (eventually) expose is the existing fluent-ai decision **D3** envelope plus a new polling endpoint: + +``` +GET /api/v1/tools/jobs/{job_id} +→ ToolJobResponse with current status and (if completed) result +``` + +Returns 200 in all states (queued/running/completed/failed/cancelled). The HTTP status is *not* used to communicate terminal vs. non-terminal — only the envelope's `status` field is. 
+ +### 11.2 fluent-api's pass-through polling endpoint (future) + +When fluent-ai adds the polling endpoint, fluent-api adds: + +``` +GET /ai/tools/jobs/{job_id} +→ Pass-through of fluent-ai's response, with the same auth (BetterAuth session + AI_TOOLS_USE permission) +``` + +Implementation will be a second helper alongside `callFluentAi`: + +```ts +// future, not in this PR +export async function pollToolJob<T>( + jobId: string, + resultSchema: z.ZodType<T>, +): Promise<Result<ToolJobResponse<T>>>; +``` + +### 11.3 Why polling lives in the browser, not in fluent-api + +Per **D3**. The detailed reasoning, repeated for completeness: + +- **Decouples slow tools from fluent-api's request budget.** A 5-minute tool does not hold a browser-to-fluent-api socket open for 5 minutes through whatever proxies, load balancers, or middle boxes sit between them. +- **Matches the editor UX shape.** When the eventual squiggle-on-typing UX is built, the browser already has its own state machine for "user has typed, debounce, kick off check, show pending indicator, show squiggles when result arrives." Putting polling on the server adds nothing to that loop. +- **TanStack Query has the right primitives.** `refetchInterval` accepts a function that inspects the current query state and returns `false` to stop polling — i.e., roughly `(query) => isTerminal(query.state.data?.status) ? false : 1500` (the same shape used in the §11.4 sketch). No custom polling library needed. +- **Aligns with the existing fluent-web pattern.** Every existing fluent-web API hook calls `fetch` directly; there is no centralized server-state abstraction beyond TanStack itself. Adding server-side polling would be the foreign element. + +### 11.4 What the frontend hook will look like (out of scope, sketched) + +This is *not* part of this PR, but is sketched here so reviewers can see that the backend contract is consumable.
+ +```ts +// fluent-web/src/lib/api/useToolJob.ts (future) + +import { useQuery } from '@tanstack/react-query'; +import type { ToolJobResponse } from './tool-job-types'; + +const TERMINAL: Set<ToolJobResponse<unknown>['status']> = new Set(['completed', 'failed', 'cancelled']); + +export function useToolJob<T>( + jobId: string | null, + opts?: { pollIntervalMs?: number; enabled?: boolean }, +) { + return useQuery<ToolJobResponse<T>>({ + queryKey: ['ai-tools', 'jobs', jobId], + queryFn: () => + fetch(`${config.api.url}/ai/tools/jobs/${jobId}`, { credentials: 'include' }).then(r => r.json()), + enabled: !!jobId && (opts?.enabled ?? true), + refetchInterval: (q) => (q.state.data && TERMINAL.has(q.state.data.status)) ? false : (opts?.pollIntervalMs ?? 1500), + }); +} +``` + +```ts +// fluent-web/src/features/checks/hooks/useRepeatedWords.ts (future) + +export function useRepeatedWords() { + const [pendingJobId, setPendingJobId] = useState<string | null>(null); + + const kickoff = useMutation({ + mutationFn: (req: RepeatedWordsRequest) => + fetch(`${config.api.url}/ai/tools/greek-room/repeated-words`, { + method: 'POST', + credentials: 'include', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify(req), + }).then(r => r.json() as Promise<ToolJobResponse<RepeatedWordsResult>>), + onSuccess: (envelope) => { + if (envelope.status === 'queued' || envelope.status === 'running') { + setPendingJobId(envelope.job_id); + } + }, + }); + + const polled = useToolJob<RepeatedWordsResult>(pendingJobId); + + // Today, only kickoff.data is ever populated. Tomorrow, polled.data takes over. + const envelope = polled.data ?? (kickoff.data?.status === 'completed' ? kickoff.data : null); + + return { kickoff, envelope }; +} +``` + +### 11.5 No frontend code in this PR + +Per the user's instruction during the spec discussion, frontend work is a separate session. The above sketches are appendix material so reviewers can confirm the backend contract is sufficient for the eventual frontend implementation. + +--- + +## 12.
Service discovery, environment, and Docker networking + +The cross-repo orchestration substrate already exists as [`fluent-platform`](../../fluent-platform/README.md). Its [`compose.yaml`](../../fluent-platform/compose.yaml) brings up `db`, `api`, `worker`, `ai`, and `web` on a shared Docker/Podman network with service names usable as DNS, plus a shared PostgreSQL instance with role-based schema separation. This section describes how this PR plugs into that substrate and the small changes needed in fluent-api and fluent-platform. + +### 12.1 The two runtime modes + +Per [`fluent-platform/README.md`](../../fluent-platform/README.md), fluent-api runs in one of two modes: + +- **Ecosystem mode** — started via `./fluent.sh up` from `fluent-platform/`. fluent-ai is also up, reachable at `http://ai:8200` on the internal network (service name `ai` from [`fluent-platform/compose.yaml`](../../fluent-platform/compose.yaml) line 82). +- **Standalone mode** — started via `./fapi.sh up` from `fluent-api/`. fluent-ai is *not* running unless the dev started it separately. fluent-api needs to gracefully report unavailability rather than crash. + +Both modes are first-class. The integration must work in both. + +### 12.2 Env vars (fluent-api side) + +Two new entries in [`fluent-api/src/env.ts`](../src/env.ts): + +```ts +const envSchema = z.object({ + // ... existing ... + FLUENT_AI_URL: z.string().url(), // ecosystem mode: http://ai:8200 — standalone: http://localhost:8200 + FLUENT_AI_KEY: z.string().min(1), // dev value: fai_dev_admin +}); +``` + +Both are required (no defaults). Zod failure on boot prints a clear error and exits, matching how fluent-api already handles `DATABASE_URL`, `BETTER_AUTH_SECRET`, etc. + +### 12.3 `fluent-api/.env.example` additions + +```dotenv +# Fluent-AI integration +# Base URL of the fluent-ai service (no trailing slash, no /api/v1 suffix). 
+# - Ecosystem mode (via fluent-platform): http://ai:8200 +# - Standalone fluent-api against standalone fluent-ai: http://localhost:8200 +FLUENT_AI_URL=http://localhost:8200 + +# Shared API key for calling fluent-ai. Matches a row in fluent-ai's ai_api_keys table. +# Dev value seeded by fluent-ai: fai_dev_admin +FLUENT_AI_KEY=fai_dev_admin +``` + +The `.env.example` documents the standalone-mode default because that's the path a dev hits first when running `./fapi.sh up` and copying `.env.example` to `.env`. Ecosystem-mode overrides are applied at the platform-compose layer (§12.4). + +### 12.4 Companion change in fluent-platform + +[`fluent-platform/compose.yaml`](../../fluent-platform/compose.yaml) currently passes fluent-api's `.env` verbatim via `env_file: ${API_CONTEXT:-../fluent-api}/.env`. To make ecosystem mode work regardless of what the dev wrote in `fluent-api/.env`, the platform compose should explicitly override the URL for the `api` service: + +```yaml + api: + # ... existing ... + env_file: ${API_CONTEXT:-../fluent-api}/.env + environment: + DATABASE_URL: postgres://postgres:postgres@db:5432/fluent + EXPORTS_DIR: /app/exports + # New entries: + FLUENT_AI_URL: http://ai:8200 + # FLUENT_AI_KEY intentionally NOT overridden here — sourced from fluent-api/.env, + # which must match fluent-ai's ai_api_keys seed (dev value: fai_dev_admin) +``` + +`FLUENT_AI_URL` is overridden because it's deployment-topology-dependent. `FLUENT_AI_KEY` is *not* overridden because it's a shared secret — the same value belongs in `fluent-api/.env` (for the caller) and in fluent-ai's `ai_api_keys` table (which the dev seed already populates). Overriding only on one side would invite drift. + +This is a small fluent-platform PR that should land alongside the fluent-api PR. Both repos ship together; this is a release-coordination item (see **D12**).
+ +### 12.5 Startup ordering + +[`fluent-platform/compose.yaml`](../../fluent-platform/compose.yaml) lines 110–112 currently have `ai` declaring `depends_on: api: service_healthy`. So when the stack starts: + +1. `db` becomes healthy +2. `api` starts, becomes healthy +3. `ai`, `worker`, and `web` start +4. Brief window where `api` is up but `ai` is still booting + +If a dev (or test) hits the `/ai/tools/...` endpoint during that window, fluent-api's `callFluentAi` will hit `ECONNREFUSED` and return `Result.err({ code: AI_SERVICE_UNAVAILABLE, ... })`. This is the correct behavior — no need for retries, no need to invert the `depends_on` direction. Worth noting only so reviewers don't mistake the 502 they see during startup for a bug. (An optional improvement: add an `ai` healthcheck and let `api` declare a soft dependency on it. Out of scope for this PR but a candidate for the fluent-platform follow-up.) + +### 12.6 Standalone-mode behavior when fluent-ai isn't running + +When a dev runs only `./fapi.sh up` without fluent-ai, the `/ai/tools/...` endpoints will return `502 Bad Gateway` with `code: AI_SERVICE_UNAVAILABLE`. This is acceptable: the rest of fluent-api works, and the dev sees a clear signal that they need to bring fluent-ai up (or switch to ecosystem mode) if they want to exercise the AI integration. + +### 12.7 README updates + +- **fluent-api's README** gains a short subsection under "Running locally" pointing to fluent-platform for ecosystem mode and explaining the standalone-mode caveat. +- **fluent-platform's README** has a Services table at lines 61–68 listing `api`, `ai`, `web`, `worker`, `db`. The proposed compose change in §12.4 doesn't add new services so this table is unaffected, but the Environment Configuration section (line 166+) should mention that `FLUENT_AI_KEY` must be set in `fluent-api/.env` to enable the AI tools endpoints.
+ +### 12.8 What `callFluentAi` does *not* assume about networking + +The client is unaware of whether fluent-ai is at `localhost:8200`, `ai:8200`, `https://fluent-ai.internal.example.com`, or anywhere else. It reads `FLUENT_AI_URL` verbatim, appends `/api/v1/${toolPath}`, and POSTs. This means: + +- Switching from standalone to ecosystem mode is a single env var change (handled automatically by the platform compose override). +- Switching to a staging or production deployment is a single env var change. +- TLS works automatically if `FLUENT_AI_URL` starts with `https://` — `fetch` handles it. + +### 12.9 Production / deployment + +Per [`fluent-platform/README.md`](../../fluent-platform/README.md) §"Deployment (placeholder - not active 2026-05-08)", Azure Bicep templates live in [`fluent-platform/deploy/azure/`](../../fluent-platform/deploy/azure/) but aren't active yet. When production deployment lands, `FLUENT_AI_URL` and `FLUENT_AI_KEY` will be wired through the same environment-injection mechanism the rest of the app uses (Azure App Settings / Key Vault references). No fluent-api code change is required for that transition. + +--- + +## 13. Testing strategy + +Per **D11**, the test footprint mirrors the existing fluent-api conventions. Three layers: + +### 13.1 Unit tests — `callFluentAi` + +File: `fluent-api/src/lib/services/fluent-ai/fluent-ai.client.test.ts` + +Test surface, all with `global.fetch` stubbed via `vi.spyOn(global, 'fetch')`: + +- Happy path: completed envelope → returns `Result.ok(envelope)`. +- Happy path: queued envelope → returns `Result.ok(envelope)` (the route layer, not the client, decides 200 vs 202). +- Failed envelope (`status: "failed"`) → returns `Result.err({ code: AI_TOOL_EXECUTION_FAILED, ... })`. +- Cancelled envelope → returns `Result.err({ code: AI_TOOL_EXECUTION_FAILED, ... })`. +- fluent-ai returns 4xx → `Result.err({ code: AI_SERVICE_UNAVAILABLE, ... })`. 
+- fluent-ai returns 5xx → `Result.err({ code: AI_SERVICE_UNAVAILABLE, ... })`. +- `fetch` rejects (network error) → `Result.err({ code: AI_SERVICE_UNAVAILABLE, ... })`. +- Response body fails JSON parse → `Result.err({ code: AI_SERVICE_UNAVAILABLE, message contains "malformed" })`. +- Response envelope passes parsing but `result` field fails the result schema → `Result.err({ code: AI_SERVICE_UNAVAILABLE, ... })`. +- Default 30s timeout fires via fake timers → `Result.err({ code: AI_SERVICE_UNAVAILABLE, ... })`. +- Caller-supplied `AbortSignal` triggers → `Result.err({ code: AI_SERVICE_UNAVAILABLE, ... })`. +- Request shape: `X-API-Key` header is present, equals `env.FLUENT_AI_KEY`, `Content-Type` is `application/json`, URL is `${FLUENT_AI_URL}/api/v1/${toolPath}`. + +### 13.2 Domain tests — `ai-tools.route.ts` + +File: `fluent-api/src/domains/ai-tools/ai-tools.route.test.ts` + +Test surface, modeled on the existing route tests like [`fluent-api/src/domains/translated-verses/translated-verses.route.test.ts`](../src/domains/translated-verses/translated-verses.route.test.ts): + +- Unauthenticated request → 401. +- Authenticated but missing `AI_TOOLS_USE` → 403. +- Invalid request body (e.g. empty `verses`) → 400 with Zod details. +- Authenticated + permitted + valid body + happy-path mock of `callRepeatedWords` returning completed envelope → 200, envelope passed through verbatim. +- Same but mock returns queued envelope → 202, envelope passed through. +- Same but mock returns `Result.err` with `AI_TOOL_EXECUTION_FAILED` (what the client produces for a failed or cancelled envelope, per §10.2) → 502, error body. +- Same but mock returns `Result.err` with `AI_SERVICE_UNAVAILABLE` (transport error) → 502, error body. +- Mock is asserted to have been called with the exact request body the caller sent (verifies no enrichment). + +### 13.3 Smoke test — `scripts/smoke-repeated-words.ts` + +A standalone script mirroring [`fluent-ai/scripts/smoke_repeated_words.py`](../../fluent-ai/scripts/smoke_repeated_words.py).
Runs from the host against a live fluent-api + fluent-ai pair, posts a known-good body, and asserts: + +- Returns 200 (today; 202 once fluent-ai goes async). +- Envelope `status` is `completed` (today). +- `result.findings` is an array. +- `result.summary.total_findings` equals `result.findings.length`. + +Invoked via an npm script: `npm run smoke:repeated-words`. Not part of `npm test` (it requires a live stack). Documented in fluent-api's README alongside the existing dev workflow. + +### 13.4 What is *not* covered + +- **No end-to-end fluent-web → fluent-api → fluent-ai test.** That's a frontend concern that will land with the frontend PR. +- **No load tests** for the polling endpoint (which doesn't exist yet on either side). +- **No contract tests** auto-generated from fluent-ai's OpenAPI spec. This would be valuable, but introducing a contract-testing framework (Pact, openapi-typescript code generation, etc.) is its own decision worth a separate spec. For now, the Zod schemas in fluent-api are the contract, hand-maintained against [`fluent-ai/src/app/schemas/greek_room.py`](../../fluent-ai/src/app/schemas/greek_room.py) and [`fluent-ai/src/app/schemas/tool_job.py`](../../fluent-ai/src/app/schemas/tool_job.py). + +### 13.5 Test infrastructure inherited + +- Vitest config in [`fluent-api/vitest.config.ts`](../vitest.config.ts) — no changes. +- Existing test helpers in `fluent-api/src/tests/` (auth fixtures, request helpers) — reused as-is for the domain tests. +- No new test dependencies. + +--- + +## 14. Future work + +Items that are out of scope for this PR but enabled by the foundations laid here. None of these is blocked on a redesign; they all plug into the same `callFluentAi` / `ToolJobResponse` shape. + +### 14.1 The polling endpoint and slow tools + +When fluent-ai introduces a tool that justifies the queue substrate (per fluent-ai decision **D1**, currently deferred), it will ship: + +- A backing `ai.tool_jobs` table. 
+- An in-process worker for execution. +- `GET /api/v1/tools/jobs/{job_id}` for status polling. + +The matching fluent-api work is small: + +- Add `pollToolJob(jobId, resultSchema)` sibling to `callFluentAi` in [`fluent-api/src/lib/services/fluent-ai/fluent-ai.client.ts`](../src/lib/services/fluent-ai/fluent-ai.client.ts). +- Add `GET /ai/tools/jobs/{job_id}` route in [`fluent-api/src/domains/ai-tools/ai-tools.route.ts`](../src/domains/ai-tools/ai-tools.route.ts) with the same `authenticateUser + requirePermission(AI_TOOLS_USE)` middleware. +- No DB persistence needed on the fluent-api side — fluent-api remains a thin pass-through; the job state of record lives in fluent-ai's `ai.tool_jobs` table. + +### 14.2 Frontend hook and editor squiggles + +A separate PR against fluent-web will introduce the `useToolJob` + `useRepeatedWords` hooks sketched in §11.4, then drive editor squiggle UI from the `findings` array. The backend surface is already shaped to feed that UI directly (`snt_id`, `surf`, `start_position`, `severity` on each finding). + +### 14.3 Additional Greek-Room checks + +Greek-Room exposes other static-analysis tools (punctuation, untranslated text, character-set sanity, etc.). Each will land in fluent-ai as a sibling tool, then surface in fluent-api with the same five-line pattern shown in §7.5. No new mechanism needed. + +### 14.4 Other AI tool families + +The same pattern absorbs LLM drafting, embeddings, fine-tuning, and any other tool family fluent-ai grows into. The naming convention `tools/{family}/{tool-name}` (e.g. `tools/openai/draft-suggestion`, `tools/embeddings/similarity`) keeps OpenAPI documentation organized. + +### 14.5 Per-user attribution + +Today fluent-ai sees a single shared identity (`FLUENT_AI_KEY`). If audit / billing / rate-limiting needs per-user attribution later, fluent-api can pass an opaque `X-Requested-By` header carrying the BetterAuth user ID. fluent-ai logs it; no change to the request body. 
+ +### 14.6 Caching for idempotent tools + +`callFluentAi` is intentionally cache-free today. Some future tools may be both expensive and deterministic on their input — in which case a `(toolPath, hash(body))` cache (in-memory or Redis) makes sense. Drops in at the `callFluentAi` layer without changing call sites. + +### 14.7 Retries on transport failure + +Currently `callFluentAi` does not retry on network errors. If experience shows transient failures are common, a `withRetry` wrapper (analogous to [`withDatabaseRetry`](../src/lib/db-retry.ts)) can be added at the client level. Out of scope today because the failure mode of the only tool is "semantic," not "flaky." + +### 14.8 MCP facade + +A future Model Context Protocol facade (referenced as out-of-scope in [`fluent-ai/greek-room-integration-summary.md`](../../fluent-ai/greek-room-integration-summary.md)) could be layered over fluent-ai. fluent-api would call it via `callFluentAi` exactly as today — the only difference is the base URL. + +### 14.9 fluent-platform refinements + +Two small, optional improvements identified while writing this spec: + +- Add a healthcheck to the `ai` service in [`fluent-platform/compose.yaml`](../../fluent-platform/compose.yaml) and let `api` declare a soft dependency on it. Would eliminate the brief startup window where the AI endpoints return 502. Not pursued in this PR because the 502 response is already graceful. +- Document the `FLUENT_AI_KEY` ↔ fluent-ai `ai_api_keys` table relationship in [`fluent-platform/docs/`](../../fluent-platform/docs/) for new developers. + +--- + +## 15. Open questions for reviewer + +These are the items the spec discussion landed on but where reviewer pushback would meaningfully change the outcome. Each one has a recommended position (the doc reflects this); each one can be flipped without restructuring the rest of the proposal. + +### 15.1 URL layout: is `POST /ai/tools/greek-room/repeated-words` the right shape? 
+ +**Recommended:** Yes — see **D2** and §5. + +**Alternatives:** +- `POST /checks/repeated-words` — closer to the verbiage we use elsewhere ("checks" rather than "tools"). Downside: hides the network-bound, possibly-async nature of these endpoints. +- `POST /chapter-assignments/{id}/checks/repeated-words` — nests the check under the resource it operates on. Rejected because it requires fluent-api to enrich the request body from `chapter_assignment_id` → verses + language metadata, which couples fluent-api to fluent-ai's input schema (rejected by **D8**). +- `POST /tools/dispatch` with `{tool: "...", params: {...}}` — collapses the type system at the wire boundary. Same reason fluent-ai rejected this (see [`fluent-ai/greek-room-integration-summary.md`](../../fluent-ai/greek-room-integration-summary.md) §1). + +**Decision needed from reviewer:** confirm `/ai/tools/{family}/{tool}` or push back with a preference. + +### 15.2 Permission: `PERMISSIONS.AI_TOOLS_USE` as a string-value alias of `CONTENT_UPDATE`? + +**Recommended:** Yes, alias — see **D10** and §9.3. + +**Alternatives:** +- Introduce a real new permission row in the `permissions` table with its own role mappings. Requires a migration and seed update. Gives nothing user-visible today but is the "cleaner" RBAC story. +- Reuse `PERMISSIONS.CONTENT_UPDATE` directly at the call site (no alias). Loses the documentary value of seeing "AI_TOOLS_USE" at the route. + +**Decision needed from reviewer:** confirm the alias approach or push back for either of the alternatives. + +### 15.3 Envelope pass-through vs. unwrapping `result` for the sync case? + +**Recommended:** Pass through the full `ToolJobResponse` — see **D9** and §8.2. + +**Alternatives:** +- For the synchronous case only, return just the `result` field (i.e. `{findings, summary}`) and 200, reserving the envelope for when fluent-ai goes async. Simpler today; mildly more breaking when polling lands. 
+- Pass through always but add a thin `result_only` query parameter for callers that want the unwrapped shape. Adds API surface for negligible benefit. + +**Decision needed from reviewer:** confirm pass-through or push back for unwrap-now-envelope-later. + +### 15.4 No request enrichment vs. server-side context augmentation? + +**Recommended:** No enrichment — see **D8** and §8.1. + +**Alternatives:** +- fluent-api looks up `chapter_assignment_id` (or `project_id`) and adds verses + language metadata server-side. Caller sends a thin reference, fluent-api fattens it before forwarding. Trades client flexibility for harder-to-spoof inputs. +- Hybrid: caller sends the full body, fluent-api *validates* certain fields against its own data (e.g. confirms the caller has access to that `project_id`). Lighter than full enrichment. + +**Decision needed from reviewer:** confirm no enrichment, or push back for either alternative. + +### 15.5 Anything else the reviewer wants surfaced + +If reviewers identify a concern not captured above, please raise it as a comment on the PR. The relevant pre-decisions are summarized in §3 and the rationale is in the predecessor docs ([`fluent-ai/greek-room-integration-summary.md`](../../fluent-ai/greek-room-integration-summary.md), [`fluent-ai/greek-room-integration-suggestion.md`](../../fluent-ai/greek-room-integration-suggestion.md), [`fluent-ai/greek-room-integration-decisions.md`](../../fluent-ai/greek-room-integration-decisions.md)). + +--- + + diff --git a/proposals/ai-tools-integration-summary.md b/proposals/ai-tools-integration-summary.md new file mode 100644 index 0000000..0d6482a --- /dev/null +++ b/proposals/ai-tools-integration-summary.md @@ -0,0 +1,30 @@ +# AI-Tools Integration on fluent-api — Architecture Review Summary + +**Purpose:** Reviewer orientation for the proposed AI-tools integration. 
Long-form proposal lives in the sibling [`ai-tools-integration-suggestion.md`](ai-tools-integration-suggestion.md) if more detail is wanted; this summary is intended to stand on its own. Ships as a coordinated pair of PRs — fluent-api (the bulk) plus a small fluent-platform PR adding one compose env-var override (per **D12**). + +## What's being proposed + +Expose fluent-ai's Greek-Room *Repeated Words* check through fluent-api as the first AI-tool endpoint, using a pattern designed to absorb every future AI tool (LLM drafting, embeddings, other Greek-Room checks) with a five-line per-tool wrapper. + +## Core architectural decisions for review + +1. **New top-level namespace `POST /ai/tools/greek-room/repeated-words`** — introduces `/ai/` as fluent-api's first service-family prefix, signaling "network-bound, possibly async." Per-tool URL preserves OpenAPI type-safety; a generic `/dispatch` endpoint was rejected for the same reasons fluent-ai rejected it. Leaves room for a future `GET /ai/tools/jobs/{job_id}` polling endpoint without colliding with the existing pg-boss `/jobs/{id}` route under `/usfm`. + +2. **One shared utility, `callFluentAi(toolPath, body, schema)`** — a higher-order async wrapper at `lib/services/fluent-ai/`, modeled on the existing Mailgun service and `withDatabaseRetry` patterns. Each tool gets a typed wrapper in `domains/ai-tools/`; adding a new tool is two files, three to ten lines plus schemas. + +3. **Envelope pass-through** — fluent-api forwards `ToolJobResponse[T]` from fluent-ai to fluent-web verbatim (`status`, `job_id`, `result`, `error`, timestamps). Same hook code handles sync `completed` today and `queued → polled → completed` tomorrow. Polling lives in the browser via TanStack Query's `refetchInterval`, not on fluent-api. + +4. **No request enrichment** — body forwarded to fluent-ai verbatim, including `lang_code`, `project_id`, `verses[]`. Avoids coupling fluent-api to fluent-ai's input schema. + +5. 
**Reuse existing fluent-api substrate** — BetterAuth session + `requirePermission` for caller auth; a single shared `FLUENT_AI_KEY` (env-driven) for the fluent-api → fluent-ai hop; `Result` + `getHttpStatus` for errors (two new codes: `AI_SERVICE_UNAVAILABLE` and `AI_TOOL_EXECUTION_FAILED`, both → 502); fluent-platform's existing compose network for service discovery (`http://ai:8200`). + +## Explicitly out of scope (deferred) + +Polling endpoint on either side, DB persistence of tool runs, frontend hooks and graphical UI, rate limits, request-size limits, MCP facade, SSE/WebSocket streaming, contract tests, per-user attribution, caching, transport retries. + +## Areas where input would be most valuable + +1. **URL layout** — is `POST /ai/tools/greek-room/repeated-words` the right shape, vs. `/checks/repeated-words` or nesting under `/chapter-assignments/{id}/`? +2. **Permission alias** — `PERMISSIONS.AI_TOOLS_USE` as a string-value alias of `CONTENT_UPDATE`, vs. a real new permission row with migration + seeding? +3. **Envelope pass-through** — return the full `ToolJobResponse` today, vs. unwrap `result` for the sync case and reshape later when polling lands? +4. **No request enrichment** — forward verbatim, vs. server-side lookup of `chapter_assignment_id` → verses, vs. a validation-only hybrid? From f3c4d55dfc0dac9c2c730bc27983ebbfa937d110 Mon Sep 17 00:00:00 2001 From: Joshua Lansford Date: Thu, 28 May 2026 11:05:47 -0400 Subject: [PATCH 2/9] Moved proposals into docs folder according to request. 
--- .../repeated-word-check}/ai-tools-integration-suggestion.md | 0 .../repeated-word-check}/ai-tools-integration-summary.md | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename {proposals => docs/proposals/repeated-word-check}/ai-tools-integration-suggestion.md (100%) rename {proposals => docs/proposals/repeated-word-check}/ai-tools-integration-summary.md (100%) diff --git a/proposals/ai-tools-integration-suggestion.md b/docs/proposals/repeated-word-check/ai-tools-integration-suggestion.md similarity index 100% rename from proposals/ai-tools-integration-suggestion.md rename to docs/proposals/repeated-word-check/ai-tools-integration-suggestion.md diff --git a/proposals/ai-tools-integration-summary.md b/docs/proposals/repeated-word-check/ai-tools-integration-summary.md similarity index 100% rename from proposals/ai-tools-integration-summary.md rename to docs/proposals/repeated-word-check/ai-tools-integration-summary.md From 1a9cbfae146b742293650f116910d7db3cf9d340 Mon Sep 17 00:00:00 2001 From: Joshua Lansford Date: Sun, 31 May 2026 11:52:18 -0400 Subject: [PATCH 3/9] docs: apply prettier formatting to repeated-word-check proposals Mechanical prettier --write only (no semantic changes): - emphasis markers *foo* -> _foo_ - markdown table separator padding - reflow of TS code samples inside fenced blocks --- .../ai-tools-integration-suggestion.md | 241 +++++++++--------- .../ai-tools-integration-summary.md | 2 +- 2 files changed, 121 insertions(+), 122 deletions(-) diff --git a/docs/proposals/repeated-word-check/ai-tools-integration-suggestion.md b/docs/proposals/repeated-word-check/ai-tools-integration-suggestion.md index 338c37b..d23f935 100644 --- a/docs/proposals/repeated-word-check/ai-tools-integration-suggestion.md +++ b/docs/proposals/repeated-word-check/ai-tools-integration-suggestion.md @@ -1,7 +1,7 @@ # AI-Tools Integration on fluent-api — Proposal **Status:** Draft for review. 
-**Scope:** Extend fluent-api to expose AI tools implemented by fluent-ai, starting with Greek-Room's *Repeated Words* check. The exposed pattern is meant to absorb every future AI tool (LLM drafting, embeddings, fine-tuning, other Greek-Room checks) without renegotiating the contract. +**Scope:** Extend fluent-api to expose AI tools implemented by fluent-ai, starting with Greek-Room's _Repeated Words_ check. The exposed pattern is meant to absorb every future AI tool (LLM drafting, embeddings, fine-tuning, other Greek-Room checks) without renegotiating the contract. **Companion document:** [`fluent-api/proposals/ai-tools-integration-summary.md`](ai-tools-integration-summary.md) — short reviewer orientation. **Predecessors on the fluent-ai side:** [`fluent-ai/greek-room-integration-summary.md`](../../fluent-ai/greek-room-integration-summary.md), [`fluent-ai/greek-room-integration-suggestion.md`](../../fluent-ai/greek-room-integration-suggestion.md), [`fluent-ai/greek-room-integration-decisions.md`](../../fluent-ai/greek-room-integration-decisions.md). @@ -9,7 +9,7 @@ ## 1. Background -fluent-ai is the Python/FastAPI backend dedicated to AI-tool integrations. It has merged its first such integration — Greek-Room's *Repeated Words* check — exposed at: +fluent-ai is the Python/FastAPI backend dedicated to AI-tool integrations. It has merged its first such integration — Greek-Room's _Repeated Words_ check — exposed at: ``` POST /api/v1/tools/greek-room/repeated-words @@ -18,7 +18,7 @@ Header: X-API-Key: with a `ToolJobResponse[RepeatedWordsResult]` envelope that already accommodates a future async-queue mode (`status: queued|running|completed|failed|cancelled`, `job_id`, `created_at`, `completed_at`). Today it always returns `status: "completed"` synchronously; the queue substrate is deferred until a slow tool needs it. See the predecessor documents linked above for the full architectural rationale. 
-fluent-api is the Node/TypeScript backend that fronts the editor (fluent-web). It currently has no awareness of fluent-ai. This proposal describes how to put the *Repeated Words* check on the menu by routing it through fluent-api, while shaping the integration so the next AI tool drops in with minimum effort. +fluent-api is the Node/TypeScript backend that fronts the editor (fluent-web). It currently has no awareness of fluent-ai. This proposal describes how to put the _Repeated Words_ check on the menu by routing it through fluent-api, while shaping the integration so the next AI tool drops in with minimum effort. The user-facing motivation is the editor: eventually each repeated word should get a corrective squiggle below it, similar to a spell-checker. That endgame is **out of scope for this PR**, but it sets the constraint that the surface fluent-api exposes must be cheap and re-callable per editor save, not stateful or session-coupled. @@ -26,12 +26,12 @@ The user-facing motivation is the editor: eventually each repeated word should g All four sibling projects live under the same GitHub org (`eten-tech-foundation`). Per fluent-platform's setup convention they are cloned side-by-side in the same parent directory. -| Repo | Remote | Role | -|------|--------|------| -| **fluent-api** | [github.com/eten-tech-foundation/fluent-api](https://github.com/eten-tech-foundation/fluent-api) | Node/TypeScript REST API (Hono + Drizzle + BetterAuth). The subject of this proposal. | -| **fluent-ai** | [github.com/eten-tech-foundation/fluent-ai](https://github.com/eten-tech-foundation/fluent-ai) | Python/FastAPI service hosting AI-tool integrations (Greek-Room, future LLM tools). The upstream we are calling into. 
| +| Repo | Remote | Role | +| ------------------- | ---------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------- | +| **fluent-api** | [github.com/eten-tech-foundation/fluent-api](https://github.com/eten-tech-foundation/fluent-api) | Node/TypeScript REST API (Hono + Drizzle + BetterAuth). The subject of this proposal. | +| **fluent-ai** | [github.com/eten-tech-foundation/fluent-ai](https://github.com/eten-tech-foundation/fluent-ai) | Python/FastAPI service hosting AI-tool integrations (Greek-Room, future LLM tools). The upstream we are calling into. | | **fluent-platform** | [github.com/eten-tech-foundation/fluent-platform](https://github.com/eten-tech-foundation/fluent-platform) | Container-first orchestrator. Owns the shared PostgreSQL, the unified compose stack, and helper scripts. Touched by this proposal — see §12.4. | -| **fluent-web** | [github.com/eten-tech-foundation/fluent-web](https://github.com/eten-tech-foundation/fluent-web) | React/Vite frontend (the editor). Not touched in this PR; the frontend hook is a follow-up. | +| **fluent-web** | [github.com/eten-tech-foundation/fluent-web](https://github.com/eten-tech-foundation/fluent-web) | React/Vite frontend (the editor). Not touched in this PR; the frontend hook is a follow-up. | Relative paths in this document (e.g. `../../fluent-platform/...`) assume the standard side-by-side layout that fluent-platform's setup script produces. @@ -40,6 +40,7 @@ Relative paths in this document (e.g. `../../fluent-platform/...`) assume the st ## 2. Scope of this PR **In scope (this PR):** + 1. A single new endpoint on fluent-api: `POST /ai/tools/greek-room/repeated-words`. 2. A shared utility — `callFluentAi(toolPath, body, schema)` — used by all per-tool routes to handle envelope unwrap, error translation, and (later) polling. 3. 
A new domain folder, `src/domains/ai-tools/`, containing routes/services/types for tool endpoints. @@ -48,6 +49,7 @@ Relative paths in this document (e.g. `../../fluent-platform/...`) assume the st 6. Tests mirroring the existing fluent-api test patterns plus one smoke test runnable from the host. **Explicitly deferred (future PRs):** + - Async job polling endpoint on fluent-api (`GET /ai/tools/jobs/{job_id}` or similar). Not built because fluent-ai also has not built the corresponding endpoint yet — both sides chose "lightweight now" per fluent-ai decision **D1**. - Frontend (fluent-web) hooks and squiggle UI. Frontend is a separate session/PR. - DB persistence of tool runs / findings. No `ai_tool_runs` or `check_results` table is introduced. @@ -60,20 +62,20 @@ Relative paths in this document (e.g. `../../fluent-platform/...`) assume the st These are the decisions captured during the spec discussion. Each is restated here so reviewers can discuss the conclusion without reading the supporting analysis. -| # | Decision | Short rationale | -|---|----------|-----------------| -| **D1** | PR scope is "minimum proxy" — no DB persistence, no job queue exercised in this PR. | Repeated-words is fast (<1s) and re-runnable; persistence is not motivated by this tool. Defer until a slow tool justifies a `ai_tool_runs` table. | -| **D2** | URL is `POST /ai/tools/greek-room/repeated-words`. | Introduces `/ai/` as fluent-api's first top-level service-family namespace. Telegraphs "network-bound, potentially slow, possibly async" — characteristics that local CRUD endpoints don't share. Per-tool URL preserves OpenAPI type-safety. Alternatives: `/checks/repeated-words` (more in convention but hides the proxy nature), nested under `/chapter-assignments/{id}/` (requires server-side enrichment which we reject in D8). | -| **D3** | Polling lives in the *browser* via TanStack Query's `refetchInterval`, not in fluent-api. 
fluent-api is a thin pass-through for both kickoff and (future) polling. | Decouples slow tools from fluent-api's request budget. Aligns with fluent-web's existing TanStack Query usage. The polling code path is not exercised today because fluent-ai always returns `status: "completed"` synchronously. | -| **D4** | File layout: shared utility at [`fluent-api/src/lib/services/fluent-ai/fluent-ai.client.ts`](../src/lib/services/fluent-ai/fluent-ai.client.ts); per-tool routes/services in [`fluent-api/src/domains/ai-tools/`](../src/domains/ai-tools/). One route file for all tools; per-tool Zod schemas keep OpenAPI documentation fully typed. | Mirrors the existing [`fluent-api/src/lib/services/notifications/mailgun.service.ts`](../src/lib/services/notifications/mailgun.service.ts) pattern for "free functions wrapping a third-party API" and the existing [`fluent-api/src/lib/db-retry.ts`](../src/lib/db-retry.ts) pattern for "higher-order utility used by many call sites." Avoids a single one-size-fits-all dispatcher that would degrade OpenAPI schemas to `dict[str, Any]`. | -| **D5** | Service discovery / docker networking is handled by the existing [`fluent-platform`](../../fluent-platform/README.md) orchestrator. This PR adds two env vars on the fluent-api side and one `environment:` override on the fluent-platform side (`FLUENT_AI_URL: http://ai:8200`). See §12. | fluent-platform already wires `db`, `api`, `worker`, `ai`, `web` together on a shared network; we plug in to that substrate rather than invent a new one. | -| **D6** | A single shared `FLUENT_AI_KEY` is provisioned for the fluent-api → fluent-ai hop. If another consumer of fluent-ai appears later, it gets its own key. | Per-user keys give zero security benefit at this layer (everyone going through fluent-api is already authenticated to fluent-api). Single key minimizes IT complexity. | -| **D7** | Error translation specifics deferred to implementation. 
If conformity between the two error systems is awkward, prefer harmonizing fluent-ai toward fluent-api's patterns rather than the other way. | At the spec level there are no hard constraints; the safe defaults (5xx from fluent-ai → 502 on fluent-api with `ErrorCode.AI_SERVICE_UNAVAILABLE`) are obvious. | -| **D8** | No request enrichment. fluent-api forwards the request body to fluent-ai verbatim. fluent-web sends the full `RepeatedWordsRequest` shape including `lang_code`, `lang_name`, `project_id`, `project_name`, `verses[]`. | Maximum flexibility for the caller. Avoids coupling fluent-api to fluent-ai's request schema (today and tomorrow). | -| **D9** | The full `ToolJobResponse` envelope is passed through to fluent-web unchanged. No unwrap to `result` for the synchronous case. | Forward-compatible with TanStack-based polling — the same hook code consumes the envelope today (`status: completed`) and tomorrow (`status: queued` → polled to `completed`). | -| **D10** | Auth on the new endpoint: introduce `PERMISSIONS.AI_TOOLS_USE` as an *alias* with the same underlying value as `CONTENT_UPDATE`. | Cosmetically separates "can edit content" from "can invoke AI tools" without making a real distinction yet. Trivial to peel apart later. | -| **D11** | A smoke test analogous to [`fluent-ai/scripts/smoke_repeated_words.py`](../../fluent-ai/scripts/smoke_repeated_words.py) is added, runnable from the host with both services up. | Lets devs verify the cross-service plumbing without running the full vitest suite. | -| **D12** | This work ships as a **coordinated pair of PRs**: one against fluent-api (the bulk of the work) and one small PR against fluent-platform (compose env-var override + 1–2 README lines). Either order of merge is fine; both should be ready for review together. | The fluent-platform PR is small and contains no logic, so it can land first to unblock ecosystem-mode dev. Reviewers should be able to read both PRs side-by-side. 
| +| # | Decision | Short rationale | +| ------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| **D1** | PR scope is "minimum proxy" — no DB persistence, no job queue exercised in this PR. | Repeated-words is fast (<1s) and re-runnable; persistence is not motivated by this tool. Defer until a slow tool justifies a `ai_tool_runs` table. | +| **D2** | URL is `POST /ai/tools/greek-room/repeated-words`. | Introduces `/ai/` as fluent-api's first top-level service-family namespace. Telegraphs "network-bound, potentially slow, possibly async" — characteristics that local CRUD endpoints don't share. Per-tool URL preserves OpenAPI type-safety. Alternatives: `/checks/repeated-words` (more in convention but hides the proxy nature), nested under `/chapter-assignments/{id}/` (requires server-side enrichment which we reject in D8). | +| **D3** | Polling lives in the _browser_ via TanStack Query's `refetchInterval`, not in fluent-api. fluent-api is a thin pass-through for both kickoff and (future) polling. | Decouples slow tools from fluent-api's request budget. Aligns with fluent-web's existing TanStack Query usage. The polling code path is not exercised today because fluent-ai always returns `status: "completed"` synchronously. 
| +| **D4** | File layout: shared utility at [`fluent-api/src/lib/services/fluent-ai/fluent-ai.client.ts`](../src/lib/services/fluent-ai/fluent-ai.client.ts); per-tool routes/services in [`fluent-api/src/domains/ai-tools/`](../src/domains/ai-tools/). One route file for all tools; per-tool Zod schemas keep OpenAPI documentation fully typed. | Mirrors the existing [`fluent-api/src/lib/services/notifications/mailgun.service.ts`](../src/lib/services/notifications/mailgun.service.ts) pattern for "free functions wrapping a third-party API" and the existing [`fluent-api/src/lib/db-retry.ts`](../src/lib/db-retry.ts) pattern for "higher-order utility used by many call sites." Avoids a single one-size-fits-all dispatcher that would degrade OpenAPI schemas to `dict[str, Any]`. | +| **D5** | Service discovery / docker networking is handled by the existing [`fluent-platform`](../../fluent-platform/README.md) orchestrator. This PR adds two env vars on the fluent-api side and one `environment:` override on the fluent-platform side (`FLUENT_AI_URL: http://ai:8200`). See §12. | fluent-platform already wires `db`, `api`, `worker`, `ai`, `web` together on a shared network; we plug in to that substrate rather than invent a new one. | +| **D6** | A single shared `FLUENT_AI_KEY` is provisioned for the fluent-api → fluent-ai hop. If another consumer of fluent-ai appears later, it gets its own key. | Per-user keys give zero security benefit at this layer (everyone going through fluent-api is already authenticated to fluent-api). Single key minimizes IT complexity. | +| **D7** | Error translation specifics deferred to implementation. If conformity between the two error systems is awkward, prefer harmonizing fluent-ai toward fluent-api's patterns rather than the other way. | At the spec level there are no hard constraints; the safe defaults (5xx from fluent-ai → 502 on fluent-api with `ErrorCode.AI_SERVICE_UNAVAILABLE`) are obvious. | +| **D8** | No request enrichment. 
fluent-api forwards the request body to fluent-ai verbatim. fluent-web sends the full `RepeatedWordsRequest` shape including `lang_code`, `lang_name`, `project_id`, `project_name`, `verses[]`. | Maximum flexibility for the caller. Avoids coupling fluent-api to fluent-ai's request schema (today and tomorrow). | +| **D9** | The full `ToolJobResponse` envelope is passed through to fluent-web unchanged. No unwrap to `result` for the synchronous case. | Forward-compatible with TanStack-based polling — the same hook code consumes the envelope today (`status: completed`) and tomorrow (`status: queued` → polled to `completed`). | +| **D10** | Auth on the new endpoint: introduce `PERMISSIONS.AI_TOOLS_USE` as an _alias_ with the same underlying value as `CONTENT_UPDATE`. | Cosmetically separates "can edit content" from "can invoke AI tools" without making a real distinction yet. Trivial to peel apart later. | +| **D11** | A smoke test analogous to [`fluent-ai/scripts/smoke_repeated_words.py`](../../fluent-ai/scripts/smoke_repeated_words.py) is added, runnable from the host with both services up. | Lets devs verify the cross-service plumbing without running the full vitest suite. | +| **D12** | This work ships as a **coordinated pair of PRs**: one against fluent-api (the bulk of the work) and one small PR against fluent-platform (compose env-var override + 1–2 README lines). Either order of merge is fine; both should be ready for review together. | The fluent-platform PR is small and contains no logic, so it can land first to unblock ecosystem-mode dev. Reviewers should be able to read both PRs side-by-side. | --- @@ -130,7 +132,7 @@ The interesting property: **the request/response shapes are identical** between POST /ai/tools/greek-room/repeated-words ``` -This introduces `/ai/` as fluent-api's first top-level service-family namespace. 
The full URL inventory survey conducted during the spec session is reproduced in [Appendix A](#appendix-a--fluent-api-url-inventory-at-time-of-writing). Today fluent-api's URLs are flat, plural-noun, unprefixed; nested URLs reflect ownership (`/projects/{id}/users`). There is no existing service-family namespace; `/usfm` *is not* a top-level prefix but a nested sub-resource under `/project-units/{id}`. +This introduces `/ai/` as fluent-api's first top-level service-family namespace. The full URL inventory survey conducted during the spec session is reproduced in [Appendix A](#appendix-a--fluent-api-url-inventory-at-time-of-writing). Today fluent-api's URLs are flat, plural-noun, unprefixed; nested URLs reflect ownership (`/projects/{id}/users`). There is no existing service-family namespace; `/usfm` _is not_ a top-level prefix but a nested sub-resource under `/project-units/{id}`. #### Why `/ai/tools/greek-room/repeated-words` over the alternatives @@ -202,13 +204,13 @@ A generic `POST /ai/dispatch` endpoint accepting `{tool: string, params: unknown ### 6.3 Why one route file for all tools instead of one per tool -`ai-tools.route.ts` co-locates every tool endpoint so adding a new tool requires touching exactly two files (`ai-tools.service.ts` for the wrapper, `ai-tools.route.ts` for the route + schemas). When this file becomes uncomfortably large (~5+ tools), a split by tool *family* — `ai-tools.greek-room.route.ts`, `ai-tools.openai.route.ts`, etc. — is the natural next step. Not warranted at one tool. +`ai-tools.route.ts` co-locates every tool endpoint so adding a new tool requires touching exactly two files (`ai-tools.service.ts` for the wrapper, `ai-tools.route.ts` for the route + schemas). When this file becomes uncomfortably large (~5+ tools), a split by tool _family_ — `ai-tools.greek-room.route.ts`, `ai-tools.openai.route.ts`, etc. — is the natural next step. Not warranted at one tool. --- ## 7. 
The shared utility: `callFluentAi` -The single piece of *new mechanism* this PR introduces is the function in [`fluent-api/src/lib/services/fluent-ai/fluent-ai.client.ts`](../src/lib/services/fluent-ai/fluent-ai.client.ts). +The single piece of _new mechanism_ this PR introduces is the function in [`fluent-api/src/lib/services/fluent-ai/fluent-ai.client.ts`](../src/lib/services/fluent-ai/fluent-ai.client.ts). ### 7.1 Signature @@ -218,13 +220,13 @@ import type { Result } from '@/lib/types'; import type { ToolJobResponse } from './fluent-ai.types'; export async function callFluentAi<TReq, TRes>( - toolPath: string, // e.g. 'tools/greek-room/repeated-words' (no leading slash; no /api/v1) + toolPath: string, // e.g. 'tools/greek-room/repeated-words' (no leading slash; no /api/v1) body: TReq, - resultSchema: z.ZodType<TRes>, // for runtime validation of the result field on success + resultSchema: z.ZodType<TRes>, // for runtime validation of the result field on success options?: { - signal?: AbortSignal; // honored if caller wants timeout / cancellation - timeoutMs?: number; // default 30_000 - }, + signal?: AbortSignal; // honored if caller wants timeout / cancellation + timeoutMs?: number; // default 30_000 + } ): Promise<Result<ToolJobResponse<TRes>>>; ``` @@ -236,7 +238,7 @@ export async function callFluentAi<TReq, TRes>( - `X-API-Key: ${FLUENT_AI_KEY}` - body serialized as JSON 3. Honors the caller's `AbortSignal` if provided; otherwise applies a default 30-second timeout via a derived signal. (Tunable per-call.) -4. On HTTP-level success (2xx), parses the response body as `ToolJobResponse` and validates the `result` field against `resultSchema` *if and only if* `status === "completed"`. (When status is `queued|running`, `result` is `null` and is not validated.) +4. On HTTP-level success (2xx), parses the response body as `ToolJobResponse` and validates the `result` field against `resultSchema` _if and only if_ `status === "completed"`. (When status is `queued|running`, `result` is `null` and is not validated.) 5. 
Returns `{ ok: true, data: envelope }` — note this is the **full envelope**, not the unwrapped result. Callers that care only about the synchronous-completed case can `if (envelope.status === "completed") return envelope.result`. Callers that want to support the future polling case can inspect `envelope.status` and `envelope.job_id`. 6. On HTTP error (4xx/5xx), network error, parse error, or schema-validation error, returns `{ ok: false, error: {...} }` using the error mapping in §10. @@ -244,7 +246,7 @@ export async function callFluentAi<TReq, TRes>( - It does not poll. A `pollUntilComplete: true` option, or a sibling `pollToolJob(jobId, resultSchema)` function, can be added in the future PR that ships the first slow tool. Today the polling code path is not in scope because fluent-ai has not yet shipped the polling endpoint either. - It does not cache. Each call is independent. Per-tool caching (e.g. memoizing on `(toolPath, hash(body))`) is a future optimization for expensive idempotent tools. -- It does not retry on transport failure. `withDatabaseRetry`-style retries are intentionally not applied because most AI tool failures are *semantic*, not *transport-flaky*. If a user-facing retry policy is wanted, it belongs at the route layer or in the frontend hook, not in this utility. +- It does not retry on transport failure. `withDatabaseRetry`-style retries are intentionally not applied because most AI tool failures are _semantic_, not _transport-flaky_. If a user-facing retry policy is wanted, it belongs at the route layer or in the frontend hook, not in this utility. 
### 7.4 Why this shape @@ -270,13 +272,9 @@ import { } from './ai-tools.types'; export async function callRepeatedWords( - req: RepeatedWordsRequest, + req: RepeatedWordsRequest ): Promise<Result<ToolJobResponse<RepeatedWordsResult>>> { - return callFluentAi( - 'tools/greek-room/repeated-words', - req, - RepeatedWordsResultSchema, - ); + return callFluentAi('tools/greek-room/repeated-words', req, RepeatedWordsResultSchema); } ``` @@ -284,13 +282,9 @@ Adding a future tool (say, `coherence-check`) is the same five-line pattern: ```ts export async function callCoherenceCheck( - req: CoherenceCheckRequest, + req: CoherenceCheckRequest ): Promise<Result<ToolJobResponse<CoherenceCheckResult>>> { - return callFluentAi( - 'tools/some-family/coherence-check', - req, - CoherenceCheckResultSchema, - ); + return callFluentAi('tools/some-family/coherence-check', req, CoherenceCheckResultSchema); } ``` @@ -324,6 +318,7 @@ export type RepeatedWordsRequest = z.infer<typeof RepeatedWordsRequestSchema>; ``` Notes: + - `project_id` is intentionally permissive (`string | number`) to match fluent-ai's Pydantic model, which accepts either. fluent-api's own `project.id` is an integer. - `verses` is required and non-empty (`.min(1)`) so we can fail fast at the route layer rather than incur a round-trip to fluent-ai for a trivially-invalid request. - The field naming uses fluent-ai's snake_case verbatim (`lang_code`, `snt_id`). This is a deliberate departure from fluent-api's camelCase elsewhere; the alternative (rename in fluent-api, re-rename in fluent-ai) buys nothing and risks drift. The OpenAPI docs make the snake_case visible to the frontend. @@ -335,26 +330,21 @@ Per **D9** (envelope pass-through), the response body is fluent-ai's `ToolJobRes ```ts // fluent-api/src/lib/services/fluent-ai/fluent-ai.types.ts -export type JobStatus = - | 'queued' - | 'running' - | 'completed' - | 'failed' - | 'cancelled'; +export type JobStatus = 'queued' | 'running' | 'completed' | 'failed' | 'cancelled'; export interface ToolJobError { - type: string; // e.g. 'TOOL_EXECUTION_ERROR' + type: string; // e.g. 
'TOOL_EXECUTION_ERROR' message: string; details?: unknown; } export interface ToolJobResponse<TResult> { - job_id: string; // UUID - tool: string; // e.g. 'greek-room/repeated-words' + job_id: string; // UUID + tool: string; // e.g. 'greek-room/repeated-words' status: JobStatus; result: TResult | null; error: ToolJobError | null; - created_at: string; // ISO-8601 timestamp + created_at: string; // ISO-8601 timestamp completed_at: string | null; } ``` @@ -405,17 +395,17 @@ The `RepeatedWordsResponseSchema` is what the Hono route declares as its 200 res ### 8.3 Status codes from fluent-api -| Outcome | HTTP | Body | -|---------|------|------| -| Tool completed synchronously | `200 OK` | `ToolJobResponse` with `status: "completed"` | -| Tool started asynchronously (future) | `202 Accepted` | `ToolJobResponse` with `status: "queued"` | -| Caller not authenticated | `401 Unauthorized` | fluent-api's standard `Result` error | -| Caller authenticated but lacks `AI_TOOLS_USE` | `403 Forbidden` | fluent-api's standard `Result` error | -| Request body fails Zod validation | `400 Bad Request` | fluent-api's standard validation error | -| fluent-ai returns 4xx (bad request, auth failure, etc.) 
| `502 Bad Gateway` | fluent-api error with `code: AI_SERVICE_UNAVAILABLE` and the upstream message in `details` | -| fluent-ai returns 5xx | `502 Bad Gateway` | same as above | -| Network timeout / connection refused | `502 Bad Gateway` | same as above | -| Envelope `status === "failed"` from fluent-ai | `502 Bad Gateway` | fluent-api error with `code: AI_TOOL_EXECUTION_FAILED` and the envelope `error` propagated | +| Outcome | HTTP | Body | +| ------------------------------------------------------- | ------------------ | ------------------------------------------------------------------------------------------ | +| Tool completed synchronously | `200 OK` | `ToolJobResponse` with `status: "completed"` | +| Tool started asynchronously (future) | `202 Accepted` | `ToolJobResponse` with `status: "queued"` | +| Caller not authenticated | `401 Unauthorized` | fluent-api's standard `Result` error | +| Caller authenticated but lacks `AI_TOOLS_USE` | `403 Forbidden` | fluent-api's standard `Result` error | +| Request body fails Zod validation | `400 Bad Request` | fluent-api's standard validation error | +| fluent-ai returns 4xx (bad request, auth failure, etc.) | `502 Bad Gateway` | fluent-api error with `code: AI_SERVICE_UNAVAILABLE` and the upstream message in `details` | +| fluent-ai returns 5xx | `502 Bad Gateway` | same as above | +| Network timeout / connection refused | `502 Bad Gateway` | same as above | +| Envelope `status === "failed"` from fluent-ai | `502 Bad Gateway` | fluent-api error with `code: AI_TOOL_EXECUTION_FAILED` and the envelope `error` propagated | The 502 choice for upstream failures mirrors what fluent-ai itself does for its own upstream tool failures (`ToolExecutionException` → 502 per fluent-ai decision **D6**). It signals "this isn't a problem with the caller's request; the dependency is misbehaving." 
@@ -425,10 +415,10 @@ The 502 choice for upstream failures mirrors what fluent-ai itself does for its ### 9.1 Two distinct auth boundaries -| Boundary | Mechanism | Established by | Established when | -|----------|-----------|---------------|------------------| -| fluent-web → fluent-api | BetterAuth session cookie | This codebase, existing | Pre-existing | -| fluent-api → fluent-ai | Single shared `X-API-Key` | This PR, env-driven | This PR | +| Boundary | Mechanism | Established by | Established when | +| ----------------------- | ------------------------- | ----------------------- | ---------------- | +| fluent-web → fluent-api | BetterAuth session cookie | This codebase, existing | Pre-existing | +| fluent-api → fluent-ai | Single shared `X-API-Key` | This PR, env-driven | This PR | These boundaries do not bridge directly: there is no propagation of "user X is calling this tool" beyond fluent-api. Audit logs on the fluent-ai side will see the single shared identity. If per-user attribution is wanted later, the request envelope can carry an opaque `requested_by` claim — out of scope for this PR. @@ -440,10 +430,7 @@ These boundaries do not bridge directly: there is no propagation of "user X is c const repeatedWordsRoute = createRoute({ method: 'post', path: '/ai/tools/greek-room/repeated-words', - middleware: [ - authenticateUser, - requirePermission(PERMISSIONS.AI_TOOLS_USE), - ] as const, + middleware: [authenticateUser, requirePermission(PERMISSIONS.AI_TOOLS_USE)] as const, request: { body: { content: { @@ -470,7 +457,7 @@ const repeatedWordsRoute = createRoute({ ### 9.3 `PERMISSIONS.AI_TOOLS_USE` -Per **D10**, this is introduced as an *alias* of `CONTENT_UPDATE`: +Per **D10**, this is introduced as an _alias_ of `CONTENT_UPDATE`: ```ts // fluent-api/src/lib/permissions.ts (excerpt) @@ -478,7 +465,7 @@ Per **D10**, this is introduced as an *alias* of `CONTENT_UPDATE`: export const PERMISSIONS = { // ... existing permissions ... 
CONTENT_UPDATE: 'content:update', - AI_TOOLS_USE: 'content:update', // intentional alias + AI_TOOLS_USE: 'content:update', // intentional alias // ... } as const; ``` @@ -489,13 +476,13 @@ If reviewers prefer a real new permission row from day one, that's a defensible ### 9.4 The `X-API-Key` for fluent-ai -Per **D6**, fluent-api carries a single `FLUENT_AI_KEY` for *all* fluent-ai calls. The key is read once at module scope in `callFluentAi`. Rotation is "set new env, restart fluent-api"; fluent-ai supports multiple active keys per its existing `ai_api_keys` table, so old key + new key can coexist briefly during a rolling restart. +Per **D6**, fluent-api carries a single `FLUENT_AI_KEY` for _all_ fluent-ai calls. The key is read once at module scope in `callFluentAi`. Rotation is "set new env, restart fluent-api"; fluent-ai supports multiple active keys per its existing `ai_api_keys` table, so old key + new key can coexist briefly during a rolling restart. --- ## 10. Error translation -Per **D7**, the exact mapping is settled at implementation time, and if conformity work surfaces we prefer to harmonize fluent-ai toward fluent-api's patterns. This section describes the *minimum viable* mapping that the implementation should ship with; reviewers should challenge anything they want changed before coding starts. +Per **D7**, the exact mapping is settled at implementation time, and if conformity work surfaces we prefer to harmonize fluent-ai toward fluent-api's patterns. This section describes the _minimum viable_ mapping that the implementation should ship with; reviewers should challenge anything they want changed before coding starts. ### 10.1 New `ErrorCode` entries on fluent-api @@ -519,21 +506,21 @@ export const ErrorHttpStatus: Record = { }; ``` -`AI_SERVICE_UNAVAILABLE` covers transport-level / availability problems (network errors, 5xx from fluent-ai, schema parse errors, timeouts). 
`AI_TOOL_EXECUTION_FAILED` covers the case where fluent-ai successfully returned an envelope with `status: "failed"` — the dependency is *up* but the tool itself rejected the work. +`AI_SERVICE_UNAVAILABLE` covers transport-level / availability problems (network errors, 5xx from fluent-ai, schema parse errors, timeouts). `AI_TOOL_EXECUTION_FAILED` covers the case where fluent-ai successfully returned an envelope with `status: "failed"` — the dependency is _up_ but the tool itself rejected the work. ### 10.2 Mapping table -| Source | Translates to | -|--------|---------------| -| `fetch` throws (network down, DNS, connection refused) | `Result.err({ code: AI_SERVICE_UNAVAILABLE, message: 'fluent-ai unreachable', details: { cause: error.message } })` | -| `fetch` times out (default 30s) | `Result.err({ code: AI_SERVICE_UNAVAILABLE, message: 'fluent-ai request timed out', details: { timeoutMs } })` | -| fluent-ai returns 5xx | `Result.err({ code: AI_SERVICE_UNAVAILABLE, message: '', details: { status, body } })` | -| fluent-ai returns 4xx | `Result.err({ code: AI_SERVICE_UNAVAILABLE, message: '', details: { status, body } })` — yes, also 502 on our side; 4xx from fluent-ai represents a misconfiguration or a contract drift, neither of which is the *caller's* fault, so we shield them with 502 rather than relay a 4xx that they cannot act on | -| Response body fails JSON parse or envelope schema validation | `Result.err({ code: AI_SERVICE_UNAVAILABLE, message: 'malformed response from fluent-ai', details: { cause } })` | -| Envelope `status === "failed"` (fluent-ai reachable; tool refused) | `Result.err({ code: AI_TOOL_EXECUTION_FAILED, message: envelope.error?.message ?? 'tool execution failed', details: { type: envelope.error?.type, ... 
} })` | -| Envelope `status === "cancelled"` | Same as `failed` — propagate `AI_TOOL_EXECUTION_FAILED` | -| Envelope `status === "completed"` | `Result.ok(envelope)` | -| Envelope `status === "queued"` or `"running"` | `Result.ok(envelope)` — the route layer decides whether to return 200 or 202 based on `status` | +| Source | Translates to | +| ------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| `fetch` throws (network down, DNS, connection refused) | `Result.err({ code: AI_SERVICE_UNAVAILABLE, message: 'fluent-ai unreachable', details: { cause: error.message } })` | +| `fetch` times out (default 30s) | `Result.err({ code: AI_SERVICE_UNAVAILABLE, message: 'fluent-ai request timed out', details: { timeoutMs } })` | +| fluent-ai returns 5xx | `Result.err({ code: AI_SERVICE_UNAVAILABLE, message: '', details: { status, body } })` | +| fluent-ai returns 4xx | `Result.err({ code: AI_SERVICE_UNAVAILABLE, message: '', details: { status, body } })` — yes, also 502 on our side; 4xx from fluent-ai represents a misconfiguration or a contract drift, neither of which is the _caller's_ fault, so we shield them with 502 rather than relay a 4xx that they cannot act on | +| Response body fails JSON parse or envelope schema validation | `Result.err({ code: AI_SERVICE_UNAVAILABLE, message: 'malformed response from fluent-ai', details: { cause } })` | +| Envelope `status === "failed"` (fluent-ai reachable; tool refused) | `Result.err({ code: AI_TOOL_EXECUTION_FAILED, message: envelope.error?.message ?? 'tool execution failed', details: { type: envelope.error?.type, ... 
} })` | +| Envelope `status === "cancelled"` | Same as `failed` — propagate `AI_TOOL_EXECUTION_FAILED` | +| Envelope `status === "completed"` | `Result.ok(envelope)` | +| Envelope `status === "queued"` or `"running"` | `Result.ok(envelope)` — the route layer decides whether to return 200 or 202 based on `status` | ### 10.3 Route-level translation @@ -549,19 +536,22 @@ aiToolsRouter.openapi(repeatedWordsRoute, async (c) => { if (!result.ok) { return c.json( { error: result.error.message, code: result.error.code, details: result.error.details }, - getHttpStatus(result.error), + getHttpStatus(result.error) ); } const envelope = result.data; - const status = envelope.status === 'completed' || envelope.status === 'failed' || envelope.status === 'cancelled' - ? 200 - : 202; + const status = + envelope.status === 'completed' || + envelope.status === 'failed' || + envelope.status === 'cancelled' + ? 200 + : 202; return c.json(envelope, status); }); ``` -### 10.4 What's intentionally *not* in here +### 10.4 What's intentionally _not_ in here - **No automatic retries** on transport failure. The caller (or the frontend hook) decides. - **No structured "user-facing-vs-internal" error categorization** beyond the `code + message + details` shape that fluent-api already uses everywhere. fluent-web is expected to display `error.message` directly and surface `error.details` only to logged-in admins. @@ -588,7 +578,7 @@ GET /api/v1/tools/jobs/{job_id} → ToolJobResponse with current status and (if completed) result ``` -Returns 200 in all states (queued/running/completed/failed/cancelled). The HTTP status is *not* used to communicate terminal vs. non-terminal — only the envelope's `status` field is. +Returns 200 in all states (queued/running/completed/failed/cancelled). The HTTP status is _not_ used to communicate terminal vs. non-terminal — only the envelope's `status` field is. 
### 11.2 fluent-api's pass-through polling endpoint (future) @@ -605,7 +595,7 @@ Implementation will be a second helper alongside `callFluentAi`: // future, not in this PR export async function pollToolJob( jobId: string, - resultSchema: z.ZodType, + resultSchema: z.ZodType ): Promise>>; ``` @@ -620,7 +610,7 @@ Per **D3**. The detailed reasoning, repeated for completeness: ### 11.4 What the frontend hook will look like (out of scope, sketched) -This is *not* part of this PR, but is sketched here so reviewers can see that the backend contract is consumable. +This is _not_ part of this PR, but is sketched here so reviewers can see that the backend contract is consumable. ```ts // fluent-web/src/lib/api/useToolJob.ts (future) @@ -628,18 +618,25 @@ This is *not* part of this PR, but is sketched here so reviewers can see that th import { useQuery } from '@tanstack/react-query'; import type { ToolJobResponse } from './tool-job-types'; -const TERMINAL: Set['status']> = new Set(['completed', 'failed', 'cancelled']); +const TERMINAL: Set['status']> = new Set([ + 'completed', + 'failed', + 'cancelled', +]); export function useToolJob( jobId: string | null, - opts?: { pollIntervalMs?: number; enabled?: boolean }, + opts?: { pollIntervalMs?: number; enabled?: boolean } ) { return useQuery>({ queryKey: ['ai-tools', 'jobs', jobId], queryFn: () => - fetch(`${config.api.url}/ai/tools/jobs/${jobId}`, { credentials: 'include' }).then(r => r.json()), + fetch(`${config.api.url}/ai/tools/jobs/${jobId}`, { credentials: 'include' }).then((r) => + r.json() + ), enabled: !!jobId && (opts?.enabled ?? true), - refetchInterval: (q) => (q.state.data && TERMINAL.has(q.state.data.status)) ? false : (opts?.pollIntervalMs ?? 1500), + refetchInterval: (q) => + q.state.data && TERMINAL.has(q.state.data.status) ? false : (opts?.pollIntervalMs ?? 
1500), }); } ``` @@ -657,7 +654,7 @@ export function useRepeatedWords() { credentials: 'include', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify(req), - }).then(r => r.json() as Promise>), + }).then((r) => r.json() as Promise>), onSuccess: (envelope) => { if (envelope.status === 'queued' || envelope.status === 'running') { setPendingJobId(envelope.job_id); @@ -689,7 +686,7 @@ The cross-repo orchestration substrate already exists as [`fluent-platform`](../ Per [`fluent-platform/README.md`](../../fluent-platform/README.md), fluent-api runs in one of two modes: - **Ecosystem mode** — started via `./fluent.sh up` from `fluent-platform/`. fluent-ai is also up, reachable at `http://ai:8200` on the internal network (service name `ai` from [`fluent-platform/compose.yaml`](../../fluent-platform/compose.yaml) line 82). -- **Standalone mode** — started via `./fapi.sh up` from `fluent-api/`. fluent-ai is *not* running unless the dev started it separately. fluent-api needs to gracefully report unavailability rather than crash. +- **Standalone mode** — started via `./fapi.sh up` from `fluent-api/`. fluent-ai is _not_ running unless the dev started it separately. fluent-api needs to gracefully report unavailability rather than crash. Both modes are first-class. The integration must work in both. @@ -700,8 +697,8 @@ Two new entries in [`fluent-api/src/env.ts`](../src/env.ts): ```ts const envSchema = z.object({ // ... existing ... 
- FLUENT_AI_URL: z.string().url(), // ecosystem mode: http://ai:8200 — standalone: http://localhost:8200 - FLUENT_AI_KEY: z.string().min(1), // dev value: fai_dev_admin + FLUENT_AI_URL: z.string().url(), // ecosystem mode: http://ai:8200 — standalone: http://localhost:8200 + FLUENT_AI_KEY: z.string().min(1), // dev value: fai_dev_admin }); ``` @@ -728,19 +725,19 @@ The `.env.example` documents the standalone-mode default because that's the path [`fluent-platform/compose.yaml`](../../fluent-platform/compose.yaml) currently passes fluent-api's `.env` verbatim via `env_file: ${API_CONTEXT:-../fluent-api}/.env`. To make ecosystem mode work regardless of what the dev wrote in `fluent-api/.env`, the platform compose should explicitly override the URL for the `api` service: ```yaml - api: - # ... existing ... - env_file: ${API_CONTEXT:-../fluent-api}/.env - environment: - DATABASE_URL: postgres://postgres:postgres@db:5432/fluent - EXPORTS_DIR: /app/exports - # New entries: - FLUENT_AI_URL: http://ai:8200 - # FLUENT_AI_KEY intentionally NOT overridden here — sourced from fluent-api/.env, - # which must match fluent-ai's ai_api_keys seed (dev value: fai_dev_admin) +api: + # ... existing ... + env_file: ${API_CONTEXT:-../fluent-api}/.env + environment: + DATABASE_URL: postgres://postgres:postgres@db:5432/fluent + EXPORTS_DIR: /app/exports + # New entries: + FLUENT_AI_URL: http://ai:8200 + # FLUENT_AI_KEY intentionally NOT overridden here — sourced from fluent-api/.env, + # which must match fluent-ai's ai_api_keys seed (dev value: fai_dev_admin) ``` -`FLUENT_AI_URL` is overridden because it's deployment-topology-dependent. `FLUENT_AI_KEY` is *not* overridden because it's a shared secret — the same value belongs in `fluent-api/.env` (for the caller) and in fluent-ai's `ai_api_keys` table (which the dev seed already populates). Overriding only on one side would invite drift. +`FLUENT_AI_URL` is overridden because it's deployment-topology-dependent. 
`FLUENT_AI_KEY` is _not_ overridden because it's a shared secret — the same value belongs in `fluent-api/.env` (for the caller) and in fluent-ai's `ai_api_keys` table (which the dev seed already populates). Overriding only on one side would invite drift. This is a small fluent-platform PR that should land alongside the fluent-api PR. Both repos ship together; the spec calls this out as a release-coordination item in §15. @@ -764,7 +761,7 @@ When a dev runs only `./fapi.sh up` without fluent-ai, the `/ai/tools/...` endpo - **fluent-api's README** gains a short subsection under "Running locally" pointing to fluent-platform for ecosystem mode and explaining the standalone-mode caveat. - **fluent-platform's README** has a Services table at line 61–68 listing `api`, `ai`, `web`, `worker`, `db`. The proposed compose change in §12.4 doesn't add new services so this table is unaffected, but the Environment Configuration section (line 166+) should mention that `FLUENT_AI_KEY` must be set in `fluent-api/.env` to enable the AI tools endpoints. -### 12.8 What `callFluentAi` does *not* assume about networking +### 12.8 What `callFluentAi` does _not_ assume about networking The client is unaware of whether fluent-ai is at `localhost:8200`, `ai:8200`, `https://fluent-ai.internal.example.com`, or anywhere else. It reads `FLUENT_AI_URL` verbatim, appends `/api/v1/${toolPath}`, and POSTs. This means: @@ -827,7 +824,7 @@ A standalone script mirroring [`fluent-ai/scripts/smoke_repeated_words.py`](../. Invoked via an npm script: `npm run smoke:repeated-words`. Not part of `npm test` (it requires a live stack). Documented in fluent-api's README alongside the existing dev workflow. -### 13.4 What is *not* covered +### 13.4 What is _not_ covered - **No end-to-end fluent-web → fluent-api → fluent-ai test.** That's a frontend concern that will land with the frontend PR. - **No load tests** for the polling endpoint (which doesn't exist yet on either side). 
@@ -905,6 +902,7 @@ These are the items the spec discussion landed on but where reviewer pushback wo **Recommended:** Yes — see **D2** and §5. **Alternatives:** + - `POST /checks/repeated-words` — closer to the verbiage we use elsewhere ("checks" rather than "tools"). Downside: hides the network-bound, possibly-async nature of these endpoints. - `POST /chapter-assignments/{id}/checks/repeated-words` — nests the check under the resource it operates on. Rejected because it requires fluent-api to enrich the request body from `chapter_assignment_id` → verses + language metadata, which couples fluent-api to fluent-ai's input schema (rejected by **D8**). - `POST /tools/dispatch` with `{tool: "...", params: {...}}` — collapses the type system at the wire boundary. Same reason fluent-ai rejected this (see [`fluent-ai/greek-room-integration-summary.md`](../../fluent-ai/greek-room-integration-summary.md) §1). @@ -916,6 +914,7 @@ These are the items the spec discussion landed on but where reviewer pushback wo **Recommended:** Yes, alias — see **D10** and §9.3. **Alternatives:** + - Introduce a real new permission row in the `permissions` table with its own role mappings. Requires a migration and seed update. Gives nothing user-visible today but is the "cleaner" RBAC story. - Reuse `PERMISSIONS.CONTENT_UPDATE` directly at the call site (no alias). Loses the documentary value of seeing "AI_TOOLS_USE" at the route. @@ -926,6 +925,7 @@ These are the items the spec discussion landed on but where reviewer pushback wo **Recommended:** Pass through the full `ToolJobResponse` — see **D9** and §8.2. **Alternatives:** + - For the synchronous case only, return just the `result` field (i.e. `{findings, summary}`) and 200, reserving the envelope for when fluent-ai goes async. Simpler today; mildly more breaking when polling lands. - Pass through always but add a thin `result_only` query parameter for callers that want the unwrapped shape. Adds API surface for negligible benefit. 
@@ -936,8 +936,9 @@ These are the items the spec discussion landed on but where reviewer pushback wo **Recommended:** No enrichment — see **D8** and §8.1. **Alternatives:** + - fluent-api looks up `chapter_assignment_id` (or `project_id`) and adds verses + language metadata server-side. Caller sends a thin reference, fluent-api fattens it before forwarding. Trades client flexibility for harder-to-spoof inputs. -- Hybrid: caller sends the full body, fluent-api *validates* certain fields against its own data (e.g. confirms the caller has access to that `project_id`). Lighter than full enrichment. +- Hybrid: caller sends the full body, fluent-api _validates_ certain fields against its own data (e.g. confirms the caller has access to that `project_id`). Lighter than full enrichment. **Decision needed from reviewer:** confirm no enrichment, or push back for either alternative. @@ -946,5 +947,3 @@ These are the items the spec discussion landed on but where reviewer pushback wo If reviewers identify a concern not captured above, please raise it as a comment on the PR. The relevant pre-decisions are summarized in §3 and the rationale is in the predecessor docs ([`fluent-ai/greek-room-integration-summary.md`](../../fluent-ai/greek-room-integration-summary.md), [`fluent-ai/greek-room-integration-suggestion.md`](../../fluent-ai/greek-room-integration-suggestion.md), [`fluent-ai/greek-room-integration-decisions.md`](../../fluent-ai/greek-room-integration-decisions.md)). 
--- - - diff --git a/docs/proposals/repeated-word-check/ai-tools-integration-summary.md b/docs/proposals/repeated-word-check/ai-tools-integration-summary.md index 0d6482a..56eae9e 100644 --- a/docs/proposals/repeated-word-check/ai-tools-integration-summary.md +++ b/docs/proposals/repeated-word-check/ai-tools-integration-summary.md @@ -4,7 +4,7 @@ ## What's being proposed -Expose fluent-ai's Greek-Room *Repeated Words* check through fluent-api as the first AI-tool endpoint, using a pattern designed to absorb every future AI tool (LLM drafting, embeddings, other Greek-Room checks) with a five-line per-tool wrapper. +Expose fluent-ai's Greek-Room _Repeated Words_ check through fluent-api as the first AI-tool endpoint, using a pattern designed to absorb every future AI tool (LLM drafting, embeddings, other Greek-Room checks) with a five-line per-tool wrapper. ## Core architectural decisions for review From 0a72ceea4d0ea0b1924f6fb8fda3c3a803668d68 Mon Sep 17 00:00:00 2001 From: Joshua Lansford Date: Sun, 31 May 2026 12:43:44 -0400 Subject: [PATCH 4/9] docs: satisfy eslint import rules in proposal code samples The repo's @antfu/eslint-config (formatters: true) lints TS code blocks inside markdown. Adjust the illustrative import statements in the repeated-word-check proposal to satisfy perfectionist/sort-imports and import/consistent-type-specifier-style: - separate import groups with blank lines - hoist inline type specifiers to top-level 'import type' Hand-formatted (not raw eslint --fix output) so the samples stay readable. No prose changed. Verified: 'eslint .' and 'prettier --check .' both pass. 
--- .../ai-tools-integration-suggestion.md | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/docs/proposals/repeated-word-check/ai-tools-integration-suggestion.md b/docs/proposals/repeated-word-check/ai-tools-integration-suggestion.md index d23f935..3ac693f 100644 --- a/docs/proposals/repeated-word-check/ai-tools-integration-suggestion.md +++ b/docs/proposals/repeated-word-check/ai-tools-integration-suggestion.md @@ -216,7 +216,9 @@ The single piece of _new mechanism_ this PR introduces is the function in [`flue ```ts import { z } from '@hono/zod-openapi'; + import type { Result } from '@/lib/types'; + import type { ToolJobResponse } from './fluent-ai.types'; export async function callFluentAi( @@ -262,14 +264,14 @@ The point of `callFluentAi` is **not** to be the only function callers ever touc ```ts // fluent-api/src/domains/ai-tools/ai-tools.service.ts -import { callFluentAi } from '@/lib/services/fluent-ai/fluent-ai.client'; -import type { Result } from '@/lib/types'; import type { ToolJobResponse } from '@/lib/services/fluent-ai/fluent-ai.types'; -import { - type RepeatedWordsRequest, - type RepeatedWordsResult, - RepeatedWordsResultSchema, -} from './ai-tools.types'; +import type { Result } from '@/lib/types'; + +import { callFluentAi } from '@/lib/services/fluent-ai/fluent-ai.client'; + +import type { RepeatedWordsRequest, RepeatedWordsResult } from './ai-tools.types'; + +import { RepeatedWordsResultSchema } from './ai-tools.types'; export async function callRepeatedWords( req: RepeatedWordsRequest @@ -616,6 +618,7 @@ This is _not_ part of this PR, but is sketched here so reviewers can see that th // fluent-web/src/lib/api/useToolJob.ts (future) import { useQuery } from '@tanstack/react-query'; + import type { ToolJobResponse } from './tool-job-types'; const TERMINAL: Set['status']> = new Set([ From 00cebd98164a6b999d942de4391ed37f44503a3c Mon Sep 17 00:00:00 2001 From: Joshua Lansford Date: Sun, 31 May 2026 12:54:14 -0400 
Subject: [PATCH 5/9] docs: document C++20 (CXXFLAGS) requirement for npm install on Node 24 --- README.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/README.md b/README.md index a8b1ac3..c3175a8 100644 --- a/README.md +++ b/README.md @@ -72,6 +72,18 @@ The backend/server for the Fluent ecosystem built with Hono and OpenAPI. This is npm install ``` + > **Note (Node 24+):** A transitive dependency (`tree-sitter`, pulled in by + > `@antfu/eslint-config`'s markdown formatter) compiles a native addon + > against Node's V8 headers, which require C++20. If `npm install` fails + > with a `node-gyp` build error such as `error: "C++20 or later required."`, + > run the install with the C++20 standard enabled (this is what CI does): + > + > ```bash + > CXXFLAGS="-std=c++20" npm install + > ``` + > + > This requires a C++ toolchain (e.g. `build-essential` on Debian/Ubuntu). + 3. **Set up environment variables:** ```bash From f719287090111776ff73629f9b6f0f4ccbee8b52 Mon Sep 17 00:00:00 2001 From: Joshua Lansford Date: Tue, 2 Jun 2026 16:46:10 -0400 Subject: [PATCH 6/9] docs: record PR #173 reviewer resolutions for AI-tools proposal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Capture kaseywright's review outcomes (approved 2026-06-02) directly in the proposal docs so the decisions persist beyond the PR thread: - D8/§8.1: intentional snake_case divergence accepted; add in-code-comment guardrail - D10/§9.3: AI_TOOLS_USE permission alias confirmed; document for future reference - D9/§8.2: envelope pass-through confirmed, conditioned on standard web-client format - §15: mark open questions resolved, each linking its originating review comment --- .../ai-tools-integration-suggestion.md | 31 +++++++++++++++++-- .../ai-tools-integration-summary.md | 9 ++++++ 2 files changed, 37 insertions(+), 3 deletions(-) diff --git a/docs/proposals/repeated-word-check/ai-tools-integration-suggestion.md 
b/docs/proposals/repeated-word-check/ai-tools-integration-suggestion.md index 3ac693f..6506a03 100644 --- a/docs/proposals/repeated-word-check/ai-tools-integration-suggestion.md +++ b/docs/proposals/repeated-word-check/ai-tools-integration-suggestion.md @@ -71,9 +71,9 @@ These are the decisions captured during the spec discussion. Each is restated he | **D5** | Service discovery / docker networking is handled by the existing [`fluent-platform`](../../fluent-platform/README.md) orchestrator. This PR adds two env vars on the fluent-api side and one `environment:` override on the fluent-platform side (`FLUENT_AI_URL: http://ai:8200`). See §12. | fluent-platform already wires `db`, `api`, `worker`, `ai`, `web` together on a shared network; we plug in to that substrate rather than invent a new one. | | **D6** | A single shared `FLUENT_AI_KEY` is provisioned for the fluent-api → fluent-ai hop. If another consumer of fluent-ai appears later, it gets its own key. | Per-user keys give zero security benefit at this layer (everyone going through fluent-api is already authenticated to fluent-api). Single key minimizes IT complexity. | | **D7** | Error translation specifics deferred to implementation. If conformity between the two error systems is awkward, prefer harmonizing fluent-ai toward fluent-api's patterns rather than the other way. | At the spec level there are no hard constraints; the safe defaults (5xx from fluent-ai → 502 on fluent-api with `ErrorCode.AI_SERVICE_UNAVAILABLE`) are obvious. | -| **D8** | No request enrichment. fluent-api forwards the request body to fluent-ai verbatim. fluent-web sends the full `RepeatedWordsRequest` shape including `lang_code`, `lang_name`, `project_id`, `project_name`, `verses[]`. | Maximum flexibility for the caller. Avoids coupling fluent-api to fluent-ai's request schema (today and tomorrow). | -| **D9** | The full `ToolJobResponse` envelope is passed through to fluent-web unchanged. No unwrap to `result` for the synchronous case. 
| Forward-compatible with TanStack-based polling — the same hook code consumes the envelope today (`status: completed`) and tomorrow (`status: queued` → polled to `completed`). | -| **D10** | Auth on the new endpoint: introduce `PERMISSIONS.AI_TOOLS_USE` as an _alias_ with the same underlying value as `CONTENT_UPDATE`. | Cosmetically separates "can edit content" from "can invoke AI tools" without making a real distinction yet. Trivial to peel apart later. | +| **D8** | No request enrichment. fluent-api forwards the request body to fluent-ai verbatim. fluent-web sends the full `RepeatedWordsRequest` shape including `lang_code`, `lang_name`, `project_id`, `project_name`, `verses[]`. **Reviewer-confirmed (kaseywright, 2026-06-02):** forwarding verbatim is approved; the snake_case field naming is an _intentional, contained_ exception to fluent-api's camelCase convention, scoped to the AI-tools domain. See §8.1 — please leave this divergence as-is rather than "normalizing" it to camelCase, since it intentionally mirrors the fluent-ai contract. | Maximum flexibility for the caller. Avoids coupling fluent-api to fluent-ai's request schema (today and tomorrow). The alternatives (per-service renaming or mapper functions) add cost without a corresponding benefit; keeping the divergence contained in the AI-tools domain is the lesser evil. | +| **D9** | The full `ToolJobResponse` envelope is passed through to fluent-web unchanged. No unwrap to `result` for the synchronous case. **Reviewer-confirmed (kaseywright, 2026-06-02):** approved on the condition that the response delivered to the web client conforms to the standard response format already in place on fluent-api (see §8.2). | Forward-compatible with TanStack-based polling — the same hook code consumes the envelope today (`status: completed`) and tomorrow (`status: queued` → polled to `completed`). 
| +| **D10** | Auth on the new endpoint: introduce `PERMISSIONS.AI_TOOLS_USE` as an _alias_ with the same underlying value as `CONTENT_UPDATE`. **Reviewer-confirmed (kaseywright, 2026-06-02):** alias approach approved; the trade-off (flexibility vs. user-results) was noted as acceptable provided the decision is documented here for future reference. See §9.3. | Cosmetically separates "can edit content" from "can invoke AI tools" without making a real distinction yet. Trivial to peel apart later. | | **D11** | A smoke test analogous to [`fluent-ai/scripts/smoke_repeated_words.py`](../../fluent-ai/scripts/smoke_repeated_words.py) is added, runnable from the host with both services up. | Lets devs verify the cross-service plumbing without running the full vitest suite. | | **D12** | This work ships as a **coordinated pair of PRs**: one against fluent-api (the bulk of the work) and one small PR against fluent-platform (compose env-var override + 1–2 README lines). Either order of merge is fine; both should be ready for review together. | The fluent-platform PR is small and contains no logic, so it can land first to unblock ecosystem-mode dev. Reviewers should be able to read both PRs side-by-side. | @@ -325,6 +325,17 @@ Notes: - `verses` is required and non-empty (`.min(1)`) so we can fail fast at the route layer rather than incur a round-trip to fluent-ai for a trivially-invalid request. - The field naming uses fluent-ai's snake_case verbatim (`lang_code`, `snt_id`). This is a deliberate departure from fluent-api's camelCase elsewhere; the alternative (rename in fluent-api, re-rename in fluent-ai) buys nothing and risks drift. The OpenAPI docs make the snake_case visible to the frontend. 
+> **ℹ️ Intentional convention exception — please leave as-is.** _Reviewer-confirmed by kaseywright on 2026-06-02 ([PR #173, review comment](https://github.com/eten-tech-foundation/fluent-api/pull/173#discussion_r3343677813)); decision **D8**._ The snake_case field names in `src/domains/ai-tools/` and `src/lib/services/fluent-ai/` are an _intentional_ divergence from fluent-api's camelCase convention, kept so the wire contract with fluent-ai stays a verbatim pass-through. The reviewer noted that the naming-case divergence is something he'd normally prefer to avoid, but that the alternatives (per-service renaming or mapper functions) are no more rewarding, so this contained exception is the accepted trade-off. When working in this area, please keep these fields in snake_case rather than "normalizing" them to camelCase — renaming them would silently break the fluent-ai contract. The exception is scoped strictly to the AI-tools domain; the rest of fluent-api remains camelCase. +> +> **Implementation note:** it helps to place a short code comment next to the snake_case Zod schemas (at least in [`ai-tools.types.ts`](../src/domains/ai-tools/ai-tools.types.ts) and [`fluent-ai.types.ts`](../src/lib/services/fluent-ai/fluent-ai.types.ts)) explaining the convention and linking back to both this decision (**D8** / §8.1) and the originating review comment. The comment is what an AI agent or contributor will actually see at the edit site, so it — alongside this proposal — is the most durable guardrail against an accidental rename. Suggested wording: +> +> ```ts +> // Intentional snake_case — verbatim wire contract with fluent-ai (decision D8). +> // Approved in review: https://github.com/eten-tech-foundation/fluent-api/pull/173#discussion_r3343677813 +> // Rationale: docs/proposals/repeated-word-check/ai-tools-integration-suggestion.md §8.1 +> // Please keep these in snake_case; renaming to camelCase would break the fluent-ai contract. 
+> ``` + ### 8.2 The reverse direction (fluent-ai → fluent-api → fluent-web) Per **D9** (envelope pass-through), the response body is fluent-ai's `ToolJobResponse[RepeatedWordsResult]` verbatim: @@ -395,6 +406,8 @@ export const RepeatedWordsResponseSchema = z.object({ The `RepeatedWordsResponseSchema` is what the Hono route declares as its 200 response, so OpenAPI docs show the full envelope. fluent-web's hook receives the envelope and inspects `status` and `result` directly. +> **Reviewer condition** — kaseywright, 2026-06-02 ([PR #173, review comment](https://github.com/eten-tech-foundation/fluent-api/pull/173#discussion_r3343642943)); decision **D9**. The envelope pass-through was approved on the condition that the response delivered to the web client conforms to the standard response format already in place on fluent-api. In practice this means: success responses still carry the `ToolJobResponse` envelope verbatim, but error responses use fluent-api's existing `{ error, code, details }` shape via `getHttpStatus` (see §10.3), so fluent-web consumes successes and failures through the same conventions it already uses elsewhere. The implementer should verify this alignment when wiring up the route. + ### 8.3 Status codes from fluent-api | Outcome | HTTP | Body | @@ -476,6 +489,8 @@ The alias has the same string value, which means `requirePermission(PERMISSIONS. If reviewers prefer a real new permission row from day one, that's a defensible alternative; it costs a migration and seeding work and gives no immediate user-visible benefit. The alias approach was chosen because it's reversible from either direction. +> **Reviewer-confirmed** — kaseywright, 2026-06-02 ([PR #173, review comment](https://github.com/eten-tech-foundation/fluent-api/pull/173#discussion_r3343633722)); decision **D10**. The alias approach is approved. 
The reviewer noted this as a flexibility-vs-results trade-off that is acceptable provided the decision is documented for future reference — which is the purpose of this note. **Implementation note:** add a short comment beside the alias in [`permissions.ts`](../src/lib/permissions.ts) linking back to this decision and the review comment, so the intent is visible at the edit site. For whoever revisits it later: promoting `AI_TOOLS_USE` to a real, distinct permission means adding a row to the `permissions` table, mapping it to the appropriate roles in seed data, and changing only the string value here in [`permissions.ts`](../src/lib/permissions.ts) — no call sites that already import `PERMISSIONS.AI_TOOLS_USE` need to change. + ### 9.4 The `X-API-Key` for fluent-ai Per **D6**, fluent-api carries a single `FLUENT_AI_KEY` for _all_ fluent-ai calls. The key is read once at module scope in `callFluentAi`. Rotation is "set new env, restart fluent-api"; fluent-ai supports multiple active keys per its existing `ai_api_keys` table, so old key + new key can coexist briefly during a rolling restart. @@ -900,10 +915,14 @@ Two small, optional improvements identified while writing this spec: These are the items the spec discussion landed on but where reviewer pushback would meaningfully change the outcome. Each one has a recommended position (the doc reflects this); each one can be flipped without restructuring the rest of the proposal. +> **Status: resolved.** All four questions below were addressed in kaseywright's review of [PR #173](https://github.com/eten-tech-foundation/fluent-api/pull/173) on 2026-06-02 (review **APPROVED**). The reviewer confirmed each recommended position; two of them (§15.2, §15.4) came with a request to document the decision, now captured in §9.3 and §8.1 respectively. Per-item resolutions are noted inline below. + ### 15.1 URL layout: is `POST /ai/tools/greek-room/repeated-words` the right shape? **Recommended:** Yes — see **D2** and §5. 
+> **Resolved (kaseywright, 2026-06-02):** confirmed — "this URL layout works well." [Review comment.](https://github.com/eten-tech-foundation/fluent-api/pull/173#discussion_r3343625894) + **Alternatives:** - `POST /checks/repeated-words` — closer to the verbiage we use elsewhere ("checks" rather than "tools"). Downside: hides the network-bound, possibly-async nature of these endpoints. @@ -916,6 +935,8 @@ These are the items the spec discussion landed on but where reviewer pushback wo **Recommended:** Yes, alias — see **D10** and §9.3. +> **Resolved (kaseywright, 2026-06-02):** alias approach confirmed, with the request to document the decision for future reference (done in §9.3). [Review comment.](https://github.com/eten-tech-foundation/fluent-api/pull/173#discussion_r3343633722) + **Alternatives:** - Introduce a real new permission row in the `permissions` table with its own role mappings. Requires a migration and seed update. Gives nothing user-visible today but is the "cleaner" RBAC story. @@ -927,6 +948,8 @@ These are the items the spec discussion landed on but where reviewer pushback wo **Recommended:** Pass through the full `ToolJobResponse` — see **D9** and §8.2. +> **Resolved (kaseywright, 2026-06-02):** pass-through confirmed, conditioned on the web-client response following the standard format already in place (see §8.2). [Review comment.](https://github.com/eten-tech-foundation/fluent-api/pull/173#discussion_r3343642943) + **Alternatives:** - For the synchronous case only, return just the `result` field (i.e. `{findings, summary}`) and 200, reserving the envelope for when fluent-ai goes async. Simpler today; mildly more breaking when polling lands. @@ -938,6 +961,8 @@ These are the items the spec discussion landed on but where reviewer pushback wo **Recommended:** No enrichment — see **D8** and §8.1. 
+> **Resolved (kaseywright, 2026-06-02):** forwarding verbatim confirmed; the snake_case naming divergence accepted as a contained, intentional AI-tools-domain exception, with a request to document it (done in §8.1). [Review comment.](https://github.com/eten-tech-foundation/fluent-api/pull/173#discussion_r3343677813) + **Alternatives:** - fluent-api looks up `chapter_assignment_id` (or `project_id`) and adds verses + language metadata server-side. Caller sends a thin reference, fluent-api fattens it before forwarding. Trades client flexibility for harder-to-spoof inputs. diff --git a/docs/proposals/repeated-word-check/ai-tools-integration-summary.md b/docs/proposals/repeated-word-check/ai-tools-integration-summary.md index 56eae9e..f922e1f 100644 --- a/docs/proposals/repeated-word-check/ai-tools-integration-summary.md +++ b/docs/proposals/repeated-word-check/ai-tools-integration-summary.md @@ -28,3 +28,12 @@ Polling endpoint on either side, DB persistence of tool runs, frontend hooks and 2. **Permission alias** — `PERMISSIONS.AI_TOOLS_USE` as a string-value alias of `CONTENT_UPDATE`, vs. a real new permission row with migration + seeding? 3. **Envelope pass-through** — return the full `ToolJobResponse` today, vs. unwrap `result` for the sync case and reshape later when polling lands? 4. **No request enrichment** — forward verbatim, vs. server-side lookup of `chapter_assignment_id` → verses, vs. a validation-only hybrid? + +## Reviewer outcome + +Reviewed and **approved** by kaseywright on 2026-06-02 ([PR #173](https://github.com/eten-tech-foundation/fluent-api/pull/173)). All four questions above were confirmed as proposed; the supporting detail and the two "please document" follow-ups now live in the long-form proposal: + +1. **URL layout** — confirmed ("this URL layout works well"). [Comment.](https://github.com/eten-tech-foundation/fluent-api/pull/173#discussion_r3343625894) +2. 
**Permission alias** — confirmed; decision documented for future reference in [`ai-tools-integration-suggestion.md`](ai-tools-integration-suggestion.md) §9.3 / **D10**. [Comment.](https://github.com/eten-tech-foundation/fluent-api/pull/173#discussion_r3343633722) +3. **Envelope pass-through** — confirmed, conditioned on the web-client response following fluent-api's standard response format; see §8.2 / **D9**. [Comment.](https://github.com/eten-tech-foundation/fluent-api/pull/173#discussion_r3343642943) +4. **No request enrichment** — forwarding verbatim confirmed; the intentional snake_case naming divergence is documented (with an in-code-comment guardrail) in §8.1 / **D8**. [Comment.](https://github.com/eten-tech-foundation/fluent-api/pull/173#discussion_r3343677813) From b055f842fdb9e3d047ae6ede5a9a15619014fdca Mon Sep 17 00:00:00 2001 From: Joshua Lansford Date: Thu, 4 Jun 2026 08:45:06 -0400 Subject: [PATCH 7/9] feat(ai-tools): add greek-room repeated-words endpoint + fluent-ai client MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements the AI-tools integration approved in PR #173 (per docs/proposals/repeated-word-check/ai-tools-integration-suggestion.md §2). - New domain src/domains/ai-tools/ exposing POST /ai/tools/greek-room/repeated-words, registered on the OpenAPIHono app via app.ts. - Shared client callFluentAi(toolPath, body, resultSchema, options?) at src/lib/services/fluent-ai/ (30s default timeout, AbortSignal support, no polling/caching/retries per §7.3). - Two env vars FLUENT_AI_URL and FLUENT_AI_KEY (required, no defaults) wired through env.ts + .env.example + .env.test. - Permission alias PERMISSIONS.AI_TOOLS_USE = 'content:update' (D10) with review-link comment. - Two ErrorCodes AI_SERVICE_UNAVAILABLE and AI_TOOL_EXECUTION_FAILED, both -> HTTP 502. - Envelope pass-through (D9): 200 for terminal completed/failed/cancelled, 202 for queued/running; errors via getHttpStatus standard shape. 
- snake_case wire contract kept verbatim (D8) with guardrail comments next to the Zod schemas. - Tests: callFluentAi unit tests (§13.1), route tests (§13.2), and host-runnable scripts/smoke-repeated-words.ts via npm run smoke:repeated-words. Schemas verified against the live fluent-ai contract; proposal §8.2/§10.2 updated to match. Out of scope: polling endpoint, DB persistence, frontend hooks, retries, caching. Refs: PR #173 (kaseywright, 2026-06-02) --- .env.example | 10 + .env.test | 2 + .../ai-tools-integration-suggestion.md | 54 ++-- package-lock.json | 130 +++++++- package.json | 1 + scripts/smoke-repeated-words.ts | 281 ++++++++++++++++++ src/app.ts | 1 + src/domains/ai-tools/ai-tools.route.test.ts | 192 ++++++++++++ src/domains/ai-tools/ai-tools.route.ts | 88 ++++++ src/domains/ai-tools/ai-tools.service.ts | 24 ++ src/domains/ai-tools/ai-tools.types.ts | 89 ++++++ src/env.ts | 7 + src/lib/permissions.ts | 10 + .../fluent-ai/fluent-ai.client.test.ts | 250 ++++++++++++++++ .../services/fluent-ai/fluent-ai.client.ts | 182 ++++++++++++ src/lib/services/fluent-ai/fluent-ai.types.ts | 45 +++ src/lib/types.ts | 8 + 17 files changed, 1348 insertions(+), 26 deletions(-) create mode 100644 scripts/smoke-repeated-words.ts create mode 100644 src/domains/ai-tools/ai-tools.route.test.ts create mode 100644 src/domains/ai-tools/ai-tools.route.ts create mode 100644 src/domains/ai-tools/ai-tools.service.ts create mode 100644 src/domains/ai-tools/ai-tools.types.ts create mode 100644 src/lib/services/fluent-ai/fluent-ai.client.test.ts create mode 100644 src/lib/services/fluent-ai/fluent-ai.client.ts create mode 100644 src/lib/services/fluent-ai/fluent-ai.types.ts diff --git a/.env.example b/.env.example index 357ce16..9523609 100644 --- a/.env.example +++ b/.env.example @@ -14,3 +14,13 @@ SEED_MANAGER_EMAIL=pm@fluent.local SEED_MANAGER_PASSWORD=pm@123456 SEED_TRANSLATOR_EMAIL=t@fluent.local SEED_TRANSLATOR_PASSWORD=t@123456 + +# Fluent-AI integration +# Base URL of the 
fluent-ai service (no trailing slash, no /api/v1 suffix). +# - Ecosystem mode (via fluent-platform): http://ai:8200 +# - Standalone fluent-api against standalone fluent-ai: http://localhost:8200 +FLUENT_AI_URL=http://localhost:8200 + +# Shared API key for calling fluent-ai. Matches a row in fluent-ai's ai_api_keys table. +# Dev value seeded by fluent-ai: fai_dev_admin +FLUENT_AI_KEY=fai_dev_admin diff --git a/.env.test b/.env.test index a575db7..a5c6c0a 100644 --- a/.env.test +++ b/.env.test @@ -11,3 +11,5 @@ EMAIL_SERVICE_API_KEY=test-only-dummy-api-key EMAIL_SERVICE_DOMAIN=test.example.com EMAIL_SERVICE_SENDER=no-reply@test.example.com FRONTEND_URL=http://localhost:5173 +FLUENT_AI_URL=http://localhost:8200 +FLUENT_AI_KEY=test-only-dummy-fluent-ai-key diff --git a/docs/proposals/repeated-word-check/ai-tools-integration-suggestion.md b/docs/proposals/repeated-word-check/ai-tools-integration-suggestion.md index 6506a03..6e8059d 100644 --- a/docs/proposals/repeated-word-check/ai-tools-integration-suggestion.md +++ b/docs/proposals/repeated-word-check/ai-tools-integration-suggestion.md @@ -62,20 +62,20 @@ Relative paths in this document (e.g. `../../fluent-platform/...`) assume the st These are the decisions captured during the spec discussion. Each is restated here so reviewers can discuss the conclusion without reading the supporting analysis. 
-| # | Decision | Short rationale | -| ------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| **D1** | PR scope is "minimum proxy" — no DB persistence, no job queue exercised in this PR. | Repeated-words is fast (<1s) and re-runnable; persistence is not motivated by this tool. Defer until a slow tool justifies a `ai_tool_runs` table. | -| **D2** | URL is `POST /ai/tools/greek-room/repeated-words`. | Introduces `/ai/` as fluent-api's first top-level service-family namespace. Telegraphs "network-bound, potentially slow, possibly async" — characteristics that local CRUD endpoints don't share. Per-tool URL preserves OpenAPI type-safety. Alternatives: `/checks/repeated-words` (more in convention but hides the proxy nature), nested under `/chapter-assignments/{id}/` (requires server-side enrichment which we reject in D8). | -| **D3** | Polling lives in the _browser_ via TanStack Query's `refetchInterval`, not in fluent-api. fluent-api is a thin pass-through for both kickoff and (future) polling. | Decouples slow tools from fluent-api's request budget. Aligns with fluent-web's existing TanStack Query usage. The polling code path is not exercised today because fluent-ai always returns `status: "completed"` synchronously. 
| -| **D4** | File layout: shared utility at [`fluent-api/src/lib/services/fluent-ai/fluent-ai.client.ts`](../src/lib/services/fluent-ai/fluent-ai.client.ts); per-tool routes/services in [`fluent-api/src/domains/ai-tools/`](../src/domains/ai-tools/). One route file for all tools; per-tool Zod schemas keep OpenAPI documentation fully typed. | Mirrors the existing [`fluent-api/src/lib/services/notifications/mailgun.service.ts`](../src/lib/services/notifications/mailgun.service.ts) pattern for "free functions wrapping a third-party API" and the existing [`fluent-api/src/lib/db-retry.ts`](../src/lib/db-retry.ts) pattern for "higher-order utility used by many call sites." Avoids a single one-size-fits-all dispatcher that would degrade OpenAPI schemas to `dict[str, Any]`. | -| **D5** | Service discovery / docker networking is handled by the existing [`fluent-platform`](../../fluent-platform/README.md) orchestrator. This PR adds two env vars on the fluent-api side and one `environment:` override on the fluent-platform side (`FLUENT_AI_URL: http://ai:8200`). See §12. | fluent-platform already wires `db`, `api`, `worker`, `ai`, `web` together on a shared network; we plug in to that substrate rather than invent a new one. | -| **D6** | A single shared `FLUENT_AI_KEY` is provisioned for the fluent-api → fluent-ai hop. If another consumer of fluent-ai appears later, it gets its own key. | Per-user keys give zero security benefit at this layer (everyone going through fluent-api is already authenticated to fluent-api). Single key minimizes IT complexity. | -| **D7** | Error translation specifics deferred to implementation. If conformity between the two error systems is awkward, prefer harmonizing fluent-ai toward fluent-api's patterns rather than the other way. | At the spec level there are no hard constraints; the safe defaults (5xx from fluent-ai → 502 on fluent-api with `ErrorCode.AI_SERVICE_UNAVAILABLE`) are obvious. | -| **D8** | No request enrichment. 
fluent-api forwards the request body to fluent-ai verbatim. fluent-web sends the full `RepeatedWordsRequest` shape including `lang_code`, `lang_name`, `project_id`, `project_name`, `verses[]`. **Reviewer-confirmed (kaseywright, 2026-06-02):** forwarding verbatim is approved; the snake_case field naming is an _intentional, contained_ exception to fluent-api's camelCase convention, scoped to the AI-tools domain. See §8.1 — please leave this divergence as-is rather than "normalizing" it to camelCase, since it intentionally mirrors the fluent-ai contract. | Maximum flexibility for the caller. Avoids coupling fluent-api to fluent-ai's request schema (today and tomorrow). The alternatives (per-service renaming or mapper functions) add cost without a corresponding benefit; keeping the divergence contained in the AI-tools domain is the lesser evil. | -| **D9** | The full `ToolJobResponse` envelope is passed through to fluent-web unchanged. No unwrap to `result` for the synchronous case. **Reviewer-confirmed (kaseywright, 2026-06-02):** approved on the condition that the response delivered to the web client conforms to the standard response format already in place on fluent-api (see §8.2). | Forward-compatible with TanStack-based polling — the same hook code consumes the envelope today (`status: completed`) and tomorrow (`status: queued` → polled to `completed`). | -| **D10** | Auth on the new endpoint: introduce `PERMISSIONS.AI_TOOLS_USE` as an _alias_ with the same underlying value as `CONTENT_UPDATE`. **Reviewer-confirmed (kaseywright, 2026-06-02):** alias approach approved; the trade-off (flexibility vs. user-results) was noted as acceptable provided the decision is documented here for future reference. See §9.3. | Cosmetically separates "can edit content" from "can invoke AI tools" without making a real distinction yet. Trivial to peel apart later. 
| -| **D11** | A smoke test analogous to [`fluent-ai/scripts/smoke_repeated_words.py`](../../fluent-ai/scripts/smoke_repeated_words.py) is added, runnable from the host with both services up. | Lets devs verify the cross-service plumbing without running the full vitest suite. | -| **D12** | This work ships as a **coordinated pair of PRs**: one against fluent-api (the bulk of the work) and one small PR against fluent-platform (compose env-var override + 1–2 README lines). Either order of merge is fine; both should be ready for review together. | The fluent-platform PR is small and contains no logic, so it can land first to unblock ecosystem-mode dev. Reviewers should be able to read both PRs side-by-side. | +| # | Decision | Short rationale | +| ------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| **D1** | PR scope is "minimum proxy" — no DB persistence, no job queue exercised in this PR. | Repeated-words is fast (<1s) and re-runnable; persistence is not motivated by this tool. 
Defer until a slow tool justifies a `ai_tool_runs` table. | +| **D2** | URL is `POST /ai/tools/greek-room/repeated-words`. | Introduces `/ai/` as fluent-api's first top-level service-family namespace. Telegraphs "network-bound, potentially slow, possibly async" — characteristics that local CRUD endpoints don't share. Per-tool URL preserves OpenAPI type-safety. Alternatives: `/checks/repeated-words` (more in convention but hides the proxy nature), nested under `/chapter-assignments/{id}/` (requires server-side enrichment which we reject in D8). | +| **D3** | Polling lives in the _browser_ via TanStack Query's `refetchInterval`, not in fluent-api. fluent-api is a thin pass-through for both kickoff and (future) polling. | Decouples slow tools from fluent-api's request budget. Aligns with fluent-web's existing TanStack Query usage. The polling code path is not exercised today because fluent-ai always returns `status: "completed"` synchronously. | +| **D4** | File layout: shared utility at [`fluent-api/src/lib/services/fluent-ai/fluent-ai.client.ts`](../src/lib/services/fluent-ai/fluent-ai.client.ts); per-tool routes/services in [`fluent-api/src/domains/ai-tools/`](../src/domains/ai-tools/). One route file for all tools; per-tool Zod schemas keep OpenAPI documentation fully typed. | Mirrors the existing [`fluent-api/src/lib/services/notifications/mailgun.service.ts`](../src/lib/services/notifications/mailgun.service.ts) pattern for "free functions wrapping a third-party API" and the existing [`fluent-api/src/lib/db-retry.ts`](../src/lib/db-retry.ts) pattern for "higher-order utility used by many call sites." Avoids a single one-size-fits-all dispatcher that would degrade OpenAPI schemas to `dict[str, Any]`. | +| **D5** | Service discovery / docker networking is handled by the existing [`fluent-platform`](../../fluent-platform/README.md) orchestrator. 
This PR adds two env vars on the fluent-api side and one `environment:` override on the fluent-platform side (`FLUENT_AI_URL: http://ai:8200`). See §12. | fluent-platform already wires `db`, `api`, `worker`, `ai`, `web` together on a shared network; we plug in to that substrate rather than invent a new one. | +| **D6** | A single shared `FLUENT_AI_KEY` is provisioned for the fluent-api → fluent-ai hop. If another consumer of fluent-ai appears later, it gets its own key. | Per-user keys give zero security benefit at this layer (everyone going through fluent-api is already authenticated to fluent-api). Single key minimizes IT complexity. | +| **D7** | Error translation specifics deferred to implementation. If conformity between the two error systems is awkward, prefer harmonizing fluent-ai toward fluent-api's patterns rather than the other way. | At the spec level there are no hard constraints; the safe defaults (5xx from fluent-ai → 502 on fluent-api with `ErrorCode.AI_SERVICE_UNAVAILABLE`) are obvious. | +| **D8** | No request enrichment. fluent-api forwards the request body to fluent-ai verbatim. fluent-web sends the full `RepeatedWordsRequest` shape including `lang_code`, `lang_name`, `project_id`, `project_name`, `verses[]`. **Reviewer-confirmed (kaseywright, 2026-06-02):** forwarding verbatim is approved; the snake_case field naming is an _intentional, contained_ exception to fluent-api's camelCase convention, scoped to the AI-tools domain. See §8.1 — please leave this divergence as-is rather than "normalizing" it to camelCase, since it intentionally mirrors the fluent-ai contract. | Maximum flexibility for the caller. Avoids coupling fluent-api to fluent-ai's request schema (today and tomorrow). The alternatives (per-service renaming or mapper functions) add cost without a corresponding benefit; keeping the divergence contained in the AI-tools domain is the lesser evil.
| +| **D9** | The full `ToolJobResponse` envelope is passed through to fluent-web unchanged. No unwrap to `result` for the synchronous case. **Reviewer-confirmed (kaseywright, 2026-06-02):** approved on the condition that the response delivered to the web client conforms to the standard response format already in place on fluent-api (see §8.2). | Forward-compatible with TanStack-based polling — the same hook code consumes the envelope today (`status: completed`) and tomorrow (`status: queued` → polled to `completed`). | +| **D10** | Auth on the new endpoint: introduce `PERMISSIONS.AI_TOOLS_USE` as an _alias_ with the same underlying value as `CONTENT_UPDATE`. **Reviewer-confirmed (kaseywright, 2026-06-02):** alias approach approved; the trade-off (flexibility vs. user-results) was noted as acceptable provided the decision is documented here for future reference. See §9.3. | Cosmetically separates "can edit content" from "can invoke AI tools" without making a real distinction yet. Trivial to peel apart later. | +| **D11** | A smoke test analogous to [`fluent-ai/scripts/smoke_repeated_words.py`](../../fluent-ai/scripts/smoke_repeated_words.py) is added, runnable from the host with both services up. | Lets devs verify the cross-service plumbing without running the full vitest suite. | +| **D12** | This work ships as a **coordinated pair of PRs**: one against fluent-api (the bulk of the work) and one small PR against fluent-platform (compose env-var override + 1–2 README lines). Either order of merge is fine; both should be ready for review together. | The fluent-platform PR is small and contains no logic, so it can land first to unblock ecosystem-mode dev. Reviewers should be able to read both PRs side-by-side. | --- @@ -325,7 +325,7 @@ Notes: - `verses` is required and non-empty (`.min(1)`) so we can fail fast at the route layer rather than incur a round-trip to fluent-ai for a trivially-invalid request. 
- The field naming uses fluent-ai's snake_case verbatim (`lang_code`, `snt_id`). This is a deliberate departure from fluent-api's camelCase elsewhere; the alternative (rename in fluent-api, re-rename in fluent-ai) buys nothing and risks drift. The OpenAPI docs make the snake_case visible to the frontend. -> **ℹ️ Intentional convention exception — please leave as-is.** _Reviewer-confirmed by kaseywright on 2026-06-02 ([PR #173, review comment](https://github.com/eten-tech-foundation/fluent-api/pull/173#discussion_r3343677813)); decision **D8**._ The snake_case field names in `src/domains/ai-tools/` and `src/lib/services/fluent-ai/` are an _intentional_ divergence from fluent-api's camelCase convention, kept so the wire contract with fluent-ai stays a verbatim pass-through. The reviewer noted that the naming-case divergence is something he'd normally prefer to avoid, but that the alternatives (per-service renaming or mapper functions) are no more rewarding, so this contained exception is the accepted trade-off. When working in this area, please keep these fields in snake_case rather than "normalizing" them to camelCase — renaming them would silently break the fluent-ai contract. The exception is scoped strictly to the AI-tools domain; the rest of fluent-api remains camelCase. +> **ℹ️ Intentional convention exception — please leave as-is.** _Reviewer-confirmed by kaseywright on 2026-06-02 ([PR #173, review comment](https://github.com/eten-tech-foundation/fluent-api/pull/173#discussion_r3343677813)); decision **D8**._ The snake_case field names in `src/domains/ai-tools/` and `src/lib/services/fluent-ai/` are an _intentional_ divergence from fluent-api's camelCase convention, kept so the wire contract with fluent-ai stays a verbatim pass-through.
The reviewer noted that the naming-case divergence is something he'd normally prefer to avoid, but that the alternatives (per-service renaming or mapper functions) are no more rewarding, so this contained exception is the accepted trade-off. When working in this area, please keep these fields in snake_case rather than "normalizing" them to camelCase — renaming them would silently break the fluent-ai contract. The exception is scoped strictly to the AI-tools domain; the rest of fluent-api remains camelCase. > > **Implementation note:** it helps to place a short code comment next to the snake_case Zod schemas (at least in [`ai-tools.types.ts`](../src/domains/ai-tools/ai-tools.types.ts) and [`fluent-ai.types.ts`](../src/lib/services/fluent-ai/fluent-ai.types.ts)) explaining the convention and linking back to both this decision (**D8** / §8.1) and the originating review comment. The comment is what an AI agent or contributor will actually see at the edit site, so it — alongside this proposal — is the most durable guardrail against an accidental rename. Suggested wording: > @@ -338,22 +338,23 @@ Notes: ### 8.2 The reverse direction (fluent-ai → fluent-api → fluent-web) -Per **D9** (envelope pass-through), the response body is fluent-ai's `ToolJobResponse[RepeatedWordsResult]` verbatim: +Per **D9** (envelope pass-through), the response body is fluent-ai's `ToolJobResponse[RepeatedWordsResult]` verbatim. The schemas below mirror the live fluent-ai contract — [`fluent-ai/src/app/schemas/greek_room.py`](../../../../fluent-ai/src/app/schemas/greek_room.py) and [`fluent-ai/src/app/schemas/tool_job.py`](../../../../fluent-ai/src/app/schemas/tool_job.py) — and are verified against [`fluent-ai/tests/api/v1/test_greek_room.py`](../../../../fluent-ai/tests/api/v1/test_greek_room.py). 
```ts // fluent-api/src/lib/services/fluent-ai/fluent-ai.types.ts export type JobStatus = 'queued' | 'running' | 'completed' | 'failed' | 'cancelled'; +// Mirrors fluent-ai's ToolError (src/app/schemas/tool_job.py): { code, message, details? }. export interface ToolJobError { - type: string; // e.g. 'TOOL_EXECUTION_ERROR' + code: string; // e.g. 'TOOL_EXECUTION_ERROR' message: string; details?: unknown; } export interface ToolJobResponse<TResult> { job_id: string; // UUID - tool: string; // e.g. 'greek-room/repeated-words' + tool: string; // e.g. 'greek_room.repeated_words' status: JobStatus; result: TResult | null; error: ToolJobError | null; @@ -371,16 +372,21 @@ export const RepeatedWordsFindingSchema = z.object({ surf: z.string(), start_position: z.number().int().nonnegative(), legitimate: z.boolean(), - severity: z.enum(['info', 'warning', 'error']), + // Upstream Greek-Room numeric severity (e.g. 0.1 legitimate, 0.5 suspicious). + severity: z.number(), }); export const RepeatedWordsSummarySchema = z.object({ total_findings: z.number().int().nonnegative(), - verses_with_findings: z.number().int().nonnegative(), - verses_total: z.number().int().nonnegative(), + legitimate_count: z.number().int().nonnegative(), + verse_count: z.number().int().nonnegative(), }); export const RepeatedWordsResultSchema = z.object({ + // Upstream library identity fields (distinct from the envelope's `tool`).
+ lang_code: z.string(), + provider: z.string(), + check: z.string(), findings: z.array(RepeatedWordsFindingSchema), summary: RepeatedWordsSummarySchema, }); @@ -389,12 +395,12 @@ export type RepeatedWordsResult = z.infer; export const RepeatedWordsResponseSchema = z.object({ job_id: z.string().uuid(), - tool: z.literal('greek-room/repeated-words'), + tool: z.literal('greek_room.repeated_words'), status: z.enum(['queued', 'running', 'completed', 'failed', 'cancelled']), result: RepeatedWordsResultSchema.nullable(), error: z .object({ - type: z.string(), + code: z.string(), message: z.string(), details: z.unknown().optional(), }) @@ -534,7 +540,7 @@ export const ErrorHttpStatus: Record = { | fluent-ai returns 5xx | `Result.err({ code: AI_SERVICE_UNAVAILABLE, message: '', details: { status, body } })` | | fluent-ai returns 4xx | `Result.err({ code: AI_SERVICE_UNAVAILABLE, message: '', details: { status, body } })` — yes, also 502 on our side; 4xx from fluent-ai represents a misconfiguration or a contract drift, neither of which is the _caller's_ fault, so we shield them with 502 rather than relay a 4xx that they cannot act on | | Response body fails JSON parse or envelope schema validation | `Result.err({ code: AI_SERVICE_UNAVAILABLE, message: 'malformed response from fluent-ai', details: { cause } })` | -| Envelope `status === "failed"` (fluent-ai reachable; tool refused) | `Result.err({ code: AI_TOOL_EXECUTION_FAILED, message: envelope.error?.message ?? 'tool execution failed', details: { type: envelope.error?.type, ... } })` | +| Envelope `status === "failed"` (fluent-ai reachable; tool refused) | `Result.err({ code: AI_TOOL_EXECUTION_FAILED, message: envelope.error?.message ?? 'tool execution failed', details: { upstreamCode: envelope.error?.code, ... 
} })` | | Envelope `status === "cancelled"` | Same as `failed` — propagate `AI_TOOL_EXECUTION_FAILED` | | Envelope `status === "completed"` | `Result.ok(envelope)` | | Envelope `status === "queued"` or `"running"` | `Result.ok(envelope)` — the route layer decides whether to return 200 or 202 based on `status` | diff --git a/package-lock.json b/package-lock.json index e8bcaf5..db5779c 100644 --- a/package-lock.json +++ b/package-lock.json @@ -902,6 +902,7 @@ "cpu": [ "arm" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -918,6 +919,7 @@ "cpu": [ "arm64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -934,6 +936,7 @@ "cpu": [ "x64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -950,6 +953,7 @@ "cpu": [ "arm64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -966,6 +970,7 @@ "cpu": [ "x64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -982,6 +987,7 @@ "cpu": [ "arm64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -998,6 +1004,7 @@ "cpu": [ "x64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -1014,6 +1021,7 @@ "cpu": [ "arm" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -1030,6 +1038,7 @@ "cpu": [ "arm64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -1046,6 +1055,7 @@ "cpu": [ "ia32" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -1062,6 +1072,7 @@ "cpu": [ "loong64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -1078,6 +1089,7 @@ "cpu": [ "mips64el" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -1094,6 +1106,7 @@ "cpu": [ "ppc64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -1110,6 +1123,7 @@ "cpu": [ "riscv64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -1126,6 +1140,7 @@ "cpu": [ "s390x" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -1142,6 +1157,7 @@ "cpu": [ "x64" ], + "dev": true, 
"license": "MIT", "optional": true, "os": [ @@ -1158,6 +1174,7 @@ "cpu": [ "x64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -1174,6 +1191,7 @@ "cpu": [ "x64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -1190,6 +1208,7 @@ "cpu": [ "x64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -1206,6 +1225,7 @@ "cpu": [ "arm64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -1222,6 +1242,7 @@ "cpu": [ "ia32" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -1238,6 +1259,7 @@ "cpu": [ "x64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -1304,6 +1326,7 @@ "cpu": [ "ppc64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -1320,6 +1343,7 @@ "cpu": [ "arm" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -1336,6 +1360,7 @@ "cpu": [ "arm64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -1352,6 +1377,7 @@ "cpu": [ "x64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -1368,6 +1394,7 @@ "cpu": [ "arm64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -1384,6 +1411,7 @@ "cpu": [ "x64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -1400,6 +1428,7 @@ "cpu": [ "arm64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -1416,6 +1445,7 @@ "cpu": [ "x64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -1432,6 +1462,7 @@ "cpu": [ "arm" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -1448,6 +1479,7 @@ "cpu": [ "arm64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -1464,6 +1496,7 @@ "cpu": [ "ia32" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -1480,6 +1513,7 @@ "cpu": [ "loong64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -1496,6 +1530,7 @@ "cpu": [ "mips64el" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -1512,6 +1547,7 @@ "cpu": [ "ppc64" ], + "dev": 
true, "license": "MIT", "optional": true, "os": [ @@ -1528,6 +1564,7 @@ "cpu": [ "riscv64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -1544,6 +1581,7 @@ "cpu": [ "s390x" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -1560,6 +1598,7 @@ "cpu": [ "x64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -1576,6 +1615,7 @@ "cpu": [ "arm64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -1592,6 +1632,7 @@ "cpu": [ "x64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -1608,6 +1649,7 @@ "cpu": [ "arm64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -1624,6 +1666,7 @@ "cpu": [ "x64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -1640,6 +1683,7 @@ "cpu": [ "arm64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -1656,6 +1700,7 @@ "cpu": [ "x64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -1672,6 +1717,7 @@ "cpu": [ "arm64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -1688,6 +1734,7 @@ "cpu": [ "ia32" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -1704,6 +1751,7 @@ "cpu": [ "x64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -4487,6 +4535,7 @@ "cpu": [ "arm" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -4500,6 +4549,7 @@ "cpu": [ "arm64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -4513,6 +4563,7 @@ "cpu": [ "arm64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -4526,6 +4577,7 @@ "cpu": [ "x64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -4539,6 +4591,7 @@ "cpu": [ "arm64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -4552,6 +4605,7 @@ "cpu": [ "x64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -4565,6 +4619,7 @@ "cpu": [ "arm" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -4578,6 +4633,7 @@ "cpu": [ "arm" ], + 
"dev": true, "license": "MIT", "optional": true, "os": [ @@ -4591,6 +4647,7 @@ "cpu": [ "arm64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -4604,6 +4661,7 @@ "cpu": [ "arm64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -4617,6 +4675,7 @@ "cpu": [ "loong64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -4630,6 +4689,7 @@ "cpu": [ "loong64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -4643,6 +4703,7 @@ "cpu": [ "ppc64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -4656,6 +4717,7 @@ "cpu": [ "ppc64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -4669,6 +4731,7 @@ "cpu": [ "riscv64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -4682,6 +4745,7 @@ "cpu": [ "riscv64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -4695,6 +4759,7 @@ "cpu": [ "s390x" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -4708,6 +4773,7 @@ "cpu": [ "x64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -4721,6 +4787,7 @@ "cpu": [ "x64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -4734,6 +4801,7 @@ "cpu": [ "x64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -4747,6 +4815,7 @@ "cpu": [ "arm64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -4760,6 +4829,7 @@ "cpu": [ "arm64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -4773,6 +4843,7 @@ "cpu": [ "ia32" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -4786,6 +4857,7 @@ "cpu": [ "x64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -4799,6 +4871,7 @@ "cpu": [ "x64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -4967,7 +5040,7 @@ "version": "4.1.12", "resolved": "https://registry.npmjs.org/@types/debug/-/debug-4.1.12.tgz", "integrity": "sha512-vIChWdVG3LG1SMxEvI/AK+FWJthlrqlTu7fbrlywTkkaONwk/UAGaULXRlf8vkzFBLVm0zkMdCquhL5aOjhXPQ==", 
- "devOptional": true, + "dev": true, "license": "MIT", "dependencies": { "@types/ms": "*" @@ -5008,7 +5081,7 @@ "version": "2.1.0", "resolved": "https://registry.npmjs.org/@types/ms/-/ms-2.1.0.tgz", "integrity": "sha512-GsCCIZDE/p3i96vtEqx+7dBUGXrc7zeSK3wwPHIaRThS+9OhWIXRqzs4d6k1SVU8g91DrNRWxWUGhp5KXQb2VA==", - "devOptional": true, + "dev": true, "license": "MIT" }, "node_modules/@types/mysql": { @@ -8365,6 +8438,7 @@ "version": "2.3.3", "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz", "integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==", + "dev": true, "hasInstallScript": true, "license": "MIT", "optional": true, @@ -12308,6 +12382,7 @@ "cpu": [ "ppc64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -12324,6 +12399,7 @@ "cpu": [ "arm" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -12340,6 +12416,7 @@ "cpu": [ "arm64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -12356,6 +12433,7 @@ "cpu": [ "x64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -12372,6 +12450,7 @@ "cpu": [ "arm64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -12388,6 +12467,7 @@ "cpu": [ "x64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -12404,6 +12484,7 @@ "cpu": [ "arm64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -12420,6 +12501,7 @@ "cpu": [ "x64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -12436,6 +12518,7 @@ "cpu": [ "arm" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -12452,6 +12535,7 @@ "cpu": [ "arm64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -12468,6 +12552,7 @@ "cpu": [ "ia32" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -12484,6 +12569,7 @@ "cpu": [ "loong64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -12500,6 +12586,7 @@ "cpu": [ "mips64el" ], + "dev": true, 
"license": "MIT", "optional": true, "os": [ @@ -12516,6 +12603,7 @@ "cpu": [ "ppc64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -12532,6 +12620,7 @@ "cpu": [ "riscv64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -12548,6 +12637,7 @@ "cpu": [ "s390x" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -12564,6 +12654,7 @@ "cpu": [ "x64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -12580,6 +12671,7 @@ "cpu": [ "arm64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -12596,6 +12688,7 @@ "cpu": [ "x64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -12612,6 +12705,7 @@ "cpu": [ "arm64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -12628,6 +12722,7 @@ "cpu": [ "x64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -12644,6 +12739,7 @@ "cpu": [ "arm64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -12660,6 +12756,7 @@ "cpu": [ "x64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -12676,6 +12773,7 @@ "cpu": [ "arm64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -12692,6 +12790,7 @@ "cpu": [ "ia32" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -12708,6 +12807,7 @@ "cpu": [ "x64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -13090,6 +13190,7 @@ "cpu": [ "ppc64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -13106,6 +13207,7 @@ "cpu": [ "arm" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -13122,6 +13224,7 @@ "cpu": [ "arm64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -13138,6 +13241,7 @@ "cpu": [ "x64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -13154,6 +13258,7 @@ "cpu": [ "arm64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -13170,6 +13275,7 @@ "cpu": [ "x64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -13186,6 
+13292,7 @@ "cpu": [ "arm64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -13202,6 +13309,7 @@ "cpu": [ "x64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -13218,6 +13326,7 @@ "cpu": [ "arm" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -13234,6 +13343,7 @@ "cpu": [ "arm64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -13250,6 +13360,7 @@ "cpu": [ "ia32" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -13266,6 +13377,7 @@ "cpu": [ "loong64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -13282,6 +13394,7 @@ "cpu": [ "mips64el" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -13298,6 +13411,7 @@ "cpu": [ "ppc64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -13314,6 +13428,7 @@ "cpu": [ "riscv64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -13330,6 +13445,7 @@ "cpu": [ "s390x" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -13346,6 +13462,7 @@ "cpu": [ "x64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -13362,6 +13479,7 @@ "cpu": [ "arm64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -13378,6 +13496,7 @@ "cpu": [ "x64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -13394,6 +13513,7 @@ "cpu": [ "arm64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -13410,6 +13530,7 @@ "cpu": [ "x64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -13426,6 +13547,7 @@ "cpu": [ "arm64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -13442,6 +13564,7 @@ "cpu": [ "x64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -13458,6 +13581,7 @@ "cpu": [ "arm64" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -13474,6 +13598,7 @@ "cpu": [ "ia32" ], + "dev": true, "license": "MIT", "optional": true, "os": [ @@ -13490,6 +13615,7 @@ "cpu": [ "x64" ], + "dev": true, 
"license": "MIT", "optional": true, "os": [ diff --git a/package.json b/package.json index 03ad4e1..a326c2e 100644 --- a/package.json +++ b/package.json @@ -22,6 +22,7 @@ "format:check": "prettier --check .", "prepare": "husky", "precheck": "npm run lint && npm run format:check && npm run typecheck && npm run test", + "smoke:repeated-words": "tsx scripts/smoke-repeated-words.ts", "db:seed:rbac": "npx tsx src/db/seeds/rbac.ts", "db:seed:roles": "npx tsx src/db/seeds/roles.ts", "db:seed:org": "npx tsx src/db/seeds/organizations.ts", diff --git a/scripts/smoke-repeated-words.ts b/scripts/smoke-repeated-words.ts new file mode 100644 index 0000000..a3c5583 --- /dev/null +++ b/scripts/smoke-repeated-words.ts @@ -0,0 +1,281 @@ +#!/usr/bin/env -S npx tsx +/** + * Manual smoke test for `POST /ai/tools/greek-room/repeated-words` on fluent-api. + * + * Mirrors fluent-ai's `scripts/smoke_repeated_words.py`, but probes the + * fluent-api proxy endpoint (which in turn calls fluent-ai). It hits the + * running fluent-api service over real HTTP with the same canned 3-verse + * corpus that exercises: + * + * - one verse with a suspicious duplicate ("In in the beginning ...") + * - one verse with a legitimate duplicate ("Truly, truly, I say unto thee.") + * - one clean verse with no duplicates + * + * It is a thin "does the deployed proxy respond correctly" probe, NOT a + * substitute for the vitest suite. It requires BOTH fluent-api and fluent-ai + * to be up (ecosystem mode, or standalone with fluent-ai started separately). + * + * Because the fluent-api endpoint is guarded by a BetterAuth session + + * AI_TOOLS_USE permission, you must supply a session credential: + * + * # Bearer token (mobile / API client): + * npm run smoke:repeated-words -- --token "" + * + * # Or a raw Cookie header (web session): + * npm run smoke:repeated-words -- --cookie "better-auth.session_token=..." 
+ * + * # Override the base URL (default: $FLUENT_API_URL or http://localhost:9999): + * npm run smoke:repeated-words -- --url http://localhost:9999 --token "..." + * + * # Print the raw response body and skip sanity checks: + * npm run smoke:repeated-words -- --raw --token "..." + * + * Exit status: + * 0 — request succeeded and (unless --raw) all sanity checks passed + * 1 — HTTP error, unexpected response shape, or failed sanity check + * 2 — bad CLI arguments + */ + +/* eslint-disable no-console */ + +interface VerseInput { + snt_id: string; + text: string; +} + +interface SampleRequest { + lang_code: string; + lang_name: string; + project_id: string | number; + project_name: string; + verses: VerseInput[]; +} + +// Must stay in sync with fluent-ai/tests/api/v1/test_greek_room.py so that +// "what passes in pytest" and "what this script sends" agree. +const SAMPLE_REQUEST: SampleRequest = { + lang_code: 'eng', + lang_name: 'English', + project_id: 'smoke-test', + project_name: 'Smoke Test', + verses: [ + { snt_id: 'GEN 1:1', text: 'In in the beginning God created the heavens.' }, + { snt_id: 'JHN 3:3', text: 'Truly, truly, I say unto thee.' }, + { snt_id: 'PSA 23:1', text: 'The Lord is my shepherd.' }, + ], +}; + +interface CliArgs { + url: string; + token?: string; + cookie?: string; + timeoutMs: number; + raw: boolean; +} + +function parseArgs(argv: string[]): CliArgs { + const args: CliArgs = { + url: (process.env.FLUENT_API_URL ?? 'http://localhost:9999').replace(/\/$/, ''), + timeoutMs: 30_000, + raw: false, + }; + + for (let i = 0; i < argv.length; i++) { + const arg = argv[i]; + switch (arg) { + case '--url': + args.url = (argv[++i] ?? 
'').replace(/\/$/, ''); + break; + case '--token': + args.token = argv[++i]; + break; + case '--cookie': + args.cookie = argv[++i]; + break; + case '--timeout': + args.timeoutMs = Number(argv[++i]) * 1000; + break; + case '--raw': + args.raw = true; + break; + case '-h': + case '--help': + console.error( + 'Usage: npm run smoke:repeated-words -- [--url <url>] [--token <token>] ' + + '[--cookie <header>] [--timeout <seconds>] [--raw]' + ); + process.exit(2); + break; + default: + console.error(`error: unknown argument: ${arg}`); + process.exit(2); + } + } + + if (!args.token && !args.cookie && !process.env.FLUENT_API_TOKEN) { + console.error( + 'error: no session credential supplied. Pass --token <token> or --cookie <header> ' + + '(or set FLUENT_API_TOKEN). The endpoint requires a BetterAuth session.' + ); + process.exit(2); + } + if (!args.token && process.env.FLUENT_API_TOKEN) { + args.token = process.env.FLUENT_API_TOKEN; + } + + return args; +} + +interface Finding { + legitimate?: boolean; + [key: string]: unknown; +} + +interface SanityCheck { + passed: boolean; + label: string; +} + +function runSanityChecks(payload: unknown): SanityCheck[] { + const checks: SanityCheck[] = []; + const record = (label: string, passed: boolean) => checks.push({ label, passed }); + + if (typeof payload !== 'object' || payload === null) { + record('response is a JSON object', false); + return checks; + } + record('response is a JSON object', true); + + const envelope = payload as Record<string, unknown>; + record('envelope.status == "completed"', envelope.status === 'completed'); + record( + 'envelope.tool == "greek_room.repeated_words"', + envelope.tool === 'greek_room.repeated_words' + ); + + const result = envelope.result; + if (typeof result !== 'object' || result === null) { + record('envelope.result is a JSON object', false); + return checks; + } + record('envelope.result is a JSON object', true); + + const resultObj = result as Record<string, unknown>; + const findings = resultObj.findings; + if (!Array.isArray(findings)) { + record('result.findings is an array', false); + return checks; + } + record('result.findings is an array', true); + record('result.findings has exactly 2 entries', findings.length === 2); + + const typed = findings as Finding[]; + const legitimate = typed.filter((f) => f.legitimate === true); + const suspicious = typed.filter((f) => f.legitimate === false); + record('exactly one legitimate finding', legitimate.length === 1); + record('exactly one suspicious finding', suspicious.length === 1); + + const summary = resultObj.summary; + if (typeof summary === 'object' && summary !== null) { + const summaryObj = summary as Record<string, unknown>; + record('summary.verse_count == 3', summaryObj.verse_count === 3); + record(
+ 'summary.total_findings == result.findings.length', + summaryObj.total_findings === findings.length + ); + } else { + record('summary is a JSON object', false); + } + + return checks; +} + +async function main(): Promise<number> { + const args = parseArgs(process.argv.slice(2)); + const endpoint = `${args.url}/ai/tools/greek-room/repeated-words`; + + const headers: Record<string, string> = { + 'Content-Type': 'application/json', + Accept: 'application/json', + }; + if (args.token) headers.Authorization = `Bearer ${args.token}`; + if (args.cookie) headers.Cookie = args.cookie; + + console.error(`POST ${endpoint}`); + console.error(args.token ? 'Authorization: Bearer ...(redacted)' : 'Cookie: ...(redacted)'); + console.error(''); + + const controller = new AbortController(); + const timer = setTimeout(() => controller.abort(), args.timeoutMs); + + let status: number; + let rawBody: string; + try { + const response = await fetch(endpoint, { + method: 'POST', + headers, + body: JSON.stringify(SAMPLE_REQUEST), + signal: controller.signal, + }); + status = response.status; + rawBody = await response.text(); + } catch (error) { + const isAbort = error instanceof Error && error.name === 'AbortError'; + console.error( + isAbort + ? `error: request to ${endpoint} timed out` + : `error: could not reach ${endpoint}: ${error instanceof Error ? error.message : String(error)}` + ); + return 1; + } finally { + clearTimeout(timer); + } + + console.error(`HTTP ${status}`); + console.error(''); + + if (args.raw) { + process.stdout.write(rawBody.endsWith('\n') ? rawBody : `${rawBody}\n`); + return status === 200 ?
0 : 1; + } + + let payload: unknown; + try { + payload = JSON.parse(rawBody); + } catch { + console.error('error: response was not valid JSON; raw body follows:'); + console.error(rawBody); + return 1; + } + + console.log(JSON.stringify(payload, null, 2)); + + if (status !== 200) { + console.error(`\nerror: expected HTTP 200, got ${status}`); + return 1; + } + + console.error(''); + console.error('--- response shape sanity checks ---'); + const results = runSanityChecks(payload); + let failed = false; + for (const { passed, label } of results) { + console.error(` ${passed ? 'ok ' : 'FAIL'} ${label}`); + if (!passed) failed = true; + } + + console.error(''); + if (failed) { + console.error('one or more sanity checks failed'); + return 1; + } + console.error('smoke test passed'); + return 0; +} + +main() + .then((code) => process.exit(code)) + .catch((error: unknown) => { + console.error('unexpected error:', error); + process.exit(1); + }); diff --git a/src/app.ts b/src/app.ts index aecadd8..c88eff9 100644 --- a/src/app.ts +++ b/src/app.ts @@ -20,6 +20,7 @@ import '@/domains/chapter-assignments/editor-state/user-chapter-assignment-edito import '@/domains/projects/users/project-users.route'; import '@/domains/users/projects/user-projects.route'; import '@/domains/chapter-assignments/presence/chapter-assignments-presence.route'; +import '@/domains/ai-tools/ai-tools.route'; configureOpenAPI(server); export default server; diff --git a/src/domains/ai-tools/ai-tools.route.test.ts b/src/domains/ai-tools/ai-tools.route.test.ts new file mode 100644 index 0000000..4c80ce2 --- /dev/null +++ b/src/domains/ai-tools/ai-tools.route.test.ts @@ -0,0 +1,192 @@ +import { beforeEach, describe, expect, it, vi } from 'vitest'; + +import { getUserByEmail } from '@/domains/users/users.service'; +import { auth } from '@/lib/auth'; +import { roleHasPermission } from '@/lib/services/permissions/permissions.service'; +import { ErrorCode } from '@/lib/types'; +import { server } from 
'@/server/server'; + +import { callRepeatedWords } from './ai-tools.service'; + +import '@/domains/ai-tools/ai-tools.route'; + +// ─── Module mocks ───────────────────────────────────────────────────────────── + +vi.mock('@/lib/auth', () => ({ + auth: { + api: { getSession: vi.fn() }, + handler: vi.fn(), + }, +})); + +vi.mock('@/db', () => ({ + db: { select: vi.fn(), insert: vi.fn(), update: vi.fn() }, +})); + +vi.mock('@/lib/logger', () => ({ + logger: { info: vi.fn(), error: vi.fn(), debug: vi.fn(), warn: vi.fn() }, +})); + +vi.mock('@/domains/users/users.service', () => ({ + getUserByEmail: vi.fn(), +})); + +vi.mock('@/lib/services/permissions/permissions.service', () => ({ + roleHasPermission: vi.fn(), +})); + +vi.mock('./ai-tools.service', () => ({ + callRepeatedWords: vi.fn(), +})); + +// ─── Fixtures ───────────────────────────────────────────────────────────────── + +const VALID_BODY = { + lang_code: 'eng', + lang_name: 'English', + project_id: 1, + project_name: 'Test Project', + verses: [{ snt_id: 'GEN 1:1', text: 'In in the beginning' }], +}; + +const APP_USER = { + id: 1, + email: 'translator@example.com', + role: 5, + roleName: 'translator', + organization: 1, + status: 'verified' as const, +}; + +function buildEnvelope(overrides: Record = {}) { + return { + job_id: '11111111-1111-1111-1111-111111111111', + tool: 'greek_room.repeated_words', + status: 'completed', + result: { + lang_code: 'eng', + provider: 'GreekRoom', + check: 'RepeatedWords', + findings: [], + summary: { total_findings: 0, legitimate_count: 0, verse_count: 1 }, + }, + error: null, + created_at: '2026-06-02T00:00:00.000Z', + completed_at: '2026-06-02T00:00:01.000Z', + ...overrides, + }; +} + +/** Authenticate as APP_USER with the given permission grant. 
*/ +function asAuthenticatedUser(granted: boolean) { + (auth.api.getSession as any).mockResolvedValue({ + session: { id: 's1', updatedAt: new Date(), expiresAt: new Date(Date.now() + 1e9) }, + user: { email: APP_USER.email }, + }); + (getUserByEmail as any).mockResolvedValue({ ok: true, data: APP_USER }); + (roleHasPermission as any).mockResolvedValue(granted); +} + +function postRepeatedWords(body: unknown, headers: Record = {}) { + return server.request('/ai/tools/greek-room/repeated-words', { + method: 'POST', + headers: { 'Content-Type': 'application/json', ...headers }, + body: JSON.stringify(body), + }); +} + +describe('pOST /ai/tools/greek-room/repeated-words', () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + it('returns 401 when the caller is not authenticated', async () => { + (auth.api.getSession as any).mockResolvedValue(null); + + const res = await postRepeatedWords(VALID_BODY); + + expect(res.status).toBe(401); + expect(callRepeatedWords).not.toHaveBeenCalled(); + }); + + it('returns 403 when the caller lacks AI_TOOLS_USE', async () => { + asAuthenticatedUser(false); + + const res = await postRepeatedWords(VALID_BODY); + + expect(res.status).toBe(403); + expect(callRepeatedWords).not.toHaveBeenCalled(); + }); + + it('returns 400 when the body is invalid (empty verses)', async () => { + asAuthenticatedUser(true); + + const res = await postRepeatedWords({ ...VALID_BODY, verses: [] }); + + expect([400, 422]).toContain(res.status); + expect(callRepeatedWords).not.toHaveBeenCalled(); + }); + + it('returns 200 and passes the envelope through verbatim for a completed result', async () => { + asAuthenticatedUser(true); + const envelope = buildEnvelope(); + (callRepeatedWords as any).mockResolvedValue({ ok: true, data: envelope }); + + const res = await postRepeatedWords(VALID_BODY); + + expect(res.status).toBe(200); + const json = await res.json(); + expect(json).toEqual(envelope); + }); + + it('returns 202 for a queued envelope', async () => { + 
asAuthenticatedUser(true); + const envelope = buildEnvelope({ status: 'queued', result: null, completed_at: null }); + (callRepeatedWords as any).mockResolvedValue({ ok: true, data: envelope }); + + const res = await postRepeatedWords(VALID_BODY); + + expect(res.status).toBe(202); + const json = await res.json(); + expect(json.status).toBe('queued'); + }); + + it('returns 502 with the standard error shape when the tool execution failed', async () => { + asAuthenticatedUser(true); + (callRepeatedWords as any).mockResolvedValue({ + ok: false, + error: { code: ErrorCode.AI_TOOL_EXECUTION_FAILED, message: 'tool execution failed' }, + }); + + const res = await postRepeatedWords(VALID_BODY); + + expect(res.status).toBe(502); + const json = await res.json(); + expect(json.code).toBe(ErrorCode.AI_TOOL_EXECUTION_FAILED); + expect(json.error).toBe('tool execution failed'); + }); + + it('returns 502 when the upstream transport failed', async () => { + asAuthenticatedUser(true); + (callRepeatedWords as any).mockResolvedValue({ + ok: false, + error: { code: ErrorCode.AI_SERVICE_UNAVAILABLE, message: 'fluent-ai unreachable' }, + }); + + const res = await postRepeatedWords(VALID_BODY); + + expect(res.status).toBe(502); + const json = await res.json(); + expect(json.code).toBe(ErrorCode.AI_SERVICE_UNAVAILABLE); + }); + + it('forwards the request body verbatim (no enrichment)', async () => { + asAuthenticatedUser(true); + (callRepeatedWords as any).mockResolvedValue({ ok: true, data: buildEnvelope() }); + + await postRepeatedWords(VALID_BODY); + + expect(callRepeatedWords).toHaveBeenCalledOnce(); + expect(callRepeatedWords).toHaveBeenCalledWith(VALID_BODY); + }); +}); diff --git a/src/domains/ai-tools/ai-tools.route.ts b/src/domains/ai-tools/ai-tools.route.ts new file mode 100644 index 0000000..51c33b0 --- /dev/null +++ b/src/domains/ai-tools/ai-tools.route.ts @@ -0,0 +1,88 @@ +import { createRoute, z } from '@hono/zod-openapi'; +import * as HttpStatusCodes from 
'stoker/http-status-codes'; +import { jsonContent } from 'stoker/openapi/helpers'; +import { createMessageObjectSchema } from 'stoker/openapi/schemas'; + +import { PERMISSIONS } from '@/lib/permissions'; +import { getHttpStatus } from '@/lib/types'; +import { authenticateUser, requirePermission } from '@/middlewares/role-auth'; +import { server } from '@/server/server'; + +import { callRepeatedWords } from './ai-tools.service'; +import { RepeatedWordsRequestSchema, RepeatedWordsResponseSchema } from './ai-tools.types'; + +// Standard fluent-api error body shape ({ error, code, details }) used for all +// non-2xx responses, per decision D9 / §10.3 — successes pass the fluent-ai +// envelope through verbatim, while errors conform to fluent-api's conventions. +const errorResponseSchema = z.object({ + error: z.string(), + code: z.string(), + details: z.unknown().optional(), +}); + +// ─── POST /ai/tools/greek-room/repeated-words ───────────────────────────────── + +const repeatedWordsRoute = createRoute({ + tags: ['AI Tools'], + method: 'post', + path: '/ai/tools/greek-room/repeated-words', + middleware: [authenticateUser, requirePermission(PERMISSIONS.AI_TOOLS_USE)] as const, + request: { + body: jsonContent( + RepeatedWordsRequestSchema, + 'Verses to scan for repeated words, forwarded verbatim to fluent-ai' + ), + }, + responses: { + [HttpStatusCodes.OK]: jsonContent( + RepeatedWordsResponseSchema, + 'Repeated-words check completed (terminal status)' + ), + [HttpStatusCodes.ACCEPTED]: jsonContent( + RepeatedWordsResponseSchema, + 'Repeated-words check accepted; poll for result (queued/running)' + ), + [HttpStatusCodes.BAD_REQUEST]: jsonContent( + createMessageObjectSchema('Bad Request'), + 'Invalid request body' + ), + [HttpStatusCodes.UNAUTHORIZED]: jsonContent( + createMessageObjectSchema('Unauthorized'), + 'Authentication required' + ), + [HttpStatusCodes.FORBIDDEN]: jsonContent( + createMessageObjectSchema('Forbidden'), + 'Missing AI_TOOLS_USE permission' + ), + 
[HttpStatusCodes.BAD_GATEWAY]: jsonContent(errorResponseSchema, 'Upstream fluent-ai error'), + }, + summary: 'Run the Greek-Room repeated-words check', + description: + 'Proxies a repeated-words check to fluent-ai. The request body is forwarded verbatim ' + + '(decision D8) and the full ToolJobResponse envelope is returned on success (decision D9). ' + + 'Returns 200 for terminal statuses (completed/failed/cancelled) and 202 for queued/running.', +}); + +server.openapi(repeatedWordsRoute, async (c) => { + const body = c.req.valid('json'); + const result = await callRepeatedWords(body); + + if (!result.ok) { + return c.json( + { error: result.error.message, code: result.error.code } as never, + getHttpStatus(result.error) as never + ); + } + + const envelope = result.data; + // Terminal statuses return 200; non-terminal (queued/running) return 202 so + // the (future) polling client knows to keep polling. D9 / §8.3 / §10.3. + const status = + envelope.status === 'completed' || + envelope.status === 'failed' || + envelope.status === 'cancelled' + ? HttpStatusCodes.OK + : HttpStatusCodes.ACCEPTED; + + return c.json(envelope as never, status as never); +}); diff --git a/src/domains/ai-tools/ai-tools.service.ts b/src/domains/ai-tools/ai-tools.service.ts new file mode 100644 index 0000000..5bb3401 --- /dev/null +++ b/src/domains/ai-tools/ai-tools.service.ts @@ -0,0 +1,24 @@ +import type { ToolJobResponse } from '@/lib/services/fluent-ai/fluent-ai.types'; +import type { Result } from '@/lib/types'; + +import { callFluentAi } from '@/lib/services/fluent-ai/fluent-ai.client'; + +import type { RepeatedWordsRequest, RepeatedWordsResult } from './ai-tools.types'; + +import { RepeatedWordsResultSchema } from './ai-tools.types'; + +/** + * Per-tool wrapper for Greek-Room's "Repeated Words" check. + * + * Note the two distinct identifiers in play: + * - URL path segment: `tools/greek-room/repeated-words` (hyphenated — the + * fluent-ai HTTP route). 
/**
 * fluent-ai HTTP route for the check, relative to `/api/v1/`. Hyphenated, and
 * distinct from the dotted snake_case tool id (`greek_room.repeated_words`)
 * that comes back on the response envelope.
 */
const REPEATED_WORDS_TOOL_PATH = 'tools/greek-room/repeated-words';

/**
 * Runs Greek-Room's "Repeated Words" check through the shared fluent-ai client.
 *
 * @param req Request payload, forwarded to fluent-ai unchanged.
 * @returns Whatever `callFluentAi` produces: the envelope on success (with
 *   `result` validated against `RepeatedWordsResultSchema`), or an error Result.
 */
export async function callRepeatedWords(
  req: RepeatedWordsRequest
): Promise<Result<ToolJobResponse<RepeatedWordsResult>>> {
  const outcome = await callFluentAi(REPEATED_WORDS_TOOL_PATH, req, RepeatedWordsResultSchema);
  return outcome;
}
// Field names stay in snake_case on purpose: they mirror the fluent-ai wire
// contract verbatim (decision D8 / §8.1).

/** Fresh schema for a count or offset: an integer that is never negative. */
const nonNegativeInt = () => z.number().int().nonnegative();

/** Fresh schema for an ISO-8601 timestamp string carrying a UTC offset. */
const offsetTimestamp = () => z.string().datetime({ offset: true });

// ─── Forward direction: fluent-web → fluent-api → fluent-ai (§8.1) ────────────

/** One verse to scan: a non-empty sentence id plus its text. */
export const VerseInputSchema = z.object({
  snt_id: z.string().min(1),
  text: z.string(),
});

/** Request body accepted by the route and forwarded to fluent-ai. */
export const RepeatedWordsRequestSchema = z.object({
  lang_code: z.string().min(1),
  lang_name: z.string().min(1),
  // string | number, matching what fluent-ai's Pydantic model accepts;
  // fluent-api's own project.id is an integer.
  project_id: z.union([z.string(), z.number()]),
  project_name: z.string().min(1),
  // At least one verse is required, so a trivially invalid request is rejected
  // at the route layer instead of costing a round-trip to fluent-ai.
  verses: z.array(VerseInputSchema).min(1),
});

export type RepeatedWordsRequest = z.infer<typeof RepeatedWordsRequestSchema>;

// ─── Reverse direction: fluent-ai → fluent-api → fluent-web (§8.2) ────────────

/** A single repeated-word occurrence reported by the check. */
export const RepeatedWordsFindingSchema = z.object({
  snt_id: z.string(),
  repeated_word: z.string(),
  surf: z.string(),
  start_position: nonNegativeInt(),
  legitimate: z.boolean(),
  // Upstream Greek-Room numeric severity (e.g. 0.1 legitimate, 0.5 suspicious).
  severity: z.number(),
});

/** Aggregate counters accompanying the findings list. */
export const RepeatedWordsSummarySchema = z.object({
  total_findings: nonNegativeInt(),
  legitimate_count: nonNegativeInt(),
  verse_count: nonNegativeInt(),
});

/** The `result` payload carried inside the envelope. */
export const RepeatedWordsResultSchema = z.object({
  // Upstream library identity fields (distinct from the envelope's `tool`).
  lang_code: z.string(),
  provider: z.string(),
  check: z.string(),
  findings: z.array(RepeatedWordsFindingSchema),
  summary: RepeatedWordsSummarySchema,
});

export type RepeatedWordsResult = z.infer<typeof RepeatedWordsResultSchema>;

/** Envelope error object: `{ code, message, details? }`. */
const envelopeErrorSchema = z.object({
  code: z.string(),
  message: z.string(),
  details: z.unknown().optional(),
});

/** Full tool-job envelope as documented for this route's 200/202 responses. */
export const RepeatedWordsResponseSchema = z.object({
  job_id: z.string().uuid(),
  // The envelope's tool identifier — distinct from the result's library fields.
  tool: z.literal('greek_room.repeated_words'),
  status: z.enum(['queued', 'running', 'completed', 'failed', 'cancelled']),
  result: RepeatedWordsResultSchema.nullable(),
  error: envelopeErrorSchema.nullable(),
  created_at: offsetTimestamp(),
  completed_at: offsetTimestamp().nullable(),
});

export type RepeatedWordsResponse = z.infer<typeof RepeatedWordsResponseSchema>;
// ─── Fixtures ─────────────────────────────────────────────────────────────────

const resultSchema = z.object({
  value: z.string(),
});

type TestResult = z.infer<typeof resultSchema>;

const TOOL_PATH = 'tools/greek-room/repeated-words';

/** A completed envelope, with any field replaceable through `overrides`. */
function buildEnvelope(overrides: Record<string, unknown> = {}) {
  return {
    job_id: '11111111-1111-1111-1111-111111111111',
    tool: 'greek_room.repeated_words',
    status: 'completed',
    result: { value: 'ok' },
    error: null,
    created_at: '2026-06-02T00:00:00Z',
    completed_at: '2026-06-02T00:00:01Z',
    ...overrides,
  };
}

/** Builds a JSON-typed Response; string bodies are sent as-is (not re-encoded). */
function jsonResponse(body: unknown, status = 200): Response {
  const payload = typeof body === 'string' ? body : JSON.stringify(body);
  return new Response(payload, {
    status,
    headers: { 'Content-Type': 'application/json' },
  });
}

/** Makes global fetch resolve with the given body/status. */
function stubFetch(body: unknown, status = 200) {
  return vi.spyOn(globalThis, 'fetch').mockResolvedValue(jsonResponse(body, status));
}

/** Makes global fetch hang until its signal aborts, then reject with an AbortError. */
function stubAbortableFetch() {
  return vi.spyOn(globalThis, 'fetch').mockImplementation(
    (_input, init) =>
      new Promise<Response>((_resolve, reject) => {
        const signal = (init as RequestInit | undefined)?.signal;
        signal?.addEventListener('abort', () => {
          const abortError = new Error('The operation was aborted');
          abortError.name = 'AbortError';
          reject(abortError);
        });
      })
  );
}

/** Calls the client under test with the shared tool path and result schema. */
function invoke(body: unknown = {}, options?: Parameters<typeof callFluentAi>[3]) {
  return callFluentAi<unknown, TestResult>(TOOL_PATH, body, resultSchema, options);
}

type CallOutcome = Awaited<ReturnType<typeof invoke>>;

/** Asserts the outcome is an error with `code` and hands back its message. */
function expectFailure(outcome: CallOutcome, code: string): string {
  expect(outcome.ok).toBe(false);
  if (outcome.ok) throw new Error('expected an error Result');
  expect(outcome.error.code).toBe(code);
  return outcome.error.message;
}

describe('callFluentAi', () => {
  beforeEach(() => {
    vi.restoreAllMocks();
  });

  afterEach(() => {
    vi.useRealTimers();
  });

  // ─── Happy paths ─────────────────────────────────────────────────────────

  it('returns Result.ok with the full envelope on a completed status', async () => {
    stubFetch(buildEnvelope());

    const result = await invoke();

    expect(result.ok).toBe(true);
    if (result.ok) {
      expect(result.data.status).toBe('completed');
      expect(result.data.result).toEqual({ value: 'ok' });
      expect(result.data.job_id).toBe('11111111-1111-1111-1111-111111111111');
    }
  });

  it('returns Result.ok for a queued envelope without validating the (null) result', async () => {
    stubFetch(buildEnvelope({ status: 'queued', result: null, completed_at: null }));

    const result = await invoke();

    expect(result.ok).toBe(true);
    if (result.ok) {
      expect(result.data.status).toBe('queued');
      expect(result.data.result).toBeNull();
    }
  });

  // ─── Terminal failures ───────────────────────────────────────────────────

  it('maps status "failed" to AI_TOOL_EXECUTION_FAILED', async () => {
    stubFetch(
      buildEnvelope({
        status: 'failed',
        result: null,
        error: { code: 'TOOL_EXECUTION_ERROR', message: 'tool blew up' },
      })
    );

    const message = expectFailure(await invoke(), ErrorCode.AI_TOOL_EXECUTION_FAILED);

    expect(message).toContain('tool blew up');
  });

  it('maps status "cancelled" to AI_TOOL_EXECUTION_FAILED', async () => {
    stubFetch(buildEnvelope({ status: 'cancelled', result: null }));

    expectFailure(await invoke(), ErrorCode.AI_TOOL_EXECUTION_FAILED);
  });

  // ─── Transport / HTTP errors ───────────────────────────────────────────────

  it('maps a 4xx response to AI_SERVICE_UNAVAILABLE', async () => {
    stubFetch({ detail: 'bad key' }, 401);

    expectFailure(await invoke(), ErrorCode.AI_SERVICE_UNAVAILABLE);
  });

  it('maps a 5xx response to AI_SERVICE_UNAVAILABLE', async () => {
    stubFetch({ detail: 'boom' }, 500);

    expectFailure(await invoke(), ErrorCode.AI_SERVICE_UNAVAILABLE);
  });

  it('maps a rejected fetch (network error) to AI_SERVICE_UNAVAILABLE', async () => {
    vi.spyOn(globalThis, 'fetch').mockRejectedValue(new TypeError('fetch failed'));

    const message = expectFailure(await invoke(), ErrorCode.AI_SERVICE_UNAVAILABLE);

    expect(message).toContain('unreachable');
  });

  // ─── Parsing / schema validation ───────────────────────────────────────────

  it('maps a non-JSON body to AI_SERVICE_UNAVAILABLE with a "malformed" message', async () => {
    stubFetch('not-json-at-all');

    const message = expectFailure(await invoke(), ErrorCode.AI_SERVICE_UNAVAILABLE);

    expect(message.toLowerCase()).toContain('malformed');
  });

  it('maps a result that fails the result schema to AI_SERVICE_UNAVAILABLE', async () => {
    stubFetch(buildEnvelope({ result: { value: 123 } }));

    expectFailure(await invoke(), ErrorCode.AI_SERVICE_UNAVAILABLE);
  });

  // ─── Timeout / abort ───────────────────────────────────────────────────────

  it('maps the default timeout firing to AI_SERVICE_UNAVAILABLE', async () => {
    vi.useFakeTimers();
    // fetch rejects with an AbortError once the client's derived signal aborts.
    stubAbortableFetch();

    const pending = invoke({}, { timeoutMs: 100 });
    await vi.advanceTimersByTimeAsync(100);

    const message = expectFailure(await pending, ErrorCode.AI_SERVICE_UNAVAILABLE);

    expect(message).toContain('timed out');
  });

  it('maps a caller-supplied AbortSignal abort to AI_SERVICE_UNAVAILABLE', async () => {
    const controller = new AbortController();
    stubAbortableFetch();

    const pending = invoke({}, { signal: controller.signal });
    controller.abort();

    expectFailure(await pending, ErrorCode.AI_SERVICE_UNAVAILABLE);
  });

  // ─── Request shape ─────────────────────────────────────────────────────────

  it('sends the correct URL, headers, and JSON body', async () => {
    const fetchSpy = stubFetch(buildEnvelope());

    const body = { lang_code: 'eng', verses: [{ snt_id: 'GEN 1:1', text: 'In in' }] };
    await invoke(body);

    expect(fetchSpy).toHaveBeenCalledOnce();
    const [url, init] = fetchSpy.mock.calls[0] as [string, RequestInit];

    expect(url).toBe(`${env.FLUENT_AI_URL}/api/v1/${TOOL_PATH}`);
    expect(init.method).toBe('POST');

    const headers = init.headers as Record<string, string>;
    expect(headers['Content-Type']).toBe('application/json');
    expect(headers['X-API-Key']).toBe(env.FLUENT_AI_KEY);
    expect(JSON.parse(init.body as string)).toEqual(body);
  });
});
const DEFAULT_TIMEOUT_MS = 30_000;

const JOB_STATUS_VALUES = [
  'queued',
  'running',
  'completed',
  'failed',
  'cancelled',
] as const satisfies readonly JobStatus[];

/**
 * Envelope schema used to validate the structural shape of every fluent-ai
 * response. The `result` field is validated separately against the caller's
 * per-tool schema (and only when status === 'completed'), so it is `unknown`
 * here.
 *
 * snake_case mirrors the fluent-ai wire contract verbatim (decision D8 / §8.1).
 */
const toolJobEnvelopeSchema = z.object({
  job_id: z.string(),
  tool: z.string(),
  status: z.enum(JOB_STATUS_VALUES),
  result: z.unknown().nullable(),
  error: z
    .object({
      code: z.string(),
      message: z.string(),
      details: z.unknown().optional(),
    })
    .nullable(),
  created_at: z.string(),
  completed_at: z.string().nullable(),
});

interface CallFluentAiOptions {
  /** Honored if the caller wants their own timeout / cancellation. */
  signal?: AbortSignal;
  /** Default 30_000ms. Ignored when an explicit `signal` is supplied. */
  timeoutMs?: number;
}

/**
 * Build an error Result. fluent-api's `AppError` is intentionally minimal
 * (`{ message, code }`), so diagnostic context (HTTP status, upstream message,
 * parse failures) is appended to the human-readable message instead of being
 * placed in a `details` field.
 *
 * @param code   Error code; also selects the base message from `ErrorMessages`.
 * @param detail Optional context appended as `"<base>: <detail>"`.
 */
function aiError(code: ErrorCode, detail?: string): Extract<Result<never>, { ok: false }> {
  const base = ErrorMessages[code];
  return {
    ok: false,
    error: { code, message: detail ? `${base}: ${detail}` : base },
  };
}

/**
 * Shared client for calling a fluent-ai tool endpoint.
 *
 * Behavior (see §7):
 *  - POSTs to `${FLUENT_AI_URL}/api/v1/${toolPath}` with `X-API-Key` and a JSON body.
 *  - Honors a caller-supplied `AbortSignal`, otherwise applies a default 30s
 *    timeout. The timeout covers the whole exchange: waiting for the response
 *    AND reading its body.
 *  - On 2xx, parses the body as a `ToolJobResponse` and (only when
 *    `status === 'completed'`) validates `result` against `resultSchema`.
 *  - Returns the FULL envelope on success — callers decide how to unwrap it.
 *  - Never throws for transport problems: connection failures, aborts, and
 *    failures while reading the body all become a `Result` error (§10.2).
 *
 * What it deliberately does NOT do (§7.3): no polling, no caching, no retries.
 *
 * @param toolPath     fluent-ai route relative to `/api/v1/`.
 * @param body         Request payload; serialized with `JSON.stringify`.
 * @param resultSchema Schema for the envelope's `result` on completed jobs.
 * @param options      Optional caller signal or timeout override.
 * @returns `ok: true` with the envelope for completed/queued/running jobs;
 *   `AI_TOOL_EXECUTION_FAILED` for failed/cancelled jobs;
 *   `AI_SERVICE_UNAVAILABLE` for every transport, HTTP, or parsing failure.
 */
export async function callFluentAi<TReq, TResult>(
  toolPath: string,
  body: TReq,
  resultSchema: z.ZodType<TResult>,
  options?: CallFluentAiOptions
): Promise<Result<ToolJobResponse<TResult>>> {
  const url = `${env.FLUENT_AI_URL}/api/v1/${toolPath}`;
  const timeoutMs = options?.timeoutMs ?? DEFAULT_TIMEOUT_MS;
  const callerSignal = options?.signal;

  // Use the caller's signal if provided; otherwise derive a timeout signal.
  let timeoutId: ReturnType<typeof setTimeout> | undefined;
  let signal: AbortSignal;
  if (callerSignal) {
    signal = callerSignal;
  } else {
    const controller = new AbortController();
    timeoutId = setTimeout(() => controller.abort(), timeoutMs);
    signal = controller.signal;
  }

  let response: Response;
  let rawBody: string;
  try {
    response = await fetch(url, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'X-API-Key': env.FLUENT_AI_KEY,
      },
      body: JSON.stringify(body),
      signal,
    });
    // Read the body once as text so we can parse it ourselves. This stays
    // inside the try so that (a) the timeout is still armed while the body
    // streams in and (b) a connection dropped mid-body becomes a Result error
    // instead of an exception escaping to the route.
    rawBody = await response.text();
  } catch (error) {
    // `signal.aborted` also catches aborts whose rejection is not named
    // 'AbortError' (e.g. a caller passing a custom abort reason).
    const aborted = signal.aborted || (error instanceof Error && error.name === 'AbortError');
    if (aborted) {
      // Only the internally derived signal represents a timeout; a caller's
      // signal aborting is a cancellation and must not be reported as one.
      return aiError(
        ErrorCode.AI_SERVICE_UNAVAILABLE,
        callerSignal
          ? 'request was aborted via the caller-supplied signal'
          : `request timed out after ${timeoutMs}ms`
      );
    }
    const cause = error instanceof Error ? error.message : String(error);
    return aiError(ErrorCode.AI_SERVICE_UNAVAILABLE, `fluent-ai unreachable (${cause})`);
  } finally {
    if (timeoutId !== undefined) clearTimeout(timeoutId);
  }

  if (!response.ok) {
    return aiError(ErrorCode.AI_SERVICE_UNAVAILABLE, `fluent-ai returned HTTP ${response.status}`);
  }

  // Parse + structurally validate the envelope.
  const parsed = safeJsonParse(rawBody);
  if (parsed === undefined) {
    return aiError(
      ErrorCode.AI_SERVICE_UNAVAILABLE,
      'malformed response from fluent-ai (body was not valid JSON)'
    );
  }

  const envelopeResult = toolJobEnvelopeSchema.safeParse(parsed);
  if (!envelopeResult.success) {
    return aiError(ErrorCode.AI_SERVICE_UNAVAILABLE, 'malformed response envelope from fluent-ai');
  }

  const envelope = envelopeResult.data;

  // Terminal failure / cancellation: the dependency is up but the tool refused.
  if (envelope.status === 'failed' || envelope.status === 'cancelled') {
    const upstream = envelope.error?.message ?? envelope.status;
    return aiError(ErrorCode.AI_TOOL_EXECUTION_FAILED, upstream);
  }

  // Validate the result payload only for completed jobs; for queued/running the
  // result is null and must not be validated.
  let validatedResult: TResult | null = null;
  if (envelope.status === 'completed') {
    const resultParse = resultSchema.safeParse(envelope.result);
    if (!resultParse.success) {
      return aiError(ErrorCode.AI_SERVICE_UNAVAILABLE, 'malformed result payload from fluent-ai');
    }
    validatedResult = resultParse.data;
  }

  return {
    ok: true,
    data: {
      job_id: envelope.job_id,
      tool: envelope.tool,
      status: envelope.status,
      result: validatedResult,
      error: envelope.error,
      created_at: envelope.created_at,
      completed_at: envelope.completed_at,
    },
  };
}

/**
 * `JSON.parse` that reports failure as `undefined` instead of throwing.
 * `undefined` is an unambiguous sentinel because no valid JSON text parses to it.
 */
function safeJsonParse(text: string): unknown {
  try {
    return JSON.parse(text);
  } catch {
    return undefined;
  }
}
// Field names below stay in fluent-ai's snake_case on purpose (decision D8 /
// §8.1): they are the wire contract, passed through unchanged.

/** Lifecycle states a tool job can report on the envelope. */
export type JobStatus = 'queued' | 'running' | 'completed' | 'failed' | 'cancelled';

/**
 * Structured error carried on the envelope when `status === "failed"`.
 * Shape: `{ code, message, details? }`.
 */
export interface ToolJobError {
  /** Error code, e.g. 'TOOL_EXECUTION_ERROR'. */
  code: string;
  message: string;
  details?: unknown;
}

/**
 * Response envelope shared by every fluent-ai tool endpoint, whether the tool
 * ran synchronously or (in future) asynchronously.
 */
export interface ToolJobResponse<TResult> {
  /** Per-invocation UUID. */
  job_id: string;
  /** fluent-ai tool identifier, e.g. 'greek_room.repeated_words'. */
  tool: string;
  status: JobStatus;
  /** Populated when status === 'completed'. */
  result: TResult | null;
  /** Populated when status === 'failed'. */
  error: ToolJobError | null;
  /** ISO-8601 timestamp. */
  created_at: string;
  /** ISO-8601 timestamp for terminal states, else null. */
  completed_at: string | null;
}
§10/§11 boundary so each file stays under the markdown line-count lint limit; sections numbered continuously across the pair so all 'see §N' references stay valid. - Part 1 (ai-tools-integration-suggestion.md): contract & design, §1-§10. - Part 2 (ai-tools-integration-operations.md, new): job-queue forward compatibility, service discovery / Docker / env wiring, testing, future work, resolved reviewer Q&A, §11-§15. - Add §12.10 'Wiring up a running ecosystem' runbook (env vars, compose override, restart-just-api, health checks, BetterAuth set-auth-token sign-in, smoke-script invocation) reconciled with scripts/smoke-repeated-words.ts. - Add ai-tools-integration-status.md: file-by-file account of what is implemented at b055f84 vs. what remains (live e2e run + paired fluent-platform PR) vs. out of scope. - Update summary.md companion links to the new file set. - Fix relative-path depth in all four docs (now under repeated-word-check/); all 79 internal links resolve; repoint a stale route-test example to server.test.ts / projects.service.test.ts. Docs-only; no source changes. 
--- .../ai-tools-integration-operations.md | 480 ++++++++++++++++++ .../ai-tools-integration-status.md | 102 ++++ .../ai-tools-integration-suggestion.md | 476 ++--------------- .../ai-tools-integration-summary.md | 2 +- 4 files changed, 632 insertions(+), 428 deletions(-) create mode 100644 docs/proposals/repeated-word-check/ai-tools-integration-operations.md create mode 100644 docs/proposals/repeated-word-check/ai-tools-integration-status.md diff --git a/docs/proposals/repeated-word-check/ai-tools-integration-operations.md b/docs/proposals/repeated-word-check/ai-tools-integration-operations.md new file mode 100644 index 0000000..5497a9a --- /dev/null +++ b/docs/proposals/repeated-word-check/ai-tools-integration-operations.md @@ -0,0 +1,480 @@ +# AI-Tools Integration on fluent-api — Proposal (Part 2 of 2: Operations & Forward Compatibility) + +**Status:** Reviewed and **approved** (kaseywright, PR #173, 2026-06-02). Implemented on branch `jel-word-check` — see the implementation-status doc below for what currently exists in the tree. +**Scope:** Extend fluent-api to expose AI tools implemented by fluent-ai, starting with Greek-Room's _Repeated Words_ check. The exposed pattern is meant to absorb every future AI tool (LLM drafting, embeddings, fine-tuning, other Greek-Room checks) without renegotiating the contract. + +**This document is Part 2 of 2.** It covers operations and forward compatibility: the job-queue protocol, service discovery / Docker / environment wiring (including the step-by-step "wire up a running ecosystem" checklist), the testing strategy, future work, and the resolved reviewer Q&A (§11–§15). + +**Companion documents:** + +- [`ai-tools-integration-suggestion.md`](ai-tools-integration-suggestion.md) — **Part 1 of 2.** Contract and design: background, scope, decisions, the URL, the file layout, the `callFluentAi` utility, request/response shapes, auth, and error translation (§1–§10). 
**Read Part 1 first.** +- [`ai-tools-integration-status.md`](ai-tools-integration-status.md) — **Implementation status.** What is already implemented in the tree (file-by-file) versus what remains to be done. Start here if you are an agent or developer picking this work up. +- [`ai-tools-integration-summary.md`](ai-tools-integration-summary.md) — short reviewer orientation. + +**Predecessors on the fluent-ai side:** [`fluent-ai/greek-room-integration-summary.md`](../../../../fluent-ai/greek-room-integration-summary.md), [`fluent-ai/greek-room-integration-suggestion.md`](../../../../fluent-ai/greek-room-integration-suggestion.md), [`fluent-ai/greek-room-integration-decisions.md`](../../../../fluent-ai/greek-room-integration-decisions.md). + +> **Note on document split.** This proposal was split into two files at the §10/§11 boundary so each stays under the repo's markdown line-count lint limit. Sections are numbered continuously across both files (Part 1 ends at §10; Part 2 begins at §11), so all internal "see §N" references remain valid across the pair. Relative paths in this document (e.g. `../src/...`, `../../fluent-platform/...`) assume the standard side-by-side repo layout that fluent-platform's setup script produces. + +--- + +## 11. The job-queue protocol — forward compatibility + +This section describes what this PR sets up but does **not** exercise: the asynchronous job-queue contract that fluent-ai's `ToolJobResponse` envelope already accommodates. Today every call is synchronous (`status: "completed"`); the machinery below is the agreed-upon shape for when a slow tool eventually needs it. Nothing here ships as code in this PR — it is documented so the envelope pass-through (D9) and the `/ai/` URL namespace (D2) can be understood as deliberately forward-compatible choices. + +### 11.1 The contract today vs. tomorrow + +**Today** every response from fluent-ai is synchronous with `status: "completed"`. 
fluent-api hands the envelope to fluent-web as a 200 response. No polling occurs. + +**Tomorrow**, when fluent-ai introduces a slow tool, it can return `202 Accepted` with `status: "queued"` and a real `job_id` that exists in fluent-ai's job table. The protocol fluent-ai will (eventually) expose is the existing fluent-ai decision **D3** envelope plus a new polling endpoint: + +``` +GET /api/v1/tools/jobs/{job_id} +→ ToolJobResponse with current status and (if completed) result +``` + +Returns 200 in all states (queued/running/completed/failed/cancelled). The HTTP status is _not_ used to communicate terminal vs. non-terminal — only the envelope's `status` field is. + +### 11.2 fluent-api's pass-through polling endpoint (future) + +When fluent-ai adds the polling endpoint, fluent-api adds: + +``` +GET /ai/tools/jobs/{job_id} +→ Pass-through of fluent-ai's response, with the same auth (BetterAuth session + AI_TOOLS_USE permission) +``` + +Implementation will be a second helper alongside `callFluentAi`: + +```ts +// future, not in this PR +export async function pollToolJob<TResult>( + jobId: string, + resultSchema: z.ZodType<TResult> +): Promise<Result<ToolJobResponse<TResult>>>; +``` + +### 11.3 Why polling lives in the browser, not in fluent-api + +Per **D3**. The detailed reasoning, repeated for completeness: + +- **Decouples slow tools from fluent-api's request budget.** A 5-minute tool does not hold a browser-to-fluent-api socket open for 5 minutes through whatever proxies, load balancers, or middleboxes sit between them. +- **Matches the editor UX shape.** When the eventual squiggle-on-typing UX is built, the browser already has its own state machine for "user has typed, debounce, kick off check, show pending indicator, show squiggles when result arrives." Putting polling on the server adds nothing to that loop.
+- **TanStack Query has the right primitives.** `refetchInterval` accepts a function that inspects the current data and returns `false` to stop polling — i.e., literally `(data) => isTerminal(data.status) ? false : 1500`. No custom polling library needed. +- **Aligns with the existing fluent-web pattern.** Every existing fluent-web API hook calls `fetch` directly; there is no centralized server-state abstraction beyond TanStack itself. Adding server-side polling would be the foreign element. + +### 11.4 What the frontend hook will look like (out of scope, sketched) + +This is _not_ part of this PR, but is sketched here so reviewers can see that the backend contract is consumable. + +```ts +// fluent-web/src/lib/api/useToolJob.ts (future) + +import { useQuery } from '@tanstack/react-query'; + +import type { ToolJobResponse } from './tool-job-types'; + +const TERMINAL: Set['status']> = new Set([ + 'completed', + 'failed', + 'cancelled', +]); + +export function useToolJob( + jobId: string | null, + opts?: { pollIntervalMs?: number; enabled?: boolean } +) { + return useQuery>({ + queryKey: ['ai-tools', 'jobs', jobId], + queryFn: () => + fetch(`${config.api.url}/ai/tools/jobs/${jobId}`, { credentials: 'include' }).then((r) => + r.json() + ), + enabled: !!jobId && (opts?.enabled ?? true), + refetchInterval: (q) => + q.state.data && TERMINAL.has(q.state.data.status) ? false : (opts?.pollIntervalMs ?? 
1500), + }); +} +``` + +```ts +// fluent-web/src/features/checks/hooks/useRepeatedWords.ts (future) + +export function useRepeatedWords() { + const [pendingJobId, setPendingJobId] = useState(null); + + const kickoff = useMutation({ + mutationFn: (req: RepeatedWordsRequest) => + fetch(`${config.api.url}/ai/tools/greek-room/repeated-words`, { + method: 'POST', + credentials: 'include', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify(req), + }).then((r) => r.json() as Promise>), + onSuccess: (envelope) => { + if (envelope.status === 'queued' || envelope.status === 'running') { + setPendingJobId(envelope.job_id); + } + }, + }); + + const polled = useToolJob(pendingJobId); + + // Today, only kickoff.data is ever populated. Tomorrow, polled.data takes over. + const envelope = polled.data ?? (kickoff.data?.status === 'completed' ? kickoff.data : null); + + return { kickoff, envelope }; +} +``` + +### 11.5 No frontend code in this PR + +Per the user's instruction during the spec discussion, frontend work is a separate session. The above sketches are appendix material so reviewers can confirm the backend contract is sufficient for the eventual frontend implementation. + +--- + +## 12. Service discovery, environment, and Docker networking + +The cross-repo orchestration substrate already exists as [`fluent-platform`](../../../../fluent-platform/README.md). Its [`compose.yaml`](../../../../fluent-platform/compose.yaml) brings up `db`, `api`, `worker`, `ai`, and `web` on a shared Docker/Podman network with service names usable as DNS, plus a shared PostgreSQL instance with role-based schema separation. This section describes how this PR plugs into that substrate and the small changes needed in fluent-api and fluent-platform. + +### 12.1 The two runtime modes + +Per [`fluent-platform/README.md`](../../../../fluent-platform/README.md), fluent-api runs in one of two modes: + +- **Ecosystem mode** — started via `./fluent.sh up` from `fluent-platform/`. 
fluent-ai is also up, reachable at `http://ai:8200` on the internal network (service name `ai` from [`fluent-platform/compose.yaml`](../../../../fluent-platform/compose.yaml) line 82). +- **Standalone mode** — started via `./fapi.sh up` from `fluent-api/`. fluent-ai is _not_ running unless the dev started it separately. fluent-api needs to gracefully report unavailability rather than crash. + +Both modes are first-class. The integration must work in both. + +### 12.2 Env vars (fluent-api side) + +Two new entries in [`fluent-api/src/env.ts`](../../../src/env.ts): + +```ts +const envSchema = z.object({ + // ... existing ... + FLUENT_AI_URL: z.string().url(), // ecosystem mode: http://ai:8200 — standalone: http://localhost:8200 + FLUENT_AI_KEY: z.string().min(1), // dev value: fai_dev_admin +}); +``` + +Both are required (no defaults). Zod failure on boot prints a clear error and exits, matching how fluent-api already handles `DATABASE_URL`, `BETTER_AUTH_SECRET`, etc. + +### 12.3 `fluent-api/.env.example` additions + +```dotenv +# Fluent-AI integration +# Base URL of the fluent-ai service (no trailing slash, no /api/v1 suffix). +# - Ecosystem mode (via fluent-platform): http://ai:8200 +# - Standalone fluent-api against standalone fluent-ai: http://localhost:8200 +FLUENT_AI_URL=http://localhost:8200 + +# Shared API key for calling fluent-ai. Matches a row in fluent-ai's ai_api_keys table. +# Dev value seeded by fluent-ai: fai_dev_admin +FLUENT_AI_KEY=fai_dev_admin +``` + +The `.env.example` documents the standalone-mode default because that's the path a dev hits first when running `./fapi.sh up` and copying `.env.example` to `.env`. Ecosystem-mode overrides are applied at the platform-compose layer (§12.4). + +### 12.4 Companion change in fluent-platform + +[`fluent-platform/compose.yaml`](../../../../fluent-platform/compose.yaml) currently passes fluent-api's `.env` verbatim via `env_file: ${API_CONTEXT:-../fluent-api}/.env`. 
To make ecosystem mode work regardless of what the dev wrote in `fluent-api/.env`, the platform compose should explicitly override the URL for the `api` service: + +```yaml +api: + # ... existing ... + env_file: ${API_CONTEXT:-../fluent-api}/.env + environment: + DATABASE_URL: postgres://postgres:postgres@db:5432/fluent + EXPORTS_DIR: /app/exports + # New entries: + FLUENT_AI_URL: http://ai:8200 + # FLUENT_AI_KEY intentionally NOT overridden here — sourced from fluent-api/.env, + # which must match fluent-ai's ai_api_keys seed (dev value: fai_dev_admin) +``` + +`FLUENT_AI_URL` is overridden because it's deployment-topology-dependent. `FLUENT_AI_KEY` is _not_ overridden because it's a shared secret — the same value belongs in `fluent-api/.env` (for the caller) and in fluent-ai's `ai_api_keys` table (which the dev seed already populates). Overriding only on one side would invite drift. + +This is a small fluent-platform PR that should land alongside the fluent-api PR. Both repos ship together; the spec calls this out as a release-coordination item in §15. + +### 12.5 Startup ordering + +[`fluent-platform/compose.yaml`](../../../../fluent-platform/compose.yaml) line 110–112 currently has `ai` declaring `depends_on: api: service_healthy`. So when the stack starts: + +1. `db` becomes healthy +2. `api` starts, becomes healthy +3. `ai` and `worker` and `web` start +4. Brief window where `api` is up but `ai` is still booting + +If a dev (or test) hits the `/ai/tools/...` endpoint during that window, fluent-api's `callFluentAi` will hit `ECONNREFUSED` and return `Result.err({ code: AI_SERVICE_UNAVAILABLE, ... })`. This is the correct behavior — no need for retries, no need to invert the `depends_on` direction. Worth noting only so reviewers don't mistake the 502 they see during startup for a bug. (An optional improvement: add an `ai` healthcheck and let `api` declare a soft dependency on it. Out of scope for this PR but a candidate for the fluent-platform follow-up.) 
+ +### 12.6 Standalone-mode behavior when fluent-ai isn't running + +When a dev runs only `./fapi.sh up` without fluent-ai, the `/ai/tools/...` endpoints will return `502 Bad Gateway` with `code: AI_SERVICE_UNAVAILABLE`. This is acceptable: the rest of fluent-api works, and the dev sees a clear signal that they need to bring fluent-ai up (or switch to ecosystem mode) if they want to exercise the AI integration. + +### 12.7 README updates + +- **fluent-api's README** gains a short subsection under "Running locally" pointing to fluent-platform for ecosystem mode and explaining the standalone-mode caveat. +- **fluent-platform's README** has a Services table at line 61–68 listing `api`, `ai`, `web`, `worker`, `db`. The proposed compose change in §12.4 doesn't add new services so this table is unaffected, but the Environment Configuration section (line 166+) should mention that `FLUENT_AI_KEY` must be set in `fluent-api/.env` to enable the AI tools endpoints. + +### 12.8 What `callFluentAi` does _not_ assume about networking + +The client is unaware of whether fluent-ai is at `localhost:8200`, `ai:8200`, `https://fluent-ai.internal.example.com`, or anywhere else. It reads `FLUENT_AI_URL` verbatim, appends `/api/v1/${toolPath}`, and POSTs. This means: + +- Switching from standalone to ecosystem mode is a single env var change (handled automatically by the platform compose override). +- Switching to a staging or production deployment is a single env var change. +- TLS works automatically if `FLUENT_AI_URL` starts with `https://` — `fetch` handles it. + +### 12.9 Production / deployment + +Per [`fluent-platform/README.md`](../../../../fluent-platform/README.md) §"Deployment (placeholder - not active 2026-05-08)", Azure Bicep templates live in [`fluent-platform/deploy/azure/`](../../../../fluent-platform/deploy/azure/) but aren't active yet. 
When production deployment lands, `FLUENT_AI_URL` and `FLUENT_AI_KEY` will be wired through the same environment-injection mechanism the rest of the app uses (Azure App Settings / Key Vault references). No fluent-api code change is required for that transition. + +### 12.10 Wiring up a running ecosystem (post-PR checklist) + +The fluent-api code in this PR is complete, but **exercising it end-to-end against a live fluent-ai requires a small amount of local wiring that is intentionally _not_ part of this PR's committed changes** (the env values are machine-specific, and the compose override belongs to the paired fluent-platform PR per §12.4 / **D12**). This subsection is the runbook for an agent or developer who wants to take the merged code and watch a real request flow fluent-web → fluent-api → fluent-ai. It is deliberately step-by-step so it can be followed without prior context. + +> **Prerequisites.** All four repos cloned side-by-side (the layout fluent-platform's setup produces). Docker/Podman available. fluent-ai's dev seed has run at least once so its `ai_api_keys` table contains the dev key `fai_dev_admin`. You are on branch `jel-word-check` in fluent-api. + +**Step 1 — Add the two env vars to `fluent-api/.env`.** The repo only ships `.env.example`; the real `.env` is git-ignored and must be edited by hand. Append (or copy from `.env.example` — see §12.3): + +```dotenv +FLUENT_AI_URL=http://localhost:8200 +FLUENT_AI_KEY=fai_dev_admin +``` + +Use `http://localhost:8200` for standalone mode. In ecosystem mode the platform compose override (Step 2) replaces the URL with `http://ai:8200`, so the value here is only the standalone fallback — but it **must be present and non-empty** either way, because `src/env.ts` validates both vars at boot with no defaults (§12.2). A missing or blank value makes fluent-api exit on startup with a Zod error. 
+ +**Step 2 — Add the compose override in fluent-platform (paired PR, §12.4 / D12).** This is _not_ committed in the fluent-api PR. In [`fluent-platform/compose.yaml`](../../../../fluent-platform/compose.yaml), under the `api` service's `environment:` block, add: + +```yaml +FLUENT_AI_URL: http://ai:8200 +``` + +Do **not** add `FLUENT_AI_KEY` here — it is a shared secret sourced from `fluent-api/.env` so the caller key and fluent-ai's `ai_api_keys` seed stay in lockstep (§12.4 explains why overriding only one side invites drift). + +**Step 3 — Bring the stack up (or restart just `api`).** + +- Cold start: from `fluent-platform/`, run `./fluent.sh up` (ecosystem mode — brings up `db`, `api`, `worker`, `ai`, `web`). +- If the stack is already running and you only changed env/compose for `api`, restart just that one service so you don't disturb the database or other services — e.g. `docker compose restart api` (or the equivalent `./fluent.sh` subcommand). **Do not** tear down the stack or re-run any DB seed; that would needlessly rebuild Postgres. + +**Step 4 — Verify both services are healthy.** + +- fluent-api: `curl -fsS http://localhost:8787/health` (or the port your stack maps the `api` service to) should return its health payload. +- fluent-ai: `curl -fsS http://localhost:8200/health` (standalone) or, from inside the network, `http://ai:8200/health` should return healthy. There is a brief startup window where `api` is up but `ai` is still booting (§12.5); a 502 with `AI_SERVICE_UNAVAILABLE` during that window is expected, not a bug — just retry once `ai` is healthy. + +**Step 5 — Obtain a BetterAuth bearer token.** The `/ai/tools/...` endpoint is guarded by `authenticateUser + requirePermission(AI_TOOLS_USE)` (§9), so an unauthenticated `curl` gets a 401. To call it from the host, sign in via BetterAuth and capture the session token. 
Sign-in returns the token in the `set-auth-token` response header (BetterAuth's bearer-token plugin), which you then send back as `Authorization: Bearer <token>`: + +```bash +# Sign in with a seeded dev user; capture the set-auth-token header. +TOKEN=$(curl -sS -D - -o /dev/null \ + -X POST http://localhost:8787/api/auth/sign-in/email \ + -H 'Content-Type: application/json' \ + -d '{"email":"<dev-email>","password":"<dev-password>"}' \ + | awk -F': ' 'tolower($1)=="set-auth-token"{print $2}' | tr -d '\r') +echo "token: ${TOKEN:0:12}..." +``` + +Use whatever dev credentials your local seed provisions. (Exact email/password are environment-specific — check your fluent-api/fluent-platform seed data; this runbook intentionally does not hard-code them.) + +**Step 6 — Run the smoke script.** The script is committed in this PR at [`fluent-api/scripts/smoke-repeated-words.ts`](../../../scripts/smoke-repeated-words.ts) with the npm alias `smoke:repeated-words` (§13.3). It posts the canned 3-verse corpus and asserts the envelope is `completed`, `tool === "greek_room.repeated_words"`, `result.findings` is an array with exactly two entries (one legitimate, one suspicious), `summary.verse_count === 3`, and `summary.total_findings === findings.length`. From `fluent-api/`: + +```bash +# Credential is passed as a CLI flag (note the `--` separating npm args from script args). +# Base URL defaults to $FLUENT_API_URL or http://localhost:9999; override with --url. +npm run smoke:repeated-words -- --url http://localhost:8787 --token "$TOKEN" + +# Web-session alternative (raw Cookie header instead of a bearer token): +# npm run smoke:repeated-words -- --url http://localhost:8787 --cookie "better-auth.session_token=..." +# The token may also be supplied via the FLUENT_API_TOKEN env var instead of --token. +# Add --raw to print the unchecked response body, or --timeout to change the 30s default. +``` + +A successful run prints the parsed envelope, lists each sanity check as `ok`, and exits 0.
Exit 2 means bad CLI args or a missing credential (no `--token`/`--cookie`/`FLUENT_API_TOKEN`). A 401/403 means the token wasn't captured or the user lacks `content:update`; a 502 with `AI_SERVICE_UNAVAILABLE` means fluent-api couldn't reach fluent-ai (re-check Steps 1–4); a 502 with `AI_TOOL_EXECUTION_FAILED` means fluent-ai was reached but the tool itself reported a failure (inspect the propagated `details`). + +**Step 7 — (Optional) Drive it from the OpenAPI docs.** fluent-api serves Scalar docs at `/reference`; the `POST /ai/tools/greek-room/repeated-words` operation appears there with the full request/response schema (§5.2), so you can also exercise it interactively once authenticated. + +> **Why this isn't committed.** The `.env` values are per-machine secrets and the compose override is the paired fluent-platform PR's responsibility (D12). Keeping this as a documented runbook — rather than baked-in config — preserves the standalone/ecosystem split (§12.1) and avoids committing a dev key or a topology-specific URL into the fluent-api repo. + +--- + +## 13. Testing strategy + +Per **D11**, the test footprint mirrors the existing fluent-api conventions. Three layers: + +### 13.1 Unit tests — `callFluentAi` + +File: `fluent-api/src/lib/services/fluent-ai/fluent-ai.client.test.ts` + +Test surface, all with `global.fetch` stubbed via `vi.spyOn(global, 'fetch')`: + +- Happy path: completed envelope → returns `Result.ok(envelope)`. +- Happy path: queued envelope → returns `Result.ok(envelope)` (the route layer, not the client, decides 200 vs 202). +- Failed envelope (`status: "failed"`) → returns `Result.err({ code: AI_TOOL_EXECUTION_FAILED, ... })`. +- Cancelled envelope → returns `Result.err({ code: AI_TOOL_EXECUTION_FAILED, ... })`. +- fluent-ai returns 4xx → `Result.err({ code: AI_SERVICE_UNAVAILABLE, ... })`. +- fluent-ai returns 5xx → `Result.err({ code: AI_SERVICE_UNAVAILABLE, ... })`. 
+- `fetch` rejects (network error) → `Result.err({ code: AI_SERVICE_UNAVAILABLE, ... })`. +- Response body fails JSON parse → `Result.err({ code: AI_SERVICE_UNAVAILABLE, message contains "malformed" })`. +- Response envelope passes parsing but `result` field fails the result schema → `Result.err({ code: AI_SERVICE_UNAVAILABLE, ... })`. +- Default 30s timeout fires via fake timers → `Result.err({ code: AI_SERVICE_UNAVAILABLE, ... })`. +- Caller-supplied `AbortSignal` triggers → `Result.err({ code: AI_SERVICE_UNAVAILABLE, ... })`. +- Request shape: `X-API-Key` header is present, equals `env.FLUENT_AI_KEY`, `Content-Type` is `application/json`, URL is `${FLUENT_AI_URL}/api/v1/${toolPath}`. + +### 13.2 Domain tests — `ai-tools.route.ts` + +File: `fluent-api/src/domains/ai-tools/ai-tools.route.test.ts` + +Test surface, modeled on the existing HTTP-route coverage in [`fluent-api/src/server/server.test.ts`](../../../src/server/server.test.ts) and the domain-service test conventions in [`fluent-api/src/domains/projects/projects.service.test.ts`](../../../src/domains/projects/projects.service.test.ts): + +- Unauthenticated request → 401. +- Authenticated but missing `AI_TOOLS_USE` → 403. +- Invalid request body (e.g. empty `verses`) → 400 with Zod details. +- Authenticated + permitted + valid body + happy-path mock of `callRepeatedWords` returning completed envelope → 200, envelope passed through verbatim. +- Same but mock returns queued envelope → 202, envelope passed through. +- Same but mock returns failed envelope → 502, error body. +- Same but mock returns transport error → 502, error body. +- Mock is asserted to have been called with the exact request body the caller sent (verifies no enrichment). + +### 13.3 Smoke test — `scripts/smoke-repeated-words.ts` + +A standalone script mirroring [`fluent-ai/scripts/smoke_repeated_words.py`](../../../../fluent-ai/scripts/smoke_repeated_words.py). 
Runs from the host against a live fluent-api + fluent-ai pair, posts a known-good body, and asserts: + +- Returns 200 (today; 202 once fluent-ai goes async). +- Envelope `status` is `completed` (today). +- `result.findings` is an array. +- `result.summary.total_findings` equals `result.findings.length`. + +Invoked via an npm script: `npm run smoke:repeated-words`. Not part of `npm test` (it requires a live stack). Documented in fluent-api's README alongside the existing dev workflow. + +### 13.4 What is _not_ covered + +- **No end-to-end fluent-web → fluent-api → fluent-ai test.** That's a frontend concern that will land with the frontend PR. +- **No load tests** for the polling endpoint (which doesn't exist yet on either side). +- **No contract tests** auto-generated from fluent-ai's OpenAPI spec. This would be valuable, but introducing a contract-testing framework (Pact, openapi-typescript code generation, etc.) is its own decision worth a separate spec. For now, the Zod schemas in fluent-api are the contract, hand-maintained against [`fluent-ai/src/app/schemas/greek_room.py`](../../../../fluent-ai/src/app/schemas/greek_room.py) and [`fluent-ai/src/app/schemas/tool_job.py`](../../../../fluent-ai/src/app/schemas/tool_job.py). + +### 13.5 Test infrastructure inherited + +- Vitest config in [`fluent-api/vitest.config.ts`](../../../vitest.config.ts) — no changes. +- Existing test helpers in `fluent-api/src/tests/` (auth fixtures, request helpers) — reused as-is for the domain tests. +- No new test dependencies. + +--- + +## 14. Future work + +Items that are out of scope for this PR but enabled by the foundations laid here. None of these is blocked on a redesign; they all plug into the same `callFluentAi` / `ToolJobResponse` shape. + +### 14.1 The polling endpoint and slow tools + +When fluent-ai introduces a tool that justifies the queue substrate (per fluent-ai decision **D1**, currently deferred), it will ship: + +- A backing `ai.tool_jobs` table. 
+- An in-process worker for execution. +- `GET /api/v1/tools/jobs/{job_id}` for status polling. + +The matching fluent-api work is small: + +- Add `pollToolJob(jobId, resultSchema)` sibling to `callFluentAi` in [`fluent-api/src/lib/services/fluent-ai/fluent-ai.client.ts`](../../../src/lib/services/fluent-ai/fluent-ai.client.ts). +- Add `GET /ai/tools/jobs/{job_id}` route in [`fluent-api/src/domains/ai-tools/ai-tools.route.ts`](../../../src/domains/ai-tools/ai-tools.route.ts) with the same `authenticateUser + requirePermission(AI_TOOLS_USE)` middleware. +- No DB persistence needed on the fluent-api side — fluent-api remains a thin pass-through; the job state of record lives in fluent-ai's `ai.tool_jobs` table. + +### 14.2 Frontend hook and editor squiggles + +A separate PR against fluent-web will introduce the `useToolJob` + `useRepeatedWords` hooks sketched in §11.4, then drive editor squiggle UI from the `findings` array. The backend surface is already shaped to feed that UI directly (`snt_id`, `surf`, `start_position`, `severity` on each finding). + +### 14.3 Additional Greek-Room checks + +Greek-Room exposes other static-analysis tools (punctuation, untranslated text, character-set sanity, etc.). Each will land in fluent-ai as a sibling tool, then surface in fluent-api with the same five-line pattern shown in §7.5. No new mechanism needed. + +### 14.4 Other AI tool families + +The same pattern absorbs LLM drafting, embeddings, fine-tuning, and any other tool family fluent-ai grows into. The naming convention `tools/{family}/{tool-name}` (e.g. `tools/openai/draft-suggestion`, `tools/embeddings/similarity`) keeps OpenAPI documentation organized. + +### 14.5 Per-user attribution + +Today fluent-ai sees a single shared identity (`FLUENT_AI_KEY`). If audit / billing / rate-limiting needs per-user attribution later, fluent-api can pass an opaque `X-Requested-By` header carrying the BetterAuth user ID. fluent-ai logs it; no change to the request body. 
+ +### 14.6 Caching for idempotent tools + +`callFluentAi` is intentionally cache-free today. Some future tools may be both expensive and deterministic on their input — in which case a `(toolPath, hash(body))` cache (in-memory or Redis) makes sense. Drops in at the `callFluentAi` layer without changing call sites. + +### 14.7 Retries on transport failure + +Currently `callFluentAi` does not retry on network errors. If experience shows transient failures are common, a `withRetry` wrapper (analogous to [`withDatabaseRetry`](../../../src/lib/db-retry.ts)) can be added at the client level. Out of scope today because the failure mode of the only tool is "semantic," not "flaky." + +### 14.8 MCP facade + +A future Model Context Protocol facade (referenced as out-of-scope in [`fluent-ai/greek-room-integration-summary.md`](../../../../fluent-ai/greek-room-integration-summary.md)) could be layered over fluent-ai. fluent-api would call it via `callFluentAi` exactly as today — the only difference is the base URL. + +### 14.9 fluent-platform refinements + +Two small, optional improvements identified while writing this spec: + +- Add a healthcheck to the `ai` service in [`fluent-platform/compose.yaml`](../../../../fluent-platform/compose.yaml) and let `api` declare a soft dependency on it. Would eliminate the brief startup window where the AI endpoints return 502. Not pursued in this PR because the 502 response is already graceful. +- Document the `FLUENT_AI_KEY` ↔ fluent-ai `ai_api_keys` table relationship in [`fluent-platform/docs/`](../../../../fluent-platform/docs/) for new developers. + +--- + +## 15. Open questions for reviewer + +These are the items the spec discussion landed on but where reviewer pushback would meaningfully change the outcome. Each one has a recommended position (the doc reflects this); each one can be flipped without restructuring the rest of the proposal. 
+ +> **Status: resolved.** All four questions below were addressed in kaseywright's review of [PR #173](https://github.com/eten-tech-foundation/fluent-api/pull/173) on 2026-06-02 (review **APPROVED**). The reviewer confirmed each recommended position; two of them (§15.2, §15.4) came with a request to document the decision, now captured in §9.3 and §8.1 respectively. Per-item resolutions are noted inline below. + +### 15.1 URL layout: is `POST /ai/tools/greek-room/repeated-words` the right shape? + +**Recommended:** Yes — see **D2** and §5. + +> **Resolved (kaseywright, 2026-06-02):** confirmed — "this URL layout works well." [Review comment.](https://github.com/eten-tech-foundation/fluent-api/pull/173#discussion_r3343625894) + +**Alternatives:** + +- `POST /checks/repeated-words` — closer to the verbiage we use elsewhere ("checks" rather than "tools"). Downside: hides the network-bound, possibly-async nature of these endpoints. +- `POST /chapter-assignments/{id}/checks/repeated-words` — nests the check under the resource it operates on. Rejected because it requires fluent-api to enrich the request body from `chapter_assignment_id` → verses + language metadata, which couples fluent-api to fluent-ai's input schema (rejected by **D8**). +- `POST /tools/dispatch` with `{tool: "...", params: {...}}` — collapses the type system at the wire boundary. Same reason fluent-ai rejected this (see [`fluent-ai/greek-room-integration-summary.md`](../../../../fluent-ai/greek-room-integration-summary.md) §1). + +**Decision needed from reviewer:** confirm `/ai/tools/{family}/{tool}` or push back with a preference. + +### 15.2 Permission: `PERMISSIONS.AI_TOOLS_USE` as a string-value alias of `CONTENT_UPDATE`? + +**Recommended:** Yes, alias — see **D10** and §9.3. + +> **Resolved (kaseywright, 2026-06-02):** alias approach confirmed, with the request to document the decision for future reference (done in §9.3). 
[Review comment.](https://github.com/eten-tech-foundation/fluent-api/pull/173#discussion_r3343633722) + +**Alternatives:** + +- Introduce a real new permission row in the `permissions` table with its own role mappings. Requires a migration and seed update. Gives nothing user-visible today but is the "cleaner" RBAC story. +- Reuse `PERMISSIONS.CONTENT_UPDATE` directly at the call site (no alias). Loses the documentary value of seeing "AI_TOOLS_USE" at the route. + +**Decision needed from reviewer:** confirm the alias approach or push back for either of the alternatives. + +### 15.3 Envelope pass-through vs. unwrapping `result` for the sync case? + +**Recommended:** Pass through the full `ToolJobResponse` — see **D9** and §8.2. + +> **Resolved (kaseywright, 2026-06-02):** pass-through confirmed, conditioned on the web-client response following the standard format already in place (see §8.2). [Review comment.](https://github.com/eten-tech-foundation/fluent-api/pull/173#discussion_r3343642943) + +**Alternatives:** + +- For the synchronous case only, return just the `result` field (i.e. `{findings, summary}`) and 200, reserving the envelope for when fluent-ai goes async. Simpler today; mildly more breaking when polling lands. +- Pass through always but add a thin `result_only` query parameter for callers that want the unwrapped shape. Adds API surface for negligible benefit. + +**Decision needed from reviewer:** confirm pass-through or push back for unwrap-now-envelope-later. + +### 15.4 No request enrichment vs. server-side context augmentation? + +**Recommended:** No enrichment — see **D8** and §8.1. + +> **Resolved (kaseywright, 2026-06-02):** forwarding verbatim confirmed; the snake_case naming divergence accepted as a contained, intentional AI-tools-domain exception, with a request to document it (done in §8.1). 
[Review comment.](https://github.com/eten-tech-foundation/fluent-api/pull/173#discussion_r3343677813) + +**Alternatives:** + +- fluent-api looks up `chapter_assignment_id` (or `project_id`) and adds verses + language metadata server-side. Caller sends a thin reference, fluent-api fattens it before forwarding. Trades client flexibility for harder-to-spoof inputs. +- Hybrid: caller sends the full body, fluent-api _validates_ certain fields against its own data (e.g. confirms the caller has access to that `project_id`). Lighter than full enrichment. + +**Decision needed from reviewer:** confirm no enrichment, or push back for either alternative. + +### 15.5 Anything else the reviewer wants surfaced + +If reviewers identify a concern not captured above, please raise it as a comment on the PR. The relevant pre-decisions are summarized in §3 and the rationale is in the predecessor docs ([`fluent-ai/greek-room-integration-summary.md`](../../../../fluent-ai/greek-room-integration-summary.md), [`fluent-ai/greek-room-integration-suggestion.md`](../../../../fluent-ai/greek-room-integration-suggestion.md), [`fluent-ai/greek-room-integration-decisions.md`](../../../../fluent-ai/greek-room-integration-decisions.md)). + +--- diff --git a/docs/proposals/repeated-word-check/ai-tools-integration-status.md b/docs/proposals/repeated-word-check/ai-tools-integration-status.md new file mode 100644 index 0000000..a444143 --- /dev/null +++ b/docs/proposals/repeated-word-check/ai-tools-integration-status.md @@ -0,0 +1,102 @@ +# AI-Tools Integration on fluent-api — Implementation Status + +**Purpose:** A file-by-file account of what is already implemented in the fluent-api tree versus what remains to be done, so an agent or developer picking this work up can orient quickly without re-deriving it from the proposal. If you are new to this feature, **read this file first**, then the design in the companion docs. 
+ +**Companion documents:** + +- [`ai-tools-integration-suggestion.md`](ai-tools-integration-suggestion.md) — **Part 1 of 2.** Contract & design (§1–§10). +- [`ai-tools-integration-operations.md`](ai-tools-integration-operations.md) — **Part 2 of 2.** Operations, forward compatibility, testing, future work (§11–§15), including the **§12.10 "wire up a running ecosystem" runbook**. +- [`ai-tools-integration-summary.md`](ai-tools-integration-summary.md) — short reviewer orientation. + +**Branch:** `jel-word-check` (do **not** create a new branch; do **not** push). +**Implementation commit:** `b055f84` — _feat(ai-tools): add greek-room repeated-words endpoint + fluent-ai client_ (17 files, +1348/−26). + +--- + +## 1. Status at a glance + +| Area | Status | Notes | +| ----------------------------------------------------------------- | --------------- | ------------------------------------------------------------------------------------------ | +| Endpoint `POST /ai/tools/greek-room/repeated-words` | ✅ Implemented | Route + service + types in `src/domains/ai-tools/`, registered on the app. | +| Shared client `callFluentAi` | ✅ Implemented | `src/lib/services/fluent-ai/`, modeled on Mailgun + `withDatabaseRetry`. | +| Env vars `FLUENT_AI_URL` / `FLUENT_AI_KEY` | ✅ Implemented | Required (no defaults) in `src/env.ts`; documented in `.env.example`. | +| Permission alias `AI_TOOLS_USE` | ✅ Implemented | Alias of `content:update` in `src/lib/permissions.ts` (D10). | +| Error codes `AI_SERVICE_UNAVAILABLE` / `AI_TOOL_EXECUTION_FAILED` | ✅ Implemented | Both → HTTP 502 in `src/lib/types.ts`. | +| Unit tests (`callFluentAi`) | ✅ Implemented | `fluent-ai.client.test.ts`. | +| Route tests | ✅ Implemented | `ai-tools.route.test.ts`. | +| Smoke script + npm alias | ✅ Implemented | `scripts/smoke-repeated-words.ts`, `npm run smoke:repeated-words`. | +| Documentation | ✅ Implemented | This file + the split Part 1 / Part 2 proposal + summary. 
| +| **Live end-to-end run** (fluent-api ↔ fluent-ai) | ⏳ **Not done** | Requires local wiring — see "What remains" below and the §12.10 runbook. | +| fluent-platform compose override | ⏳ Not done | **Separate paired PR** (D12). `FLUENT_AI_URL: http://ai:8200`. Not part of this repo's PR. | +| Polling endpoint / DB persistence / frontend / retries / caching | ⛔ Out of scope | Deferred by design — see §2 / §14 of the proposal. | + +Legend: ✅ done · ⏳ remaining · ⛔ intentionally out of scope. + +--- + +## 2. What is implemented (file-by-file, committed at `b055f84`) + +### New domain — `src/domains/ai-tools/` + +- **`ai-tools.route.ts`** — Declares `POST /ai/tools/greek-room/repeated-words` via `createRoute`, guarded by `authenticateUser` + `requirePermission(PERMISSIONS.AI_TOOLS_USE)`. On success returns the full `ToolJobResponse` envelope verbatim (D9): **200** for `completed` and **202** for non-terminal (`queued`/`running`). `failed`/`cancelled` envelopes never reach the success path — `callFluentAi` maps them to `AI_TOOL_EXECUTION_FAILED`, which the route returns as **502**. On error, uses fluent-api's standard `{ error, code, details }` shape via `getHttpStatus`. +- **`ai-tools.service.ts`** — `callRepeatedWords(req)`, the one-line typed wrapper that calls `callFluentAi('tools/greek-room/repeated-words', req, RepeatedWordsResultSchema)`. This is the per-tool pattern future tools copy. +- **`ai-tools.types.ts`** — `VerseInputSchema`, `RepeatedWordsRequestSchema`, `RepeatedWordsFindingSchema`, `RepeatedWordsSummarySchema`, `RepeatedWordsResultSchema`, `RepeatedWordsResponseSchema`, and inferred TS types. **Field names are snake_case** (`lang_code`, `snt_id`, `start_position`, …) — an intentional, contained exception (D8) carrying an in-code comment that links to §8.1 and review comment `#discussion_r3343677813`. + +### New shared client — `src/lib/services/fluent-ai/` + +- **`fluent-ai.client.ts`** — `callFluentAi(toolPath, body, resultSchema, options?)`.
POSTs to `${FLUENT_AI_URL}/api/v1/${toolPath}` with `X-API-Key`; default **30s** timeout (overridable via `options.timeoutMs` / `options.signal`); validates the `result` field against `resultSchema` only when `status === "completed"`; returns `Result<ToolJobResponse<T>>`. Maps transport/HTTP/parse failures → `AI_SERVICE_UNAVAILABLE`, and `failed`/`cancelled` envelopes → `AI_TOOL_EXECUTION_FAILED` (§10.2). Does **not** poll, cache, or retry (by design). The malformed-body branch returns the message `malformed response from fluent-ai (body was not valid JSON)`. +- **`fluent-ai.types.ts`** — `JobStatus` union, `ToolJobError`, and the generic `ToolJobResponse<T>` envelope. Carries the same snake_case in-code comment / D8 cross-reference as `ai-tools.types.ts`. + +### Edits to existing files + +- **`src/app.ts`** — Registers the ai-tools routes on the OpenAPIHono app, the same way existing domains are registered. +- **`src/env.ts`** — Adds `FLUENT_AI_URL` (URL) and `FLUENT_AI_KEY` (non-empty string) to the Zod env schema. Both **required, no defaults**; a missing/blank value fails validation at boot. +- **`src/lib/permissions.ts`** — Adds `PERMISSIONS.AI_TOOLS_USE = 'content:update'` (alias of `CONTENT_UPDATE`), with a comment linking to §9.3 / D10 and review comment `#discussion_r3343633722`. +- **`src/lib/types.ts`** — Adds `ErrorCode.AI_SERVICE_UNAVAILABLE` and `ErrorCode.AI_TOOL_EXECUTION_FAILED`, both mapped to HTTP **502** in `ErrorHttpStatus`. +- **`.env.example`** — Adds documented `FLUENT_AI_URL` and `FLUENT_AI_KEY` entries (standalone default `http://localhost:8200`, dev key `fai_dev_admin`). +- **`.env.test`** — Adds test values for the two vars so the suite boots. +- **`package.json`** — Adds the `smoke:repeated-words` script.
+ +### Tests & tooling + +- **`src/lib/services/fluent-ai/fluent-ai.client.test.ts`** — Unit tests for `callFluentAi` with `fetch` stubbed: completed/queued happy paths, failed/cancelled → `AI_TOOL_EXECUTION_FAILED`, 4xx/5xx/network/parse/schema failures → `AI_SERVICE_UNAVAILABLE`, timeout, abort signal, and request-shape assertions (header, URL). +- **`src/domains/ai-tools/ai-tools.route.test.ts`** — Route tests: 401 unauthenticated, 403 missing permission, 400 invalid body, 200 completed pass-through, 202 queued pass-through, 502 on failed/transport error, and a "no enrichment" assertion that the body is forwarded verbatim. +- **`scripts/smoke-repeated-words.ts`** — Host-runnable probe against a live fluent-api + fluent-ai pair. CLI flags `--url`, `--token`, `--cookie`, `--timeout`, `--raw`; reads `FLUENT_API_URL` / `FLUENT_API_TOKEN` from env; default base URL `http://localhost:9999`. Posts the canned 3-verse corpus and sanity-checks the envelope (see §13.3). + +--- + +## 3. What remains (and who owns it) + +### 3.1 Live end-to-end verification (this repo, but local-only) + +The code is complete and the automated suite passes, but **no live fluent-api ↔ fluent-ai round-trip has been exercised** as part of this work. Doing so needs machine-specific wiring that is intentionally _not_ committed: + +1. Add `FLUENT_AI_URL` + `FLUENT_AI_KEY` to the git-ignored `fluent-api/.env`. +2. Bring up the stack (ecosystem mode) or run fluent-ai alongside standalone fluent-api. +3. Sign in via BetterAuth to obtain a bearer token. +4. Run `npm run smoke:repeated-words -- --url <fluent-api-url> --token <bearer-token>`. + +The full step-by-step procedure (including the BetterAuth `set-auth-token` capture and the expected sanity-check output) is the **§12.10 runbook** in [`ai-tools-integration-operations.md`](ai-tools-integration-operations.md). Nothing in that runbook should be committed to this repo — the `.env` values are per-machine secrets. 
+ +### 3.2 fluent-platform compose override (separate paired PR — D12) + +[`fluent-platform/compose.yaml`](../../../../fluent-platform/compose.yaml) needs `FLUENT_AI_URL: http://ai:8200` added to the `api` service's `environment:` block so ecosystem mode resolves fluent-ai by service name. **`FLUENT_AI_KEY` is deliberately not overridden there** — it stays a shared secret sourced from `fluent-api/.env` (§12.4). This is a small, logic-free PR that ships alongside the fluent-api PR. **Do not touch fluent-platform from this task.** + +### 3.3 Out of scope (do not build — §2 / §14) + +Polling endpoint (`GET /ai/tools/jobs/{job_id}`), DB persistence of tool runs, frontend hooks / editor squiggles, transport retries, response caching, rate limits, MCP facade. All deferred by design; the `callFluentAi` / `ToolJobResponse` shapes are forward-compatible with them. + +--- + +## 4. Verification + +Run from `fluent-api/`: + +```bash +npm run typecheck +npm run format:check +npm run lint +npm test +``` + +The automated suite does **not** include the smoke script (it needs a live stack). See §13 of the proposal for the full testing strategy. diff --git a/docs/proposals/repeated-word-check/ai-tools-integration-suggestion.md b/docs/proposals/repeated-word-check/ai-tools-integration-suggestion.md index 6e8059d..1123043 100644 --- a/docs/proposals/repeated-word-check/ai-tools-integration-suggestion.md +++ b/docs/proposals/repeated-word-check/ai-tools-integration-suggestion.md @@ -1,9 +1,19 @@ -# AI-Tools Integration on fluent-api — Proposal +# AI-Tools Integration on fluent-api — Proposal (Part 1 of 2: Contract & Design) -**Status:** Draft for review. +**Status:** Reviewed and **approved** (kaseywright, PR #173, 2026-06-02). Implemented on branch `jel-word-check` — see the implementation-status doc below for what currently exists in the tree. **Scope:** Extend fluent-api to expose AI tools implemented by fluent-ai, starting with Greek-Room's _Repeated Words_ check. 
The exposed pattern is meant to absorb every future AI tool (LLM drafting, embeddings, fine-tuning, other Greek-Room checks) without renegotiating the contract. -**Companion document:** [`fluent-api/proposals/ai-tools-integration-summary.md`](ai-tools-integration-summary.md) — short reviewer orientation. -**Predecessors on the fluent-ai side:** [`fluent-ai/greek-room-integration-summary.md`](../../fluent-ai/greek-room-integration-summary.md), [`fluent-ai/greek-room-integration-suggestion.md`](../../fluent-ai/greek-room-integration-suggestion.md), [`fluent-ai/greek-room-integration-decisions.md`](../../fluent-ai/greek-room-integration-decisions.md). + +**This document is Part 1 of 2.** It covers the contract and design: background, scope, decisions, the URL, the file layout, the `callFluentAi` utility, request/response shapes, auth, and error translation (§1–§10). + +**Companion documents:** + +- [`ai-tools-integration-operations.md`](ai-tools-integration-operations.md) — **Part 2 of 2.** Forward-compatibility (the job-queue protocol), service discovery / Docker / environment wiring (including the step-by-step "wire up a running ecosystem" checklist), testing strategy, future work, and the reviewer Q&A (§11–§15). +- [`ai-tools-integration-status.md`](ai-tools-integration-status.md) — **Implementation status.** What is already implemented in the tree (file-by-file) versus what remains to be done. Start here if you are an agent or developer picking this work up. +- [`ai-tools-integration-summary.md`](ai-tools-integration-summary.md) — short reviewer orientation. + +**Predecessors on the fluent-ai side:** [`fluent-ai/greek-room-integration-summary.md`](../../../../fluent-ai/greek-room-integration-summary.md), [`fluent-ai/greek-room-integration-suggestion.md`](../../../../fluent-ai/greek-room-integration-suggestion.md), [`fluent-ai/greek-room-integration-decisions.md`](../../../../fluent-ai/greek-room-integration-decisions.md). 
+ +> **Note on document split.** This proposal was split into two files at the §10/§11 boundary so each stays under the repo's markdown line-count lint limit. Sections are numbered continuously across both files (Part 1 ends at §10; Part 2 begins at §11), so all internal "see §N" references remain valid across the pair. --- @@ -53,7 +63,7 @@ Relative paths in this document (e.g. `../../fluent-platform/...`) assume the st - Async job polling endpoint on fluent-api (`GET /ai/tools/jobs/{job_id}` or similar). Not built because fluent-ai also has not built the corresponding endpoint yet — both sides chose "lightweight now" per fluent-ai decision **D1**. - Frontend (fluent-web) hooks and squiggle UI. Frontend is a separate session/PR. - DB persistence of tool runs / findings. No `ai_tool_runs` or `check_results` table is introduced. -- Net-new cross-repo docker orchestration. The substrate already exists as [`fluent-platform`](../../fluent-platform/README.md); this PR adds two small entries (`FLUENT_AI_URL` override) to [`fluent-platform/compose.yaml`](../../fluent-platform/compose.yaml) and ships them alongside the fluent-api change. See §12 for details. +- Net-new cross-repo docker orchestration. The substrate already exists as [`fluent-platform`](../../../../fluent-platform/README.md); the paired fluent-platform PR (**D12**) adds one small entry (the `FLUENT_AI_URL` override) to [`fluent-platform/compose.yaml`](../../../../fluent-platform/compose.yaml) and ships alongside the fluent-api change. See §12 for details. - Rate limits, request-size limits, MCP facade, SSE/WebSocket streaming, scheduled runs, multi-tenant fairness. All deferred at the fluent-ai level and inherited here. --- @@ -62,20 +72,20 @@ Relative paths in this document (e.g. `../../fluent-platform/...`) assume the st These are the decisions captured during the spec discussion. Each is restated here so reviewers can discuss the conclusion without reading the supporting analysis. 
-| # | Decision | Short rationale | -| ------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| **D1** | PR scope is "minimum proxy" — no DB persistence, no job queue exercised in this PR. | Repeated-words is fast (<1s) and re-runnable; persistence is not motivated by this tool. Defer until a slow tool justifies a `ai_tool_runs` table. | -| **D2** | URL is `POST /ai/tools/greek-room/repeated-words`. | Introduces `/ai/` as fluent-api's first top-level service-family namespace. Telegraphs "network-bound, potentially slow, possibly async" — characteristics that local CRUD endpoints don't share. Per-tool URL preserves OpenAPI type-safety. Alternatives: `/checks/repeated-words` (more in convention but hides the proxy nature), nested under `/chapter-assignments/{id}/` (requires server-side enrichment which we reject in D8). | -| **D3** | Polling lives in the _browser_ via TanStack Query's `refetchInterval`, not in fluent-api. fluent-api is a thin pass-through for both kickoff and (future) polling. 
| Decouples slow tools from fluent-api's request budget. Aligns with fluent-web's existing TanStack Query usage. The polling code path is not exercised today because fluent-ai always returns `status: "completed"` synchronously. | -| **D4** | File layout: shared utility at [`fluent-api/src/lib/services/fluent-ai/fluent-ai.client.ts`](../src/lib/services/fluent-ai/fluent-ai.client.ts); per-tool routes/services in [`fluent-api/src/domains/ai-tools/`](../src/domains/ai-tools/). One route file for all tools; per-tool Zod schemas keep OpenAPI documentation fully typed. | Mirrors the existing [`fluent-api/src/lib/services/notifications/mailgun.service.ts`](../src/lib/services/notifications/mailgun.service.ts) pattern for "free functions wrapping a third-party API" and the existing [`fluent-api/src/lib/db-retry.ts`](../src/lib/db-retry.ts) pattern for "higher-order utility used by many call sites." Avoids a single one-size-fits-all dispatcher that would degrade OpenAPI schemas to `dict[str, Any]`. | -| **D5** | Service discovery / docker networking is handled by the existing [`fluent-platform`](../../fluent-platform/README.md) orchestrator. This PR adds two env vars on the fluent-api side and one `environment:` override on the fluent-platform side (`FLUENT_AI_URL: http://ai:8200`). See §12. | fluent-platform already wires `db`, `api`, `worker`, `ai`, `web` together on a shared network; we plug in to that substrate rather than invent a new one. | -| **D6** | A single shared `FLUENT_AI_KEY` is provisioned for the fluent-api → fluent-ai hop. If another consumer of fluent-ai appears later, it gets its own key. | Per-user keys give zero security benefit at this layer (everyone going through fluent-api is already authenticated to fluent-api). Single key minimizes IT complexity. | -| **D7** | Error translation specifics deferred to implementation. 
If conformity between the two error systems is awkward, prefer harmonizing fluent-ai toward fluent-api's patterns rather than the other way. | At the spec level there are no hard constraints; the safe defaults (5xx from fluent-ai → 502 on fluent-api with `ErrorCode.AI_SERVICE_UNAVAILABLE`) are obvious. | -| **D8** | No request enrichment. fluent-api forwards the request body to fluent-ai verbatim. fluent-web sends the full `RepeatedWordsRequest` shape including `lang_code`, `lang_name`, `project_id`, `project_name`, `verses[]`. **Reviewer-confirmed (kaseywright, 2026-06-02):** forwarding verbatim is approved; the snake*case field naming is an \_intentional, contained* exception to fluent-api's camelCase convention, scoped to the AI-tools domain. See §8.1 — please leave this divergence as-is rather than "normalizing" it to camelCase, since it intentionally mirrors the fluent-ai contract. | Maximum flexibility for the caller. Avoids coupling fluent-api to fluent-ai's request schema (today and tomorrow). The alternatives (per-service renaming or mapper functions) add cost without a corresponding benefit; keeping the divergence contained in the AI-tools domain is the lesser evil. | -| **D9** | The full `ToolJobResponse` envelope is passed through to fluent-web unchanged. No unwrap to `result` for the synchronous case. **Reviewer-confirmed (kaseywright, 2026-06-02):** approved on the condition that the response delivered to the web client conforms to the standard response format already in place on fluent-api (see §8.2). | Forward-compatible with TanStack-based polling — the same hook code consumes the envelope today (`status: completed`) and tomorrow (`status: queued` → polled to `completed`). | -| **D10** | Auth on the new endpoint: introduce `PERMISSIONS.AI_TOOLS_USE` as an _alias_ with the same underlying value as `CONTENT_UPDATE`. **Reviewer-confirmed (kaseywright, 2026-06-02):** alias approach approved; the trade-off (flexibility vs. 
user-results) was noted as acceptable provided the decision is documented here for future reference. See §9.3. | Cosmetically separates "can edit content" from "can invoke AI tools" without making a real distinction yet. Trivial to peel apart later. | -| **D11** | A smoke test analogous to [`fluent-ai/scripts/smoke_repeated_words.py`](../../fluent-ai/scripts/smoke_repeated_words.py) is added, runnable from the host with both services up. | Lets devs verify the cross-service plumbing without running the full vitest suite. | -| **D12** | This work ships as a **coordinated pair of PRs**: one against fluent-api (the bulk of the work) and one small PR against fluent-platform (compose env-var override + 1–2 README lines). Either order of merge is fine; both should be ready for review together. | The fluent-platform PR is small and contains no logic, so it can land first to unblock ecosystem-mode dev. Reviewers should be able to read both PRs side-by-side. | +| # | Decision | Short rationale | +| ------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | 
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| **D1** | PR scope is "minimum proxy" — no DB persistence, no job queue exercised in this PR. | Repeated-words is fast (<1s) and re-runnable; persistence is not motivated by this tool. Defer until a slow tool justifies a `ai_tool_runs` table. | +| **D2** | URL is `POST /ai/tools/greek-room/repeated-words`. | Introduces `/ai/` as fluent-api's first top-level service-family namespace. Telegraphs "network-bound, potentially slow, possibly async" — characteristics that local CRUD endpoints don't share. Per-tool URL preserves OpenAPI type-safety. Alternatives: `/checks/repeated-words` (more in convention but hides the proxy nature), nested under `/chapter-assignments/{id}/` (requires server-side enrichment which we reject in D8). | +| **D3** | Polling lives in the _browser_ via TanStack Query's `refetchInterval`, not in fluent-api. fluent-api is a thin pass-through for both kickoff and (future) polling. | Decouples slow tools from fluent-api's request budget. Aligns with fluent-web's existing TanStack Query usage. The polling code path is not exercised today because fluent-ai always returns `status: "completed"` synchronously. | +| **D4** | File layout: shared utility at [`fluent-api/src/lib/services/fluent-ai/fluent-ai.client.ts`](../../../src/lib/services/fluent-ai/fluent-ai.client.ts); per-tool routes/services in [`fluent-api/src/domains/ai-tools/`](../../../src/domains/ai-tools/). One route file for all tools; per-tool Zod schemas keep OpenAPI documentation fully typed. 
| Mirrors the existing [`fluent-api/src/lib/services/notifications/mailgun.service.ts`](../../../src/lib/services/notifications/mailgun.service.ts) pattern for "free functions wrapping a third-party API" and the existing [`fluent-api/src/lib/db-retry.ts`](../../../src/lib/db-retry.ts) pattern for "higher-order utility used by many call sites." Avoids a single one-size-fits-all dispatcher that would degrade OpenAPI schemas to `dict[str, Any]`. | +| **D5** | Service discovery / docker networking is handled by the existing [`fluent-platform`](../../../../fluent-platform/README.md) orchestrator. This PR adds two env vars on the fluent-api side and one `environment:` override on the fluent-platform side (`FLUENT_AI_URL: http://ai:8200`). See §12. | fluent-platform already wires `db`, `api`, `worker`, `ai`, `web` together on a shared network; we plug in to that substrate rather than invent a new one. | +| **D6** | A single shared `FLUENT_AI_KEY` is provisioned for the fluent-api → fluent-ai hop. If another consumer of fluent-ai appears later, it gets its own key. | Per-user keys give zero security benefit at this layer (everyone going through fluent-api is already authenticated to fluent-api). Single key minimizes IT complexity. | +| **D7** | Error translation specifics deferred to implementation. If conformity between the two error systems is awkward, prefer harmonizing fluent-ai toward fluent-api's patterns rather than the other way. | At the spec level there are no hard constraints; the safe defaults (5xx from fluent-ai → 502 on fluent-api with `ErrorCode.AI_SERVICE_UNAVAILABLE`) are obvious. | +| **D8** | No request enrichment. fluent-api forwards the request body to fluent-ai verbatim. fluent-web sends the full `RepeatedWordsRequest` shape including `lang_code`, `lang_name`, `project_id`, `project_name`, `verses[]`. 
**Reviewer-confirmed (kaseywright, 2026-06-02):** forwarding verbatim is approved; the snake_case field naming is an _intentional, contained_ exception to fluent-api's camelCase convention, scoped to the AI-tools domain. See §8.1 — please leave this divergence as-is rather than "normalizing" it to camelCase, since it intentionally mirrors the fluent-ai contract. | Maximum flexibility for the caller. Avoids coupling fluent-api to fluent-ai's request schema (today and tomorrow). The alternatives (per-service renaming or mapper functions) add cost without a corresponding benefit; keeping the divergence contained in the AI-tools domain is the lesser evil. | +| **D9** | The full `ToolJobResponse` envelope is passed through to fluent-web unchanged. No unwrap to `result` for the synchronous case. **Reviewer-confirmed (kaseywright, 2026-06-02):** approved on the condition that the response delivered to the web client conforms to the standard response format already in place on fluent-api (see §8.2). | Forward-compatible with TanStack-based polling — the same hook code consumes the envelope today (`status: completed`) and tomorrow (`status: queued` → polled to `completed`). | +| **D10** | Auth on the new endpoint: introduce `PERMISSIONS.AI_TOOLS_USE` as an _alias_ with the same underlying value as `CONTENT_UPDATE`. **Reviewer-confirmed (kaseywright, 2026-06-02):** alias approach approved; the trade-off (flexibility vs. user-results) was noted as acceptable provided the decision is documented here for future reference. See §9.3. | Cosmetically separates "can edit content" from "can invoke AI tools" without making a real distinction yet. Trivial to peel apart later. | +| **D11** | A smoke test analogous to [`fluent-ai/scripts/smoke_repeated_words.py`](../../../../fluent-ai/scripts/smoke_repeated_words.py) is added, runnable from the host with both services up. | Lets devs verify the cross-service plumbing without running the full vitest suite. 
| +| **D12** | This work ships as a **coordinated pair of PRs**: one against fluent-api (the bulk of the work) and one small PR against fluent-platform (compose env-var override + 1–2 README lines). Either order of merge is fine; both should be ready for review together. | The fluent-platform PR is small and contains no logic, so it can land first to unblock ecosystem-mode dev. Reviewers should be able to read both PRs side-by-side. | --- @@ -147,15 +157,15 @@ The path layout makes room for the polling endpoint without name collisions: - `POST /ai/tools/{family}/{tool-name}` — kickoff (this PR for `greek-room/repeated-words`). - `GET /ai/tools/jobs/{job_id}` — poll (future, when first slow tool ships). -Note that the existing [`fluent-api/src/domains/usfm/usfm.route.ts`](../src/domains/usfm/usfm.route.ts) already owns `GET /jobs/{job_id}` for pg-boss USFM-export polling. **Keeping the AI-tools polling endpoint under `/ai/tools/jobs/{id}` avoids that collision** and lets the two job systems coexist with different response shapes (pg-boss-native vs. fluent-ai's `ToolJobResponse` envelope). +Note that the existing [`fluent-api/src/domains/usfm/usfm.route.ts`](../../../src/domains/usfm/usfm.route.ts) already owns `GET /jobs/{job_id}` for pg-boss USFM-export polling. **Keeping the AI-tools polling endpoint under `/ai/tools/jobs/{id}` avoids that collision** and lets the two job systems coexist with different response shapes (pg-boss-native vs. fluent-ai's `ToolJobResponse` envelope). ### 5.2 OpenAPI documentation -Each tool gets its own `createRoute({...})` call in [`fluent-api/src/domains/ai-tools/ai-tools.route.ts`](../src/domains/ai-tools/ai-tools.route.ts) with: +Each tool gets its own `createRoute({...})` call in [`fluent-api/src/domains/ai-tools/ai-tools.route.ts`](../../../src/domains/ai-tools/ai-tools.route.ts) with: - A typed `RepeatedWordsRequestSchema` (Zod schema mirroring fluent-ai's `RepeatedWordsRequest`). 
- A typed `RepeatedWordsResponseSchema` wrapping the `ToolJobResponse[RepeatedWordsResult]` envelope. -- Proper 4xx/5xx response schemas using the existing `Result` → HTTP-status conventions ([`fluent-api/src/lib/types.ts`](../src/lib/types.ts)). +- Proper 4xx/5xx response schemas using the existing `Result` → HTTP-status conventions ([`fluent-api/src/lib/types.ts`](../../../src/lib/types.ts)). This means the `/reference` Scalar docs at fluent-api's root will display the full request/response shape for each tool. No `dict[str, Any]` degradation. Adding a new tool means adding a new `createRoute(...)` block in the same file, registering it on the OpenAPIHono app — three to ten lines plus schemas. @@ -189,13 +199,13 @@ fluent-api/src/ The fluent-api codebase already has the right precedent for both pieces: -- **`lib/services/fluent-ai/`** mirrors [`fluent-api/src/lib/services/notifications/mailgun.service.ts`](../src/lib/services/notifications/mailgun.service.ts) — free functions exported from a service file under `lib/services/{vendor}/{vendor}.service.ts`. The Mailgun file returns `Promise>` and reads its credentials directly from `process.env`. Our `callFluentAi` follows the same shape. -- **`callFluentAi` as a higher-order utility** mirrors [`fluent-api/src/lib/db-retry.ts`](../src/lib/db-retry.ts)'s `withDatabaseRetry(operation, options)` pattern. One shared utility, many call sites, no code duplication, no over-generalization. +- **`lib/services/fluent-ai/`** mirrors [`fluent-api/src/lib/services/notifications/mailgun.service.ts`](../../../src/lib/services/notifications/mailgun.service.ts) — free functions exported from a service file under `lib/services/{vendor}/{vendor}.service.ts`. The Mailgun file returns `Promise>` and reads its credentials directly from `process.env`. Our `callFluentAi` follows the same shape. 
+- **`callFluentAi` as a higher-order utility** mirrors [`fluent-api/src/lib/db-retry.ts`](../../../src/lib/db-retry.ts)'s `withDatabaseRetry(operation, options)` pattern. One shared utility, many call sites, no code duplication, no over-generalization. - **`domains/ai-tools/`** as a domain folder mirrors every other domain in the codebase (`domains/projects/`, `domains/translated-verses/`, etc.). Routes/services/types separated. Hono `createRoute` per endpoint. `Result` returned from services and converted via `getHttpStatus(error)` in routes. ### 6.2 Why not a generic dispatcher -A generic `POST /ai/dispatch` endpoint accepting `{tool: string, params: unknown}` was considered and rejected (this echoes the fluent-ai-side decision **D2** in [`fluent-ai/greek-room-integration-decisions.md`](../../fluent-ai/greek-room-integration-decisions.md)). The reasons are the same in TypeScript-land: +A generic `POST /ai/dispatch` endpoint accepting `{tool: string, params: unknown}` was considered and rejected (this echoes the fluent-ai-side decision **D2** in [`fluent-ai/greek-room-integration-decisions.md`](../../../../fluent-ai/greek-room-integration-decisions.md)). The reasons are the same in TypeScript-land: - OpenAPI/Scalar docs would degrade to `unknown` payloads. - Each new tool would lose its named, typed request/response in the docs. @@ -210,7 +220,7 @@ A generic `POST /ai/dispatch` endpoint accepting `{tool: string, params: unknown ## 7. The shared utility: `callFluentAi` -The single piece of _new mechanism_ this PR introduces is the function in [`fluent-api/src/lib/services/fluent-ai/fluent-ai.client.ts`](../src/lib/services/fluent-ai/fluent-ai.client.ts). +The single piece of _new mechanism_ this PR introduces is the function in [`fluent-api/src/lib/services/fluent-ai/fluent-ai.client.ts`](../../../src/lib/services/fluent-ai/fluent-ai.client.ts). ### 7.1 Signature @@ -234,7 +244,7 @@ export async function callFluentAi( ### 7.2 What it does -1. 
Reads `env.FLUENT_AI_URL` and `env.FLUENT_AI_KEY` (validated at boot in [`fluent-api/src/env.ts`](../src/env.ts)). +1. Reads `env.FLUENT_AI_URL` and `env.FLUENT_AI_KEY` (validated at boot in [`fluent-api/src/env.ts`](../../../src/env.ts)). 2. POSTs to `${FLUENT_AI_URL}/api/v1/${toolPath}` with: - `Content-Type: application/json` - `X-API-Key: ${FLUENT_AI_KEY}` @@ -254,10 +264,10 @@ export async function callFluentAi( Compare it to the existing utilities it's modeled on: -- [`withDatabaseRetry(operation, options)`](../src/lib/db-retry.ts) is a higher-order async wrapper. `callFluentAi` is also a higher-order async wrapper, parameterized by request/result types and the runtime Zod schema. -- [`sendInvitationEmail({email, ticketUrl, ...})`](../src/lib/services/notifications/mailgun.service.ts) is a free function in `lib/services/` that wraps a third-party API and returns `Promise>`. `callFluentAi` is a free function in `lib/services/` that wraps a third-party API and returns `Promise>`. +- [`withDatabaseRetry(operation, options)`](../../../src/lib/db-retry.ts) is a higher-order async wrapper. `callFluentAi` is also a higher-order async wrapper, parameterized by request/result types and the runtime Zod schema. +- [`sendInvitationEmail({email, ticketUrl, ...})`](../../../src/lib/services/notifications/mailgun.service.ts) is a free function in `lib/services/` that wraps a third-party API and returns `Promise>`. `callFluentAi` is a free function in `lib/services/` that wraps a third-party API and returns `Promise>`. -The point of `callFluentAi` is **not** to be the only function callers ever touch. Each tool gets a typed wrapper in [`ai-tools.service.ts`](../src/domains/ai-tools/ai-tools.service.ts) that calls `callFluentAi` once. The wrapper is what the route file imports; the shared utility is a private implementation detail. +The point of `callFluentAi` is **not** to be the only function callers ever touch. 
Each tool gets a typed wrapper in [`ai-tools.service.ts`](../../../src/domains/ai-tools/ai-tools.service.ts) that calls `callFluentAi` once. The wrapper is what the route file imports; the shared utility is a private implementation detail. ### 7.5 Example per-tool wrapper @@ -300,7 +310,7 @@ export async function callCoherenceCheck( ### 8.1 The forward direction (fluent-web → fluent-api → fluent-ai) -Per **D8** (no enrichment), the request body shape on `POST /ai/tools/greek-room/repeated-words` is **identical** to fluent-ai's `RepeatedWordsRequest`. Codified in Zod in [`fluent-api/src/domains/ai-tools/ai-tools.types.ts`](../src/domains/ai-tools/ai-tools.types.ts): +Per **D8** (no enrichment), the request body shape on `POST /ai/tools/greek-room/repeated-words` is **identical** to fluent-ai's `RepeatedWordsRequest`. Codified in Zod in [`fluent-api/src/domains/ai-tools/ai-tools.types.ts`](../../../src/domains/ai-tools/ai-tools.types.ts): ```ts export const VerseInputSchema = z.object({ @@ -327,7 +337,7 @@ Notes: > **ℹ️ Intentional convention exception — please leave as-is.** _Reviewer-confirmed by kaseywright on 2026-06-02 ([PR #173, review comment](https://github.com/eten-tech-foundation/fluent-api/pull/173#discussion_r3343677813)); decision **D8**._ The snake*case field names in `src/domains/ai-tools/` and `src/lib/services/fluent-ai/` are an \_intentional* divergence from fluent-api's camelCase convention, kept so the wire contract with fluent-ai stays a verbatim pass-through. The reviewer noted that the naming-case divergence is something he'd normally prefer to avoid, but that the alternatives (per-service renaming or mapper functions) are no more rewarding, so this contained exception is the accepted trade-off. When working in this area, please keep these fields in snake_case rather than "normalizing" them to camelCase — renaming them would silently break the fluent-ai contract. 
The exception is scoped strictly to the AI-tools domain; the rest of fluent-api remains camelCase. > -> **Implementation note:** it helps to place a short code comment next to the snake_case Zod schemas (at least in [`ai-tools.types.ts`](../src/domains/ai-tools/ai-tools.types.ts) and [`fluent-ai.types.ts`](../src/lib/services/fluent-ai/fluent-ai.types.ts)) explaining the convention and linking back to both this decision (**D8** / §8.1) and the originating review comment. The comment is what an AI agent or contributor will actually see at the edit site, so it — alongside this proposal — is the most durable guardrail against an accidental rename. Suggested wording: +> **Implementation note:** it helps to place a short code comment next to the snake_case Zod schemas (at least in [`ai-tools.types.ts`](../../../src/domains/ai-tools/ai-tools.types.ts) and [`fluent-ai.types.ts`](../../../src/lib/services/fluent-ai/fluent-ai.types.ts)) explaining the convention and linking back to both this decision (**D8** / §8.1) and the originating review comment. The comment is what an AI agent or contributor will actually see at the edit site, so it — alongside this proposal — is the most durable guardrail against an accidental rename. Suggested wording: > > ```ts > // Intentional snake_case — verbatim wire contract with fluent-ai (decision D8). @@ -495,7 +505,7 @@ The alias has the same string value, which means `requirePermission(PERMISSIONS. If reviewers prefer a real new permission row from day one, that's a defensible alternative; it costs a migration and seeding work and gives no immediate user-visible benefit. The alias approach was chosen because it's reversible from either direction. -> **Reviewer-confirmed** — kaseywright, 2026-06-02 ([PR #173, review comment](https://github.com/eten-tech-foundation/fluent-api/pull/173#discussion_r3343633722)); decision **D10**. The alias approach is approved. 
The reviewer noted this as a flexibility-vs-results trade-off that is acceptable provided the decision is documented for future reference — which is the purpose of this note. **Implementation note:** add a short comment beside the alias in [`permissions.ts`](../src/lib/permissions.ts) linking back to this decision and the review comment, so the intent is visible at the edit site. For whoever revisits it later: promoting `AI_TOOLS_USE` to a real, distinct permission means adding a row to the `permissions` table, mapping it to the appropriate roles in seed data, and changing only the string value here in [`permissions.ts`](../src/lib/permissions.ts) — no call sites that already import `PERMISSIONS.AI_TOOLS_USE` need to change. +> **Reviewer-confirmed** — kaseywright, 2026-06-02 ([PR #173, review comment](https://github.com/eten-tech-foundation/fluent-api/pull/173#discussion_r3343633722)); decision **D10**. The alias approach is approved. The reviewer noted this as a flexibility-vs-results trade-off that is acceptable provided the decision is documented for future reference — which is the purpose of this note. **Implementation note:** add a short comment beside the alias in [`permissions.ts`](../../../src/lib/permissions.ts) linking back to this decision and the review comment, so the intent is visible at the edit site. For whoever revisits it later: promoting `AI_TOOLS_USE` to a real, distinct permission means adding a row to the `permissions` table, mapping it to the appropriate roles in seed data, and changing only the string value here in [`permissions.ts`](../../../src/lib/permissions.ts) — no call sites that already import `PERMISSIONS.AI_TOOLS_USE` need to change. 
### 9.4 The `X-API-Key` for fluent-ai @@ -509,7 +519,7 @@ Per **D7**, the exact mapping is settled at implementation time, and if conformi ### 10.1 New `ErrorCode` entries on fluent-api -Two new entries are added to [`fluent-api/src/lib/types.ts`](../src/lib/types.ts)'s `ErrorCode` enum: +Two new entries are added to [`fluent-api/src/lib/types.ts`](../../../src/lib/types.ts)'s `ErrorCode` enum: ```ts export enum ErrorCode { @@ -547,7 +557,7 @@ export const ErrorHttpStatus: Record = { ### 10.3 Route-level translation -The Hono route handler uses `getHttpStatus(error)` from [`fluent-api/src/lib/types.ts`](../src/lib/types.ts) exactly as every existing fluent-api route does. The new `AI_*` codes plug into the same conversion path: +The Hono route handler uses `getHttpStatus(error)` from [`fluent-api/src/lib/types.ts`](../../../src/lib/types.ts) exactly as every existing fluent-api route does. The new `AI_*` codes plug into the same conversion path: ```ts // fluent-api/src/domains/ai-tools/ai-tools.route.ts (excerpt) @@ -586,398 +596,10 @@ If during implementation the team finds fluent-ai's error envelope shape (`{type --- -## 11. The job-queue protocol — forward compatibility - -This section describes what this PR sets up but does not exercise. It is deliberately detailed so reviewers can sanity-check that the contract is sound before fluent-ai (and a slow tool) actually need it. - -### 11.1 The contract today vs. tomorrow - -**Today** every response from fluent-ai is synchronous with `status: "completed"`. fluent-api hands the envelope to fluent-web as a 200 response. No polling occurs. - -**Tomorrow**, when fluent-ai introduces a slow tool, it can return `202 Accepted` with `status: "queued"` and a real `job_id` that exists in fluent-ai's job table. 
The protocol fluent-ai will (eventually) expose is the existing fluent-ai decision **D3** envelope plus a new polling endpoint: - -``` -GET /api/v1/tools/jobs/{job_id} -→ ToolJobResponse with current status and (if completed) result -``` - -Returns 200 in all states (queued/running/completed/failed/cancelled). The HTTP status is _not_ used to communicate terminal vs. non-terminal — only the envelope's `status` field is. - -### 11.2 fluent-api's pass-through polling endpoint (future) - -When fluent-ai adds the polling endpoint, fluent-api adds: - -``` -GET /ai/tools/jobs/{job_id} -→ Pass-through of fluent-ai's response, with the same auth (BetterAuth session + AI_TOOLS_USE permission) -``` - -Implementation will be a second helper alongside `callFluentAi`: - -```ts -// future, not in this PR -export async function pollToolJob( - jobId: string, - resultSchema: z.ZodType -): Promise>>; -``` - -### 11.3 Why polling lives in the browser, not in fluent-api - -Per **D3**. The detailed reasoning, repeated for completeness: - -- **Decouples slow tools from fluent-api's request budget.** A 5-minute tool does not hold a browser-to-fluent-api socket open for 5 minutes through whatever proxies, load balancers, or middle boxes sit between them. -- **Matches the editor UX shape.** When the eventual squiggle-on-typing UX is built, the browser already has its own state machine for "user has typed, debounce, kick off check, show pending indicator, show squiggles when result arrives." Putting polling on the server adds nothing to that loop. -- **TanStack Query has the right primitives.** `refetchInterval` accepts a function that inspects the current data and returns `false` to stop polling — i.e., literally `(data) => isTerminal(data.status) ? false : 1500`. No custom polling library needed. -- **Aligns with the existing fluent-web pattern.** Every existing fluent-web API hook calls `fetch` directly; there is no centralized server-state abstraction beyond TanStack itself. 
Adding server-side polling would be the foreign element. - -### 11.4 What the frontend hook will look like (out of scope, sketched) - -This is _not_ part of this PR, but is sketched here so reviewers can see that the backend contract is consumable. - -```ts -// fluent-web/src/lib/api/useToolJob.ts (future) - -import { useQuery } from '@tanstack/react-query'; - -import type { ToolJobResponse } from './tool-job-types'; - -const TERMINAL: Set['status']> = new Set([ - 'completed', - 'failed', - 'cancelled', -]); - -export function useToolJob( - jobId: string | null, - opts?: { pollIntervalMs?: number; enabled?: boolean } -) { - return useQuery>({ - queryKey: ['ai-tools', 'jobs', jobId], - queryFn: () => - fetch(`${config.api.url}/ai/tools/jobs/${jobId}`, { credentials: 'include' }).then((r) => - r.json() - ), - enabled: !!jobId && (opts?.enabled ?? true), - refetchInterval: (q) => - q.state.data && TERMINAL.has(q.state.data.status) ? false : (opts?.pollIntervalMs ?? 1500), - }); -} -``` - -```ts -// fluent-web/src/features/checks/hooks/useRepeatedWords.ts (future) - -export function useRepeatedWords() { - const [pendingJobId, setPendingJobId] = useState(null); - - const kickoff = useMutation({ - mutationFn: (req: RepeatedWordsRequest) => - fetch(`${config.api.url}/ai/tools/greek-room/repeated-words`, { - method: 'POST', - credentials: 'include', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify(req), - }).then((r) => r.json() as Promise>), - onSuccess: (envelope) => { - if (envelope.status === 'queued' || envelope.status === 'running') { - setPendingJobId(envelope.job_id); - } - }, - }); - - const polled = useToolJob(pendingJobId); - - // Today, only kickoff.data is ever populated. Tomorrow, polled.data takes over. - const envelope = polled.data ?? (kickoff.data?.status === 'completed' ? 
kickoff.data : null); - - return { kickoff, envelope }; -} -``` - -### 11.5 No frontend code in this PR - -Per the user's instruction during the spec discussion, frontend work is a separate session. The above sketches are appendix material so reviewers can confirm the backend contract is sufficient for the eventual frontend implementation. - ---- - -## 12. Service discovery, environment, and Docker networking - -The cross-repo orchestration substrate already exists as [`fluent-platform`](../../fluent-platform/README.md). Its [`compose.yaml`](../../fluent-platform/compose.yaml) brings up `db`, `api`, `worker`, `ai`, and `web` on a shared Docker/Podman network with service names usable as DNS, plus a shared PostgreSQL instance with role-based schema separation. This section describes how this PR plugs into that substrate and the small changes needed in fluent-api and fluent-platform. - -### 12.1 The two runtime modes - -Per [`fluent-platform/README.md`](../../fluent-platform/README.md), fluent-api runs in one of two modes: - -- **Ecosystem mode** — started via `./fluent.sh up` from `fluent-platform/`. fluent-ai is also up, reachable at `http://ai:8200` on the internal network (service name `ai` from [`fluent-platform/compose.yaml`](../../fluent-platform/compose.yaml) line 82). -- **Standalone mode** — started via `./fapi.sh up` from `fluent-api/`. fluent-ai is _not_ running unless the dev started it separately. fluent-api needs to gracefully report unavailability rather than crash. - -Both modes are first-class. The integration must work in both. - -### 12.2 Env vars (fluent-api side) - -Two new entries in [`fluent-api/src/env.ts`](../src/env.ts): - -```ts -const envSchema = z.object({ - // ... existing ... - FLUENT_AI_URL: z.string().url(), // ecosystem mode: http://ai:8200 — standalone: http://localhost:8200 - FLUENT_AI_KEY: z.string().min(1), // dev value: fai_dev_admin -}); -``` - -Both are required (no defaults). 
Zod failure on boot prints a clear error and exits, matching how fluent-api already handles `DATABASE_URL`, `BETTER_AUTH_SECRET`, etc. - -### 12.3 `fluent-api/.env.example` additions - -```dotenv -# Fluent-AI integration -# Base URL of the fluent-ai service (no trailing slash, no /api/v1 suffix). -# - Ecosystem mode (via fluent-platform): http://ai:8200 -# - Standalone fluent-api against standalone fluent-ai: http://localhost:8200 -FLUENT_AI_URL=http://localhost:8200 - -# Shared API key for calling fluent-ai. Matches a row in fluent-ai's ai_api_keys table. -# Dev value seeded by fluent-ai: fai_dev_admin -FLUENT_AI_KEY=fai_dev_admin -``` - -The `.env.example` documents the standalone-mode default because that's the path a dev hits first when running `./fapi.sh up` and copying `.env.example` to `.env`. Ecosystem-mode overrides are applied at the platform-compose layer (§12.4). - -### 12.4 Companion change in fluent-platform - -[`fluent-platform/compose.yaml`](../../fluent-platform/compose.yaml) currently passes fluent-api's `.env` verbatim via `env_file: ${API_CONTEXT:-../fluent-api}/.env`. To make ecosystem mode work regardless of what the dev wrote in `fluent-api/.env`, the platform compose should explicitly override the URL for the `api` service: - -```yaml -api: - # ... existing ... - env_file: ${API_CONTEXT:-../fluent-api}/.env - environment: - DATABASE_URL: postgres://postgres:postgres@db:5432/fluent - EXPORTS_DIR: /app/exports - # New entries: - FLUENT_AI_URL: http://ai:8200 - # FLUENT_AI_KEY intentionally NOT overridden here — sourced from fluent-api/.env, - # which must match fluent-ai's ai_api_keys seed (dev value: fai_dev_admin) -``` - -`FLUENT_AI_URL` is overridden because it's deployment-topology-dependent. `FLUENT_AI_KEY` is _not_ overridden because it's a shared secret — the same value belongs in `fluent-api/.env` (for the caller) and in fluent-ai's `ai_api_keys` table (which the dev seed already populates). 
Overriding only on one side would invite drift. - -This is a small fluent-platform PR that should land alongside the fluent-api PR. Both repos ship together; the spec calls this out as a release-coordination item in §15. - -### 12.5 Startup ordering - -[`fluent-platform/compose.yaml`](../../fluent-platform/compose.yaml) line 110–112 currently has `ai` declaring `depends_on: api: service_healthy`. So when the stack starts: - -1. `db` becomes healthy -2. `api` starts, becomes healthy -3. `ai` and `worker` and `web` start -4. Brief window where `api` is up but `ai` is still booting - -If a dev (or test) hits the `/ai/tools/...` endpoint during that window, fluent-api's `callFluentAi` will hit `ECONNREFUSED` and return `Result.err({ code: AI_SERVICE_UNAVAILABLE, ... })`. This is the correct behavior — no need for retries, no need to invert the `depends_on` direction. Worth noting only so reviewers don't mistake the 502 they see during startup for a bug. (An optional improvement: add an `ai` healthcheck and let `api` declare a soft dependency on it. Out of scope for this PR but a candidate for the fluent-platform follow-up.) - -### 12.6 Standalone-mode behavior when fluent-ai isn't running - -When a dev runs only `./fapi.sh up` without fluent-ai, the `/ai/tools/...` endpoints will return `502 Bad Gateway` with `code: AI_SERVICE_UNAVAILABLE`. This is acceptable: the rest of fluent-api works, and the dev sees a clear signal that they need to bring fluent-ai up (or switch to ecosystem mode) if they want to exercise the AI integration. - -### 12.7 README updates - -- **fluent-api's README** gains a short subsection under "Running locally" pointing to fluent-platform for ecosystem mode and explaining the standalone-mode caveat. -- **fluent-platform's README** has a Services table at line 61–68 listing `api`, `ai`, `web`, `worker`, `db`. 
The proposed compose change in §12.4 doesn't add new services so this table is unaffected, but the Environment Configuration section (line 166+) should mention that `FLUENT_AI_KEY` must be set in `fluent-api/.env` to enable the AI tools endpoints. - -### 12.8 What `callFluentAi` does _not_ assume about networking - -The client is unaware of whether fluent-ai is at `localhost:8200`, `ai:8200`, `https://fluent-ai.internal.example.com`, or anywhere else. It reads `FLUENT_AI_URL` verbatim, appends `/api/v1/${toolPath}`, and POSTs. This means: - -- Switching from standalone to ecosystem mode is a single env var change (handled automatically by the platform compose override). -- Switching to a staging or production deployment is a single env var change. -- TLS works automatically if `FLUENT_AI_URL` starts with `https://` — `fetch` handles it. +## Continued in Part 2 -### 12.9 Production / deployment +Sections §11 through §15 — the job-queue forward-compatibility protocol, service discovery / Docker / environment wiring (including the **"wire up a running ecosystem"** operational checklist in §12.10), the testing strategy, future work, and the resolved reviewer Q&A — now live in the companion file: -Per [`fluent-platform/README.md`](../../fluent-platform/README.md) §"Deployment (placeholder - not active 2026-05-08)", Azure Bicep templates live in [`fluent-platform/deploy/azure/`](../../fluent-platform/deploy/azure/) but aren't active yet. When production deployment lands, `FLUENT_AI_URL` and `FLUENT_AI_KEY` will be wired through the same environment-injection mechanism the rest of the app uses (Azure App Settings / Key Vault references). No fluent-api code change is required for that transition. +➡️ **[`ai-tools-integration-operations.md`](ai-tools-integration-operations.md) — Part 2 of 2.** ---- - -## 13. Testing strategy - -Per **D11**, the test footprint mirrors the existing fluent-api conventions. 
Three layers: - -### 13.1 Unit tests — `callFluentAi` - -File: `fluent-api/src/lib/services/fluent-ai/fluent-ai.client.test.ts` - -Test surface, all with `global.fetch` stubbed via `vi.spyOn(global, 'fetch')`: - -- Happy path: completed envelope → returns `Result.ok(envelope)`. -- Happy path: queued envelope → returns `Result.ok(envelope)` (the route layer, not the client, decides 200 vs 202). -- Failed envelope (`status: "failed"`) → returns `Result.err({ code: AI_TOOL_EXECUTION_FAILED, ... })`. -- Cancelled envelope → returns `Result.err({ code: AI_TOOL_EXECUTION_FAILED, ... })`. -- fluent-ai returns 4xx → `Result.err({ code: AI_SERVICE_UNAVAILABLE, ... })`. -- fluent-ai returns 5xx → `Result.err({ code: AI_SERVICE_UNAVAILABLE, ... })`. -- `fetch` rejects (network error) → `Result.err({ code: AI_SERVICE_UNAVAILABLE, ... })`. -- Response body fails JSON parse → `Result.err({ code: AI_SERVICE_UNAVAILABLE, message contains "malformed" })`. -- Response envelope passes parsing but `result` field fails the result schema → `Result.err({ code: AI_SERVICE_UNAVAILABLE, ... })`. -- Default 30s timeout fires via fake timers → `Result.err({ code: AI_SERVICE_UNAVAILABLE, ... })`. -- Caller-supplied `AbortSignal` triggers → `Result.err({ code: AI_SERVICE_UNAVAILABLE, ... })`. -- Request shape: `X-API-Key` header is present, equals `env.FLUENT_AI_KEY`, `Content-Type` is `application/json`, URL is `${FLUENT_AI_URL}/api/v1/${toolPath}`. - -### 13.2 Domain tests — `ai-tools.route.ts` - -File: `fluent-api/src/domains/ai-tools/ai-tools.route.test.ts` - -Test surface, modeled on the existing route tests like [`fluent-api/src/domains/translated-verses/translated-verses.route.test.ts`](../src/domains/translated-verses/translated-verses.route.test.ts): - -- Unauthenticated request → 401. -- Authenticated but missing `AI_TOOLS_USE` → 403. -- Invalid request body (e.g. empty `verses`) → 400 with Zod details. 
-- Authenticated + permitted + valid body + happy-path mock of `callRepeatedWords` returning completed envelope → 200, envelope passed through verbatim. -- Same but mock returns queued envelope → 202, envelope passed through. -- Same but mock returns failed envelope → 502, error body. -- Same but mock returns transport error → 502, error body. -- Mock is asserted to have been called with the exact request body the caller sent (verifies no enrichment). - -### 13.3 Smoke test — `scripts/smoke-repeated-words.ts` - -A standalone script mirroring [`fluent-ai/scripts/smoke_repeated_words.py`](../../fluent-ai/scripts/smoke_repeated_words.py). Runs from the host against a live fluent-api + fluent-ai pair, posts a known-good body, and asserts: - -- Returns 200 (today; 202 once fluent-ai goes async). -- Envelope `status` is `completed` (today). -- `result.findings` is an array. -- `result.summary.total_findings` equals `result.findings.length`. - -Invoked via an npm script: `npm run smoke:repeated-words`. Not part of `npm test` (it requires a live stack). Documented in fluent-api's README alongside the existing dev workflow. - -### 13.4 What is _not_ covered - -- **No end-to-end fluent-web → fluent-api → fluent-ai test.** That's a frontend concern that will land with the frontend PR. -- **No load tests** for the polling endpoint (which doesn't exist yet on either side). -- **No contract tests** auto-generated from fluent-ai's OpenAPI spec. This would be valuable, but introducing a contract-testing framework (Pact, openapi-typescript code generation, etc.) is its own decision worth a separate spec. For now, the Zod schemas in fluent-api are the contract, hand-maintained against [`fluent-ai/src/app/schemas/greek_room.py`](../../fluent-ai/src/app/schemas/greek_room.py) and [`fluent-ai/src/app/schemas/tool_job.py`](../../fluent-ai/src/app/schemas/tool_job.py). 
- -### 13.5 Test infrastructure inherited - -- Vitest config in [`fluent-api/vitest.config.ts`](../vitest.config.ts) — no changes. -- Existing test helpers in `fluent-api/src/tests/` (auth fixtures, request helpers) — reused as-is for the domain tests. -- No new test dependencies. - ---- - -## 14. Future work - -Items that are out of scope for this PR but enabled by the foundations laid here. None of these is blocked on a redesign; they all plug into the same `callFluentAi` / `ToolJobResponse` shape. - -### 14.1 The polling endpoint and slow tools - -When fluent-ai introduces a tool that justifies the queue substrate (per fluent-ai decision **D1**, currently deferred), it will ship: - -- A backing `ai.tool_jobs` table. -- An in-process worker for execution. -- `GET /api/v1/tools/jobs/{job_id}` for status polling. - -The matching fluent-api work is small: - -- Add `pollToolJob(jobId, resultSchema)` sibling to `callFluentAi` in [`fluent-api/src/lib/services/fluent-ai/fluent-ai.client.ts`](../src/lib/services/fluent-ai/fluent-ai.client.ts). -- Add `GET /ai/tools/jobs/{job_id}` route in [`fluent-api/src/domains/ai-tools/ai-tools.route.ts`](../src/domains/ai-tools/ai-tools.route.ts) with the same `authenticateUser + requirePermission(AI_TOOLS_USE)` middleware. -- No DB persistence needed on the fluent-api side — fluent-api remains a thin pass-through; the job state of record lives in fluent-ai's `ai.tool_jobs` table. - -### 14.2 Frontend hook and editor squiggles - -A separate PR against fluent-web will introduce the `useToolJob` + `useRepeatedWords` hooks sketched in §11.4, then drive editor squiggle UI from the `findings` array. The backend surface is already shaped to feed that UI directly (`snt_id`, `surf`, `start_position`, `severity` on each finding). - -### 14.3 Additional Greek-Room checks - -Greek-Room exposes other static-analysis tools (punctuation, untranslated text, character-set sanity, etc.). 
Each will land in fluent-ai as a sibling tool, then surface in fluent-api with the same five-line pattern shown in §7.5. No new mechanism needed. - -### 14.4 Other AI tool families - -The same pattern absorbs LLM drafting, embeddings, fine-tuning, and any other tool family fluent-ai grows into. The naming convention `tools/{family}/{tool-name}` (e.g. `tools/openai/draft-suggestion`, `tools/embeddings/similarity`) keeps OpenAPI documentation organized. - -### 14.5 Per-user attribution - -Today fluent-ai sees a single shared identity (`FLUENT_AI_KEY`). If audit / billing / rate-limiting needs per-user attribution later, fluent-api can pass an opaque `X-Requested-By` header carrying the BetterAuth user ID. fluent-ai logs it; no change to the request body. - -### 14.6 Caching for idempotent tools - -`callFluentAi` is intentionally cache-free today. Some future tools may be both expensive and deterministic on their input — in which case a `(toolPath, hash(body))` cache (in-memory or Redis) makes sense. Drops in at the `callFluentAi` layer without changing call sites. - -### 14.7 Retries on transport failure - -Currently `callFluentAi` does not retry on network errors. If experience shows transient failures are common, a `withRetry` wrapper (analogous to [`withDatabaseRetry`](../src/lib/db-retry.ts)) can be added at the client level. Out of scope today because the failure mode of the only tool is "semantic," not "flaky." - -### 14.8 MCP facade - -A future Model Context Protocol facade (referenced as out-of-scope in [`fluent-ai/greek-room-integration-summary.md`](../../fluent-ai/greek-room-integration-summary.md)) could be layered over fluent-ai. fluent-api would call it via `callFluentAi` exactly as today — the only difference is the base URL. 
- -### 14.9 fluent-platform refinements - -Two small, optional improvements identified while writing this spec: - -- Add a healthcheck to the `ai` service in [`fluent-platform/compose.yaml`](../../fluent-platform/compose.yaml) and let `api` declare a soft dependency on it. Would eliminate the brief startup window where the AI endpoints return 502. Not pursued in this PR because the 502 response is already graceful. -- Document the `FLUENT_AI_KEY` ↔ fluent-ai `ai_api_keys` table relationship in [`fluent-platform/docs/`](../../fluent-platform/docs/) for new developers. - ---- - -## 15. Open questions for reviewer - -These are the items the spec discussion landed on but where reviewer pushback would meaningfully change the outcome. Each one has a recommended position (the doc reflects this); each one can be flipped without restructuring the rest of the proposal. - -> **Status: resolved.** All four questions below were addressed in kaseywright's review of [PR #173](https://github.com/eten-tech-foundation/fluent-api/pull/173) on 2026-06-02 (review **APPROVED**). The reviewer confirmed each recommended position; two of them (§15.2, §15.4) came with a request to document the decision, now captured in §9.3 and §8.1 respectively. Per-item resolutions are noted inline below. - -### 15.1 URL layout: is `POST /ai/tools/greek-room/repeated-words` the right shape? - -**Recommended:** Yes — see **D2** and §5. - -> **Resolved (kaseywright, 2026-06-02):** confirmed — "this URL layout works well." [Review comment.](https://github.com/eten-tech-foundation/fluent-api/pull/173#discussion_r3343625894) - -**Alternatives:** - -- `POST /checks/repeated-words` — closer to the verbiage we use elsewhere ("checks" rather than "tools"). Downside: hides the network-bound, possibly-async nature of these endpoints. -- `POST /chapter-assignments/{id}/checks/repeated-words` — nests the check under the resource it operates on. 
Rejected because it requires fluent-api to enrich the request body from `chapter_assignment_id` → verses + language metadata, which couples fluent-api to fluent-ai's input schema (rejected by **D8**). -- `POST /tools/dispatch` with `{tool: "...", params: {...}}` — collapses the type system at the wire boundary. Same reason fluent-ai rejected this (see [`fluent-ai/greek-room-integration-summary.md`](../../fluent-ai/greek-room-integration-summary.md) §1). - -**Decision needed from reviewer:** confirm `/ai/tools/{family}/{tool}` or push back with a preference. - -### 15.2 Permission: `PERMISSIONS.AI_TOOLS_USE` as a string-value alias of `CONTENT_UPDATE`? - -**Recommended:** Yes, alias — see **D10** and §9.3. - -> **Resolved (kaseywright, 2026-06-02):** alias approach confirmed, with the request to document the decision for future reference (done in §9.3). [Review comment.](https://github.com/eten-tech-foundation/fluent-api/pull/173#discussion_r3343633722) - -**Alternatives:** - -- Introduce a real new permission row in the `permissions` table with its own role mappings. Requires a migration and seed update. Gives nothing user-visible today but is the "cleaner" RBAC story. -- Reuse `PERMISSIONS.CONTENT_UPDATE` directly at the call site (no alias). Loses the documentary value of seeing "AI_TOOLS_USE" at the route. - -**Decision needed from reviewer:** confirm the alias approach or push back for either of the alternatives. - -### 15.3 Envelope pass-through vs. unwrapping `result` for the sync case? - -**Recommended:** Pass through the full `ToolJobResponse` — see **D9** and §8.2. - -> **Resolved (kaseywright, 2026-06-02):** pass-through confirmed, conditioned on the web-client response following the standard format already in place (see §8.2). [Review comment.](https://github.com/eten-tech-foundation/fluent-api/pull/173#discussion_r3343642943) - -**Alternatives:** - -- For the synchronous case only, return just the `result` field (i.e. 
`{findings, summary}`) and 200, reserving the envelope for when fluent-ai goes async. Simpler today; mildly more breaking when polling lands. -- Pass through always but add a thin `result_only` query parameter for callers that want the unwrapped shape. Adds API surface for negligible benefit. - -**Decision needed from reviewer:** confirm pass-through or push back for unwrap-now-envelope-later. - -### 15.4 No request enrichment vs. server-side context augmentation? - -**Recommended:** No enrichment — see **D8** and §8.1. - -> **Resolved (kaseywright, 2026-06-02):** forwarding verbatim confirmed; the snake_case naming divergence accepted as a contained, intentional AI-tools-domain exception, with a request to document it (done in §8.1). [Review comment.](https://github.com/eten-tech-foundation/fluent-api/pull/173#discussion_r3343677813) - -**Alternatives:** - -- fluent-api looks up `chapter_assignment_id` (or `project_id`) and adds verses + language metadata server-side. Caller sends a thin reference, fluent-api fattens it before forwarding. Trades client flexibility for harder-to-spoof inputs. -- Hybrid: caller sends the full body, fluent-api _validates_ certain fields against its own data (e.g. confirms the caller has access to that `project_id`). Lighter than full enrichment. - -**Decision needed from reviewer:** confirm no enrichment, or push back for either alternative. - -### 15.5 Anything else the reviewer wants surfaced - -If reviewers identify a concern not captured above, please raise it as a comment on the PR. The relevant pre-decisions are summarized in §3 and the rationale is in the predecessor docs ([`fluent-ai/greek-room-integration-summary.md`](../../fluent-ai/greek-room-integration-summary.md), [`fluent-ai/greek-room-integration-suggestion.md`](../../fluent-ai/greek-room-integration-suggestion.md), [`fluent-ai/greek-room-integration-decisions.md`](../../fluent-ai/greek-room-integration-decisions.md)). 
- ---- +For the file-by-file record of what is already implemented in the tree versus what remains, see **[`ai-tools-integration-status.md`](ai-tools-integration-status.md)**. diff --git a/docs/proposals/repeated-word-check/ai-tools-integration-summary.md b/docs/proposals/repeated-word-check/ai-tools-integration-summary.md index f922e1f..fccbd8d 100644 --- a/docs/proposals/repeated-word-check/ai-tools-integration-summary.md +++ b/docs/proposals/repeated-word-check/ai-tools-integration-summary.md @@ -1,6 +1,6 @@ # AI-Tools Integration on fluent-api — Architecture Review Summary -**Purpose:** Reviewer orientation for the proposed AI-tools integration. Long-form proposal lives in the sibling [`ai-tools-integration-suggestion.md`](ai-tools-integration-suggestion.md) if more detail is wanted; this summary is intended to stand on its own. Ships as a coordinated pair of PRs — fluent-api (the bulk) plus a small fluent-platform PR adding one compose env-var override (per **D12**). +**Purpose:** Reviewer orientation for the proposed AI-tools integration. This summary is intended to stand on its own; if more detail is wanted, the long-form proposal is split across two sibling files — [`ai-tools-integration-suggestion.md`](ai-tools-integration-suggestion.md) (**Part 1**: contract & design, §1–§10) and [`ai-tools-integration-operations.md`](ai-tools-integration-operations.md) (**Part 2**: operations, forward compatibility, testing, §11–§15). A file-by-file account of what is already implemented in the tree versus what remains lives in [`ai-tools-integration-status.md`](ai-tools-integration-status.md). Ships as a coordinated pair of PRs — fluent-api (the bulk) plus a small fluent-platform PR adding one compose env-var override (per **D12**). 
## What's being proposed From ee47fd2f295a15aa7d92e98c33eecd45f52a05f9 Mon Sep 17 00:00:00 2001 From: Joshua Lansford Date: Thu, 4 Jun 2026 16:09:10 -0400 Subject: [PATCH 9/9] feat(ai-tools): configurable fluent-ai path prefix + smoke auto sign-in; verify live e2e - Add FLUENT_AI_API_PREFIX env var (default empty) and buildToolUrl() so the fluent-ai base path is configurable instead of hardcoded /api/v1/, matching the live build that serves Greek-Room tools at the root. - Enhance scripts/smoke-repeated-words.ts: BetterAuth auto sign-in with a trusted Origin header, set-auth-token capture, missing-dev-user + origin hints, and a PASS/FAIL verdict banner. - Add client tests for empty-prefix and configured-prefix URL building. - Document FLUENT_AI_API_PREFIX in .env.example. - Update repeated-word-check proposal docs to mark the live end-to-end run and compose wiring as done and to reflect the configurable prefix. --- .env.example | 8 +- .../ai-tools-integration-operations.md | 23 +- .../ai-tools-integration-status.md | 128 ++++++-- .../ai-tools-integration-suggestion.md | 14 +- .../ai-tools-integration-summary.md | 13 + scripts/smoke-repeated-words.ts | 276 ++++++++++++++++-- src/env.ts | 9 +- .../fluent-ai/fluent-ai.client.test.ts | 35 ++- .../services/fluent-ai/fluent-ai.client.ts | 21 +- 9 files changed, 464 insertions(+), 63 deletions(-) diff --git a/.env.example b/.env.example index 9523609..32f7821 100644 --- a/.env.example +++ b/.env.example @@ -16,7 +16,7 @@ SEED_TRANSLATOR_EMAIL=t@fluent.local SEED_TRANSLATOR_PASSWORD=t@123456 # Fluent-AI integration -# Base URL of the fluent-ai service (no trailing slash, no /api/v1 suffix). +# Base URL of the fluent-ai service (no trailing slash, no path suffix). # - Ecosystem mode (via fluent-platform): http://ai:8200 # - Standalone fluent-api against standalone fluent-ai: http://localhost:8200 FLUENT_AI_URL=http://localhost:8200 @@ -24,3 +24,9 @@ FLUENT_AI_URL=http://localhost:8200 # Shared API key for calling fluent-ai. 
Matches a row in fluent-ai's ai_api_keys table. # Dev value seeded by fluent-ai: fai_dev_admin FLUENT_AI_KEY=fai_dev_admin + +# Path prefix fluent-ai mounts its routers under, inserted between FLUENT_AI_URL +# and the per-tool path. The current live fluent-ai build serves at the root +# (e.g. POST /tools/greek-room/repeated-words), so leave this empty. If/when +# fluent-ai adopts versioned routing, set this to /api/v1 — no code change needed. +FLUENT_AI_API_PREFIX= diff --git a/docs/proposals/repeated-word-check/ai-tools-integration-operations.md b/docs/proposals/repeated-word-check/ai-tools-integration-operations.md index 5497a9a..fcc896b 100644 --- a/docs/proposals/repeated-word-check/ai-tools-integration-operations.md +++ b/docs/proposals/repeated-word-check/ai-tools-integration-operations.md @@ -218,7 +218,7 @@ When a dev runs only `./fapi.sh up` without fluent-ai, the `/ai/tools/...` endpo ### 12.8 What `callFluentAi` does _not_ assume about networking -The client is unaware of whether fluent-ai is at `localhost:8200`, `ai:8200`, `https://fluent-ai.internal.example.com`, or anywhere else. It reads `FLUENT_AI_URL` verbatim, appends `/api/v1/${toolPath}`, and POSTs. This means: +The client is unaware of whether fluent-ai is at `localhost:8200`, `ai:8200`, `https://fluent-ai.internal.example.com`, or anywhere else. It reads `FLUENT_AI_URL` verbatim, joins the optional `FLUENT_AI_API_PREFIX` (default empty — see the implementation update in §7.2) and `toolPath`, and POSTs. With the default empty prefix it POSTs to `${FLUENT_AI_URL}/${toolPath}` (matching the live fluent-ai build); `FLUENT_AI_API_PREFIX=/api/v1` makes it `${FLUENT_AI_URL}/api/v1/${toolPath}`. This means: - Switching from standalone to ecosystem mode is a single env var change (handled automatically by the platform compose override). - Switching to a staging or production deployment is a single env var change. 
@@ -232,6 +232,25 @@ Per [`fluent-platform/README.md`](../../../../fluent-platform/README.md) §"Depl The fluent-api code in this PR is complete, but **exercising it end-to-end against a live fluent-ai requires a small amount of local wiring that is intentionally _not_ part of this PR's committed changes** (the env values are machine-specific, and the compose override belongs to the paired fluent-platform PR per §12.4 / **D12**). This subsection is the runbook for an agent or developer who wants to take the merged code and watch a real request flow fluent-web → fluent-api → fluent-ai. It is deliberately step-by-step so it can be followed without prior context. +> **Update (2026-06-04) — this runbook has now been executed against a from-scratch +> stack and the steps below have been simplified by the smoke script.** Three practical +> notes, the first of which supersedes the hand-rolled `curl` instructions in Steps 5–6: +> +> - **Steps 5–6 are now one command.** `npm run smoke:repeated-words` performs the +> BetterAuth sign-in itself (default dev user `t@fluent.local`, overridable via +> `--signin-email` / `--signin-password`), captures the `set-auth-token` bearer, sends +> a trusted `Origin` header (default `http://localhost:5173`, overridable via `--origin` +> / `FLUENT_API_ORIGIN` / `FRONTEND_URL`), runs the check, and prints a PASS/FAIL banner. +> The manual `curl` token capture below still works if you prefer it. +> - **Seed the org + dev user first.** The fluent-api entrypoint auto-runs +> migrate/roles/rbac but **not** account provisioning, so on a clean stack run +> `docker compose exec api npx tsx src/db/seeds/organizations.ts` then +> `… src/db/seeds/dev-users.ts` (org before users) so a sign-in account exists. The +> smoke script prints these exact commands if the dev user is missing. +> - **The fluent-ai path prefix is now configurable** (`FLUENT_AI_API_PREFIX`, default +> empty — see §7.2 / §12.8). 
The live build serves at the root, so the default works +> out of the box; no `/api/v1` is needed. + > **Prerequisites.** All four repos cloned side-by-side (the layout fluent-platform's setup produces). Docker/Podman available. fluent-ai's dev seed has run at least once so its `ai_api_keys` table contains the dev key `fai_dev_admin`. You are on branch `jel-word-check` in fluent-api. **Step 1 — Add the two env vars to `fluent-api/.env`.** The repo only ships `.env.example`; the real `.env` is git-ignored and must be edited by hand. Append (or copy from `.env.example` — see §12.3): @@ -317,7 +336,7 @@ Test surface, all with `global.fetch` stubbed via `vi.spyOn(global, 'fetch')`: - Response envelope passes parsing but `result` field fails the result schema → `Result.err({ code: AI_SERVICE_UNAVAILABLE, ... })`. - Default 30s timeout fires via fake timers → `Result.err({ code: AI_SERVICE_UNAVAILABLE, ... })`. - Caller-supplied `AbortSignal` triggers → `Result.err({ code: AI_SERVICE_UNAVAILABLE, ... })`. -- Request shape: `X-API-Key` header is present, equals `env.FLUENT_AI_KEY`, `Content-Type` is `application/json`, URL is `${FLUENT_AI_URL}/api/v1/${toolPath}`. +- Request shape: `X-API-Key` header is present, equals `env.FLUENT_AI_KEY`, `Content-Type` is `application/json`, and the URL is built from `FLUENT_AI_URL` + the optional `FLUENT_AI_API_PREFIX` + `toolPath`. Two cases are covered: the default empty prefix → `${FLUENT_AI_URL}/${toolPath}`, and a configured prefix (`/api/v1`, `api/v1`, `/api/v1/`) → `${FLUENT_AI_URL}/api/v1/${toolPath}` (slashes normalized). 
### 13.2 Domain tests — `ai-tools.route.ts` diff --git a/docs/proposals/repeated-word-check/ai-tools-integration-status.md b/docs/proposals/repeated-word-check/ai-tools-integration-status.md index a444143..87d7c35 100644 --- a/docs/proposals/repeated-word-check/ai-tools-integration-status.md +++ b/docs/proposals/repeated-word-check/ai-tools-integration-status.md @@ -11,24 +11,40 @@ **Branch:** `jel-word-check` (do **not** create a new branch; do **not** push). **Implementation commit:** `b055f84` — _feat(ai-tools): add greek-room repeated-words endpoint + fluent-ai client_ (17 files, +1348/−26). +> **Update (2026-06-04) — verified live end-to-end.** The integration has now been +> exercised against a from-scratch fluent-platform stack and passes both on the host +> and from inside the `api` container (10/10 smoke-test checks, HTTP 200). Two things +> changed while wiring it up, both reflected in the sections below: +> +> 1. **Configurable fluent-ai path prefix.** The live fluent-ai build mounts its +> routers at the **root** (`POST /tools/greek-room/repeated-words`), not under +> `/api/v1` as the schemas' directory layout implied — the previously hardcoded +> `/api/v1/` in `callFluentAi` produced a 404 → `502 AI_SERVICE_UNAVAILABLE`. The +> prefix is now a configurable env var, **`FLUENT_AI_API_PREFIX`**, defaulting to +> empty to match the live build. When fluent-ai adopts versioned routing it is a +> pure env flip (`FLUENT_AI_API_PREFIX=/api/v1`) with no code change. +> 2. **fluent-platform compose needs more than the URL override** to bring the `ai` +> container up from a clean slate (Alembic DB-URL and a uv-cache fix) — see §3.2. + --- ## 1. 
Status at a glance -| Area | Status | Notes | -| ----------------------------------------------------------------- | --------------- | ------------------------------------------------------------------------------------------ | -| Endpoint `POST /ai/tools/greek-room/repeated-words` | ✅ Implemented | Route + service + types in `src/domains/ai-tools/`, registered on the app. | -| Shared client `callFluentAi` | ✅ Implemented | `src/lib/services/fluent-ai/`, modeled on Mailgun + `withDatabaseRetry`. | -| Env vars `FLUENT_AI_URL` / `FLUENT_AI_KEY` | ✅ Implemented | Required (no defaults) in `src/env.ts`; documented in `.env.example`. | -| Permission alias `AI_TOOLS_USE` | ✅ Implemented | Alias of `content:update` in `src/lib/permissions.ts` (D10). | -| Error codes `AI_SERVICE_UNAVAILABLE` / `AI_TOOL_EXECUTION_FAILED` | ✅ Implemented | Both → HTTP 502 in `src/lib/types.ts`. | -| Unit tests (`callFluentAi`) | ✅ Implemented | `fluent-ai.client.test.ts`. | -| Route tests | ✅ Implemented | `ai-tools.route.test.ts`. | -| Smoke script + npm alias | ✅ Implemented | `scripts/smoke-repeated-words.ts`, `npm run smoke:repeated-words`. | -| Documentation | ✅ Implemented | This file + the split Part 1 / Part 2 proposal + summary. | -| **Live end-to-end run** (fluent-api ↔ fluent-ai) | ⏳ **Not done** | Requires local wiring — see "What remains" below and the §12.10 runbook. | -| fluent-platform compose override | ⏳ Not done | **Separate paired PR** (D12). `FLUENT_AI_URL: http://ai:8200`. Not part of this repo's PR. | -| Polling endpoint / DB persistence / frontend / retries / caching | ⛔ Out of scope | Deferred by design — see §2 / §14 of the proposal. 
| +| Area | Status | Notes | +| ----------------------------------------------------------------- | --------------- | ---------------------------------------------------------------------------------------------------------------- | +| Endpoint `POST /ai/tools/greek-room/repeated-words` | ✅ Implemented | Route + service + types in `src/domains/ai-tools/`, registered on the app. | +| Shared client `callFluentAi` | ✅ Implemented | `src/lib/services/fluent-ai/`, modeled on Mailgun + `withDatabaseRetry`. | +| Env vars `FLUENT_AI_URL` / `FLUENT_AI_KEY` | ✅ Implemented | Required (no defaults) in `src/env.ts`; documented in `.env.example`. | +| Env var `FLUENT_AI_API_PREFIX` | ✅ Implemented | Optional; defaults to empty (live build serves at root). Set `/api/v1` if/when fluent-ai versions its routes. | +| Permission alias `AI_TOOLS_USE` | ✅ Implemented | Alias of `content:update` in `src/lib/permissions.ts` (D10). | +| Error codes `AI_SERVICE_UNAVAILABLE` / `AI_TOOL_EXECUTION_FAILED` | ✅ Implemented | Both → HTTP 502 in `src/lib/types.ts`. | +| Unit tests (`callFluentAi`) | ✅ Implemented | `fluent-ai.client.test.ts` (incl. `FLUENT_AI_API_PREFIX` cases). | +| Route tests | ✅ Implemented | `ai-tools.route.test.ts`. | +| Smoke script + npm alias | ✅ Implemented | `scripts/smoke-repeated-words.ts`, `npm run smoke:repeated-words` (auto sign-in + PASS/FAIL banner). | +| Documentation | ✅ Implemented | This file + the split Part 1 / Part 2 proposal + summary. | +| **Live end-to-end run** (fluent-api ↔ fluent-ai) | ✅ **Done** | Verified 2026-06-04 against a from-scratch platform stack — 10/10 on the host **and** in-container. See §3.1. | +| fluent-platform compose override | ✅ Applied | Applied locally (URL override + two clean-slate fixes). Still ships as a **separate paired PR** (D12); see §3.2. | +| Polling endpoint / DB persistence / frontend / retries / caching | ⛔ Out of scope | Deferred by design — see §2 / §14 of the proposal. 
| Legend: ✅ done · ⏳ remaining · ⛔ intentionally out of scope. @@ -44,16 +60,16 @@ Legend: ✅ done · ⏳ remaining · ⛔ intentionally out of scope. ### New shared client — `src/lib/services/fluent-ai/` -- **`fluent-ai.client.ts`** — `callFluentAi(toolPath, body, resultSchema, options?)`. POSTs to `${FLUENT_AI_URL}/api/v1/${toolPath}` with `X-API-Key`; default **30s** timeout (overridable via `options.timeoutMs` / `options.signal`); validates the `result` field against `resultSchema` only when `status === "completed"`; returns `Result<ToolJobResponse<T>>`. Maps transport/HTTP/parse failures → `AI_SERVICE_UNAVAILABLE`, and `failed`/`cancelled` envelopes → `AI_TOOL_EXECUTION_FAILED` (§10.2). Does **not** poll, cache, or retry (by design). The malformed-body branch returns the message `malformed response from fluent-ai (body was not valid JSON)`. +- **`fluent-ai.client.ts`** — `callFluentAi(toolPath, body, resultSchema, options?)`. Builds the target URL via a `buildToolUrl(toolPath)` helper that joins `FLUENT_AI_URL` + the optional `FLUENT_AI_API_PREFIX` + `toolPath` (normalizing slashes), then POSTs to it with `X-API-Key`. With the default empty prefix the URL is `${FLUENT_AI_URL}/${toolPath}` (matching the live fluent-ai build, which serves at the root); setting `FLUENT_AI_API_PREFIX=/api/v1` produces `${FLUENT_AI_URL}/api/v1/${toolPath}`. Default **30s** timeout (overridable via `options.timeoutMs` / `options.signal`); validates the `result` field against `resultSchema` only when `status === "completed"`; returns `Result<ToolJobResponse<T>>`. Maps transport/HTTP/parse failures → `AI_SERVICE_UNAVAILABLE`, and `failed`/`cancelled` envelopes → `AI_TOOL_EXECUTION_FAILED` (§10.2). Does **not** poll, cache, or retry (by design). The malformed-body branch returns the message `malformed response from fluent-ai (body was not valid JSON)`. - **`fluent-ai.types.ts`** — `JobStatus` union, `ToolJobError`, and the generic `ToolJobResponse<T>` envelope. 
Carries the same snake_case in-code comment / D8 cross-reference as `ai-tools.types.ts`. ### Edits to existing files - **`src/app.ts`** — Registers the ai-tools routes on the OpenAPIHono app, the same way existing domains are registered. -- **`src/env.ts`** — Adds `FLUENT_AI_URL` (URL) and `FLUENT_AI_KEY` (non-empty string) to the Zod env schema. Both **required, no defaults**; a missing/blank value fails validation at boot. +- **`src/env.ts`** — Adds `FLUENT_AI_URL` (URL) and `FLUENT_AI_KEY` (non-empty string) to the Zod env schema. Both **required, no defaults**; a missing/blank value fails validation at boot. Also adds `FLUENT_AI_API_PREFIX` (string, **optional**, defaults to `''`) — the path segment between `FLUENT_AI_URL` and the tool path; empty matches the live fluent-ai build (routes served at root), `/api/v1` for a future versioned deployment. - **`src/lib/permissions.ts`** — Adds `PERMISSIONS.AI_TOOLS_USE = 'content:update'` (alias of `CONTENT_UPDATE`), with a comment linking to §9.3 / D10 and review comment `#discussion_r3343633722`. - **`src/lib/types.ts`** — Adds `ErrorCode.AI_SERVICE_UNAVAILABLE` and `ErrorCode.AI_TOOL_EXECUTION_FAILED`, both mapped to HTTP **502** in `ErrorHttpStatus`. -- **`.env.example`** — Adds documented `FLUENT_AI_URL` and `FLUENT_AI_KEY` entries (standalone default `http://localhost:8200`, dev key `fai_dev_admin`). +- **`.env.example`** — Adds documented `FLUENT_AI_URL` and `FLUENT_AI_KEY` entries (standalone default `http://localhost:8200`, dev key `fai_dev_admin`), plus `FLUENT_AI_API_PREFIX=` (empty, with a note that it can be set to `/api/v1` once fluent-ai versions its routes). - **`.env.test`** — Adds test values for the two vars so the suite boots. - **`package.json`** — Adds the `smoke:repeated-words` script. @@ -61,26 +77,74 @@ Legend: ✅ done · ⏳ remaining · ⛔ intentionally out of scope. 
- **`src/lib/services/fluent-ai/fluent-ai.client.test.ts`** — Unit tests for `callFluentAi` with `fetch` stubbed: completed/queued happy paths, failed/cancelled → `AI_TOOL_EXECUTION_FAILED`, 4xx/5xx/network/parse/schema failures → `AI_SERVICE_UNAVAILABLE`, timeout, abort signal, and request-shape assertions (header, URL). - **`src/domains/ai-tools/ai-tools.route.test.ts`** — Route tests: 401 unauthenticated, 403 missing permission, 400 invalid body, 200 completed pass-through, 202 queued pass-through, 502 on failed/transport error, and a "no enrichment" assertion that the body is forwarded verbatim. -- **`scripts/smoke-repeated-words.ts`** — Host-runnable probe against a live fluent-api + fluent-ai pair. CLI flags `--url`, `--token`, `--cookie`, `--timeout`, `--raw`; reads `FLUENT_API_URL` / `FLUENT_API_TOKEN` from env; default base URL `http://localhost:9999`. Posts the canned 3-verse corpus and sanity-checks the envelope (see §13.3). +- **`scripts/smoke-repeated-words.ts`** — Host-runnable probe against a live fluent-api + fluent-ai pair. CLI flags `--url`, `--token`, `--cookie`, `--timeout`, `--raw`, plus **auto sign-in** support: when no credential is supplied it signs in via BetterAuth (`--signin-email` / `--signin-password`, default `t@fluent.local`) and captures the `set-auth-token` bearer. Sends a trusted `Origin` header (`--origin`, env `FLUENT_API_ORIGIN` / `FRONTEND_URL`, default `http://localhost:5173`) so the sign-in isn't rejected with `MISSING_OR_NULL_ORIGIN` / `INVALID_ORIGIN`. Reads `FLUENT_API_URL` / `FLUENT_API_TOKEN` from env; default base URL `http://localhost:9999`. Posts the canned 3-verse corpus, sanity-checks the envelope (see §13.3), and ends with an unmissable **PASS/FAIL banner + check tally**. On a missing dev user it prints the exact `docker compose exec … organizations.ts` / `dev-users.ts` seed commands; on an origin mismatch it prints a targeted hint. --- ## 3. 
What remains (and who owns it) -### 3.1 Live end-to-end verification (this repo, but local-only) - -The code is complete and the automated suite passes, but **no live fluent-api ↔ fluent-ai round-trip has been exercised** as part of this work. Doing so needs machine-specific wiring that is intentionally _not_ committed: - -1. Add `FLUENT_AI_URL` + `FLUENT_AI_KEY` to the git-ignored `fluent-api/.env`. -2. Bring up the stack (ecosystem mode) or run fluent-ai alongside standalone fluent-api. -3. Sign in via BetterAuth to obtain a bearer token. -4. Run `npm run smoke:repeated-words -- --url --token `. - -The full step-by-step procedure (including the BetterAuth `set-auth-token` capture and the expected sanity-check output) is the **§12.10 runbook** in [`ai-tools-integration-operations.md`](ai-tools-integration-operations.md). Nothing in that runbook should be committed to this repo — the `.env` values are per-machine secrets. - -### 3.2 fluent-platform compose override (separate paired PR — D12) - -[`fluent-platform/compose.yaml`](../../../../fluent-platform/compose.yaml) needs `FLUENT_AI_URL: http://ai:8200` added to the `api` service's `environment:` block so ecosystem mode resolves fluent-ai by service name. **`FLUENT_AI_KEY` is deliberately not overridden there** — it stays a shared secret sourced from `fluent-api/.env` (§12.4). This is a small, logic-free PR that ships alongside the fluent-api PR. **Do not touch fluent-platform from this task.** +### 3.1 Live end-to-end verification — ✅ done (2026-06-04) + +The live fluent-api ↔ fluent-ai round-trip **has now been exercised** against a +from-scratch fluent-platform ecosystem stack (clean containers + clean volumes, +`./fluent.sh up`), with `db`, `api`, and `ai` all reaching healthy on a from-scratch +init (no manual `ALTER USER`). 
The smoke test passed **10/10** twice: + +- **From the host:** `npm run smoke:repeated-words` (auto sign-in, default base URL + `http://localhost:9999`) → HTTP 200, envelope `completed`, tool + `greek_room.repeated_words`, exactly two findings (one legitimate, one suspicious), + `verse_count === 3`, `total_findings === findings.length`. +- **From inside the `api` container:** the same script via the platform compose + scripts bind mount, `--url http://api:9999` → identical 10/10 result. + +The automated suite is green alongside it: `typecheck`, `lint`, and the full **76/76** +vitest run (including the new `FLUENT_AI_API_PREFIX` client cases). + +Reproducing this still needs machine-specific wiring that is intentionally _not_ +committed to this repo (per-machine secrets): + +1. Add `FLUENT_AI_URL`, `FLUENT_AI_KEY` (and optionally `FLUENT_AI_API_PREFIX`) to the + git-ignored `fluent-api/.env`. +2. Bring up the stack (ecosystem mode `./fluent.sh up`) or run fluent-ai alongside + standalone fluent-api. +3. Seed the org + dev users so a sign-in account exists (the entrypoint auto-runs + migrate/roles/rbac but **not** account provisioning): + `docker compose exec api npx tsx src/db/seeds/organizations.ts` then + `… src/db/seeds/dev-users.ts` (org before users). The smoke script prints these + exact commands if the dev user is missing. +4. Run `npm run smoke:repeated-words` (auto sign-in) — or pass `--token` / `--cookie` + explicitly. + +The full step-by-step procedure (including the BetterAuth `set-auth-token` capture and +the expected sanity-check output) is the **§12.10 runbook** in +[`ai-tools-integration-operations.md`](ai-tools-integration-operations.md). Nothing in +that runbook should be committed to this repo — the `.env` values are per-machine +secrets. 
+ +### 3.2 fluent-platform compose changes (applied locally; separate paired PR — D12) + +The required fluent-platform changes have been **applied to the local working tree** and +exercised by the §3.1 from-scratch bring-up. They still ship as a **separate paired PR** +against fluent-platform (D12) — a fork remote needs to be created before pushing. The +`api` service's `environment:` block in +[`fluent-platform/compose.yaml`](../../../../fluent-platform/compose.yaml) gets +`FLUENT_AI_URL: http://ai:8200` (so ecosystem mode resolves fluent-ai by service name), +plus a read-only bind mount of `fluent-api/scripts` so the smoke script is runnable +in-container. **`FLUENT_AI_KEY` is deliberately not overridden there** — it stays a +shared secret sourced from `fluent-api/.env` (§12.4). + +Two additional clean-slate fixes to the **`ai`** service were needed so the container +comes up from scratch (both upstream-platform issues surfaced while validating +reproducibility, not specific to this feature): + +- **Alembic DB URL:** `MIGRATIONS_DATABASE_URL=postgresql+asyncpg://migrations:postgres@db:5432/fluent` + — the migrations role's password was drifting (`password` vs `postgres`), crashing the + `ai` container's migration step. +- **uv cache off the noexec mount:** `UV_CACHE_DIR=/tmp/.uv-cache` with the `/tmp` tmpfs + bumped to `size=256m`. + +These belong in the same paired fluent-platform PR. **Do not push fluent-platform from +this task** until the fork remote exists. ### 3.3 Out of scope (do not build — §2 / §14) diff --git a/docs/proposals/repeated-word-check/ai-tools-integration-suggestion.md b/docs/proposals/repeated-word-check/ai-tools-integration-suggestion.md index 1123043..15380b4 100644 --- a/docs/proposals/repeated-word-check/ai-tools-integration-suggestion.md +++ b/docs/proposals/repeated-word-check/ai-tools-integration-suggestion.md @@ -244,11 +244,21 @@ export async function callFluentAi( ### 7.2 What it does -1. 
Reads `env.FLUENT_AI_URL` and `env.FLUENT_AI_KEY` (validated at boot in [`fluent-api/src/env.ts`](../../../src/env.ts)). -2. POSTs to `${FLUENT_AI_URL}/api/v1/${toolPath}` with: +1. Reads `env.FLUENT_AI_URL`, `env.FLUENT_AI_KEY`, and `env.FLUENT_AI_API_PREFIX` (validated at boot in [`fluent-api/src/env.ts`](../../../src/env.ts)). +2. POSTs to `${FLUENT_AI_URL}/${FLUENT_AI_API_PREFIX}/${toolPath}` (slashes normalized) with: - `Content-Type: application/json` - `X-API-Key: ${FLUENT_AI_KEY}` - body serialized as JSON + + > **Implementation update (2026-06-04).** The prefix between `FLUENT_AI_URL` and + > `toolPath` is **configurable** via `FLUENT_AI_API_PREFIX` and defaults to **empty**. + > The original spec assumed `/api/v1`, but the live fluent-ai build serves its routers + > at the root (`POST /tools/greek-room/repeated-words`), so the hardcoded `/api/v1/` + > produced a 404 → `502 AI_SERVICE_UNAVAILABLE`. With the empty default the URL is + > `${FLUENT_AI_URL}/${toolPath}`; setting `FLUENT_AI_API_PREFIX=/api/v1` restores the + > versioned form for a future deployment — a pure env flip, no code change. See + > [`ai-tools-integration-status.md`](ai-tools-integration-status.md) §3.1. + 3. Honors the caller's `AbortSignal` if provided; otherwise applies a default 30-second timeout via a derived signal. (Tunable per-call.) 4. On HTTP-level success (2xx), parses the response body as `ToolJobResponse` and validates the `result` field against `resultSchema` _if and only if_ `status === "completed"`. (When status is `queued|running`, `result` is `null` and is not validated.) 5. Returns `{ ok: true, data: envelope }` — note this is the **full envelope**, not the unwrapped result. Callers that care only about the synchronous-completed case can `if (envelope.status === "completed") return envelope.result`. Callers that want to support the future polling case can inspect `envelope.status` and `envelope.job_id`. 
diff --git a/docs/proposals/repeated-word-check/ai-tools-integration-summary.md b/docs/proposals/repeated-word-check/ai-tools-integration-summary.md index fccbd8d..fdfa3f2 100644 --- a/docs/proposals/repeated-word-check/ai-tools-integration-summary.md +++ b/docs/proposals/repeated-word-check/ai-tools-integration-summary.md @@ -1,5 +1,18 @@ # AI-Tools Integration on fluent-api — Architecture Review Summary +> **Status update (2026-06-04) — implemented and verified live end-to-end.** This +> summary captures the pre-merge review framing; the integration has since been built on +> branch `jel-word-check` and exercised against a from-scratch fluent-platform stack +> (fluent-web → fluent-api → fluent-ai), with the smoke check passing host- and +> in-container. The file-by-file record and the verified-live notes live in +> [`ai-tools-integration-status.md`](ai-tools-integration-status.md). One implementation +> detail postdates this summary: the fluent-ai path prefix is configurable via +> `FLUENT_AI_API_PREFIX` (default empty, matching the live build that serves at the root) +> — see [`ai-tools-integration-suggestion.md`](ai-tools-integration-suggestion.md) §7.2. +> The paired fluent-platform compose change is broader than the single env override noted +> below (it also pins `MIGRATIONS_DATABASE_URL`, sets `UV_CACHE_DIR`, and mounts the +> smoke script) — see status doc §3.2. + **Purpose:** Reviewer orientation for the proposed AI-tools integration. This summary is intended to stand on its own; if more detail is wanted, the long-form proposal is split across two sibling files — [`ai-tools-integration-suggestion.md`](ai-tools-integration-suggestion.md) (**Part 1**: contract & design, §1–§10) and [`ai-tools-integration-operations.md`](ai-tools-integration-operations.md) (**Part 2**: operations, forward compatibility, testing, §11–§15). 
A file-by-file account of what is already implemented in the tree versus what remains lives in [`ai-tools-integration-status.md`](ai-tools-integration-status.md). Ships as a coordinated pair of PRs — fluent-api (the bulk) plus a small fluent-platform PR adding one compose env-var override (per **D12**). ## What's being proposed diff --git a/scripts/smoke-repeated-words.ts b/scripts/smoke-repeated-words.ts index a3c5583..9ed3679 100644 --- a/scripts/smoke-repeated-words.ts +++ b/scripts/smoke-repeated-words.ts @@ -15,24 +15,61 @@ * substitute for the vitest suite. It requires BOTH fluent-api and fluent-ai * to be up (ecosystem mode, or standalone with fluent-ai started separately). * + * Prerequisites for a from-scratch stack: + * A fresh `./fluent.sh up` runs fluent-api's docker-entrypoint.sh, which + * migrates the DB and seeds roles + RBAC — but it does NOT seed the default + * organization or the dev login users (those are deliberately kept out of + * automatic boot so production images never auto-provision accounts). This + * script signs in as a seeded dev user, so on a brand-new stack you must + * seed the org + dev users once (order matters — org before users). Run them + * against the already-running `api` container with `docker compose exec` (the + * platform's own db:seed helper uses the same `exec` form): + * + * Ecosystem mode (from fluent-platform/): + * docker compose exec api npx tsx src/db/seeds/organizations.ts + * docker compose exec api npx tsx src/db/seeds/dev-users.ts + * + * Standalone / inside the api container (from fluent-api/): + * npm run db:seed:org + * npm run db:seed:dev-users + * + * That creates "Fluent Dev" plus pm@fluent.local and t@fluent.local. If you + * skip this step, sign-in returns 401 and the script prints the same hint. 
+ * * Because the fluent-api endpoint is guarded by a BetterAuth session + - * AI_TOOLS_USE permission, you must supply a session credential: + * AI_TOOLS_USE permission, the script needs a session credential. There are + * three ways to supply one, in priority order: * - * # Bearer token (mobile / API client): - * npm run smoke:repeated-words -- --token "" + * 1. A bearer token you already have (mobile / API client): + * npm run smoke:repeated-words -- --token "" + * # (or set FLUENT_API_TOKEN in the environment) * - * # Or a raw Cookie header (web session): - * npm run smoke:repeated-words -- --cookie "better-auth.session_token=..." + * 2. A raw Cookie header (web session): + * npm run smoke:repeated-words -- --cookie "better-auth.session_token=..." * - * # Override the base URL (default: $FLUENT_API_URL or http://localhost:9999): - * npm run smoke:repeated-words -- --url http://localhost:9999 --token "..." + * 3. Nothing — the script auto-signs-in with a seeded dev user and captures + * the bearer token from the BetterAuth `set-auth-token` response header. + * Defaults to the seeded translator (t@fluent.local / t@123456), which + * carries the content:update permission that AI_TOOLS_USE aliases. + * Override with --signin-email / --signin-password (or the env vars + * FLUENT_API_SIGNIN_EMAIL / FLUENT_API_SIGNIN_PASSWORD): + * npm run smoke:repeated-words -- --signin-email pm@fluent.local \ + * --signin-password pm@123456 + * + * The base URL defaults to $FLUENT_API_URL or http://localhost:9999 (which + * works both on the host — published port — and inside the api container — + * loopback). From a *different* container, pass --url http://api:9999. + * + * # Override the base URL: + * npm run smoke:repeated-words -- --url http://localhost:9999 * * # Print the raw response body and skip sanity checks: - * npm run smoke:repeated-words -- --raw --token "..." 
+ * npm run smoke:repeated-words -- --raw * * Exit status: * 0 — request succeeded and (unless --raw) all sanity checks passed - * 1 — HTTP error, unexpected response shape, or failed sanity check + * 1 — HTTP error, unexpected response shape, failed sanity check, or + * sign-in failure * 2 — bad CLI arguments */ @@ -65,10 +102,27 @@ const SAMPLE_REQUEST: SampleRequest = { ], }; +// Seeded dev translator (see fluent-api/src/db/seeds/dev-users.ts and rbac.ts). +// The Translator role carries content:update, which AI_TOOLS_USE aliases, so +// this user can invoke the endpoint. Override with --signin-email/-password. +const DEFAULT_SIGNIN_EMAIL = 't@fluent.local'; +const DEFAULT_SIGNIN_PASSWORD = 't@123456'; + +// BetterAuth rejects sign-in requests whose Origin isn't trusted +// (`MISSING_OR_NULL_ORIGIN` / 403). fluent-api's trustedOrigins is derived from +// FRONTEND_URL (see src/lib/auth.ts), which in the dev stack is the web app at +// http://localhost:5173. Browsers set Origin automatically; this script is not +// a browser, so it must send a matching Origin header by hand. Override with +// --origin or FLUENT_API_ORIGIN / FRONTEND_URL if your stack differs. +const DEFAULT_SIGNIN_ORIGIN = 'http://localhost:5173'; + interface CliArgs { url: string; token?: string; cookie?: string; + signinEmail: string; + signinPassword: string; + signinOrigin: string; timeoutMs: number; raw: boolean; } @@ -76,6 +130,13 @@ interface CliArgs { function parseArgs(argv: string[]): CliArgs { const args: CliArgs = { url: (process.env.FLUENT_API_URL ?? 'http://localhost:9999').replace(/\/$/, ''), + signinEmail: process.env.FLUENT_API_SIGNIN_EMAIL ?? DEFAULT_SIGNIN_EMAIL, + signinPassword: process.env.FLUENT_API_SIGNIN_PASSWORD ?? DEFAULT_SIGNIN_PASSWORD, + signinOrigin: ( + process.env.FLUENT_API_ORIGIN ?? + process.env.FRONTEND_URL ?? 
+ DEFAULT_SIGNIN_ORIGIN + ).replace(/\/$/, ''), timeoutMs: 30_000, raw: false, }; @@ -92,6 +153,15 @@ function parseArgs(argv: string[]): CliArgs { case '--cookie': args.cookie = argv[++i]; break; + case '--signin-email': + args.signinEmail = argv[++i] ?? ''; + break; + case '--signin-password': + args.signinPassword = argv[++i] ?? ''; + break; + case '--origin': + args.signinOrigin = (argv[++i] ?? '').replace(/\/$/, ''); + break; case '--timeout': args.timeoutMs = Number(argv[++i]) * 1000; break; @@ -102,7 +172,12 @@ function parseArgs(argv: string[]): CliArgs { case '--help': console.error( 'Usage: npm run smoke:repeated-words -- [--url ] [--token ] ' + - '[--cookie
] [--timeout ] [--raw]' + '[--cookie
] [--signin-email ] [--signin-password ] ' + + '[--origin ] [--timeout ] [--raw]\n\n' + + 'With no --token/--cookie, the script auto-signs-in (default ' + + `${DEFAULT_SIGNIN_EMAIL}) and uses the captured bearer token. The ` + + `sign-in Origin header defaults to ${DEFAULT_SIGNIN_ORIGIN} (the dev ` + + "web app), which must match fluent-api's trustedOrigins / FRONTEND_URL." ); process.exit(2); break; @@ -112,13 +187,7 @@ function parseArgs(argv: string[]): CliArgs { } } - if (!args.token && !args.cookie && !process.env.FLUENT_API_TOKEN) { - console.error( - 'error: no session credential supplied. Pass --token or --cookie
' + - '(or set FLUENT_API_TOKEN). The endpoint requires a BetterAuth session.' - ); - process.exit(2); - } + // A pre-supplied token (flag or env) takes precedence over auto sign-in. if (!args.token && process.env.FLUENT_API_TOKEN) { args.token = process.env.FLUENT_API_TOKEN; } @@ -126,6 +195,142 @@ function parseArgs(argv: string[]): CliArgs { return args; } +/** + * Print an actionable hint when sign-in is rejected because the dev user was + * never seeded. A fresh `./fluent.sh up` only runs the roles + RBAC seeds + * (via fluent-api/docker-entrypoint.sh); the organization and dev-user seeds + * are deliberately left out of the automatic boot so production images never + * auto-provision login accounts. The two seeds below create the default + * "Fluent Dev" org and the dev users this script authenticates as. + */ +/** + * Print an actionable hint when sign-in is rejected for a bad Origin + * (MISSING_OR_NULL_ORIGIN). fluent-api's BetterAuth trustedOrigins is derived + * from FRONTEND_URL (src/lib/auth.ts); a non-browser client must send a + * matching Origin header by hand. + */ +function printOriginMismatchHint(origin: string): void { + console.error(''); + console.error('────────────────────────────────────────────────────────────────────'); + console.error(`The sign-in Origin "${origin}" was rejected by BetterAuth.`); + console.error(''); + console.error("fluent-api only trusts the origin derived from its FRONTEND_URL"); + console.error('(see src/lib/auth.ts → trustedOrigins). 
In the default dev stack'); + console.error('that is the web app at http://localhost:5173.'); + console.error(''); + console.error('Pass an Origin that matches your stack, e.g.:'); + console.error(' npm run smoke:repeated-words -- --origin http://localhost:5173'); + console.error('Or set FLUENT_API_ORIGIN / FRONTEND_URL in the environment.'); + console.error('Check the api container value with:'); + console.error(' docker compose exec api sh -c \'echo "$FRONTEND_URL"\''); + console.error('────────────────────────────────────────────────────────────────────'); +} + +function printMissingDevUserHint(email: string): void { + const isDefaultUser = email === DEFAULT_SIGNIN_EMAIL || email === 'pm@fluent.local'; + console.error(''); + console.error('────────────────────────────────────────────────────────────────────'); + console.error(`The account "${email}" could not sign in.`); + if (isDefaultUser) { + console.error(''); + console.error('A fresh `./fluent.sh up` seeds roles + RBAC but NOT the organization'); + console.error('or the dev users, so the default smoke-test account does not exist'); + console.error('yet. 
Seed them once (order matters — org before users):'); + console.error(''); + console.error(' Ecosystem mode (from fluent-platform/):'); + console.error(' docker compose exec api npx tsx src/db/seeds/organizations.ts'); + console.error(' docker compose exec api npx tsx src/db/seeds/dev-users.ts'); + console.error(''); + console.error(' Standalone / inside the api container (from fluent-api/):'); + console.error(' npm run db:seed:org'); + console.error(' npm run db:seed:dev-users'); + console.error(''); + console.error('That creates "Fluent Dev" plus pm@fluent.local / t@fluent.local.'); + console.error('Then re-run this smoke test.'); + } else { + console.error(''); + console.error('Verify the credentials, or seed dev users with (from fluent-api/):'); + console.error(' npm run db:seed:org && npm run db:seed:dev-users'); + console.error('Override the SEED_* env vars if you use custom dev credentials'); + console.error('(see src/db/seeds/dev-users.ts).'); + } + console.error('────────────────────────────────────────────────────────────────────'); +} + +/** + * Sign in with email/password and return the BetterAuth bearer token, which + * the server exposes via the `set-auth-token` response header (bearer plugin). + * Returns null on any failure (caller renders a clear error). + */ +async function signIn( + baseUrl: string, + email: string, + password: string, + origin: string, + timeoutMs: number +): Promise { + const endpoint = `${baseUrl}/api/auth/sign-in/email`; + console.error(`Signing in as ${email} at ${endpoint} (Origin: ${origin}) ...`); + + const controller = new AbortController(); + const timer = setTimeout(() => controller.abort(), timeoutMs); + try { + const response = await fetch(endpoint, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + Accept: 'application/json', + // Required: BetterAuth rejects sign-in with a missing/untrusted Origin + // (MISSING_OR_NULL_ORIGIN). 
Must match fluent-api's trustedOrigins, + // which is derived from FRONTEND_URL. See parseArgs / --origin. + Origin: origin, + }, + body: JSON.stringify({ email, password }), + signal: controller.signal, + }); + + const token = response.headers.get('set-auth-token'); + if (!response.ok) { + const body = await response.text(); + console.error(`error: sign-in failed (HTTP ${response.status}): ${body}`); + // Two very different failures land here: + // * MISSING_OR_NULL_ORIGIN → the Origin header didn't match + // fluent-api's trustedOrigins (FRONTEND_URL). Tell the operator to + // fix --origin rather than reseed. + // * Otherwise a 401/403 almost always means the dev user simply hasn't + // been seeded yet: a fresh `./fluent.sh up` seeds roles + RBAC but + // intentionally NOT the org or the dev users (see + // src/db/seeds/organizations.ts + dev-users.ts), so the account this + // script signs in as does not exist. Point at the exact seeds. + if (body.includes('MISSING_OR_NULL_ORIGIN') || body.includes('ORIGIN')) { + printOriginMismatchHint(origin); + } else if (response.status === 401 || response.status === 403) { + printMissingDevUserHint(email); + } + return null; + } + if (!token) { + console.error( + 'error: sign-in succeeded but no set-auth-token header was returned. ' + + 'Is the BetterAuth bearer() plugin enabled and the header exposed via CORS?' + ); + return null; + } + console.error('Sign-in OK; captured bearer token.'); + return token; + } catch (error) { + const isAbort = error instanceof Error && error.name === 'AbortError'; + console.error( + isAbort + ? `error: sign-in request to ${endpoint} timed out` + : `error: could not reach ${endpoint}: ${error instanceof Error ? 
error.message : String(error)}` + ); + return null; + } finally { + clearTimeout(timer); + } +} + interface Finding { legitimate?: boolean; [key: string]: unknown; @@ -194,6 +399,27 @@ async function main(): Promise { const args = parseArgs(process.argv.slice(2)); const endpoint = `${args.url}/ai/tools/greek-room/repeated-words`; + // Resolve a credential: explicit token/cookie wins; otherwise auto sign-in. + if (!args.token && !args.cookie) { + if (!args.signinEmail || !args.signinPassword) { + console.error( + 'error: no credential and no sign-in email/password available. ' + + 'Pass --token/--cookie, or --signin-email/--signin-password.' + ); + return 2; + } + const token = await signIn( + args.url, + args.signinEmail, + args.signinPassword, + args.signinOrigin, + args.timeoutMs + ); + if (!token) return 1; + args.token = token; + console.error(''); + } + const headers: Record = { 'Content-Type': 'application/json', Accept: 'application/json', @@ -258,18 +484,26 @@ async function main(): Promise { console.error(''); console.error('--- response shape sanity checks ---'); const results = runSanityChecks(payload); - let failed = false; + let passedCount = 0; for (const { passed, label } of results) { console.error(` ${passed ? 'ok ' : 'FAIL'} ${label}`); - if (!passed) failed = true; + if (passed) passedCount++; } + const total = results.length; + const failed = passedCount < total; + // Unmissable final verdict banner, mirroring fluent-ai's self-grading smoke + // script: a clear PASS/FAIL line with the check tally, set off by a rule so + // it never blends into the JSON body printed above. 
console.error(''); + console.error('════════════════════════════════════════════════════════════════════'); if (failed) { - console.error('one or more sanity checks failed'); + console.error(` SMOKE TEST FAILED — ${passedCount}/${total} checks passed`); + console.error('════════════════════════════════════════════════════════════════════'); return 1; } - console.error('smoke test passed'); + console.error(` SMOKE TEST PASSED — ${passedCount}/${total} checks passed`); + console.error('════════════════════════════════════════════════════════════════════'); return 0; } diff --git a/src/env.ts b/src/env.ts index 1e046b1..ca05521 100644 --- a/src/env.ts +++ b/src/env.ts @@ -29,11 +29,18 @@ const EnvSchema = z.object({ FRONTEND_URL: z.string(), // ── Fluent-AI integration ────────────────────────────────────────── - // Base URL of the fluent-ai service (no trailing slash, no /api/v1 suffix). + // Base URL of the fluent-ai service (no trailing slash, no path suffix). // Ecosystem mode (via fluent-platform): http://ai:8200 — standalone: http://localhost:8200 FLUENT_AI_URL: z.string().url(), // Shared API key for calling fluent-ai (matches a row in fluent-ai's ai_api_keys table). FLUENT_AI_KEY: z.string().min(1), + // Path prefix that fluent-ai mounts its routers under, BETWEEN the base URL and + // the per-tool path. The live fluent-ai build currently mounts routers at the + // root (e.g. POST /tools/greek-room/repeated-words), so the default is empty. + // When fluent-ai eventually adopts versioned routing it can be flipped to + // '/api/v1' via env with no code change. Leading slash optional; trailing + // slashes are trimmed when the request URL is assembled. 
+ FLUENT_AI_API_PREFIX: z.string().default(''), }); export type env = z.infer; diff --git a/src/lib/services/fluent-ai/fluent-ai.client.test.ts b/src/lib/services/fluent-ai/fluent-ai.client.test.ts index 38c4107..fcb6fc1 100644 --- a/src/lib/services/fluent-ai/fluent-ai.client.test.ts +++ b/src/lib/services/fluent-ai/fluent-ai.client.test.ts @@ -230,7 +230,7 @@ describe('callFluentAi', () => { // ─── Request shape ───────────────────────────────────────────────────────── - it('sends the correct URL, headers, and JSON body', async () => { + it('sends the correct URL, headers, and JSON body (default empty prefix)', async () => { const fetchSpy = vi.spyOn(globalThis, 'fetch').mockResolvedValue(jsonResponse(buildEnvelope())); const body = { lang_code: 'eng', verses: [{ snt_id: 'GEN 1:1', text: 'In in' }] }; @@ -239,7 +239,9 @@ describe('callFluentAi', () => { expect(fetchSpy).toHaveBeenCalledOnce(); const [url, init] = fetchSpy.mock.calls[0] as [string, RequestInit]; - expect(url).toBe(`${env.FLUENT_AI_URL}/api/v1/${TOOL_PATH}`); + // The live fluent-ai build mounts routers at the root, so with the default + // empty FLUENT_AI_API_PREFIX the URL carries no version segment. + expect(url).toBe(`${env.FLUENT_AI_URL}/${TOOL_PATH}`); expect(init.method).toBe('POST'); const headers = init.headers as Record; @@ -247,4 +249,33 @@ describe('callFluentAi', () => { expect(headers['X-API-Key']).toBe(env.FLUENT_AI_KEY); expect(JSON.parse(init.body as string)).toEqual(body); }); + + it('inserts a configured FLUENT_AI_API_PREFIX between base URL and tool path', async () => { + // env is read at call time, so we can flip the prefix for this one case and + // restore it afterward. Covers the forward-compat path (fluent-ai adopting + // a versioned mount such as /api/v1) without a code change. 
+ const original = env.FLUENT_AI_API_PREFIX; + const cases: { prefix: string; expected: string }[] = [ + { prefix: '/api/v1', expected: `${env.FLUENT_AI_URL}/api/v1/${TOOL_PATH}` }, + { prefix: 'api/v1', expected: `${env.FLUENT_AI_URL}/api/v1/${TOOL_PATH}` }, + { prefix: '/api/v1/', expected: `${env.FLUENT_AI_URL}/api/v1/${TOOL_PATH}` }, + ]; + + try { + for (const { prefix, expected } of cases) { + env.FLUENT_AI_API_PREFIX = prefix; + const fetchSpy = vi + .spyOn(globalThis, 'fetch') + .mockResolvedValue(jsonResponse(buildEnvelope())); + + await callFluentAi(TOOL_PATH, {}, resultSchema); + + const [url] = fetchSpy.mock.calls[0] as [string, RequestInit]; + expect(url).toBe(expected); + vi.restoreAllMocks(); + } + } finally { + env.FLUENT_AI_API_PREFIX = original; + } + }); }); diff --git a/src/lib/services/fluent-ai/fluent-ai.client.ts b/src/lib/services/fluent-ai/fluent-ai.client.ts index d02e0c1..dc7fdf7 100644 --- a/src/lib/services/fluent-ai/fluent-ai.client.ts +++ b/src/lib/services/fluent-ai/fluent-ai.client.ts @@ -63,11 +63,28 @@ function aiError(code: ErrorCode, detail?: string): Extract, { ok: }; } +/** + * Assemble the absolute fluent-ai request URL from the base URL, the optional + * configurable API prefix, and the per-tool path. + * + * The prefix (`env.FLUENT_AI_API_PREFIX`) lets fluent-api track wherever + * fluent-ai mounts its routers WITHOUT a code change: the live build serves at + * the root (prefix ''), but a future versioned build (e.g. '/api/v1') is a pure + * env flip. Leading/trailing slashes on any segment are normalized so the + * resulting URL never contains an empty (`//`) segment. + */ +function buildToolUrl(toolPath: string): string { + const base = env.FLUENT_AI_URL.replace(/\/+$/, ''); + const prefix = env.FLUENT_AI_API_PREFIX.replace(/^\/+|\/+$/g, ''); + const tool = toolPath.replace(/^\/+/, ''); + return prefix ? `${base}/${prefix}/${tool}` : `${base}/${tool}`; +} + /** * Shared client for calling a fluent-ai tool endpoint. 
* * Behavior (see §7): - * - POSTs to `${FLUENT_AI_URL}/api/v1/${toolPath}` with `X-API-Key` and a JSON body. + * - POSTs to `${FLUENT_AI_URL}${FLUENT_AI_API_PREFIX}/${toolPath}` with `X-API-Key` and a JSON body. * - Honors a caller-supplied `AbortSignal`, otherwise applies a default 30s timeout. * - On 2xx, parses the body as a `ToolJobResponse` and (only when * `status === 'completed'`) validates `result` against `resultSchema`. @@ -82,7 +99,7 @@ export async function callFluentAi( resultSchema: z.ZodType, options?: CallFluentAiOptions ): Promise>> { - const url = `${env.FLUENT_AI_URL}/api/v1/${toolPath}`; + const url = buildToolUrl(toolPath); const timeoutMs = options?.timeoutMs ?? DEFAULT_TIMEOUT_MS; // Use the caller's signal if provided; otherwise derive a timeout signal.