diff --git a/apps/admin/CLAUDE.md b/apps/admin/CLAUDE.md index 3f6a3dbd9..0cb57a43d 100644 --- a/apps/admin/CLAUDE.md +++ b/apps/admin/CLAUDE.md @@ -688,13 +688,11 @@ locale) DO UPDATE SET …`. The `embedding` cast is per-row at the 2. Ensure both S3 env blocks are set on the `forge-admin` Railway service: - `RAILWAY_S3_*` → admin's write bucket - (`cms-storage-jbpuckp0lmqap`, Railway bucket resource - `17368fd5-23e7-45bb-b007-e3f843b3d710`). Used for the coreId - mapping snapshot and any other `admin-migrations/*` writes. + in Railway S3. Used for the coreId mapping snapshot and any + other `admin-migrations/*` writes. - `MANAGER_ARTIFACTS_S3_*` → manager's bucket - (`forgemanagerartifacts-xtgld8`, Railway bucket resource - `b1c705c6-5add-48a0-a153-5ef40f876a4f`). Read-only; - `{assetId}/scene-analysis.json` + `{assetId}/embeddings.json`. + read-only; `{assetId}/scene-analysis.json` + + `{assetId}/embeddings.json`. Also ensure `OPENROUTER_API_KEY` or `OPENAI_API_KEY` is set so admin can re-embed scene descriptions. @@ -1448,7 +1446,7 @@ path and the Cloudflare 524 edge timeout. Per pnpm --filter @forge/admin pull:mapping ``` - Downloads `s3://cms-storage-jbpuckp0lmqap/admin-migrations/core-id-mapping.json` + Downloads `s3:///admin-migrations/core-id-mapping.json` (admin's prod bucket — same key the GraphQL workflow reads in prod) into `apps/admin/.tmp/objects/admin-migrations/core-id-mapping.json`. Admin's `src/storage/s3.ts` `getObject` falls back to that path diff --git a/apps/admin/src/app/watch/demo-keyword-search/algolia-action.test.ts b/apps/admin/src/app/watch/demo-keyword-search/algolia-action.test.ts deleted file mode 100644 index 24df814f3..000000000 --- a/apps/admin/src/app/watch/demo-keyword-search/algolia-action.test.ts +++ /dev/null @@ -1,178 +0,0 @@ -import { afterEach, beforeEach, describe, expect, it, vi } from "vitest" - -// Mock the env singleton — vitest hoists vi.mock before imports, so the -// action under test reads the mocked values when first imported below. -vi.mock("@/config/env", () => ({ - env: { - ALGOLIA_APP_ID: "TESTAPP", - ALGOLIA_SEARCH_API_KEY: "test-key", - ALGOLIA_INDEX: "video-variants-test", - }, -})) - -import { env } from "@/config/env" -import { searchAlgolia } from "./algolia-action" - -const ENV = env as { - ALGOLIA_APP_ID: string | undefined - ALGOLIA_SEARCH_API_KEY: string | undefined - ALGOLIA_INDEX: string | undefined -} - -describe("searchAlgolia", () => { - const fetchMock = vi.fn() - - beforeEach(() => { - fetchMock.mockReset() - vi.stubGlobal("fetch", fetchMock) - ENV.ALGOLIA_APP_ID = "TESTAPP" - ENV.ALGOLIA_SEARCH_API_KEY = "test-key" - ENV.ALGOLIA_INDEX = "video-variants-test" - }) - - afterEach(() => { - vi.unstubAllGlobals() - }) - - function jsonResponse(body: unknown, init: ResponseInit = {}): Response { - return new Response(JSON.stringify(body), { - status: 200, - headers: { "Content-Type": "application/json" }, - ...init, - }) - } - - it("returns shaped hits for a successful Algolia response", async () => { - fetchMock.mockResolvedValueOnce( - jsonResponse({ - hits: [ - { - videoId: "BibleProject", - titles: ["The BibleProject Collection", "Other"], - description: ["A short film collection."], - }, - { - videoId: "JesusFilm", - titles: ["JESUS Film"], - }, - ], - }), - ) - - const result = await searchAlgolia({ - q: "the bible project", - locale: "en", - limit: 5, - }) - - expect(result).toEqual({ - hits: [ - { - videoId: "BibleProject", - title: "The BibleProject Collection", - description: "A short film collection.", - }, - { - videoId: "JesusFilm", - title: "JESUS Film", - description: null, - }, - ], - }) - const [url, init] = fetchMock.mock.calls[0]! - expect(url).toBe( - "https://TESTAPP-dsn.algolia.net/1/indexes/video-variants-test/query", - ) - expect((init as RequestInit).method).toBe("POST") - const headers = (init as RequestInit).headers as Record - expect(headers["X-Algolia-API-Key"]).toBe("test-key") - expect(headers["X-Algolia-Application-Id"]).toBe("TESTAPP") - expect(JSON.parse(String((init as RequestInit).body))).toEqual({ - query: "the bible project", - hitsPerPage: 5, - }) - }) - - it("throws algolia_not_configured when any env var is missing", async () => { - ENV.ALGOLIA_INDEX = undefined - await expect( - searchAlgolia({ q: "x", locale: "en", limit: 5 }), - ).rejects.toThrow("algolia_not_configured") - expect(fetchMock).not.toHaveBeenCalled() - }) - - it("throws algolia_upstream_error on non-2xx response", async () => { - fetchMock.mockResolvedValueOnce(new Response("forbidden", { status: 403 })) - await expect( - searchAlgolia({ q: "x", locale: "en", limit: 5 }), - ).rejects.toThrow("algolia_upstream_error") - }) - - it("throws algolia_upstream_error on fetch network failure", async () => { - fetchMock.mockRejectedValueOnce(new Error("connection refused")) - await expect( - searchAlgolia({ q: "x", locale: "en", limit: 5 }), - ).rejects.toThrow("algolia_upstream_error") - }) - - it("throws algolia_upstream_error on invalid JSON", async () => { - fetchMock.mockResolvedValueOnce( - new Response("", { - status: 200, - headers: { "Content-Type": "application/json" }, - }), - ) - await expect( - searchAlgolia({ q: "x", locale: "en", limit: 5 }), - ).rejects.toThrow("algolia_upstream_error") - }) - - it("clamps limit to MAX_LIMIT (50)", async () => { - fetchMock.mockResolvedValueOnce(jsonResponse({ hits: [] })) - await searchAlgolia({ q: "x", locale: "en", limit: 999 }) - const body = JSON.parse( - String((fetchMock.mock.calls[0]![1] as RequestInit).body), - ) - expect(body.hitsPerPage).toBe(50) - }) - - it("clamps non-positive / non-numeric limit to a sane minimum", async () => { - fetchMock.mockResolvedValueOnce(jsonResponse({ hits: [] })) - await searchAlgolia({ q: "x", locale: "en", limit: 0 }) - const body = JSON.parse( - String((fetchMock.mock.calls[0]![1] as RequestInit).body), - ) - expect(body.hitsPerPage).toBeGreaterThanOrEqual(1) - }) - - it("tolerates hits without titles or description fields", async () => { - fetchMock.mockResolvedValueOnce( - jsonResponse({ - hits: [ - { videoId: "OnlyId" }, - { videoId: "EmptyArrays", titles: [], description: [] }, - ], - }), - ) - const result = await searchAlgolia({ q: "x", locale: "en", limit: 5 }) - expect(result.hits).toEqual([ - { videoId: "OnlyId", title: null, description: null }, - { videoId: "EmptyArrays", title: null, description: null }, - ]) - }) - - it("drops hits without a string videoId", async () => { - fetchMock.mockResolvedValueOnce( - jsonResponse({ - hits: [ - { videoId: 123, titles: ["bad"] }, - { videoId: "Good", titles: ["yes"] }, - ], - }), - ) - const result = await searchAlgolia({ q: "x", locale: "en", limit: 5 }) - expect(result.hits).toEqual([ - { videoId: "Good", title: "yes", description: null }, - ]) - }) -}) diff --git a/apps/admin/src/app/watch/demo-keyword-search/algolia-action.ts b/apps/admin/src/app/watch/demo-keyword-search/algolia-action.ts deleted file mode 100644 index 3858ceb4a..000000000 --- a/apps/admin/src/app/watch/demo-keyword-search/algolia-action.ts +++ /dev/null @@ -1,138 +0,0 @@ -"use server" - -/** - * Throwaway operator harness — server action backing the third - * column of /watch/demo-keyword-search. - * - * Proxies a query to the watch project's Algolia index using - * `ALGOLIA_SEARCH_API_KEY` (the watch project's `ALGOLIA_SERVER_API_KEY` - * value, which is unrestricted; the public `NEXT_PUBLIC_ALGOLIA_API_KEY` - * is referer-locked to the watch domain and cannot be used from - * admin.jesusfilm.org). - * - * Lifetime: this exists only while we refine admin's hybrid + - * keyword-first ranking. At R8 cutover, delete this file, drop the - * Algolia env vars from Doppler / Railway, and remove the third pane - * from `demo-search-client.tsx`. No service layer, no GraphQL surface, - * no REST endpoint — that is the point. - * - * `locale` is accepted for log context / forward compatibility but - * NOT forwarded to Algolia in v1 — the index returns multi-locale - * hits and the demo renders `titles[0]` defensively. - */ - -import { env } from "@/config/env" - -const ALGOLIA_TIMEOUT_MS = 5000 -const MAX_LIMIT = 50 - -export type AlgoliaHit = { - videoId: string - title: string | null - description: string | null -} - -export type AlgoliaSearchResult = { - hits: AlgoliaHit[] -} - -type AlgoliaRawHit = { - videoId?: unknown - titles?: unknown - description?: unknown -} - -type AlgoliaRawResponse = { - hits?: AlgoliaRawHit[] -} - -export async function searchAlgolia(args: { - q: string - locale: string - limit: number -}): Promise { - const appId = env.ALGOLIA_APP_ID - const apiKey = env.ALGOLIA_SEARCH_API_KEY - const index = env.ALGOLIA_INDEX - if (!appId || !apiKey || !index) { - throw new Error("algolia_not_configured") - } - - const hitsPerPage = Math.max( - 1, - Math.min(MAX_LIMIT, Math.floor(Number(args.limit) || 10)), - ) - - const url = `https://${appId}-dsn.algolia.net/1/indexes/${encodeURIComponent(index)}/query` - - let response: Response - try { - response = await fetch(url, { - method: "POST", - headers: { - "X-Algolia-API-Key": apiKey, - "X-Algolia-Application-Id": appId, - "Content-Type": "application/json", - }, - body: JSON.stringify({ query: args.q, hitsPerPage }), - signal: AbortSignal.timeout(ALGOLIA_TIMEOUT_MS), - cache: "no-store", - }) - } catch (error) { - const msg = error instanceof Error ? error.message : String(error) - console.error( - `[demo-search][algolia] fetch failed msg=${sanitize(msg)} q=${sanitize(args.q)}`, - ) - throw new Error("algolia_upstream_error") - } - - if (!response.ok) { - const body = await response.text().catch(() => "") - console.error( - `[demo-search][algolia] upstream error status=${response.status} body=${sanitize(body)} q=${sanitize(args.q)}`, - ) - throw new Error("algolia_upstream_error") - } - - let payload: AlgoliaRawResponse - try { - payload = (await response.json()) as AlgoliaRawResponse - } catch (error) { - const msg = error instanceof Error ? error.message : String(error) - console.error( - `[demo-search][algolia] invalid JSON msg=${sanitize(msg)} q=${sanitize(args.q)}`, - ) - throw new Error("algolia_upstream_error") - } - - const rawHits = Array.isArray(payload.hits) ? payload.hits : [] - const hits: AlgoliaHit[] = [] - for (const raw of rawHits) { - const videoId = typeof raw.videoId === "string" ? raw.videoId : null - if (!videoId) continue - hits.push({ - videoId, - title: pickFirstString(raw.titles), - description: pickFirstString(raw.description), - }) - } - - return { hits } -} - -function pickFirstString(value: unknown): string | null { - if (typeof value === "string") return value - if (Array.isArray(value)) { - for (const v of value) { - if (typeof v === "string" && v.length > 0) return v - } - } - return null -} - -/** Strip CR/LF/TAB from log inputs and clamp length so a malicious or - * just-large value can't pollute structured-log lines. */ -function sanitize(input: string): string { - const stripped = input.replace(/[\r\n\t]/g, " ") - return stripped.length > 200 ? `${stripped.slice(0, 200)}…` : stripped -} diff --git a/apps/admin/src/app/watch/demo-keyword-search/demo-search-client.tsx b/apps/admin/src/app/watch/demo-keyword-search/demo-search-client.tsx index 3200bcab1..9f1f7ff0c 100644 --- a/apps/admin/src/app/watch/demo-keyword-search/demo-search-client.tsx +++ b/apps/admin/src/app/watch/demo-keyword-search/demo-search-client.tsx @@ -9,13 +9,7 @@ import { type SearchResponse, type SearchResult, } from "./search-operation" -import { - buildProvenanceMap, - computeThreeWayDiff, - computeTopKDiff, - type Source, -} from "./diff" -import { searchAlgolia, type AlgoliaHit } from "./algolia-action" +import { buildProvenanceMap, computeTopKDiff, type Source } from "./diff" type ModeKey = "hybrid" | "keyword-first" @@ -25,13 +19,6 @@ type PaneState = | { status: "ok"; response: SearchResponse } | { status: "error"; messages: string[] } -type AlgoliaPaneState = - | { status: "idle" } - | { status: "loading" } - | { status: "ok"; hits: AlgoliaHit[] } - | { status: "not_configured" } - | { status: "error"; messages: string[] } - const DEFAULTS = { q: "", locale: "en", @@ -66,9 +53,6 @@ export function DemoSearchClient() { const [hybridPane, setHybridPane] = useState({ status: "idle" }) const [keywordPane, setKeywordPane] = useState({ status: "idle" }) - const [algoliaPane, setAlgoliaPane] = useState({ - status: "idle", - }) // Fire on URL change (effective query). Empty q skips. useEffect(() => { @@ -78,8 +62,6 @@ export function DemoSearchClient() { setHybridPane({ status: "idle" }) setKeywordPane({ status: "idle" }) - - setAlgoliaPane({ status: "idle" }) return } @@ -89,8 +71,6 @@ export function DemoSearchClient() { setKeywordPane({ status: "loading" }) - setAlgoliaPane({ status: "loading" }) - void Promise.allSettled([ runSearch({ q: trimmed, @@ -104,7 +84,6 @@ export function DemoSearchClient() { limit: urlLimit, mode: "keyword-first", }), - searchAlgolia({ q: trimmed, locale: urlLocale, limit: urlLimit }), ]).then((settled) => { if (cancelled) return setHybridPane( @@ -113,11 +92,6 @@ export function DemoSearchClient() { setKeywordPane( toPaneState(settled[1] as PromiseSettledResult), ) - setAlgoliaPane( - toAlgoliaPaneState( - settled[2] as PromiseSettledResult<{ hits: AlgoliaHit[] }>, - ), - ) }) return () => { @@ -151,8 +125,6 @@ export function DemoSearchClient() { return computeTopKDiff(aIds, bIds, urlK) }, [hybridPane, keywordPane, urlK]) - // 3-way diff is keyed by SLUG (Algolia's `videoId` ≈ admin's `slug`). - // Admin cuids and Algolia videoIds are not directly comparable. const hybridSlugs = useMemo( () => hybridPane.status === "ok" @@ -167,18 +139,9 @@ export function DemoSearchClient() { : [], [keywordPane], ) - const algoliaSlugs = useMemo( - () => - algoliaPane.status === "ok" ? algoliaPane.hits.map((h) => h.videoId) : [], - [algoliaPane], - ) - const triDiff = useMemo( - () => computeThreeWayDiff(hybridSlugs, keywordSlugs, algoliaSlugs, urlK), - [hybridSlugs, keywordSlugs, algoliaSlugs, urlK], - ) const provenance = useMemo( - () => buildProvenanceMap(hybridSlugs, keywordSlugs, algoliaSlugs, urlK), - [hybridSlugs, keywordSlugs, algoliaSlugs, urlK], + () => buildProvenanceMap(hybridSlugs, keywordSlugs, urlK), + [hybridSlugs, keywordSlugs, urlK], ) const rowAccent = useMemo(() => buildRowAccentMap(diff), [diff]) @@ -267,12 +230,10 @@ export function DemoSearchClient() { keywordPane={keywordPane} /> - -
r.slug} /> -
) @@ -324,20 +284,6 @@ function toPaneState(settled: PromiseSettledResult): PaneState { return { status: "error", messages: message.split("; ") } } -function toAlgoliaPaneState( - settled: PromiseSettledResult<{ hits: AlgoliaHit[] }>, -): AlgoliaPaneState { - if (settled.status === "fulfilled") { - return { status: "ok", hits: settled.value.hits } - } - const reason = settled.reason - const message = reason instanceof Error ? reason.message : String(reason) - if (message === "algolia_not_configured") { - return { status: "not_configured" } - } - return { status: "error", messages: message.split("; ") } -} - // --------------------------------------------------------------------------- // Diff panel // --------------------------------------------------------------------------- @@ -696,205 +642,6 @@ function ResultTable({ ) } -// --------------------------------------------------------------------------- -// 3-way diff panel + Algolia pane -// --------------------------------------------------------------------------- - -function AlgoliaDiffPanel({ - diff, - k, - algoliaPane, -}: { - diff: { - inAll: string[] - hybridAlgolia: string[] - keywordAlgolia: string[] - algoliaOnly: string[] - } - k: number - algoliaPane: AlgoliaPaneState -}) { - const haveData = algoliaPane.status === "ok" - return ( -
- - - - -
- ) -} - -function AlgoliaPane({ - state, - provenance, -}: { - state: AlgoliaPaneState - provenance: Map> -}) { - return ( -
-

- algolia (watch stg) -

- -
- ) -} - -function AlgoliaPaneBody({ - state, - provenance, -}: { - state: AlgoliaPaneState - provenance: Map> -}) { - if (state.status === "idle") { - return Enter a query above and submit to canary all sources. - } - if (state.status === "loading") { - return Loading… - } - if (state.status === "not_configured") { - return ( - - Algolia not configured for this environment. Set{" "} - ALGOLIA_APP_ID, ALGOLIA_SEARCH_API_KEY, and{" "} - ALGOLIA_INDEX on the admin Railway service. - - ) - } - if (state.status === "error") { - return ( - - Algolia upstream error. -
    - {state.messages.map((m, i) => ( -
  • {m}
  • - ))} -
-
- ) - } - - const hits = state.hits - - return ( -
-
- - source: video-variants-stg - - - results: {hits.length} - -
- - Throwaway parity column — Algolia stg index, plain query, no locale - filter, no facets. Removed at R8 cutover. - - {hits.length === 0 ? ( - No results. - ) : ( - - - - - - - - - - {hits.map((h, i) => { - const otherSources = otherSourcesFor(provenance, h.videoId, "A") - return ( - - - - - - ) - })} - -
#id / titlealso in
{i + 1} -
- {h.title || "(no title)"} -
-
- slug:{truncateId(h.videoId)} -
-
- -
- )} -
- ) -} - function ProvenanceChips({ sources }: { sources: Source[] }) { if (sources.length === 0) { return @@ -903,7 +650,6 @@ function ProvenanceChips({ sources }: { sources: Source[] }) { { H: { bg: "#fdeede", border: "#9a4400", color: "#9a4400" }, K: { bg: "#e6efff", border: "#0a4a99", color: "#0a4a99" }, - A: { bg: "#fbecd5", border: "#9a5a00", color: "#9a5a00" }, } return (
@@ -913,11 +659,7 @@ function ProvenanceChips({ sources }: { sources: Source[] }) { { it("returns full overlap when inputs are identical", () => { @@ -93,108 +89,23 @@ describe("computeTopKDiff", () => { }) }) -describe("computeThreeWayDiff", () => { - const empty = { - inAll: [], - hybridKeyword: [], - hybridAlgolia: [], - keywordAlgolia: [], - hybridOnly: [], - keywordOnly: [], - algoliaOnly: [], - } - - it("places identical inputs entirely in inAll", () => { - const ids = ["a", "b", "c"] - expect(computeThreeWayDiff(ids, ids, ids, 10)).toEqual({ - ...empty, - inAll: ["a", "b", "c"], - }) - }) - - it("classifies disjoint inputs into per-source-only buckets", () => { - expect(computeThreeWayDiff(["a"], ["b"], ["c"], 10)).toEqual({ - ...empty, - hybridOnly: ["a"], - keywordOnly: ["b"], - algoliaOnly: ["c"], - }) - }) - - it("classifies pairwise overlaps without leaking into inAll", () => { - // a: H+K, b: H+A, c: K+A, d: only H, e: only K, f: only A - expect( - computeThreeWayDiff( - ["a", "b", "d"], - ["a", "c", "e"], - ["b", "c", "f"], - 10, - ), - ).toEqual({ - inAll: [], - hybridKeyword: ["a"], - hybridAlgolia: ["b"], - keywordAlgolia: ["c"], - hybridOnly: ["d"], - keywordOnly: ["e"], - algoliaOnly: ["f"], - }) - }) - - it("respects per-source top-k truncation independently", () => { - // k=2 — "z" only appears via hybrid index 2 + algolia index 2, - // both truncated. So z drops out entirely. - expect( - computeThreeWayDiff(["a", "b", "z"], ["a"], ["c", "d", "z"], 2), - ).toEqual({ - ...empty, - hybridKeyword: ["a"], - hybridOnly: ["b"], - algoliaOnly: ["c", "d"], - }) - }) - - it("dedupes within each source (first occurrence wins)", () => { - expect( - computeThreeWayDiff(["a", "a", "b"], ["b", "b"], ["a", "c"], 10), - ).toEqual({ - inAll: [], - hybridKeyword: ["b"], - hybridAlgolia: ["a"], - keywordAlgolia: [], - hybridOnly: [], - keywordOnly: [], - algoliaOnly: ["c"], - }) - }) - - it("returns all empty buckets for k <= 0", () => { - expect(computeThreeWayDiff(["a"], ["b"], ["c"], 0)).toEqual(empty) - expect(computeThreeWayDiff(["a"], ["b"], ["c"], -3)).toEqual(empty) - }) - - it("handles empty inputs without throwing", () => { - expect(computeThreeWayDiff([], [], [], 5)).toEqual(empty) - }) -}) - describe("buildProvenanceMap", () => { it("records source membership per id within top-k", () => { - const map = buildProvenanceMap(["a", "b"], ["a", "c"], ["b", "c"], 10) + const map = buildProvenanceMap(["a", "b"], ["a", "c"], 10) expect(Array.from(map.get("a") ?? [])).toEqual(["H", "K"]) - expect(Array.from(map.get("b") ?? [])).toEqual(["H", "A"]) - expect(Array.from(map.get("c") ?? [])).toEqual(["K", "A"]) + expect(Array.from(map.get("b") ?? [])).toEqual(["H"]) + expect(Array.from(map.get("c") ?? [])).toEqual(["K"]) }) it("respects k truncation per source", () => { - const map = buildProvenanceMap(["a", "b"], ["b"], ["a"], 1) + const map = buildProvenanceMap(["a", "b"], ["b"], 1) // Only the first id of each source counts at k=1: - // hybrid -> a, keyword -> b, algolia -> a - expect(Array.from(map.get("a") ?? [])).toEqual(["H", "A"]) + // hybrid -> a, keyword -> b + expect(Array.from(map.get("a") ?? [])).toEqual(["H"]) expect(Array.from(map.get("b") ?? [])).toEqual(["K"]) }) it("returns an empty map for k <= 0", () => { - expect(buildProvenanceMap(["a"], ["b"], ["c"], 0).size).toBe(0) + expect(buildProvenanceMap(["a"], ["b"], 0).size).toBe(0) }) }) diff --git a/apps/admin/src/app/watch/demo-keyword-search/diff.ts b/apps/admin/src/app/watch/demo-keyword-search/diff.ts index 3fe0d414a..e9fd4150f 100644 --- a/apps/admin/src/app/watch/demo-keyword-search/diff.ts +++ b/apps/admin/src/app/watch/demo-keyword-search/diff.ts @@ -42,103 +42,15 @@ export function computeTopKDiff( } /** - * 3-way overlap variant. Operates on the same {first-k, dedupe-first} - * semantics as `computeTopKDiff` but partitions the union of three - * ordered id lists into 7 buckets: - * - * - `inAll` — present in all three - * - `hybridKeyword` — hybrid + keyword-first only - * - `hybridAlgolia` — hybrid + algolia only - * - `keywordAlgolia` — keyword-first + algolia only - * - `hybridOnly` — hybrid alone - * - `keywordOnly` — keyword-first alone - * - `algoliaOnly` — algolia alone - * - * Bucket order preserves the order of the source the id was first seen - * in (hybrid, then keyword, then algolia). - * - * The 3-way diff in the demo route compares by SLUG, not cuid, because - * Algolia hits don't carry admin's cuid id — `videoId` on the Algolia - * hit is the same shape as admin's `SearchResult.slug`. - */ -export type ThreeWayDiff = { - inAll: string[] - hybridKeyword: string[] - hybridAlgolia: string[] - keywordAlgolia: string[] - hybridOnly: string[] - keywordOnly: string[] - algoliaOnly: string[] -} - -export function computeThreeWayDiff( - hybrid: readonly string[], - keyword: readonly string[], - algolia: readonly string[], - k: number, -): ThreeWayDiff { - const empty: ThreeWayDiff = { - inAll: [], - hybridKeyword: [], - hybridAlgolia: [], - keywordAlgolia: [], - hybridOnly: [], - keywordOnly: [], - algoliaOnly: [], - } - if (k <= 0) return empty - - const h = dedupeFirst(hybrid.slice(0, k)) - const kk = dedupeFirst(keyword.slice(0, k)) - const a = dedupeFirst(algolia.slice(0, k)) - const hSet = new Set(h) - const kSet = new Set(kk) - const aSet = new Set(a) - - const out: ThreeWayDiff = { - inAll: [], - hybridKeyword: [], - hybridAlgolia: [], - keywordAlgolia: [], - hybridOnly: [], - keywordOnly: [], - algoliaOnly: [], - } - const seen = new Set() - - const classify = (id: string): void => { - if (seen.has(id)) return - seen.add(id) - const inH = hSet.has(id) - const inK = kSet.has(id) - const inA = aSet.has(id) - if (inH && inK && inA) out.inAll.push(id) - else if (inH && inK) out.hybridKeyword.push(id) - else if (inH && inA) out.hybridAlgolia.push(id) - else if (inK && inA) out.keywordAlgolia.push(id) - else if (inH) out.hybridOnly.push(id) - else if (inK) out.keywordOnly.push(id) - else if (inA) out.algoliaOnly.push(id) - } - - for (const id of h) classify(id) - for (const id of kk) classify(id) - for (const id of a) classify(id) - - return out -} - -/** - * Per-id provenance map: which of {H, K, A} contains each id within + * Per-id provenance map: which of {H, K} contains each id within * its top-K. Used by the demo route to render "also in" badges on * each result row without re-traversing arrays. */ -export type Source = "H" | "K" | "A" +export type Source = "H" | "K" export function buildProvenanceMap( hybrid: readonly string[], keyword: readonly string[], - algolia: readonly string[], k: number, ): Map> { const map = new Map>() @@ -153,7 +65,6 @@ export function buildProvenanceMap( } add(hybrid, "H") add(keyword, "K") - add(algolia, "A") return map } diff --git a/apps/admin/src/config/env.ts b/apps/admin/src/config/env.ts index b4f08c575..680c98c8b 100644 --- a/apps/admin/src/config/env.ts +++ b/apps/admin/src/config/env.ts @@ -107,7 +107,7 @@ export const env = createEnv({ // Manager artifacts bucket — admin reads {assetId}/scene-analysis.json // and {assetId}/embeddings.json from apps/manager's S3 bucket via // readManagerArtifact() in src/storage/s3.ts. Distinct from - // RAILWAY_S3_*, which is admin's own write bucket (cms-storage, + // RAILWAY_S3_*, which is admin's own write bucket, // used for admin-migrations/core-id-mapping.json etc.). Read-only // at the code layer: src/storage/s3.ts intentionally exposes no // writeManagerArtifact helper. @@ -133,19 +133,6 @@ export const env = createEnv({ MANAGER_API_BASE_URL: z.string().url().optional(), MANAGER_TRIGGER_API_KEY: z.string().min(1).optional(), NEXT_RUNTIME: z.enum(["nodejs", "edge"]).optional(), - // Algolia (watch-project parity demo column on /watch/demo-keyword-search). - // Server-side only — the demo route's `searchAlgolia` server action - // (`apps/admin/src/app/watch/demo-keyword-search/algolia-action.ts`) - // proxies queries using ALGOLIA_SEARCH_API_KEY (the watch project's - // ALGOLIA_SERVER_API_KEY value, which is unrestricted; the public - // NEXT_PUBLIC_ALGOLIA_API_KEY is referer-locked to the watch domain - // and cannot be used from admin.jesusfilm.org). All three optional — - // the action throws `algolia_not_configured` when any is absent and - // the demo client renders a muted "Algolia disabled" banner. - // Throwaway: removed at R8 cutover when admin replaces Algolia. - ALGOLIA_APP_ID: z.string().min(1).optional(), - ALGOLIA_SEARCH_API_KEY: z.string().min(1).optional(), - ALGOLIA_INDEX: z.string().min(1).optional(), NODE_ENV: z.enum(["development", "test", "production"]).optional(), }, client: { @@ -248,11 +235,6 @@ export const env = createEnv({ process.env.MANAGER_TRIGGER_API_KEY, ), NEXT_RUNTIME: emptyToUndefined(process.env.NEXT_RUNTIME), - ALGOLIA_APP_ID: emptyToUndefined(process.env.ALGOLIA_APP_ID), - ALGOLIA_SEARCH_API_KEY: emptyToUndefined( - process.env.ALGOLIA_SEARCH_API_KEY, - ), - ALGOLIA_INDEX: emptyToUndefined(process.env.ALGOLIA_INDEX), NODE_ENV: emptyToUndefined(process.env.NODE_ENV), NEXT_PUBLIC_APP_NAME: process.env.NEXT_PUBLIC_APP_NAME, }, diff --git a/apps/admin/src/scripts/pull-mapping-from-prod.ts b/apps/admin/src/scripts/pull-mapping-from-prod.ts index 6deafc65f..621ae1f73 100644 --- a/apps/admin/src/scripts/pull-mapping-from-prod.ts +++ b/apps/admin/src/scripts/pull-mapping-from-prod.ts @@ -13,7 +13,7 @@ * * # Override defaults: * pnpm --filter @forge/admin pull:mapping \ - * --bucket=cms-storage-jbpuckp0lmqap \ + * --bucket= \ * --key=admin-migrations/core-id-mapping.json \ * --endpoint=https://t3.storageapi.dev \ * --region=sjc \ @@ -27,7 +27,7 @@ * shape. * * NOT run against prod write paths. Read-only download. The bucket - * defaults to admin's prod bucket because that's where + * defaults to `RAILWAY_S3_BUCKET` because that's where * `refresh:core-id-mapping` uploads the canonical snapshot — the * operator who refreshed it from cms PG most-recently is the source * of truth. @@ -38,7 +38,6 @@ import { dirname, join, resolve as resolvePath } from "node:path" import { DEFAULT_CORE_ID_MAPPING_S3_KEY } from "@/services/core-id-mapping.constants" -const DEFAULT_BUCKET = "cms-storage-jbpuckp0lmqap" const DEFAULT_ENDPOINT = "https://t3.storageapi.dev" const DEFAULT_REGION = "sjc" @@ -106,10 +105,16 @@ export async function downloadMapping(args: PullMappingArgs): Promise { } export async function main(): Promise { - const bucket = parseArg("bucket", DEFAULT_BUCKET) + const bucket = parseArg("bucket", process.env.RAILWAY_S3_BUCKET ?? "") const key = parseArg("key", DEFAULT_CORE_ID_MAPPING_S3_KEY) - const endpoint = parseArg("endpoint", DEFAULT_ENDPOINT) - const region = parseArg("region", DEFAULT_REGION) + const endpoint = parseArg( + "endpoint", + process.env.RAILWAY_S3_ENDPOINT ?? DEFAULT_ENDPOINT, + ) + const region = parseArg( + "region", + process.env.RAILWAY_S3_REGION ?? DEFAULT_REGION, + ) // Default output path resolves relative to the operator's CWD; // `pnpm --filter @forge/admin` runs the script from // `apps/admin`, so the default `.tmp/...` lands inside admin's @@ -122,6 +127,12 @@ export async function main(): Promise { const accessKeyId = process.env.RAILWAY_S3_ACCESS_KEY_ID const secretAccessKey = process.env.RAILWAY_S3_SECRET_ACCESS_KEY + if (!bucket) { + process.stderr.write( + "[pull-mapping] RAILWAY_S3_BUCKET is required unless --bucket is provided\n", + ) + process.exit(2) + } if (!accessKeyId || !secretAccessKey) { process.stderr.write( "[pull-mapping] RAILWAY_S3_ACCESS_KEY_ID and RAILWAY_S3_SECRET_ACCESS_KEY are required\n", diff --git a/apps/admin/src/scripts/refresh-core-id-mapping.ts b/apps/admin/src/scripts/refresh-core-id-mapping.ts index c89a0f76c..3cb519efd 100644 --- a/apps/admin/src/scripts/refresh-core-id-mapping.ts +++ b/apps/admin/src/scripts/refresh-core-id-mapping.ts @@ -2,7 +2,7 @@ * Refresh the admin coreId → cms video id mapping snapshot. * * Dumps from cms via `pnpm --filter @forge/cms dump:core-id-mapping` and - * uploads the resulting JSON to the shared Railway S3 bucket at + * uploads the resulting JSON to Railway S3 at * `admin-migrations/core-id-mapping.json`. That key is the default * consumed by `triggerSceneEmbeddingBackfill` (and future admin-migration * mutations). @@ -13,7 +13,7 @@ * Env: * RAILWAY_S3_BUCKET, RAILWAY_S3_ENDPOINT, RAILWAY_S3_REGION, * RAILWAY_S3_ACCESS_KEY_ID, RAILWAY_S3_SECRET_ACCESS_KEY must point - * at the shared bucket. Local fallback (no bucket) writes to + * at Railway S3. Local fallback (no bucket) writes to * `apps/admin/.tmp/objects/admin-migrations/core-id-mapping.json`. * * The cms dump inherits apps/cms/.env for its DATABASE_URL — point diff --git a/apps/admin/src/services/core-id-mapping.constants.ts b/apps/admin/src/services/core-id-mapping.constants.ts index a273c7aa2..637212a17 100644 --- a/apps/admin/src/services/core-id-mapping.constants.ts +++ b/apps/admin/src/services/core-id-mapping.constants.ts @@ -19,9 +19,8 @@ export const DEFAULT_CORE_ID_MAPPING_S3_KEY = /** * Any S3 key handed to the mutation must live under this prefix. The - * bucket is shared across services (manager writes - * `{assetId}/scene-analysis.json` etc.); confining ADMIN-supplied keys - * to the admin namespace stops a compromised ADMIN session from using - * the mutation to enumerate other apps' objects via error-code timing. + * bucket is dedicated to admin artifacts; confining ADMIN-supplied keys + * to the migration namespace stops a compromised ADMIN session from using + * the mutation to probe unrelated admin objects via error-code timing. */ export const ADMIN_MIGRATIONS_S3_PREFIX = "admin-migrations/" diff --git a/apps/admin/src/services/hybrid-search-keyword-first-retrievers.ts b/apps/admin/src/services/hybrid-search-keyword-first-retrievers.ts index 6cc9f641d..5401fc26e 100644 --- a/apps/admin/src/services/hybrid-search-keyword-first-retrievers.ts +++ b/apps/admin/src/services/hybrid-search-keyword-first-retrievers.ts @@ -156,7 +156,7 @@ export function tokenizeForExactTitle(query: string): string[] { * Phrase-aware, per-field weighted full-text retrieval. * * `websearch_to_tsquery('simple', ?)` accepts user-typed double-quotes - * as exact phrases (Algolia-like). Ranking uses `ts_rank_cd` against + * as exact phrases (lexical search style). Ranking uses `ts_rank_cd` against * the per-field weighted tsvector * `(setweight(vl.title_tsv,'A') || setweight(vl.description_tsv,'B'))` * so a query word in the title outranks the same word in the @@ -306,7 +306,7 @@ export async function searchByTrigram( * Wired into RRF as the 4th list in keyword-first mode. Together with * `searchByKeywordWeighted` and `searchByTrigram`, it produces the * "every query token must appear in the most-important attribute" - * Algolia-like behavior the keyword-first plan calls for. + * lexical phrase behavior the keyword-first plan calls for. * * Token count is capped at `MAX_EXACT_TITLE_TOKENS` (16) — see * `tokenizeForExactTitle`. Empty / whitespace-only / all-punctuation diff --git a/apps/admin/src/services/hybrid-search.service.ts b/apps/admin/src/services/hybrid-search.service.ts index 905762941..25e8f4344 100644 --- a/apps/admin/src/services/hybrid-search.service.ts +++ b/apps/admin/src/services/hybrid-search.service.ts @@ -385,7 +385,7 @@ export class HybridSearchService { if (pipelineMode === "keyword-first") { // Three-list lexical stack: phrase-aware weighted tsvector, // typo-tolerant trigram on title, and exact-token-in-title - // (Algolia-like). The legacy R4 `searchVideoKeyword` is NOT + // (lexical search style). The legacy R4 `searchVideoKeyword` is NOT // dispatched on this branch — its concatenated tsvector is // strictly weaker than the weighted one for this workload. retrievals.push({ diff --git a/apps/admin/src/storage/s3.manager-artifacts-backend.test.ts b/apps/admin/src/storage/s3.manager-artifacts-backend.test.ts index e5128f82b..60556b162 100644 --- a/apps/admin/src/storage/s3.manager-artifacts-backend.test.ts +++ b/apps/admin/src/storage/s3.manager-artifacts-backend.test.ts @@ -16,7 +16,7 @@ * The key assertion: this helper resolves Bucket/creds from the * MANAGER_ARTIFACTS_S3_* env block — NOT from RAILWAY_S3_*. This * locks in the two-bucket separation introduced when admin moved its - * artifact reads off the cms-storage bucket and onto manager's bucket. + * artifact reads off admin's own artifacts bucket and onto manager's bucket. */ import { afterEach, beforeEach, describe, expect, it, vi } from "vitest" diff --git a/apps/admin/src/storage/s3.ts b/apps/admin/src/storage/s3.ts index 40d2edf6f..436133624 100644 --- a/apps/admin/src/storage/s3.ts +++ b/apps/admin/src/storage/s3.ts @@ -214,8 +214,8 @@ export async function readArtifact( // Admin's R1 (scene embeddings) and R2 (transcript embeddings) backfills // re-index `{assetId}/scene-analysis.json` and `{assetId}/embeddings.json` // produced by apps/manager. Those artifacts live in manager's own -// Railway bucket, NOT admin's RAILWAY_S3_* (cms-storage) bucket — admin's -// reads must be routed there. +// Railway bucket, NOT admin's RAILWAY_S3_* bucket — admin's reads +// must be routed there. // // Distinct env block (MANAGER_ARTIFACTS_S3_*) so admin's writes (which // continue to land in RAILWAY_S3_BUCKET) never mix with manager's bucket. diff --git a/docs/roadmap/platform/feat-104-admin-railway-provisioning.md b/docs/roadmap/platform/feat-104-admin-railway-provisioning.md index 475718e40..aae027a3b 100644 --- a/docs/roadmap/platform/feat-104-admin-railway-provisioning.md +++ b/docs/roadmap/platform/feat-104-admin-railway-provisioning.md @@ -81,7 +81,7 @@ deploy lands in production. | `REDIS_HOST` / `REDIS_PORT` / `REDIS_PASSWORD` | Optional for R1; wire when Redis plugin lands | | `CORE_API_URL` | `https://api-gateway.central.jesusfilm.org/` | | `CORE_API_TOKEN` | Pull from Doppler `forge-admin` if present; skip otherwise | -| `RAILWAY_S3_*` | Copy from `@forge/cms` or `@forge/manager` (shared bucket) | +| `RAILWAY_S3_*` | Railway S3-compatible object storage for admin artifacts | | `GRAPHQL_INTROSPECTION_ENABLED` | Leave unset in production (defaults off) | | `NODE_ENV` | `production` | | `HOSTNAME` | Set to `0.0.0.0` in railway.toml startCommand — no env var needed | @@ -122,7 +122,7 @@ guidance. ### R1 smoke test -1. Refresh the coreId mapping into the shared Railway S3 bucket: +1. Refresh the coreId mapping into Railway S3: ``` pnpm --filter @forge/admin refresh:core-id-mapping ```