From 087f8b27f9d92931eae00e369148fb0cb8d33e56 Mon Sep 17 00:00:00 2001 From: Tiffany Wei Date: Sat, 28 Feb 2026 16:28:58 -0500 Subject: [PATCH 01/20] Provider abstraction and Tavily extraction --- .../tools/trend-search/providers/registry.ts | 8 +++ .../tools/trend-search/providers/serper.ts | 6 ++ .../tools/trend-search/providers/tavily.ts | 62 +++++++++++++++++++ src/lib/tools/trend-search/providers/types.ts | 17 +++++ src/lib/tools/trend-search/web-search.ts | 61 +----------------- 5 files changed, 94 insertions(+), 60 deletions(-) create mode 100644 src/lib/tools/trend-search/providers/registry.ts create mode 100644 src/lib/tools/trend-search/providers/serper.ts create mode 100644 src/lib/tools/trend-search/providers/tavily.ts create mode 100644 src/lib/tools/trend-search/providers/types.ts diff --git a/src/lib/tools/trend-search/providers/registry.ts b/src/lib/tools/trend-search/providers/registry.ts new file mode 100644 index 00000000..09505a67 --- /dev/null +++ b/src/lib/tools/trend-search/providers/registry.ts @@ -0,0 +1,8 @@ +import type { SearchProviderFn } from "./types"; +import { callTavily } from "./tavily"; +import { callSerper } from "./serper"; // placeholder; full impl in task 2.1 + +export const providerRegistry: Record = { + tavily: callTavily, + serper: callSerper, +}; diff --git a/src/lib/tools/trend-search/providers/serper.ts b/src/lib/tools/trend-search/providers/serper.ts new file mode 100644 index 00000000..07dbba1f --- /dev/null +++ b/src/lib/tools/trend-search/providers/serper.ts @@ -0,0 +1,6 @@ +import type { RawSearchResult } from "~/lib/tools/trend-search/types"; + +/** Placeholder: to be implemented in task 2.1. */ +export async function callSerper(_query: string): Promise { + return []; +} diff --git a/src/lib/tools/trend-search/providers/tavily.ts b/src/lib/tools/trend-search/providers/tavily.ts new file mode 100644 index 00000000..44d0b75a --- /dev/null +++ b/src/lib/tools/trend-search/providers/tavily.ts @@ -0,0 +1,62 @@ +import { env } from "~/env"; +import type { RawSearchResult } from "~/lib/tools/trend-search/types"; + +const TAVILY_SEARCH_URL = "https://api.tavily.com/search"; +const MAX_RESULTS_PER_QUERY = 10; + +/** Response shape from Tavily /search API (subset we use). Note: published_date is not in the current Tavily spec; we accept it if present. */ +interface TavilyResultItem { + title?: string; + url?: string; + content?: string; + score?: number; + published_date?: string; +} + +interface TavilySearchResponse { + results?: TavilyResultItem[]; +} + +/** + * Calls Tavily search API for a single query with advanced depth and news topic. + * @returns RawSearchResult[] or empty array on missing key / parse failure + */ +export async function callTavily(query: string): Promise { + const apiKey = env.server.TAVILY_API_KEY; + if (!apiKey) { + console.warn("[web-search] TAVILY_API_KEY not set; skipping Tavily search."); + return []; + } + + const response = await fetch(TAVILY_SEARCH_URL, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + api_key: apiKey, + query, + search_depth: "advanced", + topic: "news", + max_results: MAX_RESULTS_PER_QUERY, + }), + }); + + if (!response.ok) { + const text = await response.text(); + throw new Error(`Tavily API error: ${response.status} ${response.statusText} - ${text}`); + } + + const data = (await response.json()) as TavilySearchResponse; + if (!data.results || !Array.isArray(data.results)) { + return []; + } + + return data.results + .filter((item): item is TavilyResultItem & { url: string } => Boolean(item?.url)) + .map((item) => ({ + url: item.url, + title: item.title ?? "Untitled", + content: item.content ?? "", + score: typeof item.score === "number" ? item.score : 0, + ...(item.published_date != null && { publishedDate: item.published_date }), + })); +} diff --git a/src/lib/tools/trend-search/providers/types.ts b/src/lib/tools/trend-search/providers/types.ts new file mode 100644 index 00000000..21e9df5b --- /dev/null +++ b/src/lib/tools/trend-search/providers/types.ts @@ -0,0 +1,17 @@ +import type { RawSearchResult } from "~/lib/tools/trend-search/types"; + +/** A search provider function: takes a query string, returns normalized results. */ +export type SearchProviderFn = (query: string) => Promise; + +/** The supported provider strategy names. */ +export type ProviderStrategy = + | "tavily" + | "serper" + | "fallback" + | "parallel"; + +/** Extended result from executeSearch, includes which provider was used. */ +export interface SearchExecutionResult { + results: RawSearchResult[]; + providerUsed: string; +} diff --git a/src/lib/tools/trend-search/web-search.ts b/src/lib/tools/trend-search/web-search.ts index 0275e5f2..1629e2a0 100644 --- a/src/lib/tools/trend-search/web-search.ts +++ b/src/lib/tools/trend-search/web-search.ts @@ -1,8 +1,6 @@ -import { env } from "~/env"; +import { callTavily } from "~/lib/tools/trend-search/providers/tavily"; import type { PlannedQuery, RawSearchResult } from "~/lib/tools/trend-search/types"; -const TAVILY_SEARCH_URL = "https://api.tavily.com/search"; -const MAX_RESULTS_PER_QUERY = 10; const MAX_RETRIES = 2; /** Normalize URL for deduplication; falls back to trim if invalid. */ @@ -14,63 +12,6 @@ function normalizeUrl(url: string): string { } } -/** Response shape from Tavily /search API (subset we use). Note: published_date is not in the current Tavily spec; we accept it if present. */ -interface TavilyResultItem { - title?: string; - url?: string; - content?: string; - score?: number; - published_date?: string; -} - -interface TavilySearchResponse { - results?: TavilyResultItem[]; -} - -/** - * Calls Tavily search API for a single query with advanced depth and news topic. - * @returns RawSearchResult[] or empty array on missing key / parse failure - */ -async function callTavily(query: string): Promise { - const apiKey = env.server.TAVILY_API_KEY; - if (!apiKey) { - console.warn("[web-search] TAVILY_API_KEY not set; skipping Tavily search."); - return []; - } - - const response = await fetch(TAVILY_SEARCH_URL, { - method: "POST", - headers: { "Content-Type": "application/json" }, - body: JSON.stringify({ - api_key: apiKey, - query, - search_depth: "advanced", - topic: "news", - max_results: MAX_RESULTS_PER_QUERY, - }), - }); - - if (!response.ok) { - const text = await response.text(); - throw new Error(`Tavily API error: ${response.status} ${response.statusText} - ${text}`); - } - - const data = (await response.json()) as TavilySearchResponse; - if (!data.results || !Array.isArray(data.results)) { - return []; - } - - return data.results - .filter((item): item is TavilyResultItem & { url: string } => Boolean(item?.url)) - .map((item) => ({ - url: item.url, - title: item.title ?? "Untitled", - content: item.content ?? "", - score: typeof item.score === "number" ? item.score : 0, - ...(item.published_date != null && { publishedDate: item.published_date }), - })); -} - /** * Executes sub-queries against Tavily and returns combined, deduplicated raw results for synthesis. * From 88fbb35095407046d2f019ca86ba9351c47bcdd2 Mon Sep 17 00:00:00 2001 From: Tiffany Wei Date: Sat, 28 Feb 2026 17:08:57 -0500 Subject: [PATCH 02/20] Serper adapter --- src/env.ts | 7 ++ .../tools/trend-search/providers/registry.ts | 2 +- .../tools/trend-search/providers/serper.ts | 68 ++++++++++++++++++- 3 files changed, 73 insertions(+), 4 deletions(-) diff --git a/src/env.ts b/src/env.ts index c1476d68..b3a27055 100644 --- a/src/env.ts +++ b/src/env.ts @@ -16,6 +16,11 @@ const serverSchema = z.object({ UPLOADTHING_TOKEN: optionalString(), DATALAB_API_KEY: optionalString(), TAVILY_API_KEY: optionalString(), + SERPER_API_KEY: optionalString(), + SEARCH_PROVIDER: z + .enum(["tavily", "serper", "fallback", "parallel"]) + .default("tavily") + .optional(), // Azure Document Intelligence (for OCR pipeline) AZURE_DOC_INTELLIGENCE_ENDPOINT: optionalString(), AZURE_DOC_INTELLIGENCE_KEY: optionalString(), @@ -73,6 +78,8 @@ function parseServerEnv() { UPLOADTHING_TOKEN: process.env.UPLOADTHING_TOKEN, DATALAB_API_KEY: process.env.DATALAB_API_KEY, TAVILY_API_KEY: process.env.TAVILY_API_KEY, + SERPER_API_KEY: process.env.SERPER_API_KEY, + SEARCH_PROVIDER: process.env.SEARCH_PROVIDER as "tavily" | "serper" | "fallback" | "parallel" | undefined, AZURE_DOC_INTELLIGENCE_ENDPOINT: process.env.AZURE_DOC_INTELLIGENCE_ENDPOINT, AZURE_DOC_INTELLIGENCE_KEY: process.env.AZURE_DOC_INTELLIGENCE_KEY, LANDING_AI_API_KEY: process.env.LANDING_AI_API_KEY, diff --git a/src/lib/tools/trend-search/providers/registry.ts b/src/lib/tools/trend-search/providers/registry.ts index 09505a67..cdc7f8a9 100644 --- a/src/lib/tools/trend-search/providers/registry.ts +++ b/src/lib/tools/trend-search/providers/registry.ts @@ -1,6 +1,6 @@ import type { SearchProviderFn } from "./types"; import { callTavily } from "./tavily"; -import { callSerper } from "./serper"; // placeholder; full impl in task 2.1 +import { callSerper } from "./serper"; export const providerRegistry: Record = { tavily: callTavily, diff --git a/src/lib/tools/trend-search/providers/serper.ts b/src/lib/tools/trend-search/providers/serper.ts index 07dbba1f..b3696f87 100644 --- a/src/lib/tools/trend-search/providers/serper.ts +++ b/src/lib/tools/trend-search/providers/serper.ts @@ -1,6 +1,68 @@ +import { env } from "~/env"; import type { RawSearchResult } from "~/lib/tools/trend-search/types"; -/** Placeholder: to be implemented in task 2.1. */ -export async function callSerper(_query: string): Promise { - return []; +const SERPER_NEWS_URL = "https://google.serper.dev/news"; +const MAX_RESULTS_PER_QUERY = 10; + +/** Response shape from Serper Google News API (subset we use). */ +interface SerperNewsItem { + title?: string; + link?: string; + snippet?: string; + date?: string; + source?: string; + position?: number; +} + +interface SerperNewsResponse { + news?: SerperNewsItem[]; +} + +/** + * Calls Serper.dev Google News API for a single query. + * @returns RawSearchResult[] or empty array if SERPER_API_KEY not set; throws on non-2xx. + */ +export async function callSerper(query: string): Promise { + const apiKey = env.server.SERPER_API_KEY; + if (!apiKey) { + console.warn("[web-search] SERPER_API_KEY not set; skipping Serper search."); + return []; + } + + const response = await fetch(SERPER_NEWS_URL, { + method: "POST", + headers: { + "X-API-KEY": apiKey, + "Content-Type": "application/json", + }, + body: JSON.stringify({ + q: query, + num: MAX_RESULTS_PER_QUERY, + }), + }); + + if (!response.ok) { + const text = await response.text(); + throw new Error(`Serper API error: ${response.status} ${response.statusText} - ${text}`); + } + + const data = (await response.json()) as SerperNewsResponse; + if (!data.news || !Array.isArray(data.news)) { + return []; + } + + const totalResults = data.news.length; + return data.news + .filter((item): item is SerperNewsItem & { link: string } => Boolean(item?.link)) + .map((item, index) => { + const position = item.position ?? index + 1; + const score = totalResults > 0 ? 1 - position / totalResults : 0; + return { + url: item.link, + title: item.title ?? "Untitled", + content: item.snippet ?? "", + score, + ...(item.date != null && item.date !== "" && { publishedDate: item.date }), + }; + }); } From 083e39bf0a1e4c23510964a3535da3c69c92b16c Mon Sep 17 00:00:00 2001 From: Tiffany Wei Date: Sat, 28 Feb 2026 20:13:06 -0500 Subject: [PATCH 03/20] Strategy logic and executeSearch refactor --- src/lib/tools/trend-search/run.ts | 3 +- src/lib/tools/trend-search/web-search.ts | 96 ++++++++++++++++++++++-- 2 files changed, 92 insertions(+), 7 deletions(-) diff --git a/src/lib/tools/trend-search/run.ts b/src/lib/tools/trend-search/run.ts index 559c2787..47df198f 100644 --- a/src/lib/tools/trend-search/run.ts +++ b/src/lib/tools/trend-search/run.ts @@ -25,7 +25,8 @@ export async function runTrendSearch( // Step 2: Execute web searches await options.onStageChange?.("searching"); - const rawResults = await executeSearch(plannedQueries); + const { results: rawResults, providerUsed } = await executeSearch(plannedQueries); + console.log(`[trend-search] Search provider used: ${providerUsed}`); // Step 3: Synthesize results await options.onStageChange?.("synthesizing"); diff --git a/src/lib/tools/trend-search/web-search.ts b/src/lib/tools/trend-search/web-search.ts index 1629e2a0..0f30211a 100644 --- a/src/lib/tools/trend-search/web-search.ts +++ b/src/lib/tools/trend-search/web-search.ts @@ -1,4 +1,6 @@ -import { callTavily } from "~/lib/tools/trend-search/providers/tavily"; +import { env } from "~/env"; +import type { SearchExecutionResult, SearchProviderFn, ProviderStrategy } from "~/lib/tools/trend-search/providers/types"; +import { providerRegistry } from "~/lib/tools/trend-search/providers/registry"; import type { PlannedQuery, RawSearchResult } from "~/lib/tools/trend-search/types"; const MAX_RETRIES = 2; @@ -13,13 +15,12 @@ function normalizeUrl(url: string): string { } /** - * Executes sub-queries against Tavily and returns combined, deduplicated raw results for synthesis. - * - * @param subQueries - Planned queries from the query planner. + * Runs all sub-queries through a single provider with retry and URL deduplication. * @returns Combined RawSearchResult[] (deduplicated by URL). */ -export async function executeSearch( +async function executeWithProvider( subQueries: PlannedQuery[], + providerFn: SearchProviderFn, ): Promise { const seenUrls = new Set(); const combined: RawSearchResult[] = []; @@ -31,7 +32,7 @@ export async function executeSearch( for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) { try { - results = await callTavily(query); + results = await providerFn(query); lastError = null; break; } catch (err) { @@ -69,3 +70,86 @@ export async function executeSearch( return combined; } + +/** Resolve strategy: override → env → default. Downgrade to tavily if Serper required but key missing. */ +function resolveStrategy(strategyOverride?: ProviderStrategy): ProviderStrategy { + const fromEnv = env.server.SEARCH_PROVIDER ?? "tavily"; + const strategy: ProviderStrategy = strategyOverride ?? (fromEnv as ProviderStrategy); + + const needsSerper: ProviderStrategy[] = ["serper", "fallback", "parallel"]; + if (needsSerper.includes(strategy) && !env.server.SERPER_API_KEY) { + console.warn( + "[web-search] SERPER_API_KEY not set; downgrading strategy to tavily.", + ); + return "tavily"; + } + + return strategy; +} + +/** Merge two result arrays by URL; on collision keep the result with the higher score. */ +function mergeDedupByUrl( + a: RawSearchResult[], + b: RawSearchResult[], +): RawSearchResult[] { + const byUrl = new Map(); + for (const r of [...a, ...b]) { + const key = normalizeUrl(r.url); + if (!key) continue; + const existing = byUrl.get(key); + if (!existing || r.score > existing.score) { + byUrl.set(key, r); + } + } + return [...byUrl.values()]; +} + +/** + * Executes sub-queries using the configured provider strategy. + * + * @param subQueries - Planned queries from the query planner. + * @param strategyOverride - Optional override for SEARCH_PROVIDER. + * @returns SearchExecutionResult (results + providerUsed). + */ +export async function executeSearch( + subQueries: PlannedQuery[], + strategyOverride?: ProviderStrategy, +): Promise { + const strategy = resolveStrategy(strategyOverride); + const tavily = providerRegistry.tavily!; + const serper = providerRegistry.serper!; + + if (strategy === "tavily" || strategy === "serper") { + const providerFn = providerRegistry[strategy]; + if (!providerFn) { + console.warn(`[web-search] Unknown provider "${strategy}"; falling back to tavily.`); + const results = await executeWithProvider(subQueries, tavily); + return { results, providerUsed: "tavily" }; + } + const results = await executeWithProvider(subQueries, providerFn); + return { results, providerUsed: strategy }; + } + + if (strategy === "fallback") { + const primaryResults = await executeWithProvider(subQueries, serper); + if (primaryResults.length > 0) { + return { results: primaryResults, providerUsed: "serper" }; + } + console.warn("[web-search] Serper returned no results; falling back to Tavily."); + const fallbackResults = await executeWithProvider(subQueries, tavily); + return { results: fallbackResults, providerUsed: "tavily (fallback)" }; + } + + if (strategy === "parallel") { + const [serperResults, tavilyResults] = await Promise.all([ + executeWithProvider(subQueries, serper), + executeWithProvider(subQueries, tavily), + ]); + const results = mergeDedupByUrl(serperResults, tavilyResults); + return { results, providerUsed: "tavily+serper" }; + } + + // Unreachable if strategy type is correct; defensive fallback + const results = await executeWithProvider(subQueries, tavily); + return { results, providerUsed: "tavily" }; +} From eba12155f3c15d45b128687e51a5772d444378ec Mon Sep 17 00:00:00 2001 From: Tiffany Wei Date: Sat, 28 Feb 2026 20:17:41 -0500 Subject: [PATCH 04/20] updated trendSearch tests and added new tests for serper --- .../inngest-completion.pbt.test.ts | 16 +- .../api/trendSearch/persistence.pbt.test.ts | 6 +- .../api/trendSearch/query-planner.pbt.test.ts | 6 +- .../trendSearch/search-provider.pbt.test.ts | 336 ++++++++++++++++++ .../api/trendSearch/search-strategy.test.ts | 242 +++++++++++++ .../api/trendSearch/serper-adapter.test.ts | 198 +++++++++++ .../api/trendSearch/synthesizer.pbt.test.ts | 22 +- __tests__/api/trendSearch/types.pbt.test.ts | 2 +- .../api/trendSearch/web-search.pbt.test.ts | 16 +- 9 files changed, 815 insertions(+), 29 deletions(-) create mode 100644 __tests__/api/trendSearch/search-provider.pbt.test.ts create mode 100644 __tests__/api/trendSearch/search-strategy.test.ts create mode 100644 __tests__/api/trendSearch/serper-adapter.test.ts diff --git a/__tests__/api/trendSearch/inngest-completion.pbt.test.ts b/__tests__/api/trendSearch/inngest-completion.pbt.test.ts index a27776e6..dddc377d 100644 --- a/__tests__/api/trendSearch/inngest-completion.pbt.test.ts +++ b/__tests__/api/trendSearch/inngest-completion.pbt.test.ts @@ -9,12 +9,12 @@ jest.mock("~/server/db", () => ({ db: {}, })); -jest.mock("~/server/trend-search/run", () => ({ +jest.mock("~/lib/tools/trend-search/run", () => ({ runTrendSearch: jest.fn(), })); -jest.mock("~/server/trend-search/db", () => { - const actual = jest.requireActual("~/server/trend-search/db"); +jest.mock("~/lib/tools/trend-search/db", () => { + const actual = jest.requireActual("~/lib/tools/trend-search/db"); return { ...actual, @@ -23,17 +23,17 @@ jest.mock("~/server/trend-search/db", () => { }; }); -import { createTrendSearchJobHelpers } from "~/server/trend-search/db"; -import * as trendSearchDb from "~/server/trend-search/db"; +import { createTrendSearchJobHelpers } from "~/lib/tools/trend-search/db"; +import * as trendSearchDb from "~/lib/tools/trend-search/db"; import { trendSearchJob } from "~/server/inngest/functions/trendSearch"; -import { runTrendSearch } from "~/server/trend-search/run"; +import { runTrendSearch } from "~/lib/tools/trend-search/run"; import type { SearchCategory, SearchResult, TrendSearchJobStatus, TrendSearchOutput, -} from "~/server/trend-search/types"; -import { SearchCategoryEnum } from "~/server/trend-search/types"; +} from "~/lib/tools/trend-search/types"; +import { SearchCategoryEnum } from "~/lib/tools/trend-search/types"; type StoredRow = { id: string; diff --git a/__tests__/api/trendSearch/persistence.pbt.test.ts b/__tests__/api/trendSearch/persistence.pbt.test.ts index 2c81ffff..a6f81462 100644 --- a/__tests__/api/trendSearch/persistence.pbt.test.ts +++ b/__tests__/api/trendSearch/persistence.pbt.test.ts @@ -9,14 +9,14 @@ jest.mock("~/server/db", () => ({ db: {}, })); -import { createTrendSearchJobHelpers } from "~/server/trend-search/db"; +import { createTrendSearchJobHelpers } from "~/lib/tools/trend-search/db"; import type { SearchCategory, SearchResult, TrendSearchJobStatus, TrendSearchOutput, -} from "~/server/trend-search/types"; -import { SearchCategoryEnum } from "~/server/trend-search/types"; +} from "~/lib/tools/trend-search/types"; +import { SearchCategoryEnum } from "~/lib/tools/trend-search/types"; type StoredRow = { id: string; diff --git a/__tests__/api/trendSearch/query-planner.pbt.test.ts b/__tests__/api/trendSearch/query-planner.pbt.test.ts index 31440456..31ce69c7 100644 --- a/__tests__/api/trendSearch/query-planner.pbt.test.ts +++ b/__tests__/api/trendSearch/query-planner.pbt.test.ts @@ -16,9 +16,9 @@ jest.mock("@langchain/openai", () => { }); import * as fc from "fast-check"; -import { planQueries } from "~/server/trend-search/query-planner"; -import { SearchCategoryEnum } from "~/server/trend-search/types"; -import type { PlannedQuery, SearchCategory } from "~/server/trend-search/types"; +import { planQueries } from "~/lib/tools/trend-search/query-planner"; +import { SearchCategoryEnum } from "~/lib/tools/trend-search/types"; +import type { PlannedQuery, SearchCategory } from "~/lib/tools/trend-search/types"; // ─── Arbitraries ───────────────────────────────────────────────────────────── diff --git a/__tests__/api/trendSearch/search-provider.pbt.test.ts b/__tests__/api/trendSearch/search-provider.pbt.test.ts new file mode 100644 index 00000000..eeb15eb6 --- /dev/null +++ b/__tests__/api/trendSearch/search-provider.pbt.test.ts @@ -0,0 +1,336 @@ +/** + * Property-based tests for search provider normalization and strategy behavior. + * Feature: Serper dual-channel search — Task 4.3 + * Property 13: Serper adapter output conforms to RawSearchResult. + * Property 14: Fallback strategy invokes secondary when primary returns empty. + * Property 15: Parallel merge deduplicates by URL and keeps higher score. + * Property 16: Default (no env) matches Tavily-only behavior. + * Property 17: Serper-dependent strategies downgrade when key missing. + */ + +const TAVILY_URL = "https://api.tavily.com/search"; +const SERPER_URL = "https://google.serper.dev/news"; + +jest.mock("~/env", () => { + const server = { + TAVILY_API_KEY: "test-tavily-key", + SERPER_API_KEY: "test-serper-key", + SEARCH_PROVIDER: undefined as "tavily" | "serper" | "fallback" | "parallel" | undefined, + }; + return { env: { server } }; +}); + +import * as fc from "fast-check"; +import { env } from "~/env"; +import { callSerper } from "~/lib/tools/trend-search/providers/serper"; +import { executeSearch } from "~/lib/tools/trend-search/web-search"; +import type { PlannedQuery, RawSearchResult } from "~/lib/tools/trend-search/types"; +import type { ProviderStrategy } from "~/lib/tools/trend-search/providers/types"; + +beforeEach(() => { + env.server.TAVILY_API_KEY = "test-tavily-key"; + env.server.SERPER_API_KEY = "test-serper-key"; + env.server.SEARCH_PROVIDER = undefined; +}); + +// ─── Arbitraries ───────────────────────────────────────────────────────────── + +const validCategories = ["fashion", "finance", "business", "tech"] as const; + +const categoryArb = fc.constantFrom(...validCategories); + +const plannedQueryArb: fc.Arbitrary = fc.record({ + searchQuery: fc.string({ minLength: 1, maxLength: 300 }), + category: categoryArb, + rationale: fc.string({ minLength: 1, maxLength: 200 }), +}); + +const subQueriesArb = fc.array(plannedQueryArb, { minLength: 1, maxLength: 5 }); + +/** Serper news item shape (subset we use). */ +const serperNewsItemArb = fc.record({ + link: fc.webUrl({ validSchemes: ["https"] }), + title: fc.option(fc.string({ maxLength: 200 }), { nil: undefined }), + snippet: fc.option(fc.string({ maxLength: 500 }), { nil: undefined }), + date: fc.option(fc.string(), { nil: undefined }), + position: fc.option(fc.nat({ max: 20 }), { nil: undefined }), +}); + +const serperNewsArrayArb = fc.array(serperNewsItemArb, { minLength: 0, maxLength: 15 }); + +/** RawSearchResult arbitrary for merge tests. */ +const rawSearchResultArb = fc.record({ + url: fc.webUrl({ validSchemes: ["https"] }), + title: fc.string({ minLength: 1, maxLength: 200 }), + content: fc.string({ maxLength: 500 }), + score: fc.double({ min: 0, max: 1 }), + publishedDate: fc.option(fc.string(), { nil: undefined }), +}); + +function normalizeUrl(url: string): string { + try { + return new URL(url).href; + } catch { + return url.trim(); + } +} + +function conformsToRawSearchResult(r: unknown): r is RawSearchResult { + if (r === null || typeof r !== "object") return false; + const o = r as Record; + return ( + typeof o.url === "string" && + o.url.length > 0 && + typeof o.title === "string" && + typeof o.content === "string" && + typeof o.score === "number" && + (!("publishedDate" in o) || typeof o.publishedDate === "string" || o.publishedDate === undefined) + ); +} + +// ─── Property 13: Serper output conforms to RawSearchResult ─────────────────── + +describe("Property 13: Serper-shaped responses normalize to RawSearchResult", () => { + it("for any random Serper news array, every output item conforms to RawSearchResult", async () => { + await fc.assert( + fc.asyncProperty(serperNewsArrayArb, async (news) => { + const fetchSpy = jest.spyOn(globalThis, "fetch").mockResolvedValue({ + ok: true, + text: async () => "", + json: async () => ({ news }), + } as Response); + + const results = await callSerper("test query"); + + fetchSpy.mockRestore(); + + for (const item of results) { + expect(conformsToRawSearchResult(item)).toBe(true); + expect(item.url).toBeDefined(); + expect(typeof item.title).toBe("string"); + expect(typeof item.content).toBe("string"); + expect(typeof item.score).toBe("number"); + } + }), + { numRuns: 80 } + ); + }); +}); + +// ─── Property 14: Fallback invokes secondary when primary returns empty ───────── + +describe("Property 14: Fallback strategy invokes secondary when primary returns empty", () => { + it("for random sub-query lists, when primary (Serper) returns empty, Tavily is invoked once per sub-query", async () => { + await fc.assert( + fc.asyncProperty(subQueriesArb, async (subQueries) => { + let serperCalls = 0; + let tavilyCalls = 0; + const fetchSpy = jest.spyOn(globalThis, "fetch").mockImplementation((input: RequestInfo | URL) => { + const url = typeof input === "string" ? input : input instanceof URL ? input.href : (input as Request).url; + if (url === SERPER_URL) { + serperCalls++; + return Promise.resolve({ + ok: true, + text: async () => "", + json: async () => ({ news: [] }), + } as Response); + } + if (url === TAVILY_URL) { + tavilyCalls++; + return Promise.resolve({ + ok: true, + text: async () => "", + json: async () => ({ + results: [{ url: "https://tavily.com/1", title: "T", content: "C", score: 0.9 }], + }), + } as Response); + } + return Promise.reject(new Error(`Unexpected URL: ${url}`)); + }); + + env.server.SEARCH_PROVIDER = "fallback"; + env.server.SERPER_API_KEY = "test-serper-key"; + + await executeSearch(subQueries); + + fetchSpy.mockRestore(); + + expect(serperCalls).toBe(subQueries.length); + expect(tavilyCalls).toBe(subQueries.length); + }), + { numRuns: 50 } + ); + }); +}); + +// ─── Property 15: Parallel dedup keeps higher score, no duplicate URLs ────────── + +describe("Property 15: Parallel merge deduplicates by URL and keeps higher score", () => { + it("for two random result sets with overlapping URLs, merged result has no duplicate URLs and each URL has the higher score", async () => { + await fc.assert( + fc.asyncProperty( + fc.array(rawSearchResultArb, { minLength: 0, maxLength: 5 }), + fc.array(rawSearchResultArb, { minLength: 0, maxLength: 5 }), + fc.string({ minLength: 1, maxLength: 100 }), + async (setA, setB, _query) => { + // Tavily returns setA with original scores; Serper adapter recomputes score as 1 - position/totalResults + const serperScores = setB.length > 0 + ? setB.map((_, i) => 1 - (i + 1) / setB.length) + : []; + const setBWithSerperScores = setB.map((r, i) => ({ ...r, score: serperScores[i] ?? 0 })); + // Replicate mergeDedupByUrl logic so expected matches implementation + const byUrl = new Map(); + for (const r of [...setA, ...setBWithSerperScores]) { + const key = normalizeUrl(r.url); + if (!key) continue; + const existing = byUrl.get(key); + if (!existing || r.score > existing.score) byUrl.set(key, r); + } + const pairKey = (r: RawSearchResult) => `${normalizeUrl(r.url)}::${Number(r.score).toFixed(10)}`; + const expectedPairs = new Set([...byUrl.values()].map(pairKey)); + + const fetchSpy = jest.spyOn(globalThis, "fetch").mockImplementation((input: RequestInfo | URL) => { + const url = typeof input === "string" ? input : input instanceof URL ? input.href : (input as Request).url; + if (url === TAVILY_URL) { + return Promise.resolve({ + ok: true, + text: async () => "", + json: async () => ({ results: setA }), + } as Response); + } + if (url === SERPER_URL) { + const serperNews = setB.map((r, i) => ({ + link: r.url, + title: r.title, + snippet: r.content, + position: i + 1, + })); + return Promise.resolve({ + ok: true, + text: async () => "", + json: async () => ({ news: serperNews }), + } as Response); + } + return Promise.reject(new Error(`Unexpected URL: ${url}`)); + }); + + env.server.SEARCH_PROVIDER = "parallel"; + env.server.SERPER_API_KEY = "test-serper-key"; + + const { results } = await executeSearch( + [{ searchQuery: "q", category: "tech", rationale: "r" }], + "parallel" + ); + + fetchSpy.mockRestore(); + + const resultUrls = results.map((r) => r.url); + const uniqueUrls = new Set(resultUrls); + expect(resultUrls.length).toBe(uniqueUrls.size); + + const actualPairs = new Set(results.map(pairKey)); + expect(actualPairs.size).toBe(expectedPairs.size); + for (const p of actualPairs) { + expect(expectedPairs.has(p)).toBe(true); + } + } + ), + { numRuns: 60 } + ); + }); +}); + +// ─── Property 16: Default (no env) matches Tavily-only ────────────────────────── + +describe("Property 16: Default strategy matches Tavily-only behavior", () => { + it("when SEARCH_PROVIDER is unset, providerUsed is tavily and only Tavily is called", async () => { + await fc.assert( + fc.asyncProperty(subQueriesArb, async (subQueries) => { + let tavilyCalls = 0; + let serperCalls = 0; + const fetchSpy = jest.spyOn(globalThis, "fetch").mockImplementation((input: RequestInfo | URL) => { + const url = typeof input === "string" ? input : input instanceof URL ? input.href : (input as Request).url; + if (url === TAVILY_URL) { + tavilyCalls++; + return Promise.resolve({ + ok: true, + text: async () => "", + json: async () => ({ + results: [{ url: "https://tavily.com/1", title: "T", content: "C", score: 0.9 }], + }), + } as Response); + } + if (url === SERPER_URL) { + serperCalls++; + return Promise.resolve({ + ok: true, + text: async () => "", + json: async () => ({ news: [] }), + } as Response); + } + return Promise.reject(new Error(`Unexpected URL: ${url}`)); + }); + + env.server.SEARCH_PROVIDER = undefined; + env.server.SERPER_API_KEY = "test-serper-key"; + + const { providerUsed } = await executeSearch(subQueries); + + fetchSpy.mockRestore(); + + expect(providerUsed).toBe("tavily"); + expect(tavilyCalls).toBe(subQueries.length); + expect(serperCalls).toBe(0); + }), + { numRuns: 50 } + ); + }); +}); + +// ─── Property 17: Serper-dependent strategies downgrade when key missing ─────── + +describe("Property 17: Missing Serper key downgrades Serper-dependent strategies to tavily", () => { + const serperDependentStrategies: ProviderStrategy[] = ["serper", "fallback", "parallel"]; + + it("for each Serper-dependent strategy, when SERPER_API_KEY is unset, providerUsed is tavily and no throw", async () => { + await fc.assert( + fc.asyncProperty( + fc.constantFrom(...serperDependentStrategies), + subQueriesArb, + async (strategy, subQueries) => { + const fetchSpy = jest.spyOn(globalThis, "fetch").mockImplementation((input: RequestInfo | URL) => { + const url = typeof input === "string" ? input : input instanceof URL ? input.href : (input as Request).url; + if (url === TAVILY_URL) { + return Promise.resolve({ + ok: true, + text: async () => "", + json: async () => ({ + results: [{ url: "https://tavily.com/1", title: "T", content: "C", score: 0.9 }], + }), + } as Response); + } + if (url === SERPER_URL) { + return Promise.resolve({ + ok: true, + text: async () => "", + json: async () => ({ news: [] }), + } as Response); + } + return Promise.reject(new Error(`Unexpected URL: ${url}`)); + }); + + env.server.SERPER_API_KEY = undefined as unknown as string; + const warnSpy = jest.spyOn(console, "warn").mockImplementation(); + + const { providerUsed } = await executeSearch(subQueries, strategy); + + warnSpy.mockRestore(); + fetchSpy.mockRestore(); + + expect(providerUsed).toBe("tavily"); + } + ), + { numRuns: 30 } + ); + }); +}); diff --git a/__tests__/api/trendSearch/search-strategy.test.ts b/__tests__/api/trendSearch/search-strategy.test.ts new file mode 100644 index 00000000..fec7716a --- /dev/null +++ b/__tests__/api/trendSearch/search-strategy.test.ts @@ -0,0 +1,242 @@ +/** + * Unit tests for executeSearch strategy logic. + * Feature: Serper dual-channel search — Task 4.2 + * Requirements: 3.1, 3.2, 3.3, 3.4, 3.5, 4.3, 6.1, 6.2 + */ + +const TAVILY_URL = "https://api.tavily.com/search"; +const SERPER_URL = "https://google.serper.dev/news"; + +jest.mock("~/env", () => { + const server = { + TAVILY_API_KEY: "test-tavily-key", + SERPER_API_KEY: "test-serper-key", + SEARCH_PROVIDER: undefined as "tavily" | "serper" | "fallback" | "parallel" | undefined, + }; + return { env: { server } }; +}); + +import { env } from "~/env"; +import { executeSearch } from "~/lib/tools/trend-search/web-search"; +import type { PlannedQuery } from "~/lib/tools/trend-search/types"; + +const subQueries: PlannedQuery[] = [ + { searchQuery: "test query", category: "tech", rationale: "test" }, +]; + +function tavilyResponse(results: { url: string; title?: string; content?: string; score?: number }[]) { + return { + ok: true, + text: async () => "", + json: async () => ({ results }), + } as Response; +} + +function serperResponse(items: { link: string; title?: string; snippet?: string; score?: number }[]) { + return { + ok: true, + text: async () => "", + json: async () => ({ + news: items.map((item) => ({ + link: item.link, + title: item.title ?? "Untitled", + snippet: item.snippet ?? "", + position: 1, + })), + }), + } as Response; +} + +function getFetchCallsByUrl(fetchSpy: jest.SpyInstance): { tavily: number; serper: number } { + const calls = fetchSpy.mock.calls as [string, unknown][]; + let tavily = 0; + let serper = 0; + for (const [url] of calls) { + if (url === TAVILY_URL) tavily++; + if (url === SERPER_URL) serper++; + } + return { tavily, serper }; +} + +describe("executeSearch strategy logic", () => { + let fetchSpy: jest.SpyInstance; + + beforeEach(() => { + env.server.TAVILY_API_KEY = "test-tavily-key"; + env.server.SERPER_API_KEY = "test-serper-key"; + env.server.SEARCH_PROVIDER = undefined; + fetchSpy = jest.spyOn(globalThis, "fetch").mockImplementation((input: RequestInfo | URL) => { + const url = typeof input === "string" ? input : input instanceof URL ? input.href : (input as Request).url; + if (url === TAVILY_URL) { + return Promise.resolve(tavilyResponse([{ url: "https://tavily.com/1", title: "T", content: "C", score: 0.9 }])); + } + if (url === SERPER_URL) { + return Promise.resolve(serperResponse([{ link: "https://serper.com/1", title: "S", snippet: "Snippet" }])); + } + return Promise.reject(new Error(`Unexpected URL: ${url}`)); + }); + }); + + afterEach(() => { + fetchSpy.mockRestore(); + }); + + describe("default strategy (no env) uses Tavily only", () => { + it("when SEARCH_PROVIDER is unset, only Tavily is called and providerUsed is tavily", async () => { + env.server.SEARCH_PROVIDER = undefined; + env.server.SERPER_API_KEY = "test-serper-key"; + + const { results, providerUsed } = await executeSearch(subQueries); + + expect(providerUsed).toBe("tavily"); + const { tavily, serper } = getFetchCallsByUrl(fetchSpy); + expect(tavily).toBe(1); + expect(serper).toBe(0); + expect(results).toHaveLength(1); + expect(results[0]!.url).toBe("https://tavily.com/1"); + }); + }); + + describe('"serper" strategy uses Serper only', () => { + it("when strategy is serper, only Serper is called and providerUsed is serper", async () => { + env.server.SEARCH_PROVIDER = "serper"; + + const { results, providerUsed } = await executeSearch(subQueries); + + expect(providerUsed).toBe("serper"); + const { tavily, serper } = getFetchCallsByUrl(fetchSpy); + expect(tavily).toBe(0); + expect(serper).toBe(1); + expect(results).toHaveLength(1); + expect(results[0]!.url).toBe("https://serper.com/1"); + }); + }); + + describe('"fallback" strategy tries Serper first, falls back to Tavily on total failure', () => { + it("when Serper returns no results for all sub-queries, Tavily is called and providerUsed is tavily (fallback)", async () => { + env.server.SEARCH_PROVIDER = "fallback"; + let callCount = 0; + fetchSpy.mockImplementation((input: RequestInfo | URL) => { + callCount++; + const url = typeof input === "string" ? input : input instanceof URL ? input.href : (input as Request).url; + if (url === SERPER_URL) { + return Promise.resolve(serperResponse([])); // empty + } + if (url === TAVILY_URL) { + return Promise.resolve(tavilyResponse([{ url: "https://tavily.com/fallback", title: "T", content: "C", score: 0.8 }])); + } + return Promise.reject(new Error(`Unexpected URL: ${url}`)); + }); + + const { results, providerUsed } = await executeSearch(subQueries); + + expect(providerUsed).toBe("tavily (fallback)"); + const { tavily, serper } = getFetchCallsByUrl(fetchSpy); + expect(serper).toBe(1); + expect(tavily).toBe(1); + expect(results).toHaveLength(1); + expect(results[0]!.url).toBe("https://tavily.com/fallback"); + }); + }); + + describe('"fallback" strategy does NOT fall back when Serper returns results', () => { + it("when Serper returns results, Tavily is not called and providerUsed is serper", async () => { + env.server.SEARCH_PROVIDER = "fallback"; + fetchSpy.mockImplementation((input: RequestInfo | URL) => { + const url = typeof input === "string" ? input : input instanceof URL ? input.href : (input as Request).url; + if (url === SERPER_URL) { + return Promise.resolve(serperResponse([{ link: "https://serper.com/ok", title: "S", snippet: "S" }])); + } + if (url === TAVILY_URL) { + return Promise.resolve(tavilyResponse([{ url: "https://tavily.com/1", title: "T", content: "C", score: 0.9 }])); + } + return Promise.reject(new Error(`Unexpected URL: ${url}`)); + }); + + const { results, providerUsed } = await executeSearch(subQueries); + + expect(providerUsed).toBe("serper"); + const { tavily, serper } = getFetchCallsByUrl(fetchSpy); + expect(serper).toBe(1); + expect(tavily).toBe(0); + expect(results).toHaveLength(1); + expect(results[0]!.url).toBe("https://serper.com/ok"); + }); + }); + + describe('"parallel" strategy calls both providers and merges results', () => { + it("when strategy is parallel, both Serper and Tavily are called and providerUsed is tavily+serper", async () => { + env.server.SEARCH_PROVIDER = "parallel"; + fetchSpy.mockImplementation((input: RequestInfo | URL) => { + const url = typeof input === "string" ? input : input instanceof URL ? input.href : (input as Request).url; + if (url === SERPER_URL) { + return Promise.resolve(serperResponse([{ link: "https://serper.com/1", title: "S", snippet: "S" }])); + } + if (url === TAVILY_URL) { + return Promise.resolve(tavilyResponse([{ url: "https://tavily.com/1", title: "T", content: "C", score: 0.9 }])); + } + return Promise.reject(new Error(`Unexpected URL: ${url}`)); + }); + + const { results, providerUsed } = await executeSearch(subQueries); + + expect(providerUsed).toBe("tavily+serper"); + const { tavily, serper } = getFetchCallsByUrl(fetchSpy); + expect(serper).toBe(1); + expect(tavily).toBe(1); + expect(results).toHaveLength(2); + const urls = results.map((r) => r.url).sort(); + expect(urls).toEqual(["https://serper.com/1", "https://tavily.com/1"]); + }); + }); + + describe('"parallel" strategy deduplicates by URL, keeping higher score', () => { + it("when both providers return the same URL, result has the higher score", async () => { + env.server.SEARCH_PROVIDER = "parallel"; + const sameUrl = "https://example.com/same"; + fetchSpy.mockImplementation((input: RequestInfo | URL) => { + const url = typeof input === "string" ? input : input instanceof URL ? input.href : (input as Request).url; + if (url === SERPER_URL) { + return Promise.resolve( + serperResponse([{ link: sameUrl, title: "From Serper", snippet: "S" }]) + ); + } + if (url === TAVILY_URL) { + return Promise.resolve( + tavilyResponse([{ url: sameUrl, title: "From Tavily", content: "C", score: 0.95 }]) + ); + } + return Promise.reject(new Error(`Unexpected URL: ${url}`)); + }); + + const { results, providerUsed } = await executeSearch(subQueries); + + expect(providerUsed).toBe("tavily+serper"); + expect(results).toHaveLength(1); + expect(results[0]!.url).toBe(sameUrl); + expect(results[0]!.score).toBe(0.95); + expect(results[0]!.title).toBe("From Tavily"); + }); + }); + + describe('missing Serper key with "serper" strategy downgrades to "tavily"', () => { + it("when SEARCH_PROVIDER is serper but SERPER_API_KEY is unset, only Tavily is called and providerUsed is tavily", async () => { + env.server.SEARCH_PROVIDER = "serper"; + env.server.SERPER_API_KEY = undefined as unknown as string; + const warnSpy = jest.spyOn(console, "warn").mockImplementation(); + + const { results, providerUsed } = await executeSearch(subQueries); + + expect(providerUsed).toBe("tavily"); + const { tavily, serper } = getFetchCallsByUrl(fetchSpy); + expect(tavily).toBe(1); + expect(serper).toBe(0); + expect(results).toHaveLength(1); + expect(results[0]!.url).toBe("https://tavily.com/1"); + expect(warnSpy).toHaveBeenCalledWith( + "[web-search] SERPER_API_KEY not set; downgrading strategy to tavily." + ); + warnSpy.mockRestore(); + }); + }); +}); diff --git a/__tests__/api/trendSearch/serper-adapter.test.ts b/__tests__/api/trendSearch/serper-adapter.test.ts new file mode 100644 index 00000000..526ff04f --- /dev/null +++ b/__tests__/api/trendSearch/serper-adapter.test.ts @@ -0,0 +1,198 @@ +/** + * Unit tests for Serper adapter (callSerper). + * Feature: Serper dual-channel search — Task 4.1 + */ + +const mockFetch = jest.fn(); + +jest.mock("~/env", () => ({ + env: { + server: { + SERPER_API_KEY: "test-serper-key", + }, + }, +})); + +beforeEach(() => { + mockFetch.mockReset(); + jest.spyOn(globalThis, "fetch").mockImplementation(mockFetch); +}); + +afterEach(() => { + jest.restoreAllMocks(); +}); + +// Must import after mocks so env is mocked +import { callSerper } from "~/lib/tools/trend-search/providers/serper"; +import type { RawSearchResult } from "~/lib/tools/trend-search/types"; + +const SERPER_NEWS_URL = "https://google.serper.dev/news"; + +function makeOkResponse(body: { news?: unknown[] }) { + return { + ok: true, + text: async () => "", + json: async () => body, + } as Response; +} + +function makeErrorResponse(status: number, statusText: string, body = "Error") { + return { + ok: false, + status, + statusText, + text: async () => body, + json: async () => { + throw new Error("not json"); + }, + } as Response; +} + +describe("callSerper", () => { + describe("valid response normalizes correctly to RawSearchResult[]", () => { + it("maps link→url, title, snippet→content, score, publishedDate", async () => { + mockFetch.mockResolvedValue( + makeOkResponse({ + news: [ + { + title: "AI Trends 2026", + link: "https://example.com/1", + snippet: "Summary here", + date: "2 hours ago", + position: 1, + }, + ], + }) + ); + + const results = await callSerper("AI trends"); + + expect(mockFetch).toHaveBeenCalledWith( + SERPER_NEWS_URL, + expect.objectContaining({ + method: "POST", + headers: { + "X-API-KEY": "test-serper-key", + "Content-Type": "application/json", + }, + body: JSON.stringify({ q: "AI trends", num: 10 }), + }) + ); + expect(results).toHaveLength(1); + const r = results[0] as RawSearchResult; + expect(r.url).toBe("https://example.com/1"); + expect(r.title).toBe("AI Trends 2026"); + expect(r.content).toBe("Summary here"); + expect(r.score).toBeDefined(); + expect(typeof r.score).toBe("number"); + expect(r.publishedDate).toBe("2 hours ago"); + }); + + it("uses Untitled and empty string when title/snippet missing", async () => { + mockFetch.mockResolvedValue( + makeOkResponse({ + news: [{ link: "https://example.com/2" }], + }) + ); + + const results = await callSerper("query"); + + expect(results).toHaveLength(1); + expect(results[0].title).toBe("Untitled"); + expect(results[0].content).toBe(""); + }); + }); + + describe("missing SERPER_API_KEY returns empty array", () => { + it("returns [] and does not call fetch when key is undefined", async () => { + const warnSpy = jest.spyOn(console, "warn").mockImplementation(); + const envModule = await import("~/env"); + const server = envModule.env.server as { SERPER_API_KEY?: string }; + const original = server.SERPER_API_KEY; + server.SERPER_API_KEY = undefined; + + const results = await callSerper("query"); + + expect(results).toEqual([]); + expect(mockFetch).not.toHaveBeenCalled(); + server.SERPER_API_KEY = original; + warnSpy.mockRestore(); + }); + }); + + describe("non-2xx response throws error", () => { + it("throws with status and body text on 500", async () => { + mockFetch.mockResolvedValue(makeErrorResponse(500, "Internal Server Error", "Server down")); + + await expect(callSerper("query")).rejects.toThrow( + /Serper API error: 500 Internal Server Error.*Server down/ + ); + }); + + it("throws on 401", async () => { + mockFetch.mockResolvedValue(makeErrorResponse(401, "Unauthorized", "Invalid key")); + + await expect(callSerper("query")).rejects.toThrow(/Serper API error: 401/); + }); + }); + + describe("empty news array returns empty results", () => { + it("returns [] when news is empty array", async () => { + mockFetch.mockResolvedValue(makeOkResponse({ news: [] })); + + const results = await callSerper("query"); + + expect(results).toEqual([]); + }); + + it("returns [] when news is missing", async () => { + mockFetch.mockResolvedValue(makeOkResponse({})); + + const results = await callSerper("query"); + + expect(results).toEqual([]); + }); + }); + + describe("positional score calculation is correct", () => { + it("first item has highest score, last has lowest (score = 1 - position/total)", async () => { + mockFetch.mockResolvedValue( + makeOkResponse({ + news: [ + { link: "https://a.com", position: 1 }, + { link: "https://b.com", position: 2 }, + { link: "https://c.com", position: 3 }, + ], + }) + ); + + const results = await callSerper("query"); + + expect(results).toHaveLength(3); + const total = 3; + expect(results[0].score).toBeCloseTo(1 - 1 / total); // 0.666... + expect(results[1].score).toBeCloseTo(1 - 2 / total); // 0.333... + expect(results[2].score).toBeCloseTo(1 - 3 / total); // 0 + expect(results[0].score).toBeGreaterThan(results[1].score); + expect(results[1].score).toBeGreaterThan(results[2].score); + }); + + it("uses index+1 when position is missing", async () => { + mockFetch.mockResolvedValue( + makeOkResponse({ + news: [ + { link: "https://a.com" }, + { link: "https://b.com" }, + ], + }) + ); + + const results = await callSerper("query"); + + expect(results).toHaveLength(2); + // position 1 and 2 from index+1, total 2 → scores 0.5 and 0 + expect(results[0].score).toBeCloseTo(1 - 1 / 2); + expect(results[1].score).toBeCloseTo(1 - 2 / 2); + }); + }); +}); diff --git a/__tests__/api/trendSearch/synthesizer.pbt.test.ts b/__tests__/api/trendSearch/synthesizer.pbt.test.ts index 6e8a8d79..185b7a78 100644 --- a/__tests__/api/trendSearch/synthesizer.pbt.test.ts +++ b/__tests__/api/trendSearch/synthesizer.pbt.test.ts @@ -19,8 +19,8 @@ jest.mock("@langchain/openai", () => { }); import * as fc from "fast-check"; -import { synthesizeResults } from "~/server/trend-search/synthesizer"; -import type { RawSearchResult, SearchCategory } from "~/server/trend-search/types"; +import { synthesizeResults } from "~/lib/tools/trend-search/synthesizer"; +import type { RawSearchResult, SearchCategory } from "~/lib/tools/trend-search/types"; // ─── Arbitraries ───────────────────────────────────────────────────────────── @@ -30,11 +30,11 @@ const categoryArb = fc.constantFrom(...validCategories); const validQueryArb = fc .string({ minLength: 1, maxLength: 1000 }) - .filter((s) => s.trim().length > 0); + .filter((s: string) => s.trim().length > 0); const validCompanyContextArb = fc .string({ minLength: 1, maxLength: 2000 }) - .filter((s) => s.trim().length > 0); + .filter((s: string) => s.trim().length > 0); /** Single raw result (URL must be unique for traceability). */ const rawResultArb = fc.record({ @@ -72,7 +72,12 @@ describe("Property 7: Synthesizer output structure", () => { validQueryArb, validCompanyContextArb, fc.array(categoryArb, { minLength: 0, maxLength: 4 }), - async (rawResults, query, companyContext, categories) => { + async ( + rawResults: RawSearchResult[], + query: string, + companyContext: string, + categories: SearchCategory[] + ) => { const mockResults = buildMockResults(rawResults, 5); mockInvoke.mockResolvedValue({ results: mockResults }); @@ -115,7 +120,12 @@ describe("Property 8: Source URL traceability", () => { validQueryArb, validCompanyContextArb, fc.array(categoryArb, { minLength: 0, maxLength: 4 }), - async (rawResults, query, companyContext, categories) => { + async ( + rawResults: RawSearchResult[], + query: string, + companyContext: string, + categories: SearchCategory[] + ) => { const urlSet = new Set(rawResults.map((r) => r.url)); const mockResults = buildMockResults(rawResults, 5); mockInvoke.mockResolvedValue({ results: mockResults }); diff --git a/__tests__/api/trendSearch/types.pbt.test.ts b/__tests__/api/trendSearch/types.pbt.test.ts index e8052b1f..15586e5c 100644 --- a/__tests__/api/trendSearch/types.pbt.test.ts +++ b/__tests__/api/trendSearch/types.pbt.test.ts @@ -8,7 +8,7 @@ import { TrendSearchInputSchema, TrendSearchEventDataSchema, SearchCategoryEnum, -} from "~/server/trend-search/types"; +} from "~/lib/tools/trend-search/types"; // ─── Arbitraries ───────────────────────────────────────────────────────────── diff --git a/__tests__/api/trendSearch/web-search.pbt.test.ts b/__tests__/api/trendSearch/web-search.pbt.test.ts index 2c5c9de2..41871d6a 100644 --- a/__tests__/api/trendSearch/web-search.pbt.test.ts +++ b/__tests__/api/trendSearch/web-search.pbt.test.ts @@ -14,8 +14,8 @@ jest.mock("~/env", () => ({ })); import * as fc from "fast-check"; -import { executeSearch } from "~/server/trend-search/web-search"; -import type { PlannedQuery, SearchCategory } from "~/server/trend-search/types"; +import { executeSearch } from "~/lib/tools/trend-search/web-search"; +import type { PlannedQuery, SearchCategory } from "~/lib/tools/trend-search/types"; // ─── Arbitraries ───────────────────────────────────────────────────────────── @@ -112,11 +112,11 @@ describe("Unit: one sub-query returns 0 results, pipeline continues", () => { ); }); - const result = await executeSearch(subQueries); + const { results } = await executeSearch(subQueries); expect(fetchSpy).toHaveBeenCalledTimes(3); - expect(result).toHaveLength(2); - expect(result.map((r) => r.url)).toEqual(["https://b.com", "https://c.com"]); + expect(results).toHaveLength(2); + expect(results.map((r) => r.url)).toEqual(["https://b.com", "https://c.com"]); }); }); @@ -151,14 +151,14 @@ describe("Unit: Tavily fails, retries 2 times then marks sub-query failed", () = const consoleErrorSpy = jest.spyOn(console, "error").mockImplementation(() => {}); const consoleWarnSpy = jest.spyOn(console, "warn").mockImplementation(() => {}); - const result = await executeSearch(subQueries); + const { results } = await executeSearch(subQueries); consoleErrorSpy.mockRestore(); consoleWarnSpy.mockRestore(); // 1 + 2 retries for first sub-query, then 1 for second expect(fetchSpy).toHaveBeenCalledTimes(4); - expect(result).toHaveLength(1); - expect(result[0].url).toBe("https://ok.com"); + expect(results).toHaveLength(1); + expect(results[0]!.url).toBe("https://ok.com"); }); }); From 1003b85dd1b915160b47c048816be5d33dbc8c49 Mon Sep 17 00:00:00 2001 From: Tiffany Wei Date: Mon, 2 Mar 2026 20:44:04 -0500 Subject: [PATCH 05/20] fix errors in lint --- src/lib/ocr/processor.ts | 43 +++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/src/lib/ocr/processor.ts b/src/lib/ocr/processor.ts index 865e55a1..dc97a16f 100644 --- a/src/lib/ocr/processor.ts +++ b/src/lib/ocr/processor.ts @@ -56,7 +56,7 @@ function sanitizeDbError(error: unknown): void { const e = error as unknown as Record; delete e.params; if (typeof e.query === "string") { - e.query = (e.query as string).substring(0, 120) + "…"; + e.query = e.query.substring(0, 120) + "…"; } } @@ -435,25 +435,28 @@ export async function storeBatch( return withDbRetry(async () => { return db.transaction(async (tx) => { - const parentValues = vectorizedChunks.map((chunk) => ({ - documentId: BigInt(documentId), - structureId: BigInt(rootStructureId), - content: chunk.content, - tokenCount: Math.ceil(chunk.content.length / 4), - charCount: chunk.content.length, - embedding: - chunk.vector && chunk.vector.length > 0 - ? sql`${JSON.stringify(chunk.vector)}::vector(1536)` - : null, - pageNumber: chunk.metadata.pageNumber, - semanticType: (chunk.metadata.isTable ? "tabular" : "narrative") as - | "tabular" - | "narrative", - contentHash: crypto - .createHash("sha256") - .update(chunk.content) - .digest("hex"), - })); + const parentValues = vectorizedChunks.map((chunk) => { + const semanticType: "tabular" | "narrative" = chunk.metadata.isTable + ? "tabular" + : "narrative"; + return { + documentId: BigInt(documentId), + structureId: BigInt(rootStructureId), + content: chunk.content, + tokenCount: Math.ceil(chunk.content.length / 4), + charCount: chunk.content.length, + embedding: + chunk.vector && chunk.vector.length > 0 + ? sql`${JSON.stringify(chunk.vector)}::vector(1536)` + : null, + pageNumber: chunk.metadata.pageNumber, + semanticType, + contentHash: crypto + .createHash("sha256") + .update(chunk.content) + .digest("hex"), + }; + }); const parentRows = await tx .insert(documentContextChunks) From 4f030df6f775715e7e7a944d1d92a72eef58b6ce Mon Sep 17 00:00:00 2001 From: ewar Date: Sat, 7 Mar 2026 13:49:19 -0500 Subject: [PATCH 06/20] update metadata UI --- src/app/api/company/metadata/history/route.ts | 56 +++++ src/app/api/company/metadata/route.ts | 133 +++++++++- .../components/CompanyMetadataPanel.tsx | 121 ++++++++- .../metadata/components/CompanyInfoCard.tsx | 217 +++++++++++++--- .../metadata/components/InlineEditor.tsx | 72 ++++++ .../metadata/components/MarketsSection.tsx | 149 +++++++++-- .../components/MetadataHistorySection.tsx | 237 ++++++++++++++++++ .../metadata/components/PeopleSection.tsx | 170 +++++++++++-- .../metadata/components/PriorityBadge.tsx | 20 ++ .../metadata/components/ServicesSection.tsx | 147 ++++++++++- 10 files changed, 1235 insertions(+), 87 deletions(-) create mode 100644 src/app/api/company/metadata/history/route.ts create mode 100644 src/app/employer/metadata/components/InlineEditor.tsx create mode 100644 src/app/employer/metadata/components/MetadataHistorySection.tsx create mode 100644 src/app/employer/metadata/components/PriorityBadge.tsx diff --git a/src/app/api/company/metadata/history/route.ts b/src/app/api/company/metadata/history/route.ts new file mode 100644 index 00000000..df108c88 --- /dev/null +++ b/src/app/api/company/metadata/history/route.ts @@ -0,0 +1,56 @@ +/** + * GET /api/company/metadata/history + * + * Returns the audit history for the logged-in user's company metadata. + * Sorted newest-first, limited to 100 entries. + */ + +import { NextResponse } from "next/server"; +import { auth } from "@clerk/nextjs/server"; +import { eq, desc } from "drizzle-orm"; + +import { db } from "~/server/db"; +import { users } from "~/server/db/schema"; +import { companyMetadataHistory } from "~/server/db/schema/company-metadata"; + +export async function GET() { + try { + const { userId } = await auth(); + if (!userId) { + return NextResponse.json({ error: "Unauthorized" }, { status: 401 }); + } + + const [userInfo] = await db + .select({ companyId: users.companyId }) + .from(users) + .where(eq(users.userId, userId)); + + if (!userInfo) { + return NextResponse.json({ error: "User not found" }, { status: 400 }); + } + + const history = await db + .select({ + id: companyMetadataHistory.id, + changeType: companyMetadataHistory.changeType, + diff: companyMetadataHistory.diff, + changedBy: companyMetadataHistory.changedBy, + documentId: companyMetadataHistory.documentId, + createdAt: companyMetadataHistory.createdAt, + }) + .from(companyMetadataHistory) + .where(eq(companyMetadataHistory.companyId, userInfo.companyId)) + .orderBy(desc(companyMetadataHistory.createdAt)) + .limit(100); + + const serializable = history.map((h) => ({ + ...h, + documentId: h.documentId != null ? String(h.documentId) : null, + })); + + return NextResponse.json({ history: serializable }); + } catch (error) { + console.error("[company-metadata/history] GET error:", error); + return NextResponse.json({ error: "Internal server error" }, { status: 500 }); + } +} diff --git a/src/app/api/company/metadata/route.ts b/src/app/api/company/metadata/route.ts index e27fb685..4ad228ab 100644 --- a/src/app/api/company/metadata/route.ts +++ b/src/app/api/company/metadata/route.ts @@ -7,10 +7,11 @@ import { NextResponse } from "next/server"; import { auth } from "@clerk/nextjs/server"; import { eq } from "drizzle-orm"; +import { z } from "zod"; import { db } from "~/server/db"; import { users } from "~/server/db/schema"; -import { companyMetadata } from "~/server/db/schema/company-metadata"; +import { companyMetadata, companyMetadataHistory } from "~/server/db/schema/company-metadata"; export async function GET() { try { @@ -65,3 +66,133 @@ export async function GET() { ); } } + +const PatchSchema = z.object({ + path: z.string().min(1), + value: z.string(), +}); + +function buildManualFact(value: string | number, existing?: { visibility?: string; usage?: string }) { + const now = new Date().toISOString(); + return { + value, + visibility: (existing?.visibility as "public" | "internal" | "confidential") ?? ("public" as const), + usage: (existing?.usage as "outreach_ok" | "internal_only" | "do_not_use") ?? ("outreach_ok" as const), + confidence: 1.0, + priority: "manual_override" as const, + status: "active" as const, + last_updated: now, + sources: [{ doc_id: 0, doc_name: "Manual edit", extracted_at: now }], + }; +} + +export async function PATCH(request: Request) { + try { + const { userId } = await auth(); + if (!userId) { + return NextResponse.json({ error: "Unauthorized" }, { status: 401 }); + } + + const body = await request.json() as unknown; + const parsed = PatchSchema.safeParse(body); + if (!parsed.success) { + return NextResponse.json({ error: "Invalid request body" }, { status: 400 }); + } + const { path, value } = parsed.data; + + const [userInfo] = await db + .select({ companyId: users.companyId }) + .from(users) + .where(eq(users.userId, userId)); + + if (!userInfo) { + return NextResponse.json({ error: "User not found" }, { status: 400 }); + } + + const [existing] = await db + .select({ metadata: companyMetadata.metadata }) + .from(companyMetadata) + .where(eq(companyMetadata.companyId, userInfo.companyId)); + + if (!existing) { + return NextResponse.json( + { error: "No metadata found. Run extraction first." }, + { status: 404 }, + ); + } + + const updatedMetadata = structuredClone(existing.metadata); + const now = new Date().toISOString(); + const segments = path.split("."); + let oldFact: unknown = undefined; + let updatedFact: unknown = undefined; + + if (segments[0] === "company" && segments[1]) { + const field = segments[1]; + const existingFact = updatedMetadata.company[field]; + oldFact = existingFact; + updatedFact = buildManualFact( + field === "founded_year" ? Number(value) : value, + existingFact, + ); + updatedMetadata.company[field] = updatedFact; + } else if (segments[0] === "people" && segments[1] && segments[2]) { + const idx = Number(segments[1]); + const field = segments[2]; + if (isNaN(idx) || idx < 0 || idx >= updatedMetadata.people.length) { + return NextResponse.json({ error: "Invalid people index" }, { status: 400 }); + } + const person = updatedMetadata.people[idx]!; + oldFact = person[field]; + updatedFact = buildManualFact(value, person[field]); + person[field] = updatedFact; + } else if (segments[0] === "services" && segments[1] && segments[2]) { + const idx = Number(segments[1]); + const field = segments[2]; + if (isNaN(idx) || idx < 0 || idx >= updatedMetadata.services.length) { + return NextResponse.json({ error: "Invalid services index" }, { status: 400 }); + } + const service = updatedMetadata.services[idx]!; + oldFact = service[field]; + updatedFact = buildManualFact(value, service[field]); + service[field] = updatedFact; + } else if (segments[0] === "markets" && segments[1] && segments[2] != null) { + const subfield = segments[1] as "primary" | "verticals" | "geographies"; + const idx = Number(segments[2]); + const arr = updatedMetadata.markets[subfield]; + if (!arr || isNaN(idx) || idx < 0 || idx >= arr.length) { + return NextResponse.json({ error: "Invalid markets index" }, { status: 400 }); + } + oldFact = arr[idx]; + updatedFact = buildManualFact(value, arr[idx]); + arr[idx] = updatedFact; + } else { + return NextResponse.json({ error: `Unsupported path: ${path}` }, { status: 400 }); + } + + updatedMetadata.updated_at = now; + + const diff = { + added: oldFact ? [] : [{ path, new: updatedFact }], + updated: oldFact ? [{ path, old: oldFact, new: updatedFact }] : [], + deprecated: [], + }; + + await db + .update(companyMetadata) + .set({ metadata: updatedMetadata }) + .where(eq(companyMetadata.companyId, userInfo.companyId)); + + await db.insert(companyMetadataHistory).values({ + companyId: userInfo.companyId, + changeType: "manual_override", + diff, + changedBy: userId, + }); + + return NextResponse.json({ success: true, path, fact: updatedFact }); + } catch (error) { + console.error("[company-metadata] PATCH error:", error); + return NextResponse.json({ error: "Internal server error" }, { status: 500 }); + } +} diff --git a/src/app/employer/documents/components/CompanyMetadataPanel.tsx b/src/app/employer/documents/components/CompanyMetadataPanel.tsx index db34a3db..4161dee9 100644 --- a/src/app/employer/documents/components/CompanyMetadataPanel.tsx +++ b/src/app/employer/documents/components/CompanyMetadataPanel.tsx @@ -1,6 +1,6 @@ "use client"; -import React, { useEffect, useState, useCallback } from "react"; +import React, { useEffect, useState, useCallback, useRef } from "react"; import Link from "next/link"; import { Building2, @@ -11,6 +11,7 @@ import { FileText, Sparkles, Pencil, + Download, } from "lucide-react"; import { Button } from "~/app/employer/documents/components/ui/button"; import { Card, CardHeader, CardTitle, CardContent } from "~/app/employer/documents/components/ui/card"; @@ -20,6 +21,7 @@ import { PeopleSection } from "~/app/employer/metadata/components/PeopleSection" import { ServicesSection } from "~/app/employer/metadata/components/ServicesSection"; import { MarketsSection } from "~/app/employer/metadata/components/MarketsSection"; import { ProvenanceCard } from "~/app/employer/metadata/components/ProvenanceCard"; +import { MetadataHistorySection } from "~/app/employer/metadata/components/MetadataHistorySection"; import type { CompanyMetadataJSON } from "~/lib/tools/company-metadata/types"; interface CompanyProfile { @@ -78,6 +80,8 @@ export function CompanyMetadataPanel() { const [loading, setLoading] = useState(true); const [error, setError] = useState(null); const [extracting, setExtracting] = useState(false); + const [isEditMode, setIsEditMode] = useState(false); + const previousDataRef = useRef(null); const fetchMetadata = useCallback(async () => { setLoading(true); @@ -102,6 +106,83 @@ export function CompanyMetadataPanel() { } }, []); + const handleFieldSave = useCallback(async (path: string, value: string) => { + previousDataRef.current = data; + const now = new Date().toISOString(); + const manualSource = { doc_id: 0, doc_name: "Manual edit", extracted_at: now }; + + // Optimistic update + setData((prev) => { + if (!prev?.metadata) return prev; + const m = structuredClone(prev.metadata); + const segments = path.split("."); + + const buildFact = (val: string | number, existing?: { visibility?: string; usage?: string }) => ({ + value: val, + visibility: existing?.visibility ?? "public", + usage: existing?.usage ?? "outreach_ok", + confidence: 1.0, + priority: "manual_override" as const, + status: "active" as const, + last_updated: now, + sources: [manualSource], + }); + + if (segments[0] === "company" && segments[1]) { + const field = segments[1]; + const existing = m.company[field]; + m.company[field] = buildFact(field === "founded_year" ? Number(value) : value, existing); + } else if (segments[0] === "people" && segments[1] && segments[2]) { + const idx = Number(segments[1]); + const field = segments[2]; + if (m.people[idx]) { + m.people[idx][field] = buildFact(value, m.people[idx][field]); + } + } else if (segments[0] === "services" && segments[1] && segments[2]) { + const idx = Number(segments[1]); + const field = segments[2]; + if (m.services[idx]) { + m.services[idx][field] = buildFact(value, m.services[idx][field]); + } + } else if (segments[0] === "markets" && segments[1] && segments[2] != null) { + const sub = segments[1] as "primary" | "verticals" | "geographies"; + const idx = Number(segments[2]); + const arr = m.markets[sub]; + if (arr?.[idx]) { + arr[idx] = buildFact(value, arr[idx]); + } + } + m.updated_at = now; + return { ...prev, metadata: m }; + }); + try { + const res = await fetch("/api/company/metadata", { + method: "PATCH", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ path, value }), + }); + if (!res.ok) throw new Error("Save failed"); + await fetchMetadata(); + } catch (err) { + if (previousDataRef.current !== null) { + setData(previousDataRef.current); + } + throw err; + } + }, [data, fetchMetadata]); + + const handleExportJson = useCallback(() => { + if (!data?.metadata) return; + const json = JSON.stringify(data.metadata, null, 2); + const blob = new Blob([json], { type: "application/json" }); + const url = URL.createObjectURL(blob); + const a = document.createElement("a"); + a.href = url; + a.download = `company-metadata-${new Date().toISOString().split("T")[0]}.json`; + a.click(); + URL.revokeObjectURL(url); + }, [data]); + const runExtraction = useCallback(async () => { setExtracting(true); setError(null); @@ -182,6 +263,15 @@ export function CompanyMetadataPanel() {
+ +
- + - {metadata.people.length > 0 && } + {metadata.people.length > 0 && ( + + )} - {metadata.services.length > 0 && } + {metadata.services.length > 0 && ( + + )} {(metadata.markets.primary?.length ?? 0) > 0 || (metadata.markets.geographies?.length ?? 0) > 0 ? ( - + ) : null} + + )} diff --git a/src/app/employer/metadata/components/CompanyInfoCard.tsx b/src/app/employer/metadata/components/CompanyInfoCard.tsx index 1ba61bec..d3f3157c 100644 --- a/src/app/employer/metadata/components/CompanyInfoCard.tsx +++ b/src/app/employer/metadata/components/CompanyInfoCard.tsx @@ -1,6 +1,6 @@ "use client"; -import React from "react"; +import React, { useState, useEffect } from "react"; import { Building2, Globe, @@ -13,12 +13,15 @@ import { import { Card, CardHeader, CardTitle, CardContent } from "~/app/employer/documents/components/ui/card"; import { ConfidenceBadge } from "./ConfidenceBadge"; import { VisibilityBadge } from "./VisibilityBadge"; +import { PriorityBadge } from "./PriorityBadge"; import type { CompanyInfo, MetadataFact } from "~/lib/tools/company-metadata/types"; type AnyMetadataFact = MetadataFact | MetadataFact | MetadataFact; interface CompanyInfoCardProps { company: CompanyInfo; + isEditMode?: boolean; + onFieldSave?: (field: string, value: string) => Promise; } interface FieldDisplayProps { @@ -26,12 +29,97 @@ interface FieldDisplayProps { fact: AnyMetadataFact | undefined; icon: React.ComponentType<{ className?: string }>; isLink?: boolean; + fieldKey: string; + isEditMode?: boolean; + onFieldSave?: (field: string, value: string) => Promise; } -function FieldDisplay({ label, fact, icon: Icon, isLink }: FieldDisplayProps) { - if (!fact) return null; +/** Inline editor strip: input + Save/Reset buttons. Used inside both FieldDisplay and the description section. */ +function InlineEditor({ + fieldKey, + initialValue, + multiline, + onFieldSave, + onReset, +}: { + fieldKey: string; + initialValue: string; + multiline?: boolean; + onFieldSave: (field: string, value: string) => Promise; + onReset: () => void; +}) { + const [value, setValue] = useState(initialValue); + const [saving, setSaving] = useState(false); + const [localError, setLocalError] = useState(null); - const value = String(fact.value); + // Keep value in sync if parent resets (isEditMode toggled off → on) + useEffect(() => { + setValue(initialValue); + setLocalError(null); + }, [initialValue]); + + const handleSave = async () => { + if (value.trim() === initialValue) return; + setSaving(true); + setLocalError(null); + try { + await onFieldSave(fieldKey, value.trim()); + } catch { + setLocalError("Failed to save. Try again."); + } finally { + setSaving(false); + } + }; + + return ( +
+ {multiline ? ( +