From 60b23cf1145c728811cdb1f94d93f81d29521adc Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 1 Mar 2026 12:57:41 +0000 Subject: [PATCH 1/3] Initial plan From 2090b397502848305cb0d3fd404a7126c070918f Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 1 Mar 2026 13:03:01 +0000 Subject: [PATCH 2/3] Add Cloudflare Workers AI fallback when Gemini is rate limited Co-authored-by: harshithpabbati <43822585+harshithpabbati@users.noreply.github.com> --- README.md | 2 + app/api/webhooks/reply/route.ts | 136 ++++++++++++++++++++++++++------ env.example | 2 + 3 files changed, 114 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index a0e12c3..20c7563 100644 --- a/README.md +++ b/README.md @@ -141,3 +141,5 @@ GRANT ALL ON TABLE "public"."reply_edit" TO "service_role"; | `NEXT_PUBLIC_BASE_URL` | Base URL of your deployment (e.g. `https://answerify.dev`) | | `RESEND_API_KEY` | Resend API key for sending emails | | `GEMINI_API_KEY` | Google Gemini API key for embeddings (`gemini-embedding-001`) and completions (`gemini-3-flash-preview`) | +| `CLOUDFLARE_ACCOUNT_ID` | *(Optional)* Cloudflare account ID – used as a fallback AI provider when Gemini is unavailable (e.g. rate limited) | +| `CLOUDFLARE_API_TOKEN` | *(Optional)* Cloudflare API token with Workers AI permission – required alongside `CLOUDFLARE_ACCOUNT_ID` for the fallback to activate | diff --git a/app/api/webhooks/reply/route.ts b/app/api/webhooks/reply/route.ts index 7ef01ab..a2c11c7 100644 --- a/app/api/webhooks/reply/route.ts +++ b/app/api/webhooks/reply/route.ts @@ -11,6 +11,50 @@ function getGenAIClient() { return new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY! }); } +const CLOUDFLARE_MODEL = '@cf/meta/llama-3.1-8b-instruct'; + +/** + * Call Cloudflare Workers AI via the REST API. + * Used as a fallback when Gemini is unavailable (e.g. rate limited). 
+ */ +async function runCloudflareAgent(systemPrompt: string, userPrompt: string): Promise<string> { + const accountId = process.env.CLOUDFLARE_ACCOUNT_ID; + const apiToken = process.env.CLOUDFLARE_API_TOKEN; + + if (!accountId || !apiToken) { + throw new Error('Cloudflare credentials not configured'); + } + + const response = await fetch( + `https://api.cloudflare.com/client/v4/accounts/${accountId}/ai/run/${CLOUDFLARE_MODEL}`, + { + method: 'POST', + headers: { + Authorization: `Bearer ${apiToken}`, + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ + messages: [ + { role: 'system', content: systemPrompt }, + { role: 'user', content: userPrompt }, + ], + max_tokens: 1024, + }), + }, + ); + + if (!response.ok) { + throw new Error(`Cloudflare AI request failed: ${response.status}`); + } + + const data = await response.json(); + const text = (data as { result?: { response?: string } }).result?.response ?? ''; + if (!text) { + console.warn('Cloudflare AI returned an empty response'); + } + return text; +} + /** * Derive a 0–1 confidence score from Gemini grounding metadata. * Falls back to URL_CONTEXT_FALLBACK_CONFIDENCE when the URL context tool @@ -99,7 +143,7 @@ async function runResearchAgent( subject: string, question: string, urlList: string, -): Promise<{ findings: string; candidates: any[] | undefined }> { +): Promise<{ findings: string; candidates: any[] | undefined; usedFallback?: boolean }> { const researchPrompt = codeBlock` You are a research assistant for a customer support team. 
Your job is to find and extract the most relevant information from the provided URLs @@ -113,18 +157,43 @@ async function runResearchAgent( - If no relevant information can be found, respond with only: NO_INFORMATION `; - const result = await ai.models.generateContent({ - model: 'gemini-2.5-flash', - contents: `Subject: ${subject}\nCustomer question:\n${question}\n\nURLs to search:\n${urlList}`, - config: { - systemInstruction: researchPrompt, - maxOutputTokens: 1024, - temperature: 0.3, - tools: [{ urlContext: {} }], - }, - }); + try { + const result = await ai.models.generateContent({ + model: 'gemini-2.5-flash', + contents: `Subject: ${subject}\nCustomer question:\n${question}\n\nURLs to search:\n${urlList}`, + config: { + systemInstruction: researchPrompt, + maxOutputTokens: 1024, + temperature: 0.3, + tools: [{ urlContext: {} }], + }, + }); + + return { findings: result.text ?? '', candidates: result.candidates }; + } catch (err) { + console.warn('Gemini research agent failed, falling back to Cloudflare AI:', err); + + const cloudflareResearchPrompt = codeBlock` + You are a research assistant for a customer support team. + Your job is to extract the most relevant information to answer a customer's question. + + - Extract only information that is directly relevant to the question + - Organise the findings as concise bullet points or short paragraphs + - Include specific details: steps, values, settings, or policies that apply + - Do not write the final reply – only gather and present the raw facts + - If you cannot find relevant information, respond with only: NO_INFORMATION + `; + + // Note: unlike Gemini's urlContext tool, Cloudflare AI cannot fetch URL + // content. The URLs are listed as context so the model can reference them + // in its answer, but the response is based on the model's training data. 
+ const findings = await runCloudflareAgent( + cloudflareResearchPrompt, + `Subject: ${subject}\nCustomer question:\n${question}\n\nKnowledge base sources:\n${urlList}`, + ); - return { findings: result.text ?? '', candidates: result.candidates }; + return { findings, candidates: undefined, usedFallback: true }; + } } /** @@ -176,17 +245,24 @@ async function runWritingAgent( - Do not output anything outside of the HTML response `; - const result = await ai.models.generateContent({ - model: 'gemini-2.5-flash', - contents: `Subject: ${subject}\nCustomer question:\n${question}\n\nResearch findings:\n${findings}`, - config: { - systemInstruction: writingPrompt, - maxOutputTokens: 1024, - temperature: 0.7, - }, - }); - - return result.text ?? ''; + const userContent = `Subject: ${subject}\nCustomer question:\n${question}\n\nResearch findings:\n${findings}`; + + try { + const result = await ai.models.generateContent({ + model: 'gemini-2.5-flash', + contents: userContent, + config: { + systemInstruction: writingPrompt, + maxOutputTokens: 1024, + temperature: 0.7, + }, + }); + + return result.text ?? ''; + } catch (err) { + console.warn('Gemini writing agent failed, falling back to Cloudflare AI:', err); + return runCloudflareAgent(writingPrompt, userContent); + } } export async function POST(request: Request) { @@ -267,15 +343,23 @@ export async function POST(request: Request) { // Fetch and synthesise relevant information from the datasource URLs. // Confidence is derived from the grounding metadata of this step because it // is the step that actually reads from the knowledge-base URLs. - const { findings, candidates: researchCandidates } = await runResearchAgent( + const { + findings, + candidates: researchCandidates, + usedFallback, + } = await runResearchAgent( ai, thread?.subject ?? 
'', record.cleaned_body, urlList, ); - const confidence = computeConfidence(researchCandidates, datasources.length); - const citations = extractCitations(researchCandidates); + // When using the Cloudflare fallback, grounding metadata is unavailable so + // we use a fixed conservative confidence score to keep the reply as a draft. + const confidence = usedFallback + ? 0.5 + : computeConfidence(researchCandidates, datasources.length); + const citations = usedFallback ? [] : extractCitations(researchCandidates); if (!findings || findings.trim() === 'NO_INFORMATION') { // Generate clarifying question draft instead of erroring out diff --git a/env.example b/env.example index 179e159..24afaac 100644 --- a/env.example +++ b/env.example @@ -4,3 +4,5 @@ SUPABASE_SERVICE_KEY= NEXT_PUBLIC_BASE_URL=http://localhost:3000 RESEND_API_KEY= GEMINI_API_KEY= +CLOUDFLARE_ACCOUNT_ID= +CLOUDFLARE_API_TOKEN= From 2e37bd35b3e9c458ae6d281ac146f942369c9f7b Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 1 Mar 2026 15:34:17 +0000 Subject: [PATCH 3/3] Fetch URL content server-side so Cloudflare fallback reads actual knowledge base pages Co-authored-by: harshithpabbati <43822585+harshithpabbati@users.noreply.github.com> --- README.md | 2 + app/api/webhooks/reply/route.ts | 67 +++++++++++++++++++++++++++++++-- 2 files changed, 65 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 20c7563..3d5a143 100644 --- a/README.md +++ b/README.md @@ -143,3 +143,5 @@ GRANT ALL ON TABLE "public"."reply_edit" TO "service_role"; | `GEMINI_API_KEY` | Google Gemini API key for embeddings (`gemini-embedding-001`) and completions (`gemini-3-flash-preview`) | | `CLOUDFLARE_ACCOUNT_ID` | *(Optional)* Cloudflare account ID – used as a fallback AI provider when Gemini is unavailable (e.g. 
rate limited) | | `CLOUDFLARE_API_TOKEN` | *(Optional)* Cloudflare API token with Workers AI permission – required alongside `CLOUDFLARE_ACCOUNT_ID` for the fallback to activate | + +> **How the Cloudflare fallback handles URLs:** when Gemini is unavailable the server fetches up to 5 knowledge-base URLs itself, strips the HTML to plain text, and injects the content directly into the Cloudflare AI prompt. This gives the fallback model the same knowledge-base information that Gemini obtains via its native URL context tool — no special model capability required. diff --git a/app/api/webhooks/reply/route.ts b/app/api/webhooks/reply/route.ts index a2c11c7..b9ec97d 100644 --- a/app/api/webhooks/reply/route.ts +++ b/app/api/webhooks/reply/route.ts @@ -1,5 +1,6 @@ import { GoogleGenAI } from '@google/genai'; import { codeBlock } from 'common-tags'; +import { JSDOM } from 'jsdom'; import { Resend } from 'resend'; import { cleanBody } from '@/lib/cleanBody'; @@ -13,6 +14,51 @@ function getGenAIClient() { const CLOUDFLARE_MODEL = '@cf/meta/llama-3.1-8b-instruct'; +// Maximum number of URLs to fetch when running the Cloudflare fallback. +// Kept intentionally small because we download page content ourselves. +const MAX_FALLBACK_URLS = 5; + +// Maximum plain-text characters to include per fetched URL in the prompt. +const FALLBACK_URL_CONTENT_LENGTH = 4000; + +// Timeout in milliseconds for each URL fetch in the fallback path. +const URL_FETCH_TIMEOUT_MS = 5000; + +/** + * Fetch the plain-text content of a URL for use as AI context. + * + * Any LLM can "read" a knowledge-base URL when the page text is fetched + * server-side and injected directly into the prompt, so this approach works + * for any fallback model — not just Gemini's native urlContext tool. + * + * Returns null when the URL cannot be reached or returns no usable text. 
+ */ +async function fetchUrlContent(url: string): Promise<{ url: string; text: string } | null> { + try { + const response = await fetch(url, { + headers: { 'User-Agent': 'Answerify/1.0 (+https://answerify.dev)' }, + signal: AbortSignal.timeout(URL_FETCH_TIMEOUT_MS), + }); + + if (!response.ok) return null; + + const html = await response.text(); + + // Use JSDOM to safely parse and extract plain text — more robust than + // regex stripping, which can leave residual tag fragments. + const dom = new JSDOM(html); + dom.window.document.querySelectorAll('script, style').forEach((el) => el.remove()); + const text = (dom.window.document.body?.textContent ?? '') + .replace(/\s+/g, ' ') + .trim() + .slice(0, FALLBACK_URL_CONTENT_LENGTH); + + return text ? { url, text } : null; + } catch { + return null; + } +} + /** * Call Cloudflare Workers AI via the REST API. * Used as a fallback when Gemini is unavailable (e.g. rate limited). @@ -184,12 +230,25 @@ async function runResearchAgent( - If you cannot find relevant information, respond with only: NO_INFORMATION `; - // Note: unlike Gemini's urlContext tool, Cloudflare AI cannot fetch URL - // content. The URLs are listed as context so the model can reference them - // in its answer, but the response is based on the model's training data. + // Fetch the actual content of the knowledge-base URLs so the model has + // real page text to work with. Any LLM can support URL-based knowledge + // bases this way — no native URL tool required. + const urls = urlList.split('\n').filter(Boolean).slice(0, MAX_FALLBACK_URLS); + const settledPages = await Promise.allSettled(urls.map(fetchUrlContent)); + const fetchedPages = settledPages + .filter((r): r is PromiseFulfilledResult<{ url: string; text: string }> => + r.status === 'fulfilled' && r.value !== null, + ) + .map((r) => r.value); + + const urlContext = + fetchedPages.length > 0 + ? 
fetchedPages.map((p) => `[${p.url}]\n${p.text}`).join('\n\n---\n\n') + : urlList; + const findings = await runCloudflareAgent( cloudflareResearchPrompt, - `Subject: ${subject}\nCustomer question:\n${question}\n\nKnowledge base sources:\n${urlList}`, + `Subject: ${subject}\nCustomer question:\n${question}\n\nKnowledge base content:\n${urlContext}`, ); return { findings, candidates: undefined, usedFallback: true };