From 4bacf3a5bf85f2ca8d4be45966890710edfdf934 Mon Sep 17 00:00:00 2001 From: Timothy Lin <55767165+Deodat-Lawson@users.noreply.github.com> Date: Tue, 17 Feb 2026 19:43:16 -0500 Subject: [PATCH] fixed chat button and loading option --- __tests__/api/agent/references.test.ts | 58 +++ .../api/fetchDocument/fetchDocument.test.ts | 25 +- next.config.ts | 9 +- package.json | 2 + pnpm-lock.yaml | 73 ++++ .../agents/documentQ&A/AIChat/query/route.ts | 10 +- .../api/agents/documentQ&A/AIQuery/route.ts | 3 +- .../api/agents/documentQ&A/services/index.ts | 3 + .../agents/documentQ&A/services/references.ts | 134 +++++++ .../api/agents/documentQ&A/services/types.ts | 30 ++ .../components/AgentChatInterface.tsx | 285 ++++++++----- .../documents/components/PdfPageViewer.tsx | 125 ++++++ .../documents/components/SimpleQueryPanel.tsx | 5 +- src/app/employer/documents/hooks/useAIChat.ts | 4 + .../employer/documents/hooks/useAIChatbot.ts | 2 + src/lib/ocr/adapters/azureAdapter.ts | 20 +- src/lib/ocr/adapters/landingAdapter.ts | 374 +++++------------- src/lib/ocr/processor.ts | 98 ++++- .../tools/rag/retrievers/bm25-retriever.ts | 12 +- 19 files changed, 824 insertions(+), 448 deletions(-) create mode 100644 __tests__/api/agent/references.test.ts create mode 100644 src/app/api/agents/documentQ&A/services/references.ts create mode 100644 src/app/employer/documents/components/PdfPageViewer.tsx diff --git a/__tests__/api/agent/references.test.ts b/__tests__/api/agent/references.test.ts new file mode 100644 index 00000000..c3d707b9 --- /dev/null +++ b/__tests__/api/agent/references.test.ts @@ -0,0 +1,58 @@ +import { buildReferences, extractRecommendedPages } from "~/app/api/agents/documentQ&A/services/references"; +import type { SearchResult } from "~/lib/tools/rag"; + +describe("references service", () => { + it("extracts sorted unique recommended pages and ignores invalid values", () => { + const docs: SearchResult[] = [ + { pageContent: "A", metadata: { searchScope: "document", page: 3 } }, + { pageContent: "B", metadata: { searchScope: "document", page: 1 } }, + { pageContent: "C", metadata: { searchScope: "document", page: 3 } }, + { pageContent: "D", metadata: { searchScope: "document", page: 0 } }, + { pageContent: "E", metadata: { searchScope: "document" } }, + ]; + + expect(extractRecommendedPages(docs)).toEqual([1, 3]); + }); + + it("builds query-aware snippets using childContent when available", () => { + const docs: SearchResult[] = [ + { + pageContent: "General parent content that does not include the key phrase.", + metadata: { + searchScope: "document", + chunkId: 11, + page: 4, + documentId: 99, + documentTitle: "Policy Handbook", + source: "vector_ann", + childContent: "Vacation policy states employees receive 15 days paid time off annually.", + } as SearchResult["metadata"] & { childContent: string }, + }, + ]; + + const refs = buildReferences("How many vacation days do employees receive?", docs); + expect(refs).toHaveLength(1); + expect(refs[0]?.page).toBe(4); + expect(refs[0]?.documentTitle).toBe("Policy Handbook"); + expect(refs[0]?.snippet.toLowerCase()).toContain("vacation"); + }); + + it("falls back to prefix snippet and omits invalid page numbers", () => { + const docs: SearchResult[] = [ + { + pageContent: + "This section explains onboarding procedures and account setup for new hires in detail.", + metadata: { + searchScope: "document", + page: 0, + documentId: 10, + }, + }, + ]; + + const refs = buildReferences("What is covered here?", docs); + expect(refs).toHaveLength(1); + expect(refs[0]?.page).toBeUndefined(); + expect(refs[0]?.snippet.length).toBeGreaterThan(20); + }); +}); diff --git a/__tests__/api/fetchDocument/fetchDocument.test.ts b/__tests__/api/fetchDocument/fetchDocument.test.ts index fc77c9f5..86afa568 100644 --- a/__tests__/api/fetchDocument/fetchDocument.test.ts +++ b/__tests__/api/fetchDocument/fetchDocument.test.ts @@ -1,7 +1,7 @@ import { POST } from "~/app/api/fetchDocument/route"; import { auth } from "@clerk/nextjs/server"; import { validateRequestBody } from "~/lib/validation"; -import { db } from "~/server/db/index"; +import { dbCore } from "~/server/db/core"; jest.mock("@clerk/nextjs/server", () => ({ auth: jest.fn(), @@ -11,8 +11,9 @@ jest.mock("~/lib/validation", () => ({ validateRequestBody: jest.fn(), })); -jest.mock("~/server/db/index", () => ({ - db: { +// Route uses dbCore from core, not db from index +jest.mock("~/server/db/core", () => ({ + dbCore: { select: jest.fn(), }, })); @@ -53,7 +54,7 @@ describe("POST /api/fetchDocument", () => { }), }); - (db.select as jest.Mock) = mockSelect; + (dbCore.select as jest.Mock) = mockSelect; const request = new Request("http://localhost/api/fetchDocument", { method: "POST", @@ -91,7 +92,7 @@ describe("POST /api/fetchDocument", () => { }), }); - (db.select as jest.Mock) = mockSelect; + (dbCore.select as jest.Mock) = mockSelect; const request = new Request("http://localhost/api/fetchDocument", { method: "POST", @@ -121,7 +122,7 @@ describe("POST /api/fetchDocument", () => { where: jest.fn().mockResolvedValue([]), }), }); - (db.select as jest.Mock) = mockSelect; + (dbCore.select as jest.Mock) = mockSelect; const request = new Request("http://localhost/api/fetchDocument", { method: "POST", @@ -199,7 +200,7 @@ describe("POST /api/fetchDocument", () => { where: jest.fn().mockRejectedValue(new Error("Database connection failed")), }), }); - (db.select as jest.Mock) = mockSelect; + (dbCore.select as jest.Mock) = mockSelect; const request = new Request("http://localhost/api/fetchDocument", { method: "POST", @@ -245,7 +246,7 @@ describe("POST /api/fetchDocument", () => { }), }); - (db.select as jest.Mock) = mockSelect; + (dbCore.select as jest.Mock) = mockSelect; const request = new Request("http://localhost/api/fetchDocument", { method: "POST", @@ -277,7 +278,7 @@ describe("POST /api/fetchDocument", () => { where: jest.fn().mockResolvedValue([]), }), }); - (db.select as jest.Mock) = mockSelect; + (dbCore.select as jest.Mock) = mockSelect; const request = new Request("http://localhost/api/fetchDocument", { method: "POST", @@ -320,7 +321,7 @@ describe("POST /api/fetchDocument", () => { }), }); - (db.select as jest.Mock) = mockSelect; + (dbCore.select as jest.Mock) = mockSelect; const request = new Request("http://localhost/api/fetchDocument", { method: "POST", @@ -365,7 +366,7 @@ describe("POST /api/fetchDocument", () => { }), }); - (db.select as jest.Mock) = mockSelect; + (dbCore.select as jest.Mock) = mockSelect; const request = new Request("http://localhost/api/fetchDocument", { method: "POST", @@ -409,7 +410,7 @@ describe("POST /api/fetchDocument", () => { }), }); - (db.select as jest.Mock) = mockSelect; + (dbCore.select as jest.Mock) = mockSelect; const request = new Request("http://localhost/api/fetchDocument", { method: "POST", diff --git a/next.config.ts b/next.config.ts index 592e84fe..c17eca7b 100644 --- a/next.config.ts +++ b/next.config.ts @@ -65,9 +65,8 @@ const config: NextConfig = { // Exclude HuggingFace heavy files since we lazy load "node_modules/.pnpm/@huggingface+transformers@*/node_modules/@huggingface/transformers/dist/**/*.wasm", "node_modules/.pnpm/@huggingface+transformers@*/node_modules/@huggingface/transformers/models/**", - // Exclude pdfjs heavy files since we lazy load + // Exclude pdfjs heavy files since we lazy load (keep legacy build - used for Node.js/Inngest) "node_modules/.pnpm/pdfjs-dist@*/node_modules/pdfjs-dist/build/**/*.map", - "node_modules/.pnpm/pdfjs-dist@*/node_modules/pdfjs-dist/legacy/**", // Exclude pdf-parse test data (8MB) "node_modules/.pnpm/pdf-parse@*/node_modules/pdf-parse/test/**", "node_modules/pdf-parse/test/**", @@ -79,16 +78,12 @@ const config: NextConfig = { serverExternalPackages: [ "@huggingface/transformers", "pdf2pic", - "pdfjs-dist", + "pdfjs-serverless", "onnxruntime-web", - "onnxruntime-node", "sharp", "@img/sharp-libvips-linuxmusl-x64", "@img/sharp-libvips-linux-x64", - "pdf-parse", "pdf-lib", - "canvas", - "@napi-rs/canvas", ], }; diff --git a/package.json b/package.json index 042bee74..dbd6cf6a 100644 --- a/package.json +++ b/package.json @@ -99,6 +99,7 @@ "pdf-parse": "^1.1.1", "pdf2pic": "^3.2.0", "pdfjs-dist": "^5.4.530", + "pdfjs-serverless": "^1.1.0", "postgres": "^3.4.7", "prom-client": "^15.1.3", "re-resizable": "^6.11.2", @@ -107,6 +108,7 @@ "react-dom": "^18.3.1", "react-hook-form": "^7.55.0", "react-markdown": "^9.0.0", + "react-pdf": "^10.3.0", "react-resizable-panels": "^2.1.7", "recharts": "^2.15.2", "rehype-katex": "^7.0.1", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index ac2e3afb..71471827 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -236,6 +236,9 @@ importers: pdfjs-dist: specifier: ^5.4.530 version: 5.4.530 + pdfjs-serverless: + specifier: ^1.1.0 + version: 1.1.0 postgres: specifier: ^3.4.7 version: 3.4.7 @@ -260,6 +263,9 @@ importers: react-markdown: specifier: ^9.0.0 version: 9.1.0(@types/react@19.1.12)(react@18.3.1) + react-pdf: + specifier: ^10.3.0 + version: 10.3.0(@types/react@19.1.12)(react-dom@18.3.1(react@18.3.1))(react@18.3.1) react-resizable-panels: specifier: ^2.1.7 version: 2.1.9(react-dom@18.3.1(react@18.3.1))(react@18.3.1) @@ -6354,10 +6360,16 @@ packages: resolution: {integrity: sha512-h5bgJWpxJNswbU7qCrV0tIKQCaS3blPDrqKWx+QxzuzL1zGUzij9XCWLrSLsJPu5t+eWA/ycetzYAO5IOMcWAQ==} hasBin: true + make-cancellable-promise@2.0.0: + resolution: {integrity: sha512-3SEQqTpV9oqVsIWqAcmDuaNeo7yBO3tqPtqGRcKkEo0lrzD3wqbKG9mkxO65KoOgXqj+zH2phJ2LiAsdzlogSw==} + make-dir@4.0.0: resolution: {integrity: sha512-hXdUTZYIVOt1Ex//jAQi+wTZZpUpwBj/0QsOzqegb3rGMMeJiSEu5xLHnYfBrRV4RH2+OCSOO95Is/7x1WJ4bw==} engines: {node: '>=10'} + make-event-props@2.0.0: + resolution: {integrity: sha512-G/hncXrl4Qt7mauJEXSg3AcdYzmpkIITTNl5I+rH9sog5Yw0kK6vseJjCaPfOXqOqQuPUP89Rkhfz5kPS8ijtw==} + makeerror@1.0.12: resolution: {integrity: sha512-JmqCvUhmt43madlpFzG4BQzG2Z3m6tvQDNKdClZnO3VbIudJYmxsT0FNJMeiB2+JTSlTQTSbU8QdesVmwJcmLg==} @@ -6425,6 +6437,14 @@ packages: mdast-util-to-string@4.0.0: resolution: {integrity: sha512-0H44vDimn51F0YwvxSJSm0eCDOJTRlmN0R1yBh4HLj9wiV1Dn0QoXGbvFAWj2hSItVTlCmBF1hqKlIyUBVFLPg==} + merge-refs@2.0.0: + resolution: {integrity: sha512-3+B21mYK2IqUWnd2EivABLT7ueDhb0b8/dGK8LoFQPrU61YITeCMn14F7y7qZafWNZhUEKb24cJdiT5Wxs3prg==} + peerDependencies: + '@types/react': ^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0 + peerDependenciesMeta: + '@types/react': + optional: true + merge-stream@2.0.0: resolution: {integrity: sha512-abv/qOcuPfk3URPfDzmZU1LKmuw8kT+0nIHvKrKgFrwifol/doWcdA4ZqsWQ8ENrFKkd67Mfpo/LovbIUsbt3w==} @@ -6915,10 +6935,17 @@ packages: resolution: {integrity: sha512-p0bp+Mp4iJy2hqSCLvJ521rDaZkzBvDFT9O9Y0BUID3I04/eDaebAFM5t8hoWeo2BCf42cDijLCGJWTOtkJVpA==} engines: {node: '>=14'} + pdfjs-dist@5.4.296: + resolution: {integrity: sha512-DlOzet0HO7OEnmUmB6wWGJrrdvbyJKftI1bhMitK7O2N8W2gc757yyYBbINy9IDafXAV9wmKr9t7xsTaNKRG5Q==} + engines: {node: '>=20.16.0 || >=22.3.0'} + pdfjs-dist@5.4.530: resolution: {integrity: sha512-r1hWsSIGGmyYUAHR26zSXkxYWLXLMd6AwqcaFYG9YUZ0GBf5GvcjJSeo512tabM4GYFhxhl5pMCmPr7Q72Rq2Q==} engines: {node: '>=20.16.0 || >=22.3.0'} + pdfjs-serverless@1.1.0: + resolution: {integrity: sha512-4JNsyupufenSUfFpMxCsrkfzeskWFS5J8ksXPeSnaaEug9h0bbBNuI+WqEpUGHMLGrORakMOvnc9mN9rqF7iqA==} + peek-readable@4.1.0: resolution: {integrity: sha512-ZI3LnwUv5nOGbQzD9c2iDG6toheuXSZP5esSHBjopsXH4dg19soufvpUGA3uohi5anFtGb2lhAVdHzH6R/Evvg==} engines: {node: '>=8'} @@ -7202,6 +7229,16 @@ packages: '@types/react': '>=18' react: '>=18' + react-pdf@10.3.0: + resolution: {integrity: sha512-2LQzC9IgNVAX8gM+6F+1t/70a9/5RWThYxc+CWAmT2LW/BRmnj+35x1os5j/nR2oldyf8L+hCAMBmVKU8wrYFA==} + peerDependencies: + '@types/react': ^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0 + react: ^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0 + react-dom: ^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0 + peerDependenciesMeta: + '@types/react': + optional: true + react-remove-scroll-bar@2.3.8: resolution: {integrity: sha512-9r+yi9+mgU33AKcj6IbT9oRCO78WriSj6t/cF8DWBZJ9aOGPOTEDvdUDz1FwKim7QXWwmHqtdHnRJfhAxEG46Q==} engines: {node: '>=10'} @@ -8033,6 +8070,9 @@ packages: walker@1.0.8: resolution: {integrity: sha512-ts/8E8l5b7kY0vlWLewOkDXMmPdLcVV4GmOQLyxuSswIJsweeFZtAsMF7k1Nszz+TYBQrlYRmzOnr398y1JemQ==} + warning@4.0.3: + resolution: {integrity: sha512-rpJyN222KWIvHJ/F53XSZv0Zl/accqHR8et1kpaMTD/fLCRxtV8iX8czMzY7sVZupTI3zcUTg8eycS2kNF9l6w==} + wasm-feature-detect@1.8.0: resolution: {integrity: sha512-zksaLKM2fVlnB5jQQDqKXXwYHLQUVH9es+5TOOHwGOVJOCeRBCiPjwSg+3tN2AdTCzjgli4jijCH290kXb/zWQ==} @@ -14558,10 +14598,14 @@ snapshots: lz-string@1.5.0: {} + make-cancellable-promise@2.0.0: {} + make-dir@4.0.0: dependencies: semver: 7.7.3 + make-event-props@2.0.0: {} + makeerror@1.0.12: dependencies: tmpl: 1.0.5 @@ -14752,6 +14796,10 @@ snapshots: dependencies: '@types/mdast': 4.0.4 + merge-refs@2.0.0(@types/react@19.1.12): + optionalDependencies: + '@types/react': 19.1.12 + merge-stream@2.0.0: {} merge2@1.4.1: {} @@ -15366,10 +15414,16 @@ snapshots: transitivePeerDependencies: - supports-color + pdfjs-dist@5.4.296: + optionalDependencies: + '@napi-rs/canvas': 0.1.88 + pdfjs-dist@5.4.530: optionalDependencies: '@napi-rs/canvas': 0.1.88 + pdfjs-serverless@1.1.0: {} + peek-readable@4.1.0: {} pg-int8@1.0.1: {} @@ -15592,6 +15646,21 @@ snapshots: transitivePeerDependencies: - supports-color + react-pdf@10.3.0(@types/react@19.1.12)(react-dom@18.3.1(react@18.3.1))(react@18.3.1): + dependencies: + clsx: 2.1.1 + dequal: 2.0.3 + make-cancellable-promise: 2.0.0 + make-event-props: 2.0.0 + merge-refs: 2.0.0(@types/react@19.1.12) + pdfjs-dist: 5.4.296 + react: 18.3.1 + react-dom: 18.3.1(react@18.3.1) + tiny-invariant: 1.3.3 + warning: 4.0.3 + optionalDependencies: + '@types/react': 19.1.12 + react-remove-scroll-bar@2.3.8(@types/react@19.1.12)(react@18.3.1): dependencies: react: 18.3.1 @@ -16643,6 +16712,10 @@ snapshots: dependencies: makeerror: 1.0.12 + warning@4.0.3: + dependencies: + loose-envify: 1.4.0 + wasm-feature-detect@1.8.0: {} weaviate-client@3.8.1: diff --git a/src/app/api/agents/documentQ&A/AIChat/query/route.ts b/src/app/api/agents/documentQ&A/AIChat/query/route.ts index c14c9263..be45e54f 100644 --- a/src/app/api/agents/documentQ&A/AIChat/query/route.ts +++ b/src/app/api/agents/documentQ&A/AIChat/query/route.ts @@ -23,6 +23,8 @@ import { getWebSearchInstruction, getChatModel, getEmbeddings, + buildReferences, + extractRecommendedPages, } from "../../services"; import type { AIModelType } from "../../services"; import type { SYSTEM_PROMPTS } from "../../services/prompts"; @@ -316,6 +318,9 @@ export async function POST(request: Request) { console.log(`✅ [AIChat] Built context with pages: ${documents.map(doc => doc.metadata?.page).join(', ')}`); + // Build references for document highlights and page navigation + const references = buildReferences(question, documents, 5); + // Perform comprehensive web search if enabled const documentContext = documents.length > 0 ? documents.map(doc => doc.pageContent).join('\n\n') @@ -374,7 +379,7 @@ export async function POST(request: Request) { documentTitle: doc.title, question: question, response: summarizedAnswer, - pages: documents.map(doc => doc.metadata?.page).filter((page): page is number => page !== undefined), + pages: extractRecommendedPages(documents), queryType: "simple" }); } @@ -389,7 +394,8 @@ export async function POST(request: Request) { return NextResponse.json({ success: true, summarizedAnswer, - recommendedPages: documents.map(doc => doc.metadata?.page).filter((page): page is number => page !== undefined), + recommendedPages: extractRecommendedPages(documents), + references: references.length > 0 ? references : undefined, retrievalMethod, processingTimeMs: totalTime, chunksAnalyzed: documents.length, diff --git a/src/app/api/agents/documentQ&A/AIQuery/route.ts b/src/app/api/agents/documentQ&A/AIQuery/route.ts index 5c9420a7..9560b468 100644 --- a/src/app/api/agents/documentQ&A/AIQuery/route.ts +++ b/src/app/api/agents/documentQ&A/AIQuery/route.ts @@ -22,6 +22,7 @@ import { getWebSearchInstruction, getChatModel, getEmbeddings, + extractRecommendedPages, } from "../services"; import type { AIModelType } from "../services"; import type { SYSTEM_PROMPTS } from "../services/prompts"; @@ -281,7 +282,7 @@ export async function POST(request: Request) { return NextResponse.json({ success: true, summarizedAnswer, - recommendedPages: documents.map(doc => doc.metadata?.page).filter((page): page is number => page !== undefined), + recommendedPages: extractRecommendedPages(documents), retrievalMethod, processingTimeMs: totalTime, chunksAnalyzed: documents.length, diff --git a/src/app/api/agents/documentQ&A/services/index.ts b/src/app/api/agents/documentQ&A/services/index.ts index 0fc7f66d..c5026ecb 100644 --- a/src/app/api/agents/documentQ&A/services/index.ts +++ b/src/app/api/agents/documentQ&A/services/index.ts @@ -12,6 +12,7 @@ // Functions export { normalizeModelContent } from "./normalizeModelContent"; export { performWebSearch } from "./webSearch"; +export { buildReferences, extractRecommendedPages } from "./references"; export { performTavilySearch } from "./tavilySearch"; export { executeWebSearchAgent } from "./webSearchAgent"; export { SYSTEM_PROMPTS, getSystemPrompt, getWebSearchInstruction } from "./prompts"; @@ -34,6 +35,8 @@ export type { AIModelType, // Response Style Types ResponseStyle, + // Source Reference Types + SourceReference, // Web Search Types WebSearchResult, WebSearchAgentInput, diff --git a/src/app/api/agents/documentQ&A/services/references.ts b/src/app/api/agents/documentQ&A/services/references.ts new file mode 100644 index 00000000..0b9af527 --- /dev/null +++ b/src/app/api/agents/documentQ&A/services/references.ts @@ -0,0 +1,134 @@ +import type { SearchResult } from "~/lib/tools/rag"; +import type { SourceReference } from "./types"; + +const STOPWORDS = new Set([ + "a", "an", "and", "are", "as", "at", "be", "but", "by", "for", + "from", "how", "i", "in", "is", "it", "of", "on", "or", "that", + "the", "their", "this", "to", "was", "we", "what", "when", "where", + "which", "who", "why", "with", "you", "your", +]); + +function normalizeWhitespace(text: string): string { + return text.replace(/\s+/g, " ").trim(); +} + +function getQuestionKeywords(question: string): string[] { + const words = question + .toLowerCase() + .split(/[^a-z0-9]+/) + .filter((word) => word.length > 2 && !STOPWORDS.has(word)); + + return Array.from(new Set(words)).slice(0, 12); +} + +function getPageValue(value: unknown): number | undefined { + return typeof value === "number" && Number.isInteger(value) && value > 0 + ? value + : undefined; +} + +function extractSnippet(text: string, question: string): { + snippet: string; + matchText?: string; + matchStart?: number; + matchEnd?: number; + confidence: number; +} { + const normalizedText = normalizeWhitespace(text); + if (!normalizedText) { + return { snippet: "", confidence: 0 }; + } + + const maxSnippetLength = 240; + const keywords = getQuestionKeywords(question).sort((a, b) => b.length - a.length); + const haystack = normalizedText.toLowerCase(); + + let bestIndex = -1; + for (const keyword of keywords) { + const idx = haystack.indexOf(keyword); + if (idx >= 0) { + bestIndex = idx; + break; + } + } + + if (bestIndex < 0) { + const snippet = normalizedText.slice(0, maxSnippetLength).trimEnd(); + return { snippet, confidence: 0.25 }; + } + + const left = Math.max(0, bestIndex - 110); + const right = Math.min(normalizedText.length, left + maxSnippetLength); + const rawSnippet = normalizedText.slice(left, right).trim(); + + const prefix = left > 0 ? "... " : ""; + const suffix = right < normalizedText.length ? " ..." : ""; + const bestKeyword = keywords.find((keyword) => haystack.indexOf(keyword) === bestIndex); + return { + snippet: `${prefix}${rawSnippet}${suffix}`, + matchText: bestKeyword, + matchStart: bestIndex, + matchEnd: bestKeyword ? bestIndex + bestKeyword.length : undefined, + confidence: 0.8, + }; +} + +export function extractRecommendedPages(documents: SearchResult[]): number[] { + const pages = documents + .map((doc) => getPageValue(doc.metadata?.page)) + .filter((page): page is number => page !== undefined); + + if (pages.length > 1 && pages.every((page) => page === 1)) { + // Legacy fallback data often pins everything to page 1; hide misleading values. + return []; + } + + return Array.from(new Set(pages)).sort((a, b) => a - b); +} + +export function buildReferences( + question: string, + documents: SearchResult[], + maxReferences = 5 +): SourceReference[] { + const dedup = new Set(); + const references: SourceReference[] = []; + + for (const doc of documents) { + if (references.length >= maxReferences) { + break; + } + + const metadata = (doc.metadata ?? {}) as unknown as Record; + const childContent = typeof metadata.childContent === "string" ? metadata.childContent : ""; + const snippetResult = extractSnippet(childContent || doc.pageContent, question); + if (!snippetResult.snippet) { + continue; + } + + const page = getPageValue(metadata.page); + const reference: SourceReference = { + page, + snippet: snippetResult.snippet, + matchText: snippetResult.matchText, + matchStart: snippetResult.matchStart, + matchEnd: snippetResult.matchEnd, + confidence: snippetResult.confidence, + documentId: typeof metadata.documentId === "number" ? metadata.documentId : undefined, + documentTitle: typeof metadata.documentTitle === "string" ? metadata.documentTitle : undefined, + chunkId: typeof metadata.chunkId === "number" ? metadata.chunkId : undefined, + source: typeof metadata.source === "string" ? metadata.source : undefined, + }; + + const docId = reference.documentId ?? "unknown"; + const pageVal = reference.page ?? "unknown"; + const dedupKey = `${docId}|${pageVal}|${reference.snippet}`; + if (dedup.has(dedupKey)) { + continue; + } + dedup.add(dedupKey); + references.push(reference); + } + + return references; +} diff --git a/src/app/api/agents/documentQ&A/services/types.ts b/src/app/api/agents/documentQ&A/services/types.ts index b577c33b..e35b718c 100644 --- a/src/app/api/agents/documentQ&A/services/types.ts +++ b/src/app/api/agents/documentQ&A/services/types.ts @@ -147,6 +147,36 @@ export interface WebSearchInstructionParams { reasoning?: string; } +// ============================================================================ +// Source Reference Types +// ============================================================================ + +/** + * A reference to a source excerpt (e.g. from RAG search) for citations and highlighting + */ +export interface SourceReference { + /** Page number (1-based) if applicable */ + page?: number; + /** Snippet of text from the source */ + snippet: string; + /** Matched keyword/phrase for highlighting */ + matchText?: string; + /** Start index of match in snippet */ + matchStart?: number; + /** End index of match in snippet */ + matchEnd?: number; + /** Confidence score 0–1 */ + confidence?: number; + /** Document ID if from a stored document */ + documentId?: number; + /** Document title for display */ + documentTitle?: string; + /** Chunk ID within document */ + chunkId?: number; + /** Source identifier (e.g. URL) */ + source?: string; +} + // ============================================================================ // Search Scope Types // ============================================================================ diff --git a/src/app/employer/documents/components/AgentChatInterface.tsx b/src/app/employer/documents/components/AgentChatInterface.tsx index 2a75b12d..30114bea 100644 --- a/src/app/employer/documents/components/AgentChatInterface.tsx +++ b/src/app/employer/documents/components/AgentChatInterface.tsx @@ -16,7 +16,7 @@ import { X } from 'lucide-react'; import { useAIChatbot, type Message } from '../hooks/useAIChatbot'; -import { useAIChat } from '../hooks/useAIChat'; +import { useAIChat, type SourceReference } from '../hooks/useAIChat'; import { cn } from '~/lib/utils'; const MarkdownMessage = dynamic( @@ -39,6 +39,7 @@ interface AgentChatInterfaceProps { aiStyle?: string; aiPersona?: string; onPageClick?: (page: number) => void; + onReferencesResolved?: (references: SourceReference[]) => void; onCreateChat?: () => Promise; } @@ -53,6 +54,7 @@ export const AgentChatInterface: React.FC = ({ aiStyle = 'concise', aiPersona = 'general', onPageClick, + onReferencesResolved, onCreateChat, }) => { const { getMessages, sendMessage, voteMessage, error } = useAIChatbot(); @@ -72,6 +74,25 @@ export const AgentChatInterface: React.FC = ({ const loadAndCheckWelcome = async () => { const msgs = await getMessages(chatId); setMessages(msgs); + const latestReferencedMessage = [...msgs] + .reverse() + .find((msg) => + msg.role === "assistant" && + typeof msg.content === "object" && + msg.content !== null && + "references" in msg.content && + Array.isArray(msg.content.references) && + msg.content.references.length > 0 + ); + if ( + latestReferencedMessage && + typeof latestReferencedMessage.content === "object" && + latestReferencedMessage.content !== null && + "references" in latestReferencedMessage.content && + Array.isArray(latestReferencedMessage.content.references) + ) { + onReferencesResolved?.(latestReferencedMessage.content.references); + } if (msgs.length === 0 && aiPersona === 'learning-coach') { sendMessage({ @@ -203,14 +224,28 @@ export const AgentChatInterface: React.FC = ({ } const aiAnswer = aiData.summarizedAnswer ?? "I'm sorry, I couldn't generate a response right now. Could you try rephrasing your question?"; + const references = aiData.references ?? []; const pages = aiData.recommendedPages ?? []; const webSources = aiData.webSources ?? []; + if (references.length > 0) { + onReferencesResolved?.(references); + const firstPage = references[0]?.page; + if (typeof firstPage === "number") { + onPageClick?.(firstPage); + } + } else if (pages.length > 0) { + const firstFallbackPage = pages[0]; + if (typeof firstFallbackPage === "number") { + onPageClick?.(firstFallbackPage); + } + } const aiResponse = await sendMessage({ chatId: activeChatId, role: 'assistant', content: { text: aiAnswer, + references: references.length > 0 ? references : undefined, pages: pages, webSources: webSources.length > 0 ? webSources : undefined }, @@ -334,8 +369,34 @@ export const AgentChatInterface: React.FC = ({ {msg.role === 'assistant' && typeof msg.content === 'object' && msg.content !== null && ( <> - {/* Page References */} - {'pages' in msg.content && Array.isArray(msg.content.pages) && msg.content.pages.length > 0 && ( + {/* Source References */} + {'references' in msg.content && Array.isArray(msg.content.references) && msg.content.references.length > 0 && ( +
+

+ Page References +

+
+ {msg.content.references.map((reference: SourceReference, idx: number) => ( + + ))} +
+
+ )} + + {/* Legacy page references fallback */} + {(!('references' in msg.content) || !Array.isArray(msg.content.references) || msg.content.references.length === 0) && + 'pages' in msg.content && Array.isArray(msg.content.pages) && msg.content.pages.length > 0 && (

Referenced Pages @@ -459,122 +520,136 @@ export const AgentChatInterface: React.FC = ({

)} -
- {/* Tools Button */} -
- - - {/* Tools Menu */} - {showToolsMenu && ( -
-
- Tools - -
+ +
+
+ {/* Tools Button */} +
+ + {/* Tools Menu */} + {showToolsMenu && ( +
+
+ Tools + +
+ +
+ )}
- )} -
- {/* Input Field */} -
- {enableWebSearch && ( -
- - Web search on + {/* Input Field */} +
+ {enableWebSearch && ( +
+ + Web search on +
+ )} +