diff --git a/backend/controllers/ragController.js b/backend/controllers/ragController.js index ac14319..81d3503 100644 --- a/backend/controllers/ragController.js +++ b/backend/controllers/ragController.js @@ -12,7 +12,7 @@ import logger from '../config/logger.js'; * This controller only handles HTTP request/response shaping. */ export const askQuestion = catchAsync(async (req, res) => { - const { question, conversationId, filters, useIntentAware = true, forceIntent } = req.body; + const { question, conversationId, filters, useIntentAware = true, forceIntent, lang } = req.body; if (!conversationId) { return sendError(res, 400, 'conversationId is required'); @@ -29,6 +29,7 @@ export const askQuestion = catchAsync(async (req, res) => { authorizedWorkspaceIds: req.authorizedWorkspaces?.map((w) => w.workspaceId) || [], forceIntent: forceIntent || null, useIntentAware, + lang, }); // B1: honor the workspace canViewSources permission before returning sources. @@ -54,7 +55,7 @@ export const askQuestion = catchAsync(async (req, res) => { * progressively. */ export const askQuestionStream = catchAsync(async (req, res) => { - const { question, conversationId, filters, useIntentAware = true, forceIntent } = req.body; + const { question, conversationId, filters, useIntentAware = true, forceIntent, lang } = req.body; if (!conversationId) { return sendError(res, 400, 'conversationId is required'); @@ -92,6 +93,7 @@ export const askQuestionStream = catchAsync(async (req, res) => { authorizedWorkspaceIds: req.authorizedWorkspaces?.map((w) => w.workspaceId) || [], forceIntent: forceIntent || null, useIntentAware, + lang, onEvent: send, }); } catch (error) { diff --git a/backend/services/rag.js b/backend/services/rag.js index 668d63b..5758e99 100644 --- a/backend/services/rag.js +++ b/backend/services/rag.js @@ -586,7 +586,7 @@ class RAGService { // unseeded compliance_kb) doesn't take down the other. 
// EUR-Lex CELEX identifiers for the regulations in the KB. DORA-RTS spans
// several delegated acts with no single reliable CELEX, so it gets no direct
// link (better no link than a misleading one).
const CELEX_BY_REGULATION = { DORA: '32022R2554' };

/**
 * Build the official EUR-Lex source URL for a regulation (#424).
 *
 * French locales ('fr', 'fr-FR', 'fr_CA', in any casing) link to the French
 * text. Every other value, including a missing one, links to the English text.
 *
 * @param {string} regulation - Regulation key to look up in CELEX_BY_REGULATION, e.g. 'DORA'.
 * @param {string} [lang] - UI locale tag.
 * @returns {string|undefined} The EUR-Lex URL, or undefined when no CELEX
 *   identifier is registered for `regulation`.
 */
export function eurLexUrl(regulation, lang) {
  // Own-property lookup only: a plain object also answers to inherited keys
  // such as 'constructor' or 'toString', which would otherwise be treated as
  // CELEX identifiers and interpolated into the URL.
  const celex = Object.prototype.hasOwnProperty.call(CELEX_BY_REGULATION, regulation)
    ? CELEX_BY_REGULATION[regulation]
    : undefined;
  if (!celex) return undefined;

  // Compare the primary language subtag exactly. A prefix test would also
  // classify unrelated tags that merely begin with "fr" (e.g. 'frr') as French.
  const [primarySubtag] = String(lang || 'en')
    .trim()
    .toLowerCase()
    .split(/[-_]/);
  const langCode = primarySubtag === 'fr' ? 'FR' : 'EN';

  return `https://eur-lex.europa.eu/legal-content/${langCode}/TXT/?uri=CELEX:${celex}`;
}
*/ -function adaptRegulationDoc(doc) { +function adaptRegulationDoc(doc, lang) { const meta = doc.metadata || {}; const regulation = meta.regulation || 'Regulation'; const article = meta.article || ''; @@ -72,6 +94,8 @@ function adaptRegulationDoc(doc) { const documentTitle = [regulation, article].filter(Boolean).join(' ') + (title ? `: ${title}` : ''); + const url = eurLexUrl(regulation, lang); + return { pageContent: doc.pageContent, metadata: { @@ -80,6 +104,7 @@ function adaptRegulationDoc(doc) { documentTitle: documentTitle || regulation, heading_path: [regulation, article].filter(Boolean), documentType: 'regulation', + ...(url ? { url } : {}), }, }; } @@ -93,7 +118,7 @@ function adaptRegulationDoc(doc) { * @param {number} [k=5] * @returns {Promise>} */ -export async function retrieveRegulationDocs(query, k = 5) { +export async function retrieveRegulationDocs(query, k = 5, lang = 'en') { if (!query || typeof query !== 'string') return []; const store = await getComplianceKbStore(); @@ -101,7 +126,7 @@ export async function retrieveRegulationDocs(query, k = 5) { try { const docs = await store.similaritySearch(query, k); - return docs.map(adaptRegulationDoc); + return docs.map((doc) => adaptRegulationDoc(doc, lang)); } catch (error) { logger.warn('compliance_kb similarity search failed', { service: 'compliance-kb-retriever', diff --git a/backend/services/ragExecutor.js b/backend/services/ragExecutor.js index 2f72613..0bd90e8 100644 --- a/backend/services/ragExecutor.js +++ b/backend/services/ragExecutor.js @@ -33,6 +33,7 @@ export async function executeRAG({ onEvent = null, userId = null, authorizedWorkspaceIds = null, + lang = 'en', }) { logger.info('Executing RAG query', { service: 'rag-executor', @@ -47,6 +48,7 @@ export async function executeRAG({ onEvent, userId, authorizedWorkspaceIds, + lang, }); logger.info('RAG query completed', { diff --git a/backend/tests/unittest/complianceKbSourceUrl.test.js b/backend/tests/unittest/complianceKbSourceUrl.test.js new 
file mode 100644 index 0000000..42cbdeb --- /dev/null +++ b/backend/tests/unittest/complianceKbSourceUrl.test.js @@ -0,0 +1,35 @@ +import { describe, it, expect, vi } from 'vitest'; + +// Avoid pulling the real LangChain Qdrant store at import time. +vi.mock('@langchain/qdrant', () => ({ + QdrantVectorStore: { fromExistingCollection: vi.fn() }, +})); + +const { eurLexUrl } = await import('../../services/rag/complianceKbRetriever.js'); + +/** + * #424 — regulation citations link to the OFFICIAL EUR-Lex text, in the user's + * language (the regulation is published verbatim in all EU languages). + */ +describe('compliance KB source URLs (#424)', () => { + it('links DORA to the English EUR-Lex text by default', () => { + expect(eurLexUrl('DORA', 'en')).toBe( + 'https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX:32022R2554' + ); + }); + + it('links DORA to the French EUR-Lex text for fr / fr-FR', () => { + expect(eurLexUrl('DORA', 'fr')).toContain('/FR/TXT/'); + expect(eurLexUrl('DORA', 'fr-FR')).toContain('/FR/TXT/'); + }); + + it('defaults to EN for an unknown or missing locale', () => { + expect(eurLexUrl('DORA', undefined)).toContain('/EN/TXT/'); + expect(eurLexUrl('DORA', 'de')).toContain('/EN/TXT/'); + }); + + it('returns no URL where we have no reliable CELEX (DORA-RTS, unknown)', () => { + expect(eurLexUrl('DORA-RTS', 'fr')).toBeUndefined(); + expect(eurLexUrl('Unknown', 'en')).toBeUndefined(); + }); +}); diff --git a/backend/tests/unittest/ragService.test.js b/backend/tests/unittest/ragService.test.js index 547368a..d6917e4 100644 --- a/backend/tests/unittest/ragService.test.js +++ b/backend/tests/unittest/ragService.test.js @@ -505,7 +505,8 @@ describe('askWithConversation', () => { expect(vendorSearch).toHaveBeenCalledTimes(1); expect(retrieveRegulationDocs).toHaveBeenCalledTimes(1); - expect(retrieveRegulationDocs).toHaveBeenCalledWith(expect.any(String), 5); + // 3rd arg = the UI locale, threaded through for language-aware source links (#424). 
+ expect(retrieveRegulationDocs).toHaveBeenCalledWith(expect.any(String), 5, undefined); // Reranker should see merged set (vendor first, regulation second). const mergedArg = rerankDocuments.mock.calls[0][0]; diff --git a/backend/validators/schemas.js b/backend/validators/schemas.js index 9a65f49..68f4429 100644 --- a/backend/validators/schemas.js +++ b/backend/validators/schemas.js @@ -34,6 +34,8 @@ export const askQuestionSchema = z // workspaceId may arrive in the body (requireWorkspaceAccess also reads it from // the X-Workspace-Id header); declare it so .strict() doesn't reject it. workspaceId: z.string().max(64).optional(), + // UI locale (e.g. 'fr', 'en-US') — drives language-aware regulation source links. + lang: z.string().max(10).optional(), filters: z .object({ page: z.number().int().positive().optional(), @@ -58,6 +60,8 @@ export const streamQuestionSchema = z conversationId: z.string().regex(/^[0-9a-fA-F]{24}$/, 'Invalid conversation ID'), // See askQuestionSchema: workspaceId may be sent in the body. workspaceId: z.string().max(64).optional(), + // UI locale (e.g. 'fr', 'en-US') — drives language-aware regulation source links. + lang: z.string().max(10).optional(), filters: z .object({ page: z.number().int().positive().optional(), diff --git a/frontend/src/features/chat/hooks/use-streaming.ts b/frontend/src/features/chat/hooks/use-streaming.ts index 7dcd075..dc4631b 100644 --- a/frontend/src/features/chat/hooks/use-streaming.ts +++ b/frontend/src/features/chat/hooks/use-streaming.ts @@ -3,6 +3,7 @@ import { useState, useCallback, useRef } from 'react'; import type { Source } from '@/types'; import { getActiveWorkspaceContextId } from '@/shared/lib/workspace-context'; +import i18n from '@/shared/i18n/config'; /** * ISSUE #41 FIX: Streaming timeout configuration @@ -111,7 +112,7 @@ export function useStreaming(options: UseStreamingOptions = {}) { ...(resolvedWorkspaceId ? 
{ 'X-Workspace-Id': resolvedWorkspaceId } : {}), }, credentials: 'include', - body: JSON.stringify({ question, conversationId }), + body: JSON.stringify({ question, conversationId, lang: i18n.language }), signal: controller.signal, }); diff --git a/frontend/src/tests/use-streaming.test.ts b/frontend/src/tests/use-streaming.test.ts index 8121cbc..6df6dd3 100644 --- a/frontend/src/tests/use-streaming.test.ts +++ b/frontend/src/tests/use-streaming.test.ts @@ -169,15 +169,12 @@ describe('useStreaming', () => { result.current.startStreaming('test question', 'conv-123'); }); - expect(mockFetch).toHaveBeenCalledWith( - expect.any(String), - expect.objectContaining({ - body: JSON.stringify({ - question: 'test question', - conversationId: 'conv-123', - }), - }) - ); + // Parse the body so the assertion ignores the added `lang` field (#424). + const body = JSON.parse((mockFetch.mock.calls[0][1] as { body: string }).body); + expect(body).toMatchObject({ + question: 'test question', + conversationId: 'conv-123', + }); }); it('should use POST method', () => {