KoderFPV · KoderFPV · Feb 7, 2026
diff --git a/agents/__tests__/evaluation/conversationRunner.ts b/agents/__tests__/evaluation/conversationRunner.ts
@@ -1,4 +1,5 @@
 import { executeChatGraphWithStream, IStreamCallback } from '@/agents/graph/chatGraph';
+import { ISearchResult } from '@/agents/graph/state';
 import { IConversationTurn } from './evaluator';
 
 export interface IConversationScenario {
@@ -32,22 +33,26 @@ export const runConversation = async (
   const callbacks = createNoopCallbacks();
 
   const messages: Array<{ role: string; content: string }> = [];
+  let lastSearchResults: ISearchResult | null = null;
 
   for (const turn of scenario.turns) {
     messages.push({ role: 'user', content: turn.userMessage });
     conversation.push({ role: 'user', content: turn.userMessage });
 
-    const response = await executeChatGraphWithStream(
+    const result = await executeChatGraphWithStream(
       sessionId,
       scenario.locale,
       messages,
-      callbacks
+      callbacks,
+      lastSearchResults
     );
 
-    messages.push({ role: 'assistant', content: response });
-    conversation.push({ role: 'assistant', content: response });
+    lastSearchResults = result.lastSearchResults;
 
-    if (turn.validateResponse && !turn.validateResponse(response)) {
+    messages.push({ role: 'assistant', content: result.response });
+    conversation.push({ role: 'assistant', content: result.response });
+
+    if (turn.validateResponse && !turn.validateResponse(result.response)) {
       return {
         scenario,
         conversation,

diff --git a/agents/__tests__/evaluation/evaluator.ts b/agents/__tests__/evaluation/evaluator.ts
@@ -164,3 +164,26 @@ export const defaultChatCriteria: IEvaluationCriteria[] = [
     weight: 1,
   },
 ];
+
+/**
+ * Rubric passed to the LLM evaluator for product-details conversations
+ * (used by productDetails.e2e.test.ts).
+ *
+ * Weights as listed: Accuracy 3, Completeness 2, Reference Understanding 3,
+ * Natural Language 1. How the evaluator combines the weights is not defined
+ * in this constant.
+ */
+export const defaultProductDetailsCriteria: IEvaluationCriteria[] = [
+  // Correctness of the stated product facts.
+  { name: 'Accuracy', description: 'Does the assistant provide accurate product details?', weight: 3 },
+  // Coverage of the specifications that matter for the question.
+  { name: 'Completeness', description: 'Does the response include relevant specifications?', weight: 2 },
+  // Resolving references such as "the first one" to the right product.
+  {
+    name: 'Reference Understanding',
+    description: 'Does the assistant correctly identify which product the user is asking about?',
+    weight: 3,
+  },
+  // Readability of the reply itself.
+  { name: 'Natural Language', description: 'Is the response natural and easy to understand?', weight: 1 },
+];
diff --git a/agents/__tests__/evaluation/productDetails.e2e.test.ts b/agents/__tests__/evaluation/productDetails.e2e.test.ts
@@ -0,0 +1,183 @@
+import { describe, it, expect, beforeAll, afterAll } from 'vitest';
+import {
+  evaluateConversation,
+  defaultProductDetailsCriteria,
+  IEvaluationResult,
+  IConversationTurn,
+} from './evaluator';
+import { runConversation, IConversationScenario } from './conversationRunner';
+import { clearLastRunDirectory, saveFailedTest } from './testResultsReporter';
+import { setupTestProducts, teardownTestProducts } from './testFixtures';
+
+const MINIMUM_PASSING_SCORE = 3.5; // LLM-judge scores below this fail a scenario
+
+// File-level hooks; the trailing argument is the hook timeout in milliseconds.
+beforeAll(async function prepareFixtures() {
+  clearLastRunDirectory();
+  await setupTestProducts();
+}, 60 * 1000);
+afterAll(async function removeFixtures() {
+  await teardownTestProducts();
+}, 30 * 1000);
+
+// Builds a scenario; every string in `userMessages` becomes one `{ userMessage }` turn.
+const detailsScenario = (
+  name: string,
+  locale: IConversationScenario['locale'],
+  userMessages: string[],
+  expectedBehavior: string
+): IConversationScenario => {
+  return { name, locale, turns: userMessages.map((userMessage) => ({ userMessage })), expectedBehavior };
+};
+
+// Product-details requests: by list position, by full or partial name, in Polish, and for an unknown product.
+const productDetailsScenarios: IConversationScenario[] = [
+  detailsScenario(
+    'Product details by position',
+    'en',
+    ['Show me laptops', 'What are the specs of the first one?'],
+    'After showing laptops, the assistant should provide detailed specifications of the first laptop including RAM, processor, storage from attributes or description.'
+  ),
+  detailsScenario(
+    'Product details by name',
+    'en',
+    ['Tell me about Gaming Laptop Pro X1'],
+    'The assistant should provide detailed information about the Gaming Laptop Pro X1 including specifications like RAM, GPU, and storage.'
+  ),
+  detailsScenario(
+    'Product details in Polish',
+    'pl',
+    ['Pokaż laptopy', 'Jaki procesor ma pierwszy?'],
+    'The assistant should provide processor details of the first laptop in Polish language.'
+  ),
+  detailsScenario(
+    'Non-existent product',
+    'en',
+    ['Tell me about SuperPhone 3000'],
+    'The assistant should indicate that the product was not found or ask for more information.'
+  ),
+  detailsScenario(
+    'Product details by partial name',
+    'en',
+    ['What specs does the iPhone have?'],
+    'The assistant should provide details about the iPhone 15 Pro Max including processor and storage.'
+  ),
+];
+
+// Each scenario runs once in beforeAll; the tests assert on the cached LLM-judge result.
+describe('Product Details E2E Evaluation', () => {
+  describe.each(productDetailsScenarios)('Scenario: $name', (scenario) => {
+    let evaluationResult: IEvaluationResult;
+
+    beforeAll(async () => {
+      const conversationResult = await runConversation(scenario);
+      const conversation: IConversationTurn[] = conversationResult.conversation;
+
+      console.log(`\n=== Conversation: ${scenario.name} ===`);
+      for (const turn of conversation) {
+        console.log(`${turn.role.toUpperCase()}: ${turn.content}`);
+      }
+
+      expect(conversationResult.success).toBe(true);
+
+      evaluationResult = await evaluateConversation(
+        conversation,
+        defaultProductDetailsCriteria,
+        scenario.expectedBehavior
+      );
+      console.log(`\nEvaluation Score: ${evaluationResult.score}`);
+      console.log(`Reasoning: ${evaluationResult.reasoning}\n`);
+
+      // Save a report for every outcome the first test rejects, not only low scores.
+      if (evaluationResult.score < MINIMUM_PASSING_SCORE || !evaluationResult.passed) {
+        saveFailedTest(scenario, conversation, evaluationResult);
+      }
+    }, 180000);
+
+    it(`should pass LLM evaluation with score >= ${MINIMUM_PASSING_SCORE}`, () => {
+      expect(evaluationResult.score).toBeGreaterThanOrEqual(MINIMUM_PASSING_SCORE);
+      expect(evaluationResult.passed).toBe(true);
+    });
+
+    it('should have valid reasoning', () => {
+      expect(evaluationResult.reasoning).toBeTruthy();
+      expect(evaluationResult.reasoning.length).toBeGreaterThan(10);
+    });
+  });
+});
+
+// Lower pass mark (vs. MINIMUM_PASSING_SCORE) applied to the three-turn scenario below.
+const MULTI_TURN_COMPLEX_MIN_SCORE = 3.0;
+
+// Pairs a scenario with the minimum LLM-judge score it has to reach.
+const withMinScore = (
+  minScore: number,
+  scenario: IConversationScenario
+): { scenario: IConversationScenario; minScore: number } => ({ scenario, minScore });
+
+const multiTurnDetailsScenarios: Array<{ scenario: IConversationScenario; minScore: number }> = [
+  // Three turns: search, details of the first result, details of the second.
+  withMinScore(MULTI_TURN_COMPLEX_MIN_SCORE, {
+    name: 'Search then ask for multiple products',
+    locale: 'en',
+    turns: [
+      { userMessage: 'Show me smartphones' },
+      { userMessage: 'Tell me more about the first one' },
+      { userMessage: 'What about the second one?' },
+    ],
+    expectedBehavior:
+      'The assistant should show smartphones first, then provide details for the first smartphone, then provide details for the second smartphone. Each product should have specifications.',
+  }),
+  // Two turns: search, then a single attribute of the first result.
+  withMinScore(MINIMUM_PASSING_SCORE, {
+    name: 'Search then compare',
+    locale: 'en',
+    turns: [
+      { userMessage: 'I need a laptop' },
+      { userMessage: 'How much RAM does the first one have?' },
+    ],
+    expectedBehavior:
+      'The assistant should first show laptops, then provide the RAM specification for the first laptop when asked.',
+  }),
+];
+
+// Runs each multi-turn scenario once, then checks the judge's score against that entry's own minimum.
+describe('Multi-Turn Product Details E2E Evaluation', () => {
+  describe.each(multiTurnDetailsScenarios)('Scenario: $scenario.name', ({ scenario, minScore }) => {
+    let verdict: IEvaluationResult;
+
+    beforeAll(async () => {
+      const runResult = await runConversation(scenario);
+      const transcript: IConversationTurn[] = runResult.conversation;
+
+      console.log(`\n=== Multi-Turn: ${scenario.name} ===`);
+      for (const { role, content } of transcript) {
+        console.log(`${role.toUpperCase()}: ${content}`);
+      }
+
+      // A failed run aborts the hook here, before the judge is called.
+      expect(runResult.success).toBe(true);
+
+      verdict = await evaluateConversation(
+        transcript,
+        defaultProductDetailsCriteria,
+        scenario.expectedBehavior
+      );
+      console.log(`\nEvaluation Score: ${verdict.score}`);
+      console.log(`Reasoning: ${verdict.reasoning}\n`);
+
+      if (minScore > verdict.score) {
+        saveFailedTest(scenario, transcript, verdict);
+      }
+    }, 240000);
+
+    it(`should pass LLM evaluation with score >= ${minScore}`, () => {
+      expect(verdict.score).toBeGreaterThanOrEqual(minScore);
+    });
+
+    it('should have valid reasoning', () => {
+      expect(verdict.reasoning).toBeTruthy();
+      expect(verdict.reasoning.length).toBeGreaterThan(10);
+    });
+  });
+});
diff --git a/agents/__tests__/evaluation/testFixtures.ts b/agents/__tests__/evaluation/testFixtures.ts
@@ -13,6 +13,12 @@ export const TEST_PRODUCTS: IProductCreateInput[] = [
     stock: 15,
     category: 'Laptops',
     isActive: true,
+    attributes: [
+      { name: 'RAM', value: '32', unit: 'GB' },
+      { name: 'GPU', value: 'RTX 4080' },
+      { name: 'Storage', value: '1', unit: 'TB SSD' },
+      { name: 'Processor', value: 'Intel Core i9-13900HX' },
+    ],
   },
   {
     name: 'Business Laptop Elite',
@@ -22,6 +28,12 @@ export const TEST_PRODUCTS: IProductCreateInput[] = [
     stock: 25,
     category: 'Laptops',
     isActive: true,
+    attributes: [
+      { name: 'RAM', value: '16', unit: 'GB' },
+      { name: 'Processor', value: 'Intel Core i7-1365U' },
+      { name: 'Storage', value: '512', unit: 'GB SSD' },
+      { name: 'Weight', value: '1.3', unit: 'kg' },
+    ],
   },
   {
     name: 'Budget Laptop Basic',
@@ -31,6 +43,11 @@ export const TEST_PRODUCTS: IProductCreateInput[] = [
     stock: 50,
     category: 'Laptops',
     isActive: true,
+    attributes: [
+      { name: 'RAM', value: '8', unit: 'GB' },
+      { name: 'Processor', value: 'Intel Core i5-1235U' },
+      { name: 'Storage', value: '256', unit: 'GB SSD' },
+    ],
   },
   {
     name: 'Samsung Galaxy S24 Ultra',
@@ -40,6 +57,12 @@ export const TEST_PRODUCTS: IProductCreateInput[] = [
     stock: 30,
     category: 'Smartphones',
     isActive: true,
+    attributes: [
+      { name: 'RAM', value: '12', unit: 'GB' },
+      { name: 'Storage', value: '512', unit: 'GB' },
+      { name: 'Camera', value: '200', unit: 'MP' },
+      { name: 'Display', value: '6.8', unit: 'inch' },
+    ],
   },
   {
     name: 'iPhone 15 Pro Max',
@@ -49,6 +72,12 @@ export const TEST_PRODUCTS: IProductCreateInput[] = [
     stock: 20,
     category: 'Smartphones',
     isActive: true,
+    attributes: [
+      { name: 'Processor', value: 'A17 Pro' },
+      { name: 'Storage', value: '256', unit: 'GB' },
+      { name: 'Display', value: '6.7', unit: 'inch' },
+      { name: 'Material', value: 'Titanium' },
+    ],
   },
   {
     name: 'Xiaomi 14 Pro',
@@ -58,6 +87,11 @@ export const TEST_PRODUCTS: IProductCreateInput[] = [
     stock: 40,
     category: 'Smartphones',
     isActive: true,
+    attributes: [
+      { name: 'Processor', value: 'Snapdragon 8 Gen 3' },
+      { name: 'Storage', value: '256', unit: 'GB' },
+      { name: 'Camera', value: 'Leica' },
+    ],
   },
   {
     name: 'Mechanical Gaming Keyboard RGB',
@@ -67,6 +101,11 @@ export const TEST_PRODUCTS: IProductCreateInput[] = [
     stock: 100,
     category: 'Gaming Peripherals',
     isActive: true,
+    attributes: [
+      { name: 'Switch Type', value: 'Cherry MX' },
+      { name: 'Backlight', value: 'RGB' },
+      { name: 'Keys', value: '104' },
+    ],
   },
   {
     name: 'Gaming Mouse Pro',
@@ -76,6 +115,11 @@ export const TEST_PRODUCTS: IProductCreateInput[] = [
     stock: 80,
     category: 'Gaming Peripherals',
     isActive: true,
+    attributes: [
+      { name: 'DPI', value: '25000' },
+      { name: 'Buttons', value: '8' },
+      { name: 'Lighting', value: 'RGB' },
+    ],
   },
   {
     name: 'Sony WH-1000XM5 Headphones',
@@ -85,6 +129,11 @@ export const TEST_PRODUCTS: IProductCreateInput[] = [
     stock: 35,
     category: 'Audio',
     isActive: true,
+    attributes: [
+      { name: 'Battery Life', value: '30', unit: 'hours' },
+      { name: 'Noise Cancellation', value: 'Active' },
+      { name: 'Connection', value: 'Wireless Bluetooth' },
+    ],
   },
   {
     name: 'AirPods Pro 2',
@@ -94,6 +143,11 @@ export const TEST_PRODUCTS: IProductCreateInput[] = [
     stock: 45,
     category: 'Audio',
     isActive: true,
+    attributes: [
+      { name: 'Noise Cancellation', value: 'Active' },
+      { name: 'Audio', value: 'Spatial Audio' },
+      { name: 'Type', value: 'Wireless Earbuds' },
+    ],
   },
 ];