From 2225602a7b9e8bb21c924959b9eec96b9f41b43b Mon Sep 17 00:00:00 2001
From: Diego <53939730+DiegoIEC@users.noreply.github.com>
Date: Wed, 6 May 2026 14:43:35 +0200
Subject: [PATCH] Add tests for the LLM aggregator and analytics service
---
Backend/AI_stats_measurement.Backend.csproj | 1 +
Backend/Services/AnalyticsService.cs | 12 +-
Tests/AI_stats_measurement.Tests.csproj | 1 +
Tests/AnalyticsServiceTest.cs | 142 +++++++++++++++++++-
Tests/LlmAggregatorTests.cs | 108 +++++++++++++++
Tests/ModelResponseParserTests.cs | 20 ++-
6 files changed, 260 insertions(+), 24 deletions(-)
create mode 100644 Tests/LlmAggregatorTests.cs
diff --git a/Backend/AI_stats_measurement.Backend.csproj b/Backend/AI_stats_measurement.Backend.csproj
index ceddabf..652f6be 100644
--- a/Backend/AI_stats_measurement.Backend.csproj
+++ b/Backend/AI_stats_measurement.Backend.csproj
@@ -25,6 +25,7 @@
+
diff --git a/Backend/Services/AnalyticsService.cs b/Backend/Services/AnalyticsService.cs
index e9cf5a4..9b173af 100644
--- a/Backend/Services/AnalyticsService.cs
+++ b/Backend/Services/AnalyticsService.cs
@@ -384,15 +384,15 @@ private static bool MatchesLlmGroup(string provider, string? llmGroup)
|| provider.StartsWith("grok-4.20-reasoning", StringComparison.OrdinalIgnoreCase);
}
- // fallback (normal filtering)
+ // fallback: case-insensitive prefix match of the provider against llmGroup
return provider.StartsWith(llmGroup, StringComparison.OrdinalIgnoreCase);
}
public Dictionary GetWeeklyMetricsPerNsi(
- List results,
- string? nsi,
- string? llm,
- string? theme)
+ List results,
+ string? nsi,
+ string? llm,
+ string? theme)
{
var filtered = ApplyFilters(results, nsi, llm, theme);
@@ -436,8 +436,6 @@ public Dictionary GetWeeklyMetricsPerNsi(
);
}
-
-
public class MetricResultDto
{
public double Score { get; set; }
diff --git a/Tests/AI_stats_measurement.Tests.csproj b/Tests/AI_stats_measurement.Tests.csproj
index a766caa..7837b0c 100644
--- a/Tests/AI_stats_measurement.Tests.csproj
+++ b/Tests/AI_stats_measurement.Tests.csproj
@@ -13,6 +13,7 @@
all
runtime; build; native; contentfiles; analyzers; buildtransitive
+
diff --git a/Tests/AnalyticsServiceTest.cs b/Tests/AnalyticsServiceTest.cs
index 2e57f91..de31b5b 100644
--- a/Tests/AnalyticsServiceTest.cs
+++ b/Tests/AnalyticsServiceTest.cs
@@ -1,4 +1,7 @@
-using AI_stats_measurement.Backend.Services.Parsing;
+using AI_stats_measurement.Backend.Models;
+using AI_stats_measurement.Backend.Services;
+using AI_stats_measurement.Backend.Services.Parsing;
+using AI_stats_measurement.Models;
using System;
using System.Collections.Generic;
using System.Linq;
@@ -7,16 +10,145 @@
namespace AI_stats_measurement.Tests
{
+
public class AnalyticsServiceTest
{
[Fact]
- public void ComputeAccuracyScore_()
+        public void GetMetricsPerNsi_Computes_FindabilityScore()
+        {
+            // Exactly one of the two CBS results has sourceIsCorrect = true.
+            var facts = new List<FactCheckResult>
+            {
+                CreateResult(1, "CBS", "gpt-4o-mini", 100, 100, true),
+                CreateResult(2, "CBS", "gpt-4o-mini", 90, 100, false),
+            };
+            var analytics = new AnalyticsService();
+
+            var perNsi = analytics.GetMetricsPerNsi(facts, "CBS", null, null);
+
+            var cbs = Assert.Single(perNsi);
+            Assert.Equal(5.0, cbs.FindabilityScore);
+        }
+
+        [Fact]
+        public void GetMetricsPerNsi_Computes_PerfectConsistencyScore_WhenAnswersAreEqual()
+        {
+            // Both results share prompt id 1 and carry the same answer (100).
+            var facts = new List<FactCheckResult>
+            {
+                CreateResult(1, "CBS", "gpt-4o-mini", 100, 100, true),
+                CreateResult(1, "CBS", "gpt-4o-mini", 100, 100, true),
+            };
+            var analytics = new AnalyticsService();
+
+            var perNsi = analytics.GetMetricsPerNsi(facts, "CBS", null, null);
+
+            var cbs = Assert.Single(perNsi);
+            Assert.Equal(10.0, cbs.ConsistencyScore);
+        }
+
+        [Fact]
+        public void GetMetricsPerNsi_GroupsResults_PerNsi()
+        {
+            // No NSI, LLM or theme filter is passed; one result each for CBS and OECD.
+            var facts = new List<FactCheckResult>
+            {
+                CreateResult(1, "CBS", "gpt-4o-mini", 100, 100, true),
+                CreateResult(2, "OECD", "gpt-4o-mini", 200, 200, true),
+            };
+            var analytics = new AnalyticsService();
+
+            var perNsi = analytics.GetMetricsPerNsi(facts, null, null, null);
+
+            Assert.Equal(2, perNsi.Count);
+            Assert.Contains(perNsi, m => m.Nsi == "CBS");
+            Assert.Contains(perNsi, m => m.Nsi == "OECD");
+        }
+
+        [Fact]
+        public void GetMetricsPerNsi_Filters_ByNsi()
+        {
+            // The input holds a CBS and an OECD result; only "CBS" is requested.
+            var facts = new List<FactCheckResult>
+            {
+                CreateResult(1, "CBS", "gpt-4o-mini", 100, 100, true),
+                CreateResult(2, "OECD", "gpt-4o-mini", 200, 200, true),
+            };
+            var analytics = new AnalyticsService();
+
+            var perNsi = analytics.GetMetricsPerNsi(facts, "CBS", null, null);
+
+            var only = Assert.Single(perNsi);
+            Assert.Equal("CBS", only.Nsi);
+        }
+
+
+ [Fact]
+ public void GetWeeklyMetricsPerNsi_GroupsResults_ByWeek() // expects one entry per creation week in each CBS metric series
+ {
+ var service = new AnalyticsService();
+
+ var results = new List // NOTE(review): generic type argument appears to be missing (likely List<FactCheckResult>) — confirm
{
- var text = "In 2020 was de gemiddelde verkoopprijs ongeveer € 348.000.";
+ CreateResult(1, "CBS", "gpt-4o-mini", 100, 100, true, new DateTime(2026, 1, 5, 0, 0, 0, DateTimeKind.Utc)), // created 2026-01-05 (UTC)
+ CreateResult(2, "CBS", "gpt-4o-mini", 90, 100, false, new DateTime(2026, 1, 12, 0, 0, 0, DateTimeKind.Utc)), // created 2026-01-12 (UTC), seven days later
+ };
- var parsed = ModelResponseParser.ParseDutch(0, text);
+ var weekly = service.GetWeeklyMetricsPerNsi(results, "CBS", null, null); // filtered to NSI "CBS"; no LLM or theme filter
- Assert.Equal(348_000m, parsed.Answer);
+ Assert.True(weekly.ContainsKey("CBS")); // the result is keyed by NSI name
+ Assert.Equal(2, weekly["CBS"].Findability.Count); // two creation dates a week apart -> two points expected per series
+ Assert.Equal(2, weekly["CBS"].Accuracy.Count);
+ Assert.Equal(2, weekly["CBS"].Consistency.Count);
}
+
+        /// <summary>Builds a FactCheckResult linked to a ParsedModelResponse, a ModelResponse and a Prompt.</summary>
+        /// <remarks>promptId is also used as the ModelResponse id; createdUtc defaults to DateTime.UtcNow.</remarks>
+        private static FactCheckResult CreateResult(
+            int promptId,
+            string nsi,
+            string llm,
+            decimal actualAnswer,
+            decimal expectedAnswer,
+            bool sourceIsCorrect,
+            DateTime? createdUtc = null)
+        {
+            var source = new Source { Id = 1, Name = "Test Source" };
+            // UtcNow (not Now) so this timestamp uses the same clock as the response timestamp below.
+            var prompt = new Prompt(nsi, "test", "test", DateTime.UtcNow, "none", "question", expectedAnswer, source, "");
+
+            var modelResponse = ModelResponse.Import(
+                id: promptId,
+                promptId: promptId,
+                provider: llm,
+                rawText: "test response",
+                exception: null,
+                createdUtc: createdUtc ?? DateTime.UtcNow);
+            modelResponse.Prompt = prompt;
+
+            // NOTE(review): the generic type argument of this List appears to be missing — confirm the element type.
+            var parsed = new ParsedModelResponse(
+                modelResponseId: modelResponse.Id,
+                answer: actualAnswer,
+                extractedSources: new List());
+            parsed.ModelResponse = modelResponse;
+
+            var absoluteError = Math.Abs(expectedAnswer - actualAnswer);
+            // Divide by the magnitude so a negative expected value cannot produce a negative relative error;
+            // a zero expected value maps to 0 instead of throwing DivideByZeroException.
+            var relativeError = expectedAnswer == 0 ? 0m : absoluteError / Math.Abs(expectedAnswer);
+
+            var fact = new FactCheckResult(
+                parsedModelResponseId: parsed.Id,
+                absoluteError: absoluteError,
+                relativeError: relativeError,
+                answerIsCorrect: actualAnswer == expectedAnswer,
+                sourceIsCorrect: sourceIsCorrect,
+                abstained: false);
+            fact.ParsedModelResponse = parsed;
+
+            return fact;
+        }
}
}
+
diff --git a/Tests/LlmAggregatorTests.cs b/Tests/LlmAggregatorTests.cs
new file mode 100644
index 0000000..fb70529
--- /dev/null
+++ b/Tests/LlmAggregatorTests.cs
@@ -0,0 +1,108 @@
+using AI_stats_measurement.Backend.Models;
+using AI_stats_measurement.Data;
+using AI_stats_measurement.Interface;
+using AI_stats_measurement.Services;
+using Microsoft.EntityFrameworkCore;
+using Moq;
+using Xunit;
+
+namespace AI_stats_measurement.Tests;
+
+// NOTE(review): generic type arguments (Mock, It.IsAny, List, DbContextOptionsBuilder) appear to be
+// missing from this patch text; confirm the intended types before applying.
+public class LlmAggregatorTests
+{
+    // A fresh GUID-named in-memory database per call keeps every test's data separate.
+    private static AIMeasureDbContext CreateContext()
+    {
+        var options = new DbContextOptionsBuilder()
+            .UseInMemoryDatabase(Guid.NewGuid().ToString())
+            .Options;
+
+        return new AIMeasureDbContext(options);
+    }
+
+    // Two stored prompts, two registered models, only "ChatGPT" selected:
+    // expects two results, two calls on the selected mock and none on the other.
+    [Fact]
+    public async Task AskByPromptIdsAsync_Calls_Only_Selected_Models_And_Prompts()
+    {
+        var source = new Source { Id = 1, Name = "Test Source" };
+        var prompt1 = new Prompt("CBS", "test", "test", DateTime.UtcNow, "none", "question", 100, source, "");
+        var prompt2 = new Prompt("CBS", "test", "test", DateTime.UtcNow, "none", "question", 100, source, "");
+
+        // Dispose the context when the test ends instead of leaking it.
+        await using var context = CreateContext();
+        context.Prompts.AddRange(prompt1, prompt2);
+        await context.SaveChangesAsync();
+
+        var mock1 = new Mock();
+        mock1.SetupGet(q => q.Name).Returns("ChatGPT");
+        mock1.Setup(q => q.AskAsync(It.IsAny(), It.IsAny()))
+            .ReturnsAsync("antwoord");
+
+        var mock2 = new Mock();
+        mock2.SetupGet(q => q.Name).Returns("Gemini");
+        mock2.Setup(q => q.AskAsync(It.IsAny(), It.IsAny()))
+            .ReturnsAsync("antwoord");
+
+        var aggregator = new LlmAggregator(new[] { mock1.Object, mock2.Object }, context);
+
+        // NOTE(review): ids 1 and 2 presumably match the keys generated on save — confirm.
+        var result = await aggregator.AskByPromptIdsAsync(
+            new List { 1, 2 },
+            new List { "ChatGPT" },
+            CancellationToken.None
+        );
+
+        Assert.Equal(2, result.Count);
+
+        mock1.Verify(q => q.AskAsync(It.IsAny(), It.IsAny()), Times.Exactly(2));
+        mock2.Verify(q => q.AskAsync(It.IsAny(), It.IsAny()), Times.Never);
+    }
+
+    // No prompts are stored, so asking for id 999 is expected to return nothing.
+    [Fact]
+    public async Task AskByPromptIdsAsync_Returns_Empty_When_No_Prompts_Found()
+    {
+        await using var context = CreateContext();
+
+        var mock = new Mock();
+        mock.SetupGet(q => q.Name).Returns("ChatGPT");
+
+        var aggregator = new LlmAggregator(new[] { mock.Object }, context);
+
+        var result = await aggregator.AskByPromptIdsAsync(
+            new List { 999 },
+            new List { "ChatGPT" },
+            CancellationToken.None
+        );
+
+        Assert.Empty(result);
+    }
+
+    // Only "Gemini" is registered while "ChatGPT" is requested; expects an empty result.
+    [Fact]
+    public async Task AskByPromptIdsAsync_Skips_Non_Selected_Models()
+    {
+        var source = new Source { Id = 1, Name = "Test Source" };
+        var prompt = new Prompt("CBS", "test", "test", DateTime.UtcNow, "none", "question", 100, source, "");
+
+        await using var context = CreateContext();
+        context.Prompts.Add(prompt);
+        await context.SaveChangesAsync();
+
+        var mock = new Mock();
+        mock.SetupGet(q => q.Name).Returns("Gemini");
+
+        var aggregator = new LlmAggregator(new[] { mock.Object }, context);
+
+        var result = await aggregator.AskByPromptIdsAsync(
+            new List { 1 },
+            new List { "ChatGPT" },
+            CancellationToken.None
+        );
+
+        Assert.Empty(result);
+    }
+}
\ No newline at end of file
diff --git a/Tests/ModelResponseParserTests.cs b/Tests/ModelResponseParserTests.cs
index 85681e9..398dbed 100644
--- a/Tests/ModelResponseParserTests.cs
+++ b/Tests/ModelResponseParserTests.cs
@@ -197,9 +197,9 @@ public void Parse_Returns_Zero_When_No_Answer_Is_Found()
}
[Fact]
- public void Parse_Returns_1()
+ public void Parse_Returns_Zero_Ignore_Source()
{
- var text = "Bron: [CBS - Arbeidsongeschiktheidsuitkeringen] (https://www.cbs.nl/nl-nl/cijfers/detail/arbeidsongeschiktheidsuitkeringen)";
+ var text = "Bron: [CBS - Arbeidsongeschiktheidsuitkeringen] (https://www.cbs.nl/nl-nl/cijfers/86165NED/detail/arbeidsongeschiktheidsuitkeringen)";
var parsed = ModelResponseParser.ParseDutch(0, text);
@@ -207,7 +207,7 @@ public void Parse_Returns_1()
}
[Fact]
- public void Parse_Returns_2()
+ public void Parse_Returns_Answer_Ignore_Year()
{
var text = "**De levensverwachting bij geboorte voor mannen in Nederland in 2022 was 80,1 jaar.**[[1]] (https://www.lifetable.de/File/GetDocument/data/NLD/NLD000020222022CU1.pdf)\r\n\r\nDit cijfer komt uit de officiële sterftetafels(levensverwachtingstafels) van het **Centraal Bureau voor de Statistiek(CBS)**. Ter vergelijking: in 2020 was het circa 79,7 jaar(daling door COVID-19), in 2024 circa 80,5 jaar.[[2]] (https://www.cbs.nl/?sc_itemid=40d28916-85d7-494e-84d6-9d97ca41e253&sc_lang=nl-%20nl)\r\n\r\n**Bron:** CBS, tabel 37360ned(Levensverwachting; geslacht, leeftijd). \r\nDirecte link: [https://www.cbs.nl/nl-nl/cijfers/detail/37360ned](https://www.cbs.nl/nl-nl/cijfers/detail/37360ned) of de StatLine-tabel op opendata.cbs.nl.";
@@ -217,7 +217,7 @@ public void Parse_Returns_2()
}
[Fact]
- public void Parse_Returns_3()
+ public void Parse_Returns_Answer_Recognize_Ton()
{
var text = "30.300 ton";
@@ -227,7 +227,7 @@ public void Parse_Returns_3()
}
[Fact]
- public void Parse_Returns_4()
+ public void Parse_Returns_Zero_Ignore_Dates()
{
var text = "1 januari 2021";
@@ -237,7 +237,7 @@ public void Parse_Returns_4()
}
[Fact]
- public void Parse_Returns_5()
+ public void Parse_Returns_Zero_Ignore_Age_Range_English()
{
var text = "aged 25 to 29 ";
@@ -247,7 +247,7 @@ public void Parse_Returns_5()
}
[Fact]
- public void Parse_Returns_6()
+ public void Parse_Returns_Zero_Ignore_Age_Range_Dutch()
{
var text = "18 tot 25 jaar";
@@ -257,7 +257,7 @@ public void Parse_Returns_6()
}
[Fact]
- public void Parse_Returns_7()
+ public void Parse_Returns_Zero_Ignore_Base_Year()
{
var text = "(base year 2025=100)";
@@ -265,8 +265,4 @@ public void Parse_Returns_7()
Assert.Equal(0, parsed.Answer);
}
-
-
-
-
}