diff --git a/Backend/AI_stats_measurement.Backend.csproj b/Backend/AI_stats_measurement.Backend.csproj index ceddabf..652f6be 100644 --- a/Backend/AI_stats_measurement.Backend.csproj +++ b/Backend/AI_stats_measurement.Backend.csproj @@ -25,6 +25,7 @@ + diff --git a/Backend/Services/AnalyticsService.cs b/Backend/Services/AnalyticsService.cs index e9cf5a4..9b173af 100644 --- a/Backend/Services/AnalyticsService.cs +++ b/Backend/Services/AnalyticsService.cs @@ -384,15 +384,15 @@ private static bool MatchesLlmGroup(string provider, string? llmGroup) || provider.StartsWith("grok-4.20-reasoning", StringComparison.OrdinalIgnoreCase); } - // fallback (normal filtering) + // fallback return provider.StartsWith(llmGroup, StringComparison.OrdinalIgnoreCase); } public Dictionary GetWeeklyMetricsPerNsi( - List results, - string? nsi, - string? llm, - string? theme) + List results, + string? nsi, + string? llm, + string? theme) { var filtered = ApplyFilters(results, nsi, llm, theme); @@ -436,8 +436,6 @@ public Dictionary GetWeeklyMetricsPerNsi( ); } - - public class MetricResultDto { public double Score { get; set; } diff --git a/Tests/AI_stats_measurement.Tests.csproj b/Tests/AI_stats_measurement.Tests.csproj index a766caa..7837b0c 100644 --- a/Tests/AI_stats_measurement.Tests.csproj +++ b/Tests/AI_stats_measurement.Tests.csproj @@ -13,6 +13,7 @@ all runtime; build; native; contentfiles; analyzers; buildtransitive + diff --git a/Tests/AnalyticsServiceTest.cs b/Tests/AnalyticsServiceTest.cs index 2e57f91..de31b5b 100644 --- a/Tests/AnalyticsServiceTest.cs +++ b/Tests/AnalyticsServiceTest.cs @@ -1,4 +1,7 @@ -using AI_stats_measurement.Backend.Services.Parsing; +using AI_stats_measurement.Backend.Models; +using AI_stats_measurement.Backend.Services; +using AI_stats_measurement.Backend.Services.Parsing; +using AI_stats_measurement.Models; using System; using System.Collections.Generic; using System.Linq; @@ -7,16 +10,145 @@ namespace AI_stats_measurement.Tests { + public class AnalyticsServiceTest { [Fact] - public void ComputeAccuracyScore_() + public void GetMetricsPerNsi_Computes_FindabilityScore() + { + var service = new AnalyticsService(); + + var results = new List + { + CreateResult(1, "CBS", "gpt-4o-mini", 100, 100, true), + CreateResult(2, "CBS", "gpt-4o-mini", 90, 100, false), + }; + + var metrics = service.GetMetricsPerNsi(results, "CBS", null, null); + + Assert.Single(metrics); + Assert.Equal(5.0, metrics[0].FindabilityScore); + } + + [Fact] + public void GetMetricsPerNsi_Computes_PerfectConsistencyScore_WhenAnswersAreEqual() + { + var service = new AnalyticsService(); + + var results = new List + { + CreateResult(1, "CBS", "gpt-4o-mini", 100, 100, true), + CreateResult(1, "CBS", "gpt-4o-mini", 100, 100, true), + }; + + var metrics = service.GetMetricsPerNsi(results, "CBS", null, null); + + Assert.Single(metrics); + Assert.Equal(10.0, metrics[0].ConsistencyScore); + } + + [Fact] + public void GetMetricsPerNsi_GroupsResults_PerNsi() + { + var service = new AnalyticsService(); + + var results = new List + { + CreateResult(1, "CBS", "gpt-4o-mini", 100, 100, true), + CreateResult(2, "OECD", "gpt-4o-mini", 200, 200, true), + }; + + var metrics = service.GetMetricsPerNsi(results, null, null, null); + + Assert.Equal(2, metrics.Count); + Assert.Contains(metrics, m => m.Nsi == "CBS"); + Assert.Contains(metrics, m => m.Nsi == "OECD"); + } + + [Fact] + public void GetMetricsPerNsi_Filters_ByNsi() + { + var service = new AnalyticsService(); + + var results = new List + { + CreateResult(1, "CBS", "gpt-4o-mini", 100, 100, true), + CreateResult(2, "OECD", "gpt-4o-mini", 200, 200, true), + }; + + var metrics = service.GetMetricsPerNsi(results, "CBS", null, null); + + Assert.Single(metrics); + Assert.Equal("CBS", metrics[0].Nsi); + } + + + [Fact] + public void GetWeeklyMetricsPerNsi_GroupsResults_ByWeek() + { + var service = new AnalyticsService(); + + var results = new List { - var text = "In 2020 was de gemiddelde verkoopprijs ongeveer € 348.000."; + CreateResult(1, "CBS", "gpt-4o-mini", 100, 100, true, new DateTime(2026, 1, 5, 0, 0, 0, DateTimeKind.Utc)), + CreateResult(2, "CBS", "gpt-4o-mini", 90, 100, false, new DateTime(2026, 1, 12, 0, 0, 0, DateTimeKind.Utc)), + }; - var parsed = ModelResponseParser.ParseDutch(0, text); + var weekly = service.GetWeeklyMetricsPerNsi(results, "CBS", null, null); - Assert.Equal(348_000m, parsed.Answer); + Assert.True(weekly.ContainsKey("CBS")); + Assert.Equal(2, weekly["CBS"].Findability.Count); + Assert.Equal(2, weekly["CBS"].Accuracy.Count); + Assert.Equal(2, weekly["CBS"].Consistency.Count); } + + private static FactCheckResult CreateResult( + int promptId, + string nsi, + string llm, + decimal actualAnswer, + decimal expectedAnswer, + bool sourceIsCorrect, + DateTime? createdUtc = null) + { + Source source = new Source { Id = 1, Name = "Test Source" }; + var prompt = new Prompt(nsi, "test", "test", DateTime.Now, "none", "question", expectedAnswer, source, ""); + + var modelResponse = ModelResponse.Import( + id: promptId, + promptId: promptId, + provider: llm, + rawText: "test response", + exception: null, + createdUtc: createdUtc ?? DateTime.UtcNow + ); + + modelResponse.Prompt = prompt; + + var parsed = new ParsedModelResponse( + modelResponseId: modelResponse.Id, + answer: actualAnswer, + extractedSources: new List() + ); + + parsed.ModelResponse = modelResponse; + + var absoluteError = Math.Abs(expectedAnswer - actualAnswer); + var relativeError = expectedAnswer == 0 ? 0 : absoluteError / expectedAnswer; + + var fact = new FactCheckResult( + parsedModelResponseId: parsed.Id, + absoluteError: absoluteError, + relativeError: relativeError, + answerIsCorrect: actualAnswer == expectedAnswer, + sourceIsCorrect: sourceIsCorrect, + abstained: false + ); + + fact.ParsedModelResponse = parsed; + + return fact; + } } } + diff --git a/Tests/LlmAggregatorTests.cs b/Tests/LlmAggregatorTests.cs new file mode 100644 index 0000000..fb70529 --- /dev/null +++ b/Tests/LlmAggregatorTests.cs @@ -0,0 +1,108 @@ +using AI_stats_measurement.Backend.Models; +using AI_stats_measurement.Data; +using AI_stats_measurement.Interface; +using AI_stats_measurement.Services; +using Microsoft.EntityFrameworkCore; +using Moq; +using Xunit; + +namespace AI_stats_measurement.Tests; + +public class LlmAggregatorTests +{ + private AIMeasureDbContext CreateContext() + { + var options = new DbContextOptionsBuilder() + .UseInMemoryDatabase(Guid.NewGuid().ToString()) + .Options; + + return new AIMeasureDbContext(options); + } + + [Fact] + public async Task AskByPromptIdsAsync_Calls_Only_Selected_Models_And_Prompts() + { + Source source = new Source { Id = 1, Name = "Test Source" }; + var prompt1 = new Prompt("CBS", "test", "test", DateTime.Now, "none", "question", 100, source, ""); + var prompt2 = new Prompt("CBS", "test", "test", DateTime.Now, "none", "question", 100, source, ""); + + var context = CreateContext(); + context.Prompts.AddRange(prompt1, prompt2); + await context.SaveChangesAsync(); + + var mock1 = new Mock(); + mock1.SetupGet(q => q.Name).Returns("ChatGPT"); + mock1.Setup(q => q.AskAsync(It.IsAny(), It.IsAny())) + .ReturnsAsync("antwoord"); + + var mock2 = new Mock(); + mock2.SetupGet(q => q.Name).Returns("Gemini"); + mock2.Setup(q => q.AskAsync(It.IsAny(), It.IsAny())) + .ReturnsAsync("antwoord"); + + var aggregator = new LlmAggregator( + new[] { mock1.Object, mock2.Object }, + context + ); + + var result = await aggregator.AskByPromptIdsAsync( + new List { 1, 2 }, + new List { "ChatGPT" }, + CancellationToken.None + ); + + Assert.Equal(2, result.Count); + + mock1.Verify(q => q.AskAsync(It.IsAny(), It.IsAny()), Times.Exactly(2)); + mock2.Verify(q => q.AskAsync(It.IsAny(), It.IsAny()), Times.Never); + } + + [Fact] + public async Task AskByPromptIdsAsync_Returns_Empty_When_No_Prompts_Found() + { + var context = CreateContext(); + + var mock = new Mock(); + mock.SetupGet(q => q.Name).Returns("ChatGPT"); + + var aggregator = new LlmAggregator( + new[] { mock.Object }, + context + ); + + var result = await aggregator.AskByPromptIdsAsync( + new List { 999 }, + new List { "ChatGPT" }, + CancellationToken.None + ); + + Assert.Empty(result); + } + + [Fact] + public async Task AskByPromptIdsAsync_Skips_Non_Selected_Models() + { + Source source = new Source { Id = 1, Name = "Test Source" }; + var prompt = new Prompt("CBS", "test", "test", DateTime.Now, "none", "question", 100, source, ""); + + var context = CreateContext(); + context.Prompts.Add(prompt); + await context.SaveChangesAsync(); + + var mock = new Mock(); + mock.SetupGet(q => q.Name).Returns("Gemini"); + + var aggregator = new LlmAggregator( + new[] { mock.Object }, + context + ); + + var result = await aggregator.AskByPromptIdsAsync( + new List { 1 }, + new List { "ChatGPT" }, + CancellationToken.None + ); + + Assert.Empty(result); + } +} \ No newline at end of file diff --git a/Tests/ModelResponseParserTests.cs b/Tests/ModelResponseParserTests.cs index 85681e9..398dbed 100644 --- a/Tests/ModelResponseParserTests.cs +++ b/Tests/ModelResponseParserTests.cs @@ -197,9 +197,9 @@ public void Parse_Returns_Zero_When_No_Answer_Is_Found() } [Fact] - public void Parse_Returns_1() + public void Parse_Returns_Zero_Ignore_Source() { - var text = "Bron: [CBS - Arbeidsongeschiktheidsuitkeringen] (https://www.cbs.nl/nl-nl/cijfers/detail/arbeidsongeschiktheidsuitkeringen)"; + var text = "Bron: [CBS - Arbeidsongeschiktheidsuitkeringen] (https://www.cbs.nl/nl-nl/cijfers/86165NED/detail/arbeidsongeschiktheidsuitkeringen)"; var parsed = ModelResponseParser.ParseDutch(0, text); @@ -207,7 +207,7 @@ public void Parse_Returns_1() } [Fact] - public void Parse_Returns_2() + public void Parse_Returns_Answer_Ignore_Year() { var text = "**De levensverwachting bij geboorte voor mannen in Nederland in 2022 was 80,1 jaar.**[[1]] (https://www.lifetable.de/File/GetDocument/data/NLD/NLD000020222022CU1.pdf)\r\n\r\nDit cijfer komt uit de officiële sterftetafels(levensverwachtingstafels) van het **Centraal Bureau voor de Statistiek(CBS)**. Ter vergelijking: in 2020 was het circa 79,7 jaar(daling door COVID-19), in 2024 circa 80,5 jaar.[[2]] (https://www.cbs.nl/?sc_itemid=40d28916-85d7-494e-84d6-9d97ca41e253&sc_lang=nl-%20nl)\r\n\r\n**Bron:** CBS, tabel 37360ned(Levensverwachting; geslacht, leeftijd). \r\nDirecte link: [https://www.cbs.nl/nl-nl/cijfers/detail/37360ned](https://www.cbs.nl/nl-nl/cijfers/detail/37360ned) of de StatLine-tabel op opendata.cbs.nl."; @@ -217,7 +217,7 @@ public void Parse_Returns_2() } [Fact] - public void Parse_Returns_3() + public void Parse_Returns_Answer_Recognize_Ton() { var text = "30.300 ton"; @@ -227,7 +227,7 @@ public void Parse_Returns_3() } [Fact] - public void Parse_Returns_4() + public void Parse_Returns_Zero_Ignore_Dates() { var text = "1 januari 2021"; @@ -237,7 +237,7 @@ public void Parse_Returns_4() } [Fact] - public void Parse_Returns_5() + public void Parse_Returns_Zero_Ignore_Age_Range_English() { var text = "aged 25 to 29 "; @@ -247,7 +247,7 @@ public void Parse_Returns_5() } [Fact] - public void Parse_Returns_6() + public void Parse_Returns_Zero_Ignore_Age_Range_Dutch() { var text = "18 tot 25 jaar"; @@ -257,7 +257,7 @@ public void Parse_Returns_6() } [Fact] - public void Parse_Returns_7() + public void Parse_Returns_Zero_Ignore_Base_Year() { var text = "(base year 2025=100)"; @@ -265,8 +265,4 @@ public void Parse_Returns_7() Assert.Equal(0, parsed.Answer); } - - - - }