Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Backend/AI_stats_measurement.Backend.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
</PackageReference>
<PackageReference Include="Microsoft.Recognizers.Text.Number" Version="1.8.13" />
<PackageReference Include="Microsoft.VisualStudio.Web.CodeGeneration.Design" Version="9.0.12" />
<PackageReference Include="Moq" Version="4.20.72" />
<PackageReference Include="Npgsql.EntityFrameworkCore.PostgreSQL" Version="9.0.4" />
<PackageReference Include="OpenAI" Version="2.9.1" />
<PackageReference Include="System.IdentityModel.Tokens.Jwt" Version="8.17.0" />
Expand Down
12 changes: 5 additions & 7 deletions Backend/Services/AnalyticsService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -384,15 +384,15 @@ private static bool MatchesLlmGroup(string provider, string? llmGroup)
|| provider.StartsWith("grok-4.20-reasoning", StringComparison.OrdinalIgnoreCase);
}

// fallback (normal filtering)
// fallback
return provider.StartsWith(llmGroup, StringComparison.OrdinalIgnoreCase);
}

public Dictionary<string, MetricsOverTimeDto> GetWeeklyMetricsPerNsi(
List<FactCheckResult> results,
string? nsi,
string? llm,
string? theme)
List<FactCheckResult> results,
string? nsi,
string? llm,
string? theme)
{
var filtered = ApplyFilters(results, nsi, llm, theme);

Expand Down Expand Up @@ -436,8 +436,6 @@ public Dictionary<string, MetricsOverTimeDto> GetWeeklyMetricsPerNsi(
);
}



public class MetricResultDto
{
public double Score { get; set; }
Expand Down
1 change: 1 addition & 0 deletions Tests/AI_stats_measurement.Tests.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
<PrivateAssets>all</PrivateAssets>
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
</PackageReference>
<PackageReference Include="Microsoft.EntityFrameworkCore.InMemory" Version="9.0.11" />
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.12.0" />
<PackageReference Include="xunit" Version="2.9.2" />
<PackageReference Include="xunit.runner.visualstudio" Version="2.8.2" />
Expand Down
142 changes: 137 additions & 5 deletions Tests/AnalyticsServiceTest.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
using AI_stats_measurement.Backend.Services.Parsing;
using AI_stats_measurement.Backend.Models;
using AI_stats_measurement.Backend.Services;
using AI_stats_measurement.Backend.Services.Parsing;
using AI_stats_measurement.Models;
using System;
using System.Collections.Generic;
using System.Linq;
Expand All @@ -7,16 +10,145 @@

namespace AI_stats_measurement.Tests
{

/// <summary>
/// Unit tests for <see cref="AnalyticsService"/> that feed it hand-built
/// <see cref="FactCheckResult"/> graphs (see <see cref="CreateResult"/>).
/// </summary>
public class AnalyticsServiceTest
{
    /// <summary>
    /// Two CBS results, of which only one has a correct source, are expected to
    /// yield a single metrics entry with a findability score of 5.0.
    /// </summary>
    [Fact]
    public void GetMetricsPerNsi_Computes_FindabilityScore()
    {
        var service = new AnalyticsService();

        var results = new List<FactCheckResult>
        {
            CreateResult(1, "CBS", "gpt-4o-mini", 100, 100, true),
            CreateResult(2, "CBS", "gpt-4o-mini", 90, 100, false),
        };

        var metrics = service.GetMetricsPerNsi(results, "CBS", null, null);

        Assert.Single(metrics);
        Assert.Equal(5.0, metrics[0].FindabilityScore);
    }

    /// <summary>
    /// Two results for the same prompt id with identical answers are expected to
    /// yield a consistency score of 10.0.
    /// </summary>
    [Fact]
    public void GetMetricsPerNsi_Computes_PerfectConsistencyScore_WhenAnswersAreEqual()
    {
        var service = new AnalyticsService();

        var results = new List<FactCheckResult>
        {
            CreateResult(1, "CBS", "gpt-4o-mini", 100, 100, true),
            CreateResult(1, "CBS", "gpt-4o-mini", 100, 100, true),
        };

        var metrics = service.GetMetricsPerNsi(results, "CBS", null, null);

        Assert.Single(metrics);
        Assert.Equal(10.0, metrics[0].ConsistencyScore);
    }

    /// <summary>
    /// Without an NSI filter, results for "CBS" and "OECD" are expected to produce
    /// one metrics entry each.
    /// </summary>
    [Fact]
    public void GetMetricsPerNsi_GroupsResults_PerNsi()
    {
        var service = new AnalyticsService();

        var results = new List<FactCheckResult>
        {
            CreateResult(1, "CBS", "gpt-4o-mini", 100, 100, true),
            CreateResult(2, "OECD", "gpt-4o-mini", 200, 200, true),
        };

        var metrics = service.GetMetricsPerNsi(results, null, null, null);

        Assert.Equal(2, metrics.Count);
        Assert.Contains(metrics, m => m.Nsi == "CBS");
        Assert.Contains(metrics, m => m.Nsi == "OECD");
    }

    /// <summary>
    /// With the NSI filter set to "CBS", the "OECD" result is expected to be
    /// excluded, leaving a single "CBS" entry.
    /// </summary>
    [Fact]
    public void GetMetricsPerNsi_Filters_ByNsi()
    {
        var service = new AnalyticsService();

        var results = new List<FactCheckResult>
        {
            CreateResult(1, "CBS", "gpt-4o-mini", 100, 100, true),
            CreateResult(2, "OECD", "gpt-4o-mini", 200, 200, true),
        };

        var metrics = service.GetMetricsPerNsi(results, "CBS", null, null);

        Assert.Single(metrics);
        Assert.Equal("CBS", metrics[0].Nsi);
    }

    /// <summary>
    /// Two CBS results created seven days apart (2026-01-05 and 2026-01-12, UTC)
    /// are expected to produce two data points in each of the findability,
    /// accuracy and consistency series under the "CBS" key.
    /// </summary>
    [Fact]
    public void GetWeeklyMetricsPerNsi_GroupsResults_ByWeek()
    {
        var service = new AnalyticsService();

        var results = new List<FactCheckResult>
        {
            CreateResult(1, "CBS", "gpt-4o-mini", 100, 100, true, new DateTime(2026, 1, 5, 0, 0, 0, DateTimeKind.Utc)),
            CreateResult(2, "CBS", "gpt-4o-mini", 90, 100, false, new DateTime(2026, 1, 12, 0, 0, 0, DateTimeKind.Utc)),
        };

        var weekly = service.GetWeeklyMetricsPerNsi(results, "CBS", null, null);

        Assert.True(weekly.ContainsKey("CBS"));
        Assert.Equal(2, weekly["CBS"].Findability.Count);
        Assert.Equal(2, weekly["CBS"].Accuracy.Count);
        Assert.Equal(2, weekly["CBS"].Consistency.Count);
    }

    /// <summary>
    /// Builds a fully linked <see cref="FactCheckResult"/> (result, parsed response,
    /// model response, prompt) for use as test input.
    /// </summary>
    /// <param name="promptId">Used as both the id and the prompt id of the model response.</param>
    /// <param name="nsi">Passed as the first constructor argument of the prompt.</param>
    /// <param name="llm">Provider name stored on the model response.</param>
    /// <param name="actualAnswer">Answer stored on the parsed model response.</param>
    /// <param name="expectedAnswer">Expected answer stored on the prompt.</param>
    /// <param name="sourceIsCorrect">Value of the source-correctness flag on the result.</param>
    /// <param name="createdUtc">Creation time of the model response; defaults to the current UTC time.</param>
    /// <returns>The constructed result with its navigation properties populated.</returns>
    private static FactCheckResult CreateResult(
        int promptId,
        string nsi,
        string llm,
        decimal actualAnswer,
        decimal expectedAnswer,
        bool sourceIsCorrect,
        DateTime? createdUtc = null)
    {
        var source = new Source { Id = 1, Name = "Test Source" };

        // UTC rather than local time, so the fixture does not depend on the machine's time zone.
        var prompt = new Prompt(nsi, "test", "test", DateTime.UtcNow, "none", "question", expectedAnswer, source, "");

        var modelResponse = ModelResponse.Import(
            id: promptId,
            promptId: promptId,
            provider: llm,
            rawText: "test response",
            exception: null,
            createdUtc: createdUtc ?? DateTime.UtcNow
        );

        modelResponse.Prompt = prompt;

        var parsed = new ParsedModelResponse(
            modelResponseId: modelResponse.Id,
            answer: actualAnswer,
            extractedSources: new List<ExtractedSource>()
        );

        parsed.ModelResponse = modelResponse;

        var absoluteError = Math.Abs(expectedAnswer - actualAnswer);

        // Divide by the magnitude of the expected value so a negative expected
        // answer cannot produce a negative relative error; zero is guarded to
        // avoid a DivideByZeroException.
        var relativeError = expectedAnswer == 0 ? 0 : absoluteError / Math.Abs(expectedAnswer);

        var fact = new FactCheckResult(
            parsedModelResponseId: parsed.Id,
            absoluteError: absoluteError,
            relativeError: relativeError,
            answerIsCorrect: actualAnswer == expectedAnswer,
            sourceIsCorrect: sourceIsCorrect,
            abstained: false
        );

        fact.ParsedModelResponse = parsed;

        return fact;
    }
}
}

108 changes: 108 additions & 0 deletions Tests/LlmAggregatorTests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
using AI_stats_measurement.Backend.Models;
using AI_stats_measurement.Data;
using AI_stats_measurement.Interface;
using AI_stats_measurement.Services;
using Microsoft.EntityFrameworkCore;
using Moq;
using Xunit;

namespace AI_stats_measurement.Tests;

/// <summary>
/// Tests for <see cref="LlmAggregator"/>'s <c>AskByPromptIdsAsync</c>, using an
/// EF Core in-memory database and Moq-based <see cref="ILlmQuerier"/> doubles.
/// </summary>
public class LlmAggregatorTests
{
    // Fixed UTC timestamp so the fixtures do not depend on the wall clock or time zone.
    private static readonly DateTime PromptDate = new DateTime(2026, 1, 5, 0, 0, 0, DateTimeKind.Utc);

    /// <summary>
    /// Creates a context backed by a uniquely named in-memory database so that
    /// tests do not share state.
    /// </summary>
    private static AIMeasureDbContext CreateContext()
    {
        var options = new DbContextOptionsBuilder<AIMeasureDbContext>()
            .UseInMemoryDatabase(Guid.NewGuid().ToString())
            .Options;

        return new AIMeasureDbContext(options);
    }

    /// <summary>Creates the shared test source used by the prompts of a single test.</summary>
    private static Source CreateSource() => new Source { Id = 1, Name = "Test Source" };

    /// <summary>
    /// Creates a "CBS" test prompt with an expected answer of 100 that references
    /// <paramref name="source"/>. Prompts of one test must share the same source
    /// instance, as every test source carries the same explicit id.
    /// </summary>
    private static Prompt CreatePrompt(Source source) =>
        new Prompt("CBS", "test", "test", PromptDate, "none", "question", 100, source, "");

    /// <summary>
    /// With two stored prompts and only "ChatGPT" selected, the ChatGPT querier is
    /// expected to be asked once per prompt and the Gemini querier not at all.
    /// </summary>
    [Fact]
    public async Task AskByPromptIdsAsync_Calls_Only_Selected_Models_And_Prompts()
    {
        var source = CreateSource();
        var prompt1 = CreatePrompt(source);
        var prompt2 = CreatePrompt(source);

        await using var context = CreateContext();
        context.Prompts.AddRange(prompt1, prompt2);
        await context.SaveChangesAsync();

        var mock1 = new Mock<ILlmQuerier>();
        mock1.SetupGet(q => q.Name).Returns("ChatGPT");
        mock1.Setup(q => q.AskAsync(It.IsAny<Prompt>(), It.IsAny<CancellationToken>()))
            .ReturnsAsync("antwoord");

        var mock2 = new Mock<ILlmQuerier>();
        mock2.SetupGet(q => q.Name).Returns("Gemini");
        mock2.Setup(q => q.AskAsync(It.IsAny<Prompt>(), It.IsAny<CancellationToken>()))
            .ReturnsAsync("antwoord");

        var aggregator = new LlmAggregator(
            new[] { mock1.Object, mock2.Object },
            context
        );

        // NOTE(review): the literal ids assume the fresh in-memory database assigns
        // keys 1 and 2 to the two prompts - confirm, or read the generated keys instead.
        var result = await aggregator.AskByPromptIdsAsync(
            new List<int> { 1, 2 },
            new List<string> { "ChatGPT" },
            CancellationToken.None
        );

        Assert.Equal(2, result.Count);

        mock1.Verify(q => q.AskAsync(It.IsAny<Prompt>(), It.IsAny<CancellationToken>()), Times.Exactly(2));
        mock2.Verify(q => q.AskAsync(It.IsAny<Prompt>(), It.IsAny<CancellationToken>()), Times.Never);
    }

    /// <summary>
    /// Requesting a prompt id that is not stored is expected to return an empty
    /// result without querying the model.
    /// </summary>
    [Fact]
    public async Task AskByPromptIdsAsync_Returns_Empty_When_No_Prompts_Found()
    {
        await using var context = CreateContext();

        var mock = new Mock<ILlmQuerier>();
        mock.SetupGet(q => q.Name).Returns("ChatGPT");

        var aggregator = new LlmAggregator(
            new[] { mock.Object },
            context
        );

        var result = await aggregator.AskByPromptIdsAsync(
            new List<int> { 999 },
            new List<string> { "ChatGPT" },
            CancellationToken.None
        );

        Assert.Empty(result);
        mock.Verify(q => q.AskAsync(It.IsAny<Prompt>(), It.IsAny<CancellationToken>()), Times.Never);
    }

    /// <summary>
    /// When the only registered querier ("Gemini") is not among the selected models,
    /// the result is expected to be empty and the querier must not be asked.
    /// </summary>
    [Fact]
    public async Task AskByPromptIdsAsync_Skips_Non_Selected_Models()
    {
        var prompt = CreatePrompt(CreateSource());

        await using var context = CreateContext();
        context.Prompts.Add(prompt);
        await context.SaveChangesAsync();

        var mock = new Mock<ILlmQuerier>();
        mock.SetupGet(q => q.Name).Returns("Gemini");

        var aggregator = new LlmAggregator(
            new[] { mock.Object },
            context
        );

        // NOTE(review): id 1 assumes the fresh in-memory database assigns key 1 - confirm.
        var result = await aggregator.AskByPromptIdsAsync(
            new List<int> { 1 },
            new List<string> { "ChatGPT" },
            CancellationToken.None
        );

        Assert.Empty(result);
        mock.Verify(q => q.AskAsync(It.IsAny<Prompt>(), It.IsAny<CancellationToken>()), Times.Never);
    }
}
20 changes: 8 additions & 12 deletions Tests/ModelResponseParserTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -197,17 +197,17 @@ public void Parse_Returns_Zero_When_No_Answer_Is_Found()
}

/// <summary>
/// A response that consists solely of a source citation, whose URL contains the
/// digits of a table identifier, is expected to parse to an answer of zero.
/// </summary>
[Fact]
public void Parse_Returns_Zero_Ignore_Source()
{
    var text = "Bron: [CBS - Arbeidsongeschiktheidsuitkeringen] (https://www.cbs.nl/nl-nl/cijfers/86165NED/detail/arbeidsongeschiktheidsuitkeringen)";

    var parsed = ModelResponseParser.ParseDutch(0, text);

    Assert.Equal(0m, parsed.Answer);
}

[Fact]
public void Parse_Returns_2()
public void Parse_Returns_Answer_Ignore_Year()
{
var text = "**De levensverwachting bij geboorte voor mannen in Nederland in 2022 was 80,1 jaar.**[[1]] (https://www.lifetable.de/File/GetDocument/data/NLD/NLD000020222022CU1.pdf)\r\n\r\nDit cijfer komt uit de officiële sterftetafels(levensverwachtingstafels) van het **Centraal Bureau voor de Statistiek(CBS)**. Ter vergelijking: in 2020 was het circa 79,7 jaar(daling door COVID-19), in 2024 circa 80,5 jaar.[[2]] (https://www.cbs.nl/?sc_itemid=40d28916-85d7-494e-84d6-9d97ca41e253&sc_lang=nl-%20nl)\r\n\r\n**Bron:** CBS, tabel 37360ned(Levensverwachting; geslacht, leeftijd). \r\nDirecte link: [https://www.cbs.nl/nl-nl/cijfers/detail/37360ned](https://www.cbs.nl/nl-nl/cijfers/detail/37360ned) of de StatLine-tabel op opendata.cbs.nl.";

Expand All @@ -217,7 +217,7 @@ public void Parse_Returns_2()
}

[Fact]
public void Parse_Returns_3()
public void Parse_Returns_Answer_Recognize_Ton()
{
var text = "30.300 ton";

Expand All @@ -227,7 +227,7 @@ public void Parse_Returns_3()
}

[Fact]
public void Parse_Returns_4()
public void Parse_Returns_Zero_Ignore_Dates()
{
var text = "1 januari 2021";

Expand All @@ -237,7 +237,7 @@ public void Parse_Returns_4()
}

[Fact]
public void Parse_Returns_5()
public void Parse_Returns_Zero_Ignore_Age_Range_English()
{
var text = "aged 25 to 29 ";

Expand All @@ -247,7 +247,7 @@ public void Parse_Returns_5()
}

[Fact]
public void Parse_Returns_6()
public void Parse_Returns_Zero_Ignore_Age_Range_Dutch()
{
var text = "18 tot 25 jaar";

Expand All @@ -257,16 +257,12 @@ public void Parse_Returns_6()
}

/// <summary>
/// An index base-year annotation such as "(base year 2025=100)" contains numbers
/// but no answer, and is expected to parse to an answer of zero.
/// </summary>
[Fact]
public void Parse_Returns_Zero_Ignore_Base_Year()
{
    var text = "(base year 2025=100)";

    var parsed = ModelResponseParser.ParseDutch(0, text);

    // Decimal literal, consistent with the other zero-answer assertions in this file.
    Assert.Equal(0m, parsed.Answer);
}




}
Loading