From cf4a061f7ab6d96a9d94141e532cdcdc34239f23 Mon Sep 17 00:00:00 2001 From: zendodx Date: Thu, 11 Jun 2026 11:16:06 +0800 Subject: [PATCH 1/7] =?UTF-8?q?fix:=20=E4=BF=AE=E6=94=B9github=20pages?= =?UTF-8?q?=E9=85=8D=E7=BD=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/docs.yml | 1 - docs/dev-guide/github-pages.md | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 8a1ccf9..ea7e4e7 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -5,7 +5,6 @@ on: branches: - main - master - - 1.4.x paths: - "docs/**" - ".github/workflows/docs.yml" diff --git a/docs/dev-guide/github-pages.md b/docs/dev-guide/github-pages.md index 4c636d8..99ca6ac 100644 --- a/docs/dev-guide/github-pages.md +++ b/docs/dev-guide/github-pages.md @@ -2,7 +2,7 @@ layout: default title: Github Pages用法参考 parent: 开发指南 -nav_order: 2 +nav_order: 90 --- Github Pages使用方法 From 1f85b143667af1b81ae796df5107032f86b8b727 Mon Sep 17 00:00:00 2001 From: zendodx Date: Thu, 11 Jun 2026 11:17:04 +0800 Subject: [PATCH 2/7] =?UTF-8?q?fix:=20=E9=85=8D=E7=BD=AEgithub=20codecov?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/ci.yml | 47 +++++++++ docs/dev-guide/github-codecov.md | 166 +++++++++++++++++++++++++++++++ evalkit-test/pom.xml | 5 + pom.xml | 25 +++++ 4 files changed, 243 insertions(+) create mode 100644 .github/workflows/ci.yml create mode 100644 docs/dev-guide/github-codecov.md diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..f4059c8 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,47 @@ +name: CI - Test & Coverage + +on: + push: + branches: + - main + - master + - 1.5.x + pull_request: + branches: + - main + - master + - 1.5.x + +jobs: + test: + name: Run Tests & Upload Coverage + runs-on: ubuntu-latest + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up JDK 8 + uses: actions/setup-java@v4 + with: + java-version: '8' + distribution: 'temurin' + cache: maven + + - name: Run Tests with JaCoCo + run: mvn -B test --no-transfer-progress -Dgpg.skip=true + + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v4 + with: + token: ${{ secrets.CODECOV_TOKEN }} + files: | + evalkit-common/target/site/jacoco/jacoco.xml + evalkit-workflow/target/site/jacoco/jacoco.xml + evalkit-infra/target/site/jacoco/jacoco.xml + evalkit-eval/target/site/jacoco/jacoco.xml + flags: unittests + name: evalkit-coverage + fail_ci_if_error: false + verbose: true + diff --git a/docs/dev-guide/github-codecov.md b/docs/dev-guide/github-codecov.md new file mode 100644 index 0000000..62d7437 --- /dev/null +++ b/docs/dev-guide/github-codecov.md @@ -0,0 +1,166 @@ +--- +layout: default +title: Github Codecov用法参考 +parent: 开发指南 +nav_order: 91 +--- +Github Codecov使用方法 + +# 怎么从codecov获取token + +从 Codecov 获取 Token 的步骤如下: + +## 1. 登录 Codecov + +访问 [https://codecov.io](https://codecov.io),点击右上角 **Sign up** / **Log in**,选择 **Sign in with GitHub**,授权后跳回 Codecov 首页。 + +## 2. 添加你的仓库 + +登录后,左侧会列出你的 GitHub 组织/账号,找到 `zendodx` → 搜索 `evalkit-framework` → 点击 **Setup repo**(或 **Configure**)。 + +## 3. 获取 Token + +进入仓库设置页面后,会看到如下内容: + +``` +Step 2: add the repository upload token as a secret +CODECOV_TOKEN = xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx +``` + +复制这串 UUID 格式的 token。 + +## 4. 添加到 GitHub Secrets + +1. 打开 GitHub 仓库页面:`https://github.com/zendodx/evalkit-framework` +2. 点击顶部 **Settings** 标签 +3. 左侧菜单找到 **Secrets and variables → Actions** +4. 点击绿色按钮 **New repository secret** +5. 填写: + - **Name**:`CODECOV_TOKEN` + - **Secret**:粘贴刚才复制的 token +6. 点击 **Add secret** 保存 + +--- + +完成后,下次 push 代码触发 CI 时,GitHub Actions 就会自动将覆盖率数据上传到 Codecov,并在仓库页面展示覆盖率趋势图。 + +# 怎样在github开启代码覆盖率扫描 + +基于你的项目结构(Maven + Java + GitHub Actions),以下是开启代码覆盖率扫描的完整方案: + +## 整体思路 + +需要做两件事: +1. **在 `pom.xml` 中添加 JaCoCo 插件** — 在 Maven 构建时生成覆盖率报告 +2. **在 GitHub Actions 中添加 CI workflow** — 自动运行测试并上传覆盖率到 [Codecov](https://codecov.io)(免费、与 GitHub 集成好) + +--- + +## 第一步:在根 `pom.xml` 添加 JaCoCo 插件 + +在 `pom.xml` 的 `` 中加入版本号,在 `` 中加入 JaCoCo 插件: + +```xml + + + org.jacoco + jacoco-maven-plugin + ${jacoco-maven-plugin.version} + + + prepare-agent + + prepare-agent + + + + report + test + + report + + + + +``` + +## 第二步:创建 GitHub Actions CI 工作流 + +```yml +name: CI - Test & Coverage + +on: + push: + branches: + - main + - master + - 1.5.x + - 1.4.x + pull_request: + branches: + - main + - master + - 1.5.x + - 1.4.x + +jobs: + test: + name: Run Tests & Upload Coverage + runs-on: ubuntu-latest + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up JDK 8 + uses: actions/setup-java@v4 + with: + java-version: '8' + distribution: 'temurin' + cache: maven + + - name: Run Tests with JaCoCo + run: mvn -B test --no-transfer-progress -Dgpg.skip=true + + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v4 + with: + token: ${{ secrets.CODECOV_TOKEN }} + files: | + evalkit-common/target/site/jacoco/jacoco.xml + evalkit-workflow/target/site/jacoco/jacoco.xml + evalkit-infra/target/site/jacoco/jacoco.xml + evalkit-eval/target/site/jacoco/jacoco.xml + evalkit-test/target/site/jacoco/jacoco.xml + flags: unittests + name: evalkit-coverage + fail_ci_if_error: false + verbose: true +``` + +## 第三步:在 GitHub 上配置 Codecov Token + +Push 代码前,你还需要在 GitHub 仓库中添加 Secret: + +1. 访问 [codecov.io](https://codecov.io),用 GitHub 账号登录 +2. 添加你的仓库 `zendodx/evalkit-framework`,获取 `CODECOV_TOKEN` +3. 在 GitHub 仓库页面进入 **Settings → Secrets and variables → Actions** +4. 点击 **New repository secret**,名称填 `CODECOV_TOKEN`,值填入从 Codecov 获取的 token + +## 配置完成后的效果 + +| 功能 | 说明 | +|------|------| +| 本地覆盖率报告 | 执行 `mvn test` 后,在各模块的 `target/site/jacoco/index.html` 可查看 HTML 报告 | +| CI 自动扫描 | 每次 push 或 PR 自动运行测试,生成覆盖率并上传到 Codecov | +| README 徽章 | Codecov 提供徽章,可以贴到 `README.md` | + +## 可选:在 README 中添加覆盖率徽章 + +```markdown +[![codecov](https://codecov.io/gh/zendodx/evalkit-framework/branch/main/graph/badge.svg?token=YOUR_TOKEN)](https://codecov.io/gh/zendodx/evalkit-framework) +``` + +--- + +**注意**:由于根 `pom.xml` 中 GPG 插件在 `verify` 阶段会签名,CI 里用了 `-Dgpg.skip=true` 跳过签名(只跑 `test` 阶段),否则会因为没有 GPG 私钥而失败。 \ No newline at end of file diff --git a/evalkit-test/pom.xml b/evalkit-test/pom.xml index 9a271c2..eca635b 100644 --- a/evalkit-test/pom.xml +++ b/evalkit-test/pom.xml @@ -40,6 +40,11 @@ HEAD + + + true + + io.github.zendodx diff --git a/pom.xml b/pom.xml index c335578..c298ea1 100644 --- a/pom.xml +++ b/pom.xml @@ -64,6 +64,7 @@ 1.5 3.2.5 1.7.3 + 0.8.12 @@ -127,6 +128,27 @@ + + + org.jacoco + jacoco-maven-plugin + ${jacoco-maven-plugin.version} + + + prepare-agent + + prepare-agent + + + + report + test + + report + + + + org.apache.maven.plugins @@ -144,6 +166,9 @@ OrderedDeltaEvalFacadeTest OrderedDeltaEvalWithinDataInjectTest + + -Dnet.bytebuddy.experimental=true From feeef4af39a46638cdbb6c120b5aa211ad5fd8a2 Mon Sep 17 00:00:00 2001 From: zendodx Date: Thu, 11 Jun 2026 11:18:37 +0800 Subject: [PATCH 3/7] =?UTF-8?q?fix:=20=E4=BC=98=E5=8C=96=E5=8D=95=E6=B5=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../scorer/config/RouterScorerConfig.java | 4 +- .../evalkit/framework/eval/core/CoreTest.java | 33 +++++-- .../FullEvalFacadeWithinDataInjectTest.java | 55 +++++++++-- ...edDeltaEvalFacadeWithinDataInjectTest.java | 81 +++++++++++++--- .../eval/node/api/HttpApiCompletionTest.java | 25 ++++- .../node/counter/AttributeCounterTest.java | 39 +++++++- .../node/counter/AttributeCounterV2Test.java | 43 ++++++++- .../KGBasedQueryGeneratorTest.java | 35 ++++++- .../MultiDataGeneratorTest.java | 42 ++++++-- .../PromptBasedQueryGeneratorTest.java | 56 ++++++++--- .../node/dataloader/ApiDataLoaderTest.java | 92 +++++++++++++++++- .../node/dataloader/JdbcDataLoaderTest.java | 95 +++++++++++++++++-- .../eval/node/reporter/ApiReporterTest.java | 48 +++++++++- .../eval/node/reporter/JdbcReportTest.java | 45 +++++++-- .../eval/node/scorer/GSBScorerTest.java | 89 ++++++++++++++--- .../node/scorer/PromptBasedScorerTest.java | 83 ++++++++++++++-- .../node/scorer/RubricBasedScorerTest.java | 10 +- .../eval/node/scorer/SecurityScorerTest.java | 61 +++++++++++- .../scorer/checker/LLMBasedCheckerTest.java | 30 +++++- .../service/llm/LLMServiceFactoryTest.java | 76 +++++++++++---- .../llm/LoadBalanceLLMServiceTest.java | 84 +++++++++++----- .../test/DAGEvalPerformanceTest.java | 2 +- .../test/DeltaEvalPerformanceTest.java | 2 +- 23 files changed, 976 insertions(+), 154 deletions(-) diff --git a/evalkit-eval/src/main/java/com/evalkit/framework/eval/node/scorer/config/RouterScorerConfig.java b/evalkit-eval/src/main/java/com/evalkit/framework/eval/node/scorer/config/RouterScorerConfig.java index 5805850..6e88adf 100644 --- a/evalkit-eval/src/main/java/com/evalkit/framework/eval/node/scorer/config/RouterScorerConfig.java +++ b/evalkit-eval/src/main/java/com/evalkit/framework/eval/node/scorer/config/RouterScorerConfig.java @@ -25,8 +25,8 @@ public class RouterScorerConfig extends ScorerConfig { @Builder.Default private Scorer defaultScorer = null; - /* 路由匹配模式,false=first-match,true=match-all(默认) */ + /* 路由匹配模式,false=first-match(默认),true=match-all */ @Builder.Default - private boolean matchAll = true; + private boolean matchAll = false; } diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/core/CoreTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/core/CoreTest.java index 8fef48d..8b001b1 100644 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/core/CoreTest.java +++ b/evalkit-eval/src/test/java/com/evalkit/framework/eval/core/CoreTest.java @@ -32,7 +32,6 @@ import com.evalkit.framework.eval.node.scorer.strategy.MaxScoreRateStrategy; import com.evalkit.framework.infra.service.llm.LLMService; import com.evalkit.framework.infra.service.llm.LLMTokenMetrics; -import com.evalkit.framework.infra.utils.DebugUtils; import com.evalkit.framework.workflow.WorkflowBuilder; import com.evalkit.framework.workflow.model.WorkflowContext; import com.fasterxml.jackson.core.type.TypeReference; @@ -40,6 +39,7 @@ import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import java.io.File; @@ -74,25 +74,45 @@ public class CoreTest { JsonReporter jsonReporter; End end; + /** + * 构造一个固定回复的 mock LLMService,不发起任何真实 HTTP 请求 + */ + private LLMService buildMockLLMService() { + return new LLMService() { + @Override + public String chat(String prompt) { + // 返回符合 JSON 格式的 mock 回复,满足 AttributeCounter 的期望格式 + return "{\"attributes\":[{\"name\":\"mock_attr\",\"value\":\"mock_value\"}]}"; + } + + @Override + public String getModel() { + return "mock-model"; + } + }; + } + @BeforeEach public void init() { - LLMService llmService = DebugUtils.buildLLMService(); + // 使用 mock LLMService 替代真实 DeepSeek 服务,不依赖外部 token 或 HTTP 请求 + LLMService llmService = buildMockLLMService(); begin = new Begin( BeginConfig.builder() .scoreStrategy(new MaxScoreRateStrategy()) .threshold(1) -// .evalReasonStrategy(new LLMSummaryEvalReasonStrategy(llmService)) .evalReasonStrategy(new JsonEvalReasonStrategy()) .build() ); + // dataGenerator 只在 dataGeneratorTest(已 @Disabled)中使用,但仍需初始化 + // travel_demo 相关文件在 classpath:src/test/resources/travel_demo/ 中已存在 dataGenerator = new KGBasedQueryGenerator( KGBasedQueryGeneratorConfig.builder() .scenarioConfigFilePath(ListUtils.of("travel_demo/scenario_config.json")) - .kgFilePath("travel_demo/travel_kg_v2.ttl") + .kgFilePath("travel_demo/travel_kg.ttl") .llmService(llmService) - .enableOutputFile(true) + .enableOutputFile(false) .generateCount(1) .threadNum(1) .build() @@ -107,7 +127,7 @@ public void init() { public List prepareDataList() { List inputDatas = new ArrayList<>(); for (int i = 0; i < 10; i++) { - inputDatas.add(new InputData(1L, JsonUtils.fromJson("{\t\"query\":\"hello, {{holiday}}\",\"type\":\"1\"}", new TypeReference>() { + inputDatas.add(new InputData(1L, JsonUtils.fromJson("{\"query\":\"hello, world\",\"type\":\"1\"}", new TypeReference>() { }))); } return inputDatas; @@ -274,6 +294,7 @@ public void fullTest() { } @Test + @Disabled("依赖外部 LLM 服务(需要 secret.properties token)及知识图谱生成,本地手动测试") public void dataGeneratorTest() { List scorers = ListUtils.of(scorer1, scorer2, scorer3); List reporters = ListUtils.of(reporter, htmlReporter, csvReporter, excelReporter, jsonReporter); diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/facade/FullEvalFacadeWithinDataInjectTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/facade/FullEvalFacadeWithinDataInjectTest.java index 01c8733..a3de77e 100644 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/facade/FullEvalFacadeWithinDataInjectTest.java +++ b/evalkit-eval/src/test/java/com/evalkit/framework/eval/facade/FullEvalFacadeWithinDataInjectTest.java @@ -1,8 +1,8 @@ package com.evalkit.framework.eval.facade; import com.evalkit.framework.common.utils.file.FileUtils; +import com.evalkit.framework.common.utils.json.JsonUtils; import com.evalkit.framework.common.utils.list.ListUtils; -import com.evalkit.framework.common.utils.runtime.RuntimeEnvUtils; import com.evalkit.framework.common.utils.time.DateUtils; import com.evalkit.framework.eval.facade.config.FullEvalConfig; import com.evalkit.framework.eval.model.DataItem; @@ -22,12 +22,18 @@ import com.evalkit.framework.workflow.Workflow; import com.evalkit.framework.workflow.WorkflowBuilder; import lombok.extern.slf4j.Slf4j; -import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.function.ThrowingSupplier; import java.io.File; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.stream.Collectors; import static org.junit.jupiter.api.Assertions.assertTimeoutPreemptively; @@ -35,6 +41,8 @@ @Slf4j class FullEvalFacadeWithinDataInjectTest { + private File tempJsonFile; + /** * 自定义全量式评测 */ @@ -58,11 +66,46 @@ protected void afterExecute() { } } + @BeforeEach + void setUp() throws IOException { + // 运行时动态创建临时 JSON 测试文件,不依赖外部文件路径 + // 构造符合 JsonFileDataLoader 期望格式的数据($.dataItems 数组) + List> dataItems = new java.util.ArrayList<>(); + for (int i = 0; i < 5; i++) { + Map inputItem = new HashMap<>(); + inputItem.put("query", "测试问题" + i); + inputItem.put("type", "1"); + + Map inputData = new HashMap<>(); + inputData.put("dataIndex", (long) i); + inputData.put("inputItem", inputItem); + + Map item = new HashMap<>(); + item.put("dataIndex", (long) i); + item.put("inputData", inputData); + dataItems.add(item); + } + Map jsonContent = new HashMap<>(); + jsonContent.put("dataItems", dataItems); + + // 写入临时文件 + tempJsonFile = File.createTempFile("full_eval_inject_test_", ".json"); + tempJsonFile.deleteOnExit(); + Files.write(tempJsonFile.toPath(), JsonUtils.toJson(jsonContent).getBytes(StandardCharsets.UTF_8)); + log.info("Created temp test file: {}", tempJsonFile.getAbsolutePath()); + } + + @AfterEach + void tearDown() { + if (tempJsonFile != null && tempJsonFile.exists()) { + tempJsonFile.delete(); + } + } + @Test - @Disabled public void test() throws Exception { - // 数据加载器,开启数据注入 - String filePath = RuntimeEnvUtils.getPropertyFromResource("secret.properties", "json-file-datainjector-test-file"); + // 使用运行时创建的临时文件,不依赖外部文件或 secret.properties + String filePath = tempJsonFile.getAbsolutePath(); JsonFileDataLoader jsonFileDataLoader = new JsonFileDataLoader( JsonFileDataLoaderConfig.builder() .jsonPath("$.dataItems") @@ -101,7 +144,7 @@ public ScorerResult eval(DataItem dataItem) { ScorerResult scorerResult = new ScorerResult(); scorerResult.setMetric("eval-test-2"); scorerResult.setScore(1.0); - scorerResult.setReason("eval test1:" + dataItem.getInputData().get("query")); + scorerResult.setReason("eval test2:" + dataItem.getInputData().get("query")); return scorerResult; } }; diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/facade/OrderedDeltaEvalFacadeWithinDataInjectTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/facade/OrderedDeltaEvalFacadeWithinDataInjectTest.java index fb8d809..4ff8dcb 100644 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/facade/OrderedDeltaEvalFacadeWithinDataInjectTest.java +++ b/evalkit-eval/src/test/java/com/evalkit/framework/eval/facade/OrderedDeltaEvalFacadeWithinDataInjectTest.java @@ -1,8 +1,8 @@ package com.evalkit.framework.eval.facade; import com.evalkit.framework.common.utils.file.FileUtils; +import com.evalkit.framework.common.utils.json.JsonUtils; import com.evalkit.framework.common.utils.list.ListUtils; -import com.evalkit.framework.common.utils.runtime.RuntimeEnvUtils; import com.evalkit.framework.common.utils.time.DateUtils; import com.evalkit.framework.eval.facade.config.DeltaEvalConfig; import com.evalkit.framework.eval.model.DataItem; @@ -23,12 +23,16 @@ import com.evalkit.framework.workflow.Workflow; import com.evalkit.framework.workflow.WorkflowBuilder; import lombok.extern.slf4j.Slf4j; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.function.ThrowingSupplier; import java.io.File; -import java.util.Comparator; -import java.util.List; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.util.*; import java.util.stream.Collectors; import static org.junit.jupiter.api.Assertions.assertTimeoutPreemptively; @@ -77,16 +81,67 @@ protected void afterLoadData() { @Override protected void afterExecute() { log.info("===>Finish consume and eval, remain data size:{}, processed data size:{}", getRemainDataCount(), getProcessedDataCount()); - List files = FileUtils.listFiles("attaches/"); + List files = FileUtils.listFiles(config.getAttachDir()); List collect = files.stream().map(File::getName).collect(Collectors.toList()); log.info("===>attaches files:{}", collect); } } + private File tempJsonFile; + + @BeforeEach + void setUp() throws IOException { + // 运行时动态创建临时 JSON 测试文件,不依赖外部文件路径或 secret.properties + // 构造符合 openInjectData 模式的嵌套数据格式($.dataItems 数组): + // item.dataIndex → DataInjector.injectDataIndex 读取(Long 类型) + // item.inputData → DataInjector.injectInputData 读取,包含业务字段 + // inputData.dataIndex + // inputData.inputItem → 实际业务字段(caseId、round、query) + // 构建 3 个 caseId,每个 caseId 有 2 轮数据,共 6 条 + List> dataItems = new ArrayList<>(); + long idx = 0L; + for (int caseId = 1; caseId <= 3; caseId++) { + for (int round = 1; round <= 2; round++) { + // 业务字段放在 inputItem 中 + Map inputItem = new HashMap<>(); + inputItem.put("caseId", caseId); + inputItem.put("round", round); + inputItem.put("query", "caseId=" + caseId + " round=" + round + " 测试问题"); + + // 嵌套的 inputData 对象 + Map inputData = new HashMap<>(); + inputData.put("dataIndex", idx); + inputData.put("inputItem", inputItem); + + // 顶层 item + Map item = new HashMap<>(); + item.put("dataIndex", idx); + item.put("inputData", inputData); + dataItems.add(item); + idx++; + } + } + Map jsonContent = new HashMap<>(); + jsonContent.put("dataItems", dataItems); + + // 写入临时文件 + tempJsonFile = File.createTempFile("ordered_delta_eval_inject_test_", ".json"); + tempJsonFile.deleteOnExit(); + Files.write(tempJsonFile.toPath(), JsonUtils.toJson(jsonContent).getBytes(StandardCharsets.UTF_8)); + log.info("Created temp test file: {}", tempJsonFile.getAbsolutePath()); + } + + @AfterEach + void tearDown() { + if (tempJsonFile != null && tempJsonFile.exists()) { + tempJsonFile.delete(); + } + } + @Test public void test() throws Exception { - // 数据加载器,开启数据注入 - String filePath = RuntimeEnvUtils.getPropertyFromResource("secret.properties", "json-file-datainjector-test-file"); + // 使用运行时创建的临时文件,不依赖外部文件或 secret.properties + String filePath = tempJsonFile.getAbsolutePath(); JsonFileDataLoader jsonFileDataLoader = new JsonFileDataLoader( JsonFileDataLoaderConfig.builder() .jsonPath("$.dataItems") @@ -124,7 +179,7 @@ public ScorerResult eval(DataItem dataItem) { ScorerResult scorerResult = new ScorerResult(); scorerResult.setMetric("eval-test-2"); scorerResult.setScore(1.0); - scorerResult.setReason("eval test1:" + dataItem.getInputData().get("query")); + scorerResult.setReason("eval test2:" + dataItem.getInputData().get("query")); return scorerResult; } }; @@ -140,12 +195,14 @@ public ScorerResult eval(DataItem dataItem) { }; // 评测结果上报 + String taskName = "OrderedDeltaEvalWithinDataInjectTest"; + String attachDir = "attachments/" + taskName; String fileName = "ordered_delta_eval_within_datainject_test_" + DateUtils.nowToString(); BasicCounter basicCounter = new BasicCounter(); - HtmlReporter htmlReporter = new HtmlReporter(fileName, fileName); - JsonReporter jsonReporter = new JsonReporter(fileName, fileName); - ExcelReporter excelReporter = new ExcelReporter(fileName, fileName); - CsvReporter csvReporter = new CsvReporter(fileName, fileName); + HtmlReporter htmlReporter = new HtmlReporter(fileName, attachDir); + JsonReporter jsonReporter = new JsonReporter(fileName, attachDir); + ExcelReporter excelReporter = new ExcelReporter(fileName, attachDir); + CsvReporter csvReporter = new CsvReporter(fileName, attachDir); List scorers = ListUtils.of(scorer1, scorer2, scorer3); @@ -156,7 +213,7 @@ public ScorerResult eval(DataItem dataItem) { CustomDeltaEval cfe = new CustomDeltaEval( DeltaEvalConfig.builder() - .taskName("OrderedDeltaEvalWithinDataInjectTest") + .taskName(taskName) .dataLoader(jsonFileDataLoader) .evalWorkflow(evalWorkflow) .reportWorkflow(reportWorkflow) diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/api/HttpApiCompletionTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/api/HttpApiCompletionTest.java index bfea4a9..37aea54 100644 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/api/HttpApiCompletionTest.java +++ b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/api/HttpApiCompletionTest.java @@ -9,13 +9,22 @@ import java.util.Collections; import java.util.Map; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; + class HttpApiCompletionTest { - void test() { + + /** + * 测试 HttpApiCompletion 可以正确构建并初始化,使用 localhost 作为 mock host + * 不发起真实 HTTP 请求,只验证对象构建逻辑 + */ + @Test + void testConstructAndBuildConfig() { HttpApiCompletion httpApiCompletion = new HttpApiCompletion( HttpApiCompletionConfig.builder() - .host("") - .api("") - .method("") + .host("http://localhost:8080") + .api("/api/test") + .method("POST") .build() ) { @Override @@ -35,8 +44,14 @@ public Map prepareHeader(InputData inputData) { @Override public ApiCompletionResult buildApiCompletionResult(InputData inputData, HttpApiResponse response) { - return null; + return new ApiCompletionResult(); } }; + + assertNotNull(httpApiCompletion, "HttpApiCompletion 实例不应为 null"); + assertNotNull(httpApiCompletion.getConfig(), "HttpApiCompletion 配置不应为 null"); + HttpApiCompletionConfig config = (HttpApiCompletionConfig) httpApiCompletion.getConfig(); + assertEquals("http://localhost:8080", config.getHost(), "Host 应与构建时一致"); + assertEquals("/api/test", config.getApi(), "API 路径应与构建时一致"); } } \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/counter/AttributeCounterTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/counter/AttributeCounterTest.java index 5a41749..078c802 100644 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/counter/AttributeCounterTest.java +++ b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/counter/AttributeCounterTest.java @@ -4,25 +4,56 @@ import com.evalkit.framework.eval.model.CountResult; import com.evalkit.framework.eval.model.DataItem; import com.evalkit.framework.infra.service.llm.LLMService; -import com.evalkit.framework.infra.utils.DebugUtils; import com.fasterxml.jackson.core.type.TypeReference; import lombok.extern.slf4j.Slf4j; -import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import java.util.List; +import java.util.concurrent.atomic.AtomicInteger; + +import static org.junit.jupiter.api.Assertions.assertNotNull; @Slf4j class AttributeCounterTest { + + /** + * 构造一个 mock LLMService: + * - 第一次调用(问题类型提取):返回 "编号|问题类型" 格式 + * - 后续调用(同义词归一化):返回合法 JSON 格式 + */ + private LLMService buildMockLLMService() { + AtomicInteger callCount = new AtomicInteger(0); + return new LLMService() { + @Override + public String chat(String prompt) { + int count = callCount.incrementAndGet(); + if (count == 1) { + // 第一次:提取问题类型,格式为 "编号|问题类型" + return "0|查询机票#价格咨询\n1|预订问题"; + } else { + // 后续:同义词归一化,返回合法 JSON + return "{\"价格咨询\": [\"查询机票\", \"价格咨询\"], \"预订问题\": [\"预订问题\"]}"; + } + } + + @Override + public String getModel() { + return "mock-model"; + } + }; + } + @Test - @Disabled public void test() { - LLMService llmService = DebugUtils.buildLLMService(); + LLMService llmService = buildMockLLMService(); + // 从 classpath 加载预置测试数据,不依赖外部文件 List dataItems = JsonUtils.readJsonFile("classpath:dataItems.json", new TypeReference>() { }); dataItems = dataItems.subList(0, 2); AttributeCounter counter = new AttributeCounter(llmService); CountResult countResult = counter.count(dataItems); + + assertNotNull(countResult, "统计结果不应为 null"); log.info("countResult: {}", JsonUtils.toJson(countResult)); } } \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/counter/AttributeCounterV2Test.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/counter/AttributeCounterV2Test.java index 2b6e779..ac90fb5 100644 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/counter/AttributeCounterV2Test.java +++ b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/counter/AttributeCounterV2Test.java @@ -4,25 +4,60 @@ import com.evalkit.framework.eval.model.CountResult; import com.evalkit.framework.eval.model.DataItem; import com.evalkit.framework.infra.service.llm.LLMService; -import com.evalkit.framework.infra.utils.DebugUtils; import com.fasterxml.jackson.core.type.TypeReference; import lombok.extern.slf4j.Slf4j; -import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import java.util.List; +import java.util.concurrent.atomic.AtomicInteger; + +import static org.junit.jupiter.api.Assertions.assertNotNull; @Slf4j class AttributeCounterV2Test { + + /** + * 构造一个 mock LLMService,符合 AttributeCounterV2 的期望格式: + * - 提取阶段:返回 "编号|类别|问题|置信度|情感" 格式(每行5字段,用|分隔) + * - 归一化阶段:返回合法 JSON({ "标准名": ["同义名"] } 格式) + * - 摘要阶段:返回简短文本描述 + */ + private LLMService buildMockLLMService() { + AtomicInteger callCount = new AtomicInteger(0); + return new LLMService() { + @Override + public String chat(String prompt) { + int count = callCount.incrementAndGet(); + if (count == 1) { + // 提取阶段:返回 "编号|类别|问题|置信度|情感" 格式 + return "0|查询问题|机票价格查询|0.9|NEG\n1|预订问题|座位预订失败|0.8|NEG"; + } else if (prompt.contains("合并") || prompt.contains("归一化") || prompt.contains("标准名")) { + // 归一化阶段:返回 JSON 格式 + return "{\"查询问题\": [\"查询问题\"], \"预订问题\": [\"预订问题\"]}"; + } else { + // 摘要阶段:返回简短描述 + return "用户反馈机票查询和预订相关问题"; + } + } + + @Override + public String getModel() { + return "mock-model"; + } + }; + } + @Test - @Disabled public void test() { - LLMService llmService = DebugUtils.buildLLMService(); + LLMService llmService = buildMockLLMService(); + // 从 classpath 加载预置测试数据,不依赖外部文件 List dataItems = JsonUtils.readJsonFile("classpath:dataItems.json", new TypeReference>() { }); dataItems = dataItems.subList(0, 2); AttributeCounterV2 counter = new AttributeCounterV2(llmService); CountResult countResult = counter.count(dataItems); + + assertNotNull(countResult, "统计结果不应为 null"); log.info("countResult: {}", JsonUtils.toJson(countResult)); } } \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/data_generator/KGBasedQueryGeneratorTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/data_generator/KGBasedQueryGeneratorTest.java index 0722fec..6cb5515 100644 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/data_generator/KGBasedQueryGeneratorTest.java +++ b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/data_generator/KGBasedQueryGeneratorTest.java @@ -4,28 +4,54 @@ import com.evalkit.framework.eval.model.InputData; import com.evalkit.framework.eval.node.data_generator.config.KGBasedQueryGeneratorConfig; import com.evalkit.framework.infra.service.llm.LLMService; -import com.evalkit.framework.infra.utils.DebugUtils; import lombok.extern.slf4j.Slf4j; import org.junit.jupiter.api.Test; import java.util.List; +import static org.junit.jupiter.api.Assertions.assertNotNull; + @Slf4j class KGBasedQueryGeneratorTest { + /** + * 构造一个 mock LLMService,返回符合 Turn JSON 格式的内容: + * KGBasedQueryGenerator 期望 LLM 返回 List 的 JSON 数组 + */ + private LLMService buildMockLLMService() { + return new LLMService() { + @Override + public String chat(String prompt) { + // 返回合法的 Turn JSON 数组,匹配 scenario_config.json 中定义的 4 轮对话 + return "[" + + "{\"turn\":1,\"query\":\"打算带孩子去北京玩,有什么必看景点推荐吗?\"}," + + "{\"turn\":2,\"query\":\"从上海出发,有什么推荐的交通方式吗?\"}," + + "{\"turn\":3,\"query\":\"到了那边晚上住哪里比较方便?\"}," + + "{\"turn\":4,\"query\":\"帮我把刚才看好的车票预订一下。\"}" + + "]"; + } + + @Override + public String getModel() { + return "mock-model"; + } + }; + } + @Test public void test() throws Exception { + // 文件已存在于 classpath:travel_demo/,由 KGBasedQueryGenerator 自动从 classpath 加载 String kgFilePath = "travel_demo/travel_kg.ttl"; String scenarioConfigFilePath = "travel_demo/scenario_config.json"; String scenarioConfigFilePath2 = "travel_demo/scenario2_config.json"; - LLMService llmService = DebugUtils.buildLLMService(); + LLMService llmService = buildMockLLMService(); KGBasedQueryGenerator generator = new KGBasedQueryGenerator( KGBasedQueryGeneratorConfig.builder() .scenarioConfigFilePath(ListUtils.of(scenarioConfigFilePath, scenarioConfigFilePath2)) .kgFilePath(kgFilePath) .llmService(llmService) - .enableOutputFile(true) + .enableOutputFile(false) // 关闭文件输出,避免在 CI 环境写文件 .generateCount(1) .threadNum(1) .sessionIdFieldName("session_id") @@ -36,6 +62,7 @@ public void test() throws Exception { ); List generated = generator.generateWrapper(); - log.debug("generated: {}", generated); + assertNotNull(generated, "生成的数据列表不应为 null"); + log.debug("generated count: {}, data: {}", generated.size(), generated); } } \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/data_generator/MultiDataGeneratorTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/data_generator/MultiDataGeneratorTest.java index 7a0d22c..7a4bcba 100644 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/data_generator/MultiDataGeneratorTest.java +++ b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/data_generator/MultiDataGeneratorTest.java @@ -4,24 +4,50 @@ import com.evalkit.framework.eval.node.data_generator.config.KGBasedQueryGeneratorConfig; import com.evalkit.framework.eval.node.data_generator.config.MultiDataGeneratorConfig; import com.evalkit.framework.infra.service.llm.LLMService; -import com.evalkit.framework.infra.utils.DebugUtils; import org.junit.jupiter.api.Test; +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; + class MultiDataGeneratorTest { + + /** + * 构造一个 mock LLMService,返回符合 Turn JSON 格式的内容 + */ + private LLMService buildMockLLMService() { + return new LLMService() { + @Override + public String chat(String prompt) { + // 返回合法的 Turn JSON 数组,供 KGBasedQueryGenerator 解析 + return "[" + + "{\"turn\":1,\"query\":\"我想了解一下旅游攻略\"}," + + "{\"turn\":2,\"query\":\"请推荐交通方式\"}," + + "{\"turn\":3,\"query\":\"有什么酒店推荐吗?\"}," + + "{\"turn\":4,\"query\":\"帮我预订一下。\"}" + + "]"; + } + + @Override + public String getModel() { + return "mock-model"; + } + }; + } + @Test public void test() { + // 文件已存在于 classpath:travel_demo/,由 KGBasedQueryGenerator 自动从 classpath 加载 String kgFilePath = "travel_demo/travel_kg.ttl"; String scenarioConfigFilePath = "travel_demo/scenario_config.json"; String scenario2ConfigFilePath = "travel_demo/scenario2_config.json"; - LLMService llmService = DebugUtils.buildLLMService(); + LLMService llmService = buildMockLLMService(); KGBasedQueryGenerator generator1 = new KGBasedQueryGenerator( KGBasedQueryGeneratorConfig.builder() .scenarioConfigFilePath(ListUtils.of(scenarioConfigFilePath)) .kgFilePath(kgFilePath) .llmService(llmService) - .enableOutputFile(true) + .enableOutputFile(false) // 关闭文件输出,避免在 CI 环境写文件 .generateCount(1) .build() ); @@ -31,18 +57,20 @@ public void test() { .scenarioConfigFilePath(ListUtils.of(scenario2ConfigFilePath)) .kgFilePath(kgFilePath) .llmService(llmService) - .enableOutputFile(true) + .enableOutputFile(false) .generateCount(1) .build() ); - MultiDataGenerator multiDataGenerator = new MultiDataGenerator( MultiDataGeneratorConfig.builder() .dataGenerators(ListUtils.of(generator1, generator2)) - .enableOutputFile(true) + .enableOutputFile(false) .build() ); - multiDataGenerator.generateWrapper(); + + // 调用并验证结果不为 null + assertDoesNotThrow(multiDataGenerator::generateWrapper, + "MultiDataGenerator 不应抛出异常"); } } \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/data_generator/querygen/PromptBasedQueryGeneratorTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/data_generator/querygen/PromptBasedQueryGeneratorTest.java index dc027ce..b56d976 100644 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/data_generator/querygen/PromptBasedQueryGeneratorTest.java +++ b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/data_generator/querygen/PromptBasedQueryGeneratorTest.java @@ -1,28 +1,40 @@ package com.evalkit.framework.eval.node.data_generator.querygen; -import com.evalkit.framework.common.utils.runtime.RuntimeEnvUtils; import com.evalkit.framework.eval.node.querygen.PromptBasedQueryGenerator; import com.evalkit.framework.eval.node.querygen.config.PromptBasedQueryGeneratorConfig; import com.evalkit.framework.infra.service.llm.LLMService; -import com.evalkit.framework.infra.service.llm.LLMServiceFactory; -import com.evalkit.framework.infra.service.llm.config.DeepseekLLMServiceConfig; -import com.evalkit.framework.infra.service.llm.constants.LLMServiceEnum; import lombok.extern.slf4j.Slf4j; import org.junit.jupiter.api.Test; import java.util.List; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; + @Slf4j class PromptBasedQueryGeneratorTest { + + /** + * 构造一个 mock LLMService,返回换行分隔的 Query 列表(PromptBasedQueryGenerator 按 \n 分割回复) + */ + private LLMService buildMockLLMService() { + return new LLMService() { + @Override + public String chat(String prompt) { + // 返回多行文本,模拟 LLM 生成 Query 的格式(每行一条 Query) + return "如何快速预订机票\n机票价格最低查询\n最近热门旅游目的地推荐"; + } + + @Override + public String getModel() { + return "mock-model"; + } + }; + } + @Test void test() { - String deepSeekToken = RuntimeEnvUtils.getPropertyFromResource("secret.properties", "deepseek-token"); - LLMService llmService = LLMServiceFactory.createLLMService( - LLMServiceEnum.DEEPSEEK.name(), - DeepseekLLMServiceConfig.builder() - .apiToken(deepSeekToken) - .build() - ); + LLMService llmService = buildMockLLMService(); PromptBasedQueryGenerator promptBasedQueryGenerator = new PromptBasedQueryGenerator( PromptBasedQueryGeneratorConfig.builder() @@ -32,6 +44,28 @@ void test() { .build() ); List queries = promptBasedQueryGenerator.generate(); + + assertNotNull(queries, "生成的 queries 不应为 null"); + assertFalse(queries.isEmpty(), "生成的 queries 不应为空"); log.info("queries: {}", queries); } + + @Test + void testCustomSysPrompt() { + LLMService llmService = buildMockLLMService(); + + PromptBasedQueryGenerator generator = new PromptBasedQueryGenerator( + PromptBasedQueryGeneratorConfig.builder() + .llmService(llmService) + .sysPrompt("你是一个Query生成助手,请生成简短的用户查询") + .userPrompt("关键词: 酒店预订") + .genCount(3) + .langStyle("简洁直接") + .build() + ); + + List queries = generator.generate(); + assertNotNull(queries, "使用自定义 sysPrompt 生成的 queries 不应为 null"); + log.info("customSysPrompt queries: {}", queries); + } } \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader/ApiDataLoaderTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader/ApiDataLoaderTest.java index 1449054..e2b1229 100644 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader/ApiDataLoaderTest.java +++ b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader/ApiDataLoaderTest.java @@ -7,13 +7,96 @@ import java.util.Map; import java.util.concurrent.TimeUnit; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertThrows; + class ApiDataLoaderTest { - void test() { + /** + * 测试 ApiDataLoader 配置校验逻辑:host 为空时应抛出 IllegalArgumentException + */ + @Test + void testEmptyHostThrowsException() { + assertThrows(IllegalArgumentException.class, () -> { + new ApiDataLoader( + ApiDataLoaderConfig.builder() + .host("") + .api("/api/test") + .method("get") + .timeout(10) + .timeUnit(TimeUnit.SECONDS) + .build() + ) { + @Override + public Map prepareBody() { + return Collections.emptyMap(); + } + + @Override + public Map prepareParam() { + return Collections.emptyMap(); + } + + @Override + public Map prepareHeader() { + return Collections.emptyMap(); + } + + @Override + public String prepareJsonpath() { + return "$.data"; + } + }; + }, "host 为空时构造应抛出 IllegalArgumentException"); + } + + /** + * 测试 ApiDataLoader 配置校验逻辑:api 为空时应抛出 IllegalArgumentException + */ + @Test + void testEmptyApiThrowsException() { + assertThrows(IllegalArgumentException.class, () -> { + new ApiDataLoader( + ApiDataLoaderConfig.builder() + .host("http://localhost:8080") + .api("") + .method("get") + .timeout(10) + .timeUnit(TimeUnit.SECONDS) + .build() + ) { + @Override + public Map prepareBody() { + return Collections.emptyMap(); + } + + @Override + public Map prepareParam() { + return Collections.emptyMap(); + } + + @Override + public Map prepareHeader() { + return Collections.emptyMap(); + } + + @Override + public String prepareJsonpath() { + return "$.data"; + } + }; + }, "api 为空时构造应抛出 IllegalArgumentException"); + } + + /** + * 测试 ApiDataLoader 正常构建(不发起真实 HTTP 请求,只验证构造成功) + */ + @Test + void testConstructWithValidConfig() { ApiDataLoader apiDataLoader = new ApiDataLoader( ApiDataLoaderConfig.builder() - .host("") - .api("") + .host("http://localhost:8080") + .api("/api/data") .method("get") .timeout(10) .timeUnit(TimeUnit.SECONDS) @@ -39,6 +122,7 @@ public String prepareJsonpath() { return "$.data"; } }; - } + assertNotNull(apiDataLoader, "ApiDataLoader 实例不应为 null"); + } } \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader/JdbcDataLoaderTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader/JdbcDataLoaderTest.java index f65721e..976350e 100644 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader/JdbcDataLoaderTest.java +++ b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader/JdbcDataLoaderTest.java @@ -1,22 +1,105 @@ package com.evalkit.framework.eval.node.dataloader; +import com.evalkit.framework.eval.model.InputData; import com.evalkit.framework.eval.node.dataloader.config.JdbcDataLoaderConfig; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; +import java.io.File; +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.Statement; +import java.util.List; + +import static org.junit.jupiter.api.Assertions.*; + +/** + * JdbcDataLoader 测试 —— 使用 SQLite 文件型数据库替代外部 MySQL,无需外部服务 + *

+ * 注意:SQLite 内存模式(file::memory:)与 HikariCP 连接池不兼容(DriverManager 创建的连接 + * 与 HikariCP 连接池使用的连接相互隔离),因此改用临时文件型 SQLite,确保连接共享同一数据库。 + */ class JdbcDataLoaderTest { - void test() { + + private static final String SQLITE_DRIVER = "org.sqlite.JDBC"; + private File tempDbFile; + private String sqliteUrl; + + @BeforeEach + void setUp() throws Exception { + // 创建临时 SQLite 文件,确保 DriverManager 和 HikariCP 访问同一数据库 + tempDbFile = File.createTempFile("jdbcloader_test_", ".db"); + tempDbFile.deleteOnExit(); + sqliteUrl = "jdbc:sqlite:" + tempDbFile.getAbsolutePath(); + + // 在 SQLite 文件中创建测试表并插入数据 + try (Connection conn = DriverManager.getConnection(sqliteUrl); + Statement st = conn.createStatement()) { + st.execute("CREATE TABLE IF NOT EXISTS testcase (" + + "id INTEGER PRIMARY KEY, " + + "query TEXT NOT NULL, " + + "expected TEXT)"); + st.execute("DELETE FROM testcase"); + st.execute("INSERT INTO testcase (query, expected) VALUES ('hello world', '预期回复1')"); + st.execute("INSERT INTO testcase (query, expected) VALUES ('test query', '预期回复2')"); + } + } + + @AfterEach + void tearDown() { + if (tempDbFile != null && tempDbFile.exists()) { + tempDbFile.delete(); + } + } + + /** + * 测试 JdbcDataLoader 可以通过 SQLite 文件数据库正常加载数据 + */ + @Test + void testLoadDataFromSQLite() throws Exception { JdbcDataLoader jdbcDataLoader = new JdbcDataLoader( JdbcDataLoaderConfig.builder() - .driver("com.mysql.jdbc.Driver") - .url("jdbc:mysql://127.0.0.1:3306/evalkit?useSSL=false&serverTimezone=Asia/Shanghai&characterEncoding=utf8") - .user("root") - .password("root") + .driver(SQLITE_DRIVER) + .url(sqliteUrl) + // SQLite 不需要用户名,但 validConfig 要求非空,传 "sa" 作为占位符 + .user("sa") + .password("") .build() ) { @Override public String prepareSql() { - return "select * from testcase"; + return "SELECT * FROM testcase"; } }; + + List dataList = jdbcDataLoader.prepareDataList(); + assertNotNull(dataList, "加载的数据列表不应为 null"); + assertEquals(2, dataList.size(), "应加载 2 条测试数据"); + + // 验证数据内容 + InputData first = dataList.get(0); + assertNotNull(first.getInputItem(), "数据项的 inputItem 不应为 null"); + assertTrue(first.getInputItem().containsKey("query"), "应包含 query 字段"); + } + + /** + * 测试 JdbcDataLoader 校验逻辑:driver 为空时应抛出异常 + */ + @Test + void testEmptyDriverThrowsException() { + assertThrows(IllegalArgumentException.class, () -> + new JdbcDataLoader( + JdbcDataLoaderConfig.builder() + .driver("") + .url(sqliteUrl) + .user("") + .password("") + .build() + ) { + @Override + public String prepareSql() { return "SELECT * FROM testcase"; } + } + ); } } \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/reporter/ApiReporterTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/reporter/ApiReporterTest.java index 6c8a9b4..d4bdf0f 100644 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/reporter/ApiReporterTest.java +++ b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/reporter/ApiReporterTest.java @@ -1,12 +1,21 @@ package com.evalkit.framework.eval.node.reporter; import com.evalkit.framework.eval.model.DataItem; +import org.junit.jupiter.api.Test; import java.util.Collections; import java.util.Map; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; + class ApiReporterTest { - void test() { + + /** + * 测试 ApiReporter 可以正常构建,不发起真实 HTTP 请求 + */ + @Test + void testConstructApiReporter() { String host = "http://localhost:8080"; String api = "/api/test"; String method = "POST"; @@ -26,5 +35,42 @@ public Map prepareParams(DataItem item) { return Collections.emptyMap(); } }; + + assertNotNull(apiReporter, "ApiReporter 实例不应为 null"); + assertNotNull(apiReporter.getRequest(), "ApiReporter 的 request 不应为 null"); + assertEquals(host, apiReporter.getRequest().getHost(), "Host 应与构建时一致"); + assertEquals(api, apiReporter.getRequest().getApi(), "API 路径应与构建时一致"); + } + + /** + * 测试 prepareBody/prepareHeader/prepareParams 可以正确返回空 Map + */ + @Test + void testPrepareMethods() { + ApiReporter apiReporter = new ApiReporter("http://localhost:8080", "/api/report", "POST") { + @Override + public Map prepareBody(DataItem item) { + return Collections.singletonMap("key", "value"); + } + + @Override + public Map prepareHeader(DataItem item) { + return Collections.singletonMap("Content-Type", "application/json"); + } + + @Override + public Map prepareParams(DataItem item) { + return Collections.emptyMap(); + } + }; + + DataItem dataItem = new DataItem(); + Map body = apiReporter.prepareBody(dataItem); + assertNotNull(body, "prepareBody 不应返回 null"); + assertEquals("value", body.get("key")); + + Map headers = apiReporter.prepareHeader(dataItem); + assertNotNull(headers, "prepareHeader 不应返回 null"); + assertEquals("application/json", headers.get("Content-Type")); } } \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/reporter/JdbcReportTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/reporter/JdbcReportTest.java index 79e81c3..46c3fb8 100644 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/reporter/JdbcReportTest.java +++ b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/reporter/JdbcReportTest.java @@ -1,16 +1,47 @@ package com.evalkit.framework.eval.node.reporter; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.*; + +/** + * JdbcReport 测试 —— 验证 SQLite 内嵌数据库可以正常构建 JdbcReport 实例 + * 注意:JdbcReport.report() 方法使用了 MySQL 专用建表 SQL(auto_increment、comment), + * 因此仅测试对象构建逻辑,不执行真实的报告写入操作 + */ class JdbcReportTest { - void test() { - String driver = "com.mysql.cj.jdbc.Driver"; - String url = "jdbc:mysql://127.0.0.1:3306/evalkit?useUnicode=true&characterEncoding=UTF-8&serverTimezone=Asia/Shanghai"; - String username = "root"; - String password = "123456"; - JdbcReport jdbcReport = new JdbcReport(driver, url, username, password) { + + private static final String SQLITE_URL = "jdbc:sqlite:file::memory:?cache=shared&db=jdbc_report_test"; + private static final String SQLITE_DRIVER = "org.sqlite.JDBC"; + + /** + * 测试 JdbcReport 可以使用 SQLite 内嵌数据库正常构建,不依赖外部 MySQL + */ + @Test + void testConstructWithSQLite() { + JdbcReport jdbcReport = new JdbcReport(SQLITE_DRIVER, SQLITE_URL, "", "") { @Override public String prepareTableName() { - return ""; + return "eval_result"; } }; + assertNotNull(jdbcReport, "JdbcReport 实例不应为 null"); + assertEquals("eval_result", jdbcReport.prepareTableName(), "表名应正确返回"); + } + + /** + * 测试 JdbcReport 可以连接并验证 SQLite 连接池正常初始化 + */ + @Test + void testConnectionPoolInitialized() { + assertDoesNotThrow(() -> { + JdbcReport jdbcReport = new JdbcReport(SQLITE_DRIVER, SQLITE_URL, "", "") { + @Override + public String prepareTableName() { + return "test_table"; + } + }; + assertNotNull(jdbcReport); + }, "使用 SQLite 构建 JdbcReport 不应抛出异常"); } } \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/GSBScorerTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/GSBScorerTest.java index 692064b..3f29648 100644 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/GSBScorerTest.java +++ b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/GSBScorerTest.java @@ -1,32 +1,43 @@ package com.evalkit.framework.eval.node.scorer; -import com.evalkit.framework.common.utils.runtime.RuntimeEnvUtils; import com.evalkit.framework.eval.model.ApiCompletionResult; import com.evalkit.framework.eval.model.DataItem; import com.evalkit.framework.eval.model.InputData; import com.evalkit.framework.eval.model.ScorerResult; import com.evalkit.framework.eval.node.scorer.config.PromptBasedScorerConfig; -import com.evalkit.framework.infra.service.llm.DeepSeekLLMService; import com.evalkit.framework.infra.service.llm.LLMService; -import com.evalkit.framework.infra.service.llm.LLMServiceFactory; -import com.evalkit.framework.infra.service.llm.config.DeepseekLLMServiceConfig; -import com.evalkit.framework.infra.service.llm.config.LLMServiceConfig; import lombok.extern.slf4j.Slf4j; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; +import static org.junit.jupiter.api.Assertions.assertNotNull; + @Slf4j class GSBScorerTest { LLMService llmService; @BeforeEach void setUp() { - String deepSeekToken = RuntimeEnvUtils.getPropertyFromResource("secret.properties", "deepseek-token"); - LLMServiceFactory.registerLLMService("DeepSeek_Test", (LLMServiceFactory.LLMServiceBuilder) config -> new DeepSeekLLMService((DeepseekLLMServiceConfig) config)); - DeepseekLLMServiceConfig config = DeepseekLLMServiceConfig.builder() - .apiToken(deepSeekToken) - .build(); - llmService = LLMServiceFactory.createLLMService("DeepSeek_Test", config); + // 使用 mock LLMService 替代真实 DeepSeek,不依赖外部 token 或 HTTP 请求 + // GSBScorer.parseLLMReply 期望 LLM 返回 JSON 格式,包含 accuracy/relevance/completeness/fluency/reason 字段 + llmService = new LLMService() { + @Override + public String chat(String prompt) { + // 返回符合 GSBScorer 期望的 JSON 格式(各维度低分,表示候选回答较差) + return "{\n" + + " \"accuracy\": 2,\n" + + " \"relevance\": 2,\n" + + " \"completeness\": 2,\n" + + " \"fluency\": 3,\n" + + " \"reason\": \"候选答案与金标准存在明显差距,缺少关键信息。\"\n" + + "}"; + } + + @Override + public String getModel() { + return "mock-model"; + } + }; } @Test @@ -55,6 +66,60 @@ public String prepareInput(InputData inputData, ApiCompletionResult apiCompletio dataItem.setInputData(new InputData()); dataItem.setApiCompletionResult(new ApiCompletionResult()); ScorerResult scorerResult = gsbScorer.eval(dataItem); - log.error("scorerResult:{}", scorerResult); + + assertNotNull(scorerResult, "评分结果不应为 null"); + log.info("scorerResult:{}", scorerResult); + } + + @Test + void testGoodResult() { + // mock LLM 返回高分 JSON,表示候选回答比参考回答好 + // GSBScorer.parseLLMReply 期望 JSON 格式,包含 accuracy/relevance/completeness/fluency/reason + LLMService goodLLM = new LLMService() { + @Override + public String chat(String prompt) { + return "{\n" + + " \"accuracy\": 5,\n" + + " \"relevance\": 5,\n" + + " \"completeness\": 5,\n" + + " \"fluency\": 5,\n" + + " \"reason\": \"候选答案与金标准语义一致,语言自然,无遗漏。\"\n" + + "}"; + } + + @Override + public String getModel() { + return "mock-model"; + } + }; + + GSBScorer gsbScorer = new GSBScorer( + PromptBasedScorerConfig.builder() + .llmService(goodLLM) + .build() + ) { + @Override + public String prepareGoldAnswer(InputData inputData, ApiCompletionResult apiCompletionResult) { + return "gold answer"; + } + + @Override + public String prepareCandidateAnswer(InputData inputData, ApiCompletionResult apiCompletionResult) { + return "better candidate"; + } + + @Override + public String prepareInput(InputData inputData, ApiCompletionResult apiCompletionResult) { + return "test input"; + } + }; + + DataItem dataItem = new DataItem(); + dataItem.setInputData(new InputData()); + dataItem.setApiCompletionResult(new ApiCompletionResult()); + ScorerResult result = gsbScorer.eval(dataItem); + + assertNotNull(result, "评分结果不应为 null"); + log.info("Good result scorerResult:{}", result); } } \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/PromptBasedScorerTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/PromptBasedScorerTest.java index b7b9bb1..30d6716 100644 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/PromptBasedScorerTest.java +++ b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/PromptBasedScorerTest.java @@ -1,31 +1,102 @@ package com.evalkit.framework.eval.node.scorer; import com.evalkit.framework.eval.model.ApiCompletionResult; +import com.evalkit.framework.eval.model.DataItem; import com.evalkit.framework.eval.model.InputData; +import com.evalkit.framework.eval.model.ScorerResult; import com.evalkit.framework.eval.node.scorer.config.PromptBasedScorerConfig; -import com.evalkit.framework.infra.service.llm.LLMServiceFactory; +import com.evalkit.framework.infra.service.llm.LLMService; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; class PromptBasedScorerTest { - void test() { + + /** + * 构造一个 mock LLMService,返回符合 PromptBasedScorer.LLMResult 格式的 JSON + */ + private LLMService buildMockLLMService() { + return new LLMService() { + @Override + public String chat(String prompt) { + // 返回符合 LLMResult(包含 score 和 reason 字段)的 JSON + return "{\"score\":0.8,\"reason\":\"回复基本符合预期\"}"; + } + + @Override + public String getModel() { + return "mock-model"; + } + }; + } + + @Test + void testConstructPromptBasedScorer() { PromptBasedScorer promptBasedScorer = new PromptBasedScorer( PromptBasedScorerConfig.builder() - .llmService(LLMServiceFactory.createLLMService("test", null)) + .llmService(buildMockLLMService()) .build() ) { @Override public String prepareSysPrompt() { - return ""; + return "你是一个评分助手"; } @Override public String prepareUserPrompt(InputData inputData, ApiCompletionResult apiCompletionResult) { - return ""; + return "问题: hello\n答案: world"; } @Override public LLMResult parseLLMReply(String reply) { - return null; + // 使用 setter 方法(@Data 生成 private 字段的 getter/setter) + LLMResult result = new LLMResult(); + result.setScore(0.8); + result.setReason("mock reason"); + return result; } }; + + assertNotNull(promptBasedScorer, "PromptBasedScorer 实例不应为 null"); + } + + @Test + void testEvalWithMockLLM() { + PromptBasedScorer promptBasedScorer = new PromptBasedScorer( + PromptBasedScorerConfig.builder() + .llmService(buildMockLLMService()) + .metricName("相关性检查") + .totalScore(1) + .enableRetry(false) + .build() + ) { + @Override + public String prepareSysPrompt() { + return "你是一个评分助手"; + } + + @Override + public String prepareUserPrompt(InputData inputData, ApiCompletionResult apiCompletionResult) { + return "问题: hello\n答案: world"; + } + + @Override + public LLMResult parseLLMReply(String reply) { + LLMResult result = new LLMResult(); + result.setScore(0.8); + result.setReason("回复基本符合预期"); + return result; + } + }; + + DataItem dataItem = new DataItem(); + dataItem.setInputData(new InputData()); + dataItem.setApiCompletionResult(new ApiCompletionResult()); + + ScorerResult result = promptBasedScorer.eval(dataItem); + assertNotNull(result, "评分结果不应为 null"); + assertEquals(0.8, result.getScore(), 1e-6, "评分应为 0.8"); + assertEquals("相关性检查", result.getMetric(), "指标名应正确"); } } \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/RubricBasedScorerTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/RubricBasedScorerTest.java index 3173506..edebc9f 100644 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/RubricBasedScorerTest.java +++ b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/RubricBasedScorerTest.java @@ -19,7 +19,6 @@ import com.evalkit.framework.eval.node.scorer.model.RubricMergeStrategy; import com.evalkit.framework.eval.node.scorer.model.RubricScoreType; import com.evalkit.framework.infra.service.llm.LLMService; -import com.evalkit.framework.infra.utils.DebugUtils; import com.evalkit.framework.workflow.Workflow; import com.evalkit.framework.workflow.WorkflowBuilder; import lombok.extern.slf4j.Slf4j; @@ -978,10 +977,13 @@ void minScoreGtZero_starGate_triggersZero() { // ==================== 真实链路 ==================== @Test - @DisplayName("真实链路") + @DisplayName("真实链路(mock LLM)") void realLink() { - LLMService llm = DebugUtils.buildLLMService(); - // LLMService llm = mockLLMSequence(cotJson(1, "最差"), cotJson(5, "最好")); + // 使用 mock LLMService 替代真实 DeepSeek 服务,不依赖外部 token 或 HTTP 请求 + // criteriaBatchSize=2,每次 LLM 调用需返回包含 2 个维度评分结果的 JSON 数组 + // 3 条数据 × 1 次批量调用(2 个维度合并为一次) = 3 次 LLM 调用 + String batchCotJson = "[" + cotJson(4, "回复质量良好") + "," + cotJson(5, "内容安全") + "]"; + LLMService llm = mockLLMSequence(batchCotJson, batchCotJson, batchCotJson); // 开始节点 Begin begin = new Begin(); diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/SecurityScorerTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/SecurityScorerTest.java index 457a22c..c84815c 100644 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/SecurityScorerTest.java +++ b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/SecurityScorerTest.java @@ -1,21 +1,74 @@ package com.evalkit.framework.eval.node.scorer; import com.evalkit.framework.eval.model.ApiCompletionResult; +import com.evalkit.framework.eval.model.DataItem; import com.evalkit.framework.eval.model.InputData; +import com.evalkit.framework.eval.model.ScorerResult; import com.evalkit.framework.eval.node.scorer.config.PromptBasedScorerConfig; -import com.evalkit.framework.infra.service.llm.LLMServiceFactory; +import com.evalkit.framework.infra.service.llm.LLMService; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; class SecurityScorerTest { - void test() { + + /** + * 构造一个 mock LLMService,返回安全评分 JSON 格式(符合 SecurityScorer 的期望) + */ + private LLMService buildMockLLMService() { + return new LLMService() { + @Override + public String chat(String prompt) { + // 返回符合 SecurityScorer parseLLMReply 期望的 JSON 格式 + return "{\"score\":1,\"reason\":\"内容安全,无违规信息\"}"; + } + + @Override + public String getModel() { + return "mock-model"; + } + }; + } + + @Test + void testConstructSecurityScorer() { SecurityScorer securityScorer = new SecurityScorer( PromptBasedScorerConfig.builder() - .llmService(LLMServiceFactory.createLLMService("test", null)) + .llmService(buildMockLLMService()) .build() ) { @Override public String prepareUserPrompt(InputData inputData, ApiCompletionResult apiCompletionResult) { - return ""; + return "测试文本:你好,今天天气真好!"; } }; + + assertNotNull(securityScorer, "SecurityScorer 实例不应为 null"); + } + + @Test + void testEvalWithMockLLM() { + SecurityScorer securityScorer = new SecurityScorer( + PromptBasedScorerConfig.builder() + .llmService(buildMockLLMService()) + .metricName("安全检查") + .totalScore(1) + .enableRetry(false) + .build() + ) { + @Override + public String prepareUserPrompt(InputData inputData, ApiCompletionResult apiCompletionResult) { + return "测试文本:你好,今天天气真好!"; + } + }; + + DataItem dataItem = new DataItem(); + dataItem.setInputData(new InputData()); + dataItem.setApiCompletionResult(new ApiCompletionResult()); + + ScorerResult result = securityScorer.eval(dataItem); + assertNotNull(result, "评分结果不应为 null"); + assertEquals(1.0, result.getScore(), 1e-6, "安全内容应得满分"); } } \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/checker/LLMBasedCheckerTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/checker/LLMBasedCheckerTest.java index 5b95614..5bd8372 100644 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/checker/LLMBasedCheckerTest.java +++ b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/checker/LLMBasedCheckerTest.java @@ -3,16 +3,38 @@ import com.evalkit.framework.eval.model.DataItem; import com.evalkit.framework.eval.node.scorer.checker.config.LLMBasedCheckerConfig; import com.evalkit.framework.eval.node.scorer.checker.model.CheckItem; -import com.evalkit.framework.infra.service.llm.LLMServiceFactory; +import com.evalkit.framework.infra.service.llm.LLMService; +import org.junit.jupiter.api.Test; import java.util.Collections; import java.util.List; +import static org.junit.jupiter.api.Assertions.assertNotNull; + class LLMBasedCheckerTest { - void test() { + + /** + * 构造一个 mock LLMService,不依赖外部服务 + */ + private LLMService buildMockLLMService() { + return new LLMService() { + @Override + public String chat(String prompt) { + return "mock reply"; + } + + @Override + public String getModel() { + return "mock-model"; + } + }; + } + + @Test + void testConstructLLMBasedChecker() { LLMBasedChecker checker = new LLMBasedChecker( LLMBasedCheckerConfig.builder() - .llmService(LLMServiceFactory.createLLMService("test", null)) + .llmService(buildMockLLMService()) .build() ) { @Override @@ -40,5 +62,7 @@ public double getTotalScore() { return 0; } }; + + assertNotNull(checker, "LLMBasedChecker 实例不应为 null"); } } \ No newline at end of file diff --git a/evalkit-infra/src/test/java/com/evalkit/framework/infra/service/llm/LLMServiceFactoryTest.java b/evalkit-infra/src/test/java/com/evalkit/framework/infra/service/llm/LLMServiceFactoryTest.java index 181df1d..0626680 100644 --- a/evalkit-infra/src/test/java/com/evalkit/framework/infra/service/llm/LLMServiceFactoryTest.java +++ b/evalkit-infra/src/test/java/com/evalkit/framework/infra/service/llm/LLMServiceFactoryTest.java @@ -1,34 +1,74 @@ package com.evalkit.framework.infra.service.llm; -import com.evalkit.framework.common.utils.runtime.RuntimeEnvUtils; -import com.evalkit.framework.infra.service.llm.config.DeepseekLLMServiceConfig; import com.evalkit.framework.infra.service.llm.config.LLMServiceConfig; import lombok.extern.slf4j.Slf4j; import org.junit.jupiter.api.Test; +import static org.junit.jupiter.api.Assertions.*; + @Slf4j class LLMServiceFactoryTest { - String deepSeekToken = RuntimeEnvUtils.getPropertyFromResource("secret.properties", "deepseek-token"); + /** + * 构造一个固定回复的 mock LLMService,不依赖任何外部服务 + */ + private LLMService mockLLMService(String fixedReply) { + return new LLMService() { + @Override + public String chat(String prompt) { + return fixedReply; + } - @Test - public void test() { - // 注册DeepSeek_Test大模型服务 - LLMServiceFactory.registerLLMService("DeepSeek_Test", new LLMServiceFactory.LLMServiceBuilder() { @Override - public LLMService build(LLMServiceConfig config) { - return new DeepSeekLLMService((DeepseekLLMServiceConfig) config); + public String getModel() { + return "mock-model"; } - }); + }; + } + + @Test + void testRegisterAndCreateLLMService() { + // 使用 mock builder 注册服务,不依赖任何外部 token 或 HTTP 请求 + LLMServiceFactory.registerLLMService("Mock_Test", + (LLMServiceFactory.LLMServiceBuilder) + config -> mockLLMService("hello from mock")); // 创建服务实例 - DeepseekLLMServiceConfig config = DeepseekLLMServiceConfig.builder() - .apiToken(deepSeekToken) - .build(); - LLMService llmService = LLMServiceFactory.createLLMService("DeepSeek_Test", config); - - String query = "hello"; - String reply = llmService.chat(query); - log.info("llm service config:{}, query:{}, reply:{}", config, query, reply); + LLMService llmService = LLMServiceFactory.createLLMService("Mock_Test", + LLMServiceConfig.builder().model("mock-model").build()); + + assertNotNull(llmService, "创建的 LLMService 不应为 null"); + + // 验证 mock 调用可以正常返回,而不会真正发起 HTTP 请求 + String reply = llmService.chat("hello"); + assertEquals("hello from mock", reply, "mock LLMService 应返回预期的固定回复"); + log.info("llmService model:{}, reply:{}", llmService.getModel(), reply); + } + + @Test + void testCreateUnregisteredServiceThrowsException() { + // 访问未注册的服务名称,应抛出 IllegalArgumentException + assertThrows(IllegalArgumentException.class, + () -> LLMServiceFactory.createLLMService("NonExistentService", null), + "访问未注册服务应抛出 IllegalArgumentException"); + } + + @Test + void testRegisterOverwriteExistingService() { + // 先注册一个返回 "v1" 的服务 + LLMServiceFactory.registerLLMService("Override_Test", + (LLMServiceFactory.LLMServiceBuilder) + config -> mockLLMService("v1")); + LLMService v1 = LLMServiceFactory.createLLMService("Override_Test", + LLMServiceConfig.builder().model("mock").build()); + assertEquals("v1", v1.chat("test")); + + // 覆盖注册为返回 "v2" 的服务 + LLMServiceFactory.registerLLMService("Override_Test", + (LLMServiceFactory.LLMServiceBuilder) + config -> mockLLMService("v2")); + LLMService v2 = LLMServiceFactory.createLLMService("Override_Test", + LLMServiceConfig.builder().model("mock").build()); + assertEquals("v2", v2.chat("test"), "覆盖注册后,新服务应返回新的回复"); } } \ No newline at end of file diff --git a/evalkit-infra/src/test/java/com/evalkit/framework/infra/service/llm/LoadBalanceLLMServiceTest.java b/evalkit-infra/src/test/java/com/evalkit/framework/infra/service/llm/LoadBalanceLLMServiceTest.java index 59482cd..474bba0 100644 --- a/evalkit-infra/src/test/java/com/evalkit/framework/infra/service/llm/LoadBalanceLLMServiceTest.java +++ b/evalkit-infra/src/test/java/com/evalkit/framework/infra/service/llm/LoadBalanceLLMServiceTest.java @@ -1,9 +1,6 @@ package com.evalkit.framework.infra.service.llm; import com.evalkit.framework.common.utils.list.ListUtils; -import com.evalkit.framework.common.utils.runtime.RuntimeEnvUtils; -import com.evalkit.framework.infra.service.llm.config.DeepseekLLMServiceConfig; -import com.evalkit.framework.infra.service.llm.config.LLMServiceConfig; import com.evalkit.framework.infra.service.llm.config.LoadBalanceLLMServiceConfig; import com.evalkit.framework.infra.service.llm.strategy.RoundRobinLoadBalanceStrategy; import lombok.extern.slf4j.Slf4j; @@ -11,34 +8,44 @@ import org.junit.jupiter.api.Test; import java.util.List; +import java.util.concurrent.atomic.AtomicInteger; + +import static org.junit.jupiter.api.Assertions.*; @Slf4j class LoadBalanceLLMServiceTest { LoadBalanceLLMService loadBalanceLLMService; + /** + * 构造一个固定返回指定内容的 mock LLMService,不发起任何 HTTP 请求 + */ + private LLMService mockLLMService(String model, String fixedReply) { + return new LLMService() { + @Override + public String chat(String prompt) { + return fixedReply; + } + + @Override + public String getModel() { + return model; + } + }; + } + @BeforeEach void setUp() { - String deepSeekToken = RuntimeEnvUtils.getPropertyFromResource("secret.properties", "deepseek-token"); - - // 注册 - LLMServiceFactory.registerLLMService("DeepSeek_Test1", (LLMServiceFactory.LLMServiceBuilder) config -> new DeepSeekLLMService((DeepseekLLMServiceConfig) config)); - LLMServiceFactory.registerLLMService("DeepSeek_Test2", (LLMServiceFactory.LLMServiceBuilder) config -> new DeepSeekLLMService((DeepseekLLMServiceConfig) config)); - - // 创建 - DeepseekLLMServiceConfig config = DeepseekLLMServiceConfig.builder() - .apiToken(deepSeekToken) - .build(); - LLMService llmService10 = LLMServiceFactory.createLLMService("DeepSeek_Test1", config); - LLMService llmService11 = LLMServiceFactory.createLLMService("DeepSeek_Test1", config); - LLMService llmService12 = LLMServiceFactory.createLLMService("DeepSeek_Test1", config); - LLMService llmService13 = LLMServiceFactory.createLLMService("DeepSeek_Test1", config); - LLMService llmService14 = LLMServiceFactory.createLLMService("DeepSeek_Test1", config); - LLMService llmService20 = LLMServiceFactory.createLLMService("DeepSeek_Test2", config); - LLMService llmService21 = LLMServiceFactory.createLLMService("DeepSeek_Test2", config); - - // 负载 - List llmServices = ListUtils.of(llmService10, llmService11, llmService12, llmService13, llmService14, llmService20, llmService21); + // 用 mock LLMService 替代真实的 DeepSeek 服务,不依赖外部 token 或 HTTP + LLMService llmService1 = mockLLMService("mock-model-1", "reply from model-1"); + LLMService llmService2 = mockLLMService("mock-model-1", "reply from model-1"); + LLMService llmService3 = mockLLMService("mock-model-1", "reply from model-1"); + LLMService llmService4 = mockLLMService("mock-model-2", "reply from model-2"); + LLMService llmService5 = mockLLMService("mock-model-2", "reply from model-2"); + + List llmServices = ListUtils.of( + llmService1, llmService2, llmService3, llmService4, llmService5); + loadBalanceLLMService = new LoadBalanceLLMService( LoadBalanceLLMServiceConfig.builder() .llmServices(llmServices) @@ -48,10 +55,35 @@ void setUp() { } @Test - void test() { + void testGetModel() { String model = loadBalanceLLMService.getModel(); + assertNotNull(model, "getModel() 不应返回 null"); log.info("models: {}", model); - loadBalanceLLMService.chat("hello,world"); - loadBalanceLLMService.chat("今日天气"); + } + + @Test + void testChatRoundRobin() { + // 验证轮询策略:多次调用应分布在不同服务上 + AtomicInteger callCount = new AtomicInteger(0); + for (int i = 0; i < 5; i++) { + String reply = loadBalanceLLMService.chat("test query " + i); + assertNotNull(reply, "chat() 返回不应为 null"); + callCount.incrementAndGet(); + } + assertEquals(5, callCount.get(), "应成功完成 5 次 chat 调用"); + log.info("完成 {} 次 chat 调用,负载均衡正常", callCount.get()); + } + + @Test + void testEmptyLLMServicesThrowsException() { + // 校验空 services 列表时构造应抛出异常 + assertThrows(IllegalArgumentException.class, () -> + new LoadBalanceLLMService( + LoadBalanceLLMServiceConfig.builder() + .llmServices(ListUtils.of()) + .loadBalanceStrategy(new RoundRobinLoadBalanceStrategy()) + .build() + ) + ); } } \ No newline at end of file diff --git a/evalkit-test/src/test/java/com/evalkit/framework/test/DAGEvalPerformanceTest.java b/evalkit-test/src/test/java/com/evalkit/framework/test/DAGEvalPerformanceTest.java index b0f1e89..cf90394 100644 --- a/evalkit-test/src/test/java/com/evalkit/framework/test/DAGEvalPerformanceTest.java +++ b/evalkit-test/src/test/java/com/evalkit/framework/test/DAGEvalPerformanceTest.java @@ -24,7 +24,7 @@ public class DAGEvalPerformanceTest { private static final Logger logger = LoggerFactory.getLogger(DAGEvalPerformanceTest.class); private static final String tempDir = System.getProperty("java.io.tmpdir"); private static final String fileName = "DAGEvalTest_" + UuidUtils.generateUuid() + ".xlsx"; - public static int caseCount = 10000 * 6; + public static int caseCount = 10000; private static PerformanceMonitor performanceMonitor; /** diff --git a/evalkit-test/src/test/java/com/evalkit/framework/test/DeltaEvalPerformanceTest.java b/evalkit-test/src/test/java/com/evalkit/framework/test/DeltaEvalPerformanceTest.java index 1e552e9..eac5a55 100644 --- a/evalkit-test/src/test/java/com/evalkit/framework/test/DeltaEvalPerformanceTest.java +++ b/evalkit-test/src/test/java/com/evalkit/framework/test/DeltaEvalPerformanceTest.java @@ -24,7 +24,7 @@ public class DeltaEvalPerformanceTest { private static final Logger logger = LoggerFactory.getLogger(DeltaEvalPerformanceTest.class); private static final String tempDir = System.getProperty("java.io.tmpdir"); private static final String fileName = "DeltaEvalPerformanceTest_" + UuidUtils.generateUuid() + ".xlsx"; - public static int caseCount = 10000 * 5; + public static int caseCount = 10000; private static PerformanceMonitor performanceMonitor; /** From 802cabef010fc1f06919f3042d8b13e50e6d9192 Mon Sep 17 00:00:00 2001 From: zendodx Date: Thu, 11 Jun 2026 11:37:52 +0800 Subject: [PATCH 4/7] =?UTF-8?q?chore:=20=E4=BF=AE=E6=94=B9Jacoco=E9=85=8D?= =?UTF-8?q?=E7=BD=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pom.xml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index c298ea1..5d8e097 100644 --- a/pom.xml +++ b/pom.xml @@ -167,8 +167,10 @@ OrderedDeltaEvalWithinDataInjectTest - -Dnet.bytebuddy.experimental=true + (MalformedParameters: Invalid parameter name "") + 注意:必须引用 ${argLine} 以继承 Jacoco prepare-agent 注入的 -javaagent 参数, + 否则 Jacoco 插桩失效,覆盖率报告全为 0 --> + ${argLine} -Dnet.bytebuddy.experimental=true From 459d09253bfea58f3afd7ead3bbf3b395ee929f7 Mon Sep 17 00:00:00 2001 From: zendodx Date: Thu, 11 Jun 2026 11:38:11 +0800 Subject: [PATCH 5/7] =?UTF-8?q?docs:=20=E5=A2=9E=E5=8A=A0Jacoco=E8=A6=86?= =?UTF-8?q?=E7=9B=96=E7=8E=87=E5=88=86=E6=9E=90=E6=96=87=E6=A1=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/dev-guide/github-codecov.md | 84 +++++++++++++++++++++++++++++++- 1 file changed, 83 insertions(+), 1 deletion(-) diff --git a/docs/dev-guide/github-codecov.md b/docs/dev-guide/github-codecov.md index 62d7437..0d84580 100644 --- a/docs/dev-guide/github-codecov.md +++ b/docs/dev-guide/github-codecov.md @@ -163,4 +163,86 @@ Push 代码前,你还需要在 GitHub 仓库中添加 Secret: --- -**注意**:由于根 `pom.xml` 中 GPG 插件在 `verify` 阶段会签名,CI 里用了 `-Dgpg.skip=true` 跳过签名(只跑 `test` 阶段),否则会因为没有 GPG 私钥而失败。 \ No newline at end of file +**注意**:由于根 `pom.xml` 中 GPG 插件在 `verify` 阶段会签名,CI 里用了 `-Dgpg.skip=true` 跳过签名(只跑 `test` 阶段),否则会因为没有 GPG 私钥而失败。 + +# 怎么查看Jacoco的覆盖率 + +查看 Jacoco 覆盖率有以下几种方式: + +## 方式一:直接打开 HTML 报告(最推荐) + +每次运行 `mvn test` 后,Jacoco 会在各模块的 `target/site/jacoco/` 目录生成 HTML 报告。 + +先查看项目中是否已有报告: + +报告已经生成了。直接用浏览器打开: + +浏览器已打开报告。以下是所有查看方式的说明: + +--- + +## 查看 Jacoco 覆盖率的几种方式 + +### 方式一:HTML 报告(已自动打开) + +每次 `mvn test` 后,各模块都会生成可视化报告,路径规律为: + +``` +{模块名}/target/site/jacoco/index.html +``` + +本项目已生成的报告: +- `evalkit-eval/target/site/jacoco/index.html` +- `evalkit-infra/target/site/jacoco/index.html` +- `evalkit-workflow/target/site/jacoco/index.html` +- `evalkit-common/target/site/jacoco/index.html` + +报告提供**行覆盖率**、**分支覆盖率**、**方法覆盖率**等多维度数据,可以点击包名 → 类名 → 方法名,逐层下钻查看具体哪些代码行被覆盖(绿色=覆盖,红色=未覆盖,黄色=部分覆盖)。 + +--- + +### 方式二:命令行查看汇总数据 + +```bash +# 查看某模块的文本覆盖率汇总(CSV 格式) +cat evalkit-eval/target/site/jacoco/jacoco.csv | head -5 +``` + +--- + +### 方式三:手动重新生成报告 + +如果想刷新覆盖率数据(只生成报告,不重跑测试): + +```bash +# 重新跑测试并生成所有模块报告 +mvn test --no-transfer-progress + +# 仅针对某个模块 +mvn test -pl evalkit-eval --no-transfer-progress +``` + +--- + +### 方式四:生成聚合报告(跨模块汇总) + +如果项目配置了 `jacoco-report` 聚合模块,可以用: + +```bash +mvn jacoco:report-aggregate +``` + +--- + +### 报告解读 + +HTML 报告的每列含义: + +| 列名 | 含义 | +|------|------| +| **Instructions** | 字节码指令覆盖率(最精确) | +| **Branches** | 分支覆盖率(if/else/switch) | +| **Lines** | 源代码行覆盖率 | +| **Methods** | 方法覆盖率 | +| **Classes** | 类覆盖率 | + From b5e4497ee3d2ad7c252ea0152616d557009975e5 Mon Sep 17 00:00:00 2001 From: zendodx Date: Thu, 11 Jun 2026 14:24:57 +0800 Subject: [PATCH 6/7] =?UTF-8?q?fix:=20=E4=BF=AE=E6=94=B9=E5=8D=95=E6=B5=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../eval/node/api/ApiCompletionTest.java | 400 ++++++++++-- .../eval/node/api/HttpApiCompletionTest.java | 57 -- .../api_wrapper/ApiCompletionWrapperTest.java | 574 +++++++++-------- .../LLMBasedApiCompletionWrapperTest.java | 472 -------------- .../framework/eval/node/begin/BeginTest.java | 239 ++++++++ .../node/counter/AttributeCounterTest.java | 59 -- .../node/counter/AttributeCounterV2Test.java | 63 -- .../eval/node/counter/CounterTest.java | 17 - .../EvalCaseDataGeneratorTest.java | 28 - .../KGBasedQueryGeneratorTest.java | 68 --- .../LoaderBasedDataGeneratorTest.java | 104 ---- .../MultiDataGeneratorTest.java | 76 --- .../querygen/MockQueryGeneratorTest.java | 30 - .../PromptBasedQueryGeneratorTest.java | 71 --- .../node/dataloader/ApiDataLoaderTest.java | 128 ---- .../eval/node/dataloader/DataLoaderTest.java | 360 +++++++++-- .../node/dataloader/ExcelDataLoaderTest.java | 13 - .../node/dataloader/JdbcDataLoaderTest.java | 105 ---- .../dataloader/JsonFileDataLoaderTest.java | 105 ---- .../dataloader/JsonTextDataLoaderTest.java | 20 - .../node/dataloader/MultiDataLoaderTest.java | 45 -- .../datainjector/DataInjectorTest.java | 54 -- .../DataLoaderWrapperTest.java | 326 +++++++++- .../PolishDataLoaderWrapperTest.java | 16 - .../PromptDataLoaderWrapperTest.java | 21 - .../mock/mocker/DateMockerTest.java | 115 ---- .../mock/mocker/NumberMockerTest.java | 164 ----- .../eval/node/reporter/ApiReporterTest.java | 76 --- .../eval/node/reporter/JdbcReportTest.java | 47 -- .../eval/node/reporter/JsonReporterTest.java | 9 - .../eval/node/reporter/ReporterTest.java | 16 - .../node/scorer/DifyWorkflowScorerTest.java | 28 - .../eval/node/scorer/GSBScorerTest.java | 125 ---- .../scorer/MultiCheckerBasedScorerTest.java | 167 ----- .../node/scorer/PromptBasedScorerTest.java | 102 ---- .../eval/node/scorer/RouterScorerTest.java | 577 ------------------ .../node/scorer/RubricBasedScorerTest.java | 14 - .../eval/node/scorer/ScorerConditionTest.java | 468 -------------- .../eval/node/scorer/ScorerTest.java | 402 ------------ .../eval/node/scorer/SecurityScorerTest.java | 74 --- .../scorer/VectorSimilarityScorerTest.java | 18 - .../scorer/checker/AbstractCheckerTest.java | 187 ------ .../scorer/checker/LLMBasedCheckerTest.java | 68 --- .../scorer/checker/model/CheckItemTest.java | 116 ---- .../scorer/strategy/ScoreStrategyTest.java | 247 -------- .../src/test/resources/dataItems.json | 174 ------ .../travel_demo/scenario2_config.json | 55 -- pom.xml | 10 +- 48 files changed, 1511 insertions(+), 5199 deletions(-) delete mode 100644 evalkit-eval/src/test/java/com/evalkit/framework/eval/node/api/HttpApiCompletionTest.java delete mode 100644 evalkit-eval/src/test/java/com/evalkit/framework/eval/node/api_wrapper/LLMBasedApiCompletionWrapperTest.java create mode 100644 evalkit-eval/src/test/java/com/evalkit/framework/eval/node/begin/BeginTest.java delete mode 100644 evalkit-eval/src/test/java/com/evalkit/framework/eval/node/counter/AttributeCounterTest.java delete mode 100644 evalkit-eval/src/test/java/com/evalkit/framework/eval/node/counter/AttributeCounterV2Test.java delete mode 100644 evalkit-eval/src/test/java/com/evalkit/framework/eval/node/counter/CounterTest.java delete mode 100644 evalkit-eval/src/test/java/com/evalkit/framework/eval/node/data_generator/EvalCaseDataGeneratorTest.java delete mode 100644 evalkit-eval/src/test/java/com/evalkit/framework/eval/node/data_generator/KGBasedQueryGeneratorTest.java delete mode 100644 evalkit-eval/src/test/java/com/evalkit/framework/eval/node/data_generator/LoaderBasedDataGeneratorTest.java delete mode 100644 evalkit-eval/src/test/java/com/evalkit/framework/eval/node/data_generator/MultiDataGeneratorTest.java delete mode 100644 evalkit-eval/src/test/java/com/evalkit/framework/eval/node/data_generator/querygen/MockQueryGeneratorTest.java delete mode 100644 evalkit-eval/src/test/java/com/evalkit/framework/eval/node/data_generator/querygen/PromptBasedQueryGeneratorTest.java delete mode 100644 evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader/ApiDataLoaderTest.java delete mode 100644 evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader/ExcelDataLoaderTest.java delete mode 100644 evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader/JdbcDataLoaderTest.java delete mode 100644 evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader/JsonFileDataLoaderTest.java delete mode 100644 evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader/JsonTextDataLoaderTest.java delete mode 100644 evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader/MultiDataLoaderTest.java delete mode 100644 evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader/datainjector/DataInjectorTest.java delete mode 100644 evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader_wrapper/PolishDataLoaderWrapperTest.java delete mode 100644 evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader_wrapper/PromptDataLoaderWrapperTest.java delete mode 100644 evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader_wrapper/mock/mocker/DateMockerTest.java delete mode 100644 evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader_wrapper/mock/mocker/NumberMockerTest.java delete mode 100644 evalkit-eval/src/test/java/com/evalkit/framework/eval/node/reporter/ApiReporterTest.java delete mode 100644 evalkit-eval/src/test/java/com/evalkit/framework/eval/node/reporter/JdbcReportTest.java delete mode 100644 evalkit-eval/src/test/java/com/evalkit/framework/eval/node/reporter/JsonReporterTest.java delete mode 100644 evalkit-eval/src/test/java/com/evalkit/framework/eval/node/reporter/ReporterTest.java delete mode 100644 evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/DifyWorkflowScorerTest.java delete mode 100644 evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/GSBScorerTest.java delete mode 100644 evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/MultiCheckerBasedScorerTest.java delete mode 100644 evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/PromptBasedScorerTest.java delete mode 100644 evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/RouterScorerTest.java delete mode 100644 evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/ScorerConditionTest.java delete mode 100644 evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/ScorerTest.java delete mode 100644 evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/SecurityScorerTest.java delete mode 100644 evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/VectorSimilarityScorerTest.java delete mode 100644 evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/checker/AbstractCheckerTest.java delete mode 100644 evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/checker/LLMBasedCheckerTest.java delete mode 100644 evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/checker/model/CheckItemTest.java delete mode 100644 evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/strategy/ScoreStrategyTest.java delete mode 100644 evalkit-eval/src/test/resources/dataItems.json delete mode 100644 evalkit-eval/src/test/resources/travel_demo/scenario2_config.json diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/api/ApiCompletionTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/api/ApiCompletionTest.java index 7ca1efb..151c335 100644 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/api/ApiCompletionTest.java +++ b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/api/ApiCompletionTest.java @@ -1,98 +1,378 @@ package com.evalkit.framework.eval.node.api; -import com.evalkit.framework.common.utils.list.ListUtils; -import com.evalkit.framework.common.utils.map.MapUtils; +import com.evalkit.framework.eval.context.WorkflowContextOps; import com.evalkit.framework.eval.model.ApiCompletionResult; import com.evalkit.framework.eval.model.DataItem; import com.evalkit.framework.eval.model.InputData; import com.evalkit.framework.eval.node.api.config.ApiCompletionConfig; -import com.evalkit.framework.eval.node.begin.Begin; -import com.evalkit.framework.eval.node.dataloader.DataLoader; -import com.evalkit.framework.workflow.WorkflowBuilder; +import com.evalkit.framework.workflow.model.WorkflowContext; import lombok.extern.slf4j.Slf4j; +import org.junit.jupiter.api.DisplayName; import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.function.ThrowingSupplier; -import java.util.ArrayList; -import java.util.Collections; +import java.util.HashMap; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.CopyOnWriteArrayList; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicReference; -import static org.junit.jupiter.api.Assertions.assertTimeoutPreemptively; +import static org.junit.jupiter.api.Assertions.*; @Slf4j +@DisplayName("ApiCompletion 单元测试") class ApiCompletionTest { - private final class TestApiCompletion extends ApiCompletion { - public TestApiCompletion() { + + // ===================== 工具方法 ===================== + + /** + * 构建一个简单的 ApiCompletion 实现,invoke 固定返回给定结果 + */ + private ApiCompletion buildApiCompletion(ApiCompletionResult fixedResult) { + return new ApiCompletion() { + @Override + protected ApiCompletionResult invoke(DataItem dataItem) { + return fixedResult; + } + }; + } + + /** + * 构建一个 invoke 抛出异常的 ApiCompletion 实现 + */ + private ApiCompletion buildThrowingApiCompletion(RuntimeException ex) { + return new ApiCompletion() { + @Override + protected ApiCompletionResult invoke(DataItem dataItem) { + throw ex; + } + }; + } + + /** + * 构造包含指定条数 DataItem 的 WorkflowContext + */ + private WorkflowContext buildContextWithDataItems(int size) { + WorkflowContext ctx = new WorkflowContext(); + List items = new CopyOnWriteArrayList<>(); + for (int i = 0; i < size; i++) { + Map inputItem = new HashMap<>(); + inputItem.put("id", i); + DataItem dataItem = new DataItem((long) i, new InputData(inputItem)); + items.add(dataItem); } + WorkflowContextOps.setDataItems(ctx, items); + return ctx; + } - public TestApiCompletion(ApiCompletionConfig config) { - super(config); + /** + * 为 ApiCompletion 注入上下文并执行 + */ + private void executeWithContext(ApiCompletion api, WorkflowContext ctx) { + api.setWorkflowContext(ctx); + try { + api.call(); + } catch (Exception e) { + throw new RuntimeException(e); } + } - /* 用来收集实际执行顺序 */ - private final Map> execOrder = new ConcurrentHashMap<>(); + // ===================== constructor 测试 ===================== - @Override - protected ApiCompletionResult invoke(DataItem dataItem) { - InputData inputData = dataItem.getInputData(); - String caseId = inputData.get("caseId"); - String query = inputData.get("query"); + @Test + @DisplayName("无参构造器应使用默认 ApiCompletionConfig,不抛出异常") + void testConstructor_defaultConfig() { + ApiCompletion api = buildApiCompletion(new ApiCompletionResult(new LinkedHashMap<>())); + assertNotNull(api.getConfig(), "默认构造器应初始化 config"); + assertEquals(1, api.getConfig().getThreadNum(), "默认线程数应为 1"); + assertEquals(120, api.getConfig().getTimeout(), "默认超时应为 120"); + } - // 模拟业务耗时 200ms - try { - Thread.sleep(200); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); + @Test + @DisplayName("带 ApiCompletionConfig 构造器应正确保存配置") + void testConstructor_withConfig() { + ApiCompletionConfig config = ApiCompletionConfig.builder().threadNum(4).timeout(60).build(); + ApiCompletion api = new ApiCompletion(config) { + @Override + protected ApiCompletionResult invoke(DataItem dataItem) { + return null; + } + }; + assertEquals(4, api.getConfig().getThreadNum()); + assertEquals(60, api.getConfig().getTimeout()); + } + + // ===================== invokeWrapper 测试 ===================== + + @Test + @DisplayName("invokeWrapper 正常调用时应返回带 dataIndex 的结果,且 success=true") + void testInvokeWrapper_success() { + Map resultItem = new HashMap<>(); + resultItem.put("answer", "ok"); + ApiCompletionResult fixedResult = new ApiCompletionResult(resultItem); + + ApiCompletion api = buildApiCompletion(fixedResult); + DataItem dataItem = new DataItem(1L, new InputData(new HashMap<>())); + + ApiCompletionResult result = api.invokeWrapper(dataItem); + + assertNotNull(result, "返回结果不应为 null"); + assertEquals(1L, result.getDataIndex(), "dataIndex 应与 DataItem 一致"); + assertTrue(result.isSuccess(), "正常调用时 success 应为 true"); + assertEquals("ok", result.get("answer"), "resultItem 内容应与 invoke 返回一致"); + } + + @Test + @DisplayName("invokeWrapper 调用耗时字段应被正确记录") + void testInvokeWrapper_timeCostRecorded() { + ApiCompletion api = buildApiCompletion(new ApiCompletionResult(new HashMap<>())); + DataItem dataItem = new DataItem(0L, new InputData(new HashMap<>())); + + ApiCompletionResult result = api.invokeWrapper(dataItem); + + assertTrue(result.getStartTime() > 0, "startTime 应大于 0"); + assertTrue(result.getEndTime() >= result.getStartTime(), "endTime 应 >= startTime"); + assertTrue(result.getTimeCost() >= 0, "timeCost 应 >= 0"); + } + + @Test + @DisplayName("invoke 返回 null 时,invokeWrapper 应返回 success=false 的结果") + void testInvokeWrapper_invokeReturnsNull() { + ApiCompletion api = buildApiCompletion(null); + DataItem dataItem = new DataItem(2L, new InputData(new HashMap<>())); + + ApiCompletionResult result = api.invokeWrapper(dataItem); + + assertNotNull(result); + assertFalse(result.isSuccess(), "invoke 返回 null 时 success 应为 false"); + } + + @Test + @DisplayName("invoke 抛出异常时,invokeWrapper 应捕获异常并返回 success=false 的结果") + void testInvokeWrapper_invokeThrows() { + ApiCompletion api = buildThrowingApiCompletion(new RuntimeException("mock error")); + DataItem dataItem = new DataItem(3L, new InputData(new HashMap<>())); + + ApiCompletionResult result = api.invokeWrapper(dataItem); + + assertNotNull(result, "invoke 抛异常后不应返回 null"); + assertFalse(result.isSuccess(), "invoke 抛异常时 success 应为 false"); + } + + @Test + @DisplayName("DataItem 已有 apiCompletionResult 时,invokeWrapper 应直接返回已有结果,不重复调用") + void testInvokeWrapper_skipWhenResultExists() { + AtomicBoolean invoked = new AtomicBoolean(false); + ApiCompletion api = new ApiCompletion() { + @Override + protected ApiCompletionResult invoke(DataItem dataItem) { + invoked.set(true); + return new ApiCompletionResult(new HashMap<>()); + } + }; + + ApiCompletionResult existingResult = new ApiCompletionResult(new HashMap<>()); + existingResult.setDataIndex(5L); + DataItem dataItem = new DataItem(5L, new InputData(new HashMap<>())); + dataItem.setApiCompletionResult(existingResult); + + ApiCompletionResult result = api.invokeWrapper(dataItem); + + assertFalse(invoked.get(), "已有 apiCompletionResult 时不应再次调用 invoke"); + assertSame(existingResult, result, "应直接返回已有结果"); + } + + // ===================== 钩子方法测试 ===================== + + @Test + @DisplayName("beforeInvoke 钩子被调用,可修改 DataItem") + void testBeforeInvoke_called() { + AtomicBoolean beforeCalled = new AtomicBoolean(false); + ApiCompletion api = new ApiCompletion() { + @Override + protected DataItem beforeInvoke(DataItem dataItem) { + beforeCalled.set(true); + return dataItem; + } + + @Override + protected ApiCompletionResult invoke(DataItem dataItem) { + return new ApiCompletionResult(new HashMap<>()); + } + }; + + api.invokeWrapper(new DataItem(0L, new InputData(new HashMap<>()))); + assertTrue(beforeCalled.get(), "beforeInvoke 钩子应被调用"); + } + + @Test + @DisplayName("afterInvoke 钩子被调用,可修改返回结果") + void testAfterInvoke_called() { + ApiCompletionResult modifiedResult = new ApiCompletionResult(new HashMap<>()); + modifiedResult.set("modified", true); + + ApiCompletion api = new ApiCompletion() { + @Override + protected ApiCompletionResult invoke(DataItem dataItem) { + return new ApiCompletionResult(new HashMap<>()); + } + + @Override + protected ApiCompletionResult afterInvoke(DataItem dataItem, ApiCompletionResult result) { + return modifiedResult; + } + }; + + ApiCompletionResult result = api.invokeWrapper(new DataItem(0L, new InputData(new HashMap<>()))); + assertSame(modifiedResult, result, "afterInvoke 返回的结果应被最终使用"); + } + + @Test + @DisplayName("onErrorInvoke 钩子在 invoke 抛异常时被调用") + void testOnErrorInvoke_called() { + AtomicBoolean errorCalled = new AtomicBoolean(false); + AtomicReference capturedError = new AtomicReference<>(); + + ApiCompletion api = new ApiCompletion() { + @Override + protected ApiCompletionResult invoke(DataItem dataItem) { + throw new RuntimeException("test-error"); + } + + @Override + protected void onErrorInvoke(DataItem dataItem, Throwable e) { + errorCalled.set(true); + capturedError.set(e); + } + }; + + api.invokeWrapper(new DataItem(0L, new InputData(new HashMap<>()))); + assertTrue(errorCalled.get(), "invoke 抛异常时 onErrorInvoke 应被调用"); + assertNotNull(capturedError.get(), "捕获的异常不应为 null"); + assertEquals("test-error", capturedError.get().getMessage()); + } + + // ===================== doExecute 测试 ===================== + + @Test + @DisplayName("doExecute 正常执行后,DataItem 应被设置 apiCompletionResult") + void testDoExecute_resultsSetOnDataItems() { + ApiCompletion api = new ApiCompletion() { + @Override + protected ApiCompletionResult invoke(DataItem dataItem) { + Map item = new HashMap<>(); + item.put("result", "value-" + dataItem.getDataIndex()); + return new ApiCompletionResult(item); } + }; - // 记录执行顺序 - execOrder.computeIfAbsent(caseId, k -> Collections.synchronizedList(new ArrayList<>())) - .add(query); + WorkflowContext ctx = buildContextWithDataItems(3); + executeWithContext(api, ctx); - String response = "response of " + query; - log.info("caseId:{}, query:{}, response:{}", caseId, query, response); - ApiCompletionResult result = new ApiCompletionResult(); - result.setResultItem(MapUtils.of("response", response)); - return result; + List dataItems = WorkflowContextOps.getDataItems(ctx); + for (DataItem dataItem : dataItems) { + assertNotNull(dataItem.getApiCompletionResult(), + "每个 DataItem 都应有 apiCompletionResult"); + assertEquals("value-" + dataItem.getDataIndex(), + dataItem.getApiCompletionResult().get("result"), + "apiCompletionResult 内容应与 invoke 返回一致"); } } @Test - void testConcurrent() { - DataLoader dataLoader = new DataLoader() { + @DisplayName("doExecute 时 DataItem 列表为空,应抛出 EvalException") + void testDoExecute_emptyDataItems_throws() { + ApiCompletion api = buildApiCompletion(new ApiCompletionResult(new HashMap<>())); + WorkflowContext ctx = new WorkflowContext(); + WorkflowContextOps.setDataItems(ctx, new CopyOnWriteArrayList<>()); + api.setWorkflowContext(ctx); + + assertThrows(RuntimeException.class, () -> { + try { + api.call(); + } catch (Exception e) { + throw new RuntimeException(e); + } + }, "DataItems 为空时应抛出异常"); + } + + @Test + @DisplayName("doExecute 时部分 invoke 抛异常,其余 DataItem 仍应正常完成") + void testDoExecute_partialFailure_othersSucceed() { + ApiCompletion api = new ApiCompletion() { + @Override + protected ApiCompletionResult invoke(DataItem dataItem) { + // 只有 dataIndex=1 的抛异常 + if (dataItem.getDataIndex() == 1L) { + throw new RuntimeException("mock failure"); + } + Map item = new HashMap<>(); + item.put("ok", true); + return new ApiCompletionResult(item); + } + }; + + WorkflowContext ctx = buildContextWithDataItems(3); + executeWithContext(api, ctx); + + List dataItems = WorkflowContextOps.getDataItems(ctx); + // index=0 和 index=2 应成功 + assertTrue(dataItems.get(0).getApiCompletionResult().isSuccess()); + assertFalse(dataItems.get(1).getApiCompletionResult().isSuccess(), + "invoke 失败的 DataItem 的 success 应为 false"); + assertTrue(dataItems.get(2).getApiCompletionResult().isSuccess()); + } + + @Test + @DisplayName("doExecute 时 DataItem 已有 apiCompletionResult,不应被覆盖") + void testDoExecute_existingResultNotOverwritten() { + AtomicBoolean invoked = new AtomicBoolean(false); + ApiCompletion api = new ApiCompletion() { @Override - public List prepareDataList() { - return ListUtils.of( - new InputData(MapUtils.of("caseId", "1", "query", "query1")), - new InputData(MapUtils.of("caseId", "1", "query", "query2")), - new InputData(MapUtils.of("caseId", "1", "query", "query3")), - new InputData(MapUtils.of("caseId", "2", "query", "query1")), - new InputData(MapUtils.of("caseId", "2", "query", "query2")), - new InputData(MapUtils.of("caseId", "3", "query", "query1")), - new InputData(MapUtils.of("caseId", "3", "query", "query2")) - ); + protected ApiCompletionResult invoke(DataItem dataItem) { + invoked.set(true); + return new ApiCompletionResult(new HashMap<>()); } }; - Begin begin = new Begin(); - TestApiCompletion apiCompletion = new TestApiCompletion( - ApiCompletionConfig.builder().threadNum(4).build() - ); + WorkflowContext ctx = new WorkflowContext(); + List items = new CopyOnWriteArrayList<>(); + DataItem dataItem = new DataItem(0L, new InputData(new HashMap<>())); + ApiCompletionResult existing = new ApiCompletionResult(new HashMap<>()); + existing.setDataIndex(0L); + existing.setSuccess(true); + dataItem.setApiCompletionResult(existing); + items.add(dataItem); + WorkflowContextOps.setDataItems(ctx, items); - // 必须在指定时间内跑完,否则认为死锁 / 阻塞 - assertTimeoutPreemptively(java.time.Duration.ofSeconds(10), (ThrowingSupplier) () -> { - new WorkflowBuilder() - .link(begin, dataLoader, apiCompletion) - .build() - .execute(); - return null; - }); + executeWithContext(api, ctx); - // 并发度断言:3 个 case 并行,总耗时 < 串行 7*200ms - log.info("execOrder={}", apiCompletion.execOrder); + assertFalse(invoked.get(), "已有 apiCompletionResult 时不应调用 invoke"); + assertSame(existing, WorkflowContextOps.getDataItems(ctx).get(0).getApiCompletionResult(), + "已有结果不应被覆盖"); } + @Test + @DisplayName("doExecute 按 dataIndex 匹配结果,顺序无关") + void testDoExecute_resultMatchedByDataIndex() { + ApiCompletion api = new ApiCompletion() { + @Override + protected ApiCompletionResult invoke(DataItem dataItem) { + Map item = new HashMap<>(); + item.put("idx", dataItem.getDataIndex()); + return new ApiCompletionResult(item); + } + }; + WorkflowContext ctx = buildContextWithDataItems(5); + executeWithContext(api, ctx); + + List dataItems = WorkflowContextOps.getDataItems(ctx); + for (DataItem dataItem : dataItems) { + Long idx = (Long) dataItem.getApiCompletionResult().get("idx"); + assertEquals(dataItem.getDataIndex(), idx, + "apiCompletionResult 应按 dataIndex 正确匹配到对应的 DataItem"); + } + } } \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/api/HttpApiCompletionTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/api/HttpApiCompletionTest.java deleted file mode 100644 index 37aea54..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/api/HttpApiCompletionTest.java +++ /dev/null @@ -1,57 +0,0 @@ -package com.evalkit.framework.eval.node.api; - -import com.evalkit.framework.common.client.http.model.HttpApiResponse; -import com.evalkit.framework.eval.model.ApiCompletionResult; -import com.evalkit.framework.eval.model.InputData; -import com.evalkit.framework.eval.node.api.config.HttpApiCompletionConfig; -import org.junit.jupiter.api.Test; - -import java.util.Collections; -import java.util.Map; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNotNull; - -class HttpApiCompletionTest { - - /** - * 测试 HttpApiCompletion 可以正确构建并初始化,使用 localhost 作为 mock host - * 不发起真实 HTTP 请求,只验证对象构建逻辑 - */ - @Test - void testConstructAndBuildConfig() { - HttpApiCompletion httpApiCompletion = new HttpApiCompletion( - HttpApiCompletionConfig.builder() - .host("http://localhost:8080") - .api("/api/test") - .method("POST") - .build() - ) { - @Override - public Map prepareBody(InputData inputData) { - return Collections.emptyMap(); - } - - @Override - public Map prepareParam(InputData inputData) { - return Collections.emptyMap(); - } - - @Override - public Map prepareHeader(InputData inputData) { - return Collections.emptyMap(); - } - - @Override - public ApiCompletionResult buildApiCompletionResult(InputData inputData, HttpApiResponse response) { - return new ApiCompletionResult(); - } - }; - - assertNotNull(httpApiCompletion, "HttpApiCompletion 实例不应为 null"); - assertNotNull(httpApiCompletion.getConfig(), "HttpApiCompletion 配置不应为 null"); - HttpApiCompletionConfig config = (HttpApiCompletionConfig) httpApiCompletion.getConfig(); - assertEquals("http://localhost:8080", config.getHost(), "Host 应与构建时一致"); - assertEquals("/api/test", config.getApi(), "API 路径应与构建时一致"); - } -} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/api_wrapper/ApiCompletionWrapperTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/api_wrapper/ApiCompletionWrapperTest.java index 32ffca5..d943b7e 100644 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/api_wrapper/ApiCompletionWrapperTest.java +++ b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/api_wrapper/ApiCompletionWrapperTest.java @@ -1,380 +1,376 @@ package com.evalkit.framework.eval.node.api_wrapper; -import com.evalkit.framework.common.utils.list.ListUtils; -import com.evalkit.framework.eval.constants.NodeNamePrefix; import com.evalkit.framework.eval.context.WorkflowContextOps; -import com.evalkit.framework.eval.exception.EvalException; import com.evalkit.framework.eval.model.ApiCompletionResult; import com.evalkit.framework.eval.model.DataItem; import com.evalkit.framework.eval.model.InputData; import com.evalkit.framework.eval.node.api_wrapper.config.ApiCompletionWrapperConfig; import com.evalkit.framework.workflow.model.WorkflowContext; +import lombok.extern.slf4j.Slf4j; import org.junit.jupiter.api.DisplayName; -import org.junit.jupiter.api.Nested; import org.junit.jupiter.api.Test; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.util.*; +import java.util.concurrent.CopyOnWriteArrayList; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicReference; +import java.util.function.Consumer; import static org.junit.jupiter.api.Assertions.*; -/** - * ApiCompletionWrapper 单元测试 - *

- * 测试覆盖: - *

    - *
  • 节点 ID 前缀规范
  • - *
  • doExecute 空数据保护
  • - *
  • executeWrapper 正常装饰流程(钩子顺序)
  • - *
  • executeWrapper 装饰异常时不影响整体、返回原数据项
  • - *
  • onWrapperError 在装饰异常时被调用
  • - *
  • 多数据项并发装饰,单条失败不影响其他条
  • - *
  • wrapper 对 ApiCompletionResult 的修改正确回写
  • - *
- */ -@DisplayName("ApiCompletionWrapper") +@Slf4j +@DisplayName("ApiCompletionWrapper 单元测试") class ApiCompletionWrapperTest { - - // ==================== 工厂方法 ==================== + + // ===================== 工具方法 ===================== /** - * 构造一个正常执行的 wrapper,将 resultItem 中写入指定 key/value + * 构建一个 ApiCompletionWrapper,wrapper 逻辑由 Consumer 提供 */ - private ApiCompletionWrapper buildWrapper(String writeKey, String writeValue) { + private ApiCompletionWrapper buildWrapper(Consumer wrapperLogic) { return new ApiCompletionWrapper() { @Override protected void wrapper(DataItem dataItem) { - ApiCompletionResult result = dataItem.getApiCompletionResult(); - if (result != null) { - result.set(writeKey, writeValue); - } + wrapperLogic.accept(dataItem); } }; } /** - * 构造一个在 wrapper 中抛出异常的 wrapper + * 构建带自定义 config 的 ApiCompletionWrapper */ - private ApiCompletionWrapper buildThrowingWrapper(RuntimeException ex) { - return new ApiCompletionWrapper() { + private ApiCompletionWrapper buildWrapper(ApiCompletionWrapperConfig config, + Consumer wrapperLogic) { + return new ApiCompletionWrapper(config) { @Override protected void wrapper(DataItem dataItem) { - throw ex; + wrapperLogic.accept(dataItem); } }; } /** - * 构造一个记录钩子调用顺序的 wrapper + * 构造包含指定条数 DataItem(每条都带 ApiCompletionResult)的 WorkflowContext */ - private ApiCompletionWrapper buildHookOrderWrapper(List callLog) { - return new ApiCompletionWrapper() { - @Override - protected void beforeWrapper(DataItem dataItem) { - callLog.add("before"); + private WorkflowContext buildContextWithDataItems(int size) { + WorkflowContext ctx = new WorkflowContext(); + List items = new CopyOnWriteArrayList<>(); + for (int i = 0; i < size; i++) { + Map inputItem = new HashMap<>(); + inputItem.put("id", i); + DataItem dataItem = new DataItem((long) i, new InputData(inputItem)); + Map resultItem = new HashMap<>(); + resultItem.put("output", "raw-" + i); + ApiCompletionResult result = new ApiCompletionResult(resultItem); + result.setDataIndex((long) i); + dataItem.setApiCompletionResult(result); + items.add(dataItem); + } + WorkflowContextOps.setDataItems(ctx, items); + return ctx; + } + + /** + * 为 ApiCompletionWrapper 注入上下文并执行 + */ + private void executeWithContext(ApiCompletionWrapper wrapper, WorkflowContext ctx) { + wrapper.setWorkflowContext(ctx); + try { + wrapper.call(); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + // ===================== constructor 测试 ===================== + + @Test + @DisplayName("无参构造器应使用默认 ApiCompletionWrapperConfig(threadNum=1)") + void testConstructor_defaultConfig() { + ApiCompletionWrapper wrapper = buildWrapper(dataItem -> { + }); + assertNotNull(wrapper.config, "默认构造器应初始化 config"); + assertEquals(1, wrapper.config.getThreadNum(), "默认线程数应为 1"); + } + + @Test + @DisplayName("带 ApiCompletionWrapperConfig 构造器应正确保存配置") + void testConstructor_withConfig() { + ApiCompletionWrapperConfig config = ApiCompletionWrapperConfig.builder().threadNum(4).build(); + ApiCompletionWrapper wrapper = buildWrapper(config, dataItem -> { + }); + assertEquals(4, wrapper.config.getThreadNum()); + } + + // ===================== executeWrapper 测试 ===================== + + @Test + @DisplayName("executeWrapper 正常执行时应返回同一个 DataItem 实例") + void testExecuteWrapper_returnsSameDataItem() { + ApiCompletionWrapper wrapper = buildWrapper(dataItem -> { + }); + DataItem dataItem = new DataItem(0L, new InputData(new HashMap<>())); + DataItem result = wrapper.executeWrapper(dataItem); + assertSame(dataItem, result, "executeWrapper 应返回同一 DataItem 实例"); + } + + @Test + @DisplayName("executeWrapper 中 wrapper 逻辑可修改 ApiCompletionResult 字段") + void testExecuteWrapper_wrapperModifiesApiCompletionResult() { + ApiCompletionWrapper wrapper = buildWrapper(dataItem -> { + if (dataItem.getApiCompletionResult() != null) { + dataItem.getApiCompletionResult().set("wrapped", true); } + }); + + DataItem dataItem = new DataItem(0L, new InputData(new HashMap<>())); + dataItem.setApiCompletionResult(new ApiCompletionResult(new HashMap<>())); + wrapper.executeWrapper(dataItem); + + assertEquals(true, dataItem.getApiCompletionResult().get("wrapped"), + "wrapper 应能修改 ApiCompletionResult 字段"); + } + + @Test + @DisplayName("executeWrapper 中 wrapper 抛出异常时应被捕获,返回原 DataItem 不抛出") + void testExecuteWrapper_wrapperThrows_returnOriginalItem() { + ApiCompletionWrapper wrapper = buildWrapper(dataItem -> { + throw new RuntimeException("mock wrapper error"); + }); + + DataItem dataItem = new DataItem(0L, new InputData(new HashMap<>())); + DataItem result = assertDoesNotThrow(() -> wrapper.executeWrapper(dataItem), + "wrapper 抛异常时 executeWrapper 不应向外抛出"); + assertSame(dataItem, result, "抛异常后应返回原始 DataItem"); + } + + // ===================== 钩子方法测试 ===================== + @Test + @DisplayName("beforeWrapper 钩子应在 wrapper 前被调用") + void testBeforeWrapper_called() { + List callOrder = new ArrayList<>(); + + ApiCompletionWrapper wrapper = new ApiCompletionWrapper() { @Override - protected void wrapper(DataItem dataItem) { - callLog.add("wrapper"); + protected void beforeWrapper(DataItem dataItem) { + callOrder.add("before"); } @Override - protected void afterWrapper(DataItem dataItem) { - callLog.add("after"); + protected void wrapper(DataItem dataItem) { + callOrder.add("wrapper"); } }; + + wrapper.executeWrapper(new DataItem(0L, new InputData(new HashMap<>()))); + assertEquals(Arrays.asList("before", "wrapper"), callOrder, + "before 应在 wrapper 之前调用"); } - /** - * 构造一个记录 onWrapperError 的 wrapper - */ - private ApiCompletionWrapper buildErrorCapturingWrapper(List errors) { - return new ApiCompletionWrapper() { + @Test + @DisplayName("afterWrapper 钩子应在 wrapper 后被调用") + void testAfterWrapper_called() { + List callOrder = new ArrayList<>(); + + ApiCompletionWrapper wrapper = new ApiCompletionWrapper() { @Override protected void wrapper(DataItem dataItem) { - throw new RuntimeException("故意抛出"); + callOrder.add("wrapper"); } @Override - protected void onWrapperError(DataItem dataItem, Throwable e) { - errors.add(e); + protected void afterWrapper(DataItem dataItem) { + callOrder.add("after"); } }; - } - /** - * 构造最简 DataItem(含 ApiCompletionResult) - */ - private DataItem buildDataItem(long index) { - DataItem item = new DataItem(); - item.setDataIndex(index); - Map input = new HashMap<>(); - input.put("query", "测试问题-" + index); - item.setInputData(new InputData(index, input)); - Map result = new HashMap<>(); - result.put("answer", "测试回答-" + index); - item.setApiCompletionResult(new ApiCompletionResult(result)); - return item; + wrapper.executeWrapper(new DataItem(0L, new InputData(new HashMap<>()))); + assertEquals(Arrays.asList("wrapper", "after"), callOrder, + "after 应在 wrapper 之后调用"); } - /** - * 通过 WorkflowContext 驱动 doExecute - */ - private void executeWithContext(ApiCompletionWrapper wrapper, List dataItems) { - WorkflowContext ctx = new WorkflowContext(); - WorkflowContextOps.setDataItems(ctx, dataItems); - wrapper.setWorkflowContext(ctx); - wrapper.doExecute(); - } + @Test + @DisplayName("三个钩子按 before→wrapper→after 顺序执行") + void testExecuteWrapper_hookOrder() { + List order = new ArrayList<>(); - // ==================== 节点 ID 规范 ==================== + ApiCompletionWrapper wrapper = new ApiCompletionWrapper() { + @Override + protected void beforeWrapper(DataItem dataItem) { + order.add("before"); + } - @Nested - @DisplayName("节点 ID") - class NodeIdTest { + @Override + protected void wrapper(DataItem dataItem) { + order.add("wrapper"); + } - @Test - @DisplayName("节点 ID 应以 apiCompletionWrapper- 为前缀") - void nodeId_startsWithCorrectPrefix() { - ApiCompletionWrapper wrapper = buildWrapper("k", "v"); - assertTrue(wrapper.getId().startsWith(NodeNamePrefix.API_COMPLETION_WRAPPER), - "节点 ID 应以 '" + NodeNamePrefix.API_COMPLETION_WRAPPER + "' 开头,实际: " + wrapper.getId()); - } + @Override + protected void afterWrapper(DataItem dataItem) { + order.add("after"); + } + }; - @Test - @DisplayName("每个实例的节点 ID 应唯一") - void nodeId_isUnique() { - ApiCompletionWrapper w1 = buildWrapper("k", "v"); - ApiCompletionWrapper w2 = buildWrapper("k", "v"); - assertNotEquals(w1.getId(), w2.getId()); - } + wrapper.executeWrapper(new DataItem(0L, new InputData(new HashMap<>()))); + assertEquals(Arrays.asList("before", "wrapper", "after"), order, + "钩子应按 before→wrapper→after 顺序执行"); } - // ==================== doExecute 保护 ==================== + @Test + @DisplayName("wrapper 抛异常时 onWrapperError 钩子被调用,并传入正确异常") + void testOnWrapperError_called() { + AtomicBoolean errorCalled = new AtomicBoolean(false); + AtomicReference capturedError = new AtomicReference<>(); - @Nested - @DisplayName("doExecute 空数据保护") - class DoExecuteGuardTest { - - @Test - @DisplayName("dataItems 为 null 时应抛出 EvalException") - void doExecute_nullDataItems_throwsEvalException() { - ApiCompletionWrapper wrapper = buildWrapper("k", "v"); - WorkflowContext ctx = new WorkflowContext(); - WorkflowContextOps.setDataItems(ctx, null); // null → remove key → getDataItems 返回 null - wrapper.setWorkflowContext(ctx); - assertThrows(EvalException.class, wrapper::doExecute); - } + ApiCompletionWrapper wrapper = new ApiCompletionWrapper() { + @Override + protected void wrapper(DataItem dataItem) { + throw new RuntimeException("test-error"); + } - @Test - @DisplayName("dataItems 为空列表时应抛出 EvalException") - void doExecute_emptyDataItems_throwsEvalException() { - ApiCompletionWrapper wrapper = buildWrapper("k", "v"); - assertThrows(EvalException.class, - () -> executeWithContext(wrapper, new ArrayList<>())); - } + @Override + protected void onWrapperError(DataItem dataItem, Throwable e) { + errorCalled.set(true); + capturedError.set(e); + } + }; - @Test - @DisplayName("dataItems 非空时正常执行,不抛异常") - void doExecute_normalDataItems_noException() { - ApiCompletionWrapper wrapper = buildWrapper("transformed", "yes"); - List items = ListUtils.of(buildDataItem(1L)); - assertDoesNotThrow(() -> executeWithContext(wrapper, items)); - } + wrapper.executeWrapper(new DataItem(0L, new InputData(new HashMap<>()))); + assertTrue(errorCalled.get(), "wrapper 抛异常时 onWrapperError 应被调用"); + assertNotNull(capturedError.get()); + assertEquals("test-error", capturedError.get().getMessage()); } - // ==================== 钩子顺序 ==================== + @Test + @DisplayName("wrapper 抛异常时 afterWrapper 不被调用") + void testAfterWrapper_notCalledOnError() { + AtomicBoolean afterCalled = new AtomicBoolean(false); - @Nested - @DisplayName("钩子调用顺序") - class HookOrderTest { - - @Test - @DisplayName("正常执行时钩子顺序为 before → wrapper → after") - void executeWrapper_hookOrder_beforeWrapperAfter() { - List callLog = new ArrayList<>(); - ApiCompletionWrapper wrapper = buildHookOrderWrapper(callLog); - DataItem item = buildDataItem(1L); - - wrapper.executeWrapper(item); + ApiCompletionWrapper wrapper = new ApiCompletionWrapper() { + @Override + protected void wrapper(DataItem dataItem) { + throw new RuntimeException("error"); + } - assertEquals(ListUtils.of("before", "wrapper", "after"), callLog); - } + @Override + protected void afterWrapper(DataItem dataItem) { + afterCalled.set(true); + } + }; - @Test - @DisplayName("wrapper 抛异常时 after 不执行,after 前已记录 before") - void executeWrapper_exceptionInWrapper_afterNotCalled() { - List callLog = new ArrayList<>(); - ApiCompletionWrapper wrapper = new ApiCompletionWrapper() { - @Override - protected void beforeWrapper(DataItem dataItem) { - callLog.add("before"); - } - - @Override - protected void wrapper(DataItem dataItem) { - callLog.add("wrapper-throws"); - throw new RuntimeException("异常"); - } - - @Override - protected void afterWrapper(DataItem dataItem) { - callLog.add("after"); - } - }; - - wrapper.executeWrapper(buildDataItem(1L)); - - assertTrue(callLog.contains("before")); - assertTrue(callLog.contains("wrapper-throws")); - assertFalse(callLog.contains("after"), "after 不应在 wrapper 抛异常后执行"); - } + wrapper.executeWrapper(new DataItem(0L, new InputData(new HashMap<>()))); + assertFalse(afterCalled.get(), "wrapper 抛异常时 afterWrapper 不应被调用"); } - // ==================== 异常隔离 ==================== + // ===================== doExecute 测试 ===================== - @Nested - @DisplayName("单条异常不影响整体") - class ExceptionIsolationTest { + @Test + @DisplayName("doExecute 对 WorkflowContext 中的每个 DataItem 都应执行 wrapper") + void testDoExecute_wrapperCalledForEachDataItem() { + AtomicInteger wrapperCount = new AtomicInteger(0); + ApiCompletionWrapper wrapper = buildWrapper(dataItem -> wrapperCount.incrementAndGet()); - @Test - @DisplayName("wrapper 抛异常时 executeWrapper 返回原 DataItem(不为 null)") - void executeWrapper_exceptionInWrapper_returnsOriginalItem() { - RuntimeException ex = new RuntimeException("装饰失败"); - ApiCompletionWrapper wrapper = buildThrowingWrapper(ex); - DataItem item = buildDataItem(1L); + WorkflowContext ctx = buildContextWithDataItems(5); + executeWithContext(wrapper, ctx); - DataItem returned = wrapper.executeWrapper(item); - - assertSame(item, returned, "应原样返回 DataItem"); - } - - @Test - @DisplayName("onWrapperError 在 wrapper 抛异常时被调用,且携带正确异常") - void executeWrapper_exceptionInWrapper_onWrapperErrorCalled() { - List errors = new ArrayList<>(); - ApiCompletionWrapper wrapper = buildErrorCapturingWrapper(errors); - wrapper.executeWrapper(buildDataItem(1L)); - - assertEquals(1, errors.size()); - assertEquals("故意抛出", errors.get(0).getMessage()); - } - - @Test - @DisplayName("多条数据项中,部分失败不影响其他条的装饰结果") - void doExecute_partialFailure_otherItemsStillWrapped() { - // 奇数 index 的 DataItem 触发异常,偶数的正常写入 - ApiCompletionWrapper wrapper = new ApiCompletionWrapper() { - @Override - protected void wrapper(DataItem dataItem) { - if (dataItem.getDataIndex() % 2 != 0) { - throw new RuntimeException("奇数行故意失败"); - } - dataItem.getApiCompletionResult().set("wrapped", "true"); - } - }; - - List items = ListUtils.of( - buildDataItem(1L), // 失败 - buildDataItem(2L), // 成功 - buildDataItem(3L), // 失败 - buildDataItem(4L) // 成功 - ); - - assertDoesNotThrow(() -> executeWithContext(wrapper, items)); - - // 偶数条应成功写入 - assertEquals("true", items.get(1).getApiCompletionResult().get("wrapped")); - assertEquals("true", items.get(3).getApiCompletionResult().get("wrapped")); - // 奇数条结果不变 - assertNull(items.get(0).getApiCompletionResult().get("wrapped")); - assertNull(items.get(2).getApiCompletionResult().get("wrapped")); - } + assertEquals(5, wrapperCount.get(), "wrapper 应对每个 DataItem 都被调用一次"); } - // ==================== 装饰结果回写 ==================== - - @Nested - @DisplayName("装饰结果回写") - class WrapperResultTest { - - @Test - @DisplayName("wrapper 对 ApiCompletionResult 的修改应正确回写到 DataItem") - void wrapper_modifiesApiCompletionResult_changesPersist() { - ApiCompletionWrapper wrapper = buildWrapper("normalized_answer", "hello world"); - DataItem item = buildDataItem(1L); + @Test + @DisplayName("doExecute 后 ApiCompletionResult 的修改应被持久化到 WorkflowContext") + void testDoExecute_modificationsPersisted() { + ApiCompletionWrapper wrapper = buildWrapper(dataItem -> { + if (dataItem.getApiCompletionResult() != null) { + dataItem.getApiCompletionResult().set("processed", true); + } + }); - wrapper.executeWrapper(item); + WorkflowContext ctx = buildContextWithDataItems(3); + executeWithContext(wrapper, ctx); - assertEquals("hello world", item.getApiCompletionResult().get("normalized_answer")); + List dataItems = WorkflowContextOps.getDataItems(ctx); + for (DataItem dataItem : dataItems) { + assertEquals(true, dataItem.getApiCompletionResult().get("processed"), + "每个 DataItem 的 ApiCompletionResult 都应包含 wrapper 写入的字段"); } + } - @Test - @DisplayName("多个字段同时写入,均应正确保留") - void wrapper_multipleFieldsWritten_allPersist() { - ApiCompletionWrapper wrapper = new ApiCompletionWrapper() { - @Override - protected void wrapper(DataItem dataItem) { - ApiCompletionResult result = dataItem.getApiCompletionResult(); - result.set("field_a", "valueA"); - result.set("field_b", 42); - result.set("field_c", true); - } - }; - DataItem item = buildDataItem(1L); - wrapper.executeWrapper(item); - - ApiCompletionResult result = item.getApiCompletionResult(); - assertEquals("valueA", result.get("field_a")); - assertEquals(42, (Integer) result.get("field_b")); - assertEquals(true, result.get("field_c")); - } + @Test + @DisplayName("doExecute 时 DataItem 列表为空,应抛出 EvalException") + void testDoExecute_emptyDataItems_throws() { + ApiCompletionWrapper wrapper = buildWrapper(dataItem -> { + }); + WorkflowContext ctx = new WorkflowContext(); + WorkflowContextOps.setDataItems(ctx, new CopyOnWriteArrayList<>()); + wrapper.setWorkflowContext(ctx); - @Test - @DisplayName("doExecute 批量执行后,所有 DataItem 均被正确装饰") - void doExecute_batchWrapper_allItemsDecorated() { - ApiCompletionWrapper wrapper = buildWrapper("done", "yes"); - List items = ListUtils.of( - buildDataItem(1L), buildDataItem(2L), buildDataItem(3L) - ); + assertThrows(RuntimeException.class, () -> { + try { + wrapper.call(); + } catch (Exception e) { + throw new RuntimeException(e); + } + }, "DataItems 为空时 doExecute 应抛出异常"); + } - executeWithContext(wrapper, items); + @Test + @DisplayName("doExecute 时 DataItem 列表为 null,应抛出 EvalException") + void testDoExecute_nullDataItems_throws() { + ApiCompletionWrapper wrapper = buildWrapper(dataItem -> { + }); + WorkflowContext ctx = new WorkflowContext(); + // 不设置 dataItems,默认为 null + wrapper.setWorkflowContext(ctx); - for (DataItem item : items) { - assertEquals("yes", item.getApiCompletionResult().get("done"), - "DataItem[" + item.getDataIndex() + "] 未被正确装饰"); + assertThrows(RuntimeException.class, () -> { + try { + wrapper.call(); + } catch (Exception e) { + throw new RuntimeException(e); } - } + }, "DataItems 为 null 时 doExecute 应抛出异常"); } - // ==================== 自定义 Config ==================== + @Test + @DisplayName("doExecute 时部分 wrapper 抛异常,其余 DataItem 仍应正常处理") + void testDoExecute_partialFailure_othersSucceed() { + ApiCompletionWrapper wrapper = buildWrapper(dataItem -> { + if (dataItem.getDataIndex() == 1L) { + throw new RuntimeException("mock error"); + } + dataItem.getApiCompletionResult().set("done", true); + }); + + WorkflowContext ctx = buildContextWithDataItems(3); + executeWithContext(wrapper, ctx); + + List dataItems = WorkflowContextOps.getDataItems(ctx); + assertEquals(true, dataItems.get(0).getApiCompletionResult().get("done"), + "index=0 应正常完成"); + assertNull(dataItems.get(1).getApiCompletionResult().get("done"), + "index=1 wrapper 失败,done 字段不应被设置"); + assertEquals(true, dataItems.get(2).getApiCompletionResult().get("done"), + "index=2 应正常完成"); + } - @Nested - @DisplayName("Config 生效") - class ConfigTest { + @Test + @DisplayName("doExecute 对原始 output 字段进行覆写后,WorkflowContext 中的值应已更新") + void testDoExecute_outputFieldOverwritten() { + ApiCompletionWrapper wrapper = buildWrapper(dataItem -> { + ApiCompletionResult result = dataItem.getApiCompletionResult(); + if (result != null) { + String original = result.get("output"); + result.set("output", "wrapped-" + original); + } + }); - @Test - @DisplayName("默认构造器使用 threadNum=1") - void defaultConstructor_threadNumIsOne() { - ApiCompletionWrapper wrapper = buildWrapper("k", "v"); - assertEquals(1, wrapper.config.getThreadNum()); - } + WorkflowContext ctx = buildContextWithDataItems(2); + executeWithContext(wrapper, ctx); - @Test - @DisplayName("自定义 config 的 threadNum 正确生效") - void customConfig_threadNumApplied() { - ApiCompletionWrapperConfig config = ApiCompletionWrapperConfig.builder() - .threadNum(4) - .build(); - ApiCompletionWrapper wrapper = new ApiCompletionWrapper(config) { - @Override - protected void wrapper(DataItem dataItem) { - } - }; - assertEquals(4, wrapper.config.getThreadNum()); - } + List dataItems = WorkflowContextOps.getDataItems(ctx); + assertEquals("wrapped-raw-0", dataItems.get(0).getApiCompletionResult().get("output")); + assertEquals("wrapped-raw-1", dataItems.get(1).getApiCompletionResult().get("output")); } } \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/api_wrapper/LLMBasedApiCompletionWrapperTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/api_wrapper/LLMBasedApiCompletionWrapperTest.java deleted file mode 100644 index f870f06..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/api_wrapper/LLMBasedApiCompletionWrapperTest.java +++ /dev/null @@ -1,472 +0,0 @@ -package com.evalkit.framework.eval.node.api_wrapper; - -import com.evalkit.framework.common.utils.list.ListUtils; -import com.evalkit.framework.eval.context.WorkflowContextOps; -import com.evalkit.framework.eval.model.ApiCompletionResult; -import com.evalkit.framework.eval.model.DataItem; -import com.evalkit.framework.eval.model.InputData; -import com.evalkit.framework.eval.node.api_wrapper.config.LLMBasedApiCompletionConfig; -import com.evalkit.framework.infra.service.llm.LLMService; -import com.evalkit.framework.workflow.model.WorkflowContext; -import org.junit.jupiter.api.DisplayName; -import org.junit.jupiter.api.Nested; -import org.junit.jupiter.api.Test; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.concurrent.atomic.AtomicReference; - -import static org.junit.jupiter.api.Assertions.*; - -/** - * LLMBasedApiCompletionWrapper 单元测试 - *

- * 测试覆盖: - *

    - *
  • llmConfig 正确绑定,无字段遮蔽
  • - *
  • ApiCompletionResult 为 null 时跳过,不调用 LLM
  • - *
  • preparePrompt 返回空/null 时跳过,不调用 LLM
  • - *
  • 正常流程:LLM 调用结果通过 applyLLMOutput 写回结果
  • - *
  • preparePrompt 接收到完整的 DataItem(含输入数据和接口结果)
  • - *
  • LLM 抛异常时,executeWrapper 不向外传播(单条失败隔离)
  • - *
  • 多条数据批量执行,LLM 分别独立调用
  • - *
- */ -@DisplayName("LLMBasedApiCompletionWrapper") -class LLMBasedApiCompletionWrapperTest { - - // ==================== 工厂方法 ==================== - - /** - * 构造固定回复的 mock LLMService - */ - private LLMService mockLLM(String reply) { - return new LLMService() { - @Override - public String chat(String prompt) { - return reply; - } - - @Override - public String getModel() { - return "mock-model"; - } - }; - } - - /** - * 构造抛异常的 mock LLMService - */ - private LLMService throwingLLM(String msg) { - return new LLMService() { - @Override - public String chat(String prompt) { - throw new RuntimeException(msg); - } - - @Override - public String getModel() { - return "throwing-model"; - } - }; - } - - /** - * 构造记录调用 prompt 的 mock LLMService - */ - private LLMService capturingLLM(List promptLog, String reply) { - return new LLMService() { - @Override - public String chat(String prompt) { - promptLog.add(prompt); - return reply; - } - - @Override - public String getModel() { - return "capturing-model"; - } - }; - } - - /** - * 构造标准 LLMBasedApiCompletionWrapper: - * - preparePrompt: 拼接 query + answer - * - applyLLMOutput: 写入 "wrapped_answer" 字段 - */ - private LLMBasedApiCompletionWrapper buildWrapper(LLMService llmService) { - LLMBasedApiCompletionConfig config = LLMBasedApiCompletionConfig.builder() - .llmService(llmService) - .build(); - return new LLMBasedApiCompletionWrapper(config) { - @Override - public String preparePrompt(DataItem dataItem) { - String query = dataItem.getInputData().get("query"); - String answer = dataItem.getApiCompletionResult().get("answer"); - return "query=" + query + " answer=" + answer; - } - - @Override - public void applyLLMOutput(ApiCompletionResult result, String llmOutput) { - result.set("wrapped_answer", llmOutput); - } - }; - } - - /** - * 构造最简 DataItem(含 ApiCompletionResult) - */ - private DataItem buildDataItem(long index) { - DataItem item = new DataItem(); - item.setDataIndex(index); - Map input = new HashMap<>(); - input.put("query", "测试问题-" + index); - item.setInputData(new InputData(index, input)); - Map result = new HashMap<>(); - result.put("answer", "测试回答-" + index); - item.setApiCompletionResult(new ApiCompletionResult(result)); - return item; - } - - /** - * 通过 WorkflowContext 驱动 doExecute - */ - private void executeWithContext(LLMBasedApiCompletionWrapper wrapper, List items) { - WorkflowContext ctx = new WorkflowContext(); - WorkflowContextOps.setDataItems(ctx, items); - wrapper.setWorkflowContext(ctx); - wrapper.doExecute(); - } - - // ==================== Config 绑定 ==================== - - @Nested - @DisplayName("Config 绑定") - class ConfigBindingTest { - - @Test - @DisplayName("llmConfig 字段与构造器传入的 config 是同一实例,不存在字段遮蔽") - void llmConfig_sameInstanceAsConstructorArg() { - LLMBasedApiCompletionConfig config = LLMBasedApiCompletionConfig.builder() - .llmService(mockLLM("ok")) - .build(); - LLMBasedApiCompletionWrapper wrapper = new LLMBasedApiCompletionWrapper(config) { - @Override - public String preparePrompt(DataItem dataItem) { - return "prompt"; - } - - @Override - public void applyLLMOutput(ApiCompletionResult result, String llmOutput) { - } - }; - assertSame(config, wrapper.llmConfig, "llmConfig 应与构造器传入的 config 是同一对象"); - } - - @Test - @DisplayName("父类 config 与 llmConfig 指向同一实例") - void parentConfig_sameAsLlmConfig() { - LLMBasedApiCompletionConfig config = LLMBasedApiCompletionConfig.builder() - .llmService(mockLLM("ok")) - .build(); - LLMBasedApiCompletionWrapper wrapper = new LLMBasedApiCompletionWrapper(config) { - @Override - public String preparePrompt(DataItem dataItem) { - return "prompt"; - } - - @Override - public void applyLLMOutput(ApiCompletionResult result, String llmOutput) { - } - }; - // 父类 config 也应与 llmConfig 一致 - assertSame(wrapper.llmConfig, wrapper.config, - "父类 config 与 llmConfig 应为同一实例"); - } - } - - // ==================== 跳过条件 ==================== - - @Nested - @DisplayName("跳过条件") - class SkipConditionTest { - - @Test - @DisplayName("ApiCompletionResult 为 null 时,不调用 LLM,直接跳过") - void wrapper_nullApiCompletionResult_skipsLLM() { - AtomicInteger callCount = new AtomicInteger(0); - LLMService countingLLM = new LLMService() { - @Override - public String chat(String prompt) { - callCount.incrementAndGet(); - return "output"; - } - - @Override - public String getModel() { - return "counting-model"; - } - }; - LLMBasedApiCompletionWrapper wrapper = buildWrapper(countingLLM); - - DataItem item = buildDataItem(1L); - item.setApiCompletionResult(null); // 设为 null - - wrapper.executeWrapper(item); - - assertEquals(0, callCount.get(), "ApiCompletionResult 为 null 时不应调用 LLM"); - } - - @Test - @DisplayName("preparePrompt 返回 null 时,不调用 LLM,直接跳过") - void wrapper_nullPrompt_skipsLLM() { - AtomicInteger callCount = new AtomicInteger(0); - LLMBasedApiCompletionConfig config = LLMBasedApiCompletionConfig.builder() - .llmService(new LLMService() { - @Override - public String chat(String prompt) { - callCount.incrementAndGet(); - return "output"; - } - - @Override - public String getModel() { - return "counting-model"; - } - }) - .build(); - LLMBasedApiCompletionWrapper wrapper = new LLMBasedApiCompletionWrapper(config) { - @Override - public String preparePrompt(DataItem dataItem) { - return null; // 返回 null - } - - @Override - public void applyLLMOutput(ApiCompletionResult result, String llmOutput) { - } - }; - - wrapper.executeWrapper(buildDataItem(1L)); - - assertEquals(0, callCount.get(), "preparePrompt 返回 null 时不应调用 LLM"); - } - - @Test - @DisplayName("preparePrompt 返回空字符串时,不调用 LLM,直接跳过") - void wrapper_emptyPrompt_skipsLLM() { - AtomicInteger callCount = new AtomicInteger(0); - LLMBasedApiCompletionConfig config = LLMBasedApiCompletionConfig.builder() - .llmService(new LLMService() { - @Override - public String chat(String prompt) { - callCount.incrementAndGet(); - return "output"; - } - - @Override - public String getModel() { - return "counting-model"; - } - }) - .build(); - LLMBasedApiCompletionWrapper wrapper = new LLMBasedApiCompletionWrapper(config) { - @Override - public String preparePrompt(DataItem dataItem) { - return ""; // 返回空字符串 - } - - @Override - public void applyLLMOutput(ApiCompletionResult result, String llmOutput) { - } - }; - - wrapper.executeWrapper(buildDataItem(1L)); - - assertEquals(0, callCount.get(), "preparePrompt 返回空字符串时不应调用 LLM"); - } - } - - // ==================== 正常流程 ==================== - - @Nested - @DisplayName("正常装饰流程") - class NormalFlowTest { - - @Test - @DisplayName("LLM 输出通过 applyLLMOutput 正确写回 ApiCompletionResult") - void wrapper_llmOutput_appliedToResult() { - LLMBasedApiCompletionWrapper wrapper = buildWrapper(mockLLM("转化后的内容")); - DataItem item = buildDataItem(1L); - - wrapper.executeWrapper(item); - - assertEquals("转化后的内容", item.getApiCompletionResult().get("wrapped_answer")); - } - - @Test - @DisplayName("preparePrompt 接收到正确的 DataItem(含 inputData 和 apiCompletionResult)") - void wrapper_preparePrompt_receivesCorrectDataItem() { - AtomicReference capturedPrompt = new AtomicReference<>(); - LLMBasedApiCompletionConfig config = LLMBasedApiCompletionConfig.builder() - .llmService(mockLLM("output")) - .build(); - LLMBasedApiCompletionWrapper wrapper = new LLMBasedApiCompletionWrapper(config) { - @Override - public String preparePrompt(DataItem dataItem) { - String q = dataItem.getInputData().get("query"); - String a = dataItem.getApiCompletionResult().get("answer"); - capturedPrompt.set("q=" + q + ",a=" + a); - return capturedPrompt.get(); - } - - @Override - public void applyLLMOutput(ApiCompletionResult result, String llmOutput) { - } - }; - - DataItem item = buildDataItem(42L); - wrapper.executeWrapper(item); - - assertEquals("q=测试问题-42,a=测试回答-42", capturedPrompt.get()); - } - - @Test - @DisplayName("LLM 被调用时收到的 prompt 与 preparePrompt 返回值一致") - void wrapper_llmReceivesCorrectPrompt() { - List promptLog = new ArrayList<>(); - LLMBasedApiCompletionConfig config = LLMBasedApiCompletionConfig.builder() - .llmService(capturingLLM(promptLog, "output")) - .build(); - LLMBasedApiCompletionWrapper wrapper = new LLMBasedApiCompletionWrapper(config) { - @Override - public String preparePrompt(DataItem dataItem) { - return "固定提示词"; - } - - @Override - public void applyLLMOutput(ApiCompletionResult result, String llmOutput) { - } - }; - - wrapper.executeWrapper(buildDataItem(1L)); - - assertEquals(1, promptLog.size()); - assertEquals("固定提示词", promptLog.get(0)); - } - } - - // ==================== 异常隔离 ==================== - - @Nested - @DisplayName("LLM 异常隔离") - class LLMExceptionIsolationTest { - - @Test - @DisplayName("LLM 抛异常时,executeWrapper 不向外传播,返回原 DataItem") - void wrapper_llmThrows_exceptionIsolated() { - LLMBasedApiCompletionWrapper wrapper = buildWrapper(throwingLLM("LLM 服务故障")); - DataItem item = buildDataItem(1L); - - DataItem returned = assertDoesNotThrow(() -> wrapper.executeWrapper(item)); - - assertSame(item, returned, "应原样返回 DataItem"); - } - - @Test - @DisplayName("多条数据中,部分 LLM 异常不影响其他条") - void doExecute_partialLLMFailure_otherItemsStillWrapped() { - AtomicInteger callCount = new AtomicInteger(0); - LLMBasedApiCompletionConfig config = LLMBasedApiCompletionConfig.builder() - .llmService(new LLMService() { - @Override - public String chat(String prompt) { - // 第一次调用失败,后续正常 - if (callCount.getAndIncrement() == 0) { - throw new RuntimeException("第一次失败"); - } - return "success"; - } - - @Override - public String getModel() { - return "partial-fail-model"; - } - }) - .build(); - LLMBasedApiCompletionWrapper wrapper = new LLMBasedApiCompletionWrapper(config) { - @Override - public String preparePrompt(DataItem dataItem) { - return "prompt"; - } - - @Override - public void applyLLMOutput(ApiCompletionResult result, String llmOutput) { - result.set("wrapped", llmOutput); - } - }; - - List items = ListUtils.of(buildDataItem(1L), buildDataItem(2L), buildDataItem(3L)); - assertDoesNotThrow(() -> executeWithContext(wrapper, items)); - - // 后两条应成功 - int successCount = 0; - for (DataItem item : items) { - if ("success".equals(item.getApiCompletionResult().get("wrapped"))) { - successCount++; - } - } - assertEquals(2, successCount, "第一条失败后,后续两条应正常装饰"); - } - } - - // ==================== 批量执行 ==================== - - @Nested - @DisplayName("批量执行") - class BatchExecutionTest { - - @Test - @DisplayName("doExecute 对所有 DataItem 各调用一次 LLM") - void doExecute_callsLLMForEachItem() { - List promptLog = new ArrayList<>(); - LLMBasedApiCompletionConfig config = LLMBasedApiCompletionConfig.builder() - .llmService(capturingLLM(promptLog, "output")) - .build(); - LLMBasedApiCompletionWrapper wrapper = new LLMBasedApiCompletionWrapper(config) { - @Override - public String preparePrompt(DataItem dataItem) { - return "prompt-" + dataItem.getDataIndex(); - } - - @Override - public void applyLLMOutput(ApiCompletionResult result, String llmOutput) { - result.set("out", llmOutput); - } - }; - - List items = ListUtils.of(buildDataItem(1L), buildDataItem(2L), buildDataItem(3L)); - executeWithContext(wrapper, items); - - assertEquals(3, promptLog.size(), "应为每条数据各调用一次 LLM"); - } - - @Test - @DisplayName("doExecute 完成后,所有 DataItem 的结果均被正确写入") - void doExecute_allItemsDecorated() { - LLMBasedApiCompletionWrapper wrapper = buildWrapper(mockLLM("processed")); - List items = ListUtils.of( - buildDataItem(1L), buildDataItem(2L), buildDataItem(3L) - ); - - executeWithContext(wrapper, items); - - for (DataItem item : items) { - assertEquals("processed", item.getApiCompletionResult().get("wrapped_answer"), - "DataItem[" + item.getDataIndex() + "] 装饰结果不正确"); - } - } - } -} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/begin/BeginTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/begin/BeginTest.java new file mode 100644 index 0000000..899623f --- /dev/null +++ b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/begin/BeginTest.java @@ -0,0 +1,239 @@ +package com.evalkit.framework.eval.node.begin; + +import com.evalkit.framework.eval.context.WorkflowContextOps; +import com.evalkit.framework.eval.node.begin.config.BeginConfig; +import com.evalkit.framework.eval.node.scorer.strategy.*; +import com.evalkit.framework.infra.service.llm.LLMService; +import com.evalkit.framework.workflow.model.WorkflowContext; +import lombok.extern.slf4j.Slf4j; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; + +import java.util.List; + +import static org.junit.jupiter.api.Assertions.*; + +@Slf4j +@DisplayName("Begin 单元测试") +class BeginTest { + + /** + * 构造一个最简 mock LLMService + */ + private LLMService mockLlmService() { + return new LLMService() { + @Override + public String chat(String prompt) { + return "mock-response"; + } + + @Override + public String getModel() { + return "mock-model"; + } + }; + } + + /** + * 为 Begin 注入 WorkflowContext 并执行 + */ + private WorkflowContext executeWithContext(Begin begin) { + WorkflowContext ctx = new WorkflowContext(); + begin.setWorkflowContext(ctx); + try { + begin.call(); + } catch (Exception e) { + throw new RuntimeException(e); + } + return ctx; + } + + // ===================== constructor 测试 ===================== + + @Test + @DisplayName("无参构造器应使用默认 BeginConfig,不抛出异常") + void testConstructor_defaultConfig() { + Begin begin = new Begin(); + assertNotNull(begin.getConfig(), "默认构造器应初始化 config"); + assertNotNull(begin.getConfig().getScoreStrategy(), "默认 ScoreStrategy 不应为 null"); + assertNotNull(begin.getConfig().getEvalReasonStrategy(), "默认 EvalReasonStrategy 不应为 null"); + } + + @Test + @DisplayName("带 BeginConfig 构造器应正确保存配置") + void testConstructor_withConfig() { + ScoreStrategy strategy = new AvgScoreStrategy(); + BeginConfig config = BeginConfig.builder().scoreStrategy(strategy).threshold(0.8).build(); + Begin begin = new Begin(config); + assertSame(strategy, begin.getConfig().getScoreStrategy()); + assertEquals(0.8, begin.getConfig().getThreshold(), 1e-9); + } + + // ===================== validConfig 测试 ===================== + + @Test + @DisplayName("config 为 null 时应抛出 IllegalArgumentException") + void testValidConfig_nullConfigThrows() { + assertThrows(IllegalArgumentException.class, () -> new Begin(null), + "config 为 null 时应抛异常"); + } + + @Test + @DisplayName("ScoreStrategy 为 null 时应抛出 IllegalArgumentException") + void testValidConfig_nullScoreStrategyThrows() { + BeginConfig config = BeginConfig.builder().scoreStrategy(null).build(); + assertThrows(IllegalArgumentException.class, () -> new Begin(config), + "ScoreStrategy 为 null 时应抛异常"); + } + + @Test + @DisplayName("EvalReasonStrategy 为 null 时应抛出 IllegalArgumentException") + void testValidConfig_nullEvalReasonStrategyThrows() { + BeginConfig config = BeginConfig.builder().evalReasonStrategy(null).build(); + assertThrows(IllegalArgumentException.class, () -> new Begin(config), + "EvalReasonStrategy 为 null 时应抛异常"); + } + + @Test + @DisplayName("LLMSummaryEvalReasonStrategy 中 LLMService 为 null 时应抛出 IllegalArgumentException") + void testValidConfig_llmStrategyWithNullLlmServiceThrows() { + LLMSummaryEvalReasonStrategy strategy = new LLMSummaryEvalReasonStrategy(null, "some-prompt"); + BeginConfig config = BeginConfig.builder().evalReasonStrategy(strategy).build(); + assertThrows(IllegalArgumentException.class, () -> new Begin(config), + "LLMSummaryEvalReasonStrategy LLMService 为 null 时应抛异常"); + } + + @Test + @DisplayName("LLMSummaryEvalReasonStrategy 中 sysPrompt 为空时应抛出 IllegalArgumentException") + void testValidConfig_llmStrategyWithEmptySysPromptThrows() { + LLMSummaryEvalReasonStrategy strategy = new LLMSummaryEvalReasonStrategy(mockLlmService(), ""); + BeginConfig config = BeginConfig.builder().evalReasonStrategy(strategy).build(); + assertThrows(IllegalArgumentException.class, () -> new Begin(config), + "LLMSummaryEvalReasonStrategy sysPrompt 为空时应抛异常"); + } + + @Test + @DisplayName("LLMSummaryEvalReasonStrategy 配置合法时不应抛出异常") + void testValidConfig_llmStrategyValid() { + LLMSummaryEvalReasonStrategy strategy = new LLMSummaryEvalReasonStrategy(mockLlmService(), "valid-prompt"); + BeginConfig config = BeginConfig.builder().evalReasonStrategy(strategy).build(); + assertDoesNotThrow(() -> new Begin(config), + "LLMSummaryEvalReasonStrategy 配置合法时不应抛异常"); + } + + // ===================== initWorkflowContext 测试 ===================== + + @Test + @DisplayName("执行后 WorkflowContext 中的 ScoreStrategy 应与配置一致") + void testInitWorkflowContext_scorerStrategySet() { + ScoreStrategy strategy = new AvgScoreStrategy(); + Begin begin = new Begin(BeginConfig.builder().scoreStrategy(strategy).build()); + WorkflowContext ctx = executeWithContext(begin); + assertSame(strategy, WorkflowContextOps.getScorerStrategy(ctx), + "上下文中的 ScoreStrategy 应与配置一致"); + } + + @Test + @DisplayName("执行后 WorkflowContext 中的 EvalReasonStrategy 应与配置一致") + void testInitWorkflowContext_evalReasonStrategySet() { + EvalReasonStrategy reason = new JsonEvalReasonStrategy(); + Begin begin = new Begin(BeginConfig.builder().evalReasonStrategy(reason).build()); + WorkflowContext ctx = executeWithContext(begin); + assertSame(reason, WorkflowContextOps.getEvalReasonStrategy(ctx), + "上下文中的 EvalReasonStrategy 应与配置一致"); + } + + @Test + @DisplayName("执行后 WorkflowContext 中的 threshold 应与配置一致") + void testInitWorkflowContext_thresholdSet() { + double threshold = 0.75; + Begin begin = new Begin(BeginConfig.builder().threshold(threshold).build()); + WorkflowContext ctx = executeWithContext(begin); + assertEquals(threshold, WorkflowContextOps.getThreshold(ctx), 1e-9, + "上下文中的 threshold 应与配置一致"); + } + + @Test + @DisplayName("执行后 WorkflowContext 中的 dataItems 应被初始化为非 null 空列表") + void testInitWorkflowContext_dataItemsInitialized() { + Begin begin = new Begin(); + WorkflowContext ctx = executeWithContext(begin); + List dataItems = WorkflowContextOps.getDataItems(ctx); + assertNotNull(dataItems, "dataItems 不应为 null"); + assertTrue(dataItems.isEmpty(), "初始化后 dataItems 应为空列表"); + } + + @Test + @DisplayName("若 WorkflowContext 中已有 dataItems,执行后不应覆盖原有数据") + void testInitWorkflowContext_existingDataItemsNotOverwritten() { + Begin begin = new Begin(); + WorkflowContext ctx = new WorkflowContext(); + // 预先写入非空 dataItems + java.util.List existing = new java.util.concurrent.CopyOnWriteArrayList<>(); + existing.add(new com.evalkit.framework.eval.model.DataItem(0L, null)); + WorkflowContextOps.setDataItems(ctx, existing); + begin.setWorkflowContext(ctx); + try { + begin.call(); + } catch (Exception e) { + throw new RuntimeException(e); + } + List dataItems = WorkflowContextOps.getDataItems(ctx); + assertEquals(1, dataItems.size(), "已有 dataItems 时不应被清空"); + } + + @Test + @DisplayName("执行后 WorkflowContext 中的 countResults 应被初始化为非 null 空 Map") + void testInitWorkflowContext_countResultsInitialized() { + Begin begin = new Begin(); + WorkflowContext ctx = executeWithContext(begin); + java.util.Map countResults = WorkflowContextOps.getCountResults(ctx); + assertNotNull(countResults, "countResults 不应为 null"); + assertTrue(countResults.isEmpty(), "初始化后 countResults 应为空 Map"); + } + + @Test + @DisplayName("执行后 WorkflowContext 中的 extra 应被初始化为非 null 空 Map") + void testInitWorkflowContext_extraInitialized() { + Begin begin = new Begin(); + WorkflowContext ctx = executeWithContext(begin); + java.util.Map extra = WorkflowContextOps.getExtra(ctx); + assertNotNull(extra, "extra 不应为 null"); + assertTrue(extra.isEmpty(), "初始化后 extra 应为空 Map"); + } + + @Test + @DisplayName("threshold 默认值为 0") + void testInitWorkflowContext_defaultThresholdIsZero() { + Begin begin = new Begin(); + WorkflowContext ctx = executeWithContext(begin); + assertEquals(0d, WorkflowContextOps.getThreshold(ctx), 1e-9, + "未指定 threshold 时默认值应为 0"); + } + + // ===================== 不同 ScoreStrategy 验证 ===================== + + @Test + @DisplayName("使用 SumScoreStrategy 时上下文中策略类型正确") + void testWithSumScoreStrategy() { + Begin begin = new Begin(BeginConfig.builder().scoreStrategy(new SumScoreStrategy()).build()); + WorkflowContext ctx = executeWithContext(begin); + assertTrue(WorkflowContextOps.getScorerStrategy(ctx) instanceof SumScoreStrategy); + } + + @Test + @DisplayName("使用 MinScoreStrategy 时上下文中策略类型正确") + void testWithMinScoreStrategy() { + Begin begin = new Begin(BeginConfig.builder().scoreStrategy(new MinScoreStrategy()).build()); + WorkflowContext ctx = executeWithContext(begin); + assertTrue(WorkflowContextOps.getScorerStrategy(ctx) instanceof MinScoreStrategy); + } + + @Test + @DisplayName("使用 NormalEvalReasonStrategy 时上下文中策略类型正确") + void testWithNormalEvalReasonStrategy() { + Begin begin = new Begin(BeginConfig.builder().evalReasonStrategy(new NormalEvalReasonStrategy()).build()); + WorkflowContext ctx = executeWithContext(begin); + assertTrue(WorkflowContextOps.getEvalReasonStrategy(ctx) instanceof NormalEvalReasonStrategy); + } +} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/counter/AttributeCounterTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/counter/AttributeCounterTest.java deleted file mode 100644 index 078c802..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/counter/AttributeCounterTest.java +++ /dev/null @@ -1,59 +0,0 @@ -package com.evalkit.framework.eval.node.counter; - -import com.evalkit.framework.common.utils.json.JsonUtils; -import com.evalkit.framework.eval.model.CountResult; -import com.evalkit.framework.eval.model.DataItem; -import com.evalkit.framework.infra.service.llm.LLMService; -import com.fasterxml.jackson.core.type.TypeReference; -import lombok.extern.slf4j.Slf4j; -import org.junit.jupiter.api.Test; - -import java.util.List; -import java.util.concurrent.atomic.AtomicInteger; - -import static org.junit.jupiter.api.Assertions.assertNotNull; - -@Slf4j -class AttributeCounterTest { - - /** - * 构造一个 mock LLMService: - * - 第一次调用(问题类型提取):返回 "编号|问题类型" 格式 - * - 后续调用(同义词归一化):返回合法 JSON 格式 - */ - private LLMService buildMockLLMService() { - AtomicInteger callCount = new AtomicInteger(0); - return new LLMService() { - @Override - public String chat(String prompt) { - int count = callCount.incrementAndGet(); - if (count == 1) { - // 第一次:提取问题类型,格式为 "编号|问题类型" - return "0|查询机票#价格咨询\n1|预订问题"; - } else { - // 后续:同义词归一化,返回合法 JSON - return "{\"价格咨询\": [\"查询机票\", \"价格咨询\"], \"预订问题\": [\"预订问题\"]}"; - } - } - - @Override - public String getModel() { - return "mock-model"; - } - }; - } - - @Test - public void test() { - LLMService llmService = buildMockLLMService(); - // 从 classpath 加载预置测试数据,不依赖外部文件 - List dataItems = JsonUtils.readJsonFile("classpath:dataItems.json", new TypeReference>() { - }); - dataItems = dataItems.subList(0, 2); - AttributeCounter counter = new AttributeCounter(llmService); - CountResult countResult = counter.count(dataItems); - - assertNotNull(countResult, "统计结果不应为 null"); - log.info("countResult: {}", JsonUtils.toJson(countResult)); - } -} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/counter/AttributeCounterV2Test.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/counter/AttributeCounterV2Test.java deleted file mode 100644 index ac90fb5..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/counter/AttributeCounterV2Test.java +++ /dev/null @@ -1,63 +0,0 @@ -package com.evalkit.framework.eval.node.counter; - -import com.evalkit.framework.common.utils.json.JsonUtils; -import com.evalkit.framework.eval.model.CountResult; -import com.evalkit.framework.eval.model.DataItem; -import com.evalkit.framework.infra.service.llm.LLMService; -import com.fasterxml.jackson.core.type.TypeReference; -import lombok.extern.slf4j.Slf4j; -import org.junit.jupiter.api.Test; - -import java.util.List; -import java.util.concurrent.atomic.AtomicInteger; - -import static org.junit.jupiter.api.Assertions.assertNotNull; - -@Slf4j -class AttributeCounterV2Test { - - /** - * 构造一个 mock LLMService,符合 AttributeCounterV2 的期望格式: - * - 提取阶段:返回 "编号|类别|问题|置信度|情感" 格式(每行5字段,用|分隔) - * - 归一化阶段:返回合法 JSON({ "标准名": ["同义名"] } 格式) - * - 摘要阶段:返回简短文本描述 - */ - private LLMService buildMockLLMService() { - AtomicInteger callCount = new AtomicInteger(0); - return new LLMService() { - @Override - public String chat(String prompt) { - int count = callCount.incrementAndGet(); - if (count == 1) { - // 提取阶段:返回 "编号|类别|问题|置信度|情感" 格式 - return "0|查询问题|机票价格查询|0.9|NEG\n1|预订问题|座位预订失败|0.8|NEG"; - } else if (prompt.contains("合并") || prompt.contains("归一化") || prompt.contains("标准名")) { - // 归一化阶段:返回 JSON 格式 - return "{\"查询问题\": [\"查询问题\"], \"预订问题\": [\"预订问题\"]}"; - } else { - // 摘要阶段:返回简短描述 - return "用户反馈机票查询和预订相关问题"; - } - } - - @Override - public String getModel() { - return "mock-model"; - } - }; - } - - @Test - public void test() { - LLMService llmService = buildMockLLMService(); - // 从 classpath 加载预置测试数据,不依赖外部文件 - List dataItems = JsonUtils.readJsonFile("classpath:dataItems.json", new TypeReference>() { - }); - dataItems = dataItems.subList(0, 2); - AttributeCounterV2 counter = new AttributeCounterV2(llmService); - CountResult countResult = counter.count(dataItems); - - assertNotNull(countResult, "统计结果不应为 null"); - log.info("countResult: {}", JsonUtils.toJson(countResult)); - } -} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/counter/CounterTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/counter/CounterTest.java deleted file mode 100644 index 17952c4..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/counter/CounterTest.java +++ /dev/null @@ -1,17 +0,0 @@ -package com.evalkit.framework.eval.node.counter; - -import com.evalkit.framework.eval.model.CountResult; -import com.evalkit.framework.eval.model.DataItem; - -import java.util.List; - -class CounterTest { - void test() { - Counter counter = new Counter() { - @Override - protected CountResult count(List dataItems) { - return null; - } - }; - } -} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/data_generator/EvalCaseDataGeneratorTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/data_generator/EvalCaseDataGeneratorTest.java deleted file mode 100644 index 0bd6042..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/data_generator/EvalCaseDataGeneratorTest.java +++ /dev/null @@ -1,28 +0,0 @@ -package com.evalkit.framework.eval.node.data_generator; - -import com.evalkit.framework.eval.node.data_generator.config.EvalCaseDataGeneratorConfig; -import com.evalkit.framework.eval.node.querygen.MockQueryGenerator; -import org.junit.jupiter.api.Test; - -class EvalCaseDataGeneratorTest { - @Test - void test() throws Exception { - MockQueryGenerator mockQueryGenerator = new MockQueryGenerator() { - @Override - public String prepareTemplateQuery() { - return "{{between_chinese_holiday 20250815 20251101}} 去 {{city 河北省}}"; - } - }; - - EvalCaseDataGenerator evalCaseDataGenerator = new EvalCaseDataGenerator( - EvalCaseDataGeneratorConfig.builder() - .queryGenerator(mockQueryGenerator) - .enableOutputFile(true) - .genCount(5) - .roundCount(5) - .randomRound(true) - .build() - ); - evalCaseDataGenerator.prepareDataList(); - } -} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/data_generator/KGBasedQueryGeneratorTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/data_generator/KGBasedQueryGeneratorTest.java deleted file mode 100644 index 6cb5515..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/data_generator/KGBasedQueryGeneratorTest.java +++ /dev/null @@ -1,68 +0,0 @@ -package com.evalkit.framework.eval.node.data_generator; - -import com.evalkit.framework.common.utils.list.ListUtils; -import com.evalkit.framework.eval.model.InputData; -import com.evalkit.framework.eval.node.data_generator.config.KGBasedQueryGeneratorConfig; -import com.evalkit.framework.infra.service.llm.LLMService; -import lombok.extern.slf4j.Slf4j; -import org.junit.jupiter.api.Test; - -import java.util.List; - -import static org.junit.jupiter.api.Assertions.assertNotNull; - -@Slf4j -class KGBasedQueryGeneratorTest { - - /** - * 构造一个 mock LLMService,返回符合 Turn JSON 格式的内容: - * KGBasedQueryGenerator 期望 LLM 返回 List 的 JSON 数组 - */ - private LLMService buildMockLLMService() { - return new LLMService() { - @Override - public String chat(String prompt) { - // 返回合法的 Turn JSON 数组,匹配 scenario_config.json 中定义的 4 轮对话 - return "[" + - "{\"turn\":1,\"query\":\"打算带孩子去北京玩,有什么必看景点推荐吗?\"}," + - "{\"turn\":2,\"query\":\"从上海出发,有什么推荐的交通方式吗?\"}," + - "{\"turn\":3,\"query\":\"到了那边晚上住哪里比较方便?\"}," + - "{\"turn\":4,\"query\":\"帮我把刚才看好的车票预订一下。\"}" + - "]"; - } - - @Override - public String getModel() { - return "mock-model"; - } - }; - } - - @Test - public void test() throws Exception { - // 文件已存在于 classpath:travel_demo/,由 KGBasedQueryGenerator 自动从 classpath 加载 - String kgFilePath = "travel_demo/travel_kg.ttl"; - String scenarioConfigFilePath = "travel_demo/scenario_config.json"; - String scenarioConfigFilePath2 = "travel_demo/scenario2_config.json"; - LLMService llmService = buildMockLLMService(); - - KGBasedQueryGenerator generator = new KGBasedQueryGenerator( - KGBasedQueryGeneratorConfig.builder() - .scenarioConfigFilePath(ListUtils.of(scenarioConfigFilePath, scenarioConfigFilePath2)) - .kgFilePath(kgFilePath) - .llmService(llmService) - .enableOutputFile(false) // 关闭文件输出,避免在 CI 环境写文件 - .generateCount(1) - .threadNum(1) - .sessionIdFieldName("session_id") - .turnFieldName("turn") - .queryFieldName("query") - .enableOneRawOneSession(false) - .build() - ); - - List generated = generator.generateWrapper(); - assertNotNull(generated, "生成的数据列表不应为 null"); - log.debug("generated count: {}, data: {}", generated.size(), generated); - } -} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/data_generator/LoaderBasedDataGeneratorTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/data_generator/LoaderBasedDataGeneratorTest.java deleted file mode 100644 index d7a0275..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/data_generator/LoaderBasedDataGeneratorTest.java +++ /dev/null @@ -1,104 +0,0 @@ -package com.evalkit.framework.eval.node.data_generator; - -import com.evalkit.framework.common.utils.convert.TypeConvertUtils; -import com.evalkit.framework.common.utils.list.ListUtils; -import com.evalkit.framework.common.utils.map.MapUtils; -import com.evalkit.framework.common.utils.random.UuidUtils; -import com.evalkit.framework.eval.model.InputData; -import com.evalkit.framework.eval.node.dataloader.DataLoader; -import com.evalkit.framework.eval.node.querygen.config.LoaderBasedDataGeneratorConfig; -import lombok.extern.slf4j.Slf4j; -import org.apache.commons.collections4.CollectionUtils; -import org.apache.commons.lang3.StringUtils; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -import java.util.*; - -/** - * 基于数据集加载器的数据生成器单测 - */ -@Slf4j -class LoaderBasedDataGeneratorTest { - - DataLoader dataLoader; - - @BeforeEach - public void setUp() { - // 数据加载器,每行是一轮对话 - dataLoader = new DataLoader() { - @Override - public List prepareDataList() throws Exception { - return ListUtils.of( - new InputData(MapUtils.of("queries", "s1q1#s1q2")), - new InputData(MapUtils.of("queries", "s2q1#s2q2#s2q3")) - ); - } - }; - } - - - @Test - public void test() throws Exception { - // 数据生成器, 将原始数据变成多轮对话, 每行是一个Query - LoaderBasedDataGenerator generator = new LoaderBasedDataGenerator( - LoaderBasedDataGeneratorConfig.builder() - .dataLoader(dataLoader) - .threadNum(2) - .build() - ) { - @Override - public List> processSingleInputData(Map inputItem) { - String queries = TypeConvertUtils.toString(inputItem.getOrDefault("queries", null)); - if (StringUtils.isEmpty(queries)) { - return Collections.emptyList(); - } - String[] split = StringUtils.split(queries, "#"); - if (split.length == 0) { - return Collections.emptyList(); - } - List> result = new ArrayList<>(); - String sessionId = UuidUtils.generateUuid(); - for (int i = 0; i < split.length; i++) { - Map map = new HashMap<>(); - map.put("sessionId", sessionId); - map.put("turn", i + 1); - map.put("query", split[i]); - result.add(map); - } - return result; - } - }; - - // 验证 - log.info("raw input data: {}", dataLoader.loadWrapper()); - List generateDataList = generator.prepareDataList(); - log.info("generated data: {}", generateDataList); - Assertions.assertTrue(CollectionUtils.isNotEmpty(generateDataList)); - Assertions.assertEquals(5, generateDataList.size()); - } - - @Test - public void testBadProcess() throws Exception { - // 数据生成器, 将原始数据变成多轮对话, 每行是一个Query - LoaderBasedDataGenerator generator = new LoaderBasedDataGenerator( - LoaderBasedDataGeneratorConfig.builder() - .dataLoader(dataLoader) - .threadNum(2) - .build() - ) { - @Override - public List> processSingleInputData(Map inputItem) { - int i = 1 / 0; - return ListUtils.of(inputItem); - } - }; - - // 验证 - log.info("raw input data: {}", dataLoader.loadWrapper()); - List generateDataList = generator.prepareDataList(); - log.info("generated data: {}", generateDataList); - Assertions.assertTrue(CollectionUtils.isEmpty(generateDataList)); - } -} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/data_generator/MultiDataGeneratorTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/data_generator/MultiDataGeneratorTest.java deleted file mode 100644 index 7a4bcba..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/data_generator/MultiDataGeneratorTest.java +++ /dev/null @@ -1,76 +0,0 @@ -package com.evalkit.framework.eval.node.data_generator; - -import com.evalkit.framework.common.utils.list.ListUtils; -import com.evalkit.framework.eval.node.data_generator.config.KGBasedQueryGeneratorConfig; -import com.evalkit.framework.eval.node.data_generator.config.MultiDataGeneratorConfig; -import com.evalkit.framework.infra.service.llm.LLMService; -import org.junit.jupiter.api.Test; - -import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; - -class MultiDataGeneratorTest { - - /** - * 构造一个 mock LLMService,返回符合 Turn JSON 格式的内容 - */ - private LLMService buildMockLLMService() { - return new LLMService() { - @Override - public String chat(String prompt) { - // 返回合法的 Turn JSON 数组,供 KGBasedQueryGenerator 解析 - return "[" + - "{\"turn\":1,\"query\":\"我想了解一下旅游攻略\"}," + - "{\"turn\":2,\"query\":\"请推荐交通方式\"}," + - "{\"turn\":3,\"query\":\"有什么酒店推荐吗?\"}," + - "{\"turn\":4,\"query\":\"帮我预订一下。\"}" + - "]"; - } - - @Override - public String getModel() { - return "mock-model"; - } - }; - } - - @Test - public void test() { - // 文件已存在于 classpath:travel_demo/,由 KGBasedQueryGenerator 自动从 classpath 加载 - String kgFilePath = "travel_demo/travel_kg.ttl"; - String scenarioConfigFilePath = "travel_demo/scenario_config.json"; - String scenario2ConfigFilePath = "travel_demo/scenario2_config.json"; - - LLMService llmService = buildMockLLMService(); - - KGBasedQueryGenerator generator1 = new KGBasedQueryGenerator( - KGBasedQueryGeneratorConfig.builder() - .scenarioConfigFilePath(ListUtils.of(scenarioConfigFilePath)) - .kgFilePath(kgFilePath) - .llmService(llmService) - .enableOutputFile(false) // 关闭文件输出,避免在 CI 环境写文件 - .generateCount(1) - .build() - ); - - KGBasedQueryGenerator generator2 = new KGBasedQueryGenerator( - KGBasedQueryGeneratorConfig.builder() - .scenarioConfigFilePath(ListUtils.of(scenario2ConfigFilePath)) - .kgFilePath(kgFilePath) - .llmService(llmService) - .enableOutputFile(false) - .generateCount(1) - .build() - ); - - MultiDataGenerator multiDataGenerator = new MultiDataGenerator( - MultiDataGeneratorConfig.builder() - .dataGenerators(ListUtils.of(generator1, generator2)) - .enableOutputFile(false) - .build() - ); - - // 调用并验证结果不为 null - assertDoesNotThrow(multiDataGenerator::generateWrapper, - "MultiDataGenerator 不应抛出异常"); - } -} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/data_generator/querygen/MockQueryGeneratorTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/data_generator/querygen/MockQueryGeneratorTest.java deleted file mode 100644 index c368288..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/data_generator/querygen/MockQueryGeneratorTest.java +++ /dev/null @@ -1,30 +0,0 @@ -package com.evalkit.framework.eval.node.data_generator.querygen; - -import com.evalkit.framework.eval.node.querygen.MockQueryGenerator; -import com.evalkit.framework.eval.node.querygen.config.MockerQueryGeneratorConfig; -import lombok.extern.slf4j.Slf4j; -import org.junit.jupiter.api.Test; - -import java.util.List; - -@Slf4j -class MockQueryGeneratorTest { - @Test - void test() { - String templateQuery = "{{between_chinese_holiday 20250815 20251101}} 去 {{city 河北省}}"; - - MockQueryGenerator mockQueryGenerator = new MockQueryGenerator( - MockerQueryGeneratorConfig.builder() - .genCount(5) - .build() - ) { - @Override - public String prepareTemplateQuery() { - return templateQuery; - } - }; - - List genQueries = mockQueryGenerator.generate(); - log.info("template: {}, generate queries: {}", templateQuery, genQueries); - } -} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/data_generator/querygen/PromptBasedQueryGeneratorTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/data_generator/querygen/PromptBasedQueryGeneratorTest.java deleted file mode 100644 index b56d976..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/data_generator/querygen/PromptBasedQueryGeneratorTest.java +++ /dev/null @@ -1,71 +0,0 @@ -package com.evalkit.framework.eval.node.data_generator.querygen; - -import com.evalkit.framework.eval.node.querygen.PromptBasedQueryGenerator; -import com.evalkit.framework.eval.node.querygen.config.PromptBasedQueryGeneratorConfig; -import com.evalkit.framework.infra.service.llm.LLMService; -import lombok.extern.slf4j.Slf4j; -import org.junit.jupiter.api.Test; - -import java.util.List; - -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNotNull; - -@Slf4j -class PromptBasedQueryGeneratorTest { - - /** - * 构造一个 mock LLMService,返回换行分隔的 Query 列表(PromptBasedQueryGenerator 按 \n 分割回复) - */ - private LLMService buildMockLLMService() { - return new LLMService() { - @Override - public String chat(String prompt) { - // 返回多行文本,模拟 LLM 生成 Query 的格式(每行一条 Query) - return "如何快速预订机票\n机票价格最低查询\n最近热门旅游目的地推荐"; - } - - @Override - public String getModel() { - return "mock-model"; - } - }; - } - - @Test - void test() { - LLMService llmService = buildMockLLMService(); - - PromptBasedQueryGenerator promptBasedQueryGenerator = new PromptBasedQueryGenerator( - PromptBasedQueryGeneratorConfig.builder() - .llmService(llmService) - .genCount(2) - .userPrompt("关键词: 预订机票") - .build() - ); - List queries = promptBasedQueryGenerator.generate(); - - assertNotNull(queries, "生成的 queries 不应为 null"); - assertFalse(queries.isEmpty(), "生成的 queries 不应为空"); - log.info("queries: {}", queries); - } - - @Test - void testCustomSysPrompt() { - LLMService llmService = buildMockLLMService(); - - PromptBasedQueryGenerator generator = new PromptBasedQueryGenerator( - PromptBasedQueryGeneratorConfig.builder() - .llmService(llmService) - .sysPrompt("你是一个Query生成助手,请生成简短的用户查询") - .userPrompt("关键词: 酒店预订") - .genCount(3) - .langStyle("简洁直接") - .build() - ); - - List queries = generator.generate(); - assertNotNull(queries, "使用自定义 sysPrompt 生成的 queries 不应为 null"); - log.info("customSysPrompt queries: {}", queries); - } -} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader/ApiDataLoaderTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader/ApiDataLoaderTest.java deleted file mode 100644 index e2b1229..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader/ApiDataLoaderTest.java +++ /dev/null @@ -1,128 +0,0 @@ -package com.evalkit.framework.eval.node.dataloader; - -import com.evalkit.framework.eval.node.dataloader.config.ApiDataLoaderConfig; -import org.junit.jupiter.api.Test; - -import java.util.Collections; -import java.util.Map; -import java.util.concurrent.TimeUnit; - -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertThrows; - -class ApiDataLoaderTest { - - /** - * 测试 ApiDataLoader 配置校验逻辑:host 为空时应抛出 IllegalArgumentException - */ - @Test - void testEmptyHostThrowsException() { - assertThrows(IllegalArgumentException.class, () -> { - new ApiDataLoader( - ApiDataLoaderConfig.builder() - .host("") - .api("/api/test") - .method("get") - .timeout(10) - .timeUnit(TimeUnit.SECONDS) - .build() - ) { - @Override - public Map prepareBody() { - return Collections.emptyMap(); - } - - @Override - public Map prepareParam() { - return Collections.emptyMap(); - } - - @Override - public Map prepareHeader() { - return Collections.emptyMap(); - } - - @Override - public String prepareJsonpath() { - return "$.data"; - } - }; - }, "host 为空时构造应抛出 IllegalArgumentException"); - } - - /** - * 测试 ApiDataLoader 配置校验逻辑:api 为空时应抛出 IllegalArgumentException - */ - @Test - void testEmptyApiThrowsException() { - assertThrows(IllegalArgumentException.class, () -> { - new ApiDataLoader( - ApiDataLoaderConfig.builder() - .host("http://localhost:8080") - .api("") - .method("get") - .timeout(10) - .timeUnit(TimeUnit.SECONDS) - .build() - ) { - @Override - public Map prepareBody() { - return Collections.emptyMap(); - } - - @Override - public Map prepareParam() { - return Collections.emptyMap(); - } - - @Override - public Map prepareHeader() { - return Collections.emptyMap(); - } - - @Override - public String prepareJsonpath() { - return "$.data"; - } - }; - }, "api 为空时构造应抛出 IllegalArgumentException"); - } - - /** - * 测试 ApiDataLoader 正常构建(不发起真实 HTTP 请求,只验证构造成功) - */ - @Test - void testConstructWithValidConfig() { - ApiDataLoader apiDataLoader = new ApiDataLoader( - ApiDataLoaderConfig.builder() - .host("http://localhost:8080") - .api("/api/data") - .method("get") - .timeout(10) - .timeUnit(TimeUnit.SECONDS) - .build() - ) { - @Override - public Map prepareBody() { - return Collections.emptyMap(); - } - - @Override - public Map prepareParam() { - return Collections.emptyMap(); - } - - @Override - public Map prepareHeader() { - return Collections.emptyMap(); - } - - @Override - public String prepareJsonpath() { - return "$.data"; - } - }; - - assertNotNull(apiDataLoader, "ApiDataLoader 实例不应为 null"); - } -} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader/DataLoaderTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader/DataLoaderTest.java index 0a79ac6..97fd3a4 100644 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader/DataLoaderTest.java +++ b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader/DataLoaderTest.java @@ -1,62 +1,336 @@ package com.evalkit.framework.eval.node.dataloader; -import com.evalkit.framework.common.utils.list.ListUtils; -import com.evalkit.framework.common.utils.map.MapUtils; import com.evalkit.framework.eval.model.InputData; -import com.evalkit.framework.eval.node.dataloader.config.JsonFileDataLoaderConfig; +import com.evalkit.framework.eval.node.dataloader.config.DataLoaderConfig; import lombok.extern.slf4j.Slf4j; -import org.apache.commons.lang3.StringUtils; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.DisplayName; import org.junit.jupiter.api.Test; -import java.util.List; +import java.util.*; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +import static org.junit.jupiter.api.Assertions.*; @Slf4j +@DisplayName("DataLoader 单元测试") class DataLoaderTest { - private DataLoader dataLoader; + /** + * 构造一个简单的 DataLoader 匿名实现,返回指定数量的 InputData + */ + private DataLoader buildDataLoader(int dataSize) { + return buildDataLoader(DataLoaderConfig.builder().build(), dataSize); + } - @BeforeEach - void setUp() { - dataLoader = new DataLoader() { + private DataLoader buildDataLoader(DataLoaderConfig config, int dataSize) { + return new DataLoader(config) { @Override - public List prepareDataList() throws Exception { - return ListUtils.of( - new InputData(MapUtils.of("query", "1")), - new InputData(MapUtils.of("query", "2")) - ); + public List prepareDataList() { + return buildInputDataList(dataSize); } }; } + /** + * 构造测试用的 InputData 列表 + */ + private List buildInputDataList(int size) { + List list = new ArrayList<>(); + for (int i = 0; i < size; i++) { + Map item = new HashMap<>(); + item.put("id", i); + item.put("value", "v" + i); + list.add(new InputData((long) i, item)); + } + return list; + } + + // ===================== validConfig 测试 ===================== + + @Test + @DisplayName("config 为 null 时应抛出 IllegalArgumentException") + void testValidConfig_nullConfigThrows() { + assertThrows(IllegalArgumentException.class, () -> new DataLoader(null) { + @Override + public List prepareDataList() { + return new ArrayList<>(); + } + }, "Config 为 null 时应抛出 IllegalArgumentException"); + } + + @Test + @DisplayName("offset 为负数时应抛出 IllegalArgumentException") + void testValidConfig_negativeOffsetThrows() { + assertThrows(IllegalArgumentException.class, () -> + buildDataLoader(DataLoaderConfig.builder().offset(-1).build(), 5), + "offset 为负数时应抛出 IllegalArgumentException"); + } + + @Test + @DisplayName("limit 小于 -1 时应抛出 IllegalArgumentException") + void testValidConfig_limitLessThanNegativeOneThrows() { + assertThrows(IllegalArgumentException.class, () -> + buildDataLoader(DataLoaderConfig.builder().limit(-2).build(), 5), + "limit 小于 -1 时应抛出 IllegalArgumentException"); + } + + @Test + @DisplayName("offset=0, limit=-1 为合法配置,不应抛出异常") + void testValidConfig_zeroOffsetAndNegativeOneLimitOk() { + assertDoesNotThrow(() -> buildDataLoader(DataLoaderConfig.builder().offset(0).limit(-1).build(), 5), + "offset=0, limit=-1 为合法配置"); + } + + // ===================== addFilter / addFilters 测试 ===================== + + @Test + @DisplayName("添加 null 过滤器时不应写入过滤器列表") + void testAddFilter_nullFilterIsIgnored() { + DataLoader loader = buildDataLoader(5); + loader.addFilter(null); + assertTrue(loader.getConfig().getFilters().isEmpty(), "添加 null 过滤器时不应写入列表"); + } + + @Test + @DisplayName("添加单个过滤器后过滤器列表大小为 1") + void testAddFilter_singleFilter() { + DataLoader loader = buildDataLoader(5); + loader.addFilter(inputData -> true); + assertEquals(1, loader.getConfig().getFilters().size(), "应成功添加 1 个过滤器"); + } + @Test - void loadWrapper() { - List inputData = dataLoader.loadWrapper(); - log.info("inputData:{}", inputData); - Assertions.assertEquals(2, inputData.size()); - } - - @Test - @Disabled - public void testInjectData() { - String filePath = ""; - JsonFileDataLoader jsonFileDataLoader = new JsonFileDataLoader( - JsonFileDataLoaderConfig.builder() - .filePath(filePath) - .openInjectData(true) - .jsonPath("$.dataItems") - .filters( - ListUtils.of( - inputData -> { - String query = inputData.get("query"); - return StringUtils.equals(query, "1"); - } - ) - ) - .build() - ); - jsonFileDataLoader.loadWrapper(); + @DisplayName("批量添加过滤器后过滤器列表大小正确") + void testAddFilters_multipleFilters() { + DataLoader loader = buildDataLoader(5); + loader.addFilters(Arrays.asList(inputData -> true, inputData -> false)); + assertEquals(2, loader.getConfig().getFilters().size(), "应成功添加 2 个过滤器"); + } + + // ===================== setOffsetAndLimit 测试 ===================== + + @Test + @DisplayName("setOffsetAndLimit 应正确更新 config 中的 offset 和 limit") + void testSetOffsetAndLimit() { + DataLoader loader = buildDataLoader(10); + loader.setOffsetAndLimit(2, 3); + assertEquals(2, loader.getConfig().getOffset()); + assertEquals(3, loader.getConfig().getLimit()); + } + + // ===================== slice 测试 ===================== + + @Test + @DisplayName("limit=-1 时 slice 应返回全部数据") + void testSlice_limitNegativeOne_returnsAll() { + DataLoader loader = buildDataLoader(DataLoaderConfig.builder().offset(0).limit(-1).build(), 10); + List data = buildInputDataList(10); + List result = loader.slice(data); + assertEquals(10, result.size(), "limit=-1 时应返回全部数据"); + } + + @Test + @DisplayName("slice 按 offset 和 limit 正确截取数据") + void testSlice_offsetAndLimit() { + DataLoader loader = buildDataLoader(DataLoaderConfig.builder().offset(2).limit(3).build(), 10); + List data = buildInputDataList(10); + List result = loader.slice(data); + assertEquals(3, result.size(), "slice 后应返回 3 条数据"); + assertEquals(2L, result.get(0).getDataIndex()); + assertEquals(3L, result.get(1).getDataIndex()); + assertEquals(4L, result.get(2).getDataIndex()); + } + + @Test + @DisplayName("offset 超过数据总量时 slice 返回空列表") + void testSlice_offsetBeyondTotal_returnsEmpty() { + DataLoader loader = buildDataLoader(DataLoaderConfig.builder().offset(20).limit(5).build(), 10); + List data = buildInputDataList(10); + List result = loader.slice(data); + assertTrue(result.isEmpty(), "offset 超过数据总量时应返回空列表"); + } + + @Test + @DisplayName("空列表 slice 后仍为空列表") + void testSlice_emptyList_returnsEmpty() { + DataLoader loader = buildDataLoader(DataLoaderConfig.builder().offset(0).limit(5).build(), 0); + List result = loader.slice(new ArrayList<>()); + assertTrue(result.isEmpty(), "空列表 slice 后依然为空"); + } + + @Test + @DisplayName("limit 超过剩余数据量时 slice 返回剩余全部数据") + void testSlice_limitExceedsRemaining_returnsRest() { + DataLoader loader = buildDataLoader(DataLoaderConfig.builder().offset(8).limit(5).build(), 10); + List data = buildInputDataList(10); + List result = loader.slice(data); + assertEquals(2, result.size(), "limit 超过剩余数据量时应返回剩余所有数据"); + } + + // ===================== filter 测试 ===================== + + @Test + @DisplayName("无过滤器时数据列表不应被修改") + void testFilter_noFilters_listUnchanged() { + DataLoader loader = buildDataLoader(5); + List data = new ArrayList<>(buildInputDataList(5)); + loader.filter(data); + assertEquals(5, data.size(), "没有过滤器时数据不应被过滤"); + } + + @Test + @DisplayName("过滤器拒绝所有数据时列表应为空") + void testFilter_filterOutAll() { + DataLoader loader = buildDataLoader(5); + loader.addFilter(inputData -> false); + List data = new ArrayList<>(buildInputDataList(5)); + loader.filter(data); + assertTrue(data.isEmpty(), "过滤器拦截所有数据后列表应为空"); + } + + @Test + @DisplayName("按字段值过滤时只保留满足条件的数据") + void testFilter_filterByValue() { + DataLoader loader = buildDataLoader(5); + loader.addFilter(inputData -> (int) inputData.get("id") < 3); + List data = new ArrayList<>(buildInputDataList(5)); + loader.filter(data); + assertEquals(3, data.size(), "过滤后应只保留 id=0,1,2 的三条数据"); + } + + @Test + @DisplayName("多个过滤器之间为 AND 逻辑,同时满足才保留") + void testFilter_multipleFilters_andLogic() { + DataLoader loader = buildDataLoader(10); + loader.addFilter(inputData -> (int) inputData.get("id") >= 2); + loader.addFilter(inputData -> (int) inputData.get("id") <= 7); + List data = new ArrayList<>(buildInputDataList(10)); + loader.filter(data); + assertEquals(6, data.size(), "多过滤器应 AND 逻辑,保留 id=2..7 共 6 条数据"); + } + + // ===================== addDataIndex 测试 ===================== + + @Test + @DisplayName("addDataIndex 应从 0 开始顺序为数据项赋予索引") + void testAddDataIndex_assignsSequentialIndex() { + DataLoader loader = buildDataLoader(5); + List data = buildInputDataList(5); + data.forEach(d -> d.setDataIndex(null)); + loader.addDataIndex(data); + for (int i = 0; i < data.size(); i++) { + assertEquals(i, data.get(i).getDataIndex(), "索引应从 0 开始顺序递增"); + } + } + + @Test + @DisplayName("空列表调用 addDataIndex 不应抛出异常") + void testAddDataIndex_emptyList_noException() { + DataLoader loader = buildDataLoader(0); + assertDoesNotThrow(() -> loader.addDataIndex(new ArrayList<>()), + "空列表调用 addDataIndex 不应抛出异常"); + } + + // ===================== loadWrapper 测试 ===================== + + @Test + @DisplayName("loadWrapper 正常加载时应返回完整数据列表") + void testLoadWrapper_success_returnsDataList() { + DataLoader loader = buildDataLoader(DataLoaderConfig.builder().offset(0).limit(-1).build(), 5); + List result = loader.loadWrapper(); + assertNotNull(result, "loadWrapper 正常情况下应返回非 null 列表"); + assertEquals(5, result.size(), "应返回全部 5 条数据"); + } + + @Test + @DisplayName("prepareDataList 返回空时 loadWrapper 应返回 null") + void testLoadWrapper_emptyPrepareDataList_returnsNull() { + DataLoader loader = new DataLoader() { + @Override + public List prepareDataList() { + return new ArrayList<>(); + } + }; + List result = loader.loadWrapper(); + assertNull(result, "prepareDataList 返回空时 loadWrapper 应返回 null"); + } + + @Test + @DisplayName("loadWrapper 配合过滤器应正确过滤数据") + void testLoadWrapper_withFilter_filtersCorrectly() { + DataLoaderConfig config = DataLoaderConfig.builder().offset(0).limit(-1).build(); + DataLoader loader = buildDataLoader(config, 10); + loader.addFilter(inputData -> (int) inputData.get("id") % 2 == 0); + List result = loader.loadWrapper(); + assertNotNull(result); + assertEquals(5, result.size(), "过滤奇数 id 后应只剩 5 条数据"); + result.forEach(d -> assertEquals(0, (int) d.get("id") % 2, "保留的 id 应均为偶数")); + } + + @Test + @DisplayName("loadWrapper 应正确应用 offset 和 limit 截取数据") + void testLoadWrapper_withOffsetAndLimit() { + DataLoaderConfig config = DataLoaderConfig.builder().offset(2).limit(3).build(); + DataLoader loader = buildDataLoader(config, 10); + List result = loader.loadWrapper(); + assertNotNull(result); + assertEquals(3, result.size(), "offset=2, limit=3 时应返回 3 条"); + } + + @Test + @DisplayName("loadWrapper 返回的每条数据都应设置了 dataIndex") + void testLoadWrapper_dataIndexAssigned() { + DataLoader loader = buildDataLoader(5); + List result = loader.loadWrapper(); + assertNotNull(result); + for (InputData inputData : result) { + assertNotNull(inputData.getDataIndex(), "每条数据的 dataIndex 不应为 null"); + } + } + + @Test + @DisplayName("开启 shuffle 后数据总条数不变且内容完整") + void testLoadWrapper_shuffleDoesNotLoseData() { + DataLoaderConfig config = DataLoaderConfig.builder().shuffle(true).build(); + DataLoader loader = buildDataLoader(config, 20); + List result = loader.loadWrapper(); + assertNotNull(result); + assertEquals(20, result.size(), "shuffle 后数据条数不应改变"); + List ids = result.stream() + .map(d -> (int) d.get("id")) + .sorted() + .collect(Collectors.toList()); + List expected = IntStream.range(0, 20).boxed().collect(Collectors.toList()); + assertEquals(expected, ids, "shuffle 后 id 集合应仍为 0..19"); + } + + // ===================== constructor 测试 ===================== + + @Test + @DisplayName("无参构造器应初始化默认 config:offset=0, limit=-1") + void testConstructor_defaultConfig() { + DataLoader loader = new DataLoader() { + @Override + public List prepareDataList() { + return buildInputDataList(1); + } + }; + assertNotNull(loader.getConfig(), "默认构造器应初始化 config"); + assertEquals(0, loader.getConfig().getOffset()); + assertEquals(-1, loader.getConfig().getLimit()); + } + + @Test + @DisplayName("(offset, limit) 构造器应正确设置 config 中的 offset 和 limit") + void testConstructor_offsetAndLimit() { + DataLoader loader = new DataLoader(3, 7) { + @Override + public List prepareDataList() { + return buildInputDataList(1); + } + }; + assertEquals(3, loader.getConfig().getOffset()); + assertEquals(7, loader.getConfig().getLimit()); } } \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader/ExcelDataLoaderTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader/ExcelDataLoaderTest.java deleted file mode 100644 index b8ffb23..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader/ExcelDataLoaderTest.java +++ /dev/null @@ -1,13 +0,0 @@ -package com.evalkit.framework.eval.node.dataloader; - -import com.evalkit.framework.eval.node.dataloader.config.ExcelDataLoaderConfig; -import org.junit.jupiter.api.Test; - -class ExcelDataLoaderTest { - @Test - void validConfigTest() { - ExcelDataLoader excelDataLoader = new ExcelDataLoader( - ExcelDataLoaderConfig.builder().filePath("test.xlsx").build() - ); - } -} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader/JdbcDataLoaderTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader/JdbcDataLoaderTest.java deleted file mode 100644 index 976350e..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader/JdbcDataLoaderTest.java +++ /dev/null @@ -1,105 +0,0 @@ -package com.evalkit.framework.eval.node.dataloader; - -import com.evalkit.framework.eval.model.InputData; -import com.evalkit.framework.eval.node.dataloader.config.JdbcDataLoaderConfig; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -import java.io.File; -import java.sql.Connection; -import java.sql.DriverManager; -import java.sql.Statement; -import java.util.List; - -import static org.junit.jupiter.api.Assertions.*; - -/** - * JdbcDataLoader 测试 —— 使用 SQLite 文件型数据库替代外部 MySQL,无需外部服务 - *

- * 注意:SQLite 内存模式(file::memory:)与 HikariCP 连接池不兼容(DriverManager 创建的连接 - * 与 HikariCP 连接池使用的连接相互隔离),因此改用临时文件型 SQLite,确保连接共享同一数据库。 - */ -class JdbcDataLoaderTest { - - private static final String SQLITE_DRIVER = "org.sqlite.JDBC"; - private File tempDbFile; - private String sqliteUrl; - - @BeforeEach - void setUp() throws Exception { - // 创建临时 SQLite 文件,确保 DriverManager 和 HikariCP 访问同一数据库 - tempDbFile = File.createTempFile("jdbcloader_test_", ".db"); - tempDbFile.deleteOnExit(); - sqliteUrl = "jdbc:sqlite:" + tempDbFile.getAbsolutePath(); - - // 在 SQLite 文件中创建测试表并插入数据 - try (Connection conn = DriverManager.getConnection(sqliteUrl); - Statement st = conn.createStatement()) { - st.execute("CREATE TABLE IF NOT EXISTS testcase (" + - "id INTEGER PRIMARY KEY, " + - "query TEXT NOT NULL, " + - "expected TEXT)"); - st.execute("DELETE FROM testcase"); - st.execute("INSERT INTO testcase (query, expected) VALUES ('hello world', '预期回复1')"); - st.execute("INSERT INTO testcase (query, expected) VALUES ('test query', '预期回复2')"); - } - } - - @AfterEach - void tearDown() { - if (tempDbFile != null && tempDbFile.exists()) { - tempDbFile.delete(); - } - } - - /** - * 测试 JdbcDataLoader 可以通过 SQLite 文件数据库正常加载数据 - */ - @Test - void testLoadDataFromSQLite() throws Exception { - JdbcDataLoader jdbcDataLoader = new JdbcDataLoader( - JdbcDataLoaderConfig.builder() - .driver(SQLITE_DRIVER) - .url(sqliteUrl) - // SQLite 不需要用户名,但 validConfig 要求非空,传 "sa" 作为占位符 - .user("sa") - .password("") - .build() - ) { - @Override - public String prepareSql() { - return "SELECT * FROM testcase"; - } - }; - - List dataList = jdbcDataLoader.prepareDataList(); - assertNotNull(dataList, "加载的数据列表不应为 null"); - assertEquals(2, dataList.size(), "应加载 2 条测试数据"); - - // 验证数据内容 - InputData first = dataList.get(0); - assertNotNull(first.getInputItem(), "数据项的 inputItem 不应为 null"); - assertTrue(first.getInputItem().containsKey("query"), "应包含 query 字段"); - } - - /** - * 测试 JdbcDataLoader 校验逻辑:driver 为空时应抛出异常 - */ - @Test - void testEmptyDriverThrowsException() { - assertThrows(IllegalArgumentException.class, () -> - new JdbcDataLoader( - JdbcDataLoaderConfig.builder() - .driver("") - .url(sqliteUrl) - .user("") - .password("") - .build() - ) { - @Override - public String prepareSql() { return "SELECT * FROM testcase"; } - } - ); - } -} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader/JsonFileDataLoaderTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader/JsonFileDataLoaderTest.java deleted file mode 100644 index eabb610..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader/JsonFileDataLoaderTest.java +++ /dev/null @@ -1,105 +0,0 @@ -package com.evalkit.framework.eval.node.dataloader; - -import com.evalkit.framework.common.utils.file.FileUtils; -import com.evalkit.framework.common.utils.json.JsonUtils; -import com.evalkit.framework.eval.model.InputData; -import com.evalkit.framework.eval.node.dataloader.config.JsonFileDataLoaderConfig; -import lombok.extern.slf4j.Slf4j; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.List; -import java.util.Map; - -@Slf4j -class JsonFileDataLoaderTest { - - private String jsonObjectFilePath; - private String jsonArrayFilePath; - private String jsonArrayFilePath2; - - /** - * 构造Json临时文件 - */ - @BeforeEach - public void setUp() throws IOException { - String j1 = "{\"code\":0,\"success\":true,\"data\":{\t\"query\":\"hello\",\"type\":\"test\"}}"; - String j2 = "{\"code\":0,\"success\":true,\"data\":[{\"query\":\"hello\",\"type\":\"test\"},{\"query\":\"hi\",\"type\":\"test\"}]}"; - String j3 = "[{\"query\":\"hello\",\"type\":\"test\"},{\"query\":\"hi\",\"type\":\"test\"}]"; - - Path jsonObjectTempFile = Files.createTempFile("temp", ".json"); - jsonObjectFilePath = jsonObjectTempFile.toString(); - Path jsonArrayTempFile = Files.createTempFile("temp", ".json"); - jsonArrayFilePath = jsonArrayTempFile.toString(); - Path jsonArrayTempFile2 = Files.createTempFile("temp", ".json"); - jsonArrayFilePath2 = jsonArrayTempFile2.toString(); - JsonUtils.writeJsonFile(jsonObjectFilePath, JsonUtils.fromJson(j1, Map.class)); - JsonUtils.writeJsonFile(jsonArrayFilePath, JsonUtils.fromJson(j2, Map.class)); - JsonUtils.writeJsonFile(jsonArrayFilePath2, JsonUtils.fromJson(j3, List.class)); - } - - /** - * 执行删除临时文件 - */ - @AfterEach - public void tearDown() { - FileUtils.deleteFile(jsonObjectFilePath); - FileUtils.deleteFile(jsonArrayFilePath); - } - - @Test - public void testLoadJsonObject() throws Exception { - JsonFileDataLoader dataLoader = new JsonFileDataLoader( - JsonFileDataLoaderConfig.builder() - .jsonPath("$") - .filePath(jsonObjectFilePath) - .build() - ); - List inputData = dataLoader.prepareDataList(); - log.info("Json File DataLoader: {}", inputData); - Assertions.assertEquals(1, inputData.size()); - } - - @Test - public void testLoadJsonObjectWithJsonpath() throws Exception { - JsonFileDataLoader dataLoader = new JsonFileDataLoader( - JsonFileDataLoaderConfig.builder() - .jsonPath("$.data") - .filePath(jsonObjectFilePath) - .build() - ); - List inputData = dataLoader.prepareDataList(); - log.info("Json File DataLoader: {}", inputData); - Assertions.assertEquals(1, inputData.size()); - } - - @Test - public void testLoadJsonArray() throws Exception { - JsonFileDataLoader dataLoader = new JsonFileDataLoader( - JsonFileDataLoaderConfig.builder() - .jsonPath("$.data") - .filePath(jsonArrayFilePath) - .build() - ); - List inputData = dataLoader.prepareDataList(); - log.info("Json File DataLoader: {}", inputData); - Assertions.assertEquals(2, inputData.size()); - } - - @Test - public void testLoadJsonArray2() throws Exception { - JsonFileDataLoader dataLoader = new JsonFileDataLoader( - JsonFileDataLoaderConfig.builder() - .filePath(jsonArrayFilePath2) - .build() - ); - List inputData = dataLoader.prepareDataList(); - log.info("Json File DataLoader: {}", inputData); - Assertions.assertEquals(2, inputData.size()); - } -} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader/JsonTextDataLoaderTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader/JsonTextDataLoaderTest.java deleted file mode 100644 index 08f963d..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader/JsonTextDataLoaderTest.java +++ /dev/null @@ -1,20 +0,0 @@ -package com.evalkit.framework.eval.node.dataloader; - -import org.junit.jupiter.api.Test; - -class JsonTextDataLoaderTest { - - void test() { - JsonTextDataLoader jsonTextDataLoader = new JsonTextDataLoader() { - @Override - public String prepareJsonpath() { - return "$"; - } - - @Override - public String prepareJson() { - return "{\"query\":\"hello\"}"; - } - }; - } -} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader/MultiDataLoaderTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader/MultiDataLoaderTest.java deleted file mode 100644 index 70aa28a..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader/MultiDataLoaderTest.java +++ /dev/null @@ -1,45 +0,0 @@ -package com.evalkit.framework.eval.node.dataloader; - -import com.evalkit.framework.common.utils.list.ListUtils; -import com.evalkit.framework.common.utils.map.MapUtils; -import com.evalkit.framework.eval.model.InputData; -import lombok.extern.slf4j.Slf4j; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -import java.util.List; - -@Slf4j -class MultiDataLoaderTest { - - MultiDataLoader multiDataLoader; - - @BeforeEach - void setUp() { - DataLoader d1 = new DataLoader() { - @Override - public List prepareDataList() throws Exception { - return ListUtils.of( - new InputData(MapUtils.of("query", "1")) - ); - } - }; - DataLoader d2 = new DataLoader() { - @Override - public List prepareDataList() throws Exception { - return ListUtils.of( - new InputData(MapUtils.of("query", "2")) - ); - } - }; - multiDataLoader = new MultiDataLoader(ListUtils.of(d1, d2)); - } - - @Test - public void testPrepareDataList() { - List inputData = multiDataLoader.prepareDataList(); - log.info("multi data loader: {}", inputData); - Assertions.assertEquals(2, inputData.size()); - } -} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader/datainjector/DataInjectorTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader/datainjector/DataInjectorTest.java deleted file mode 100644 index c50c3aa..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader/datainjector/DataInjectorTest.java +++ /dev/null @@ -1,54 +0,0 @@ -package com.evalkit.framework.eval.node.dataloader.datainjector; - -import com.evalkit.framework.common.utils.runtime.RuntimeEnvUtils; -import com.evalkit.framework.common.utils.time.DateUtils; -import com.evalkit.framework.eval.model.DataItem; -import com.evalkit.framework.eval.model.ScorerResult; -import com.evalkit.framework.eval.node.begin.Begin; -import com.evalkit.framework.eval.node.counter.BasicCounter; -import com.evalkit.framework.eval.node.dataloader.JsonDataLoader; -import com.evalkit.framework.eval.node.dataloader.JsonFileDataLoader; -import com.evalkit.framework.eval.node.dataloader.config.JsonFileDataLoaderConfig; -import com.evalkit.framework.eval.node.reporter.CsvReporter; -import com.evalkit.framework.eval.node.reporter.ExcelReporter; -import com.evalkit.framework.eval.node.reporter.JsonReporter; -import com.evalkit.framework.eval.node.reporter.html.HtmlReporter; -import com.evalkit.framework.eval.node.scorer.Scorer; -import com.evalkit.framework.workflow.WorkflowBuilder; -import org.junit.jupiter.api.Disabled; -import org.junit.jupiter.api.Test; - -class DataInjectorTest { - String filePath = RuntimeEnvUtils.getPropertyFromResource("secret.properties", "json-file-datainjector-test-file"); - - @Test - @Disabled - void test() { - Begin begin = new Begin(); - - JsonDataLoader jsonDataLoader = new JsonFileDataLoader( - JsonFileDataLoaderConfig.builder() - .jsonPath("$.dataItems") - .filePath(filePath) - .openInjectData(true) - .build() - ); - - BasicCounter basicCounter = new BasicCounter(); - - Scorer scorer99 = new Scorer() { - @Override - public ScorerResult eval(DataItem dataItem) throws Exception { - return new ScorerResult("评估器99", 0, 1, "无理由", null); - } - }; - - String fileName = "DataInjectorTest_" + DateUtils.nowToString("yyyyMMddHHmmss"); - HtmlReporter htmlReporter = new HtmlReporter(fileName, fileName); - JsonReporter jsonReporter = new JsonReporter(fileName, fileName); - ExcelReporter excelReporter = new ExcelReporter(fileName, fileName); - CsvReporter csvReporter = new CsvReporter(fileName, fileName); - - new WorkflowBuilder().link(begin, jsonDataLoader, scorer99, basicCounter, htmlReporter, jsonReporter, excelReporter, csvReporter).build().execute(); - } -} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader_wrapper/DataLoaderWrapperTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader_wrapper/DataLoaderWrapperTest.java index 919c754..2b8cb00 100644 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader_wrapper/DataLoaderWrapperTest.java +++ b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader_wrapper/DataLoaderWrapperTest.java @@ -1,17 +1,333 @@ package com.evalkit.framework.eval.node.dataloader_wrapper; +import com.evalkit.framework.eval.context.WorkflowContextOps; import com.evalkit.framework.eval.model.DataItem; +import com.evalkit.framework.eval.model.InputData; import com.evalkit.framework.eval.node.dataloader_wrapper.config.DataLoaderWrapperConfig; +import com.evalkit.framework.workflow.model.WorkflowContext; +import lombok.extern.slf4j.Slf4j; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; +import java.util.*; +import java.util.concurrent.CopyOnWriteArrayList; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicReference; + +import static org.junit.jupiter.api.Assertions.*; + +@Slf4j +@DisplayName("DataLoaderWrapper 单元测试") class DataLoaderWrapperTest { - void test() { - DataLoaderWrapper dataLoaderWrapper = new DataLoaderWrapper( - DataLoaderWrapperConfig.builder().build() - ) { + + // ===================== 工具方法 ===================== + + /** + * 构建一个简单的 DataLoaderWrapper,wrapper 逻辑由 Runnable 提供 + */ + private DataLoaderWrapper buildWrapper(java.util.function.Consumer wrapperLogic) { + return new DataLoaderWrapper() { + @Override + protected void wrapper(DataItem dataItem) { + wrapperLogic.accept(dataItem); + } + }; + } + + /** + * 构建带自定义 config 的 DataLoaderWrapper + */ + private DataLoaderWrapper buildWrapper(DataLoaderWrapperConfig config, + java.util.function.Consumer wrapperLogic) { + return new DataLoaderWrapper(config) { + @Override + protected void wrapper(DataItem dataItem) { + wrapperLogic.accept(dataItem); + } + }; + } + + /** + * 构造包含指定条数 DataItem 的 WorkflowContext + */ + private WorkflowContext buildContextWithDataItems(int size) { + WorkflowContext ctx = new WorkflowContext(); + List items = new CopyOnWriteArrayList<>(); + for (int i = 0; i < size; i++) { + Map inputItem = new HashMap<>(); + inputItem.put("id", i); + inputItem.put("value", "v" + i); + items.add(new DataItem((long) i, new InputData(inputItem))); + } + WorkflowContextOps.setDataItems(ctx, items); + return ctx; + } + + /** + * 为 DataLoaderWrapper 注入上下文并执行 + */ + private void executeWithContext(DataLoaderWrapper wrapper, WorkflowContext ctx) { + wrapper.setWorkflowContext(ctx); + try { + wrapper.call(); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + // ===================== constructor 测试 ===================== + + @Test + @DisplayName("无参构造器应使用默认 DataLoaderWrapperConfig(threadNum=1)") + void testConstructor_defaultConfig() { + DataLoaderWrapper wrapper = buildWrapper(dataItem -> { + }); + assertNotNull(wrapper.config, "默认构造器应初始化 config"); + assertEquals(1, wrapper.config.getThreadNum(), "默认线程数应为 1"); + } + + @Test + @DisplayName("带 DataLoaderWrapperConfig 构造器应正确保存配置") + void testConstructor_withConfig() { + DataLoaderWrapperConfig config = DataLoaderWrapperConfig.builder().threadNum(4).build(); + DataLoaderWrapper wrapper = buildWrapper(config, dataItem -> { + }); + assertEquals(4, wrapper.config.getThreadNum()); + } + + // ===================== executeWrapper 测试 ===================== + + @Test + @DisplayName("executeWrapper 正常执行时应返回同一个 DataItem 实例") + void testExecuteWrapper_returnsSameDataItem() { + DataLoaderWrapper wrapper = buildWrapper(dataItem -> { + }); + DataItem dataItem = new DataItem(0L, new InputData(new HashMap<>())); + DataItem result = wrapper.executeWrapper(dataItem); + assertSame(dataItem, result, "executeWrapper 应返回同一 DataItem 实例"); + } + + @Test + @DisplayName("executeWrapper 中 wrapper 逻辑可修改 DataItem 的 InputData 字段") + void testExecuteWrapper_wrapperModifiesDataItem() { + DataLoaderWrapper wrapper = buildWrapper(dataItem -> + dataItem.getInputData().set("modified", true)); + + Map inputItem = new HashMap<>(); + DataItem dataItem = new DataItem(0L, new InputData(inputItem)); + wrapper.executeWrapper(dataItem); + + assertEquals(true, dataItem.getInputData().get("modified"), + "wrapper 应能修改 DataItem 的 InputData 字段"); + } + + @Test + @DisplayName("executeWrapper 中 wrapper 抛出异常时应被捕获,返回原 DataItem 不抛出") + void testExecuteWrapper_wrapperThrows_returnOriginalItem() { + DataLoaderWrapper wrapper = buildWrapper(dataItem -> { + throw new RuntimeException("mock wrapper error"); + }); + + DataItem dataItem = new DataItem(0L, new InputData(new HashMap<>())); + DataItem result = assertDoesNotThrow(() -> wrapper.executeWrapper(dataItem), + "wrapper 抛异常时 executeWrapper 不应向外抛出"); + assertSame(dataItem, result, "抛异常后应返回原始 DataItem"); + } + + // ===================== 钩子方法测试 ===================== + + @Test + @DisplayName("beforeWrapper 钩子在 wrapper 前被调用") + void testBeforeWrapper_called() { + AtomicBoolean beforeCalled = new AtomicBoolean(false); + AtomicBoolean wrapperCalled = new AtomicBoolean(false); + List callOrder = new ArrayList<>(); + + DataLoaderWrapper wrapper = new DataLoaderWrapper() { + @Override + protected void beforeWrapper(DataItem dataItem) { + beforeCalled.set(true); + callOrder.add("before"); + } + + @Override + protected void wrapper(DataItem dataItem) { + wrapperCalled.set(true); + callOrder.add("wrapper"); + } + }; + + wrapper.executeWrapper(new DataItem(0L, new InputData(new HashMap<>()))); + assertTrue(beforeCalled.get(), "beforeWrapper 应被调用"); + assertEquals(Arrays.asList("before", "wrapper"), callOrder, "before 应在 wrapper 之前调用"); + } + + @Test + @DisplayName("afterWrapper 钩子在 wrapper 后被调用") + void testAfterWrapper_called() { + List callOrder = new ArrayList<>(); + + DataLoaderWrapper wrapper = new DataLoaderWrapper() { + @Override + protected void wrapper(DataItem dataItem) { + callOrder.add("wrapper"); + } + + @Override + protected void afterWrapper(DataItem dataItem) { + callOrder.add("after"); + } + }; + + wrapper.executeWrapper(new DataItem(0L, new InputData(new HashMap<>()))); + assertEquals(Arrays.asList("wrapper", "after"), callOrder, "after 应在 wrapper 之后调用"); + } + + @Test + @DisplayName("wrapper 抛异常时 onWrapperError 钩子被调用,并传入正确异常") + void testOnWrapperError_called() { + AtomicBoolean errorCalled = new AtomicBoolean(false); + AtomicReference capturedError = new AtomicReference<>(); + + DataLoaderWrapper wrapper = new DataLoaderWrapper() { @Override protected void wrapper(DataItem dataItem) { - // 增强dataItem + throw new RuntimeException("test-error"); + } + + @Override + protected void onWrapperError(DataItem dataItem, Throwable e) { + errorCalled.set(true); + capturedError.set(e); } }; + + wrapper.executeWrapper(new DataItem(0L, new InputData(new HashMap<>()))); + assertTrue(errorCalled.get(), "wrapper 抛异常时 onWrapperError 应被调用"); + assertNotNull(capturedError.get()); + assertEquals("test-error", capturedError.get().getMessage()); + } + + @Test + @DisplayName("wrapper 抛异常时 afterWrapper 不被调用") + void testAfterWrapper_notCalledOnError() { + AtomicBoolean afterCalled = new AtomicBoolean(false); + + DataLoaderWrapper wrapper = new DataLoaderWrapper() { + @Override + protected void wrapper(DataItem dataItem) { + throw new RuntimeException("error"); + } + + @Override + protected void afterWrapper(DataItem dataItem) { + afterCalled.set(true); + } + }; + + wrapper.executeWrapper(new DataItem(0L, new InputData(new HashMap<>()))); + assertFalse(afterCalled.get(), "wrapper 抛异常时 afterWrapper 不应被调用"); + } + + // ===================== doExecute 测试 ===================== + + @Test + @DisplayName("doExecute 对 WorkflowContext 中的每个 DataItem 都应执行 wrapper") + void testDoExecute_wrapperCalledForEachDataItem() { + AtomicInteger wrapperCount = new AtomicInteger(0); + DataLoaderWrapper wrapper = buildWrapper(dataItem -> wrapperCount.incrementAndGet()); + + WorkflowContext ctx = buildContextWithDataItems(5); + executeWithContext(wrapper, ctx); + + assertEquals(5, wrapperCount.get(), "wrapper 应对每个 DataItem 都被调用一次"); + } + + @Test + @DisplayName("doExecute 后 DataItem 中的修改应被持久化到 WorkflowContext") + void testDoExecute_modificationsPersisted() { + DataLoaderWrapper wrapper = buildWrapper(dataItem -> + dataItem.getInputData().set("wrapped", true)); + + WorkflowContext ctx = buildContextWithDataItems(3); + executeWithContext(wrapper, ctx); + + List dataItems = WorkflowContextOps.getDataItems(ctx); + for (DataItem dataItem : dataItems) { + assertEquals(true, dataItem.getInputData().get("wrapped"), + "每个 DataItem 的 InputData 都应包含 wrapper 写入的字段"); + } + } + + @Test + @DisplayName("doExecute 时部分 wrapper 抛异常,其余 DataItem 仍应正常处理") + void testDoExecute_partialFailure_othersSucceed() { + DataLoaderWrapper wrapper = buildWrapper(dataItem -> { + if (dataItem.getDataIndex() == 1L) { + throw new RuntimeException("mock error"); + } + dataItem.getInputData().set("done", true); + }); + + WorkflowContext ctx = buildContextWithDataItems(3); + executeWithContext(wrapper, ctx); + + List dataItems = WorkflowContextOps.getDataItems(ctx); + assertEquals(true, dataItems.get(0).getInputData().get("done"), + "index=0 应正常完成"); + assertNull(dataItems.get(1).getInputData().get("done"), + "index=1 wrapper 失败,done 字段不应被设置"); + assertEquals(true, dataItems.get(2).getInputData().get("done"), + "index=2 应正常完成"); + } + + @Test + @DisplayName("doExecute 时 DataItem 列表为 null,不应抛出异常") + void testDoExecute_nullDataItems_noThrow() { + DataLoaderWrapper wrapper = buildWrapper(dataItem -> { + }); + WorkflowContext ctx = new WorkflowContext(); + // 不设置 dataItems,默认为 null + assertDoesNotThrow(() -> executeWithContext(wrapper, ctx), + "DataItems 为 null 时 doExecute 不应抛出异常"); + } + + @Test + @DisplayName("doExecute 时 DataItem 列表为空,不应抛出异常") + void testDoExecute_emptyDataItems_noThrow() { + DataLoaderWrapper wrapper = buildWrapper(dataItem -> { + }); + WorkflowContext ctx = new WorkflowContext(); + WorkflowContextOps.setDataItems(ctx, new CopyOnWriteArrayList<>()); + assertDoesNotThrow(() -> executeWithContext(wrapper, ctx), + "DataItems 为空时 doExecute 不应抛出异常"); + } + + @Test + @DisplayName("executeWrapper 三个钩子按 before→wrapper→after 顺序执行") + void testExecuteWrapper_hookOrder() { + List order = new ArrayList<>(); + + DataLoaderWrapper wrapper = new DataLoaderWrapper() { + @Override + protected void beforeWrapper(DataItem dataItem) { + order.add("before"); + } + + @Override + protected void wrapper(DataItem dataItem) { + order.add("wrapper"); + } + + @Override + protected void afterWrapper(DataItem dataItem) { + order.add("after"); + } + }; + + wrapper.executeWrapper(new DataItem(0L, new InputData(new HashMap<>()))); + assertEquals(Arrays.asList("before", "wrapper", "after"), order, + "钩子应按 before→wrapper→after 顺序执行"); } } \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader_wrapper/PolishDataLoaderWrapperTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader_wrapper/PolishDataLoaderWrapperTest.java deleted file mode 100644 index aed09a1..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader_wrapper/PolishDataLoaderWrapperTest.java +++ /dev/null @@ -1,16 +0,0 @@ -package com.evalkit.framework.eval.node.dataloader_wrapper; - -import com.evalkit.framework.eval.node.dataloader_wrapper.config.PolishDataLoaderWrapperConfig; - -class PolishDataLoaderWrapperTest { - void test() { - PolishDataLoaderWrapper polishDataLoaderWrapper = new PolishDataLoaderWrapper( - PolishDataLoaderWrapperConfig.builder().build() - ) { - @Override - public String selectField() { - return ""; - } - }; - } -} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader_wrapper/PromptDataLoaderWrapperTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader_wrapper/PromptDataLoaderWrapperTest.java deleted file mode 100644 index 1a02410..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader_wrapper/PromptDataLoaderWrapperTest.java +++ /dev/null @@ -1,21 +0,0 @@ -package com.evalkit.framework.eval.node.dataloader_wrapper; - -import com.evalkit.framework.eval.node.dataloader_wrapper.config.DataLoaderWrapperConfig; - -class PromptDataLoaderWrapperTest { - void test() { - PromptDataLoaderWrapper promptDataLoaderWrapper = new PromptDataLoaderWrapper( - DataLoaderWrapperConfig.builder().build() - ) { - @Override - public String preparePrompt() { - return ""; - } - - @Override - public String selectField() { - return ""; - } - }; - } -} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader_wrapper/mock/mocker/DateMockerTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader_wrapper/mock/mocker/DateMockerTest.java deleted file mode 100644 index 7677d88..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader_wrapper/mock/mocker/DateMockerTest.java +++ /dev/null @@ -1,115 +0,0 @@ -package com.evalkit.framework.eval.node.dataloader_wrapper.mock.mocker; - -import com.evalkit.framework.common.utils.time.DateUtils; -import com.evalkit.framework.eval.mock.mocker.DateMocker; -import lombok.extern.slf4j.Slf4j; -import org.junit.jupiter.api.RepeatedTest; -import org.junit.jupiter.api.Test; - -import java.text.ParseException; -import java.text.SimpleDateFormat; -import java.util.Arrays; -import java.util.Collections; -import java.util.Date; - -import static org.junit.jupiter.api.Assertions.*; - -@Slf4j -class DateMockerTest { - private final DateMocker mocker = new DateMocker(); - - @Test - void testSupportRuleName() { - assertTrue(mocker.support("date", null)); - assertTrue(mocker.support("future_date", null)); - assertTrue(mocker.support("past_date", null)); - assertFalse(mocker.support("random_string", null)); - } - - @Test - void testNowStrategyDefaultPattern() throws ParseException { - String result = mocker.mock("date", Collections.emptyList()); - assertNotNull(result); - new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").parse(result); - } - - @Test - void testNowStrategyCustomPattern() throws ParseException { - String pattern = "yyyy/MM/dd"; - String result = mocker.mock("date", Collections.singletonList(pattern)); - assertNotNull(result); - new SimpleDateFormat(pattern).parse(result); - } - - @RepeatedTest(100) - void testFutureDateWithinRange() throws ParseException { - String result = mocker.mock("future_date", Arrays.asList("15", "365")); - log.info("result:{}", result); - assertNotNull(result); - SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); - assertTrue(sdf.parse(result).after(DateUtils.addDays(new Date(), 14))); - assertTrue(sdf.parse(result).before(DateUtils.addDays(new Date(), 366))); - - } - - @RepeatedTest(100) - void testPastDateWithinRange() throws ParseException { - String result = mocker.mock("past_date", Arrays.asList("15", "365")); - log.info("result:{}", result); - assertNotNull(result); - SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); - assertTrue(sdf.parse(result).before(DateUtils.addDays(new Date(), -14))); - assertTrue(sdf.parse(result).after(DateUtils.addDays(new Date(), -366))); - } - - @RepeatedTest(100) - void testFutureDateWithCustomPattern() throws ParseException { - String result = mocker.mock("future_date", Arrays.asList("366", "yyyy/MM/dd")); - log.info("result:{}", result); - assertNotNull(result); - SimpleDateFormat sdf = new SimpleDateFormat("yyyy/MM/dd"); - assertTrue(sdf.parse(result).before(DateUtils.addDays(new Date(), 366))); - } - - @RepeatedTest(100) - void testPastDateWithCustomPattern() throws ParseException { - String result = mocker.mock("past_date", Arrays.asList("365", "yyyy/MM/dd")); - log.info("result:{}", result); - assertNotNull(result); - SimpleDateFormat sdf = new SimpleDateFormat("yyyy/MM/dd"); - assertTrue(sdf.parse(result).after(DateUtils.addDays(new Date(), -366))); - } - - @RepeatedTest(100) - void testFutureDateWithinRangeWithCustomPattern() throws ParseException { - String result = mocker.mock("future_date", Arrays.asList("15", "365", "yyyy/MM/dd")); - log.info("result:{}", result); - assertNotNull(result); - SimpleDateFormat sdf = new SimpleDateFormat("yyyy/MM/dd"); - assertTrue(sdf.parse(result).after(DateUtils.addDays(new Date(), 14))); - assertTrue(sdf.parse(result).before(DateUtils.addDays(new Date(), 366))); - } - - @RepeatedTest(100) - void testPastDateWithinRangeWithCustomPattern() throws ParseException { - String result = mocker.mock("past_date", Arrays.asList("15", "365", "yyyy/MM/dd")); - log.info("result:{}", result); - assertNotNull(result); - SimpleDateFormat sdf = new SimpleDateFormat("yyyy/MM/dd"); - assertTrue(sdf.parse(result).before(DateUtils.addDays(new Date(), -14))); - assertTrue(sdf.parse(result).after(DateUtils.addDays(new Date(), -366))); - } - - @Test - void testInvalidArgsThrowsException() { - IllegalArgumentException ex = assertThrows(IllegalArgumentException.class, - () -> mocker.mock("future_date", Arrays.asList("abc", "xyz"))); - log.info(ex.getMessage()); - assertTrue(ex.getMessage().contains("Error parsing args")); - } - - @Test - void testUnsupportedRuleReturnsNull() { - assertNull(mocker.mock("unknown_rule", Collections.emptyList())); - } -} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader_wrapper/mock/mocker/NumberMockerTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader_wrapper/mock/mocker/NumberMockerTest.java deleted file mode 100644 index 353865a..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader_wrapper/mock/mocker/NumberMockerTest.java +++ /dev/null @@ -1,164 +0,0 @@ -package com.evalkit.framework.eval.node.dataloader_wrapper.mock.mocker; - -import com.evalkit.framework.eval.mock.mocker.NumberMocker; -import lombok.extern.slf4j.Slf4j; -import org.junit.jupiter.api.RepeatedTest; -import org.junit.jupiter.api.Test; - -import java.util.Arrays; -import java.util.Collections; - -import static org.junit.jupiter.api.Assertions.*; - -@Slf4j -class NumberMockerTest { - private final NumberMocker mocker = new NumberMocker(); - - @Test - void testSupportRuleName() { - assertTrue(mocker.support("int", null)); - assertTrue(mocker.support("INT", null)); - assertTrue(mocker.support("float", null)); - assertTrue(mocker.support("FLOAT", null)); - assertFalse(mocker.support("string", null)); - assertFalse(mocker.support("date", null)); - } - - @Test - void testIntegerDefaultRange() { - String result = mocker.mock("int", Collections.emptyList()); - assertNotNull(result); - int value = Integer.parseInt(result); - assertTrue(value >= 0 && value <= 100, "Value should be between 0 and 100"); - log.info("Random int (default range): {}", result); - } - - @RepeatedTest(50) - void testIntegerWithMinValue() { - String result = mocker.mock("int", Collections.singletonList("50")); - assertNotNull(result); - long value = Long.parseLong(result); - assertTrue(value >= 50 && value <= 100, "Value should be between 50 and 100"); - log.info("Random int (min=50): {}", result); - } - - @RepeatedTest(50) - void testIntegerWithRange() { - String result = mocker.mock("int", Arrays.asList("100", "200")); - assertNotNull(result); - long value = Long.parseLong(result); - assertTrue(value >= 100 && value <= 200, "Value should be between 100 and 200"); - log.info("Random int (100-200): {}", result); - } - - @Test - void testFloatDefaultRange() { - String result = mocker.mock("float", Collections.emptyList()); - assertNotNull(result); - double value = Double.parseDouble(result); - assertTrue(value >= 0.0 && value < 100.0, "Value should be between 0.0 and 100.0"); - log.info("Random float (default range): {}", result); - } - - @RepeatedTest(50) - void testFloatWithMinValue() { - String result = mocker.mock("float", Collections.singletonList("10.5")); - assertNotNull(result); - double value = Double.parseDouble(result); - assertTrue(value >= 10.5 && value < 100.0, "Value should be between 10.5 and 100.0"); - log.info("Random float (min=10.5): {}", result); - } - - @RepeatedTest(50) - void testFloatWithRange() { - String result = mocker.mock("float", Arrays.asList("5.5", "15.5")); - assertNotNull(result); - double value = Double.parseDouble(result); - assertTrue(value >= 5.5 && value < 15.5, "Value should be between 5.5 and 15.5"); - log.info("Random float (5.5-15.5): {}", result); - } - - @RepeatedTest(50) - void testNegativeIntegerRange() { - String result = mocker.mock("int", Arrays.asList("-100", "-10")); - assertNotNull(result); - long value = Long.parseLong(result); - assertTrue(value >= -100 && value <= -10, "Value should be between -100 and -10"); - log.info("Random int (negative range): {}", result); - } - - @RepeatedTest(50) - void testNegativeFloatRange() { - String result = mocker.mock("float", Arrays.asList("-50.5", "-10.5")); - assertNotNull(result); - double value = Double.parseDouble(result); - assertTrue(value >= -50.5 && value < -10.5, "Value should be between -50.5 and -10.5"); - log.info("Random float (negative range): {}", result); - } - - @Test - void testZeroValue() { - String result = mocker.mock("int", Arrays.asList("0", "0")); - assertNotNull(result); - long value = Long.parseLong(result); - assertEquals(0, value, "Value should be 0"); - log.info("Random int (0-0): {}", result); - } - - @Test - void testLargeIntegerValue() { - String result = mocker.mock("int", Arrays.asList("1000000", "2000000")); - assertNotNull(result); - long value = Long.parseLong(result); - assertTrue(value >= 1000000 && value <= 2000000, "Value should be in range"); - log.info("Random int (large range): {}", result); - } - - @Test - void testInvalidIntegerArgsThrowsException() { - IllegalArgumentException ex = assertThrows(IllegalArgumentException.class, - () -> mocker.mock("int", Collections.singletonList("abc"))); - log.info(ex.getMessage()); - assertTrue(ex.getMessage().contains("Error parsing args")); - } - - @Test - void testInvalidFloatArgsThrowsException() { - IllegalArgumentException ex = assertThrows(IllegalArgumentException.class, - () -> mocker.mock("float", Arrays.asList("10.5", "abc"))); - log.info(ex.getMessage()); - assertTrue(ex.getMessage().contains("Error parsing args")); - } - - @Test - void testTooManyArgsThrowsException() { - IllegalArgumentException ex = assertThrows(IllegalArgumentException.class, - () -> mocker.mock("int", Arrays.asList("10", "20", "30", "40"))); - log.info(ex.getMessage()); - assertTrue(ex.getMessage().contains("Error parsing args")); - } - - @Test - void testUnsupportedRuleReturnsNull() { - assertNull(mocker.mock("unknown_rule", Collections.emptyList())); - } - - @Test - void testIntegerCaseInsensitive() { - String result = mocker.mock("INT", Collections.emptyList()); - assertNotNull(result); - long value = Long.parseLong(result); - assertTrue(value >= 0 && value <= 100, "Value should be between 0 and 100"); - log.info("Random INT (case-insensitive): {}", result); - } - - @Test - void testFloatCaseInsensitive() { - String result = mocker.mock("FLOAT", Collections.emptyList()); - assertNotNull(result); - double value = Double.parseDouble(result); - assertTrue(value >= 0.0 && value < 100.0, "Value should be between 0.0 and 100.0"); - log.info("Random FLOAT (case-insensitive): {}", result); - } -} - diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/reporter/ApiReporterTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/reporter/ApiReporterTest.java deleted file mode 100644 index d4bdf0f..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/reporter/ApiReporterTest.java +++ /dev/null @@ -1,76 +0,0 @@ -package com.evalkit.framework.eval.node.reporter; - -import com.evalkit.framework.eval.model.DataItem; -import org.junit.jupiter.api.Test; - -import java.util.Collections; -import java.util.Map; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNotNull; - -class ApiReporterTest { - - /** - * 测试 ApiReporter 可以正常构建,不发起真实 HTTP 请求 - */ - @Test - void testConstructApiReporter() { - String host = "http://localhost:8080"; - String api = "/api/test"; - String method = "POST"; - ApiReporter apiReporter = new ApiReporter(host, api, method) { - @Override - public Map prepareBody(DataItem item) { - return Collections.emptyMap(); - } - - @Override - public Map prepareHeader(DataItem item) { - return Collections.emptyMap(); - } - - @Override - public Map prepareParams(DataItem item) { - return Collections.emptyMap(); - } - }; - - assertNotNull(apiReporter, "ApiReporter 实例不应为 null"); - assertNotNull(apiReporter.getRequest(), "ApiReporter 的 request 不应为 null"); - assertEquals(host, apiReporter.getRequest().getHost(), "Host 应与构建时一致"); - assertEquals(api, apiReporter.getRequest().getApi(), "API 路径应与构建时一致"); - } - - /** - * 测试 prepareBody/prepareHeader/prepareParams 可以正确返回空 Map - */ - @Test - void testPrepareMethods() { - ApiReporter apiReporter = new ApiReporter("http://localhost:8080", "/api/report", "POST") { - @Override - public Map prepareBody(DataItem item) { - return Collections.singletonMap("key", "value"); - } - - @Override - public Map prepareHeader(DataItem item) { - return Collections.singletonMap("Content-Type", "application/json"); - } - - @Override - public Map prepareParams(DataItem item) { - return Collections.emptyMap(); - } - }; - - DataItem dataItem = new DataItem(); - Map body = apiReporter.prepareBody(dataItem); - assertNotNull(body, "prepareBody 不应返回 null"); - assertEquals("value", body.get("key")); - - Map headers = apiReporter.prepareHeader(dataItem); - assertNotNull(headers, "prepareHeader 不应返回 null"); - assertEquals("application/json", headers.get("Content-Type")); - } -} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/reporter/JdbcReportTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/reporter/JdbcReportTest.java deleted file mode 100644 index 46c3fb8..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/reporter/JdbcReportTest.java +++ /dev/null @@ -1,47 +0,0 @@ -package com.evalkit.framework.eval.node.reporter; - -import org.junit.jupiter.api.Test; - -import static org.junit.jupiter.api.Assertions.*; - -/** - * JdbcReport 测试 —— 验证 SQLite 内嵌数据库可以正常构建 JdbcReport 实例 - * 注意:JdbcReport.report() 方法使用了 MySQL 专用建表 SQL(auto_increment、comment), - * 因此仅测试对象构建逻辑,不执行真实的报告写入操作 - */ -class JdbcReportTest { - - private static final String SQLITE_URL = "jdbc:sqlite:file::memory:?cache=shared&db=jdbc_report_test"; - private static final String SQLITE_DRIVER = "org.sqlite.JDBC"; - - /** - * 测试 JdbcReport 可以使用 SQLite 内嵌数据库正常构建,不依赖外部 MySQL - */ - @Test - void testConstructWithSQLite() { - JdbcReport jdbcReport = new JdbcReport(SQLITE_DRIVER, SQLITE_URL, "", "") { - @Override - public String prepareTableName() { - return "eval_result"; - } - }; - assertNotNull(jdbcReport, "JdbcReport 实例不应为 null"); - assertEquals("eval_result", jdbcReport.prepareTableName(), "表名应正确返回"); - } - - /** - * 测试 JdbcReport 可以连接并验证 SQLite 连接池正常初始化 - */ - @Test - void testConnectionPoolInitialized() { - assertDoesNotThrow(() -> { - JdbcReport jdbcReport = new JdbcReport(SQLITE_DRIVER, SQLITE_URL, "", "") { - @Override - public String prepareTableName() { - return "test_table"; - } - }; - assertNotNull(jdbcReport); - }, "使用 SQLite 构建 JdbcReport 不应抛出异常"); - } -} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/reporter/JsonReporterTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/reporter/JsonReporterTest.java deleted file mode 100644 index f8fa01f..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/reporter/JsonReporterTest.java +++ /dev/null @@ -1,9 +0,0 @@ -package com.evalkit.framework.eval.node.reporter; - -import static org.junit.jupiter.api.Assertions.*; - -class JsonReporterTest { - void test(){ - JsonReporter jsonReporter = new JsonReporter("test.json"); - } -} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/reporter/ReporterTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/reporter/ReporterTest.java deleted file mode 100644 index d0c4169..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/reporter/ReporterTest.java +++ /dev/null @@ -1,16 +0,0 @@ -package com.evalkit.framework.eval.node.reporter; - -import com.evalkit.framework.eval.model.ReportData; - -import java.io.IOException; - -class ReporterTest { - void test() { - Reporter reporter = new Reporter() { - @Override - protected void report(ReportData reportData) throws IOException { - - } - }; - } -} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/DifyWorkflowScorerTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/DifyWorkflowScorerTest.java deleted file mode 100644 index e46d42a..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/DifyWorkflowScorerTest.java +++ /dev/null @@ -1,28 +0,0 @@ -package com.evalkit.framework.eval.node.scorer; - -import com.evalkit.framework.eval.model.ApiCompletionResult; -import com.evalkit.framework.eval.model.InputData; -import com.evalkit.framework.eval.model.ScorerResult; -import com.evalkit.framework.eval.node.scorer.config.DifyWorkflowScorerConfig; -import org.junit.jupiter.api.Test; - -import java.util.Collections; -import java.util.Map; - -class DifyWorkflowScorerTest { - void test() { - DifyWorkflowScorer difyWorkflowScorer = new DifyWorkflowScorer( - DifyWorkflowScorerConfig.builder().build() - ) { - @Override - public Map prepareInputParams(InputData inputData, ApiCompletionResult apiCompletionResult) { - return Collections.emptyMap(); - } - - @Override - public ScorerResult prepareScorerResult(InputData inputData, ApiCompletionResult apiCompletionResult, Map outputs) { - return null; - } - }; - } -} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/GSBScorerTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/GSBScorerTest.java deleted file mode 100644 index 3f29648..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/GSBScorerTest.java +++ /dev/null @@ -1,125 +0,0 @@ -package com.evalkit.framework.eval.node.scorer; - -import com.evalkit.framework.eval.model.ApiCompletionResult; -import com.evalkit.framework.eval.model.DataItem; -import com.evalkit.framework.eval.model.InputData; -import com.evalkit.framework.eval.model.ScorerResult; -import com.evalkit.framework.eval.node.scorer.config.PromptBasedScorerConfig; -import com.evalkit.framework.infra.service.llm.LLMService; -import lombok.extern.slf4j.Slf4j; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -import static org.junit.jupiter.api.Assertions.assertNotNull; - -@Slf4j -class GSBScorerTest { - LLMService llmService; - - @BeforeEach - void setUp() { - // 使用 mock LLMService 替代真实 DeepSeek,不依赖外部 token 或 HTTP 请求 - // GSBScorer.parseLLMReply 期望 LLM 返回 JSON 格式,包含 accuracy/relevance/completeness/fluency/reason 字段 - llmService = new LLMService() { - @Override - public String chat(String prompt) { - // 返回符合 GSBScorer 期望的 JSON 格式(各维度低分,表示候选回答较差) - return "{\n" + - " \"accuracy\": 2,\n" + - " \"relevance\": 2,\n" + - " \"completeness\": 2,\n" + - " \"fluency\": 3,\n" + - " \"reason\": \"候选答案与金标准存在明显差距,缺少关键信息。\"\n" + - "}"; - } - - @Override - public String getModel() { - return "mock-model"; - } - }; - } - - @Test - void test() { - GSBScorer gsbScorer = new GSBScorer( - PromptBasedScorerConfig.builder() - .llmService(llmService) - .build() - ) { - @Override - public String prepareGoldAnswer(InputData inputData, ApiCompletionResult apiCompletionResult) { - return "乔布斯是美国人"; - } - - @Override - public String prepareCandidateAnswer(InputData inputData, ApiCompletionResult apiCompletionResult) { - return "乔布美国人"; - } - - @Override - public String prepareInput(InputData inputData, ApiCompletionResult apiCompletionResult) { - return "乔布斯是非洲人"; - } - }; - DataItem dataItem = new DataItem(); - dataItem.setInputData(new InputData()); - dataItem.setApiCompletionResult(new ApiCompletionResult()); - ScorerResult scorerResult = gsbScorer.eval(dataItem); - - assertNotNull(scorerResult, "评分结果不应为 null"); - log.info("scorerResult:{}", scorerResult); - } - - @Test - void testGoodResult() { - // mock LLM 返回高分 JSON,表示候选回答比参考回答好 - // GSBScorer.parseLLMReply 期望 JSON 格式,包含 accuracy/relevance/completeness/fluency/reason - LLMService goodLLM = new LLMService() { - @Override - public String chat(String prompt) { - return "{\n" + - " \"accuracy\": 5,\n" + - " \"relevance\": 5,\n" + - " \"completeness\": 5,\n" + - " \"fluency\": 5,\n" + - " \"reason\": \"候选答案与金标准语义一致,语言自然,无遗漏。\"\n" + - "}"; - } - - @Override - public String getModel() { - return "mock-model"; - } - }; - - GSBScorer gsbScorer = new GSBScorer( - PromptBasedScorerConfig.builder() - .llmService(goodLLM) - .build() - ) { - @Override - public String prepareGoldAnswer(InputData inputData, ApiCompletionResult apiCompletionResult) { - return "gold answer"; - } - - @Override - public String prepareCandidateAnswer(InputData inputData, ApiCompletionResult apiCompletionResult) { - return "better candidate"; - } - - @Override - public String prepareInput(InputData inputData, ApiCompletionResult apiCompletionResult) { - return "test input"; - } - }; - - DataItem dataItem = new DataItem(); - dataItem.setInputData(new InputData()); - dataItem.setApiCompletionResult(new ApiCompletionResult()); - ScorerResult result = gsbScorer.eval(dataItem); - - assertNotNull(result, "评分结果不应为 null"); - log.info("Good result scorerResult:{}", result); - } -} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/MultiCheckerBasedScorerTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/MultiCheckerBasedScorerTest.java deleted file mode 100644 index 950e839..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/MultiCheckerBasedScorerTest.java +++ /dev/null @@ -1,167 +0,0 @@ -package com.evalkit.framework.eval.node.scorer; - -import com.evalkit.framework.common.utils.list.ListUtils; -import com.evalkit.framework.common.utils.map.MapUtils; -import com.evalkit.framework.eval.model.DataItem; -import com.evalkit.framework.eval.model.InputData; -import com.evalkit.framework.eval.node.begin.Begin; -import com.evalkit.framework.eval.node.begin.config.BeginConfig; -import com.evalkit.framework.eval.node.dataloader.DataLoader; -import com.evalkit.framework.eval.node.reporter.StdReporter; -import com.evalkit.framework.eval.node.scorer.checker.AbstractChecker; -import com.evalkit.framework.eval.node.scorer.checker.Checker; -import com.evalkit.framework.eval.node.scorer.checker.config.CheckerConfig; -import com.evalkit.framework.eval.node.scorer.checker.constants.CheckMethod; -import com.evalkit.framework.eval.node.scorer.checker.model.CheckItem; -import com.evalkit.framework.eval.node.scorer.strategy.AvgScoreRateStrategy; -import com.evalkit.framework.workflow.WorkflowBuilder; -import org.junit.jupiter.api.Test; - -import java.util.List; - -/** - * 多检查器评估器测试类 - */ -class MultiCheckerBasedScorerTest { - - /** - * 必过检查器 - */ - class StarChecker extends AbstractChecker { - - public StarChecker() { - } - - public StarChecker(CheckerConfig config) { - super(config); - } - - /* 必过检查项 */ - private final CheckItem starCheckItem = CheckItem.builder() - .name("starCheckItem") - .star(true) - .build(); - /* 一般检查项 */ - private final CheckItem normalCheckItem = CheckItem.builder() - .name("normalCheckItem") - .star(false) - .build(); - - @Override - protected List prepareCheckItems(DataItem dataItem) { - return ListUtils.of( - starCheckItem, normalCheckItem - ); - } - - @Override - protected void check(DataItem dataItem) { - // 模拟必过项没过,普通项通过 - starCheckItem.setScore(0); - starCheckItem.setExecuted(true); - starCheckItem.setReason("不通过"); - starCheckItem.setCheckMethod(CheckMethod.RULE); - - normalCheckItem.setScore(1); - normalCheckItem.setExecuted(true); - normalCheckItem.setReason("通过"); - normalCheckItem.setCheckMethod(CheckMethod.RULE); - } - - @Override - public boolean support(DataItem dataItem) { - return true; - } - - @Override - public double getTotalScore() { - return 2; - } - } - - /** - * 普通检查器 - */ - class NormalChecker extends AbstractChecker { - - public NormalChecker() { - } - - public NormalChecker(CheckerConfig config) { - super(config); - } - - /* 一般检查项 */ - private final CheckItem normalCheckItem = CheckItem.builder() - .name("normalCheckItem") - .star(false) - .build(); - - @Override - protected List prepareCheckItems(DataItem dataItem) { - return ListUtils.of( - normalCheckItem - ); - } - - @Override - protected void check(DataItem dataItem) { - normalCheckItem.setScore(1); - normalCheckItem.setExecuted(true); - normalCheckItem.setReason("通过"); - normalCheckItem.setCheckMethod(CheckMethod.RULE); - } - - @Override - public boolean support(DataItem dataItem) { - return true; - } - - @Override - public double getTotalScore() { - return 1; - } - } - - /** - * 自定义评估器 - */ - class CustomScorer extends MultiCheckerBasedScorer { - @Override - public List prepareCheckers(DataItem dataItem) { - return ListUtils.of( - new StarChecker( - CheckerConfig.builder().name("StarChecker").star(true).totalScore(2).build() - ), - new NormalChecker( - CheckerConfig.builder().name("NormalChecker").star(false).totalScore(2).build() - ) - ); - } - } - - @Test - void test() { - Begin begin = new Begin( - BeginConfig.builder() - .threshold(0.5) - .scoreStrategy(new AvgScoreRateStrategy()) - .build() - ); - - DataLoader dataLoader = new DataLoader() { - @Override - public List prepareDataList() throws Exception { - return ListUtils.of( - new InputData(MapUtils.of("query", "1")) - ); - } - }; - - CustomScorer customScorer = new CustomScorer(); - - StdReporter stdReporter = new StdReporter(); - - new WorkflowBuilder().link(begin, dataLoader, customScorer, stdReporter).build().execute(); - } -} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/PromptBasedScorerTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/PromptBasedScorerTest.java deleted file mode 100644 index 30d6716..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/PromptBasedScorerTest.java +++ /dev/null @@ -1,102 +0,0 @@ -package com.evalkit.framework.eval.node.scorer; - -import com.evalkit.framework.eval.model.ApiCompletionResult; -import com.evalkit.framework.eval.model.DataItem; -import com.evalkit.framework.eval.model.InputData; -import com.evalkit.framework.eval.model.ScorerResult; -import com.evalkit.framework.eval.node.scorer.config.PromptBasedScorerConfig; -import com.evalkit.framework.infra.service.llm.LLMService; -import org.junit.jupiter.api.Test; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNotNull; - -class PromptBasedScorerTest { - - /** - * 构造一个 mock LLMService,返回符合 PromptBasedScorer.LLMResult 格式的 JSON - */ - private LLMService buildMockLLMService() { - return new LLMService() { - @Override - public String chat(String prompt) { - // 返回符合 LLMResult(包含 score 和 reason 字段)的 JSON - return "{\"score\":0.8,\"reason\":\"回复基本符合预期\"}"; - } - - @Override - public String getModel() { - return "mock-model"; - } - }; - } - - @Test - void testConstructPromptBasedScorer() { - PromptBasedScorer promptBasedScorer = new PromptBasedScorer( - PromptBasedScorerConfig.builder() - .llmService(buildMockLLMService()) - .build() - ) { - @Override - public String prepareSysPrompt() { - return "你是一个评分助手"; - } - - @Override - public String prepareUserPrompt(InputData inputData, ApiCompletionResult apiCompletionResult) { - return "问题: hello\n答案: world"; - } - - @Override - public LLMResult parseLLMReply(String reply) { - // 使用 setter 方法(@Data 生成 private 字段的 getter/setter) - LLMResult result = new LLMResult(); - result.setScore(0.8); - result.setReason("mock reason"); - return result; - } - }; - - assertNotNull(promptBasedScorer, "PromptBasedScorer 实例不应为 null"); - } - - @Test - void testEvalWithMockLLM() { - PromptBasedScorer promptBasedScorer = new PromptBasedScorer( - PromptBasedScorerConfig.builder() - .llmService(buildMockLLMService()) - .metricName("相关性检查") - .totalScore(1) - .enableRetry(false) - .build() - ) { - @Override - public String prepareSysPrompt() { - return "你是一个评分助手"; - } - - @Override - public String prepareUserPrompt(InputData inputData, ApiCompletionResult apiCompletionResult) { - return "问题: hello\n答案: world"; - } - - @Override - public LLMResult parseLLMReply(String reply) { - LLMResult result = new LLMResult(); - result.setScore(0.8); - result.setReason("回复基本符合预期"); - return result; - } - }; - - DataItem dataItem = new DataItem(); - dataItem.setInputData(new InputData()); - dataItem.setApiCompletionResult(new ApiCompletionResult()); - - ScorerResult result = promptBasedScorer.eval(dataItem); - assertNotNull(result, "评分结果不应为 null"); - assertEquals(0.8, result.getScore(), 1e-6, "评分应为 0.8"); - assertEquals("相关性检查", result.getMetric(), "指标名应正确"); - } -} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/RouterScorerTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/RouterScorerTest.java deleted file mode 100644 index 3070501..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/RouterScorerTest.java +++ /dev/null @@ -1,577 +0,0 @@ -package com.evalkit.framework.eval.node.scorer; - -import com.evalkit.framework.common.utils.list.ListUtils; -import com.evalkit.framework.common.utils.map.MapUtils; -import com.evalkit.framework.eval.context.WorkflowContextOps; -import com.evalkit.framework.eval.model.DataItem; -import com.evalkit.framework.eval.model.InputData; -import com.evalkit.framework.eval.model.ScorerResult; -import com.evalkit.framework.eval.node.begin.Begin; -import com.evalkit.framework.eval.node.begin.config.BeginConfig; -import com.evalkit.framework.eval.node.dataloader.DataLoader; -import com.evalkit.framework.eval.node.reporter.StdReporter; -import com.evalkit.framework.eval.node.scorer.config.RouterScorerConfig; -import com.evalkit.framework.eval.node.scorer.config.ScorerConfig; -import com.evalkit.framework.eval.node.scorer.model.ScorerRoute; -import com.evalkit.framework.eval.node.scorer.strategy.SumScoreStrategy; -import com.evalkit.framework.workflow.WorkflowBuilder; -import com.evalkit.framework.workflow.model.WorkflowContext; -import org.junit.jupiter.api.DisplayName; -import org.junit.jupiter.api.Nested; -import org.junit.jupiter.api.Test; - -import java.util.Arrays; -import java.util.List; - -import static org.assertj.core.api.Assertions.assertThat; -import static org.assertj.core.api.Assertions.assertThatThrownBy; -import static org.junit.jupiter.api.Assertions.*; - -/** - * 方案B:{@link RouterScorer} 路由评估器的单元测试。 - * - *

测试覆盖: - *

    - *
  • 构造校验:routes 为空时抛出 IllegalArgumentException
  • - *
  • first-match 模式:命中第一条规则,后续规则不执行
  • - *
  • first-match 模式:无路由命中且无兜底,返回跳过结果
  • - *
  • first-match 模式:无路由命中但有兜底 Scorer,委托兜底执行
  • - *
  • match-all 模式:所有命中规则均执行,结果取平均
  • - *
  • match-all 模式:无命中时返回跳过结果
  • - *
  • {@link ScorerRoute#of} 工厂方法
  • - *
  • {@link ScorerRoute#matches} 逻辑
  • - *
  • 端到端:三场景数据集,RouterScorer 单节点完成所有场景分流
  • - *
- *

- */ -@DisplayName("方案B - RouterScorer 路由评估器") -class RouterScorerTest { - - // ───────────────────────── 辅助 Builder ───────────────────────── - - /** - * 构造一个固定返回 returnScore 的简单 Scorer(不带 condition) - */ - private Scorer fixedScorer(String metric, double returnScore, double totalScore) { - ScorerConfig cfg = ScorerConfig.builder() - .metricName(metric) - .totalScore(totalScore) - .build(); - return new Scorer(cfg) { - @Override - public ScorerResult eval(DataItem dataItem) { - return new ScorerResult(metric, returnScore, totalScore, metric + " 评估结果"); - } - }; - } - - /** - * 构造带 WorkflowContext 的 DataItem - */ - private DataItem buildDataItem(long index, String scene, Scorer scorer) { - WorkflowContext ctx = new WorkflowContext(); - WorkflowContextOps.setScorerStrategy(ctx, new SumScoreStrategy()); - WorkflowContextOps.setThreshold(ctx, 0.0); - scorer.setWorkflowContext(ctx); - - DataItem item = new DataItem(); - item.setDataIndex(index); - item.setInputData(new InputData(index, MapUtils.of("scene", scene))); - return item; - } - - // ═══════════════════════════════════════════════════════════════ - // 构造校验 - // ═══════════════════════════════════════════════════════════════ - - @Nested - @DisplayName("构造校验") - class ConstructorValidationTest { - - @Test - @DisplayName("routes 为 null 时抛出 IllegalArgumentException") - void nullRoutes_throwsIllegalArgument() { - assertThatThrownBy(() -> new RouterScorer( - RouterScorerConfig.builder() - .metricName("路由评估") - .routes(null) - .build() - )).isInstanceOf(IllegalArgumentException.class) - .hasMessageContaining("routes"); - } - - @Test - @DisplayName("routes 为空列表时抛出 IllegalArgumentException") - void emptyRoutes_throwsIllegalArgument() { - assertThatThrownBy(() -> new RouterScorer( - RouterScorerConfig.builder() - .metricName("路由评估") - .routes(java.util.Collections.emptyList()) - .build() - )).isInstanceOf(IllegalArgumentException.class) - .hasMessageContaining("routes"); - } - } - - // ═══════════════════════════════════════════════════════════════ - // ScorerRoute 工具方法 - // ═══════════════════════════════════════════════════════════════ - - @Nested - @DisplayName("ScorerRoute") - class ScorerRouteTest { - - @Test - @DisplayName("of() 工厂方法构造正确") - void of_buildsRouteCorrectly() { - Scorer scorer = fixedScorer("m", 1.0, 1.0); - ScorerRoute route = ScorerRoute.of(item -> true, scorer, "测试路由"); - - assertEquals("测试路由", route.getRouteName()); - assertNotNull(route.getCondition()); - assertSame(scorer, route.getScorer()); - } - - @Test - @DisplayName("matches() 条件为 true 时返回 true") - void matches_conditionTrue_returnsTrue() { - ScorerRoute route = ScorerRoute.of( - item -> "chat".equals(item.getInputData().get("scene")), - fixedScorer("m", 1.0, 1.0), - "对话场景" - ); - DataItem item = new DataItem(); - item.setInputData(new InputData(MapUtils.of("scene", "chat"))); - assertTrue(route.matches(item)); - } - - @Test - @DisplayName("matches() 条件为 false 时返回 false") - void matches_conditionFalse_returnsFalse() { - ScorerRoute route = ScorerRoute.of( - item -> "chat".equals(item.getInputData().get("scene")), - fixedScorer("m", 1.0, 1.0), - "对话场景" - ); - DataItem item = new DataItem(); - item.setInputData(new InputData(MapUtils.of("scene", "search"))); - assertFalse(route.matches(item)); - } - } - - // ═══════════════════════════════════════════════════════════════ - // first-match 模式(默认) - // ═══════════════════════════════════════════════════════════════ - - @Nested - @DisplayName("first-match 模式(默认)") - class FirstMatchModeTest { - - @Test - @DisplayName("命中第一条规则,返回该规则的 Scorer 结果") - void firstMatch_hitsFirstRoute_returnsFirstResult() throws Exception { - Scorer chatScorer = fixedScorer("对话质量", 0.8, 1.0); - Scorer searchScorer = fixedScorer("搜索相关性", 0.6, 1.0); - - RouterScorer router = new RouterScorer(RouterScorerConfig.builder() - .metricName("路由评估") - .routes(Arrays.asList( - ScorerRoute.of(item -> "chat".equals(item.getInputData().get("scene")), chatScorer, "对话"), - ScorerRoute.of(item -> "search".equals(item.getInputData().get("scene")), searchScorer, "搜索") - )) - .build()); - - DataItem chatItem = buildDataItem(1L, "chat", router); - ScorerResult result = router.eval(chatItem); - - assertEquals("对话质量", result.getMetric()); - assertEquals(0.8, result.getScore(), 1e-6); - assertEquals("对话质量 评估结果", result.getReason()); - } - - @Test - @DisplayName("命中第二条规则(第一条未命中),返回第二条规则的结果") - void firstMatch_hitsSecondRoute_returnsSecondResult() throws Exception { - Scorer chatScorer = fixedScorer("对话质量", 0.8, 1.0); - Scorer searchScorer = fixedScorer("搜索相关性", 0.6, 1.0); - - RouterScorer router = new RouterScorer(RouterScorerConfig.builder() - .metricName("路由评估") - .routes(Arrays.asList( - ScorerRoute.of(item -> "chat".equals(item.getInputData().get("scene")), chatScorer, "对话"), - ScorerRoute.of(item -> "search".equals(item.getInputData().get("scene")), searchScorer, "搜索") - )) - .build()); - - DataItem searchItem = buildDataItem(2L, "search", router); - ScorerResult result = router.eval(searchItem); - - assertEquals("搜索相关性", result.getMetric()); - assertEquals(0.6, result.getScore(), 1e-6); - } - - @Test - @DisplayName("无路由命中且无兜底,返回跳过结果(score=0, totalScore=0)") - void firstMatch_noMatchNoDefault_returnsSkipResult() throws Exception { - Scorer chatScorer = fixedScorer("对话质量", 0.8, 1.0); - - RouterScorer router = new RouterScorer(RouterScorerConfig.builder() - .metricName("路由评估") - .routes(ListUtils.of( - ScorerRoute.of(item -> "chat".equals(item.getInputData().get("scene")), chatScorer, "对话") - )) - .build()); - - DataItem unknownItem = buildDataItem(3L, "unknown", router); - ScorerResult result = router.eval(unknownItem); - - assertEquals("skipped by condition", result.getReason()); - assertEquals(0.0, result.getScore(), 1e-6); - assertEquals(0.0, result.getTotalScore(), 1e-6); - assertTrue(result.isSuccess()); - assertTrue(result.isPass()); - } - - @Test - @DisplayName("无路由命中但有兜底 Scorer,委托兜底执行") - void firstMatch_noMatchWithDefault_delegatesToDefaultScorer() throws Exception { - Scorer chatScorer = fixedScorer("对话质量", 0.8, 1.0); - Scorer fallbackScorer = fixedScorer("兜底评估", 0.3, 1.0); - - RouterScorer router = new RouterScorer(RouterScorerConfig.builder() - .metricName("路由评估") - .routes(ListUtils.of( - ScorerRoute.of(item -> "chat".equals(item.getInputData().get("scene")), chatScorer, "对话") - )) - .defaultScorer(fallbackScorer) - .build()); - - DataItem unknownItem = buildDataItem(4L, "unknown", router); - ScorerResult result = router.eval(unknownItem); - - assertEquals("兜底评估", result.getMetric()); - assertEquals(0.3, result.getScore(), 1e-6); - } - - @Test - @DisplayName("多条规则均命中时,only 第一条规则生效(first-match 语义)") - void firstMatch_multipleRoutesMatch_onlyFirstTaken() throws Exception { - Scorer scorer1 = fixedScorer("指标1", 0.9, 1.0); - Scorer scorer2 = fixedScorer("指标2", 0.5, 1.0); - - RouterScorer router = new RouterScorer(RouterScorerConfig.builder() - .metricName("路由评估") - .routes(Arrays.asList( - ScorerRoute.of(item -> true, scorer1, "全匹配1"), // 始终命中 - ScorerRoute.of(item -> true, scorer2, "全匹配2") // 也始终命中 - )) - .matchAll(false) - .build()); - - DataItem item = buildDataItem(5L, "any", router); - ScorerResult result = router.eval(item); - - // first-match: 只取第一条 - assertEquals("指标1", result.getMetric()); - assertEquals(0.9, result.getScore(), 1e-6); - } - } - - // ═══════════════════════════════════════════════════════════════ - // match-all 模式 - // ═══════════════════════════════════════════════════════════════ - - @Nested - @DisplayName("match-all 模式") - class MatchAllModeTest { - - @Test - @DisplayName("多条规则均命中,结果取所有命中 Scorer 的平均分") - void matchAll_allRouteMatch_returnsAvgScore() throws Exception { - Scorer scorer1 = fixedScorer("指标1", 0.8, 1.0); - Scorer scorer2 = fixedScorer("指标2", 0.6, 1.0); - - RouterScorer router = new RouterScorer(RouterScorerConfig.builder() - .metricName("多维评估") - .routes(Arrays.asList( - ScorerRoute.of(item -> true, scorer1, "维度1"), - ScorerRoute.of(item -> true, scorer2, "维度2") - )) - .matchAll(true) - .build()); - - DataItem item = buildDataItem(1L, "any", router); - ScorerResult result = router.eval(item); - - // 平均分 = (0.8 + 0.6) / 2 = 0.7 - assertEquals("多维评估", result.getMetric()); - assertThat(result.getScore()).isCloseTo(0.7, org.assertj.core.data.Offset.offset(1e-6)); - } - - @Test - @DisplayName("只有部分规则命中,只对命中规则求平均") - void matchAll_partialMatch_averagesMatchedOnly() throws Exception { - Scorer scorer1 = fixedScorer("指标1", 1.0, 1.0); - Scorer scorer2 = fixedScorer("指标2", 0.0, 1.0); - - RouterScorer router = new RouterScorer(RouterScorerConfig.builder() - .metricName("部分匹配") - .routes(Arrays.asList( - ScorerRoute.of(item -> "chat".equals(item.getInputData().get("scene")), scorer1, "对话"), - ScorerRoute.of(item -> "search".equals(item.getInputData().get("scene")), scorer2, "搜索") - )) - .matchAll(true) - .build()); - - // scene=chat 只命中第一条规则 - DataItem chatItem = buildDataItem(1L, "chat", router); - ScorerResult result = router.eval(chatItem); - - // 只有 scorer1 命中,score = 1.0 - assertThat(result.getScore()).isCloseTo(1.0, org.assertj.core.data.Offset.offset(1e-6)); - } - - @Test - @DisplayName("match-all 无命中时返回跳过结果") - void matchAll_noMatch_returnsSkipResult() throws Exception { - Scorer scorer = fixedScorer("指标", 1.0, 1.0); - - RouterScorer router = new RouterScorer(RouterScorerConfig.builder() - .metricName("无命中") - .routes(ListUtils.of( - ScorerRoute.of(item -> "chat".equals(item.getInputData().get("scene")), scorer, "对话") - )) - .matchAll(true) - .build()); - - DataItem item = buildDataItem(1L, "unknown", router); - ScorerResult result = router.eval(item); - - assertEquals("skipped by condition", result.getReason()); - assertEquals(0.0, result.getTotalScore(), 1e-6); - } - - @Test - @DisplayName("match-all 理由拼接了所有命中路由的 metric 和 reason") - void matchAll_reasonContainsAllMatchedMetrics() throws Exception { - Scorer scorer1 = fixedScorer("指标1", 0.9, 1.0); - Scorer scorer2 = fixedScorer("指标2", 0.7, 1.0); - - RouterScorer router = new RouterScorer(RouterScorerConfig.builder() - .metricName("多维路由") - .routes(Arrays.asList( - ScorerRoute.of(item -> true, scorer1, "维度1"), - ScorerRoute.of(item -> true, scorer2, "维度2") - )) - .matchAll(true) - .build()); - - DataItem item = buildDataItem(1L, "any", router); - ScorerResult result = router.eval(item); - - assertThat(result.getReason()).contains("指标1").contains("指标2"); - } - } - - // ═══════════════════════════════════════════════════════════════ - // workflowContext 传递校验 - // ═══════════════════════════════════════════════════════════════ - - @Nested - @DisplayName("workflowContext 传递") - class WorkflowContextPropagationTest { - - @Test - @DisplayName("子 Scorer 在 eval 时可访问 RouterScorer 的 workflowContext") - void subScorer_receivesWorkflowContext() throws Exception { - // 子 Scorer 通过 getWorkflowContext() 读取 threshold 做断言 - final double[] capturedThreshold = {-1}; - ScorerConfig cfg = ScorerConfig.builder().metricName("上下文校验").totalScore(1.0).build(); - Scorer contextAwareScorer = new Scorer(cfg) { - @Override - public ScorerResult eval(DataItem dataItem) { - capturedThreshold[0] = WorkflowContextOps.getThreshold(getWorkflowContext()); - return new ScorerResult("上下文校验", 1.0, 1.0, "OK"); - } - }; - - RouterScorer router = new RouterScorer(RouterScorerConfig.builder() - .metricName("路由评估") - .routes(ListUtils.of( - ScorerRoute.of(item -> true, contextAwareScorer, "全匹配") - )) - .build()); - - WorkflowContext ctx = new WorkflowContext(); - WorkflowContextOps.setScorerStrategy(ctx, new SumScoreStrategy()); - WorkflowContextOps.setThreshold(ctx, 0.75); // 设置特定阈值 - router.setWorkflowContext(ctx); - - DataItem item = new DataItem(); - item.setDataIndex(1L); - item.setInputData(new InputData(MapUtils.of("x", "y"))); - - router.eval(item); - - // 验证子 Scorer 拿到了正确的 threshold - assertEquals(0.75, capturedThreshold[0], 1e-6); - } - } - - // ═══════════════════════════════════════════════════════════════ - // 端到端集成测试(通过 WorkflowBuilder) - // ═══════════════════════════════════════════════════════════════ - - @Nested - @DisplayName("端到端:RouterScorer + WorkflowBuilder") - class EndToEndTest { - - /** - * 数据集包含 chat/search/rag 三种场景各一条, - * RouterScorer 单节点通过 first-match 完成分流。 - * 验证每个 DataItem 的 EvalResult 分数来自对应场景的 Scorer。 - */ - @Test - @DisplayName("三场景数据集,单个 RouterScorer 节点完成所有场景分流") - void endToEnd_threeScenes_singleRouterNode() { - Scorer chatScorer = fixedScorer("对话质量", 0.8, 1.0); - Scorer searchScorer = fixedScorer("搜索相关性", 0.7, 1.0); - Scorer ragScorer = fixedScorer("RAG准确率", 0.9, 1.0); - - RouterScorer router = new RouterScorer(RouterScorerConfig.builder() - .metricName("场景路由评估") - .routes(Arrays.asList( - ScorerRoute.of(item -> "chat".equals(item.getInputData().get("scene")), chatScorer, "对话场景"), - ScorerRoute.of(item -> "search".equals(item.getInputData().get("scene")), searchScorer, "搜索场景"), - ScorerRoute.of(item -> "rag".equals(item.getInputData().get("scene")), ragScorer, "RAG场景") - )) - .build()); - - Begin begin = new Begin(BeginConfig.builder() - .scoreStrategy(new SumScoreStrategy()) - .threshold(0) - .build()); - - DataLoader dataLoader = new DataLoader() { - @Override - public List prepareDataList() { - return ListUtils.of( - new InputData(MapUtils.of("scene", "chat", "query", "你好")), - new InputData(MapUtils.of("scene", "search", "query", "搜索词")), - new InputData(MapUtils.of("scene", "rag", "query", "文档问题")) - ); - } - }; - - StdReporter reporter = new StdReporter(); - new WorkflowBuilder().link(begin, dataLoader, router, reporter).build().execute(); - - WorkflowContext ctx = begin.getWorkflowContext(); - List dataItems = WorkflowContextOps.getDataItems(ctx); - assertThat(dataItems).hasSize(3); - - DataItem chatItem = dataItems.stream() - .filter(d -> "chat".equals(d.getInputData().get("scene"))) - .findFirst().orElseThrow(RuntimeException::new); - assertThat(chatItem.getEvalResult().getScore()).isCloseTo(0.8, org.assertj.core.data.Offset.offset(1e-6)); - // 验证 metric 是对话质量(由 chatScorer 的结果写入) - assertThat(chatItem.getEvalResult().getScorerResults().get(0).getMetric()).isEqualTo("对话质量"); - - DataItem searchItem = dataItems.stream() - .filter(d -> "search".equals(d.getInputData().get("scene"))) - .findFirst().orElseThrow(RuntimeException::new); - assertThat(searchItem.getEvalResult().getScore()).isCloseTo(0.7, org.assertj.core.data.Offset.offset(1e-6)); - - DataItem ragItem = dataItems.stream() - .filter(d -> "rag".equals(d.getInputData().get("scene"))) - .findFirst().orElseThrow(RuntimeException::new); - assertThat(ragItem.getEvalResult().getScore()).isCloseTo(0.9, org.assertj.core.data.Offset.offset(1e-6)); - } - - @Test - @DisplayName("RouterScorer + 通用 Scorer 串联:通用 Scorer 对所有 DataItem 生效,路由 Scorer 按场景分流") - void endToEnd_routerPlusUniversalScorer() { - // 通用 Scorer(无 condition) - Scorer universalScorer = fixedScorer("通用格式检查", 0.5, 1.0); - - // 路由 Scorer - Scorer chatScorer = fixedScorer("对话质量", 0.8, 1.0); - Scorer searchScorer = fixedScorer("搜索相关性", 0.6, 1.0); - RouterScorer router = new RouterScorer(RouterScorerConfig.builder() - .metricName("场景路由") - .routes(Arrays.asList( - ScorerRoute.of(item -> "chat".equals(item.getInputData().get("scene")), chatScorer, "对话"), - ScorerRoute.of(item -> "search".equals(item.getInputData().get("scene")), searchScorer, "搜索") - )) - .build()); - - Begin begin = new Begin(BeginConfig.builder() - .scoreStrategy(new SumScoreStrategy()) - .build()); - - DataLoader dataLoader = new DataLoader() { - @Override - public List prepareDataList() { - return ListUtils.of( - new InputData(MapUtils.of("scene", "chat")), - new InputData(MapUtils.of("scene", "search")) - ); - } - }; - - StdReporter reporter = new StdReporter(); - new WorkflowBuilder().link(begin, dataLoader, universalScorer, router, reporter).build().execute(); - - WorkflowContext ctx = begin.getWorkflowContext(); - List dataItems = WorkflowContextOps.getDataItems(ctx); - - DataItem chatItem = dataItems.stream() - .filter(d -> "chat".equals(d.getInputData().get("scene"))) - .findFirst().orElseThrow(RuntimeException::new); - // chat: universalScorer(0.5) + chatScorer(0.8) = 1.3 - assertThat(chatItem.getEvalResult().getScore()).isCloseTo(1.3, org.assertj.core.data.Offset.offset(1e-6)); - - DataItem searchItem = dataItems.stream() - .filter(d -> "search".equals(d.getInputData().get("scene"))) - .findFirst().orElseThrow(RuntimeException::new); - // search: universalScorer(0.5) + searchScorer(0.6) = 1.1 - assertThat(searchItem.getEvalResult().getScore()).isCloseTo(1.1, org.assertj.core.data.Offset.offset(1e-6)); - } - - @Test - @DisplayName("未知场景数据使用 defaultScorer 兜底") - void endToEnd_unknownScene_defaultScorerApplied() { - Scorer fallback = fixedScorer("兜底评估", 0.1, 1.0); - Scorer chatScorer = fixedScorer("对话质量", 0.8, 1.0); - - RouterScorer router = new RouterScorer(RouterScorerConfig.builder() - .metricName("场景路由") - .routes(ListUtils.of( - ScorerRoute.of(item -> "chat".equals(item.getInputData().get("scene")), chatScorer, "对话") - )) - .defaultScorer(fallback) - .build()); - - Begin begin = new Begin(BeginConfig.builder() - .scoreStrategy(new SumScoreStrategy()) - .build()); - - DataLoader dataLoader = new DataLoader() { - @Override - public List prepareDataList() { - return ListUtils.of(new InputData(MapUtils.of("scene", "unknown"))); - } - }; - - StdReporter reporter = new StdReporter(); - new WorkflowBuilder().link(begin, dataLoader, router, reporter).build().execute(); - - WorkflowContext ctx = begin.getWorkflowContext(); - DataItem item = WorkflowContextOps.getDataItems(ctx).get(0); - // 未知场景命中 defaultScorer,分数=0.1 - assertThat(item.getEvalResult().getScore()).isCloseTo(0.1, org.assertj.core.data.Offset.offset(1e-6)); - assertThat(item.getEvalResult().getScorerResults().get(0).getMetric()).isEqualTo("兜底评估"); - } - } -} - diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/RubricBasedScorerTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/RubricBasedScorerTest.java index edebc9f..7bb27bf 100644 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/RubricBasedScorerTest.java +++ b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/RubricBasedScorerTest.java @@ -35,20 +35,6 @@ import static org.junit.jupiter.api.Assertions.*; -/** - * RubricBasedScorer 单元测试 - *

- * 测试覆盖: - *

    - *
  • 配置校验(validRubricConfig)
  • - *
  • 五种合并策略(WEIGHTED_AVERAGE / SIMPLE_AVERAGE / LOGICAL_AND / STAR_GATE / COMPLETION_RATE)
  • - *
  • 二元分强制约束(BINARY scoreType)
  • - *
  • 归一化公式(minScore > 0 的区间归一化)
  • - *
  • 多次采样取均值 + 代表性采样保留
  • - *
  • extra 字段透传
  • - *
  • 采样全失败时抛异常
  • - *
- */ @Slf4j class RubricBasedScorerTest { diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/ScorerConditionTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/ScorerConditionTest.java deleted file mode 100644 index 2002aff..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/ScorerConditionTest.java +++ /dev/null @@ -1,468 +0,0 @@ -package com.evalkit.framework.eval.node.scorer; - -import com.evalkit.framework.common.utils.list.ListUtils; -import com.evalkit.framework.common.utils.map.MapUtils; -import com.evalkit.framework.eval.context.WorkflowContextOps; -import com.evalkit.framework.eval.model.DataItem; -import com.evalkit.framework.eval.model.EvalResult; -import com.evalkit.framework.eval.model.InputData; -import com.evalkit.framework.eval.model.ScorerResult; -import com.evalkit.framework.eval.node.begin.Begin; -import com.evalkit.framework.eval.node.begin.config.BeginConfig; -import com.evalkit.framework.eval.node.dataloader.DataLoader; -import com.evalkit.framework.eval.node.reporter.StdReporter; -import com.evalkit.framework.eval.node.scorer.config.ScorerConfig; -import com.evalkit.framework.eval.node.scorer.strategy.SumScoreStrategy; -import com.evalkit.framework.workflow.WorkflowBuilder; -import com.evalkit.framework.workflow.model.WorkflowContext; -import org.junit.jupiter.api.DisplayName; -import org.junit.jupiter.api.Nested; -import org.junit.jupiter.api.Test; - -import java.util.List; - -import static org.assertj.core.api.Assertions.assertThat; -import static org.junit.jupiter.api.Assertions.*; - -/** - * 方案A:ScorerConfig.condition 场景路由条件的单元测试。 - * - *

测试覆盖: - *

    - *
  • {@link Scorer#shouldEval} 条件为 null 时始终执行
  • - *
  • {@link Scorer#shouldEval} 条件命中时执行,未命中时跳过
  • - *
  • {@link Scorer#buildSkipResult} 跳过结果的各字段正确性
  • - *
  • 通过 WorkflowBuilder 的端到端集成:多 Scorer 按 scene 字段分流,互不干扰
  • - *
  • 跳过结果的 totalScore=0 不影响汇总分数
  • - *
  • skipScore 自定义值被写入跳过结果
  • - *
- *

- */ -@DisplayName("方案A - Scorer condition 场景条件过滤") -class ScorerConditionTest { - - // ───────────────────────── 辅助 Builder ───────────────────────── - - /** - * 构造一个固定返回 returnScore 的简单 Scorer,可携带 condition - */ - private Scorer buildScorer(String metric, double returnScore, double totalScore, - java.util.function.Function condition) { - ScorerConfig cfg = ScorerConfig.builder() - .metricName(metric) - .totalScore(totalScore) - .condition(condition) - .build(); - return new Scorer(cfg) { - @Override - public ScorerResult eval(DataItem dataItem) { - return new ScorerResult(metric, returnScore, totalScore, "正常评估结果"); - } - }; - } - - /** - * 构造一个携带 scene 字段的 DataItem,并注入 WorkflowContext - */ - private DataItem buildDataItem(long index, String scene, Scorer scorer) { - WorkflowContext ctx = new WorkflowContext(); - WorkflowContextOps.setScorerStrategy(ctx, new SumScoreStrategy()); - WorkflowContextOps.setThreshold(ctx, 0.0); - scorer.setWorkflowContext(ctx); - - InputData inputData = new InputData(index, MapUtils.of("scene", scene)); - DataItem item = new DataItem(); - item.setDataIndex(index); - item.setInputData(inputData); - return item; - } - - // ═══════════════════════════════════════════════════════════════ - // shouldEval 方法 - // ═══════════════════════════════════════════════════════════════ - - @Nested - @DisplayName("shouldEval") - class ShouldEvalTest { - - @Test - @DisplayName("condition=null 时对任意 DataItem 均返回 true") - void condition_null_alwaysEval() { - Scorer scorer = buildScorer("m", 1.0, 1.0, null); - DataItem item = new DataItem(); - item.setDataIndex(1L); - assertTrue(scorer.shouldEval(item)); - } - - @Test - @DisplayName("condition 返回 true 时返回 true") - void condition_matches_returnsTrue() { - Scorer scorer = buildScorer("m", 1.0, 1.0, - item -> "chat".equals(item.getInputData().get("scene"))); - DataItem item = buildDataItem(1L, "chat", scorer); - assertTrue(scorer.shouldEval(item)); - } - - @Test - @DisplayName("condition 返回 false 时返回 false") - void condition_notMatches_returnsFalse() { - Scorer scorer = buildScorer("m", 1.0, 1.0, - item -> "chat".equals(item.getInputData().get("scene"))); - DataItem item = buildDataItem(1L, "search", scorer); - assertFalse(scorer.shouldEval(item)); - } - - @Test - @DisplayName("condition 返回 null 时视为 false(防御 NPE)") - void condition_returnsNull_treatedAsFalse() { - Scorer scorer = buildScorer("m", 1.0, 1.0, item -> null); - DataItem item = new DataItem(); - item.setDataIndex(1L); - assertFalse(scorer.shouldEval(item)); - } - } - - // ═══════════════════════════════════════════════════════════════ - // buildSkipResult 方法 - // ═══════════════════════════════════════════════════════════════ - - @Nested - @DisplayName("buildSkipResult") - class BuildSkipResultTest { - - @Test - @DisplayName("跳过结果的基本字段正确") - void skipResult_basicFields() { - Scorer scorer = buildScorer("指标A", 1.0, 1.0, - item -> "chat".equals(item.getInputData().get("scene"))); - DataItem item = buildDataItem(42L, "search", scorer); - - ScorerResult skipResult = scorer.buildSkipResult(item); - - assertEquals(42L, skipResult.getDataIndex()); - assertEquals("指标A", skipResult.getMetric()); - assertEquals(0.0, skipResult.getScore(), 1e-6); - // totalScore=0 确保不影响汇总分数 - assertEquals(0.0, skipResult.getTotalScore(), 1e-6); - assertEquals("skipped by condition", skipResult.getReason()); - assertTrue(skipResult.isSuccess()); - assertTrue(skipResult.isPass()); // 跳过不算失败 - } - - @Test - @DisplayName("star 字段固定为 false(跳过结果不触发一票否决)") - void skipResult_starIsFalse() { - ScorerConfig cfg = ScorerConfig.builder() - .metricName("必过指标") - .star(true) // config 中设置了 star - .condition(item -> false) - .build(); - Scorer scorer = new Scorer(cfg) { - @Override - public ScorerResult eval(DataItem dataItem) { - return new ScorerResult("必过指标", 1.0, 1.0, ""); - } - }; - DataItem item = new DataItem(); - item.setDataIndex(1L); - - ScorerResult skipResult = scorer.buildSkipResult(item); - // 跳过结果的 star=false,不会触发一票否决 - assertFalse(skipResult.isStar()); - } - - @Test - @DisplayName("skipScore 自定义值被写入跳过结果") - void skipResult_customSkipScore() { - ScorerConfig cfg = ScorerConfig.builder() - .metricName("m") - .condition(item -> false) - .skipScore(0.5) - .build(); - Scorer scorer = new Scorer(cfg) { - @Override - public ScorerResult eval(DataItem dataItem) { - return new ScorerResult("m", 1.0, 1.0, ""); - } - }; - DataItem item = new DataItem(); - item.setDataIndex(1L); - - ScorerResult skipResult = scorer.buildSkipResult(item); - assertEquals(0.5, skipResult.getScore(), 1e-6); - } - } - - // ═══════════════════════════════════════════════════════════════ - // evalWrapper 集成 condition 过滤 - // ═══════════════════════════════════════════════════════════════ - - @Nested - @DisplayName("evalWrapper 集成 condition") - class EvalWrapperWithConditionTest { - - @Test - @DisplayName("条件命中时,正常执行评估并返回评估结果") - void evalWrapper_conditionMatches_executesNormally() { - Scorer scorer = buildScorer("对话质量", 0.9, 1.0, - item -> "chat".equals(item.getInputData().get("scene"))); - DataItem item = buildDataItem(1L, "chat", scorer); - - ScorerResult result = scorer.evalWrapper(item); - - assertTrue(result.isSuccess()); - assertEquals(0.9, result.getScore(), 1e-6); - assertEquals("正常评估结果", result.getReason()); - } - - @Test - @DisplayName("条件未命中时,doExecute 返回跳过结果(score=0, totalScore=0)") - void evalWrapper_conditionNotMatches_doExecuteReturnsSkipResult() { - // 注意:条件过滤在 doExecute 的调度层(shouldEval ? evalWrapper : buildSkipResult), - // 不在 evalWrapper 本身。本测试通过 Workflow 端到端验证跳过行为。 - Scorer scorer = buildScorer("对话质量", 0.9, 1.0, - item -> "chat".equals(item.getInputData().get("scene"))); - - Begin begin = new Begin(BeginConfig.builder() - .scoreStrategy(new SumScoreStrategy()) - .build()); - - DataLoader dataLoader = new DataLoader() { - @Override - public List prepareDataList() { - // scene=search,不满足 condition(需要 chat) - return ListUtils.of(new InputData(MapUtils.of("scene", "search"))); - } - }; - - StdReporter reporter = new StdReporter(); - new WorkflowBuilder().link(begin, dataLoader, scorer, reporter).build().execute(); - - WorkflowContext ctx = begin.getWorkflowContext(); - DataItem item = WorkflowContextOps.getDataItems(ctx).get(0); - EvalResult evalResult = item.getEvalResult(); - - // 条件未命中,跳过结果:score=0, totalScore=0 - ScorerResult skipResult = evalResult.getScorerResults().get(0); - assertTrue(skipResult.isSuccess()); - assertTrue(skipResult.isPass()); - assertEquals(0.0, skipResult.getScore(), 1e-6); - assertEquals(0.0, skipResult.getTotalScore(), 1e-6); - assertEquals("skipped by condition", skipResult.getReason()); - } - - @Test - @DisplayName("condition=null 时行为与无 condition 完全一致") - void evalWrapper_nullCondition_behavesLikeNormal() { - Scorer scorer = buildScorer("无条件", 1.0, 1.0, null); - DataItem item = buildDataItem(1L, "any_scene", scorer); - - ScorerResult result = scorer.evalWrapper(item); - - assertTrue(result.isSuccess()); - assertEquals(1.0, result.getScore(), 1e-6); - } - } - - // ═══════════════════════════════════════════════════════════════ - // 端到端集成测试:多 Scorer 按 scene 分流(通过 WorkflowBuilder) - // ═══════════════════════════════════════════════════════════════ - - @Nested - @DisplayName("端到端:多 Scorer 按 scene 场景分流") - class EndToEndMultiSceneTest { - - /** - * 数据集包含 chat/search/rag 三种场景各一条, - * 三个 Scorer 分别只处理对应场景的 DataItem, - * 验证:每个 DataItem 只被对应 Scorer 评估,跳过结果不影响最终分数。 - */ - @Test - @DisplayName("三场景数据集,各 Scorer 只处理对应场景数据") - void multiScene_eachScorerHandlesOwnScene() { - Begin begin = new Begin(BeginConfig.builder() - .scoreStrategy(new SumScoreStrategy()) - .threshold(0) - .build()); - - DataLoader dataLoader = new DataLoader() { - @Override - public List prepareDataList() { - return ListUtils.of( - new InputData(MapUtils.of("scene", "chat", "query", "你好")), - new InputData(MapUtils.of("scene", "search", "query", "搜索词")), - new InputData(MapUtils.of("scene", "rag", "query", "文档问题")) - ); - } - }; - - // chat 评估器:只处理 scene=chat,固定得分 0.8 - Scorer chatScorer = buildScorer("对话质量", 0.8, 1.0, - item -> "chat".equals(item.getInputData().get("scene"))); - - // search 评估器:只处理 scene=search,固定得分 0.7 - Scorer searchScorer = buildScorer("搜索相关性", 0.7, 1.0, - item -> "search".equals(item.getInputData().get("scene"))); - - // rag 评估器:只处理 scene=rag,固定得分 0.9 - Scorer ragScorer = buildScorer("RAG准确率", 0.9, 1.0, - item -> "rag".equals(item.getInputData().get("scene"))); - - StdReporter reporter = new StdReporter(); - - new WorkflowBuilder() - .link(begin, dataLoader, chatScorer, searchScorer, ragScorer, reporter) - .build() - .execute(); - - // 通过 WorkflowContext 获取最终结果 - WorkflowContext ctx = begin.getWorkflowContext(); - List dataItems = WorkflowContextOps.getDataItems(ctx); - assertThat(dataItems).hasSize(3); - - // 找到 chat 数据项 - DataItem chatItem = dataItems.stream() - .filter(d -> "chat".equals(d.getInputData().get("scene"))) - .findFirst().orElseThrow(RuntimeException::new); - EvalResult chatResult = chatItem.getEvalResult(); - // chat 数据项:chatScorer 得分0.8,searchScorer/ragScorer 跳过(totalScore=0不计入) - // SumScoreStrategy 只计入 success=true 的 score,skip result score=0 + totalScore=0 - // 最终 score = 0.8 + 0 + 0 = 0.8(跳过的 totalScore=0,不影响归一化基准) - assertThat(chatResult.getScore()).isCloseTo(0.8, org.assertj.core.data.Offset.offset(1e-6)); - // 验证 chat 数据项确实包含 chatScorer 的正常评估结果 - boolean hasChatScore = chatResult.getScorerResults().stream() - .anyMatch(r -> "对话质量".equals(r.getMetric()) && r.getScore() > 0); - assertTrue(hasChatScore, "chat 数据项应包含对话质量评估结果"); - - // 找到 search 数据项 - DataItem searchItem = dataItems.stream() - .filter(d -> "search".equals(d.getInputData().get("scene"))) - .findFirst().orElseThrow(RuntimeException::new); - EvalResult searchResult = searchItem.getEvalResult(); - assertThat(searchResult.getScore()).isCloseTo(0.7, org.assertj.core.data.Offset.offset(1e-6)); - - // 找到 rag 数据项 - DataItem ragItem = dataItems.stream() - .filter(d -> "rag".equals(d.getInputData().get("scene"))) - .findFirst().orElseThrow(RuntimeException::new); - EvalResult ragResult = ragItem.getEvalResult(); - assertThat(ragResult.getScore()).isCloseTo(0.9, org.assertj.core.data.Offset.offset(1e-6)); - } - - @Test - @DisplayName("同一数据项被多个 Scorer 评估时(无 condition),分数正常累加") - void noCondition_allScorersEvaluateAllItems() { - Begin begin = new Begin(BeginConfig.builder() - .scoreStrategy(new SumScoreStrategy()) - .build()); - - DataLoader dataLoader = new DataLoader() { - @Override - public List prepareDataList() { - return ListUtils.of(new InputData(MapUtils.of("query", "测试"))); - } - }; - - // 两个无 condition 的 Scorer,分别得 0.6 和 0.4 - Scorer scorer1 = buildScorer("指标1", 0.6, 1.0, null); - Scorer scorer2 = buildScorer("指标2", 0.4, 1.0, null); - StdReporter reporter = new StdReporter(); - - new WorkflowBuilder() - .link(begin, dataLoader, scorer1, scorer2, reporter) - .build() - .execute(); - - WorkflowContext ctx = begin.getWorkflowContext(); - DataItem item = WorkflowContextOps.getDataItems(ctx).get(0); - // SumScoreStrategy: 0.6 + 0.4 = 1.0 - assertThat(item.getEvalResult().getScore()).isCloseTo(1.0, org.assertj.core.data.Offset.offset(1e-6)); - } - - @Test - @DisplayName("所有 Scorer 均未命中(全部跳过),最终分数为 0") - void allScorersSkip_finalScoreIsZero() { - Begin begin = new Begin(BeginConfig.builder() - .scoreStrategy(new SumScoreStrategy()) - .build()); - - DataLoader dataLoader = new DataLoader() { - @Override - public List prepareDataList() { - return ListUtils.of(new InputData(MapUtils.of("scene", "unknown"))); - } - }; - - Scorer chatScorer = buildScorer("对话质量", 0.8, 1.0, - item -> "chat".equals(item.getInputData().get("scene"))); - Scorer searchScorer = buildScorer("搜索相关性", 0.7, 1.0, - item -> "search".equals(item.getInputData().get("scene"))); - StdReporter reporter = new StdReporter(); - - new WorkflowBuilder() - .link(begin, dataLoader, chatScorer, searchScorer, reporter) - .build() - .execute(); - - WorkflowContext ctx = begin.getWorkflowContext(); - DataItem item = WorkflowContextOps.getDataItems(ctx).get(0); - // 两个 Scorer 都跳过,score=0+0=0 - assertThat(item.getEvalResult().getScore()).isCloseTo(0.0, org.assertj.core.data.Offset.offset(1e-6)); - } - } - - // ═══════════════════════════════════════════════════════════════ - // AvgScoreRateStrategy 下的跳过验证(验证 totalScore=0 不影响均值) - // ═══════════════════════════════════════════════════════════════ - - @Test - @DisplayName("跳过结果(totalScore=0)不影响整体得分(通过 Workflow 端到端验证)") - void skipResult_doesNotInfluenceFinalScore() { - // chat 场景:chatScorer 正常评 1.0,searchScorer 跳过(doExecute 层返回 totalScore=0) - // 验证最终 EvalResult.score 只包含正常评估的分数 - Scorer chatScorer = buildScorer("对话质量", 1.0, 1.0, - item -> "chat".equals(item.getInputData().get("scene"))); - Scorer searchScorer = buildScorer("搜索相关性", 0.5, 1.0, - item -> "search".equals(item.getInputData().get("scene"))); - - Begin begin = new Begin(BeginConfig.builder() - .scoreStrategy(new SumScoreStrategy()) - .build()); - - DataLoader dataLoader = new DataLoader() { - @Override - public List prepareDataList() { - // 只有 chat 场景的一条数据 - return ListUtils.of(new InputData(MapUtils.of("scene", "chat"))); - } - }; - - StdReporter reporter = new StdReporter(); - new WorkflowBuilder().link(begin, dataLoader, chatScorer, searchScorer, reporter).build().execute(); - - WorkflowContext ctx = begin.getWorkflowContext(); - DataItem item = WorkflowContextOps.getDataItems(ctx).get(0); - List scorerResults = item.getEvalResult().getScorerResults(); - assertThat(scorerResults).hasSize(2); - - // chatScorer 正常评估,score=1.0,totalScore=1.0 - ScorerResult chatResult = scorerResults.stream() - .filter(r -> "对话质量".equals(r.getMetric()) && !"skipped by condition".equals(r.getReason())) - .findFirst().orElseThrow(RuntimeException::new); - assertEquals(1.0, chatResult.getScore(), 1e-6); - assertEquals(1.0, chatResult.getTotalScore(), 1e-6); - - // searchScorer 跳过,score=0.0,totalScore=0.0(不计入汇总基准) - ScorerResult skipResult = scorerResults.stream() - .filter(r -> "skipped by condition".equals(r.getReason())) - .findFirst().orElseThrow(RuntimeException::new); - assertEquals(0.0, skipResult.getScore(), 1e-6); - assertEquals(0.0, skipResult.getTotalScore(), 1e-6); - assertTrue(skipResult.isSuccess()); - assertTrue(skipResult.isPass()); - - // SumScoreStrategy 最终分数 = 1.0(skip 的 score=0 不影响) - assertThat(item.getEvalResult().getScore()).isCloseTo(1.0, org.assertj.core.data.Offset.offset(1e-6)); - } -} - diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/ScorerTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/ScorerTest.java deleted file mode 100644 index a00651f..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/ScorerTest.java +++ /dev/null @@ -1,402 +0,0 @@ -package com.evalkit.framework.eval.node.scorer; - -import com.evalkit.framework.common.utils.map.MapUtils; -import com.evalkit.framework.eval.context.WorkflowContextOps; -import com.evalkit.framework.eval.model.ApiCompletionResult; -import com.evalkit.framework.eval.model.DataItem; -import com.evalkit.framework.eval.model.InputData; -import com.evalkit.framework.eval.model.ScorerResult; -import com.evalkit.framework.eval.node.scorer.config.ScorerConfig; -import com.evalkit.framework.eval.node.scorer.strategy.AvgScoreRateStrategy; -import com.evalkit.framework.eval.node.scorer.strategy.SumScoreStrategy; -import com.evalkit.framework.workflow.model.WorkflowContext; -import org.junit.jupiter.api.Test; - -import java.util.HashMap; -import java.util.Map; - -import static org.assertj.core.api.Assertions.assertThat; -import static org.assertj.core.api.Assertions.assertThatThrownBy; -import static org.junit.jupiter.api.Assertions.*; - -class ScorerTest { - - /** - * 构造一个最简单的具体 Scorer,始终返回指定分数 - */ - private Scorer buildScorer(String metric, double totalScore, double threshold, boolean star, double returnScore) { - ScorerConfig cfg = ScorerConfig.builder() - .metricName(metric) - .totalScore(totalScore) - .threshold(threshold) - .star(star) - .build(); - return new Scorer(cfg) { - @Override - public ScorerResult eval(DataItem dataItem) { - return new ScorerResult(metric, returnScore, totalScore, "理由"); - } - }; - } - - /** - * 构造一个始终抛异常的 Scorer - */ - private Scorer buildThrowingScorer(String metric) { - ScorerConfig cfg = ScorerConfig.builder().metricName(metric).build(); - return new Scorer(cfg) { - @Override - public ScorerResult eval(DataItem dataItem) throws Exception { - throw new RuntimeException("故意抛出的异常"); - } - }; - } - - /** - * 构建带上下文的 DataItem - */ - private DataItem buildDataItem(long dataIndex, Scorer scorer, SumScoreStrategy strategy) { - WorkflowContext ctx = new WorkflowContext(); - WorkflowContextOps.setScorerStrategy(ctx, strategy); - WorkflowContextOps.setThreshold(ctx, 0.5); - scorer.setWorkflowContext(ctx); - - DataItem dataItem = new DataItem(); - dataItem.setDataIndex(dataIndex); - Map input = new HashMap<>(); - input.put("query", "测试查询"); - dataItem.setInputData(new InputData(dataIndex, input)); - ApiCompletionResult result = new ApiCompletionResult(); - result.setSuccess(true); - Map res = new HashMap<>(); - res.put("response", "测试回复"); - result.setResultItem(res); - dataItem.setApiCompletionResult(result); - return dataItem; - } - - // ─────────────────────────── calcScoreRate ─────────────────────────── - - @Test - void calcScoreRate_normalCase() { - double rate = Scorer.calcScoreRate(0.8, 1.0); - assertEquals(0.8, rate, 1e-6); - } - - @Test - void calcScoreRate_totalScoreIsZero_returnsZero() { - double rate = Scorer.calcScoreRate(0.5, 0.0); - assertEquals(0.0, rate, 1e-6); - } - - @Test - void calcScoreRate_fullScore() { - double rate = Scorer.calcScoreRate(3.0, 3.0); - assertEquals(1.0, rate, 1e-6); - } - - @Test - void calcScoreRate_zeroScore() { - double rate = Scorer.calcScoreRate(0.0, 5.0); - assertEquals(0.0, rate, 1e-6); - } - - // ─────────────────────────── validConfig ───────────────────────────── - - @Test - void validConfig_nullConfig_throwsIllegalArgument() { - assertThatThrownBy(() -> buildScorer(null, 1, 0, false, 1)) - .isInstanceOf(IllegalArgumentException.class); - } - - @Test - void validConfig_negativeThreshold_throwsIllegalArgument() { - assertThatThrownBy(() -> { - ScorerConfig cfg = ScorerConfig.builder().metricName("m").threshold(-0.1).build(); - new Scorer(cfg) { - @Override - public ScorerResult eval(DataItem dataItem) { - return null; - } - }; - }).isInstanceOf(IllegalArgumentException.class); - } - - @Test - void validConfig_zeroThreadNum_throwsIllegalArgument() { - assertThatThrownBy(() -> { - ScorerConfig cfg = ScorerConfig.builder().metricName("m").threadNum(0).build(); - new Scorer(cfg) { - @Override - public ScorerResult eval(DataItem dataItem) { - return null; - } - }; - }).isInstanceOf(IllegalArgumentException.class); - } - - // ─────────────────────────── buildErrorResult ──────────────────────── - - @Test - void buildErrorResult_returnsFailedResult() { - Scorer scorer = buildScorer("m", 1.0, 0, false, 1); - DataItem item = new DataItem(); - item.setDataIndex(42L); - RuntimeException ex = new RuntimeException("test error"); - - ScorerResult result = scorer.buildErrorResult(item, ex); - - assertFalse(result.isSuccess()); - assertFalse(result.isPass()); - assertEquals(0, result.getScore(), 1e-6); - assertEquals(42L, result.getDataIndex()); - assertTrue(result.getReason().contains("test error")); - } - - // ─────────────────────────── evalWrapper ───────────────────────────── - - @Test - void evalWrapper_normalEval_returnsCorrectResult() { - Scorer scorer = buildScorer("准确率", 1.0, 0.5, false, 1.0); - DataItem item = buildDataItem(1L, scorer, new SumScoreStrategy()); - - ScorerResult result = scorer.evalWrapper(item); - - assertTrue(result.isSuccess()); - assertEquals(1.0, result.getScore(), 1e-6); - assertEquals(1.0, result.getScoreRate(), 1e-6); - assertEquals("准确率", result.getMetric()); - } - - @Test - void evalWrapper_exceptionInEval_returnsErrorResult() { - Scorer scorer = buildThrowingScorer("异常评估器"); - WorkflowContext ctx = new WorkflowContext(); - WorkflowContextOps.setScorerStrategy(ctx, new SumScoreStrategy()); - scorer.setWorkflowContext(ctx); - DataItem item = new DataItem(); - item.setDataIndex(99L); - item.setInputData(new InputData(99L, new HashMap<>())); - - ScorerResult result = scorer.evalWrapper(item); - - assertFalse(result.isSuccess()); - assertEquals(0, result.getScore(), 1e-6); - assertTrue(result.getReason().contains("故意抛出的异常")); - } - - // ─────────────────────────── decidePass (via evalWrapper) ───────────── - - @Test - void decidePass_scoreValueStrategy_pass() { - // SumScoreStrategy is ScoreValueStrategy, threshold=0.5, score=1.0 → pass - Scorer scorer = buildScorer("m", 1.0, 0.5, false, 1.0); - DataItem item = buildDataItem(1L, scorer, new SumScoreStrategy()); - - ScorerResult result = scorer.evalWrapper(item); - assertTrue(result.isPass()); - } - - @Test - void decidePass_scoreValueStrategy_fail() { - // threshold=0.9, score=0.5 → fail - Scorer scorer = buildScorer("m", 1.0, 0.9, false, 0.5); - DataItem item = buildDataItem(2L, scorer, new SumScoreStrategy()); - - ScorerResult result = scorer.evalWrapper(item); - assertFalse(result.isPass()); - } - - @Test - void decidePass_scoreRateStrategy_pass() { - // AvgScoreRateStrategy is ScoreRateStrategy, threshold=0.5, score=0.8/1.0=0.8 → pass - Scorer scorer = buildScorer("m", 1.0, 0.5, false, 0.8); - WorkflowContext ctx = new WorkflowContext(); - WorkflowContextOps.setScorerStrategy(ctx, new AvgScoreRateStrategy()); - scorer.setWorkflowContext(ctx); - DataItem item = new DataItem(); - item.setDataIndex(3L); - item.setInputData(new InputData(3L, new HashMap<>())); - - ScorerResult result = scorer.evalWrapper(item); - assertTrue(result.isPass()); - } - - // ─────────────────────────── star field propagation ─────────────────── - - @Test - void evalWrapper_starFlag_propagatedToResult() { - Scorer scorer = buildScorer("必过项", 1.0, 0.5, true, 1.0); - DataItem item = buildDataItem(10L, scorer, new SumScoreStrategy()); - - ScorerResult result = scorer.evalWrapper(item); - assertTrue(result.isStar()); - } - - // ─────────────────────────── dynamicTotalScore ─────────────────────── - - @Test - void evalWrapper_dynamicTotalScore_usesResultTotalScore() { - ScorerConfig cfg = ScorerConfig.builder() - .metricName("动态总分") - .totalScore(1.0) // 配置总分1 - .dynamicTotalScore(true) - .build(); - Scorer scorer = new Scorer(cfg) { - @Override - public ScorerResult eval(DataItem dataItem) { - // 返回评估结果中的 totalScore=5,分数=4 - return new ScorerResult("动态总分", 4.0, 5.0, "理由"); - } - }; - WorkflowContext ctx = new WorkflowContext(); - WorkflowContextOps.setScorerStrategy(ctx, new SumScoreStrategy()); - scorer.setWorkflowContext(ctx); - DataItem item = new DataItem(); - item.setDataIndex(5L); - item.setInputData(new InputData(5L, new HashMap<>())); - - ScorerResult result = scorer.evalWrapper(item); - - // totalScore 来自评估结果中的 5, scoreRate=4/5=0.8 - assertThat(result.getTotalScore()).isCloseTo(5.0, org.assertj.core.data.Offset.offset(1e-6)); - assertThat(result.getScoreRate()).isCloseTo(0.8, org.assertj.core.data.Offset.offset(1e-6)); - } - - // ─────────────────────────── shouldEval(条件跳过)──────────────────── - - @Test - void shouldEval_nullCondition_alwaysTrue() { - // condition=null 时,shouldEval 始终返回 true(向前兼容,不过滤任何数据项) - Scorer scorer = buildScorer("m", 1.0, 0, false, 1.0); - DataItem item = new DataItem(); - item.setDataIndex(1L); - assertTrue(scorer.shouldEval(item)); - } - - @Test - void shouldEval_conditionMatches_returnsTrue() { - // condition 命中时返回 true,本 Scorer 正常执行 - ScorerConfig cfg = ScorerConfig.builder() - .metricName("m") - .condition(i -> "chat".equals(i.getInputData().get("scene"))) - .build(); - Scorer scorer = new Scorer(cfg) { - @Override - public ScorerResult eval(DataItem d) { - return null; - } - }; - DataItem item = new DataItem(); - item.setInputData(new InputData(MapUtils.of("scene", "chat"))); - assertTrue(scorer.shouldEval(item)); - } - - @Test - void shouldEval_conditionNotMatches_returnsFalse() { - // condition 未命中时返回 false,doExecute 层将调用 buildSkipResult 跳过 - ScorerConfig cfg = ScorerConfig.builder() - .metricName("m") - .condition(i -> "chat".equals(i.getInputData().get("scene"))) - .build(); - Scorer scorer = new Scorer(cfg) { - @Override - public ScorerResult eval(DataItem d) { - return null; - } - }; - DataItem item = new DataItem(); - item.setInputData(new InputData(MapUtils.of("scene", "search"))); - assertFalse(scorer.shouldEval(item)); - } - - @Test - void shouldEval_conditionReturnsNull_treatedAsFalse() { - // condition 返回 null 时视为 false,防止 NPE - ScorerConfig cfg = ScorerConfig.builder() - .metricName("m") - .condition(i -> null) - .build(); - Scorer scorer = new Scorer(cfg) { - @Override - public ScorerResult eval(DataItem d) { - return null; - } - }; - assertFalse(scorer.shouldEval(new DataItem())); - } - - // ─────────────────────────── buildSkipResult(跳过结果)────────────── - - @Test - void buildSkipResult_fieldsCorrect() { - // 跳过结果的各字段语义:success=true、pass=true(不拉低通过率), - // totalScore=0(不影响汇总基准),reason 固定为 "skipped by condition" - Scorer scorer = buildScorer("指标A", 1.0, 0.5, false, 1.0); - DataItem item = new DataItem(); - item.setDataIndex(42L); - - ScorerResult skip = scorer.buildSkipResult(item); - - assertEquals(42L, skip.getDataIndex()); - assertEquals("指标A", skip.getMetric()); - assertEquals(0.0, skip.getScore(), 1e-6); - assertEquals(0.0, skip.getTotalScore(), 1e-6); - assertEquals("skipped by condition", skip.getReason()); - assertTrue(skip.isSuccess()); - assertTrue(skip.isPass()); // 跳过不算失败 - } - - @Test - void buildSkipResult_starIsFalse_noVeto() { - // 即使 config 中 star=true,跳过结果的 star 必须为 false, - // 防止跳过的数据项触发一票否决逻辑 - ScorerConfig cfg = ScorerConfig.builder() - .metricName("必过项") - .star(true) - .condition(i -> false) - .build(); - Scorer scorer = new Scorer(cfg) { - @Override - public ScorerResult eval(DataItem d) { - return null; - } - }; - DataItem item = new DataItem(); - item.setDataIndex(1L); - - assertFalse(scorer.buildSkipResult(item).isStar()); - } - - @Test - void buildSkipResult_customSkipScore_writtenToResult() { - // skipScore 配置的自定义值应写入跳过结果的 score 字段 - ScorerConfig cfg = ScorerConfig.builder() - .metricName("m") - .condition(i -> false) - .skipScore(0.5) - .build(); - Scorer scorer = new Scorer(cfg) { - @Override - public ScorerResult eval(DataItem d) { - return null; - } - }; - DataItem item = new DataItem(); - item.setDataIndex(1L); - - assertEquals(0.5, scorer.buildSkipResult(item).getScore(), 1e-6); - } - - @Test - void buildSkipResult_scorerTypePreserved() { - // 跳过结果应携带 scorerType,便于报告层区分来源 - Scorer scorer = buildScorer("m", 1.0, 0, false, 1.0); - DataItem item = new DataItem(); - item.setDataIndex(1L); - - ScorerResult skip = scorer.buildSkipResult(item); - assertNotNull(skip.getScorerType()); - assertFalse(skip.getScorerType().isEmpty()); - } -} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/SecurityScorerTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/SecurityScorerTest.java deleted file mode 100644 index c84815c..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/SecurityScorerTest.java +++ /dev/null @@ -1,74 +0,0 @@ -package com.evalkit.framework.eval.node.scorer; - -import com.evalkit.framework.eval.model.ApiCompletionResult; -import com.evalkit.framework.eval.model.DataItem; -import com.evalkit.framework.eval.model.InputData; -import com.evalkit.framework.eval.model.ScorerResult; -import com.evalkit.framework.eval.node.scorer.config.PromptBasedScorerConfig; -import com.evalkit.framework.infra.service.llm.LLMService; -import org.junit.jupiter.api.Test; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNotNull; - -class SecurityScorerTest { - - /** - * 构造一个 mock LLMService,返回安全评分 JSON 格式(符合 SecurityScorer 的期望) - */ - private LLMService buildMockLLMService() { - return new LLMService() { - @Override - public String chat(String prompt) { - // 返回符合 SecurityScorer parseLLMReply 期望的 JSON 格式 - return "{\"score\":1,\"reason\":\"内容安全,无违规信息\"}"; - } - - @Override - public String getModel() { - return "mock-model"; - } - }; - } - - @Test - void testConstructSecurityScorer() { - SecurityScorer securityScorer = new SecurityScorer( - PromptBasedScorerConfig.builder() - .llmService(buildMockLLMService()) - .build() - ) { - @Override - public String prepareUserPrompt(InputData inputData, ApiCompletionResult apiCompletionResult) { - return "测试文本:你好,今天天气真好!"; - } - }; - - assertNotNull(securityScorer, "SecurityScorer 实例不应为 null"); - } - - @Test - void testEvalWithMockLLM() { - SecurityScorer securityScorer = new SecurityScorer( - PromptBasedScorerConfig.builder() - .llmService(buildMockLLMService()) - .metricName("安全检查") - .totalScore(1) - .enableRetry(false) - .build() - ) { - @Override - public String prepareUserPrompt(InputData inputData, ApiCompletionResult apiCompletionResult) { - return "测试文本:你好,今天天气真好!"; - } - }; - - DataItem dataItem = new DataItem(); - dataItem.setInputData(new InputData()); - dataItem.setApiCompletionResult(new ApiCompletionResult()); - - ScorerResult result = securityScorer.eval(dataItem); - assertNotNull(result, "评分结果不应为 null"); - assertEquals(1.0, result.getScore(), 1e-6, "安全内容应得满分"); - } -} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/VectorSimilarityScorerTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/VectorSimilarityScorerTest.java deleted file mode 100644 index b57e35a..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/VectorSimilarityScorerTest.java +++ /dev/null @@ -1,18 +0,0 @@ -package com.evalkit.framework.eval.node.scorer; - -import com.evalkit.framework.eval.model.DataItem; -import com.evalkit.framework.eval.node.scorer.config.VectorSimilarityScorerConfig; -import org.apache.commons.lang3.tuple.Pair; - -class VectorSimilarityScorerTest { - void test() { - VectorSimilarityScorer vectorSimilarityScorer = new VectorSimilarityScorer( - VectorSimilarityScorerConfig.builder().similarityThreshold(0.8).build() - ) { - @Override - public Pair prepareFieldPair(DataItem dataItem) { - return null; - } - }; - } -} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/checker/AbstractCheckerTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/checker/AbstractCheckerTest.java deleted file mode 100644 index abce9de..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/checker/AbstractCheckerTest.java +++ /dev/null @@ -1,187 +0,0 @@ -package com.evalkit.framework.eval.node.scorer.checker; - -import com.evalkit.framework.eval.model.DataItem; -import com.evalkit.framework.eval.model.InputData; -import com.evalkit.framework.eval.node.scorer.checker.config.CheckerConfig; -import com.evalkit.framework.eval.node.scorer.checker.model.CheckItem; -import com.evalkit.framework.eval.node.scorer.checker.strategy.checkitem.SumCheckItemScoreMergeStrategy; -import org.junit.jupiter.api.Test; - -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; - -import static org.assertj.core.api.Assertions.assertThatThrownBy; -import static org.junit.jupiter.api.Assertions.*; - -class AbstractCheckerTest { - - // ─────────────── 辅助方法 ──────────────────────────────────────── - - private DataItem buildDataItem(long idx) { - DataItem item = new DataItem(); - item.setDataIndex(idx); - item.setInputData(new InputData(idx, new HashMap<>())); - return item; - } - - /** 构建一个固定检查项分数的简单 Checker */ - private AbstractChecker buildChecker(boolean support, double totalScore, - boolean star, List checkItems) { - CheckerConfig cfg = CheckerConfig.builder() - .name("测试检查器") - .totalScore(totalScore) - .star(star) - .strategy(new SumCheckItemScoreMergeStrategy()) - .build(); - return new AbstractChecker(cfg) { - @Override - public boolean support(DataItem dataItem) { - return support; - } - - @Override - public double getTotalScore() { - return totalScore; - } - - @Override - protected List prepareCheckItems(DataItem dataItem) { - return checkItems; - } - - @Override - protected void check(DataItem dataItem) { - // 简单赋分 - for (CheckItem ci : checkItems) { - ci.setExecuted(true); - } - } - }; - } - - // ─────────────────── checkWrapper: support=false 时跳过 ────────── - - @Test - void checkWrapper_notSupport_skips() { - CheckItem ci = CheckItem.builder().name("项A").build(); - // 初始分 0 - AbstractChecker checker = buildChecker(false, 1.0, false, Arrays.asList(ci)); - DataItem item = buildDataItem(1L); - checker.checkWrapper(item); - // 因为 support=false,check() 没有执行,checkItems 为默认值(builder 里的 empty list) - // 只验证不抛异常 - assertEquals(0.0, checker.getScore(), 1e-6); - } - - // ─────────────────── checkWrapper: 正常流程 ────────────────────── - - @Test - void checkWrapper_normalFlow_checkItemsSetAndMerged() { - CheckItem ci = CheckItem.builder().name("语言检查").totalScore(1.0).build(); - AbstractChecker checker = buildChecker(true, 1.0, false, Arrays.asList(ci)); - // 在 check 时手动设置分数 - DataItem item = buildDataItem(2L); - checker.checkWrapper(item); - // check 里只标记 executed,不设置分数,score 仍 0 - assertTrue(checker.getConfig().getCheckItems().get(0).isExecuted()); - } - - // ─────────────────── getScore / getReason ──────────────────────── - - @Test - void getScore_sumStrategy() { - CheckItem ci1 = CheckItem.builder().name("A").totalScore(1.0).build(); - CheckItem ci2 = CheckItem.builder().name("B").totalScore(1.0).build(); - ci1.setScore(0.8); - ci2.setScore(0.6); - - CheckerConfig cfg = CheckerConfig.builder() - .name("checker") - .totalScore(2.0) - .strategy(new SumCheckItemScoreMergeStrategy()) - .checkItems(Arrays.asList(ci1, ci2)) - .build(); - - AbstractChecker checker = new AbstractChecker(cfg) { - @Override - public boolean support(DataItem d) { return true; } - @Override - public double getTotalScore() { return 2.0; } - @Override - protected List prepareCheckItems(DataItem d) { return cfg.getCheckItems(); } - @Override - protected void check(DataItem d) {} - }; - - assertEquals(0.8 + 0.6, checker.getScore(), 1e-6); - } - - @Test - void getReason_returnsZeroScoreItemReasons() { - CheckItem pass = CheckItem.builder().name("通过项").build(); - CheckItem fail = CheckItem.builder().name("不通过项").build(); - pass.setScore(1.0); - pass.setReason("通过"); - fail.setScore(0.0); - fail.setReason("内容不符合要求"); - - CheckerConfig cfg = CheckerConfig.builder() - .name("checker") - .strategy(new SumCheckItemScoreMergeStrategy()) - .checkItems(Arrays.asList(pass, fail)) - .build(); - - AbstractChecker checker = new AbstractChecker(cfg) { - @Override - public boolean support(DataItem d) { return true; } - @Override - public double getTotalScore() { return 2.0; } - @Override - protected List prepareCheckItems(DataItem d) { return cfg.getCheckItems(); } - @Override - protected void check(DataItem d) {} - }; - - String reason = checker.getReason(); - assertTrue(reason.contains("内容不符合要求")); - assertFalse(reason.contains("通过")); - } - - // ─────────────────── star 标志 ─────────────────────────────────── - - @Test - void isStar_reflectsConfig() { - CheckItem ci = CheckItem.builder().name("x").build(); - AbstractChecker starChecker = buildChecker(true, 1.0, true, Arrays.asList(ci)); - AbstractChecker normalChecker = buildChecker(true, 1.0, false, Arrays.asList(ci)); - - assertTrue(starChecker.isStar()); - assertFalse(normalChecker.isStar()); - } - - // ─────────────────── checkWrapper: 异常传播 ────────────────────── - - @Test - void checkWrapper_exceptionPropagates() { - CheckItem ci = CheckItem.builder().name("x").build(); - CheckerConfig cfg = CheckerConfig.builder() - .name("错误检查器") - .strategy(new SumCheckItemScoreMergeStrategy()) - .build(); - AbstractChecker checker = new AbstractChecker(cfg) { - @Override - public boolean support(DataItem d) { return true; } - @Override - public double getTotalScore() { return 1.0; } - @Override - protected List prepareCheckItems(DataItem d) { return Arrays.asList(ci); } - @Override - protected void check(DataItem d) { throw new RuntimeException("check error"); } - }; - - assertThatThrownBy(() -> checker.checkWrapper(buildDataItem(1L))) - .isInstanceOf(RuntimeException.class) - .hasMessageContaining("check error"); - } -} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/checker/LLMBasedCheckerTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/checker/LLMBasedCheckerTest.java deleted file mode 100644 index 5bd8372..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/checker/LLMBasedCheckerTest.java +++ /dev/null @@ -1,68 +0,0 @@ -package com.evalkit.framework.eval.node.scorer.checker; - -import com.evalkit.framework.eval.model.DataItem; -import com.evalkit.framework.eval.node.scorer.checker.config.LLMBasedCheckerConfig; -import com.evalkit.framework.eval.node.scorer.checker.model.CheckItem; -import com.evalkit.framework.infra.service.llm.LLMService; -import org.junit.jupiter.api.Test; - -import java.util.Collections; -import java.util.List; - -import static org.junit.jupiter.api.Assertions.assertNotNull; - -class LLMBasedCheckerTest { - - /** - * 构造一个 mock LLMService,不依赖外部服务 - */ - private LLMService buildMockLLMService() { - return new LLMService() { - @Override - public String chat(String prompt) { - return "mock reply"; - } - - @Override - public String getModel() { - return "mock-model"; - } - }; - } - - @Test - void testConstructLLMBasedChecker() { - LLMBasedChecker checker = new LLMBasedChecker( - LLMBasedCheckerConfig.builder() - .llmService(buildMockLLMService()) - .build() - ) { - @Override - protected List prepareCheckItems(DataItem dataItem) { - return Collections.emptyList(); - } - - @Override - protected String prepareUserPrompt(DataItem dataItem, int round) { - return ""; - } - - @Override - protected boolean needCheck(DataItem dataItem, int round) { - return false; - } - - @Override - public boolean support(DataItem dataItem) { - return false; - } - - @Override - public double getTotalScore() { - return 0; - } - }; - - assertNotNull(checker, "LLMBasedChecker 实例不应为 null"); - } -} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/checker/model/CheckItemTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/checker/model/CheckItemTest.java deleted file mode 100644 index 62ebc59..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/checker/model/CheckItemTest.java +++ /dev/null @@ -1,116 +0,0 @@ -package com.evalkit.framework.eval.node.scorer.checker.model; - -import com.evalkit.framework.eval.node.scorer.checker.constants.CheckMethod; -import org.junit.jupiter.api.Test; - -import static org.assertj.core.api.Assertions.assertThatThrownBy; -import static org.junit.jupiter.api.Assertions.*; - -class CheckItemTest { - - // ─────────────────────────── 默认值验证 ──────────────────────────── - - @Test - void defaultValues_areCorrect() { - CheckItem item = CheckItem.builder().name("检查项").build(); - assertEquals("检查项", item.getName()); - assertEquals(1.0, item.getTotalScore(), 1e-6); - assertEquals(1.0, item.getWeight(), 1e-6); - assertFalse(item.isStar()); - assertTrue(item.isSupport()); - assertEquals(0.0, item.getDefaultScore(), 1e-6); - assertFalse(item.isExecuted()); - assertEquals(CheckMethod.NONE, item.getCheckMethod()); - } - - // ─────────────────────────── 参数校验 ───────────────────────────── - - @Test - void build_blankName_throwsIllegalArgument() { - assertThatThrownBy(() -> CheckItem.builder().name("").build()) - .isInstanceOf(IllegalArgumentException.class) - .hasMessageContaining("不能为空"); - } - - @Test - void build_negativeTotalScore_throwsIllegalArgument() { - assertThatThrownBy(() -> CheckItem.builder().name("x").totalScore(-1).build()) - .isInstanceOf(IllegalArgumentException.class); - } - - @Test - void build_negativeWeight_throwsIllegalArgument() { - assertThatThrownBy(() -> CheckItem.builder().name("x").weight(-0.1).build()) - .isInstanceOf(IllegalArgumentException.class); - } - - @Test - void build_negativeDefaultScore_throwsIllegalArgument() { - assertThatThrownBy(() -> CheckItem.builder().name("x").defaultScore(-1).build()) - .isInstanceOf(IllegalArgumentException.class); - } - - // ─────────────────────────── getWeightScore ─────────────────────── - - @Test - void getWeightScore_normalCase() { - CheckItem item = CheckItem.builder().name("x").weight(2.0).build(); - item.setScore(0.8); - assertEquals(1.6, item.getWeightScore(), 1e-6); - } - - @Test - void getWeightScore_zeroScore() { - CheckItem item = CheckItem.builder().name("x").weight(3.0).build(); - item.setScore(0.0); - assertEquals(0.0, item.getWeightScore(), 1e-6); - } - - // ─────────────────────────── support=false 时初始分数取 defaultScore ─ - - @Test - void support_false_scoreEqualsDefaultScore() { - CheckItem item = CheckItem.builder() - .name("x") - .support(false) - .defaultScore(0.5) - .build(); - assertFalse(item.isSupport()); - assertEquals(0.5, item.getScore(), 1e-6); - } - - // ─────────────────────────── star 标志 ──────────────────────────── - - @Test - void star_flag_isSetCorrectly() { - CheckItem item = CheckItem.builder().name("必过项").star(true).build(); - assertTrue(item.isStar()); - } - - // ─────────────────────────── setter/getter ───────────────────────── - - @Test - void setters_workCorrectly() { - CheckItem item = CheckItem.builder().name("item").build(); - item.setScore(0.9); - item.setReason("测试理由"); - item.setExecuted(true); - item.setCheckMethod(CheckMethod.LLM); - - assertEquals(0.9, item.getScore(), 1e-6); - assertEquals("测试理由", item.getReason()); - assertTrue(item.isExecuted()); - assertEquals(CheckMethod.LLM, item.getCheckMethod()); - } - - // ─────────────────────────── checkDescription ───────────────────── - - @Test - void checkDescription_isSetAndRetrieved() { - CheckItem item = CheckItem.builder() - .name("x") - .checkDescription("这是检查描述") - .build(); - assertEquals("这是检查描述", item.getCheckDescription()); - } -} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/strategy/ScoreStrategyTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/strategy/ScoreStrategyTest.java deleted file mode 100644 index 5b9fb04..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/strategy/ScoreStrategyTest.java +++ /dev/null @@ -1,247 +0,0 @@ -package com.evalkit.framework.eval.node.scorer.strategy; - -import com.evalkit.framework.eval.model.ScorerResult; -import org.junit.jupiter.api.Test; - -import java.util.Arrays; -import java.util.Collections; -import java.util.List; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; - -/** - * 各评估分数策略单元测试 - *

- * 覆盖: SumScoreStrategy / AvgScoreStrategy / MinScoreStrategy - * AvgScoreRateStrategy / MaxScoreRateStrategy / MinScoreRateStrategy / SumScoreRateStrategy - */ -class ScoreStrategyTest { - - // ─────────────── 辅助方法 ──────────────────────────────────────── - - private ScorerResult r(double score, double scoreRate) { - return ScorerResult.builder() - .metric("m") - .score(score) - .scoreRate(scoreRate) - .success(true) - .build(); - } - - // ═══════════════════════════════════════════════════════════════ - // SumScoreStrategy - // ═══════════════════════════════════════════════════════════════ - - @Test - void sumScore_normalCase() { - SumScoreStrategy s = new SumScoreStrategy(); - List rs = Arrays.asList(r(0.8, 0.8), r(0.6, 0.6)); - assertEquals(1.4, s.calScore(rs), 1e-6); - } - - @Test - void sumScore_emptyList_returnsZero() { - SumScoreStrategy s = new SumScoreStrategy(); - assertEquals(0.0, s.calScore(Collections.emptyList()), 1e-6); - } - - @Test - void sumScore_skipsFailedResults() { - // SumScoreStrategy: 仅对 success=true 的结果求和 - ScorerResult failed = ScorerResult.builder().metric("f").score(0.9).success(false).build(); - ScorerResult passed = ScorerResult.builder().metric("p").score(1.0).success(true).build(); - SumScoreStrategy s = new SumScoreStrategy(); - // failed 不被计入(isSuccess=false 时不加) - assertEquals(1.0, s.calScore(Arrays.asList(failed, passed)), 1e-6); - } - - @Test - void sumScore_strategyName() { - assertEquals("分数求和策略", new SumScoreStrategy().getStrategyName()); - } - - // ═══════════════════════════════════════════════════════════════ - // AvgScoreStrategy - // ═══════════════════════════════════════════════════════════════ - - @Test - void avgScore_normalCase() { - AvgScoreStrategy s = new AvgScoreStrategy(); - List rs = Arrays.asList(r(0.8, 0.8), r(0.6, 0.6)); - assertEquals(0.7, s.calScore(rs), 1e-6); - } - - @Test - void avgScore_emptyList_returnsZero() { - assertEquals(0.0, new AvgScoreStrategy().calScore(Collections.emptyList()), 1e-6); - } - - @Test - void avgScore_singleElement() { - assertEquals(0.9, new AvgScoreStrategy().calScore(Collections.singletonList(r(0.9, 0.9))), 1e-6); - } - - @Test - void avgScore_skipsNegativeScore() { - // score=-1 的结果被跳过 - AvgScoreStrategy s = new AvgScoreStrategy(); - List rs = Arrays.asList(r(1.0, 1.0), r(-1.0, 0.0)); - // 只有 score=1.0 有效 → 平均 = 1.0/1 = 1.0 - assertEquals(1.0, s.calScore(rs), 1e-6); - } - - @Test - void avgScore_strategyName() { - assertEquals("平均分数策略", new AvgScoreStrategy().getStrategyName()); - } - - // ═══════════════════════════════════════════════════════════════ - // MinScoreStrategy - // ═══════════════════════════════════════════════════════════════ - - @Test - void minScore_normalCase() { - MinScoreStrategy s = new MinScoreStrategy(); - List rs = Arrays.asList(r(0.8, 0.8), r(0.3, 0.3), r(1.0, 1.0)); - assertEquals(0.3, s.calScore(rs), 1e-6); - } - - @Test - void minScore_emptyList_returnsZero() { - assertEquals(0.0, new MinScoreStrategy().calScore(Collections.emptyList()), 1e-6); - } - - @Test - void minScore_singleElement() { - assertEquals(0.7, new MinScoreStrategy().calScore(Collections.singletonList(r(0.7, 0.7))), 1e-6); - } - - @Test - void minScore_strategyName() { - assertEquals("最小分数策略", new MinScoreStrategy().getStrategyName()); - } - - // ═══════════════════════════════════════════════════════════════ - // AvgScoreRateStrategy - // ═══════════════════════════════════════════════════════════════ - - @Test - void avgScoreRate_normalCase() { - AvgScoreRateStrategy s = new AvgScoreRateStrategy(); - // (0.8 + 0.6) / 2 = 0.7 - List rs = Arrays.asList(r(0.8, 0.8), r(0.6, 0.6)); - assertEquals(0.7, s.calScore(rs), 1e-6); - } - - @Test - void avgScoreRate_emptyList_returnsZero() { - assertEquals(0.0, new AvgScoreRateStrategy().calScore(Collections.emptyList()), 1e-6); - } - - @Test - void avgScoreRate_singleElement() { - assertEquals(0.5, new AvgScoreRateStrategy().calScore(Collections.singletonList(r(0.5, 0.5))), 1e-6); - } - - @Test - void avgScoreRate_strategyName() { - assertEquals("平均得分率策略", new AvgScoreRateStrategy().getStrategyName()); - } - - // ═══════════════════════════════════════════════════════════════ - // MaxScoreRateStrategy - // ═══════════════════════════════════════════════════════════════ - - @Test - void maxScoreRate_normalCase() { - MaxScoreRateStrategy s = new MaxScoreRateStrategy(); - List rs = Arrays.asList(r(0.3, 0.3), r(0.9, 0.9), r(0.5, 0.5)); - assertEquals(0.9, s.calScore(rs), 1e-6); - } - - @Test - void maxScoreRate_emptyList_returnsZero() { - assertEquals(0.0, new MaxScoreRateStrategy().calScore(Collections.emptyList()), 1e-6); - } - - @Test - void maxScoreRate_strategyName() { - assertEquals("最大得分率策略", new MaxScoreRateStrategy().getStrategyName()); - } - - // ═══════════════════════════════════════════════════════════════ - // MinScoreRateStrategy - // ═══════════════════════════════════════════════════════════════ - - @Test - void minScoreRate_emptyList_returnsZero() { - assertEquals(0.0, new MinScoreRateStrategy().calScore(Collections.emptyList()), 1e-6); - } - - @Test - void minScoreRate_strategyName() { - assertEquals("最小得分率策略", new MinScoreRateStrategy().getStrategyName()); - } - - // ═══════════════════════════════════════════════════════════════ - // SumScoreRateStrategy - // ═══════════════════════════════════════════════════════════════ - - @Test - void sumScoreRate_normalCase() { - SumScoreRateStrategy s = new SumScoreRateStrategy(); - List rs = Arrays.asList(r(0.5, 0.5), r(0.7, 0.7)); - assertEquals(1.2, s.calScore(rs), 1e-6); - } - - @Test - void sumScoreRate_emptyList_returnsZero() { - assertEquals(0.0, new SumScoreRateStrategy().calScore(Collections.emptyList()), 1e-6); - } - - @Test - void sumScoreRate_strategyName() { - assertEquals("得分率求和策略", new SumScoreRateStrategy().getStrategyName()); - } - - // ═══════════════════════════════════════════════════════════════ - // ScoreStrategy 类型判断 - // ═══════════════════════════════════════════════════════════════ - - @Test - void sumScore_isScoreValueStrategy() { - assertTrue(new SumScoreStrategy() instanceof ScoreValueStrategy); - } - - @Test - void avgScore_isScoreValueStrategy() { - assertTrue(new AvgScoreStrategy() instanceof ScoreValueStrategy); - } - - @Test - void minScore_isScoreValueStrategy() { - assertTrue(new MinScoreStrategy() instanceof ScoreValueStrategy); - } - - @Test - void avgScoreRate_isScoreRateStrategy() { - assertTrue(new AvgScoreRateStrategy() instanceof ScoreRateStrategy); - } - - @Test - void maxScoreRate_isScoreRateStrategy() { - assertTrue(new MaxScoreRateStrategy() instanceof ScoreRateStrategy); - } - - @Test - void minScoreRate_isScoreRateStrategy() { - assertTrue(new MinScoreRateStrategy() instanceof ScoreRateStrategy); - } - - @Test - void sumScoreRate_isScoreRateStrategy() { - assertTrue(new SumScoreRateStrategy() instanceof ScoreRateStrategy); - } -} - diff --git a/evalkit-eval/src/test/resources/dataItems.json b/evalkit-eval/src/test/resources/dataItems.json deleted file mode 100644 index 082c1cb..0000000 --- a/evalkit-eval/src/test/resources/dataItems.json +++ /dev/null @@ -1,174 +0,0 @@ -[ - { - "dataIndex": 0, - "inputData": { - "dataIndex": 0, - "inputItem": { - "query": "hello, 元宵节", - "type": "1" - } - }, - "apiCompletionResult": { - "dataIndex": 0, - "resultItem": { - "response": "Mock response for hello, 元宵节" - }, - "startTime": 1763027533462, - "endTime": 1763027533463, - "timeCost": 1, - "success": true - }, - "evalResult": { - "dataIndex": 0, - "score": 0.0, - "reason": "由于用户查询信息不完整(缺少具体日期、目的地等关键要素),AI助手仅提示信息缺失而未主动推荐模糊条件下的机票选项(如全国低价票或春季航班),也未通过交互引导用户补充信息,导致在信息卡片展示、用户偏好匹配和有效回复等所有评估维度均得分为0,反映出系统对模糊查询的处理能力不足。", - "startTime": 0, - "endTime": 0, - "timeCost": 0, - "scorerResults": [ - { - "dataIndex": 0, - "metric": "异常测试", - "score": 0.0, - "scoreRate": 0.0, - "totalScore": 1.0, - "reason": "Error: / by zero", - "extra": null, - "statTime": 0, - "endTime": 0, - "timeCost": 0, - "success": false, - "pass": false, - "threshold": 0.0, - "star": false - }, - { - "dataIndex": 0, - "metric": "回复长度检查", - "score": 1.0, - "scoreRate": 0.0, - "totalScore": 1.0, - "reason": "hello, 元宵节 的回复长度超过5个字符", - "extra": null, - "statTime": 1763027533557, - "endTime": 1763027533558, - "timeCost": 1, - "success": true, - "pass": true, - "threshold": 0.0, - "star": false - }, - { - "dataIndex": 0, - "metric": "相似度检查level1", - "score": 0.0, - "scoreRate": 0.0, - "totalScore": 1.0, - "reason": "相似度为0.0000,小于阈值0.0000", - "extra": { - "similarity": 0.0, - "similarityThreshold": 0.0 - }, - "statTime": 1763027533558, - "endTime": 1763027533900, - "timeCost": 342, - "success": true, - "pass": true, - "threshold": 0.0, - "star": false - } - ], - "success": false, - "pass": false, - "threshold": 1.0, - "scoreStrategyName": "最大得分率策略" - }, - "extra": null - }, - { - "dataIndex": 1, - "inputData": { - "dataIndex": 1, - "inputItem": { - "query": "hello, 国庆节", - "type": "1" - } - }, - "apiCompletionResult": { - "dataIndex": 1, - "resultItem": { - "response": "Mock response for hello, 国庆节" - }, - "startTime": 1763027533463, - "endTime": 1763027533463, - "timeCost": 0, - "success": true - }, - "evalResult": { - "dataIndex": 1, - "score": 0.0, - "reason": "在火车票推荐场景中,由于用户仅提供出发地和目的地而未明确任何偏好(如车次类型、座位等级、时间要求等),AI助手仅默认推荐无座/硬座的普通列车,既未主动引导用户补充信息,也未展示符合常规偏好的推荐方案(如高铁、卧铺等),导致在车次推荐、偏好匹配和有效回复等所有评估维度均得分为0,反映出系统对基础查询的默认推荐策略存在缺陷。", - "startTime": 0, - "endTime": 0, - "timeCost": 0, - "scorerResults": [ - { - "dataIndex": 1, - "metric": "异常测试", - "score": 0.0, - "scoreRate": 0.0, - "totalScore": 1.0, - "reason": "Error: / by zero", - "extra": null, - "statTime": 0, - "endTime": 0, - "timeCost": 0, - "success": false, - "pass": false, - "threshold": 0.0, - "star": false - }, - { - "dataIndex": 1, - "metric": "回复长度检查", - "score": 1.0, - "scoreRate": 0.0, - "totalScore": 1.0, - "reason": "hello, 国庆节 的回复长度超过5个字符", - "extra": null, - "statTime": 1763027533980, - "endTime": 1763027533980, - "timeCost": 0, - "success": true, - "pass": true, - "threshold": 0.0, - "star": false - }, - { - "dataIndex": 1, - "metric": "相似度检查level1", - "score": 0.0, - "scoreRate": 0.0, - "totalScore": 1.0, - "reason": "相似度为0.0000,小于阈值0.0000", - "extra": { - "similarity": 0.0, - "similarityThreshold": 0.0 - }, - "statTime": 1763027533900, - "endTime": 1763027533917, - "timeCost": 17, - "success": true, - "pass": true, - "threshold": 0.0, - "star": false - } - ], - "success": false, - "pass": false, - "threshold": 1.0, - "scoreStrategyName": "最大得分率策略" - }, - "extra": null - } -] \ No newline at end of file diff --git a/evalkit-eval/src/test/resources/travel_demo/scenario2_config.json b/evalkit-eval/src/test/resources/travel_demo/scenario2_config.json deleted file mode 100644 index 9642655..0000000 --- a/evalkit-eval/src/test/resources/travel_demo/scenario2_config.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "scenarioId": "itinerary_transport_hotel_flow", - "sparqlTemplate": "PREFIX travel: \nPREFIX rdfs: \n\nSELECT ?depCityName ?destCityName ?transportType ?transportNo ?hotelName ?roomName ?attractionName\nWHERE {\n ?depCity rdfs:label ?depCityName .\n\n ?destCity rdfs:label ?destCityName .\n\n ?transport travel:departure ?depCity ;\n travel:destination ?destCity ;\n travel:transportType ?transportType ;\n travel:transportNo ?transportNo .\n\n ?hotel travel:locatedIn ?destCity ;\n travel:hotelName ?hotelName .\n\n ?room travel:roomType ?roomType ;\n travel:roomName ?roomName .\n\n ?attr travel:locatedIn ?destCity ;\n travel:attractionName ?attractionName .\n\n FILTER(?depCity != ?destCity)\n\n FILTER(?depCityName != \"上海\")\n}", - "minSimilarity": 0.15, - "maxSimilarity": 0.85, - "goldenCase": { - "kgDataUsed": { - "depCityName": "上海", - "destCityName": "成都", - "transportType": "高铁", - "transportNo": "G321", - "hotelName": "熊猫主题客栈", - "roomName": "竹林亲子套房", - "attractionName": "大熊猫繁育基地" - }, - "dialogue": [ - { - "turn": 1, - "query": "打算带孩子去成都玩几天,有什么必打卡景点推荐吗?", - "expectedVars": [ - "attractionName" - ] - }, - { - "turn": 2, - "query": "从上海出发,有什么推荐的交通方式吗?", - "expectedVars": [ - "transportNo" - ] - }, - { - "turn": 3, - "query": "到了那边晚上住哪里比较方便?", - "expectedVars": [ - "hotelName" - ] - }, - { - "turn": 4, - "query": "家庭房还有吗。", - "expectedVars": [ - "roomName" - ] - }, - { - "turn": 5, - "query": "帮我把刚才看好的车票和这个亲子房一起下单吧。", - "expectedVars": [ - "transportNo", - "roomName" - ] - } - ] - } -} \ No newline at end of file diff --git a/pom.xml b/pom.xml index 5d8e097..d20c037 100644 --- a/pom.xml +++ b/pom.xml @@ -161,10 +161,18 @@ - + DeltaEvalFacadeTest OrderedDeltaEvalFacadeTest OrderedDeltaEvalWithinDataInjectTest + + DAGEvalPerformanceTest + DeltaEvalPerformanceTest + + ActiveMQEmbeddedServerTest + MixedEmbeddedServerTest + + RubricBasedScorerTest