diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..f4059c8 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,47 @@ +name: CI - Test & Coverage + +on: + push: + branches: + - main + - master + - 1.5.x + pull_request: + branches: + - main + - master + - 1.5.x + +jobs: + test: + name: Run Tests & Upload Coverage + runs-on: ubuntu-latest + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up JDK 8 + uses: actions/setup-java@v4 + with: + java-version: '8' + distribution: 'temurin' + cache: maven + + - name: Run Tests with JaCoCo + run: mvn -B test --no-transfer-progress -Dgpg.skip=true + + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v4 + with: + token: ${{ secrets.CODECOV_TOKEN }} + files: | + evalkit-common/target/site/jacoco/jacoco.xml + evalkit-workflow/target/site/jacoco/jacoco.xml + evalkit-infra/target/site/jacoco/jacoco.xml + evalkit-eval/target/site/jacoco/jacoco.xml + flags: unittests + name: evalkit-coverage + fail_ci_if_error: false + verbose: true + diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 8a1ccf9..ea7e4e7 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -5,7 +5,6 @@ on: branches: - main - master - - 1.4.x paths: - "docs/**" - ".github/workflows/docs.yml" diff --git a/README.md b/README.md index 9015e48..18e3745 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,13 @@ # EvalKit Framework +[![Maven Central](https://img.shields.io/maven-central/v/io.github.zendodx/evalkit-eval?color=blue&logo=apache-maven)](https://mvnrepository.com/artifact/io.github.zendodx/evalkit-eval) +[![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://www.apache.org/licenses/LICENSE-2.0) +[![Java](https://img.shields.io/badge/Java-8%2B-orange?logo=openjdk)](https://www.oracle.com/java/) 
+[![codecov](https://codecov.io/gh/zendodx/evalkit-framework/branch/master/graph/badge.svg)](https://codecov.io/gh/zendodx/evalkit-framework) +[![GitHub Stars](https://img.shields.io/github/stars/zendodx/evalkit-framework?style=social)](https://github.com/zendodx/evalkit-framework/stargazers) +[![GitHub Forks](https://img.shields.io/github/forks/zendodx/evalkit-framework?style=social)](https://github.com/zendodx/evalkit-framework/forks) + + ##### 📖 䞭文文档 | 📖 [English Documentation](README_en.md) ## 抂述 diff --git a/README_en.md b/README_en.md index 9dc5e59..e763aca 100644 --- a/README_en.md +++ b/README_en.md @@ -1,5 +1,12 @@ # EvalKit Framework +[![Maven Central](https://img.shields.io/maven-central/v/io.github.zendodx/evalkit-eval?color=blue&logo=apache-maven)](https://mvnrepository.com/artifact/io.github.zendodx/evalkit-eval) +[![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://www.apache.org/licenses/LICENSE-2.0) +[![Java](https://img.shields.io/badge/Java-8%2B-orange?logo=openjdk)](https://www.oracle.com/java/) +[![codecov](https://codecov.io/gh/zendodx/evalkit-framework/branch/master/graph/badge.svg)](https://codecov.io/gh/zendodx/evalkit-framework) +[![GitHub Stars](https://img.shields.io/github/stars/zendodx/evalkit-framework?style=social)](https://github.com/zendodx/evalkit-framework/stargazers) +[![GitHub Forks](https://img.shields.io/github/forks/zendodx/evalkit-framework?style=social)](https://github.com/zendodx/evalkit-framework/forks) + ##### 📖 English Documentation | 📖 [䞭文文档](README.md) ## Overview diff --git a/docs/dev-guide/github-codecov.md b/docs/dev-guide/github-codecov.md new file mode 100644 index 0000000..0d84580 --- /dev/null +++ b/docs/dev-guide/github-codecov.md @@ -0,0 +1,248 @@ +--- +layout: default +title: Github Codecov甚法参考 +parent: 匀发指南 +nav_order: 91 +--- +Github Codecov䜿甚方法 + +# 怎么从codecov获取token + +从 Codecov 获取 Token 的步骀劂䞋 + +## 1. 
登圕 Codecov + +访问 [https://codecov.io](https://codecov.io)点击右䞊角 **Sign up** / **Log in**选择 **Sign in with GitHub**授权后跳回 Codecov 銖页。 + +## 2. 添加䜠的仓库 + +登圕后巊䟧䌚列出䜠的 GitHub 组织/莊号扟到 `zendodx` → 搜玢 `evalkit-framework` → 点击 **Setup repo**或 **Configure**。 + +## 3. 获取 Token + +进入仓库讟眮页面后䌚看到劂䞋内容 + +``` +Step 2: add the repository upload token as a secret +CODECOV_TOKEN = xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx +``` + +倍制这䞲 UUID 栌匏的 token。 + +## 4. 添加到 GitHub Secrets + +1. 打匀 GitHub 仓库页面`https://github.com/zendodx/evalkit-framework` +2. 点击顶郚 **Settings** 标筟 +3. 巊䟧菜单扟到 **Secrets and variables → Actions** +4. 点击绿色按钮 **New repository secret** +5. 填写 + - **Name**`CODECOV_TOKEN` + - **Secret**粘莎刚才倍制的 token +6. 点击 **Add secret** 保存 + +--- + +完成后䞋次 push 代码觊发 CI 时GitHub Actions 就䌚自劚将芆盖率数据䞊䌠到 Codecov并圚仓库页面展瀺芆盖率趋势囟。 + +# 怎样圚github匀启代码芆盖率扫描 + +基于䜠的项目结构Maven + Java + GitHub Actions以䞋是匀启代码芆盖率扫描的完敎方案 + +## 敎䜓思路 + +需芁做䞀件事 +1. **圚 `pom.xml` 䞭添加 JaCoCo 插件** — 圚 Maven 构建时生成芆盖率报告 +2. **圚 GitHub Actions 䞭添加 CI workflow** — 自劚运行测试并䞊䌠芆盖率到 [Codecov](https://codecov.io)免莹、䞎 GitHub 集成奜 + +--- + +## 第䞀步圚根 `pom.xml` 添加 JaCoCo 插件 + +圚 `pom.xml` 的 `<properties>` 䞭加入版本号圚 `<build><plugins>` 䞭加入 JaCoCo 插件 + +```xml + +<plugin> + <groupId>org.jacoco</groupId> + <artifactId>jacoco-maven-plugin</artifactId> + <version>${jacoco-maven-plugin.version}</version> + <executions> + <execution> + <id>prepare-agent</id> + <goals> + <goal>prepare-agent</goal> + </goals> + </execution> + <execution> + <id>report</id> + <phase>test</phase> + <goals> + <goal>report</goal> + </goals> + </execution> + </executions> +</plugin> +``` + +## 第二步创建 GitHub Actions CI 工䜜流 + +```yml +name: CI - Test & Coverage + +on: + push: + branches: + - main + - master + - 1.5.x + - 1.4.x + pull_request: + branches: + - main + - master + - 1.5.x + - 1.4.x + +jobs: + test: + name: Run Tests & Upload Coverage + runs-on: ubuntu-latest + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up JDK 8 + uses: actions/setup-java@v4 + with: + java-version: '8' + distribution: 'temurin' + cache: maven + + - name: Run Tests with JaCoCo + run: mvn -B test --no-transfer-progress -Dgpg.skip=true + + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v4 + with: + token: ${{ 
secrets.CODECOV_TOKEN }} + files: | + evalkit-common/target/site/jacoco/jacoco.xml + evalkit-workflow/target/site/jacoco/jacoco.xml + evalkit-infra/target/site/jacoco/jacoco.xml + evalkit-eval/target/site/jacoco/jacoco.xml + evalkit-test/target/site/jacoco/jacoco.xml + flags: unittests + name: evalkit-coverage + fail_ci_if_error: false + verbose: true +``` + +## 第䞉步圚 GitHub 䞊配眮 Codecov Token + +Push 代码前䜠还需芁圚 GitHub 仓库䞭添加 Secret + +1. 访问 [codecov.io](https://codecov.io)甚 GitHub 莊号登圕 +2. 添加䜠的仓库 `zendodx/evalkit-framework`获取 `CODECOV_TOKEN` +3. 圚 GitHub 仓库页面进入 **Settings → Secrets and variables → Actions** +4. 点击 **New repository secret**名称填 `CODECOV_TOKEN`倌填入从 Codecov 获取的 token + +## 配眮完成后的效果 + +| 功胜 | 诎明 | +|------|------| +| 本地芆盖率报告 | 执行 `mvn test` 后圚各暡块的 `target/site/jacoco/index.html` 可查看 HTML 报告 | +| CI 自劚扫描 | 每次 push 或 PR 自劚运行测试生成芆盖率并䞊䌠到 Codecov | +| README 埜章 | Codecov 提䟛埜章可以莎到 `README.md` | + +## 可选圚 README 䞭添加芆盖率埜章 + +```markdown +[![codecov](https://codecov.io/gh/zendodx/evalkit-framework/branch/main/graph/badge.svg?token=YOUR_TOKEN)](https://codecov.io/gh/zendodx/evalkit-framework) +``` + +--- + +**泚意**由于根 `pom.xml` äž­ GPG 插件圚 `verify` 阶段䌚筟名CI 里甚了 `-Dgpg.skip=true` 跳过筟名只跑 `test` 阶段吊则䌚因䞺没有 GPG 私钥而倱莥。 + +# 怎么查看Jacoco的芆盖率 + +查看 Jacoco 芆盖率有以䞋几种方匏 + +## 方匏䞀盎接打匀 HTML 报告最掚荐 + +每次运行 `mvn test` 后Jacoco 䌚圚各暡块的 `target/site/jacoco/` 目圕生成 HTML 报告。 + +先查看项目䞭是吊已有报告 + +报告已经生成了。盎接甚浏览噚打匀 + +浏览噚已打匀报告。以䞋是所有查看方匏的诎明 + +--- + +## 查看 Jacoco 芆盖率的几种方匏 + +### 方匏䞀HTML 报告已自劚打匀 + +每次 `mvn test` 后各暡块郜䌚生成可视化报告路埄规埋䞺 + +``` +{暡块名}/target/site/jacoco/index.html +``` + +本项目已生成的报告 +- `evalkit-eval/target/site/jacoco/index.html` +- `evalkit-infra/target/site/jacoco/index.html` +- `evalkit-workflow/target/site/jacoco/index.html` +- `evalkit-common/target/site/jacoco/index.html` + +报告提䟛**行芆盖率**、**分支芆盖率**、**方法芆盖率**等倚绎床数据可以点击包名 → 类名 → 方法名逐层䞋钻查看具䜓哪些代码行被芆盖绿色=芆盖红色=未芆盖黄色=郚分芆盖。 + +--- + +### 方匏二呜什行查看汇总数据 + +```bash +# 查看某暡块的文本芆盖率汇总CSV 栌匏 +cat 
evalkit-eval/target/site/jacoco/jacoco.csv | head -5 +``` + +--- + +### 方匏䞉手劚重新生成报告 + +劂果想刷新芆盖率数据需芁重新运行测试以重新生成报告 + +```bash +# 重新跑测试并生成所有暡块报告 +mvn test --no-transfer-progress + +# 仅针对某䞪暡块 +mvn test -pl evalkit-eval --no-transfer-progress +``` + +--- + +### 方匏四生成聚合报告跚暡块汇总 + +劂果项目配眮了 `jacoco-report` 聚合暡块可以甚 + +```bash +mvn jacoco:report-aggregate +``` + +--- + +### 报告解读 + +HTML 报告的每列含义 + +| 列名 | 含义 | +|------|------| +| **Instructions** | 字节码指什芆盖率最粟确 | +| **Branches** | 分支芆盖率if/else/switch | +| **Lines** | 源代码行芆盖率 | +| **Methods** | 方法芆盖率 | +| **Classes** | 类芆盖率 | + diff --git a/docs/dev-guide/github-pages.md b/docs/dev-guide/github-pages.md index 4c636d8..99ca6ac 100644 --- a/docs/dev-guide/github-pages.md +++ b/docs/dev-guide/github-pages.md @@ -2,7 +2,7 @@ layout: default title: Github Pages甚法参考 parent: 匀发指南 -nav_order: 2 +nav_order: 90 --- Github Pages䜿甚方法 diff --git a/evalkit-eval/src/main/java/com/evalkit/framework/eval/node/scorer/config/RouterScorerConfig.java b/evalkit-eval/src/main/java/com/evalkit/framework/eval/node/scorer/config/RouterScorerConfig.java index 5805850..6e88adf 100644 --- a/evalkit-eval/src/main/java/com/evalkit/framework/eval/node/scorer/config/RouterScorerConfig.java +++ b/evalkit-eval/src/main/java/com/evalkit/framework/eval/node/scorer/config/RouterScorerConfig.java @@ -25,8 +25,8 @@ public class RouterScorerConfig extends ScorerConfig { @Builder.Default private Scorer defaultScorer = null; - /* 路由匹配暡匏false=first-matchtrue=match-all(默讀) */ + /* 路由匹配暡匏false=first-match默讀true=match-all */ @Builder.Default - private boolean matchAll = true; + private boolean matchAll = false; } diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/core/CoreTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/core/CoreTest.java index 8fef48d..8b001b1 100644 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/core/CoreTest.java +++ b/evalkit-eval/src/test/java/com/evalkit/framework/eval/core/CoreTest.java @@ 
-32,7 +32,6 @@ import com.evalkit.framework.eval.node.scorer.strategy.MaxScoreRateStrategy; import com.evalkit.framework.infra.service.llm.LLMService; import com.evalkit.framework.infra.service.llm.LLMTokenMetrics; -import com.evalkit.framework.infra.utils.DebugUtils; import com.evalkit.framework.workflow.WorkflowBuilder; import com.evalkit.framework.workflow.model.WorkflowContext; import com.fasterxml.jackson.core.type.TypeReference; @@ -40,6 +39,7 @@ import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import java.io.File; @@ -74,25 +74,45 @@ public class CoreTest { JsonReporter jsonReporter; End end; + /** + * 构造䞀䞪固定回倍的 mock LLMService䞍发起任䜕真实 HTTP 请求 + */ + private LLMService buildMockLLMService() { + return new LLMService() { + @Override + public String chat(String prompt) { + // 返回笊合 JSON 栌匏的 mock 回倍满足 AttributeCounter 的期望栌匏 + return "{\"attributes\":[{\"name\":\"mock_attr\",\"value\":\"mock_value\"}]}"; + } + + @Override + public String getModel() { + return "mock-model"; + } + }; + } + @BeforeEach public void init() { - LLMService llmService = DebugUtils.buildLLMService(); + // 䜿甚 mock LLMService 替代真实 DeepSeek 服务䞍䟝赖倖郚 token 或 HTTP 请求 + LLMService llmService = buildMockLLMService(); begin = new Begin( BeginConfig.builder() .scoreStrategy(new MaxScoreRateStrategy()) .threshold(1) -// .evalReasonStrategy(new LLMSummaryEvalReasonStrategy(llmService)) .evalReasonStrategy(new JsonEvalReasonStrategy()) .build() ); + // dataGenerator 只圚 dataGeneratorTest已 @Disabled䞭䜿甚䜆仍需初始化 + // travel_demo 盞关文件圚 classpath:src/test/resources/travel_demo/ 䞭已存圚 dataGenerator = new KGBasedQueryGenerator( KGBasedQueryGeneratorConfig.builder() .scenarioConfigFilePath(ListUtils.of("travel_demo/scenario_config.json")) - .kgFilePath("travel_demo/travel_kg_v2.ttl") + .kgFilePath("travel_demo/travel_kg.ttl") 
.llmService(llmService) - .enableOutputFile(true) + .enableOutputFile(false) .generateCount(1) .threadNum(1) .build() @@ -107,7 +127,7 @@ public void init() { public List prepareDataList() { List inputDatas = new ArrayList<>(); for (int i = 0; i < 10; i++) { - inputDatas.add(new InputData(1L, JsonUtils.fromJson("{\t\"query\":\"hello, {{holiday}}\",\"type\":\"1\"}", new TypeReference>() { + inputDatas.add(new InputData(1L, JsonUtils.fromJson("{\"query\":\"hello, world\",\"type\":\"1\"}", new TypeReference>() { }))); } return inputDatas; @@ -274,6 +294,7 @@ public void fullTest() { } @Test + @Disabled("䟝赖倖郚 LLM 服务需芁 secret.properties token及知识囟谱生成本地手劚测试") public void dataGeneratorTest() { List scorers = ListUtils.of(scorer1, scorer2, scorer3); List reporters = ListUtils.of(reporter, htmlReporter, csvReporter, excelReporter, jsonReporter); diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/facade/FullEvalFacadeWithinDataInjectTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/facade/FullEvalFacadeWithinDataInjectTest.java index 01c8733..a3de77e 100644 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/facade/FullEvalFacadeWithinDataInjectTest.java +++ b/evalkit-eval/src/test/java/com/evalkit/framework/eval/facade/FullEvalFacadeWithinDataInjectTest.java @@ -1,8 +1,8 @@ package com.evalkit.framework.eval.facade; import com.evalkit.framework.common.utils.file.FileUtils; +import com.evalkit.framework.common.utils.json.JsonUtils; import com.evalkit.framework.common.utils.list.ListUtils; -import com.evalkit.framework.common.utils.runtime.RuntimeEnvUtils; import com.evalkit.framework.common.utils.time.DateUtils; import com.evalkit.framework.eval.facade.config.FullEvalConfig; import com.evalkit.framework.eval.model.DataItem; @@ -22,12 +22,18 @@ import com.evalkit.framework.workflow.Workflow; import com.evalkit.framework.workflow.WorkflowBuilder; import lombok.extern.slf4j.Slf4j; -import org.junit.jupiter.api.Disabled; +import 
org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.function.ThrowingSupplier; import java.io.File; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.stream.Collectors; import static org.junit.jupiter.api.Assertions.assertTimeoutPreemptively; @@ -35,6 +41,8 @@ @Slf4j class FullEvalFacadeWithinDataInjectTest { + private File tempJsonFile; + /** * 自定义党量匏评测 */ @@ -58,11 +66,46 @@ protected void afterExecute() { } } + @BeforeEach + void setUp() throws IOException { + // 运行时劚态创建䞎时 JSON 测试文件䞍䟝赖倖郚文件路埄 + // 构造笊合 JsonFileDataLoader 期望栌匏的数据$.dataItems 数组 + List> dataItems = new java.util.ArrayList<>(); + for (int i = 0; i < 5; i++) { + Map inputItem = new HashMap<>(); + inputItem.put("query", "测试问题" + i); + inputItem.put("type", "1"); + + Map inputData = new HashMap<>(); + inputData.put("dataIndex", (long) i); + inputData.put("inputItem", inputItem); + + Map item = new HashMap<>(); + item.put("dataIndex", (long) i); + item.put("inputData", inputData); + dataItems.add(item); + } + Map jsonContent = new HashMap<>(); + jsonContent.put("dataItems", dataItems); + + // 写入䞎时文件 + tempJsonFile = File.createTempFile("full_eval_inject_test_", ".json"); + tempJsonFile.deleteOnExit(); + Files.write(tempJsonFile.toPath(), JsonUtils.toJson(jsonContent).getBytes(StandardCharsets.UTF_8)); + log.info("Created temp test file: {}", tempJsonFile.getAbsolutePath()); + } + + @AfterEach + void tearDown() { + if (tempJsonFile != null && tempJsonFile.exists()) { + tempJsonFile.delete(); + } + } + @Test - @Disabled public void test() throws Exception { - // 数据加蜜噚,匀启数据泚入 - String filePath = RuntimeEnvUtils.getPropertyFromResource("secret.properties", "json-file-datainjector-test-file"); + // 䜿甚运行时创建的䞎时文件䞍䟝赖倖郚文件或 secret.properties + String filePath = 
tempJsonFile.getAbsolutePath(); JsonFileDataLoader jsonFileDataLoader = new JsonFileDataLoader( JsonFileDataLoaderConfig.builder() .jsonPath("$.dataItems") @@ -101,7 +144,7 @@ public ScorerResult eval(DataItem dataItem) { ScorerResult scorerResult = new ScorerResult(); scorerResult.setMetric("eval-test-2"); scorerResult.setScore(1.0); - scorerResult.setReason("eval test1:" + dataItem.getInputData().get("query")); + scorerResult.setReason("eval test2:" + dataItem.getInputData().get("query")); return scorerResult; } }; diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/facade/OrderedDeltaEvalFacadeWithinDataInjectTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/facade/OrderedDeltaEvalFacadeWithinDataInjectTest.java index fb8d809..4ff8dcb 100644 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/facade/OrderedDeltaEvalFacadeWithinDataInjectTest.java +++ b/evalkit-eval/src/test/java/com/evalkit/framework/eval/facade/OrderedDeltaEvalFacadeWithinDataInjectTest.java @@ -1,8 +1,8 @@ package com.evalkit.framework.eval.facade; import com.evalkit.framework.common.utils.file.FileUtils; +import com.evalkit.framework.common.utils.json.JsonUtils; import com.evalkit.framework.common.utils.list.ListUtils; -import com.evalkit.framework.common.utils.runtime.RuntimeEnvUtils; import com.evalkit.framework.common.utils.time.DateUtils; import com.evalkit.framework.eval.facade.config.DeltaEvalConfig; import com.evalkit.framework.eval.model.DataItem; @@ -23,12 +23,16 @@ import com.evalkit.framework.workflow.Workflow; import com.evalkit.framework.workflow.WorkflowBuilder; import lombok.extern.slf4j.Slf4j; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.function.ThrowingSupplier; import java.io.File; -import java.util.Comparator; -import java.util.List; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; 
+import java.util.*; import java.util.stream.Collectors; import static org.junit.jupiter.api.Assertions.assertTimeoutPreemptively; @@ -77,16 +81,67 @@ protected void afterLoadData() { @Override protected void afterExecute() { log.info("===>Finish consume and eval, remain data size:{}, processed data size:{}", getRemainDataCount(), getProcessedDataCount()); - List files = FileUtils.listFiles("attaches/"); + List files = FileUtils.listFiles(config.getAttachDir()); List collect = files.stream().map(File::getName).collect(Collectors.toList()); log.info("===>attaches files:{}", collect); } } + private File tempJsonFile; + + @BeforeEach + void setUp() throws IOException { + // 运行时劚态创建䞎时 JSON 测试文件䞍䟝赖倖郚文件路埄或 secret.properties + // 构造笊合 openInjectData 暡匏的嵌套数据栌匏$.dataItems 数组 + // item.dataIndex → DataInjector.injectDataIndex 读取Long 类型 + // item.inputData → DataInjector.injectInputData 读取包含䞚务字段 + // inputData.dataIndex + // inputData.inputItem → 实际䞚务字段caseId、round、query + // 构建 3 䞪 caseId每䞪 caseId 有 2 蜮数据共 6 条 + List> dataItems = new ArrayList<>(); + long idx = 0L; + for (int caseId = 1; caseId <= 3; caseId++) { + for (int round = 1; round <= 2; round++) { + // 䞚务字段攟圚 inputItem äž­ + Map inputItem = new HashMap<>(); + inputItem.put("caseId", caseId); + inputItem.put("round", round); + inputItem.put("query", "caseId=" + caseId + " round=" + round + " 测试问题"); + + // 嵌套的 inputData 对象 + Map inputData = new HashMap<>(); + inputData.put("dataIndex", idx); + inputData.put("inputItem", inputItem); + + // 顶层 item + Map item = new HashMap<>(); + item.put("dataIndex", idx); + item.put("inputData", inputData); + dataItems.add(item); + idx++; + } + } + Map jsonContent = new HashMap<>(); + jsonContent.put("dataItems", dataItems); + + // 写入䞎时文件 + tempJsonFile = File.createTempFile("ordered_delta_eval_inject_test_", ".json"); + tempJsonFile.deleteOnExit(); + Files.write(tempJsonFile.toPath(), JsonUtils.toJson(jsonContent).getBytes(StandardCharsets.UTF_8)); + log.info("Created 
temp test file: {}", tempJsonFile.getAbsolutePath()); + } + + @AfterEach + void tearDown() { + if (tempJsonFile != null && tempJsonFile.exists()) { + tempJsonFile.delete(); + } + } + @Test public void test() throws Exception { - // 数据加蜜噚,匀启数据泚入 - String filePath = RuntimeEnvUtils.getPropertyFromResource("secret.properties", "json-file-datainjector-test-file"); + // 䜿甚运行时创建的䞎时文件䞍䟝赖倖郚文件或 secret.properties + String filePath = tempJsonFile.getAbsolutePath(); JsonFileDataLoader jsonFileDataLoader = new JsonFileDataLoader( JsonFileDataLoaderConfig.builder() .jsonPath("$.dataItems") @@ -124,7 +179,7 @@ public ScorerResult eval(DataItem dataItem) { ScorerResult scorerResult = new ScorerResult(); scorerResult.setMetric("eval-test-2"); scorerResult.setScore(1.0); - scorerResult.setReason("eval test1:" + dataItem.getInputData().get("query")); + scorerResult.setReason("eval test2:" + dataItem.getInputData().get("query")); return scorerResult; } }; @@ -140,12 +195,14 @@ public ScorerResult eval(DataItem dataItem) { }; // 评测结果䞊报 + String taskName = "OrderedDeltaEvalWithinDataInjectTest"; + String attachDir = "attachments/" + taskName; String fileName = "ordered_delta_eval_within_datainject_test_" + DateUtils.nowToString(); BasicCounter basicCounter = new BasicCounter(); - HtmlReporter htmlReporter = new HtmlReporter(fileName, fileName); - JsonReporter jsonReporter = new JsonReporter(fileName, fileName); - ExcelReporter excelReporter = new ExcelReporter(fileName, fileName); - CsvReporter csvReporter = new CsvReporter(fileName, fileName); + HtmlReporter htmlReporter = new HtmlReporter(fileName, attachDir); + JsonReporter jsonReporter = new JsonReporter(fileName, attachDir); + ExcelReporter excelReporter = new ExcelReporter(fileName, attachDir); + CsvReporter csvReporter = new CsvReporter(fileName, attachDir); List scorers = ListUtils.of(scorer1, scorer2, scorer3); @@ -156,7 +213,7 @@ public ScorerResult eval(DataItem dataItem) { CustomDeltaEval cfe = new CustomDeltaEval( 
DeltaEvalConfig.builder() - .taskName("OrderedDeltaEvalWithinDataInjectTest") + .taskName(taskName) .dataLoader(jsonFileDataLoader) .evalWorkflow(evalWorkflow) .reportWorkflow(reportWorkflow) diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/api/ApiCompletionTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/api/ApiCompletionTest.java index 7ca1efb..151c335 100644 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/api/ApiCompletionTest.java +++ b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/api/ApiCompletionTest.java @@ -1,98 +1,378 @@ package com.evalkit.framework.eval.node.api; -import com.evalkit.framework.common.utils.list.ListUtils; -import com.evalkit.framework.common.utils.map.MapUtils; +import com.evalkit.framework.eval.context.WorkflowContextOps; import com.evalkit.framework.eval.model.ApiCompletionResult; import com.evalkit.framework.eval.model.DataItem; import com.evalkit.framework.eval.model.InputData; import com.evalkit.framework.eval.node.api.config.ApiCompletionConfig; -import com.evalkit.framework.eval.node.begin.Begin; -import com.evalkit.framework.eval.node.dataloader.DataLoader; -import com.evalkit.framework.workflow.WorkflowBuilder; +import com.evalkit.framework.workflow.model.WorkflowContext; import lombok.extern.slf4j.Slf4j; +import org.junit.jupiter.api.DisplayName; import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.function.ThrowingSupplier; -import java.util.ArrayList; -import java.util.Collections; +import java.util.HashMap; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.CopyOnWriteArrayList; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicReference; -import static org.junit.jupiter.api.Assertions.assertTimeoutPreemptively; +import static org.junit.jupiter.api.Assertions.*; @Slf4j 
+@DisplayName("ApiCompletion 单元测试") class ApiCompletionTest { - private final class TestApiCompletion extends ApiCompletion { - public TestApiCompletion() { + + // ===================== 工具方法 ===================== + + /** + * 构建䞀䞪简单的 ApiCompletion 实现invoke 固定返回给定结果 + */ + private ApiCompletion buildApiCompletion(ApiCompletionResult fixedResult) { + return new ApiCompletion() { + @Override + protected ApiCompletionResult invoke(DataItem dataItem) { + return fixedResult; + } + }; + } + + /** + * 构建䞀䞪 invoke 抛出匂垞的 ApiCompletion 实现 + */ + private ApiCompletion buildThrowingApiCompletion(RuntimeException ex) { + return new ApiCompletion() { + @Override + protected ApiCompletionResult invoke(DataItem dataItem) { + throw ex; + } + }; + } + + /** + * 构造包含指定条数 DataItem 的 WorkflowContext + */ + private WorkflowContext buildContextWithDataItems(int size) { + WorkflowContext ctx = new WorkflowContext(); + List items = new CopyOnWriteArrayList<>(); + for (int i = 0; i < size; i++) { + Map inputItem = new HashMap<>(); + inputItem.put("id", i); + DataItem dataItem = new DataItem((long) i, new InputData(inputItem)); + items.add(dataItem); } + WorkflowContextOps.setDataItems(ctx, items); + return ctx; + } - public TestApiCompletion(ApiCompletionConfig config) { - super(config); + /** + * 䞺 ApiCompletion 泚入䞊䞋文并执行 + */ + private void executeWithContext(ApiCompletion api, WorkflowContext ctx) { + api.setWorkflowContext(ctx); + try { + api.call(); + } catch (Exception e) { + throw new RuntimeException(e); } + } - /* 甚来收集实际执行顺序 */ - private final Map> execOrder = new ConcurrentHashMap<>(); + // ===================== constructor 测试 ===================== - @Override - protected ApiCompletionResult invoke(DataItem dataItem) { - InputData inputData = dataItem.getInputData(); - String caseId = inputData.get("caseId"); - String query = inputData.get("query"); + @Test + @DisplayName("无参构造噚应䜿甚默讀 ApiCompletionConfig䞍抛出匂垞") + void testConstructor_defaultConfig() { + ApiCompletion api = 
buildApiCompletion(new ApiCompletionResult(new LinkedHashMap<>())); + assertNotNull(api.getConfig(), "默讀构造噚应初始化 config"); + assertEquals(1, api.getConfig().getThreadNum(), "默讀线皋数应䞺 1"); + assertEquals(120, api.getConfig().getTimeout(), "默讀超时应䞺 120"); + } - // 暡拟䞚务耗时 200ms - try { - Thread.sleep(200); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); + @Test + @DisplayName("垊 ApiCompletionConfig 构造噚应正确保存配眮") + void testConstructor_withConfig() { + ApiCompletionConfig config = ApiCompletionConfig.builder().threadNum(4).timeout(60).build(); + ApiCompletion api = new ApiCompletion(config) { + @Override + protected ApiCompletionResult invoke(DataItem dataItem) { + return null; + } + }; + assertEquals(4, api.getConfig().getThreadNum()); + assertEquals(60, api.getConfig().getTimeout()); + } + + // ===================== invokeWrapper 测试 ===================== + + @Test + @DisplayName("invokeWrapper 正垞调甚时应返回垊 dataIndex 的结果䞔 success=true") + void testInvokeWrapper_success() { + Map resultItem = new HashMap<>(); + resultItem.put("answer", "ok"); + ApiCompletionResult fixedResult = new ApiCompletionResult(resultItem); + + ApiCompletion api = buildApiCompletion(fixedResult); + DataItem dataItem = new DataItem(1L, new InputData(new HashMap<>())); + + ApiCompletionResult result = api.invokeWrapper(dataItem); + + assertNotNull(result, "返回结果䞍应䞺 null"); + assertEquals(1L, result.getDataIndex(), "dataIndex 应䞎 DataItem 䞀臎"); + assertTrue(result.isSuccess(), "正垞调甚时 success 应䞺 true"); + assertEquals("ok", result.get("answer"), "resultItem 内容应䞎 invoke 返回䞀臎"); + } + + @Test + @DisplayName("invokeWrapper 调甚耗时字段应被正确记圕") + void testInvokeWrapper_timeCostRecorded() { + ApiCompletion api = buildApiCompletion(new ApiCompletionResult(new HashMap<>())); + DataItem dataItem = new DataItem(0L, new InputData(new HashMap<>())); + + ApiCompletionResult result = api.invokeWrapper(dataItem); + + assertTrue(result.getStartTime() > 0, "startTime 应倧于 0"); + 
assertTrue(result.getEndTime() >= result.getStartTime(), "endTime 应 >= startTime"); + assertTrue(result.getTimeCost() >= 0, "timeCost 应 >= 0"); + } + + @Test + @DisplayName("invoke 返回 null 时invokeWrapper 应返回 success=false 的结果") + void testInvokeWrapper_invokeReturnsNull() { + ApiCompletion api = buildApiCompletion(null); + DataItem dataItem = new DataItem(2L, new InputData(new HashMap<>())); + + ApiCompletionResult result = api.invokeWrapper(dataItem); + + assertNotNull(result); + assertFalse(result.isSuccess(), "invoke 返回 null 时 success 应䞺 false"); + } + + @Test + @DisplayName("invoke 抛出匂垞时invokeWrapper 应捕获匂垞并返回 success=false 的结果") + void testInvokeWrapper_invokeThrows() { + ApiCompletion api = buildThrowingApiCompletion(new RuntimeException("mock error")); + DataItem dataItem = new DataItem(3L, new InputData(new HashMap<>())); + + ApiCompletionResult result = api.invokeWrapper(dataItem); + + assertNotNull(result, "invoke 抛匂垞后䞍应返回 null"); + assertFalse(result.isSuccess(), "invoke 抛匂垞时 success 应䞺 false"); + } + + @Test + @DisplayName("DataItem 已有 apiCompletionResult 时invokeWrapper 应盎接返回已有结果䞍重倍调甚") + void testInvokeWrapper_skipWhenResultExists() { + AtomicBoolean invoked = new AtomicBoolean(false); + ApiCompletion api = new ApiCompletion() { + @Override + protected ApiCompletionResult invoke(DataItem dataItem) { + invoked.set(true); + return new ApiCompletionResult(new HashMap<>()); + } + }; + + ApiCompletionResult existingResult = new ApiCompletionResult(new HashMap<>()); + existingResult.setDataIndex(5L); + DataItem dataItem = new DataItem(5L, new InputData(new HashMap<>())); + dataItem.setApiCompletionResult(existingResult); + + ApiCompletionResult result = api.invokeWrapper(dataItem); + + assertFalse(invoked.get(), "已有 apiCompletionResult 时䞍应再次调甚 invoke"); + assertSame(existingResult, result, "应盎接返回已有结果"); + } + + // ===================== 钩子方法测试 ===================== + + @Test + @DisplayName("beforeInvoke 钩子被调甚可修改 DataItem") + void testBeforeInvoke_called() 
{ + AtomicBoolean beforeCalled = new AtomicBoolean(false); + ApiCompletion api = new ApiCompletion() { + @Override + protected DataItem beforeInvoke(DataItem dataItem) { + beforeCalled.set(true); + return dataItem; + } + + @Override + protected ApiCompletionResult invoke(DataItem dataItem) { + return new ApiCompletionResult(new HashMap<>()); + } + }; + + api.invokeWrapper(new DataItem(0L, new InputData(new HashMap<>()))); + assertTrue(beforeCalled.get(), "beforeInvoke 钩子应被调甚"); + } + + @Test + @DisplayName("afterInvoke 钩子被调甚可修改返回结果") + void testAfterInvoke_called() { + ApiCompletionResult modifiedResult = new ApiCompletionResult(new HashMap<>()); + modifiedResult.set("modified", true); + + ApiCompletion api = new ApiCompletion() { + @Override + protected ApiCompletionResult invoke(DataItem dataItem) { + return new ApiCompletionResult(new HashMap<>()); + } + + @Override + protected ApiCompletionResult afterInvoke(DataItem dataItem, ApiCompletionResult result) { + return modifiedResult; + } + }; + + ApiCompletionResult result = api.invokeWrapper(new DataItem(0L, new InputData(new HashMap<>()))); + assertSame(modifiedResult, result, "afterInvoke 返回的结果应被最终䜿甚"); + } + + @Test + @DisplayName("onErrorInvoke 钩子圚 invoke 抛匂垞时被调甚") + void testOnErrorInvoke_called() { + AtomicBoolean errorCalled = new AtomicBoolean(false); + AtomicReference capturedError = new AtomicReference<>(); + + ApiCompletion api = new ApiCompletion() { + @Override + protected ApiCompletionResult invoke(DataItem dataItem) { + throw new RuntimeException("test-error"); + } + + @Override + protected void onErrorInvoke(DataItem dataItem, Throwable e) { + errorCalled.set(true); + capturedError.set(e); + } + }; + + api.invokeWrapper(new DataItem(0L, new InputData(new HashMap<>()))); + assertTrue(errorCalled.get(), "invoke 抛匂垞时 onErrorInvoke 应被调甚"); + assertNotNull(capturedError.get(), "捕获的匂垞䞍应䞺 null"); + assertEquals("test-error", capturedError.get().getMessage()); + } + + // ===================== doExecute 
测试 ===================== + + @Test + @DisplayName("doExecute 正垞执行后DataItem 应被讟眮 apiCompletionResult") + void testDoExecute_resultsSetOnDataItems() { + ApiCompletion api = new ApiCompletion() { + @Override + protected ApiCompletionResult invoke(DataItem dataItem) { + Map item = new HashMap<>(); + item.put("result", "value-" + dataItem.getDataIndex()); + return new ApiCompletionResult(item); } + }; - // 记圕执行顺序 - execOrder.computeIfAbsent(caseId, k -> Collections.synchronizedList(new ArrayList<>())) - .add(query); + WorkflowContext ctx = buildContextWithDataItems(3); + executeWithContext(api, ctx); - String response = "response of " + query; - log.info("caseId:{}, query:{}, response:{}", caseId, query, response); - ApiCompletionResult result = new ApiCompletionResult(); - result.setResultItem(MapUtils.of("response", response)); - return result; + List dataItems = WorkflowContextOps.getDataItems(ctx); + for (DataItem dataItem : dataItems) { + assertNotNull(dataItem.getApiCompletionResult(), + "每䞪 DataItem 郜应有 apiCompletionResult"); + assertEquals("value-" + dataItem.getDataIndex(), + dataItem.getApiCompletionResult().get("result"), + "apiCompletionResult 内容应䞎 invoke 返回䞀臎"); } } @Test - void testConcurrent() { - DataLoader dataLoader = new DataLoader() { + @DisplayName("doExecute 时 DataItem 列衚䞺空应抛出 EvalException") + void testDoExecute_emptyDataItems_throws() { + ApiCompletion api = buildApiCompletion(new ApiCompletionResult(new HashMap<>())); + WorkflowContext ctx = new WorkflowContext(); + WorkflowContextOps.setDataItems(ctx, new CopyOnWriteArrayList<>()); + api.setWorkflowContext(ctx); + + assertThrows(RuntimeException.class, () -> { + try { + api.call(); + } catch (Exception e) { + throw new RuntimeException(e); + } + }, "DataItems 䞺空时应抛出匂垞"); + } + + @Test + @DisplayName("doExecute 时郚分 invoke 抛匂垞其䜙 DataItem 仍应正垞完成") + void testDoExecute_partialFailure_othersSucceed() { + ApiCompletion api = new ApiCompletion() { + @Override + protected ApiCompletionResult 
invoke(DataItem dataItem) { + // 只有 dataIndex=1 的抛匂垞 + if (dataItem.getDataIndex() == 1L) { + throw new RuntimeException("mock failure"); + } + Map item = new HashMap<>(); + item.put("ok", true); + return new ApiCompletionResult(item); + } + }; + + WorkflowContext ctx = buildContextWithDataItems(3); + executeWithContext(api, ctx); + + List dataItems = WorkflowContextOps.getDataItems(ctx); + // index=0 和 index=2 应成功 + assertTrue(dataItems.get(0).getApiCompletionResult().isSuccess()); + assertFalse(dataItems.get(1).getApiCompletionResult().isSuccess(), + "invoke 倱莥的 DataItem 的 success 应䞺 false"); + assertTrue(dataItems.get(2).getApiCompletionResult().isSuccess()); + } + + @Test + @DisplayName("doExecute 时 DataItem 已有 apiCompletionResult䞍应被芆盖") + void testDoExecute_existingResultNotOverwritten() { + AtomicBoolean invoked = new AtomicBoolean(false); + ApiCompletion api = new ApiCompletion() { @Override - public List prepareDataList() { - return ListUtils.of( - new InputData(MapUtils.of("caseId", "1", "query", "query1")), - new InputData(MapUtils.of("caseId", "1", "query", "query2")), - new InputData(MapUtils.of("caseId", "1", "query", "query3")), - new InputData(MapUtils.of("caseId", "2", "query", "query1")), - new InputData(MapUtils.of("caseId", "2", "query", "query2")), - new InputData(MapUtils.of("caseId", "3", "query", "query1")), - new InputData(MapUtils.of("caseId", "3", "query", "query2")) - ); + protected ApiCompletionResult invoke(DataItem dataItem) { + invoked.set(true); + return new ApiCompletionResult(new HashMap<>()); } }; - Begin begin = new Begin(); - TestApiCompletion apiCompletion = new TestApiCompletion( - ApiCompletionConfig.builder().threadNum(4).build() - ); + WorkflowContext ctx = new WorkflowContext(); + List items = new CopyOnWriteArrayList<>(); + DataItem dataItem = new DataItem(0L, new InputData(new HashMap<>())); + ApiCompletionResult existing = new ApiCompletionResult(new HashMap<>()); + existing.setDataIndex(0L); + 
existing.setSuccess(true); + dataItem.setApiCompletionResult(existing); + items.add(dataItem); + WorkflowContextOps.setDataItems(ctx, items); - // 必须圚指定时闎内跑完吊则讀䞺死锁 / 阻塞 - assertTimeoutPreemptively(java.time.Duration.ofSeconds(10), (ThrowingSupplier) () -> { - new WorkflowBuilder() - .link(begin, dataLoader, apiCompletion) - .build() - .execute(); - return null; - }); + executeWithContext(api, ctx); - // 并发床断蚀3 䞪 case 并行总耗时 < 䞲行 7*200ms - log.info("execOrder={}", apiCompletion.execOrder); + assertFalse(invoked.get(), "已有 apiCompletionResult 时䞍应调甚 invoke"); + assertSame(existing, WorkflowContextOps.getDataItems(ctx).get(0).getApiCompletionResult(), + "已有结果䞍应被芆盖"); } + @Test + @DisplayName("doExecute 按 dataIndex 匹配结果顺序无关") + void testDoExecute_resultMatchedByDataIndex() { + ApiCompletion api = new ApiCompletion() { + @Override + protected ApiCompletionResult invoke(DataItem dataItem) { + Map item = new HashMap<>(); + item.put("idx", dataItem.getDataIndex()); + return new ApiCompletionResult(item); + } + }; + WorkflowContext ctx = buildContextWithDataItems(5); + executeWithContext(api, ctx); + + List dataItems = WorkflowContextOps.getDataItems(ctx); + for (DataItem dataItem : dataItems) { + Long idx = (Long) dataItem.getApiCompletionResult().get("idx"); + assertEquals(dataItem.getDataIndex(), idx, + "apiCompletionResult 应按 dataIndex 正确匹配到对应的 DataItem"); + } + } } \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/api/HttpApiCompletionTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/api/HttpApiCompletionTest.java deleted file mode 100644 index bfea4a9..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/api/HttpApiCompletionTest.java +++ /dev/null @@ -1,42 +0,0 @@ -package com.evalkit.framework.eval.node.api; - -import com.evalkit.framework.common.client.http.model.HttpApiResponse; -import com.evalkit.framework.eval.model.ApiCompletionResult; -import 
com.evalkit.framework.eval.model.InputData; -import com.evalkit.framework.eval.node.api.config.HttpApiCompletionConfig; -import org.junit.jupiter.api.Test; - -import java.util.Collections; -import java.util.Map; - -class HttpApiCompletionTest { - void test() { - HttpApiCompletion httpApiCompletion = new HttpApiCompletion( - HttpApiCompletionConfig.builder() - .host("") - .api("") - .method("") - .build() - ) { - @Override - public Map prepareBody(InputData inputData) { - return Collections.emptyMap(); - } - - @Override - public Map prepareParam(InputData inputData) { - return Collections.emptyMap(); - } - - @Override - public Map prepareHeader(InputData inputData) { - return Collections.emptyMap(); - } - - @Override - public ApiCompletionResult buildApiCompletionResult(InputData inputData, HttpApiResponse response) { - return null; - } - }; - } -} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/api_wrapper/ApiCompletionWrapperTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/api_wrapper/ApiCompletionWrapperTest.java index 32ffca5..d943b7e 100644 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/api_wrapper/ApiCompletionWrapperTest.java +++ b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/api_wrapper/ApiCompletionWrapperTest.java @@ -1,380 +1,376 @@ package com.evalkit.framework.eval.node.api_wrapper; -import com.evalkit.framework.common.utils.list.ListUtils; -import com.evalkit.framework.eval.constants.NodeNamePrefix; import com.evalkit.framework.eval.context.WorkflowContextOps; -import com.evalkit.framework.eval.exception.EvalException; import com.evalkit.framework.eval.model.ApiCompletionResult; import com.evalkit.framework.eval.model.DataItem; import com.evalkit.framework.eval.model.InputData; import com.evalkit.framework.eval.node.api_wrapper.config.ApiCompletionWrapperConfig; import com.evalkit.framework.workflow.model.WorkflowContext; +import 
lombok.extern.slf4j.Slf4j; import org.junit.jupiter.api.DisplayName; -import org.junit.jupiter.api.Nested; import org.junit.jupiter.api.Test; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.util.*; +import java.util.concurrent.CopyOnWriteArrayList; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicReference; +import java.util.function.Consumer; import static org.junit.jupiter.api.Assertions.*; -/** - * ApiCompletionWrapper 单元测试 - *

- * 测试芆盖 - *

    - *
  • 节点 ID 前猀规范
  • - *
  • doExecute 空数据保技
  • - *
  • executeWrapper 正垞装饰流皋钩子顺序
  • - *
  • executeWrapper 装饰匂垞时䞍圱响敎䜓、返回原数据项
  • - *
  • onWrapperError 圚装饰匂垞时被调甚
  • - *
  • 倚数据项并发装饰单条倱莥䞍圱响其他条
  • - *
  • wrapper 对 ApiCompletionResult 的修改正确回写
  • - *
- */ -@DisplayName("ApiCompletionWrapper") +@Slf4j +@DisplayName("ApiCompletionWrapper 单元测试") class ApiCompletionWrapperTest { - - // ==================== 工厂方法 ==================== + + // ===================== 工具方法 ===================== /** - * 构造䞀䞪正垞执行的 wrapper将 resultItem 䞭写入指定 key/value + * 构建䞀䞪 ApiCompletionWrapperwrapper 逻蟑由 Consumer 提䟛 */ - private ApiCompletionWrapper buildWrapper(String writeKey, String writeValue) { + private ApiCompletionWrapper buildWrapper(Consumer wrapperLogic) { return new ApiCompletionWrapper() { @Override protected void wrapper(DataItem dataItem) { - ApiCompletionResult result = dataItem.getApiCompletionResult(); - if (result != null) { - result.set(writeKey, writeValue); - } + wrapperLogic.accept(dataItem); } }; } /** - * 构造䞀䞪圚 wrapper 䞭抛出匂垞的 wrapper + * 构建垊自定义 config 的 ApiCompletionWrapper */ - private ApiCompletionWrapper buildThrowingWrapper(RuntimeException ex) { - return new ApiCompletionWrapper() { + private ApiCompletionWrapper buildWrapper(ApiCompletionWrapperConfig config, + Consumer wrapperLogic) { + return new ApiCompletionWrapper(config) { @Override protected void wrapper(DataItem dataItem) { - throw ex; + wrapperLogic.accept(dataItem); } }; } /** - * 构造䞀䞪记圕钩子调甚顺序的 wrapper + * 构造包含指定条数 DataItem每条郜垊 ApiCompletionResult的 WorkflowContext */ - private ApiCompletionWrapper buildHookOrderWrapper(List callLog) { - return new ApiCompletionWrapper() { - @Override - protected void beforeWrapper(DataItem dataItem) { - callLog.add("before"); + private WorkflowContext buildContextWithDataItems(int size) { + WorkflowContext ctx = new WorkflowContext(); + List items = new CopyOnWriteArrayList<>(); + for (int i = 0; i < size; i++) { + Map inputItem = new HashMap<>(); + inputItem.put("id", i); + DataItem dataItem = new DataItem((long) i, new InputData(inputItem)); + Map resultItem = new HashMap<>(); + resultItem.put("output", "raw-" + i); + ApiCompletionResult result = new ApiCompletionResult(resultItem); + 
result.setDataIndex((long) i); + dataItem.setApiCompletionResult(result); + items.add(dataItem); + } + WorkflowContextOps.setDataItems(ctx, items); + return ctx; + } + + /** + * 䞺 ApiCompletionWrapper 泚入䞊䞋文并执行 + */ + private void executeWithContext(ApiCompletionWrapper wrapper, WorkflowContext ctx) { + wrapper.setWorkflowContext(ctx); + try { + wrapper.call(); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + // ===================== constructor 测试 ===================== + + @Test + @DisplayName("无参构造噚应䜿甚默讀 ApiCompletionWrapperConfigthreadNum=1") + void testConstructor_defaultConfig() { + ApiCompletionWrapper wrapper = buildWrapper(dataItem -> { + }); + assertNotNull(wrapper.config, "默讀构造噚应初始化 config"); + assertEquals(1, wrapper.config.getThreadNum(), "默讀线皋数应䞺 1"); + } + + @Test + @DisplayName("垊 ApiCompletionWrapperConfig 构造噚应正确保存配眮") + void testConstructor_withConfig() { + ApiCompletionWrapperConfig config = ApiCompletionWrapperConfig.builder().threadNum(4).build(); + ApiCompletionWrapper wrapper = buildWrapper(config, dataItem -> { + }); + assertEquals(4, wrapper.config.getThreadNum()); + } + + // ===================== executeWrapper 测试 ===================== + + @Test + @DisplayName("executeWrapper 正垞执行时应返回同䞀䞪 DataItem 实䟋") + void testExecuteWrapper_returnsSameDataItem() { + ApiCompletionWrapper wrapper = buildWrapper(dataItem -> { + }); + DataItem dataItem = new DataItem(0L, new InputData(new HashMap<>())); + DataItem result = wrapper.executeWrapper(dataItem); + assertSame(dataItem, result, "executeWrapper 应返回同䞀 DataItem 实䟋"); + } + + @Test + @DisplayName("executeWrapper äž­ wrapper 逻蟑可修改 ApiCompletionResult 字段") + void testExecuteWrapper_wrapperModifiesApiCompletionResult() { + ApiCompletionWrapper wrapper = buildWrapper(dataItem -> { + if (dataItem.getApiCompletionResult() != null) { + dataItem.getApiCompletionResult().set("wrapped", true); } + }); + + DataItem dataItem = new DataItem(0L, new InputData(new HashMap<>())); + 
dataItem.setApiCompletionResult(new ApiCompletionResult(new HashMap<>())); + wrapper.executeWrapper(dataItem); + + assertEquals(true, dataItem.getApiCompletionResult().get("wrapped"), + "wrapper 应胜修改 ApiCompletionResult 字段"); + } + + @Test + @DisplayName("executeWrapper äž­ wrapper 抛出匂垞时应被捕获返回原 DataItem 䞍抛出") + void testExecuteWrapper_wrapperThrows_returnOriginalItem() { + ApiCompletionWrapper wrapper = buildWrapper(dataItem -> { + throw new RuntimeException("mock wrapper error"); + }); + + DataItem dataItem = new DataItem(0L, new InputData(new HashMap<>())); + DataItem result = assertDoesNotThrow(() -> wrapper.executeWrapper(dataItem), + "wrapper 抛匂垞时 executeWrapper 䞍应向倖抛出"); + assertSame(dataItem, result, "抛匂垞后应返回原始 DataItem"); + } + + // ===================== 钩子方法测试 ===================== + @Test + @DisplayName("beforeWrapper 钩子应圚 wrapper 前被调甚") + void testBeforeWrapper_called() { + List callOrder = new ArrayList<>(); + + ApiCompletionWrapper wrapper = new ApiCompletionWrapper() { @Override - protected void wrapper(DataItem dataItem) { - callLog.add("wrapper"); + protected void beforeWrapper(DataItem dataItem) { + callOrder.add("before"); } @Override - protected void afterWrapper(DataItem dataItem) { - callLog.add("after"); + protected void wrapper(DataItem dataItem) { + callOrder.add("wrapper"); } }; + + wrapper.executeWrapper(new DataItem(0L, new InputData(new HashMap<>()))); + assertEquals(Arrays.asList("before", "wrapper"), callOrder, + "before 应圚 wrapper 之前调甚"); } - /** - * 构造䞀䞪记圕 onWrapperError 的 wrapper - */ - private ApiCompletionWrapper buildErrorCapturingWrapper(List errors) { - return new ApiCompletionWrapper() { + @Test + @DisplayName("afterWrapper 钩子应圚 wrapper 后被调甚") + void testAfterWrapper_called() { + List callOrder = new ArrayList<>(); + + ApiCompletionWrapper wrapper = new ApiCompletionWrapper() { @Override protected void wrapper(DataItem dataItem) { - throw new RuntimeException("故意抛出"); + callOrder.add("wrapper"); } @Override - protected void 
onWrapperError(DataItem dataItem, Throwable e) { - errors.add(e); + protected void afterWrapper(DataItem dataItem) { + callOrder.add("after"); } }; - } - /** - * 构造最简 DataItem含 ApiCompletionResult - */ - private DataItem buildDataItem(long index) { - DataItem item = new DataItem(); - item.setDataIndex(index); - Map input = new HashMap<>(); - input.put("query", "测试问题-" + index); - item.setInputData(new InputData(index, input)); - Map result = new HashMap<>(); - result.put("answer", "测试回答-" + index); - item.setApiCompletionResult(new ApiCompletionResult(result)); - return item; + wrapper.executeWrapper(new DataItem(0L, new InputData(new HashMap<>()))); + assertEquals(Arrays.asList("wrapper", "after"), callOrder, + "after 应圚 wrapper 之后调甚"); } - /** - * 通过 WorkflowContext 驱劚 doExecute - */ - private void executeWithContext(ApiCompletionWrapper wrapper, List dataItems) { - WorkflowContext ctx = new WorkflowContext(); - WorkflowContextOps.setDataItems(ctx, dataItems); - wrapper.setWorkflowContext(ctx); - wrapper.doExecute(); - } + @Test + @DisplayName("䞉䞪钩子按 before→wrapper→after 顺序执行") + void testExecuteWrapper_hookOrder() { + List order = new ArrayList<>(); - // ==================== 节点 ID 规范 ==================== + ApiCompletionWrapper wrapper = new ApiCompletionWrapper() { + @Override + protected void beforeWrapper(DataItem dataItem) { + order.add("before"); + } - @Nested - @DisplayName("节点 ID") - class NodeIdTest { + @Override + protected void wrapper(DataItem dataItem) { + order.add("wrapper"); + } - @Test - @DisplayName("节点 ID 应以 apiCompletionWrapper- 䞺前猀") - void nodeId_startsWithCorrectPrefix() { - ApiCompletionWrapper wrapper = buildWrapper("k", "v"); - assertTrue(wrapper.getId().startsWith(NodeNamePrefix.API_COMPLETION_WRAPPER), - "节点 ID 应以 '" + NodeNamePrefix.API_COMPLETION_WRAPPER + "' 匀倎实际: " + wrapper.getId()); - } + @Override + protected void afterWrapper(DataItem dataItem) { + order.add("after"); + } + }; - @Test - @DisplayName("每䞪实䟋的节点 ID 应唯䞀") - void 
nodeId_isUnique() { - ApiCompletionWrapper w1 = buildWrapper("k", "v"); - ApiCompletionWrapper w2 = buildWrapper("k", "v"); - assertNotEquals(w1.getId(), w2.getId()); - } + wrapper.executeWrapper(new DataItem(0L, new InputData(new HashMap<>()))); + assertEquals(Arrays.asList("before", "wrapper", "after"), order, + "钩子应按 before→wrapper→after 顺序执行"); } - // ==================== doExecute 保技 ==================== + @Test + @DisplayName("wrapper 抛匂垞时 onWrapperError 钩子被调甚并䌠入正确匂垞") + void testOnWrapperError_called() { + AtomicBoolean errorCalled = new AtomicBoolean(false); + AtomicReference capturedError = new AtomicReference<>(); - @Nested - @DisplayName("doExecute 空数据保技") - class DoExecuteGuardTest { - - @Test - @DisplayName("dataItems 䞺 null 时应抛出 EvalException") - void doExecute_nullDataItems_throwsEvalException() { - ApiCompletionWrapper wrapper = buildWrapper("k", "v"); - WorkflowContext ctx = new WorkflowContext(); - WorkflowContextOps.setDataItems(ctx, null); // null → remove key → getDataItems 返回 null - wrapper.setWorkflowContext(ctx); - assertThrows(EvalException.class, wrapper::doExecute); - } + ApiCompletionWrapper wrapper = new ApiCompletionWrapper() { + @Override + protected void wrapper(DataItem dataItem) { + throw new RuntimeException("test-error"); + } - @Test - @DisplayName("dataItems 䞺空列衚时应抛出 EvalException") - void doExecute_emptyDataItems_throwsEvalException() { - ApiCompletionWrapper wrapper = buildWrapper("k", "v"); - assertThrows(EvalException.class, - () -> executeWithContext(wrapper, new ArrayList<>())); - } + @Override + protected void onWrapperError(DataItem dataItem, Throwable e) { + errorCalled.set(true); + capturedError.set(e); + } + }; - @Test - @DisplayName("dataItems 非空时正垞执行䞍抛匂垞") - void doExecute_normalDataItems_noException() { - ApiCompletionWrapper wrapper = buildWrapper("transformed", "yes"); - List items = ListUtils.of(buildDataItem(1L)); - assertDoesNotThrow(() -> executeWithContext(wrapper, items)); - } + wrapper.executeWrapper(new 
DataItem(0L, new InputData(new HashMap<>()))); + assertTrue(errorCalled.get(), "wrapper 抛匂垞时 onWrapperError 应被调甚"); + assertNotNull(capturedError.get()); + assertEquals("test-error", capturedError.get().getMessage()); } - // ==================== 钩子顺序 ==================== + @Test + @DisplayName("wrapper 抛匂垞时 afterWrapper 䞍被调甚") + void testAfterWrapper_notCalledOnError() { + AtomicBoolean afterCalled = new AtomicBoolean(false); - @Nested - @DisplayName("钩子调甚顺序") - class HookOrderTest { - - @Test - @DisplayName("正垞执行时钩子顺序䞺 before → wrapper → after") - void executeWrapper_hookOrder_beforeWrapperAfter() { - List callLog = new ArrayList<>(); - ApiCompletionWrapper wrapper = buildHookOrderWrapper(callLog); - DataItem item = buildDataItem(1L); - - wrapper.executeWrapper(item); + ApiCompletionWrapper wrapper = new ApiCompletionWrapper() { + @Override + protected void wrapper(DataItem dataItem) { + throw new RuntimeException("error"); + } - assertEquals(ListUtils.of("before", "wrapper", "after"), callLog); - } + @Override + protected void afterWrapper(DataItem dataItem) { + afterCalled.set(true); + } + }; - @Test - @DisplayName("wrapper 抛匂垞时 after 䞍执行after 前已记圕 before") - void executeWrapper_exceptionInWrapper_afterNotCalled() { - List callLog = new ArrayList<>(); - ApiCompletionWrapper wrapper = new ApiCompletionWrapper() { - @Override - protected void beforeWrapper(DataItem dataItem) { - callLog.add("before"); - } - - @Override - protected void wrapper(DataItem dataItem) { - callLog.add("wrapper-throws"); - throw new RuntimeException("匂垞"); - } - - @Override - protected void afterWrapper(DataItem dataItem) { - callLog.add("after"); - } - }; - - wrapper.executeWrapper(buildDataItem(1L)); - - assertTrue(callLog.contains("before")); - assertTrue(callLog.contains("wrapper-throws")); - assertFalse(callLog.contains("after"), "after 䞍应圚 wrapper 抛匂垞后执行"); - } + wrapper.executeWrapper(new DataItem(0L, new InputData(new HashMap<>()))); + assertFalse(afterCalled.get(), "wrapper 抛匂垞时 
afterWrapper 䞍应被调甚"); } - // ==================== 匂垞隔犻 ==================== + // ===================== doExecute 测试 ===================== - @Nested - @DisplayName("单条匂垞䞍圱响敎䜓") - class ExceptionIsolationTest { + @Test + @DisplayName("doExecute 对 WorkflowContext 䞭的每䞪 DataItem 郜应执行 wrapper") + void testDoExecute_wrapperCalledForEachDataItem() { + AtomicInteger wrapperCount = new AtomicInteger(0); + ApiCompletionWrapper wrapper = buildWrapper(dataItem -> wrapperCount.incrementAndGet()); - @Test - @DisplayName("wrapper 抛匂垞时 executeWrapper 返回原 DataItem䞍䞺 null") - void executeWrapper_exceptionInWrapper_returnsOriginalItem() { - RuntimeException ex = new RuntimeException("装饰倱莥"); - ApiCompletionWrapper wrapper = buildThrowingWrapper(ex); - DataItem item = buildDataItem(1L); + WorkflowContext ctx = buildContextWithDataItems(5); + executeWithContext(wrapper, ctx); - DataItem returned = wrapper.executeWrapper(item); - - assertSame(item, returned, "应原样返回 DataItem"); - } - - @Test - @DisplayName("onWrapperError 圚 wrapper 抛匂垞时被调甚䞔携垊正确匂垞") - void executeWrapper_exceptionInWrapper_onWrapperErrorCalled() { - List errors = new ArrayList<>(); - ApiCompletionWrapper wrapper = buildErrorCapturingWrapper(errors); - wrapper.executeWrapper(buildDataItem(1L)); - - assertEquals(1, errors.size()); - assertEquals("故意抛出", errors.get(0).getMessage()); - } - - @Test - @DisplayName("倚条数据项䞭郚分倱莥䞍圱响其他条的装饰结果") - void doExecute_partialFailure_otherItemsStillWrapped() { - // 奇数 index 的 DataItem 觊发匂垞偶数的正垞写入 - ApiCompletionWrapper wrapper = new ApiCompletionWrapper() { - @Override - protected void wrapper(DataItem dataItem) { - if (dataItem.getDataIndex() % 2 != 0) { - throw new RuntimeException("奇数行故意倱莥"); - } - dataItem.getApiCompletionResult().set("wrapped", "true"); - } - }; - - List items = ListUtils.of( - buildDataItem(1L), // 倱莥 - buildDataItem(2L), // 成功 - buildDataItem(3L), // 倱莥 - buildDataItem(4L) // 成功 - ); - - assertDoesNotThrow(() -> executeWithContext(wrapper, items)); - - // 偶数条应成功写入 
- assertEquals("true", items.get(1).getApiCompletionResult().get("wrapped")); - assertEquals("true", items.get(3).getApiCompletionResult().get("wrapped")); - // 奇数条结果䞍变 - assertNull(items.get(0).getApiCompletionResult().get("wrapped")); - assertNull(items.get(2).getApiCompletionResult().get("wrapped")); - } + assertEquals(5, wrapperCount.get(), "wrapper 应对每䞪 DataItem 郜被调甚䞀次"); } - // ==================== 装饰结果回写 ==================== - - @Nested - @DisplayName("装饰结果回写") - class WrapperResultTest { - - @Test - @DisplayName("wrapper 对 ApiCompletionResult 的修改应正确回写到 DataItem") - void wrapper_modifiesApiCompletionResult_changesPersist() { - ApiCompletionWrapper wrapper = buildWrapper("normalized_answer", "hello world"); - DataItem item = buildDataItem(1L); + @Test + @DisplayName("doExecute 后 ApiCompletionResult 的修改应被持久化到 WorkflowContext") + void testDoExecute_modificationsPersisted() { + ApiCompletionWrapper wrapper = buildWrapper(dataItem -> { + if (dataItem.getApiCompletionResult() != null) { + dataItem.getApiCompletionResult().set("processed", true); + } + }); - wrapper.executeWrapper(item); + WorkflowContext ctx = buildContextWithDataItems(3); + executeWithContext(wrapper, ctx); - assertEquals("hello world", item.getApiCompletionResult().get("normalized_answer")); + List dataItems = WorkflowContextOps.getDataItems(ctx); + for (DataItem dataItem : dataItems) { + assertEquals(true, dataItem.getApiCompletionResult().get("processed"), + "每䞪 DataItem 的 ApiCompletionResult 郜应包含 wrapper 写入的字段"); } + } - @Test - @DisplayName("倚䞪字段同时写入均应正确保留") - void wrapper_multipleFieldsWritten_allPersist() { - ApiCompletionWrapper wrapper = new ApiCompletionWrapper() { - @Override - protected void wrapper(DataItem dataItem) { - ApiCompletionResult result = dataItem.getApiCompletionResult(); - result.set("field_a", "valueA"); - result.set("field_b", 42); - result.set("field_c", true); - } - }; - DataItem item = buildDataItem(1L); - wrapper.executeWrapper(item); - - ApiCompletionResult 
result = item.getApiCompletionResult(); - assertEquals("valueA", result.get("field_a")); - assertEquals(42, (Integer) result.get("field_b")); - assertEquals(true, result.get("field_c")); - } + @Test + @DisplayName("doExecute 时 DataItem 列衚䞺空应抛出 EvalException") + void testDoExecute_emptyDataItems_throws() { + ApiCompletionWrapper wrapper = buildWrapper(dataItem -> { + }); + WorkflowContext ctx = new WorkflowContext(); + WorkflowContextOps.setDataItems(ctx, new CopyOnWriteArrayList<>()); + wrapper.setWorkflowContext(ctx); - @Test - @DisplayName("doExecute 批量执行后所有 DataItem 均被正确装饰") - void doExecute_batchWrapper_allItemsDecorated() { - ApiCompletionWrapper wrapper = buildWrapper("done", "yes"); - List items = ListUtils.of( - buildDataItem(1L), buildDataItem(2L), buildDataItem(3L) - ); + assertThrows(RuntimeException.class, () -> { + try { + wrapper.call(); + } catch (Exception e) { + throw new RuntimeException(e); + } + }, "DataItems 䞺空时 doExecute 应抛出匂垞"); + } - executeWithContext(wrapper, items); + @Test + @DisplayName("doExecute 时 DataItem 列衚䞺 null应抛出 EvalException") + void testDoExecute_nullDataItems_throws() { + ApiCompletionWrapper wrapper = buildWrapper(dataItem -> { + }); + WorkflowContext ctx = new WorkflowContext(); + // 䞍讟眮 dataItems默讀䞺 null + wrapper.setWorkflowContext(ctx); - for (DataItem item : items) { - assertEquals("yes", item.getApiCompletionResult().get("done"), - "DataItem[" + item.getDataIndex() + "] 未被正确装饰"); + assertThrows(RuntimeException.class, () -> { + try { + wrapper.call(); + } catch (Exception e) { + throw new RuntimeException(e); } - } + }, "DataItems 䞺 null 时 doExecute 应抛出匂垞"); } - // ==================== 自定义 Config ==================== + @Test + @DisplayName("doExecute 时郚分 wrapper 抛匂垞其䜙 DataItem 仍应正垞倄理") + void testDoExecute_partialFailure_othersSucceed() { + ApiCompletionWrapper wrapper = buildWrapper(dataItem -> { + if (dataItem.getDataIndex() == 1L) { + throw new RuntimeException("mock error"); + } + 
dataItem.getApiCompletionResult().set("done", true); + }); + + WorkflowContext ctx = buildContextWithDataItems(3); + executeWithContext(wrapper, ctx); + + List dataItems = WorkflowContextOps.getDataItems(ctx); + assertEquals(true, dataItems.get(0).getApiCompletionResult().get("done"), + "index=0 应正垞完成"); + assertNull(dataItems.get(1).getApiCompletionResult().get("done"), + "index=1 wrapper 倱莥done 字段䞍应被讟眮"); + assertEquals(true, dataItems.get(2).getApiCompletionResult().get("done"), + "index=2 应正垞完成"); + } - @Nested - @DisplayName("Config 生效") - class ConfigTest { + @Test + @DisplayName("doExecute 对原始 output 字段进行芆写后WorkflowContext 䞭的倌应已曎新") + void testDoExecute_outputFieldOverwritten() { + ApiCompletionWrapper wrapper = buildWrapper(dataItem -> { + ApiCompletionResult result = dataItem.getApiCompletionResult(); + if (result != null) { + String original = result.get("output"); + result.set("output", "wrapped-" + original); + } + }); - @Test - @DisplayName("默讀构造噚䜿甚 threadNum=1") - void defaultConstructor_threadNumIsOne() { - ApiCompletionWrapper wrapper = buildWrapper("k", "v"); - assertEquals(1, wrapper.config.getThreadNum()); - } + WorkflowContext ctx = buildContextWithDataItems(2); + executeWithContext(wrapper, ctx); - @Test - @DisplayName("自定义 config 的 threadNum 正确生效") - void customConfig_threadNumApplied() { - ApiCompletionWrapperConfig config = ApiCompletionWrapperConfig.builder() - .threadNum(4) - .build(); - ApiCompletionWrapper wrapper = new ApiCompletionWrapper(config) { - @Override - protected void wrapper(DataItem dataItem) { - } - }; - assertEquals(4, wrapper.config.getThreadNum()); - } + List dataItems = WorkflowContextOps.getDataItems(ctx); + assertEquals("wrapped-raw-0", dataItems.get(0).getApiCompletionResult().get("output")); + assertEquals("wrapped-raw-1", dataItems.get(1).getApiCompletionResult().get("output")); } } \ No newline at end of file diff --git 
a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/api_wrapper/LLMBasedApiCompletionWrapperTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/api_wrapper/LLMBasedApiCompletionWrapperTest.java deleted file mode 100644 index f870f06..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/api_wrapper/LLMBasedApiCompletionWrapperTest.java +++ /dev/null @@ -1,472 +0,0 @@ -package com.evalkit.framework.eval.node.api_wrapper; - -import com.evalkit.framework.common.utils.list.ListUtils; -import com.evalkit.framework.eval.context.WorkflowContextOps; -import com.evalkit.framework.eval.model.ApiCompletionResult; -import com.evalkit.framework.eval.model.DataItem; -import com.evalkit.framework.eval.model.InputData; -import com.evalkit.framework.eval.node.api_wrapper.config.LLMBasedApiCompletionConfig; -import com.evalkit.framework.infra.service.llm.LLMService; -import com.evalkit.framework.workflow.model.WorkflowContext; -import org.junit.jupiter.api.DisplayName; -import org.junit.jupiter.api.Nested; -import org.junit.jupiter.api.Test; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.concurrent.atomic.AtomicReference; - -import static org.junit.jupiter.api.Assertions.*; - -/** - * LLMBasedApiCompletionWrapper 单元测试 - *

- * 测试芆盖 - *

    - *
  • llmConfig 正确绑定无字段遮蔜
  • - *
  • ApiCompletionResult 䞺 null 时跳过䞍调甚 LLM
  • - *
  • preparePrompt 返回空/null 时跳过䞍调甚 LLM
  • - *
  • 正垞流皋LLM 调甚结果通过 applyLLMOutput 写回结果
  • - *
  • preparePrompt 接收到完敎的 DataItem含蟓入数据和接口结果
  • - *
  • LLM 抛匂垞时executeWrapper 䞍向倖䌠播单条倱莥隔犻
  • - *
  • 倚条数据批量执行LLM 分别独立调甚
  • - *
- */ -@DisplayName("LLMBasedApiCompletionWrapper") -class LLMBasedApiCompletionWrapperTest { - - // ==================== 工厂方法 ==================== - - /** - * 构造固定回倍的 mock LLMService - */ - private LLMService mockLLM(String reply) { - return new LLMService() { - @Override - public String chat(String prompt) { - return reply; - } - - @Override - public String getModel() { - return "mock-model"; - } - }; - } - - /** - * 构造抛匂垞的 mock LLMService - */ - private LLMService throwingLLM(String msg) { - return new LLMService() { - @Override - public String chat(String prompt) { - throw new RuntimeException(msg); - } - - @Override - public String getModel() { - return "throwing-model"; - } - }; - } - - /** - * 构造记圕调甚 prompt 的 mock LLMService - */ - private LLMService capturingLLM(List promptLog, String reply) { - return new LLMService() { - @Override - public String chat(String prompt) { - promptLog.add(prompt); - return reply; - } - - @Override - public String getModel() { - return "capturing-model"; - } - }; - } - - /** - * 构造标准 LLMBasedApiCompletionWrapper - * - preparePrompt: 拌接 query + answer - * - applyLLMOutput: 写入 "wrapped_answer" 字段 - */ - private LLMBasedApiCompletionWrapper buildWrapper(LLMService llmService) { - LLMBasedApiCompletionConfig config = LLMBasedApiCompletionConfig.builder() - .llmService(llmService) - .build(); - return new LLMBasedApiCompletionWrapper(config) { - @Override - public String preparePrompt(DataItem dataItem) { - String query = dataItem.getInputData().get("query"); - String answer = dataItem.getApiCompletionResult().get("answer"); - return "query=" + query + " answer=" + answer; - } - - @Override - public void applyLLMOutput(ApiCompletionResult result, String llmOutput) { - result.set("wrapped_answer", llmOutput); - } - }; - } - - /** - * 构造最简 DataItem含 ApiCompletionResult - */ - private DataItem buildDataItem(long index) { - DataItem item = new DataItem(); - item.setDataIndex(index); - Map input = new HashMap<>(); - input.put("query", 
"测试问题-" + index); - item.setInputData(new InputData(index, input)); - Map result = new HashMap<>(); - result.put("answer", "测试回答-" + index); - item.setApiCompletionResult(new ApiCompletionResult(result)); - return item; - } - - /** - * 通过 WorkflowContext 驱劚 doExecute - */ - private void executeWithContext(LLMBasedApiCompletionWrapper wrapper, List items) { - WorkflowContext ctx = new WorkflowContext(); - WorkflowContextOps.setDataItems(ctx, items); - wrapper.setWorkflowContext(ctx); - wrapper.doExecute(); - } - - // ==================== Config 绑定 ==================== - - @Nested - @DisplayName("Config 绑定") - class ConfigBindingTest { - - @Test - @DisplayName("llmConfig 字段䞎构造噚䌠入的 config 是同䞀实䟋䞍存圚字段遮蔜") - void llmConfig_sameInstanceAsConstructorArg() { - LLMBasedApiCompletionConfig config = LLMBasedApiCompletionConfig.builder() - .llmService(mockLLM("ok")) - .build(); - LLMBasedApiCompletionWrapper wrapper = new LLMBasedApiCompletionWrapper(config) { - @Override - public String preparePrompt(DataItem dataItem) { - return "prompt"; - } - - @Override - public void applyLLMOutput(ApiCompletionResult result, String llmOutput) { - } - }; - assertSame(config, wrapper.llmConfig, "llmConfig 应䞎构造噚䌠入的 config 是同䞀对象"); - } - - @Test - @DisplayName("父类 config 侎 llmConfig 指向同䞀实䟋") - void parentConfig_sameAsLlmConfig() { - LLMBasedApiCompletionConfig config = LLMBasedApiCompletionConfig.builder() - .llmService(mockLLM("ok")) - .build(); - LLMBasedApiCompletionWrapper wrapper = new LLMBasedApiCompletionWrapper(config) { - @Override - public String preparePrompt(DataItem dataItem) { - return "prompt"; - } - - @Override - public void applyLLMOutput(ApiCompletionResult result, String llmOutput) { - } - }; - // 父类 config 也应䞎 llmConfig 䞀臎 - assertSame(wrapper.llmConfig, wrapper.config, - "父类 config 侎 llmConfig 应䞺同䞀实䟋"); - } - } - - // ==================== 跳过条件 ==================== - - @Nested - @DisplayName("跳过条件") - class SkipConditionTest { - - @Test - @DisplayName("ApiCompletionResult 
䞺 null 时䞍调甚 LLM盎接跳过") - void wrapper_nullApiCompletionResult_skipsLLM() { - AtomicInteger callCount = new AtomicInteger(0); - LLMService countingLLM = new LLMService() { - @Override - public String chat(String prompt) { - callCount.incrementAndGet(); - return "output"; - } - - @Override - public String getModel() { - return "counting-model"; - } - }; - LLMBasedApiCompletionWrapper wrapper = buildWrapper(countingLLM); - - DataItem item = buildDataItem(1L); - item.setApiCompletionResult(null); // 讟䞺 null - - wrapper.executeWrapper(item); - - assertEquals(0, callCount.get(), "ApiCompletionResult 䞺 null 时䞍应调甚 LLM"); - } - - @Test - @DisplayName("preparePrompt 返回 null 时䞍调甚 LLM盎接跳过") - void wrapper_nullPrompt_skipsLLM() { - AtomicInteger callCount = new AtomicInteger(0); - LLMBasedApiCompletionConfig config = LLMBasedApiCompletionConfig.builder() - .llmService(new LLMService() { - @Override - public String chat(String prompt) { - callCount.incrementAndGet(); - return "output"; - } - - @Override - public String getModel() { - return "counting-model"; - } - }) - .build(); - LLMBasedApiCompletionWrapper wrapper = new LLMBasedApiCompletionWrapper(config) { - @Override - public String preparePrompt(DataItem dataItem) { - return null; // 返回 null - } - - @Override - public void applyLLMOutput(ApiCompletionResult result, String llmOutput) { - } - }; - - wrapper.executeWrapper(buildDataItem(1L)); - - assertEquals(0, callCount.get(), "preparePrompt 返回 null 时䞍应调甚 LLM"); - } - - @Test - @DisplayName("preparePrompt 返回空字笊䞲时䞍调甚 LLM盎接跳过") - void wrapper_emptyPrompt_skipsLLM() { - AtomicInteger callCount = new AtomicInteger(0); - LLMBasedApiCompletionConfig config = LLMBasedApiCompletionConfig.builder() - .llmService(new LLMService() { - @Override - public String chat(String prompt) { - callCount.incrementAndGet(); - return "output"; - } - - @Override - public String getModel() { - return "counting-model"; - } - }) - .build(); - LLMBasedApiCompletionWrapper wrapper = new 
LLMBasedApiCompletionWrapper(config) { - @Override - public String preparePrompt(DataItem dataItem) { - return ""; // 返回空字笊䞲 - } - - @Override - public void applyLLMOutput(ApiCompletionResult result, String llmOutput) { - } - }; - - wrapper.executeWrapper(buildDataItem(1L)); - - assertEquals(0, callCount.get(), "preparePrompt 返回空字笊䞲时䞍应调甚 LLM"); - } - } - - // ==================== 正垞流皋 ==================== - - @Nested - @DisplayName("正垞装饰流皋") - class NormalFlowTest { - - @Test - @DisplayName("LLM 蟓出通过 applyLLMOutput 正确写回 ApiCompletionResult") - void wrapper_llmOutput_appliedToResult() { - LLMBasedApiCompletionWrapper wrapper = buildWrapper(mockLLM("蜬化后的内容")); - DataItem item = buildDataItem(1L); - - wrapper.executeWrapper(item); - - assertEquals("蜬化后的内容", item.getApiCompletionResult().get("wrapped_answer")); - } - - @Test - @DisplayName("preparePrompt 接收到正确的 DataItem含 inputData 和 apiCompletionResult") - void wrapper_preparePrompt_receivesCorrectDataItem() { - AtomicReference capturedPrompt = new AtomicReference<>(); - LLMBasedApiCompletionConfig config = LLMBasedApiCompletionConfig.builder() - .llmService(mockLLM("output")) - .build(); - LLMBasedApiCompletionWrapper wrapper = new LLMBasedApiCompletionWrapper(config) { - @Override - public String preparePrompt(DataItem dataItem) { - String q = dataItem.getInputData().get("query"); - String a = dataItem.getApiCompletionResult().get("answer"); - capturedPrompt.set("q=" + q + ",a=" + a); - return capturedPrompt.get(); - } - - @Override - public void applyLLMOutput(ApiCompletionResult result, String llmOutput) { - } - }; - - DataItem item = buildDataItem(42L); - wrapper.executeWrapper(item); - - assertEquals("q=测试问题-42,a=测试回答-42", capturedPrompt.get()); - } - - @Test - @DisplayName("LLM 被调甚时收到的 prompt 侎 preparePrompt 返回倌䞀臎") - void wrapper_llmReceivesCorrectPrompt() { - List promptLog = new ArrayList<>(); - LLMBasedApiCompletionConfig config = LLMBasedApiCompletionConfig.builder() - .llmService(capturingLLM(promptLog, 
"output")) - .build(); - LLMBasedApiCompletionWrapper wrapper = new LLMBasedApiCompletionWrapper(config) { - @Override - public String preparePrompt(DataItem dataItem) { - return "固定提瀺词"; - } - - @Override - public void applyLLMOutput(ApiCompletionResult result, String llmOutput) { - } - }; - - wrapper.executeWrapper(buildDataItem(1L)); - - assertEquals(1, promptLog.size()); - assertEquals("固定提瀺词", promptLog.get(0)); - } - } - - // ==================== 匂垞隔犻 ==================== - - @Nested - @DisplayName("LLM 匂垞隔犻") - class LLMExceptionIsolationTest { - - @Test - @DisplayName("LLM 抛匂垞时executeWrapper 䞍向倖䌠播返回原 DataItem") - void wrapper_llmThrows_exceptionIsolated() { - LLMBasedApiCompletionWrapper wrapper = buildWrapper(throwingLLM("LLM 服务故障")); - DataItem item = buildDataItem(1L); - - DataItem returned = assertDoesNotThrow(() -> wrapper.executeWrapper(item)); - - assertSame(item, returned, "应原样返回 DataItem"); - } - - @Test - @DisplayName("倚条数据䞭郚分 LLM 匂垞䞍圱响其他条") - void doExecute_partialLLMFailure_otherItemsStillWrapped() { - AtomicInteger callCount = new AtomicInteger(0); - LLMBasedApiCompletionConfig config = LLMBasedApiCompletionConfig.builder() - .llmService(new LLMService() { - @Override - public String chat(String prompt) { - // 第䞀次调甚倱莥后续正垞 - if (callCount.getAndIncrement() == 0) { - throw new RuntimeException("第䞀次倱莥"); - } - return "success"; - } - - @Override - public String getModel() { - return "partial-fail-model"; - } - }) - .build(); - LLMBasedApiCompletionWrapper wrapper = new LLMBasedApiCompletionWrapper(config) { - @Override - public String preparePrompt(DataItem dataItem) { - return "prompt"; - } - - @Override - public void applyLLMOutput(ApiCompletionResult result, String llmOutput) { - result.set("wrapped", llmOutput); - } - }; - - List items = ListUtils.of(buildDataItem(1L), buildDataItem(2L), buildDataItem(3L)); - assertDoesNotThrow(() -> executeWithContext(wrapper, items)); - - // 后䞀条应成功 - int successCount = 0; - for (DataItem item : items) { 
- if ("success".equals(item.getApiCompletionResult().get("wrapped"))) { - successCount++; - } - } - assertEquals(2, successCount, "第䞀条倱莥后后续䞀条应正垞装饰"); - } - } - - // ==================== 批量执行 ==================== - - @Nested - @DisplayName("批量执行") - class BatchExecutionTest { - - @Test - @DisplayName("doExecute 对所有 DataItem 各调甚䞀次 LLM") - void doExecute_callsLLMForEachItem() { - List promptLog = new ArrayList<>(); - LLMBasedApiCompletionConfig config = LLMBasedApiCompletionConfig.builder() - .llmService(capturingLLM(promptLog, "output")) - .build(); - LLMBasedApiCompletionWrapper wrapper = new LLMBasedApiCompletionWrapper(config) { - @Override - public String preparePrompt(DataItem dataItem) { - return "prompt-" + dataItem.getDataIndex(); - } - - @Override - public void applyLLMOutput(ApiCompletionResult result, String llmOutput) { - result.set("out", llmOutput); - } - }; - - List items = ListUtils.of(buildDataItem(1L), buildDataItem(2L), buildDataItem(3L)); - executeWithContext(wrapper, items); - - assertEquals(3, promptLog.size(), "应䞺每条数据各调甚䞀次 LLM"); - } - - @Test - @DisplayName("doExecute 完成后所有 DataItem 的结果均被正确写入") - void doExecute_allItemsDecorated() { - LLMBasedApiCompletionWrapper wrapper = buildWrapper(mockLLM("processed")); - List items = ListUtils.of( - buildDataItem(1L), buildDataItem(2L), buildDataItem(3L) - ); - - executeWithContext(wrapper, items); - - for (DataItem item : items) { - assertEquals("processed", item.getApiCompletionResult().get("wrapped_answer"), - "DataItem[" + item.getDataIndex() + "] 装饰结果䞍正确"); - } - } - } -} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/begin/BeginTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/begin/BeginTest.java new file mode 100644 index 0000000..899623f --- /dev/null +++ b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/begin/BeginTest.java @@ -0,0 +1,239 @@ +package com.evalkit.framework.eval.node.begin; + +import 
com.evalkit.framework.eval.context.WorkflowContextOps; +import com.evalkit.framework.eval.node.begin.config.BeginConfig; +import com.evalkit.framework.eval.node.scorer.strategy.*; +import com.evalkit.framework.infra.service.llm.LLMService; +import com.evalkit.framework.workflow.model.WorkflowContext; +import lombok.extern.slf4j.Slf4j; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; + +import java.util.List; + +import static org.junit.jupiter.api.Assertions.*; + +@Slf4j +@DisplayName("Begin 单元测试") +class BeginTest { + + /** + * 构造䞀䞪最简 mock LLMService + */ + private LLMService mockLlmService() { + return new LLMService() { + @Override + public String chat(String prompt) { + return "mock-response"; + } + + @Override + public String getModel() { + return "mock-model"; + } + }; + } + + /** + * 䞺 Begin 泚入 WorkflowContext 并执行 + */ + private WorkflowContext executeWithContext(Begin begin) { + WorkflowContext ctx = new WorkflowContext(); + begin.setWorkflowContext(ctx); + try { + begin.call(); + } catch (Exception e) { + throw new RuntimeException(e); + } + return ctx; + } + + // ===================== constructor 测试 ===================== + + @Test + @DisplayName("无参构造噚应䜿甚默讀 BeginConfig䞍抛出匂垞") + void testConstructor_defaultConfig() { + Begin begin = new Begin(); + assertNotNull(begin.getConfig(), "默讀构造噚应初始化 config"); + assertNotNull(begin.getConfig().getScoreStrategy(), "默讀 ScoreStrategy 䞍应䞺 null"); + assertNotNull(begin.getConfig().getEvalReasonStrategy(), "默讀 EvalReasonStrategy 䞍应䞺 null"); + } + + @Test + @DisplayName("垊 BeginConfig 构造噚应正确保存配眮") + void testConstructor_withConfig() { + ScoreStrategy strategy = new AvgScoreStrategy(); + BeginConfig config = BeginConfig.builder().scoreStrategy(strategy).threshold(0.8).build(); + Begin begin = new Begin(config); + assertSame(strategy, begin.getConfig().getScoreStrategy()); + assertEquals(0.8, begin.getConfig().getThreshold(), 1e-9); + } + + // ===================== validConfig 测试 
===================== + + @Test + @DisplayName("config 䞺 null 时应抛出 IllegalArgumentException") + void testValidConfig_nullConfigThrows() { + assertThrows(IllegalArgumentException.class, () -> new Begin(null), + "config 䞺 null 时应抛匂垞"); + } + + @Test + @DisplayName("ScoreStrategy 䞺 null 时应抛出 IllegalArgumentException") + void testValidConfig_nullScoreStrategyThrows() { + BeginConfig config = BeginConfig.builder().scoreStrategy(null).build(); + assertThrows(IllegalArgumentException.class, () -> new Begin(config), + "ScoreStrategy 䞺 null 时应抛匂垞"); + } + + @Test + @DisplayName("EvalReasonStrategy 䞺 null 时应抛出 IllegalArgumentException") + void testValidConfig_nullEvalReasonStrategyThrows() { + BeginConfig config = BeginConfig.builder().evalReasonStrategy(null).build(); + assertThrows(IllegalArgumentException.class, () -> new Begin(config), + "EvalReasonStrategy 䞺 null 时应抛匂垞"); + } + + @Test + @DisplayName("LLMSummaryEvalReasonStrategy äž­ LLMService 䞺 null 时应抛出 IllegalArgumentException") + void testValidConfig_llmStrategyWithNullLlmServiceThrows() { + LLMSummaryEvalReasonStrategy strategy = new LLMSummaryEvalReasonStrategy(null, "some-prompt"); + BeginConfig config = BeginConfig.builder().evalReasonStrategy(strategy).build(); + assertThrows(IllegalArgumentException.class, () -> new Begin(config), + "LLMSummaryEvalReasonStrategy LLMService 䞺 null 时应抛匂垞"); + } + + @Test + @DisplayName("LLMSummaryEvalReasonStrategy äž­ sysPrompt 䞺空时应抛出 IllegalArgumentException") + void testValidConfig_llmStrategyWithEmptySysPromptThrows() { + LLMSummaryEvalReasonStrategy strategy = new LLMSummaryEvalReasonStrategy(mockLlmService(), ""); + BeginConfig config = BeginConfig.builder().evalReasonStrategy(strategy).build(); + assertThrows(IllegalArgumentException.class, () -> new Begin(config), + "LLMSummaryEvalReasonStrategy sysPrompt 䞺空时应抛匂垞"); + } + + @Test + @DisplayName("LLMSummaryEvalReasonStrategy 配眮合法时䞍应抛出匂垞") + void testValidConfig_llmStrategyValid() { + LLMSummaryEvalReasonStrategy strategy 
= new LLMSummaryEvalReasonStrategy(mockLlmService(), "valid-prompt"); + BeginConfig config = BeginConfig.builder().evalReasonStrategy(strategy).build(); + assertDoesNotThrow(() -> new Begin(config), + "LLMSummaryEvalReasonStrategy 配眮合法时䞍应抛匂垞"); + } + + // ===================== initWorkflowContext 测试 ===================== + + @Test + @DisplayName("执行后 WorkflowContext 侭的 ScoreStrategy 应䞎配眮䞀臎") + void testInitWorkflowContext_scorerStrategySet() { + ScoreStrategy strategy = new AvgScoreStrategy(); + Begin begin = new Begin(BeginConfig.builder().scoreStrategy(strategy).build()); + WorkflowContext ctx = executeWithContext(begin); + assertSame(strategy, WorkflowContextOps.getScorerStrategy(ctx), + "䞊䞋文䞭的 ScoreStrategy 应䞎配眮䞀臎"); + } + + @Test + @DisplayName("执行后 WorkflowContext 侭的 EvalReasonStrategy 应䞎配眮䞀臎") + void testInitWorkflowContext_evalReasonStrategySet() { + EvalReasonStrategy reason = new JsonEvalReasonStrategy(); + Begin begin = new Begin(BeginConfig.builder().evalReasonStrategy(reason).build()); + WorkflowContext ctx = executeWithContext(begin); + assertSame(reason, WorkflowContextOps.getEvalReasonStrategy(ctx), + "䞊䞋文䞭的 EvalReasonStrategy 应䞎配眮䞀臎"); + } + + @Test + @DisplayName("执行后 WorkflowContext 侭的 threshold 应䞎配眮䞀臎") + void testInitWorkflowContext_thresholdSet() { + double threshold = 0.75; + Begin begin = new Begin(BeginConfig.builder().threshold(threshold).build()); + WorkflowContext ctx = executeWithContext(begin); + assertEquals(threshold, WorkflowContextOps.getThreshold(ctx), 1e-9, + "䞊䞋文䞭的 threshold 应䞎配眮䞀臎"); + } + + @Test + @DisplayName("执行后 WorkflowContext 侭的 dataItems 应被初始化䞺非 null 空列衚") + void testInitWorkflowContext_dataItemsInitialized() { + Begin begin = new Begin(); + WorkflowContext ctx = executeWithContext(begin); + List dataItems = WorkflowContextOps.getDataItems(ctx); + assertNotNull(dataItems, "dataItems 䞍应䞺 null"); + assertTrue(dataItems.isEmpty(), "初始化后 dataItems 应䞺空列衚"); + } + + @Test + @DisplayName("若 WorkflowContext 䞭已有 
dataItems执行后䞍应芆盖原有数据") + void testInitWorkflowContext_existingDataItemsNotOverwritten() { + Begin begin = new Begin(); + WorkflowContext ctx = new WorkflowContext(); + // 预先写入非空 dataItems + java.util.List existing = new java.util.concurrent.CopyOnWriteArrayList<>(); + existing.add(new com.evalkit.framework.eval.model.DataItem(0L, null)); + WorkflowContextOps.setDataItems(ctx, existing); + begin.setWorkflowContext(ctx); + try { + begin.call(); + } catch (Exception e) { + throw new RuntimeException(e); + } + List dataItems = WorkflowContextOps.getDataItems(ctx); + assertEquals(1, dataItems.size(), "已有 dataItems 时䞍应被枅空"); + } + + @Test + @DisplayName("执行后 WorkflowContext 侭的 countResults 应被初始化䞺非 null 空 Map") + void testInitWorkflowContext_countResultsInitialized() { + Begin begin = new Begin(); + WorkflowContext ctx = executeWithContext(begin); + java.util.Map countResults = WorkflowContextOps.getCountResults(ctx); + assertNotNull(countResults, "countResults 䞍应䞺 null"); + assertTrue(countResults.isEmpty(), "初始化后 countResults 应䞺空 Map"); + } + + @Test + @DisplayName("执行后 WorkflowContext 侭的 extra 应被初始化䞺非 null 空 Map") + void testInitWorkflowContext_extraInitialized() { + Begin begin = new Begin(); + WorkflowContext ctx = executeWithContext(begin); + java.util.Map extra = WorkflowContextOps.getExtra(ctx); + assertNotNull(extra, "extra 䞍应䞺 null"); + assertTrue(extra.isEmpty(), "初始化后 extra 应䞺空 Map"); + } + + @Test + @DisplayName("threshold 默讀倌䞺 0") + void testInitWorkflowContext_defaultThresholdIsZero() { + Begin begin = new Begin(); + WorkflowContext ctx = executeWithContext(begin); + assertEquals(0d, WorkflowContextOps.getThreshold(ctx), 1e-9, + "未指定 threshold 时默讀倌应䞺 0"); + } + + // ===================== 䞍同 ScoreStrategy 验证 ===================== + + @Test + @DisplayName("䜿甚 SumScoreStrategy 时䞊䞋文䞭策略类型正确") + void testWithSumScoreStrategy() { + Begin begin = new Begin(BeginConfig.builder().scoreStrategy(new SumScoreStrategy()).build()); + WorkflowContext ctx = 
executeWithContext(begin); + assertTrue(WorkflowContextOps.getScorerStrategy(ctx) instanceof SumScoreStrategy); + } + + @Test + @DisplayName("䜿甚 MinScoreStrategy 时䞊䞋文䞭策略类型正确") + void testWithMinScoreStrategy() { + Begin begin = new Begin(BeginConfig.builder().scoreStrategy(new MinScoreStrategy()).build()); + WorkflowContext ctx = executeWithContext(begin); + assertTrue(WorkflowContextOps.getScorerStrategy(ctx) instanceof MinScoreStrategy); + } + + @Test + @DisplayName("䜿甚 NormalEvalReasonStrategy 时䞊䞋文䞭策略类型正确") + void testWithNormalEvalReasonStrategy() { + Begin begin = new Begin(BeginConfig.builder().evalReasonStrategy(new NormalEvalReasonStrategy()).build()); + WorkflowContext ctx = executeWithContext(begin); + assertTrue(WorkflowContextOps.getEvalReasonStrategy(ctx) instanceof NormalEvalReasonStrategy); + } +} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/counter/AttributeCounterTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/counter/AttributeCounterTest.java deleted file mode 100644 index 5a41749..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/counter/AttributeCounterTest.java +++ /dev/null @@ -1,28 +0,0 @@ -package com.evalkit.framework.eval.node.counter; - -import com.evalkit.framework.common.utils.json.JsonUtils; -import com.evalkit.framework.eval.model.CountResult; -import com.evalkit.framework.eval.model.DataItem; -import com.evalkit.framework.infra.service.llm.LLMService; -import com.evalkit.framework.infra.utils.DebugUtils; -import com.fasterxml.jackson.core.type.TypeReference; -import lombok.extern.slf4j.Slf4j; -import org.junit.jupiter.api.Disabled; -import org.junit.jupiter.api.Test; - -import java.util.List; - -@Slf4j -class AttributeCounterTest { - @Test - @Disabled - public void test() { - LLMService llmService = DebugUtils.buildLLMService(); - List dataItems = JsonUtils.readJsonFile("classpath:dataItems.json", new TypeReference>() { - }); - 
dataItems = dataItems.subList(0, 2); - AttributeCounter counter = new AttributeCounter(llmService); - CountResult countResult = counter.count(dataItems); - log.info("countResult: {}", JsonUtils.toJson(countResult)); - } -} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/counter/AttributeCounterV2Test.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/counter/AttributeCounterV2Test.java deleted file mode 100644 index 2b6e779..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/counter/AttributeCounterV2Test.java +++ /dev/null @@ -1,28 +0,0 @@ -package com.evalkit.framework.eval.node.counter; - -import com.evalkit.framework.common.utils.json.JsonUtils; -import com.evalkit.framework.eval.model.CountResult; -import com.evalkit.framework.eval.model.DataItem; -import com.evalkit.framework.infra.service.llm.LLMService; -import com.evalkit.framework.infra.utils.DebugUtils; -import com.fasterxml.jackson.core.type.TypeReference; -import lombok.extern.slf4j.Slf4j; -import org.junit.jupiter.api.Disabled; -import org.junit.jupiter.api.Test; - -import java.util.List; - -@Slf4j -class AttributeCounterV2Test { - @Test - @Disabled - public void test() { - LLMService llmService = DebugUtils.buildLLMService(); - List dataItems = JsonUtils.readJsonFile("classpath:dataItems.json", new TypeReference>() { - }); - dataItems = dataItems.subList(0, 2); - AttributeCounterV2 counter = new AttributeCounterV2(llmService); - CountResult countResult = counter.count(dataItems); - log.info("countResult: {}", JsonUtils.toJson(countResult)); - } -} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/counter/CounterTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/counter/CounterTest.java deleted file mode 100644 index 17952c4..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/counter/CounterTest.java +++ /dev/null @@ -1,17 
+0,0 @@ -package com.evalkit.framework.eval.node.counter; - -import com.evalkit.framework.eval.model.CountResult; -import com.evalkit.framework.eval.model.DataItem; - -import java.util.List; - -class CounterTest { - void test() { - Counter counter = new Counter() { - @Override - protected CountResult count(List dataItems) { - return null; - } - }; - } -} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/data_generator/EvalCaseDataGeneratorTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/data_generator/EvalCaseDataGeneratorTest.java deleted file mode 100644 index 0bd6042..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/data_generator/EvalCaseDataGeneratorTest.java +++ /dev/null @@ -1,28 +0,0 @@ -package com.evalkit.framework.eval.node.data_generator; - -import com.evalkit.framework.eval.node.data_generator.config.EvalCaseDataGeneratorConfig; -import com.evalkit.framework.eval.node.querygen.MockQueryGenerator; -import org.junit.jupiter.api.Test; - -class EvalCaseDataGeneratorTest { - @Test - void test() throws Exception { - MockQueryGenerator mockQueryGenerator = new MockQueryGenerator() { - @Override - public String prepareTemplateQuery() { - return "{{between_chinese_holiday 20250815 20251101}} 去 {{city 河北省}}"; - } - }; - - EvalCaseDataGenerator evalCaseDataGenerator = new EvalCaseDataGenerator( - EvalCaseDataGeneratorConfig.builder() - .queryGenerator(mockQueryGenerator) - .enableOutputFile(true) - .genCount(5) - .roundCount(5) - .randomRound(true) - .build() - ); - evalCaseDataGenerator.prepareDataList(); - } -} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/data_generator/KGBasedQueryGeneratorTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/data_generator/KGBasedQueryGeneratorTest.java deleted file mode 100644 index 0722fec..0000000 --- 
a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/data_generator/KGBasedQueryGeneratorTest.java +++ /dev/null @@ -1,41 +0,0 @@ -package com.evalkit.framework.eval.node.data_generator; - -import com.evalkit.framework.common.utils.list.ListUtils; -import com.evalkit.framework.eval.model.InputData; -import com.evalkit.framework.eval.node.data_generator.config.KGBasedQueryGeneratorConfig; -import com.evalkit.framework.infra.service.llm.LLMService; -import com.evalkit.framework.infra.utils.DebugUtils; -import lombok.extern.slf4j.Slf4j; -import org.junit.jupiter.api.Test; - -import java.util.List; - -@Slf4j -class KGBasedQueryGeneratorTest { - - @Test - public void test() throws Exception { - String kgFilePath = "travel_demo/travel_kg.ttl"; - String scenarioConfigFilePath = "travel_demo/scenario_config.json"; - String scenarioConfigFilePath2 = "travel_demo/scenario2_config.json"; - LLMService llmService = DebugUtils.buildLLMService(); - - KGBasedQueryGenerator generator = new KGBasedQueryGenerator( - KGBasedQueryGeneratorConfig.builder() - .scenarioConfigFilePath(ListUtils.of(scenarioConfigFilePath, scenarioConfigFilePath2)) - .kgFilePath(kgFilePath) - .llmService(llmService) - .enableOutputFile(true) - .generateCount(1) - .threadNum(1) - .sessionIdFieldName("session_id") - .turnFieldName("turn") - .queryFieldName("query") - .enableOneRawOneSession(false) - .build() - ); - - List generated = generator.generateWrapper(); - log.debug("generated: {}", generated); - } -} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/data_generator/LoaderBasedDataGeneratorTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/data_generator/LoaderBasedDataGeneratorTest.java deleted file mode 100644 index d7a0275..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/data_generator/LoaderBasedDataGeneratorTest.java +++ /dev/null @@ -1,104 +0,0 @@ -package 
com.evalkit.framework.eval.node.data_generator; - -import com.evalkit.framework.common.utils.convert.TypeConvertUtils; -import com.evalkit.framework.common.utils.list.ListUtils; -import com.evalkit.framework.common.utils.map.MapUtils; -import com.evalkit.framework.common.utils.random.UuidUtils; -import com.evalkit.framework.eval.model.InputData; -import com.evalkit.framework.eval.node.dataloader.DataLoader; -import com.evalkit.framework.eval.node.querygen.config.LoaderBasedDataGeneratorConfig; -import lombok.extern.slf4j.Slf4j; -import org.apache.commons.collections4.CollectionUtils; -import org.apache.commons.lang3.StringUtils; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -import java.util.*; - -/** - * 基于数据集加蜜噚的数据生成噚单测 - */ -@Slf4j -class LoaderBasedDataGeneratorTest { - - DataLoader dataLoader; - - @BeforeEach - public void setUp() { - // 数据加蜜噚,每行是䞀蜮对话 - dataLoader = new DataLoader() { - @Override - public List prepareDataList() throws Exception { - return ListUtils.of( - new InputData(MapUtils.of("queries", "s1q1#s1q2")), - new InputData(MapUtils.of("queries", "s2q1#s2q2#s2q3")) - ); - } - }; - } - - - @Test - public void test() throws Exception { - // 数据生成噚, 将原始数据变成倚蜮对话, 每行是䞀䞪Query - LoaderBasedDataGenerator generator = new LoaderBasedDataGenerator( - LoaderBasedDataGeneratorConfig.builder() - .dataLoader(dataLoader) - .threadNum(2) - .build() - ) { - @Override - public List> processSingleInputData(Map inputItem) { - String queries = TypeConvertUtils.toString(inputItem.getOrDefault("queries", null)); - if (StringUtils.isEmpty(queries)) { - return Collections.emptyList(); - } - String[] split = StringUtils.split(queries, "#"); - if (split.length == 0) { - return Collections.emptyList(); - } - List> result = new ArrayList<>(); - String sessionId = UuidUtils.generateUuid(); - for (int i = 0; i < split.length; i++) { - Map map = new HashMap<>(); - map.put("sessionId", sessionId); - 
map.put("turn", i + 1); - map.put("query", split[i]); - result.add(map); - } - return result; - } - }; - - // 验证 - log.info("raw input data: {}", dataLoader.loadWrapper()); - List generateDataList = generator.prepareDataList(); - log.info("generated data: {}", generateDataList); - Assertions.assertTrue(CollectionUtils.isNotEmpty(generateDataList)); - Assertions.assertEquals(5, generateDataList.size()); - } - - @Test - public void testBadProcess() throws Exception { - // 数据生成噚, 将原始数据变成倚蜮对话, 每行是䞀䞪Query - LoaderBasedDataGenerator generator = new LoaderBasedDataGenerator( - LoaderBasedDataGeneratorConfig.builder() - .dataLoader(dataLoader) - .threadNum(2) - .build() - ) { - @Override - public List> processSingleInputData(Map inputItem) { - int i = 1 / 0; - return ListUtils.of(inputItem); - } - }; - - // 验证 - log.info("raw input data: {}", dataLoader.loadWrapper()); - List generateDataList = generator.prepareDataList(); - log.info("generated data: {}", generateDataList); - Assertions.assertTrue(CollectionUtils.isEmpty(generateDataList)); - } -} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/data_generator/MultiDataGeneratorTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/data_generator/MultiDataGeneratorTest.java deleted file mode 100644 index 7a0d22c..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/data_generator/MultiDataGeneratorTest.java +++ /dev/null @@ -1,48 +0,0 @@ -package com.evalkit.framework.eval.node.data_generator; - -import com.evalkit.framework.common.utils.list.ListUtils; -import com.evalkit.framework.eval.node.data_generator.config.KGBasedQueryGeneratorConfig; -import com.evalkit.framework.eval.node.data_generator.config.MultiDataGeneratorConfig; -import com.evalkit.framework.infra.service.llm.LLMService; -import com.evalkit.framework.infra.utils.DebugUtils; -import org.junit.jupiter.api.Test; - -class MultiDataGeneratorTest { - @Test - public void test() 
{ - String kgFilePath = "travel_demo/travel_kg.ttl"; - String scenarioConfigFilePath = "travel_demo/scenario_config.json"; - String scenario2ConfigFilePath = "travel_demo/scenario2_config.json"; - - LLMService llmService = DebugUtils.buildLLMService(); - - KGBasedQueryGenerator generator1 = new KGBasedQueryGenerator( - KGBasedQueryGeneratorConfig.builder() - .scenarioConfigFilePath(ListUtils.of(scenarioConfigFilePath)) - .kgFilePath(kgFilePath) - .llmService(llmService) - .enableOutputFile(true) - .generateCount(1) - .build() - ); - - KGBasedQueryGenerator generator2 = new KGBasedQueryGenerator( - KGBasedQueryGeneratorConfig.builder() - .scenarioConfigFilePath(ListUtils.of(scenario2ConfigFilePath)) - .kgFilePath(kgFilePath) - .llmService(llmService) - .enableOutputFile(true) - .generateCount(1) - .build() - ); - - - MultiDataGenerator multiDataGenerator = new MultiDataGenerator( - MultiDataGeneratorConfig.builder() - .dataGenerators(ListUtils.of(generator1, generator2)) - .enableOutputFile(true) - .build() - ); - multiDataGenerator.generateWrapper(); - } -} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/data_generator/querygen/MockQueryGeneratorTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/data_generator/querygen/MockQueryGeneratorTest.java deleted file mode 100644 index c368288..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/data_generator/querygen/MockQueryGeneratorTest.java +++ /dev/null @@ -1,30 +0,0 @@ -package com.evalkit.framework.eval.node.data_generator.querygen; - -import com.evalkit.framework.eval.node.querygen.MockQueryGenerator; -import com.evalkit.framework.eval.node.querygen.config.MockerQueryGeneratorConfig; -import lombok.extern.slf4j.Slf4j; -import org.junit.jupiter.api.Test; - -import java.util.List; - -@Slf4j -class MockQueryGeneratorTest { - @Test - void test() { - String templateQuery = "{{between_chinese_holiday 20250815 20251101}} 去 {{city 
河北省}}"; - - MockQueryGenerator mockQueryGenerator = new MockQueryGenerator( - MockerQueryGeneratorConfig.builder() - .genCount(5) - .build() - ) { - @Override - public String prepareTemplateQuery() { - return templateQuery; - } - }; - - List genQueries = mockQueryGenerator.generate(); - log.info("template: {}, generate queries: {}", templateQuery, genQueries); - } -} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/data_generator/querygen/PromptBasedQueryGeneratorTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/data_generator/querygen/PromptBasedQueryGeneratorTest.java deleted file mode 100644 index dc027ce..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/data_generator/querygen/PromptBasedQueryGeneratorTest.java +++ /dev/null @@ -1,37 +0,0 @@ -package com.evalkit.framework.eval.node.data_generator.querygen; - -import com.evalkit.framework.common.utils.runtime.RuntimeEnvUtils; -import com.evalkit.framework.eval.node.querygen.PromptBasedQueryGenerator; -import com.evalkit.framework.eval.node.querygen.config.PromptBasedQueryGeneratorConfig; -import com.evalkit.framework.infra.service.llm.LLMService; -import com.evalkit.framework.infra.service.llm.LLMServiceFactory; -import com.evalkit.framework.infra.service.llm.config.DeepseekLLMServiceConfig; -import com.evalkit.framework.infra.service.llm.constants.LLMServiceEnum; -import lombok.extern.slf4j.Slf4j; -import org.junit.jupiter.api.Test; - -import java.util.List; - -@Slf4j -class PromptBasedQueryGeneratorTest { - @Test - void test() { - String deepSeekToken = RuntimeEnvUtils.getPropertyFromResource("secret.properties", "deepseek-token"); - LLMService llmService = LLMServiceFactory.createLLMService( - LLMServiceEnum.DEEPSEEK.name(), - DeepseekLLMServiceConfig.builder() - .apiToken(deepSeekToken) - .build() - ); - - PromptBasedQueryGenerator promptBasedQueryGenerator = new PromptBasedQueryGenerator( - 
PromptBasedQueryGeneratorConfig.builder() - .llmService(llmService) - .genCount(2) - .userPrompt("关键词: 预订机祚") - .build() - ); - List queries = promptBasedQueryGenerator.generate(); - log.info("queries: {}", queries); - } -} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader/ApiDataLoaderTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader/ApiDataLoaderTest.java deleted file mode 100644 index 1449054..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader/ApiDataLoaderTest.java +++ /dev/null @@ -1,44 +0,0 @@ -package com.evalkit.framework.eval.node.dataloader; - -import com.evalkit.framework.eval.node.dataloader.config.ApiDataLoaderConfig; -import org.junit.jupiter.api.Test; - -import java.util.Collections; -import java.util.Map; -import java.util.concurrent.TimeUnit; - -class ApiDataLoaderTest { - - void test() { - ApiDataLoader apiDataLoader = new ApiDataLoader( - ApiDataLoaderConfig.builder() - .host("") - .api("") - .method("get") - .timeout(10) - .timeUnit(TimeUnit.SECONDS) - .build() - ) { - @Override - public Map prepareBody() { - return Collections.emptyMap(); - } - - @Override - public Map prepareParam() { - return Collections.emptyMap(); - } - - @Override - public Map prepareHeader() { - return Collections.emptyMap(); - } - - @Override - public String prepareJsonpath() { - return "$.data"; - } - }; - } - -} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader/DataLoaderTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader/DataLoaderTest.java index 0a79ac6..97fd3a4 100644 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader/DataLoaderTest.java +++ b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader/DataLoaderTest.java @@ -1,62 +1,336 @@ package com.evalkit.framework.eval.node.dataloader; -import 
com.evalkit.framework.common.utils.list.ListUtils; -import com.evalkit.framework.common.utils.map.MapUtils; import com.evalkit.framework.eval.model.InputData; -import com.evalkit.framework.eval.node.dataloader.config.JsonFileDataLoaderConfig; +import com.evalkit.framework.eval.node.dataloader.config.DataLoaderConfig; import lombok.extern.slf4j.Slf4j; -import org.apache.commons.lang3.StringUtils; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.DisplayName; import org.junit.jupiter.api.Test; -import java.util.List; +import java.util.*; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +import static org.junit.jupiter.api.Assertions.*; @Slf4j +@DisplayName("DataLoader 单元测试") class DataLoaderTest { - private DataLoader dataLoader; + /** + * 构造䞀䞪简单的 DataLoader 匿名实现返回指定数量的 InputData + */ + private DataLoader buildDataLoader(int dataSize) { + return buildDataLoader(DataLoaderConfig.builder().build(), dataSize); + } - @BeforeEach - void setUp() { - dataLoader = new DataLoader() { + private DataLoader buildDataLoader(DataLoaderConfig config, int dataSize) { + return new DataLoader(config) { @Override - public List prepareDataList() throws Exception { - return ListUtils.of( - new InputData(MapUtils.of("query", "1")), - new InputData(MapUtils.of("query", "2")) - ); + public List prepareDataList() { + return buildInputDataList(dataSize); } }; } + /** + * 构造测试甚的 InputData 列衚 + */ + private List buildInputDataList(int size) { + List list = new ArrayList<>(); + for (int i = 0; i < size; i++) { + Map item = new HashMap<>(); + item.put("id", i); + item.put("value", "v" + i); + list.add(new InputData((long) i, item)); + } + return list; + } + + // ===================== validConfig 测试 ===================== + + @Test + @DisplayName("config 䞺 null 时应抛出 IllegalArgumentException") + void testValidConfig_nullConfigThrows() { + 
assertThrows(IllegalArgumentException.class, () -> new DataLoader(null) { + @Override + public List prepareDataList() { + return new ArrayList<>(); + } + }, "Config 䞺 null 时应抛出 IllegalArgumentException"); + } + + @Test + @DisplayName("offset 䞺莟数时应抛出 IllegalArgumentException") + void testValidConfig_negativeOffsetThrows() { + assertThrows(IllegalArgumentException.class, () -> + buildDataLoader(DataLoaderConfig.builder().offset(-1).build(), 5), + "offset 䞺莟数时应抛出 IllegalArgumentException"); + } + + @Test + @DisplayName("limit 小于 -1 时应抛出 IllegalArgumentException") + void testValidConfig_limitLessThanNegativeOneThrows() { + assertThrows(IllegalArgumentException.class, () -> + buildDataLoader(DataLoaderConfig.builder().limit(-2).build(), 5), + "limit 小于 -1 时应抛出 IllegalArgumentException"); + } + + @Test + @DisplayName("offset=0, limit=-1 䞺合法配眮䞍应抛出匂垞") + void testValidConfig_zeroOffsetAndNegativeOneLimitOk() { + assertDoesNotThrow(() -> buildDataLoader(DataLoaderConfig.builder().offset(0).limit(-1).build(), 5), + "offset=0, limit=-1 䞺合法配眮"); + } + + // ===================== addFilter / addFilters 测试 ===================== + + @Test + @DisplayName("添加 null 过滀噚时䞍应写入过滀噚列衚") + void testAddFilter_nullFilterIsIgnored() { + DataLoader loader = buildDataLoader(5); + loader.addFilter(null); + assertTrue(loader.getConfig().getFilters().isEmpty(), "添加 null 过滀噚时䞍应写入列衚"); + } + + @Test + @DisplayName("添加单䞪过滀噚后过滀噚列衚倧小䞺 1") + void testAddFilter_singleFilter() { + DataLoader loader = buildDataLoader(5); + loader.addFilter(inputData -> true); + assertEquals(1, loader.getConfig().getFilters().size(), "应成功添加 1 䞪过滀噚"); + } + @Test - void loadWrapper() { - List inputData = dataLoader.loadWrapper(); - log.info("inputData:{}", inputData); - Assertions.assertEquals(2, inputData.size()); - } - - @Test - @Disabled - public void testInjectData() { - String filePath = ""; - JsonFileDataLoader jsonFileDataLoader = new JsonFileDataLoader( - JsonFileDataLoaderConfig.builder() - .filePath(filePath) - 
.openInjectData(true) - .jsonPath("$.dataItems") - .filters( - ListUtils.of( - inputData -> { - String query = inputData.get("query"); - return StringUtils.equals(query, "1"); - } - ) - ) - .build() - ); - jsonFileDataLoader.loadWrapper(); + @DisplayName("批量添加过滀噚后过滀噚列衚倧小正确") + void testAddFilters_multipleFilters() { + DataLoader loader = buildDataLoader(5); + loader.addFilters(Arrays.asList(inputData -> true, inputData -> false)); + assertEquals(2, loader.getConfig().getFilters().size(), "应成功添加 2 䞪过滀噚"); + } + + // ===================== setOffsetAndLimit 测试 ===================== + + @Test + @DisplayName("setOffsetAndLimit 应正确曎新 config 侭的 offset 和 limit") + void testSetOffsetAndLimit() { + DataLoader loader = buildDataLoader(10); + loader.setOffsetAndLimit(2, 3); + assertEquals(2, loader.getConfig().getOffset()); + assertEquals(3, loader.getConfig().getLimit()); + } + + // ===================== slice 测试 ===================== + + @Test + @DisplayName("limit=-1 时 slice 应返回党郚数据") + void testSlice_limitNegativeOne_returnsAll() { + DataLoader loader = buildDataLoader(DataLoaderConfig.builder().offset(0).limit(-1).build(), 10); + List data = buildInputDataList(10); + List result = loader.slice(data); + assertEquals(10, result.size(), "limit=-1 时应返回党郚数据"); + } + + @Test + @DisplayName("slice 按 offset 和 limit 正确截取数据") + void testSlice_offsetAndLimit() { + DataLoader loader = buildDataLoader(DataLoaderConfig.builder().offset(2).limit(3).build(), 10); + List data = buildInputDataList(10); + List result = loader.slice(data); + assertEquals(3, result.size(), "slice 后应返回 3 条数据"); + assertEquals(2L, result.get(0).getDataIndex()); + assertEquals(3L, result.get(1).getDataIndex()); + assertEquals(4L, result.get(2).getDataIndex()); + } + + @Test + @DisplayName("offset 超过数据总量时 slice 返回空列衚") + void testSlice_offsetBeyondTotal_returnsEmpty() { + DataLoader loader = buildDataLoader(DataLoaderConfig.builder().offset(20).limit(5).build(), 10); + List data = buildInputDataList(10); + List 
result = loader.slice(data); + assertTrue(result.isEmpty(), "offset 超过数据总量时应返回空列衚"); + } + + @Test + @DisplayName("空列衚 slice 后仍䞺空列衚") + void testSlice_emptyList_returnsEmpty() { + DataLoader loader = buildDataLoader(DataLoaderConfig.builder().offset(0).limit(5).build(), 0); + List result = loader.slice(new ArrayList<>()); + assertTrue(result.isEmpty(), "空列衚 slice 后䟝然䞺空"); + } + + @Test + @DisplayName("limit 超过剩䜙数据量时 slice 返回剩䜙党郚数据") + void testSlice_limitExceedsRemaining_returnsRest() { + DataLoader loader = buildDataLoader(DataLoaderConfig.builder().offset(8).limit(5).build(), 10); + List data = buildInputDataList(10); + List result = loader.slice(data); + assertEquals(2, result.size(), "limit 超过剩䜙数据量时应返回剩䜙所有数据"); + } + + // ===================== filter 测试 ===================== + + @Test + @DisplayName("无过滀噚时数据列衚䞍应被修改") + void testFilter_noFilters_listUnchanged() { + DataLoader loader = buildDataLoader(5); + List data = new ArrayList<>(buildInputDataList(5)); + loader.filter(data); + assertEquals(5, data.size(), "没有过滀噚时数据䞍应被过滀"); + } + + @Test + @DisplayName("过滀噚拒绝所有数据时列衚应䞺空") + void testFilter_filterOutAll() { + DataLoader loader = buildDataLoader(5); + loader.addFilter(inputData -> false); + List data = new ArrayList<>(buildInputDataList(5)); + loader.filter(data); + assertTrue(data.isEmpty(), "过滀噚拊截所有数据后列衚应䞺空"); + } + + @Test + @DisplayName("按字段倌过滀时只保留满足条件的数据") + void testFilter_filterByValue() { + DataLoader loader = buildDataLoader(5); + loader.addFilter(inputData -> (int) inputData.get("id") < 3); + List data = new ArrayList<>(buildInputDataList(5)); + loader.filter(data); + assertEquals(3, data.size(), "过滀后应只保留 id=0,1,2 的䞉条数据"); + } + + @Test + @DisplayName("倚䞪过滀噚之闎䞺 AND 逻蟑同时满足才保留") + void testFilter_multipleFilters_andLogic() { + DataLoader loader = buildDataLoader(10); + loader.addFilter(inputData -> (int) inputData.get("id") >= 2); + loader.addFilter(inputData -> (int) inputData.get("id") <= 7); + List data = new ArrayList<>(buildInputDataList(10)); + 
loader.filter(data); + assertEquals(6, data.size(), "倚过滀噚应 AND 逻蟑保留 id=2..7 共 6 条数据"); + } + + // ===================== addDataIndex 测试 ===================== + + @Test + @DisplayName("addDataIndex 应从 0 匀始顺序䞺数据项赋予玢匕") + void testAddDataIndex_assignsSequentialIndex() { + DataLoader loader = buildDataLoader(5); + List data = buildInputDataList(5); + data.forEach(d -> d.setDataIndex(null)); + loader.addDataIndex(data); + for (int i = 0; i < data.size(); i++) { + assertEquals(i, data.get(i).getDataIndex(), "玢匕应从 0 匀始顺序递增"); + } + } + + @Test + @DisplayName("空列衚调甚 addDataIndex 䞍应抛出匂垞") + void testAddDataIndex_emptyList_noException() { + DataLoader loader = buildDataLoader(0); + assertDoesNotThrow(() -> loader.addDataIndex(new ArrayList<>()), + "空列衚调甚 addDataIndex 䞍应抛出匂垞"); + } + + // ===================== loadWrapper 测试 ===================== + + @Test + @DisplayName("loadWrapper 正垞加蜜时应返回完敎数据列衚") + void testLoadWrapper_success_returnsDataList() { + DataLoader loader = buildDataLoader(DataLoaderConfig.builder().offset(0).limit(-1).build(), 5); + List result = loader.loadWrapper(); + assertNotNull(result, "loadWrapper 正垞情况䞋应返回非 null 列衚"); + assertEquals(5, result.size(), "应返回党郚 5 条数据"); + } + + @Test + @DisplayName("prepareDataList 返回空时 loadWrapper 应返回 null") + void testLoadWrapper_emptyPrepareDataList_returnsNull() { + DataLoader loader = new DataLoader() { + @Override + public List prepareDataList() { + return new ArrayList<>(); + } + }; + List result = loader.loadWrapper(); + assertNull(result, "prepareDataList 返回空时 loadWrapper 应返回 null"); + } + + @Test + @DisplayName("loadWrapper 配合过滀噚应正确过滀数据") + void testLoadWrapper_withFilter_filtersCorrectly() { + DataLoaderConfig config = DataLoaderConfig.builder().offset(0).limit(-1).build(); + DataLoader loader = buildDataLoader(config, 10); + loader.addFilter(inputData -> (int) inputData.get("id") % 2 == 0); + List result = loader.loadWrapper(); + assertNotNull(result); + assertEquals(5, result.size(), "过滀奇数 id 后应只剩 5 条数据"); + 
result.forEach(d -> assertEquals(0, (int) d.get("id") % 2, "保留的 id 应均䞺偶数")); + } + + @Test + @DisplayName("loadWrapper 应正确应甚 offset 和 limit 截取数据") + void testLoadWrapper_withOffsetAndLimit() { + DataLoaderConfig config = DataLoaderConfig.builder().offset(2).limit(3).build(); + DataLoader loader = buildDataLoader(config, 10); + List result = loader.loadWrapper(); + assertNotNull(result); + assertEquals(3, result.size(), "offset=2, limit=3 时应返回 3 条"); + } + + @Test + @DisplayName("loadWrapper 返回的每条数据郜应讟眮了 dataIndex") + void testLoadWrapper_dataIndexAssigned() { + DataLoader loader = buildDataLoader(5); + List result = loader.loadWrapper(); + assertNotNull(result); + for (InputData inputData : result) { + assertNotNull(inputData.getDataIndex(), "每条数据的 dataIndex 䞍应䞺 null"); + } + } + + @Test + @DisplayName("匀启 shuffle 后数据总条数䞍变䞔内容完敎") + void testLoadWrapper_shuffleDoesNotLoseData() { + DataLoaderConfig config = DataLoaderConfig.builder().shuffle(true).build(); + DataLoader loader = buildDataLoader(config, 20); + List result = loader.loadWrapper(); + assertNotNull(result); + assertEquals(20, result.size(), "shuffle 后数据条数䞍应改变"); + List ids = result.stream() + .map(d -> (int) d.get("id")) + .sorted() + .collect(Collectors.toList()); + List expected = IntStream.range(0, 20).boxed().collect(Collectors.toList()); + assertEquals(expected, ids, "shuffle 后 id 集合应仍䞺 0..19"); + } + + // ===================== constructor 测试 ===================== + + @Test + @DisplayName("无参构造噚应初始化默讀 configoffset=0, limit=-1") + void testConstructor_defaultConfig() { + DataLoader loader = new DataLoader() { + @Override + public List prepareDataList() { + return buildInputDataList(1); + } + }; + assertNotNull(loader.getConfig(), "默讀构造噚应初始化 config"); + assertEquals(0, loader.getConfig().getOffset()); + assertEquals(-1, loader.getConfig().getLimit()); + } + + @Test + @DisplayName("(offset, limit) 构造噚应正确讟眮 config 侭的 offset 和 limit") + void testConstructor_offsetAndLimit() { + DataLoader loader = new 
DataLoader(3, 7) { + @Override + public List prepareDataList() { + return buildInputDataList(1); + } + }; + assertEquals(3, loader.getConfig().getOffset()); + assertEquals(7, loader.getConfig().getLimit()); } } \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader/ExcelDataLoaderTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader/ExcelDataLoaderTest.java deleted file mode 100644 index b8ffb23..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader/ExcelDataLoaderTest.java +++ /dev/null @@ -1,13 +0,0 @@ -package com.evalkit.framework.eval.node.dataloader; - -import com.evalkit.framework.eval.node.dataloader.config.ExcelDataLoaderConfig; -import org.junit.jupiter.api.Test; - -class ExcelDataLoaderTest { - @Test - void validConfigTest() { - ExcelDataLoader excelDataLoader = new ExcelDataLoader( - ExcelDataLoaderConfig.builder().filePath("test.xlsx").build() - ); - } -} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader/JdbcDataLoaderTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader/JdbcDataLoaderTest.java deleted file mode 100644 index f65721e..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader/JdbcDataLoaderTest.java +++ /dev/null @@ -1,22 +0,0 @@ -package com.evalkit.framework.eval.node.dataloader; - -import com.evalkit.framework.eval.node.dataloader.config.JdbcDataLoaderConfig; -import org.junit.jupiter.api.Test; - -class JdbcDataLoaderTest { - void test() { - JdbcDataLoader jdbcDataLoader = new JdbcDataLoader( - JdbcDataLoaderConfig.builder() - .driver("com.mysql.jdbc.Driver") - .url("jdbc:mysql://127.0.0.1:3306/evalkit?useSSL=false&serverTimezone=Asia/Shanghai&characterEncoding=utf8") - .user("root") - .password("root") - .build() - ) { - @Override - public String prepareSql() { - return "select * from testcase"; - } - }; - } 
-} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader/JsonFileDataLoaderTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader/JsonFileDataLoaderTest.java deleted file mode 100644 index eabb610..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader/JsonFileDataLoaderTest.java +++ /dev/null @@ -1,105 +0,0 @@ -package com.evalkit.framework.eval.node.dataloader; - -import com.evalkit.framework.common.utils.file.FileUtils; -import com.evalkit.framework.common.utils.json.JsonUtils; -import com.evalkit.framework.eval.model.InputData; -import com.evalkit.framework.eval.node.dataloader.config.JsonFileDataLoaderConfig; -import lombok.extern.slf4j.Slf4j; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.List; -import java.util.Map; - -@Slf4j -class JsonFileDataLoaderTest { - - private String jsonObjectFilePath; - private String jsonArrayFilePath; - private String jsonArrayFilePath2; - - /** - * 构造Json䞎时文件 - */ - @BeforeEach - public void setUp() throws IOException { - String j1 = "{\"code\":0,\"success\":true,\"data\":{\t\"query\":\"hello\",\"type\":\"test\"}}"; - String j2 = "{\"code\":0,\"success\":true,\"data\":[{\"query\":\"hello\",\"type\":\"test\"},{\"query\":\"hi\",\"type\":\"test\"}]}"; - String j3 = "[{\"query\":\"hello\",\"type\":\"test\"},{\"query\":\"hi\",\"type\":\"test\"}]"; - - Path jsonObjectTempFile = Files.createTempFile("temp", ".json"); - jsonObjectFilePath = jsonObjectTempFile.toString(); - Path jsonArrayTempFile = Files.createTempFile("temp", ".json"); - jsonArrayFilePath = jsonArrayTempFile.toString(); - Path jsonArrayTempFile2 = Files.createTempFile("temp", ".json"); - jsonArrayFilePath2 = jsonArrayTempFile2.toString(); - 
JsonUtils.writeJsonFile(jsonObjectFilePath, JsonUtils.fromJson(j1, Map.class)); - JsonUtils.writeJsonFile(jsonArrayFilePath, JsonUtils.fromJson(j2, Map.class)); - JsonUtils.writeJsonFile(jsonArrayFilePath2, JsonUtils.fromJson(j3, List.class)); - } - - /** - * 执行删陀䞎时文件 - */ - @AfterEach - public void tearDown() { - FileUtils.deleteFile(jsonObjectFilePath); - FileUtils.deleteFile(jsonArrayFilePath); - } - - @Test - public void testLoadJsonObject() throws Exception { - JsonFileDataLoader dataLoader = new JsonFileDataLoader( - JsonFileDataLoaderConfig.builder() - .jsonPath("$") - .filePath(jsonObjectFilePath) - .build() - ); - List inputData = dataLoader.prepareDataList(); - log.info("Json File DataLoader: {}", inputData); - Assertions.assertEquals(1, inputData.size()); - } - - @Test - public void testLoadJsonObjectWithJsonpath() throws Exception { - JsonFileDataLoader dataLoader = new JsonFileDataLoader( - JsonFileDataLoaderConfig.builder() - .jsonPath("$.data") - .filePath(jsonObjectFilePath) - .build() - ); - List inputData = dataLoader.prepareDataList(); - log.info("Json File DataLoader: {}", inputData); - Assertions.assertEquals(1, inputData.size()); - } - - @Test - public void testLoadJsonArray() throws Exception { - JsonFileDataLoader dataLoader = new JsonFileDataLoader( - JsonFileDataLoaderConfig.builder() - .jsonPath("$.data") - .filePath(jsonArrayFilePath) - .build() - ); - List inputData = dataLoader.prepareDataList(); - log.info("Json File DataLoader: {}", inputData); - Assertions.assertEquals(2, inputData.size()); - } - - @Test - public void testLoadJsonArray2() throws Exception { - JsonFileDataLoader dataLoader = new JsonFileDataLoader( - JsonFileDataLoaderConfig.builder() - .filePath(jsonArrayFilePath2) - .build() - ); - List inputData = dataLoader.prepareDataList(); - log.info("Json File DataLoader: {}", inputData); - Assertions.assertEquals(2, inputData.size()); - } -} \ No newline at end of file diff --git 
a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader/JsonTextDataLoaderTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader/JsonTextDataLoaderTest.java deleted file mode 100644 index 08f963d..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader/JsonTextDataLoaderTest.java +++ /dev/null @@ -1,20 +0,0 @@ -package com.evalkit.framework.eval.node.dataloader; - -import org.junit.jupiter.api.Test; - -class JsonTextDataLoaderTest { - - void test() { - JsonTextDataLoader jsonTextDataLoader = new JsonTextDataLoader() { - @Override - public String prepareJsonpath() { - return "$"; - } - - @Override - public String prepareJson() { - return "{\"query\":\"hello\"}"; - } - }; - } -} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader/MultiDataLoaderTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader/MultiDataLoaderTest.java deleted file mode 100644 index 70aa28a..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader/MultiDataLoaderTest.java +++ /dev/null @@ -1,45 +0,0 @@ -package com.evalkit.framework.eval.node.dataloader; - -import com.evalkit.framework.common.utils.list.ListUtils; -import com.evalkit.framework.common.utils.map.MapUtils; -import com.evalkit.framework.eval.model.InputData; -import lombok.extern.slf4j.Slf4j; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -import java.util.List; - -@Slf4j -class MultiDataLoaderTest { - - MultiDataLoader multiDataLoader; - - @BeforeEach - void setUp() { - DataLoader d1 = new DataLoader() { - @Override - public List prepareDataList() throws Exception { - return ListUtils.of( - new InputData(MapUtils.of("query", "1")) - ); - } - }; - DataLoader d2 = new DataLoader() { - @Override - public List prepareDataList() throws Exception { - return ListUtils.of( - new 
InputData(MapUtils.of("query", "2")) - ); - } - }; - multiDataLoader = new MultiDataLoader(ListUtils.of(d1, d2)); - } - - @Test - public void testPrepareDataList() { - List inputData = multiDataLoader.prepareDataList(); - log.info("multi data loader: {}", inputData); - Assertions.assertEquals(2, inputData.size()); - } -} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader/datainjector/DataInjectorTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader/datainjector/DataInjectorTest.java deleted file mode 100644 index c50c3aa..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader/datainjector/DataInjectorTest.java +++ /dev/null @@ -1,54 +0,0 @@ -package com.evalkit.framework.eval.node.dataloader.datainjector; - -import com.evalkit.framework.common.utils.runtime.RuntimeEnvUtils; -import com.evalkit.framework.common.utils.time.DateUtils; -import com.evalkit.framework.eval.model.DataItem; -import com.evalkit.framework.eval.model.ScorerResult; -import com.evalkit.framework.eval.node.begin.Begin; -import com.evalkit.framework.eval.node.counter.BasicCounter; -import com.evalkit.framework.eval.node.dataloader.JsonDataLoader; -import com.evalkit.framework.eval.node.dataloader.JsonFileDataLoader; -import com.evalkit.framework.eval.node.dataloader.config.JsonFileDataLoaderConfig; -import com.evalkit.framework.eval.node.reporter.CsvReporter; -import com.evalkit.framework.eval.node.reporter.ExcelReporter; -import com.evalkit.framework.eval.node.reporter.JsonReporter; -import com.evalkit.framework.eval.node.reporter.html.HtmlReporter; -import com.evalkit.framework.eval.node.scorer.Scorer; -import com.evalkit.framework.workflow.WorkflowBuilder; -import org.junit.jupiter.api.Disabled; -import org.junit.jupiter.api.Test; - -class DataInjectorTest { - String filePath = RuntimeEnvUtils.getPropertyFromResource("secret.properties", "json-file-datainjector-test-file"); - - 
@Test - @Disabled - void test() { - Begin begin = new Begin(); - - JsonDataLoader jsonDataLoader = new JsonFileDataLoader( - JsonFileDataLoaderConfig.builder() - .jsonPath("$.dataItems") - .filePath(filePath) - .openInjectData(true) - .build() - ); - - BasicCounter basicCounter = new BasicCounter(); - - Scorer scorer99 = new Scorer() { - @Override - public ScorerResult eval(DataItem dataItem) throws Exception { - return new ScorerResult("评䌰噚99", 0, 1, "无理由", null); - } - }; - - String fileName = "DataInjectorTest_" + DateUtils.nowToString("yyyyMMddHHmmss"); - HtmlReporter htmlReporter = new HtmlReporter(fileName, fileName); - JsonReporter jsonReporter = new JsonReporter(fileName, fileName); - ExcelReporter excelReporter = new ExcelReporter(fileName, fileName); - CsvReporter csvReporter = new CsvReporter(fileName, fileName); - - new WorkflowBuilder().link(begin, jsonDataLoader, scorer99, basicCounter, htmlReporter, jsonReporter, excelReporter, csvReporter).build().execute(); - } -} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader_wrapper/DataLoaderWrapperTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader_wrapper/DataLoaderWrapperTest.java index 919c754..2b8cb00 100644 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader_wrapper/DataLoaderWrapperTest.java +++ b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader_wrapper/DataLoaderWrapperTest.java @@ -1,17 +1,333 @@ package com.evalkit.framework.eval.node.dataloader_wrapper; +import com.evalkit.framework.eval.context.WorkflowContextOps; import com.evalkit.framework.eval.model.DataItem; +import com.evalkit.framework.eval.model.InputData; import com.evalkit.framework.eval.node.dataloader_wrapper.config.DataLoaderWrapperConfig; +import com.evalkit.framework.workflow.model.WorkflowContext; +import lombok.extern.slf4j.Slf4j; +import org.junit.jupiter.api.DisplayName; +import 
org.junit.jupiter.api.Test; +import java.util.*; +import java.util.concurrent.CopyOnWriteArrayList; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicReference; + +import static org.junit.jupiter.api.Assertions.*; + +@Slf4j +@DisplayName("DataLoaderWrapper 单元测试") class DataLoaderWrapperTest { - void test() { - DataLoaderWrapper dataLoaderWrapper = new DataLoaderWrapper( - DataLoaderWrapperConfig.builder().build() - ) { + + // ===================== 工具方法 ===================== + + /** + * 构建䞀䞪简单的 DataLoaderWrapperwrapper 逻蟑由 Runnable 提䟛 + */ + private DataLoaderWrapper buildWrapper(java.util.function.Consumer wrapperLogic) { + return new DataLoaderWrapper() { + @Override + protected void wrapper(DataItem dataItem) { + wrapperLogic.accept(dataItem); + } + }; + } + + /** + * 构建垊自定义 config 的 DataLoaderWrapper + */ + private DataLoaderWrapper buildWrapper(DataLoaderWrapperConfig config, + java.util.function.Consumer wrapperLogic) { + return new DataLoaderWrapper(config) { + @Override + protected void wrapper(DataItem dataItem) { + wrapperLogic.accept(dataItem); + } + }; + } + + /** + * 构造包含指定条数 DataItem 的 WorkflowContext + */ + private WorkflowContext buildContextWithDataItems(int size) { + WorkflowContext ctx = new WorkflowContext(); + List items = new CopyOnWriteArrayList<>(); + for (int i = 0; i < size; i++) { + Map inputItem = new HashMap<>(); + inputItem.put("id", i); + inputItem.put("value", "v" + i); + items.add(new DataItem((long) i, new InputData(inputItem))); + } + WorkflowContextOps.setDataItems(ctx, items); + return ctx; + } + + /** + * 䞺 DataLoaderWrapper 泚入䞊䞋文并执行 + */ + private void executeWithContext(DataLoaderWrapper wrapper, WorkflowContext ctx) { + wrapper.setWorkflowContext(ctx); + try { + wrapper.call(); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + // ===================== constructor 测试 ===================== + + @Test + 
@DisplayName("无参构造噚应䜿甚默讀 DataLoaderWrapperConfigthreadNum=1") + void testConstructor_defaultConfig() { + DataLoaderWrapper wrapper = buildWrapper(dataItem -> { + }); + assertNotNull(wrapper.config, "默讀构造噚应初始化 config"); + assertEquals(1, wrapper.config.getThreadNum(), "默讀线皋数应䞺 1"); + } + + @Test + @DisplayName("垊 DataLoaderWrapperConfig 构造噚应正确保存配眮") + void testConstructor_withConfig() { + DataLoaderWrapperConfig config = DataLoaderWrapperConfig.builder().threadNum(4).build(); + DataLoaderWrapper wrapper = buildWrapper(config, dataItem -> { + }); + assertEquals(4, wrapper.config.getThreadNum()); + } + + // ===================== executeWrapper 测试 ===================== + + @Test + @DisplayName("executeWrapper 正垞执行时应返回同䞀䞪 DataItem 实䟋") + void testExecuteWrapper_returnsSameDataItem() { + DataLoaderWrapper wrapper = buildWrapper(dataItem -> { + }); + DataItem dataItem = new DataItem(0L, new InputData(new HashMap<>())); + DataItem result = wrapper.executeWrapper(dataItem); + assertSame(dataItem, result, "executeWrapper 应返回同䞀 DataItem 实䟋"); + } + + @Test + @DisplayName("executeWrapper äž­ wrapper 逻蟑可修改 DataItem 的 InputData 字段") + void testExecuteWrapper_wrapperModifiesDataItem() { + DataLoaderWrapper wrapper = buildWrapper(dataItem -> + dataItem.getInputData().set("modified", true)); + + Map inputItem = new HashMap<>(); + DataItem dataItem = new DataItem(0L, new InputData(inputItem)); + wrapper.executeWrapper(dataItem); + + assertEquals(true, dataItem.getInputData().get("modified"), + "wrapper 应胜修改 DataItem 的 InputData 字段"); + } + + @Test + @DisplayName("executeWrapper äž­ wrapper 抛出匂垞时应被捕获返回原 DataItem 䞍抛出") + void testExecuteWrapper_wrapperThrows_returnOriginalItem() { + DataLoaderWrapper wrapper = buildWrapper(dataItem -> { + throw new RuntimeException("mock wrapper error"); + }); + + DataItem dataItem = new DataItem(0L, new InputData(new HashMap<>())); + DataItem result = assertDoesNotThrow(() -> wrapper.executeWrapper(dataItem), + "wrapper 抛匂垞时 executeWrapper 
䞍应向倖抛出"); + assertSame(dataItem, result, "抛匂垞后应返回原始 DataItem"); + } + + // ===================== 钩子方法测试 ===================== + + @Test + @DisplayName("beforeWrapper 钩子圚 wrapper 前被调甚") + void testBeforeWrapper_called() { + AtomicBoolean beforeCalled = new AtomicBoolean(false); + AtomicBoolean wrapperCalled = new AtomicBoolean(false); + List callOrder = new ArrayList<>(); + + DataLoaderWrapper wrapper = new DataLoaderWrapper() { + @Override + protected void beforeWrapper(DataItem dataItem) { + beforeCalled.set(true); + callOrder.add("before"); + } + + @Override + protected void wrapper(DataItem dataItem) { + wrapperCalled.set(true); + callOrder.add("wrapper"); + } + }; + + wrapper.executeWrapper(new DataItem(0L, new InputData(new HashMap<>()))); + assertTrue(beforeCalled.get(), "beforeWrapper 应被调甚"); + assertEquals(Arrays.asList("before", "wrapper"), callOrder, "before 应圚 wrapper 之前调甚"); + } + + @Test + @DisplayName("afterWrapper 钩子圚 wrapper 后被调甚") + void testAfterWrapper_called() { + List callOrder = new ArrayList<>(); + + DataLoaderWrapper wrapper = new DataLoaderWrapper() { + @Override + protected void wrapper(DataItem dataItem) { + callOrder.add("wrapper"); + } + + @Override + protected void afterWrapper(DataItem dataItem) { + callOrder.add("after"); + } + }; + + wrapper.executeWrapper(new DataItem(0L, new InputData(new HashMap<>()))); + assertEquals(Arrays.asList("wrapper", "after"), callOrder, "after 应圚 wrapper 之后调甚"); + } + + @Test + @DisplayName("wrapper 抛匂垞时 onWrapperError 钩子被调甚并䌠入正确匂垞") + void testOnWrapperError_called() { + AtomicBoolean errorCalled = new AtomicBoolean(false); + AtomicReference capturedError = new AtomicReference<>(); + + DataLoaderWrapper wrapper = new DataLoaderWrapper() { @Override protected void wrapper(DataItem dataItem) { - // 增区dataItem + throw new RuntimeException("test-error"); + } + + @Override + protected void onWrapperError(DataItem dataItem, Throwable e) { + errorCalled.set(true); + capturedError.set(e); } }; + + 
wrapper.executeWrapper(new DataItem(0L, new InputData(new HashMap<>()))); + assertTrue(errorCalled.get(), "wrapper 抛匂垞时 onWrapperError 应被调甚"); + assertNotNull(capturedError.get()); + assertEquals("test-error", capturedError.get().getMessage()); + } + + @Test + @DisplayName("wrapper 抛匂垞时 afterWrapper 䞍被调甚") + void testAfterWrapper_notCalledOnError() { + AtomicBoolean afterCalled = new AtomicBoolean(false); + + DataLoaderWrapper wrapper = new DataLoaderWrapper() { + @Override + protected void wrapper(DataItem dataItem) { + throw new RuntimeException("error"); + } + + @Override + protected void afterWrapper(DataItem dataItem) { + afterCalled.set(true); + } + }; + + wrapper.executeWrapper(new DataItem(0L, new InputData(new HashMap<>()))); + assertFalse(afterCalled.get(), "wrapper 抛匂垞时 afterWrapper 䞍应被调甚"); + } + + // ===================== doExecute 测试 ===================== + + @Test + @DisplayName("doExecute 对 WorkflowContext 䞭的每䞪 DataItem 郜应执行 wrapper") + void testDoExecute_wrapperCalledForEachDataItem() { + AtomicInteger wrapperCount = new AtomicInteger(0); + DataLoaderWrapper wrapper = buildWrapper(dataItem -> wrapperCount.incrementAndGet()); + + WorkflowContext ctx = buildContextWithDataItems(5); + executeWithContext(wrapper, ctx); + + assertEquals(5, wrapperCount.get(), "wrapper 应对每䞪 DataItem 郜被调甚䞀次"); + } + + @Test + @DisplayName("doExecute 后 DataItem 䞭的修改应被持久化到 WorkflowContext") + void testDoExecute_modificationsPersisted() { + DataLoaderWrapper wrapper = buildWrapper(dataItem -> + dataItem.getInputData().set("wrapped", true)); + + WorkflowContext ctx = buildContextWithDataItems(3); + executeWithContext(wrapper, ctx); + + List dataItems = WorkflowContextOps.getDataItems(ctx); + for (DataItem dataItem : dataItems) { + assertEquals(true, dataItem.getInputData().get("wrapped"), + "每䞪 DataItem 的 InputData 郜应包含 wrapper 写入的字段"); + } + } + + @Test + @DisplayName("doExecute 时郚分 wrapper 抛匂垞其䜙 DataItem 仍应正垞倄理") + void testDoExecute_partialFailure_othersSucceed() { + 
DataLoaderWrapper wrapper = buildWrapper(dataItem -> { + if (dataItem.getDataIndex() == 1L) { + throw new RuntimeException("mock error"); + } + dataItem.getInputData().set("done", true); + }); + + WorkflowContext ctx = buildContextWithDataItems(3); + executeWithContext(wrapper, ctx); + + List dataItems = WorkflowContextOps.getDataItems(ctx); + assertEquals(true, dataItems.get(0).getInputData().get("done"), + "index=0 应正垞完成"); + assertNull(dataItems.get(1).getInputData().get("done"), + "index=1 wrapper 倱莥done 字段䞍应被讟眮"); + assertEquals(true, dataItems.get(2).getInputData().get("done"), + "index=2 应正垞完成"); + } + + @Test + @DisplayName("doExecute 时 DataItem 列衚䞺 null䞍应抛出匂垞") + void testDoExecute_nullDataItems_noThrow() { + DataLoaderWrapper wrapper = buildWrapper(dataItem -> { + }); + WorkflowContext ctx = new WorkflowContext(); + // 䞍讟眮 dataItems默讀䞺 null + assertDoesNotThrow(() -> executeWithContext(wrapper, ctx), + "DataItems 䞺 null 时 doExecute 䞍应抛出匂垞"); + } + + @Test + @DisplayName("doExecute 时 DataItem 列衚䞺空䞍应抛出匂垞") + void testDoExecute_emptyDataItems_noThrow() { + DataLoaderWrapper wrapper = buildWrapper(dataItem -> { + }); + WorkflowContext ctx = new WorkflowContext(); + WorkflowContextOps.setDataItems(ctx, new CopyOnWriteArrayList<>()); + assertDoesNotThrow(() -> executeWithContext(wrapper, ctx), + "DataItems 䞺空时 doExecute 䞍应抛出匂垞"); + } + + @Test + @DisplayName("executeWrapper 䞉䞪钩子按 before→wrapper→after 顺序执行") + void testExecuteWrapper_hookOrder() { + List order = new ArrayList<>(); + + DataLoaderWrapper wrapper = new DataLoaderWrapper() { + @Override + protected void beforeWrapper(DataItem dataItem) { + order.add("before"); + } + + @Override + protected void wrapper(DataItem dataItem) { + order.add("wrapper"); + } + + @Override + protected void afterWrapper(DataItem dataItem) { + order.add("after"); + } + }; + + wrapper.executeWrapper(new DataItem(0L, new InputData(new HashMap<>()))); + assertEquals(Arrays.asList("before", "wrapper", "after"), order, + "钩子应按 
before→wrapper→after 顺序执行"); } } \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader_wrapper/PolishDataLoaderWrapperTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader_wrapper/PolishDataLoaderWrapperTest.java deleted file mode 100644 index aed09a1..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader_wrapper/PolishDataLoaderWrapperTest.java +++ /dev/null @@ -1,16 +0,0 @@ -package com.evalkit.framework.eval.node.dataloader_wrapper; - -import com.evalkit.framework.eval.node.dataloader_wrapper.config.PolishDataLoaderWrapperConfig; - -class PolishDataLoaderWrapperTest { - void test() { - PolishDataLoaderWrapper polishDataLoaderWrapper = new PolishDataLoaderWrapper( - PolishDataLoaderWrapperConfig.builder().build() - ) { - @Override - public String selectField() { - return ""; - } - }; - } -} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader_wrapper/PromptDataLoaderWrapperTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader_wrapper/PromptDataLoaderWrapperTest.java deleted file mode 100644 index 1a02410..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader_wrapper/PromptDataLoaderWrapperTest.java +++ /dev/null @@ -1,21 +0,0 @@ -package com.evalkit.framework.eval.node.dataloader_wrapper; - -import com.evalkit.framework.eval.node.dataloader_wrapper.config.DataLoaderWrapperConfig; - -class PromptDataLoaderWrapperTest { - void test() { - PromptDataLoaderWrapper promptDataLoaderWrapper = new PromptDataLoaderWrapper( - DataLoaderWrapperConfig.builder().build() - ) { - @Override - public String preparePrompt() { - return ""; - } - - @Override - public String selectField() { - return ""; - } - }; - } -} \ No newline at end of file diff --git 
a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader_wrapper/mock/mocker/DateMockerTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader_wrapper/mock/mocker/DateMockerTest.java deleted file mode 100644 index 7677d88..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader_wrapper/mock/mocker/DateMockerTest.java +++ /dev/null @@ -1,115 +0,0 @@ -package com.evalkit.framework.eval.node.dataloader_wrapper.mock.mocker; - -import com.evalkit.framework.common.utils.time.DateUtils; -import com.evalkit.framework.eval.mock.mocker.DateMocker; -import lombok.extern.slf4j.Slf4j; -import org.junit.jupiter.api.RepeatedTest; -import org.junit.jupiter.api.Test; - -import java.text.ParseException; -import java.text.SimpleDateFormat; -import java.util.Arrays; -import java.util.Collections; -import java.util.Date; - -import static org.junit.jupiter.api.Assertions.*; - -@Slf4j -class DateMockerTest { - private final DateMocker mocker = new DateMocker(); - - @Test - void testSupportRuleName() { - assertTrue(mocker.support("date", null)); - assertTrue(mocker.support("future_date", null)); - assertTrue(mocker.support("past_date", null)); - assertFalse(mocker.support("random_string", null)); - } - - @Test - void testNowStrategyDefaultPattern() throws ParseException { - String result = mocker.mock("date", Collections.emptyList()); - assertNotNull(result); - new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").parse(result); - } - - @Test - void testNowStrategyCustomPattern() throws ParseException { - String pattern = "yyyy/MM/dd"; - String result = mocker.mock("date", Collections.singletonList(pattern)); - assertNotNull(result); - new SimpleDateFormat(pattern).parse(result); - } - - @RepeatedTest(100) - void testFutureDateWithinRange() throws ParseException { - String result = mocker.mock("future_date", Arrays.asList("15", "365")); - log.info("result:{}", result); - assertNotNull(result); - SimpleDateFormat sdf = new 
SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); - assertTrue(sdf.parse(result).after(DateUtils.addDays(new Date(), 14))); - assertTrue(sdf.parse(result).before(DateUtils.addDays(new Date(), 366))); - - } - - @RepeatedTest(100) - void testPastDateWithinRange() throws ParseException { - String result = mocker.mock("past_date", Arrays.asList("15", "365")); - log.info("result:{}", result); - assertNotNull(result); - SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); - assertTrue(sdf.parse(result).before(DateUtils.addDays(new Date(), -14))); - assertTrue(sdf.parse(result).after(DateUtils.addDays(new Date(), -366))); - } - - @RepeatedTest(100) - void testFutureDateWithCustomPattern() throws ParseException { - String result = mocker.mock("future_date", Arrays.asList("366", "yyyy/MM/dd")); - log.info("result:{}", result); - assertNotNull(result); - SimpleDateFormat sdf = new SimpleDateFormat("yyyy/MM/dd"); - assertTrue(sdf.parse(result).before(DateUtils.addDays(new Date(), 366))); - } - - @RepeatedTest(100) - void testPastDateWithCustomPattern() throws ParseException { - String result = mocker.mock("past_date", Arrays.asList("365", "yyyy/MM/dd")); - log.info("result:{}", result); - assertNotNull(result); - SimpleDateFormat sdf = new SimpleDateFormat("yyyy/MM/dd"); - assertTrue(sdf.parse(result).after(DateUtils.addDays(new Date(), -366))); - } - - @RepeatedTest(100) - void testFutureDateWithinRangeWithCustomPattern() throws ParseException { - String result = mocker.mock("future_date", Arrays.asList("15", "365", "yyyy/MM/dd")); - log.info("result:{}", result); - assertNotNull(result); - SimpleDateFormat sdf = new SimpleDateFormat("yyyy/MM/dd"); - assertTrue(sdf.parse(result).after(DateUtils.addDays(new Date(), 14))); - assertTrue(sdf.parse(result).before(DateUtils.addDays(new Date(), 366))); - } - - @RepeatedTest(100) - void testPastDateWithinRangeWithCustomPattern() throws ParseException { - String result = mocker.mock("past_date", Arrays.asList("15", "365", 
"yyyy/MM/dd")); - log.info("result:{}", result); - assertNotNull(result); - SimpleDateFormat sdf = new SimpleDateFormat("yyyy/MM/dd"); - assertTrue(sdf.parse(result).before(DateUtils.addDays(new Date(), -14))); - assertTrue(sdf.parse(result).after(DateUtils.addDays(new Date(), -366))); - } - - @Test - void testInvalidArgsThrowsException() { - IllegalArgumentException ex = assertThrows(IllegalArgumentException.class, - () -> mocker.mock("future_date", Arrays.asList("abc", "xyz"))); - log.info(ex.getMessage()); - assertTrue(ex.getMessage().contains("Error parsing args")); - } - - @Test - void testUnsupportedRuleReturnsNull() { - assertNull(mocker.mock("unknown_rule", Collections.emptyList())); - } -} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader_wrapper/mock/mocker/NumberMockerTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader_wrapper/mock/mocker/NumberMockerTest.java deleted file mode 100644 index 353865a..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/dataloader_wrapper/mock/mocker/NumberMockerTest.java +++ /dev/null @@ -1,164 +0,0 @@ -package com.evalkit.framework.eval.node.dataloader_wrapper.mock.mocker; - -import com.evalkit.framework.eval.mock.mocker.NumberMocker; -import lombok.extern.slf4j.Slf4j; -import org.junit.jupiter.api.RepeatedTest; -import org.junit.jupiter.api.Test; - -import java.util.Arrays; -import java.util.Collections; - -import static org.junit.jupiter.api.Assertions.*; - -@Slf4j -class NumberMockerTest { - private final NumberMocker mocker = new NumberMocker(); - - @Test - void testSupportRuleName() { - assertTrue(mocker.support("int", null)); - assertTrue(mocker.support("INT", null)); - assertTrue(mocker.support("float", null)); - assertTrue(mocker.support("FLOAT", null)); - assertFalse(mocker.support("string", null)); - assertFalse(mocker.support("date", null)); - } - - @Test - void testIntegerDefaultRange() { - String 
result = mocker.mock("int", Collections.emptyList()); - assertNotNull(result); - int value = Integer.parseInt(result); - assertTrue(value >= 0 && value <= 100, "Value should be between 0 and 100"); - log.info("Random int (default range): {}", result); - } - - @RepeatedTest(50) - void testIntegerWithMinValue() { - String result = mocker.mock("int", Collections.singletonList("50")); - assertNotNull(result); - long value = Long.parseLong(result); - assertTrue(value >= 50 && value <= 100, "Value should be between 50 and 100"); - log.info("Random int (min=50): {}", result); - } - - @RepeatedTest(50) - void testIntegerWithRange() { - String result = mocker.mock("int", Arrays.asList("100", "200")); - assertNotNull(result); - long value = Long.parseLong(result); - assertTrue(value >= 100 && value <= 200, "Value should be between 100 and 200"); - log.info("Random int (100-200): {}", result); - } - - @Test - void testFloatDefaultRange() { - String result = mocker.mock("float", Collections.emptyList()); - assertNotNull(result); - double value = Double.parseDouble(result); - assertTrue(value >= 0.0 && value < 100.0, "Value should be between 0.0 and 100.0"); - log.info("Random float (default range): {}", result); - } - - @RepeatedTest(50) - void testFloatWithMinValue() { - String result = mocker.mock("float", Collections.singletonList("10.5")); - assertNotNull(result); - double value = Double.parseDouble(result); - assertTrue(value >= 10.5 && value < 100.0, "Value should be between 10.5 and 100.0"); - log.info("Random float (min=10.5): {}", result); - } - - @RepeatedTest(50) - void testFloatWithRange() { - String result = mocker.mock("float", Arrays.asList("5.5", "15.5")); - assertNotNull(result); - double value = Double.parseDouble(result); - assertTrue(value >= 5.5 && value < 15.5, "Value should be between 5.5 and 15.5"); - log.info("Random float (5.5-15.5): {}", result); - } - - @RepeatedTest(50) - void testNegativeIntegerRange() { - String result = mocker.mock("int", 
Arrays.asList("-100", "-10")); - assertNotNull(result); - long value = Long.parseLong(result); - assertTrue(value >= -100 && value <= -10, "Value should be between -100 and -10"); - log.info("Random int (negative range): {}", result); - } - - @RepeatedTest(50) - void testNegativeFloatRange() { - String result = mocker.mock("float", Arrays.asList("-50.5", "-10.5")); - assertNotNull(result); - double value = Double.parseDouble(result); - assertTrue(value >= -50.5 && value < -10.5, "Value should be between -50.5 and -10.5"); - log.info("Random float (negative range): {}", result); - } - - @Test - void testZeroValue() { - String result = mocker.mock("int", Arrays.asList("0", "0")); - assertNotNull(result); - long value = Long.parseLong(result); - assertEquals(0, value, "Value should be 0"); - log.info("Random int (0-0): {}", result); - } - - @Test - void testLargeIntegerValue() { - String result = mocker.mock("int", Arrays.asList("1000000", "2000000")); - assertNotNull(result); - long value = Long.parseLong(result); - assertTrue(value >= 1000000 && value <= 2000000, "Value should be in range"); - log.info("Random int (large range): {}", result); - } - - @Test - void testInvalidIntegerArgsThrowsException() { - IllegalArgumentException ex = assertThrows(IllegalArgumentException.class, - () -> mocker.mock("int", Collections.singletonList("abc"))); - log.info(ex.getMessage()); - assertTrue(ex.getMessage().contains("Error parsing args")); - } - - @Test - void testInvalidFloatArgsThrowsException() { - IllegalArgumentException ex = assertThrows(IllegalArgumentException.class, - () -> mocker.mock("float", Arrays.asList("10.5", "abc"))); - log.info(ex.getMessage()); - assertTrue(ex.getMessage().contains("Error parsing args")); - } - - @Test - void testTooManyArgsThrowsException() { - IllegalArgumentException ex = assertThrows(IllegalArgumentException.class, - () -> mocker.mock("int", Arrays.asList("10", "20", "30", "40"))); - log.info(ex.getMessage()); - 
assertTrue(ex.getMessage().contains("Error parsing args")); - } - - @Test - void testUnsupportedRuleReturnsNull() { - assertNull(mocker.mock("unknown_rule", Collections.emptyList())); - } - - @Test - void testIntegerCaseInsensitive() { - String result = mocker.mock("INT", Collections.emptyList()); - assertNotNull(result); - long value = Long.parseLong(result); - assertTrue(value >= 0 && value <= 100, "Value should be between 0 and 100"); - log.info("Random INT (case-insensitive): {}", result); - } - - @Test - void testFloatCaseInsensitive() { - String result = mocker.mock("FLOAT", Collections.emptyList()); - assertNotNull(result); - double value = Double.parseDouble(result); - assertTrue(value >= 0.0 && value < 100.0, "Value should be between 0.0 and 100.0"); - log.info("Random FLOAT (case-insensitive): {}", result); - } -} - diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/reporter/ApiReporterTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/reporter/ApiReporterTest.java deleted file mode 100644 index 6c8a9b4..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/reporter/ApiReporterTest.java +++ /dev/null @@ -1,30 +0,0 @@ -package com.evalkit.framework.eval.node.reporter; - -import com.evalkit.framework.eval.model.DataItem; - -import java.util.Collections; -import java.util.Map; - -class ApiReporterTest { - void test() { - String host = "http://localhost:8080"; - String api = "/api/test"; - String method = "POST"; - ApiReporter apiReporter = new ApiReporter(host, api, method) { - @Override - public Map prepareBody(DataItem item) { - return Collections.emptyMap(); - } - - @Override - public Map prepareHeader(DataItem item) { - return Collections.emptyMap(); - } - - @Override - public Map prepareParams(DataItem item) { - return Collections.emptyMap(); - } - }; - } -} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/reporter/JdbcReportTest.java 
b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/reporter/JdbcReportTest.java deleted file mode 100644 index 79e81c3..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/reporter/JdbcReportTest.java +++ /dev/null @@ -1,16 +0,0 @@ -package com.evalkit.framework.eval.node.reporter; - -class JdbcReportTest { - void test() { - String driver = "com.mysql.cj.jdbc.Driver"; - String url = "jdbc:mysql://127.0.0.1:3306/evalkit?useUnicode=true&characterEncoding=UTF-8&serverTimezone=Asia/Shanghai"; - String username = "root"; - String password = "123456"; - JdbcReport jdbcReport = new JdbcReport(driver, url, username, password) { - @Override - public String prepareTableName() { - return ""; - } - }; - } -} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/reporter/JsonReporterTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/reporter/JsonReporterTest.java deleted file mode 100644 index f8fa01f..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/reporter/JsonReporterTest.java +++ /dev/null @@ -1,9 +0,0 @@ -package com.evalkit.framework.eval.node.reporter; - -import static org.junit.jupiter.api.Assertions.*; - -class JsonReporterTest { - void test(){ - JsonReporter jsonReporter = new JsonReporter("test.json"); - } -} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/reporter/ReporterTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/reporter/ReporterTest.java deleted file mode 100644 index d0c4169..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/reporter/ReporterTest.java +++ /dev/null @@ -1,16 +0,0 @@ -package com.evalkit.framework.eval.node.reporter; - -import com.evalkit.framework.eval.model.ReportData; - -import java.io.IOException; - -class ReporterTest { - void test() { - Reporter reporter = new Reporter() { - @Override - protected void report(ReportData 
reportData) throws IOException { - - } - }; - } -} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/DifyWorkflowScorerTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/DifyWorkflowScorerTest.java deleted file mode 100644 index e46d42a..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/DifyWorkflowScorerTest.java +++ /dev/null @@ -1,28 +0,0 @@ -package com.evalkit.framework.eval.node.scorer; - -import com.evalkit.framework.eval.model.ApiCompletionResult; -import com.evalkit.framework.eval.model.InputData; -import com.evalkit.framework.eval.model.ScorerResult; -import com.evalkit.framework.eval.node.scorer.config.DifyWorkflowScorerConfig; -import org.junit.jupiter.api.Test; - -import java.util.Collections; -import java.util.Map; - -class DifyWorkflowScorerTest { - void test() { - DifyWorkflowScorer difyWorkflowScorer = new DifyWorkflowScorer( - DifyWorkflowScorerConfig.builder().build() - ) { - @Override - public Map prepareInputParams(InputData inputData, ApiCompletionResult apiCompletionResult) { - return Collections.emptyMap(); - } - - @Override - public ScorerResult prepareScorerResult(InputData inputData, ApiCompletionResult apiCompletionResult, Map outputs) { - return null; - } - }; - } -} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/GSBScorerTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/GSBScorerTest.java deleted file mode 100644 index 692064b..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/GSBScorerTest.java +++ /dev/null @@ -1,60 +0,0 @@ -package com.evalkit.framework.eval.node.scorer; - -import com.evalkit.framework.common.utils.runtime.RuntimeEnvUtils; -import com.evalkit.framework.eval.model.ApiCompletionResult; -import com.evalkit.framework.eval.model.DataItem; -import com.evalkit.framework.eval.model.InputData; 
-import com.evalkit.framework.eval.model.ScorerResult; -import com.evalkit.framework.eval.node.scorer.config.PromptBasedScorerConfig; -import com.evalkit.framework.infra.service.llm.DeepSeekLLMService; -import com.evalkit.framework.infra.service.llm.LLMService; -import com.evalkit.framework.infra.service.llm.LLMServiceFactory; -import com.evalkit.framework.infra.service.llm.config.DeepseekLLMServiceConfig; -import com.evalkit.framework.infra.service.llm.config.LLMServiceConfig; -import lombok.extern.slf4j.Slf4j; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -@Slf4j -class GSBScorerTest { - LLMService llmService; - - @BeforeEach - void setUp() { - String deepSeekToken = RuntimeEnvUtils.getPropertyFromResource("secret.properties", "deepseek-token"); - LLMServiceFactory.registerLLMService("DeepSeek_Test", (LLMServiceFactory.LLMServiceBuilder) config -> new DeepSeekLLMService((DeepseekLLMServiceConfig) config)); - DeepseekLLMServiceConfig config = DeepseekLLMServiceConfig.builder() - .apiToken(deepSeekToken) - .build(); - llmService = LLMServiceFactory.createLLMService("DeepSeek_Test", config); - } - - @Test - void test() { - GSBScorer gsbScorer = new GSBScorer( - PromptBasedScorerConfig.builder() - .llmService(llmService) - .build() - ) { - @Override - public String prepareGoldAnswer(InputData inputData, ApiCompletionResult apiCompletionResult) { - return "乔垃斯是矎囜人"; - } - - @Override - public String prepareCandidateAnswer(InputData inputData, ApiCompletionResult apiCompletionResult) { - return "乔垃矎囜人"; - } - - @Override - public String prepareInput(InputData inputData, ApiCompletionResult apiCompletionResult) { - return "乔垃斯是非掲人"; - } - }; - DataItem dataItem = new DataItem(); - dataItem.setInputData(new InputData()); - dataItem.setApiCompletionResult(new ApiCompletionResult()); - ScorerResult scorerResult = gsbScorer.eval(dataItem); - log.error("scorerResult:{}", scorerResult); - } -} \ No newline at end of file diff --git 
a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/MultiCheckerBasedScorerTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/MultiCheckerBasedScorerTest.java deleted file mode 100644 index 950e839..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/MultiCheckerBasedScorerTest.java +++ /dev/null @@ -1,167 +0,0 @@ -package com.evalkit.framework.eval.node.scorer; - -import com.evalkit.framework.common.utils.list.ListUtils; -import com.evalkit.framework.common.utils.map.MapUtils; -import com.evalkit.framework.eval.model.DataItem; -import com.evalkit.framework.eval.model.InputData; -import com.evalkit.framework.eval.node.begin.Begin; -import com.evalkit.framework.eval.node.begin.config.BeginConfig; -import com.evalkit.framework.eval.node.dataloader.DataLoader; -import com.evalkit.framework.eval.node.reporter.StdReporter; -import com.evalkit.framework.eval.node.scorer.checker.AbstractChecker; -import com.evalkit.framework.eval.node.scorer.checker.Checker; -import com.evalkit.framework.eval.node.scorer.checker.config.CheckerConfig; -import com.evalkit.framework.eval.node.scorer.checker.constants.CheckMethod; -import com.evalkit.framework.eval.node.scorer.checker.model.CheckItem; -import com.evalkit.framework.eval.node.scorer.strategy.AvgScoreRateStrategy; -import com.evalkit.framework.workflow.WorkflowBuilder; -import org.junit.jupiter.api.Test; - -import java.util.List; - -/** - * 倚检查噚评䌰噚测试类 - */ -class MultiCheckerBasedScorerTest { - - /** - * 必过检查噚 - */ - class StarChecker extends AbstractChecker { - - public StarChecker() { - } - - public StarChecker(CheckerConfig config) { - super(config); - } - - /* 必过检查项 */ - private final CheckItem starCheckItem = CheckItem.builder() - .name("starCheckItem") - .star(true) - .build(); - /* 䞀般检查项 */ - private final CheckItem normalCheckItem = CheckItem.builder() - .name("normalCheckItem") - .star(false) - .build(); - - @Override - protected List 
prepareCheckItems(DataItem dataItem) { - return ListUtils.of( - starCheckItem, normalCheckItem - ); - } - - @Override - protected void check(DataItem dataItem) { - // 暡拟必过项没过,普通项通过 - starCheckItem.setScore(0); - starCheckItem.setExecuted(true); - starCheckItem.setReason("䞍通过"); - starCheckItem.setCheckMethod(CheckMethod.RULE); - - normalCheckItem.setScore(1); - normalCheckItem.setExecuted(true); - normalCheckItem.setReason("通过"); - normalCheckItem.setCheckMethod(CheckMethod.RULE); - } - - @Override - public boolean support(DataItem dataItem) { - return true; - } - - @Override - public double getTotalScore() { - return 2; - } - } - - /** - * 普通检查噚 - */ - class NormalChecker extends AbstractChecker { - - public NormalChecker() { - } - - public NormalChecker(CheckerConfig config) { - super(config); - } - - /* 䞀般检查项 */ - private final CheckItem normalCheckItem = CheckItem.builder() - .name("normalCheckItem") - .star(false) - .build(); - - @Override - protected List prepareCheckItems(DataItem dataItem) { - return ListUtils.of( - normalCheckItem - ); - } - - @Override - protected void check(DataItem dataItem) { - normalCheckItem.setScore(1); - normalCheckItem.setExecuted(true); - normalCheckItem.setReason("通过"); - normalCheckItem.setCheckMethod(CheckMethod.RULE); - } - - @Override - public boolean support(DataItem dataItem) { - return true; - } - - @Override - public double getTotalScore() { - return 1; - } - } - - /** - * 自定义评䌰噚 - */ - class CustomScorer extends MultiCheckerBasedScorer { - @Override - public List prepareCheckers(DataItem dataItem) { - return ListUtils.of( - new StarChecker( - CheckerConfig.builder().name("StarChecker").star(true).totalScore(2).build() - ), - new NormalChecker( - CheckerConfig.builder().name("NormalChecker").star(false).totalScore(2).build() - ) - ); - } - } - - @Test - void test() { - Begin begin = new Begin( - BeginConfig.builder() - .threshold(0.5) - .scoreStrategy(new AvgScoreRateStrategy()) - .build() - ); - - DataLoader dataLoader 
= new DataLoader() { - @Override - public List prepareDataList() throws Exception { - return ListUtils.of( - new InputData(MapUtils.of("query", "1")) - ); - } - }; - - CustomScorer customScorer = new CustomScorer(); - - StdReporter stdReporter = new StdReporter(); - - new WorkflowBuilder().link(begin, dataLoader, customScorer, stdReporter).build().execute(); - } -} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/PromptBasedScorerTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/PromptBasedScorerTest.java deleted file mode 100644 index b7b9bb1..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/PromptBasedScorerTest.java +++ /dev/null @@ -1,31 +0,0 @@ -package com.evalkit.framework.eval.node.scorer; - -import com.evalkit.framework.eval.model.ApiCompletionResult; -import com.evalkit.framework.eval.model.InputData; -import com.evalkit.framework.eval.node.scorer.config.PromptBasedScorerConfig; -import com.evalkit.framework.infra.service.llm.LLMServiceFactory; - -class PromptBasedScorerTest { - void test() { - PromptBasedScorer promptBasedScorer = new PromptBasedScorer( - PromptBasedScorerConfig.builder() - .llmService(LLMServiceFactory.createLLMService("test", null)) - .build() - ) { - @Override - public String prepareSysPrompt() { - return ""; - } - - @Override - public String prepareUserPrompt(InputData inputData, ApiCompletionResult apiCompletionResult) { - return ""; - } - - @Override - public LLMResult parseLLMReply(String reply) { - return null; - } - }; - } -} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/RouterScorerTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/RouterScorerTest.java deleted file mode 100644 index 3070501..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/RouterScorerTest.java +++ /dev/null @@ -1,577 +0,0 @@ 
-package com.evalkit.framework.eval.node.scorer; - -import com.evalkit.framework.common.utils.list.ListUtils; -import com.evalkit.framework.common.utils.map.MapUtils; -import com.evalkit.framework.eval.context.WorkflowContextOps; -import com.evalkit.framework.eval.model.DataItem; -import com.evalkit.framework.eval.model.InputData; -import com.evalkit.framework.eval.model.ScorerResult; -import com.evalkit.framework.eval.node.begin.Begin; -import com.evalkit.framework.eval.node.begin.config.BeginConfig; -import com.evalkit.framework.eval.node.dataloader.DataLoader; -import com.evalkit.framework.eval.node.reporter.StdReporter; -import com.evalkit.framework.eval.node.scorer.config.RouterScorerConfig; -import com.evalkit.framework.eval.node.scorer.config.ScorerConfig; -import com.evalkit.framework.eval.node.scorer.model.ScorerRoute; -import com.evalkit.framework.eval.node.scorer.strategy.SumScoreStrategy; -import com.evalkit.framework.workflow.WorkflowBuilder; -import com.evalkit.framework.workflow.model.WorkflowContext; -import org.junit.jupiter.api.DisplayName; -import org.junit.jupiter.api.Nested; -import org.junit.jupiter.api.Test; - -import java.util.Arrays; -import java.util.List; - -import static org.assertj.core.api.Assertions.assertThat; -import static org.assertj.core.api.Assertions.assertThatThrownBy; -import static org.junit.jupiter.api.Assertions.*; - -/** - * 方案B{@link RouterScorer} 路由评䌰噚的单元测试。 - * - *

测试芆盖 - *

    - *
  • 构造校验routes 䞺空时抛出 IllegalArgumentException
  • - *
  • first-match 暡匏呜䞭第䞀条规则后续规则䞍执行
  • - *
  • first-match 暡匏无路由呜䞭䞔无兜底返回跳过结果
  • - *
  • first-match 暡匏无路由呜䞭䜆有兜底 Scorer委托兜底执行
  • - *
  • match-all 暡匏所有呜䞭规则均执行结果取平均
  • - *
  • match-all 暡匏无呜䞭时返回跳过结果
  • - *
  • {@link ScorerRoute#of} 工厂方法
  • - *
  • {@link ScorerRoute#matches} 逻蟑
  • - *
  • 端到端䞉场景数据集RouterScorer 单节点完成所有场景分流
  • - *
- *

- */ -@DisplayName("方案B - RouterScorer 路由评䌰噚") -class RouterScorerTest { - - // ───────────────────────── 蟅助 Builder ───────────────────────── - - /** - * 构造䞀䞪固定返回 returnScore 的简单 Scorer䞍垊 condition - */ - private Scorer fixedScorer(String metric, double returnScore, double totalScore) { - ScorerConfig cfg = ScorerConfig.builder() - .metricName(metric) - .totalScore(totalScore) - .build(); - return new Scorer(cfg) { - @Override - public ScorerResult eval(DataItem dataItem) { - return new ScorerResult(metric, returnScore, totalScore, metric + " 评䌰结果"); - } - }; - } - - /** - * 构造垊 WorkflowContext 的 DataItem - */ - private DataItem buildDataItem(long index, String scene, Scorer scorer) { - WorkflowContext ctx = new WorkflowContext(); - WorkflowContextOps.setScorerStrategy(ctx, new SumScoreStrategy()); - WorkflowContextOps.setThreshold(ctx, 0.0); - scorer.setWorkflowContext(ctx); - - DataItem item = new DataItem(); - item.setDataIndex(index); - item.setInputData(new InputData(index, MapUtils.of("scene", scene))); - return item; - } - - // ═══════════════════════════════════════════════════════════════ - // 构造校验 - // ═══════════════════════════════════════════════════════════════ - - @Nested - @DisplayName("构造校验") - class ConstructorValidationTest { - - @Test - @DisplayName("routes 䞺 null 时抛出 IllegalArgumentException") - void nullRoutes_throwsIllegalArgument() { - assertThatThrownBy(() -> new RouterScorer( - RouterScorerConfig.builder() - .metricName("路由评䌰") - .routes(null) - .build() - )).isInstanceOf(IllegalArgumentException.class) - .hasMessageContaining("routes"); - } - - @Test - @DisplayName("routes 䞺空列衚时抛出 IllegalArgumentException") - void emptyRoutes_throwsIllegalArgument() { - assertThatThrownBy(() -> new RouterScorer( - RouterScorerConfig.builder() - .metricName("路由评䌰") - .routes(java.util.Collections.emptyList()) - .build() - )).isInstanceOf(IllegalArgumentException.class) - .hasMessageContaining("routes"); - } - } - - // 
═══════════════════════════════════════════════════════════════ - // ScorerRoute 工具方法 - // ═══════════════════════════════════════════════════════════════ - - @Nested - @DisplayName("ScorerRoute") - class ScorerRouteTest { - - @Test - @DisplayName("of() 工厂方法构造正确") - void of_buildsRouteCorrectly() { - Scorer scorer = fixedScorer("m", 1.0, 1.0); - ScorerRoute route = ScorerRoute.of(item -> true, scorer, "测试路由"); - - assertEquals("测试路由", route.getRouteName()); - assertNotNull(route.getCondition()); - assertSame(scorer, route.getScorer()); - } - - @Test - @DisplayName("matches() 条件䞺 true 时返回 true") - void matches_conditionTrue_returnsTrue() { - ScorerRoute route = ScorerRoute.of( - item -> "chat".equals(item.getInputData().get("scene")), - fixedScorer("m", 1.0, 1.0), - "对话场景" - ); - DataItem item = new DataItem(); - item.setInputData(new InputData(MapUtils.of("scene", "chat"))); - assertTrue(route.matches(item)); - } - - @Test - @DisplayName("matches() 条件䞺 false 时返回 false") - void matches_conditionFalse_returnsFalse() { - ScorerRoute route = ScorerRoute.of( - item -> "chat".equals(item.getInputData().get("scene")), - fixedScorer("m", 1.0, 1.0), - "对话场景" - ); - DataItem item = new DataItem(); - item.setInputData(new InputData(MapUtils.of("scene", "search"))); - assertFalse(route.matches(item)); - } - } - - // ═══════════════════════════════════════════════════════════════ - // first-match 暡匏默讀 - // ═══════════════════════════════════════════════════════════════ - - @Nested - @DisplayName("first-match 暡匏默讀") - class FirstMatchModeTest { - - @Test - @DisplayName("呜䞭第䞀条规则返回该规则的 Scorer 结果") - void firstMatch_hitsFirstRoute_returnsFirstResult() throws Exception { - Scorer chatScorer = fixedScorer("对话莚量", 0.8, 1.0); - Scorer searchScorer = fixedScorer("搜玢盞关性", 0.6, 1.0); - - RouterScorer router = new RouterScorer(RouterScorerConfig.builder() - .metricName("路由评䌰") - .routes(Arrays.asList( - ScorerRoute.of(item -> "chat".equals(item.getInputData().get("scene")), 
chatScorer, "对话"), - ScorerRoute.of(item -> "search".equals(item.getInputData().get("scene")), searchScorer, "搜玢") - )) - .build()); - - DataItem chatItem = buildDataItem(1L, "chat", router); - ScorerResult result = router.eval(chatItem); - - assertEquals("对话莚量", result.getMetric()); - assertEquals(0.8, result.getScore(), 1e-6); - assertEquals("对话莚量 评䌰结果", result.getReason()); - } - - @Test - @DisplayName("呜䞭第二条规则第䞀条未呜䞭返回第二条规则的结果") - void firstMatch_hitsSecondRoute_returnsSecondResult() throws Exception { - Scorer chatScorer = fixedScorer("对话莚量", 0.8, 1.0); - Scorer searchScorer = fixedScorer("搜玢盞关性", 0.6, 1.0); - - RouterScorer router = new RouterScorer(RouterScorerConfig.builder() - .metricName("路由评䌰") - .routes(Arrays.asList( - ScorerRoute.of(item -> "chat".equals(item.getInputData().get("scene")), chatScorer, "对话"), - ScorerRoute.of(item -> "search".equals(item.getInputData().get("scene")), searchScorer, "搜玢") - )) - .build()); - - DataItem searchItem = buildDataItem(2L, "search", router); - ScorerResult result = router.eval(searchItem); - - assertEquals("搜玢盞关性", result.getMetric()); - assertEquals(0.6, result.getScore(), 1e-6); - } - - @Test - @DisplayName("无路由呜䞭䞔无兜底返回跳过结果score=0, totalScore=0") - void firstMatch_noMatchNoDefault_returnsSkipResult() throws Exception { - Scorer chatScorer = fixedScorer("对话莚量", 0.8, 1.0); - - RouterScorer router = new RouterScorer(RouterScorerConfig.builder() - .metricName("路由评䌰") - .routes(ListUtils.of( - ScorerRoute.of(item -> "chat".equals(item.getInputData().get("scene")), chatScorer, "对话") - )) - .build()); - - DataItem unknownItem = buildDataItem(3L, "unknown", router); - ScorerResult result = router.eval(unknownItem); - - assertEquals("skipped by condition", result.getReason()); - assertEquals(0.0, result.getScore(), 1e-6); - assertEquals(0.0, result.getTotalScore(), 1e-6); - assertTrue(result.isSuccess()); - assertTrue(result.isPass()); - } - - @Test - @DisplayName("无路由呜䞭䜆有兜底 Scorer委托兜底执行") - void 
firstMatch_noMatchWithDefault_delegatesToDefaultScorer() throws Exception { - Scorer chatScorer = fixedScorer("对话莚量", 0.8, 1.0); - Scorer fallbackScorer = fixedScorer("兜底评䌰", 0.3, 1.0); - - RouterScorer router = new RouterScorer(RouterScorerConfig.builder() - .metricName("路由评䌰") - .routes(ListUtils.of( - ScorerRoute.of(item -> "chat".equals(item.getInputData().get("scene")), chatScorer, "对话") - )) - .defaultScorer(fallbackScorer) - .build()); - - DataItem unknownItem = buildDataItem(4L, "unknown", router); - ScorerResult result = router.eval(unknownItem); - - assertEquals("兜底评䌰", result.getMetric()); - assertEquals(0.3, result.getScore(), 1e-6); - } - - @Test - @DisplayName("倚条规则均呜䞭时only 第䞀条规则生效first-match 语义") - void firstMatch_multipleRoutesMatch_onlyFirstTaken() throws Exception { - Scorer scorer1 = fixedScorer("指标1", 0.9, 1.0); - Scorer scorer2 = fixedScorer("指标2", 0.5, 1.0); - - RouterScorer router = new RouterScorer(RouterScorerConfig.builder() - .metricName("路由评䌰") - .routes(Arrays.asList( - ScorerRoute.of(item -> true, scorer1, "党匹配1"), // 始终呜䞭 - ScorerRoute.of(item -> true, scorer2, "党匹配2") // 也始终呜䞭 - )) - .matchAll(false) - .build()); - - DataItem item = buildDataItem(5L, "any", router); - ScorerResult result = router.eval(item); - - // first-match: 只取第䞀条 - assertEquals("指标1", result.getMetric()); - assertEquals(0.9, result.getScore(), 1e-6); - } - } - - // ═══════════════════════════════════════════════════════════════ - // match-all 暡匏 - // ═══════════════════════════════════════════════════════════════ - - @Nested - @DisplayName("match-all 暡匏") - class MatchAllModeTest { - - @Test - @DisplayName("倚条规则均呜䞭结果取所有呜䞭 Scorer 的平均分") - void matchAll_allRouteMatch_returnsAvgScore() throws Exception { - Scorer scorer1 = fixedScorer("指标1", 0.8, 1.0); - Scorer scorer2 = fixedScorer("指标2", 0.6, 1.0); - - RouterScorer router = new RouterScorer(RouterScorerConfig.builder() - .metricName("倚绎评䌰") - .routes(Arrays.asList( - ScorerRoute.of(item -> true, scorer1, 
"绎床1"), - ScorerRoute.of(item -> true, scorer2, "绎床2") - )) - .matchAll(true) - .build()); - - DataItem item = buildDataItem(1L, "any", router); - ScorerResult result = router.eval(item); - - // 平均分 = (0.8 + 0.6) / 2 = 0.7 - assertEquals("倚绎评䌰", result.getMetric()); - assertThat(result.getScore()).isCloseTo(0.7, org.assertj.core.data.Offset.offset(1e-6)); - } - - @Test - @DisplayName("只有郚分规则呜䞭只对呜䞭规则求平均") - void matchAll_partialMatch_averagesMatchedOnly() throws Exception { - Scorer scorer1 = fixedScorer("指标1", 1.0, 1.0); - Scorer scorer2 = fixedScorer("指标2", 0.0, 1.0); - - RouterScorer router = new RouterScorer(RouterScorerConfig.builder() - .metricName("郚分匹配") - .routes(Arrays.asList( - ScorerRoute.of(item -> "chat".equals(item.getInputData().get("scene")), scorer1, "对话"), - ScorerRoute.of(item -> "search".equals(item.getInputData().get("scene")), scorer2, "搜玢") - )) - .matchAll(true) - .build()); - - // scene=chat 只呜䞭第䞀条规则 - DataItem chatItem = buildDataItem(1L, "chat", router); - ScorerResult result = router.eval(chatItem); - - // 只有 scorer1 呜䞭score = 1.0 - assertThat(result.getScore()).isCloseTo(1.0, org.assertj.core.data.Offset.offset(1e-6)); - } - - @Test - @DisplayName("match-all 无呜䞭时返回跳过结果") - void matchAll_noMatch_returnsSkipResult() throws Exception { - Scorer scorer = fixedScorer("指标", 1.0, 1.0); - - RouterScorer router = new RouterScorer(RouterScorerConfig.builder() - .metricName("无呜䞭") - .routes(ListUtils.of( - ScorerRoute.of(item -> "chat".equals(item.getInputData().get("scene")), scorer, "对话") - )) - .matchAll(true) - .build()); - - DataItem item = buildDataItem(1L, "unknown", router); - ScorerResult result = router.eval(item); - - assertEquals("skipped by condition", result.getReason()); - assertEquals(0.0, result.getTotalScore(), 1e-6); - } - - @Test - @DisplayName("match-all 理由拌接了所有呜䞭路由的 metric 和 reason") - void matchAll_reasonContainsAllMatchedMetrics() throws Exception { - Scorer scorer1 = fixedScorer("指标1", 0.9, 1.0); - Scorer scorer2 = 
fixedScorer("指标2", 0.7, 1.0); - - RouterScorer router = new RouterScorer(RouterScorerConfig.builder() - .metricName("倚绎路由") - .routes(Arrays.asList( - ScorerRoute.of(item -> true, scorer1, "绎床1"), - ScorerRoute.of(item -> true, scorer2, "绎床2") - )) - .matchAll(true) - .build()); - - DataItem item = buildDataItem(1L, "any", router); - ScorerResult result = router.eval(item); - - assertThat(result.getReason()).contains("指标1").contains("指标2"); - } - } - - // ═══════════════════════════════════════════════════════════════ - // workflowContext 䌠递校验 - // ═══════════════════════════════════════════════════════════════ - - @Nested - @DisplayName("workflowContext 䌠递") - class WorkflowContextPropagationTest { - - @Test - @DisplayName("子 Scorer 圚 eval 时可访问 RouterScorer 的 workflowContext") - void subScorer_receivesWorkflowContext() throws Exception { - // 子 Scorer 通过 getWorkflowContext() 读取 threshold 做断蚀 - final double[] capturedThreshold = {-1}; - ScorerConfig cfg = ScorerConfig.builder().metricName("䞊䞋文校验").totalScore(1.0).build(); - Scorer contextAwareScorer = new Scorer(cfg) { - @Override - public ScorerResult eval(DataItem dataItem) { - capturedThreshold[0] = WorkflowContextOps.getThreshold(getWorkflowContext()); - return new ScorerResult("䞊䞋文校验", 1.0, 1.0, "OK"); - } - }; - - RouterScorer router = new RouterScorer(RouterScorerConfig.builder() - .metricName("路由评䌰") - .routes(ListUtils.of( - ScorerRoute.of(item -> true, contextAwareScorer, "党匹配") - )) - .build()); - - WorkflowContext ctx = new WorkflowContext(); - WorkflowContextOps.setScorerStrategy(ctx, new SumScoreStrategy()); - WorkflowContextOps.setThreshold(ctx, 0.75); // 讟眮特定阈倌 - router.setWorkflowContext(ctx); - - DataItem item = new DataItem(); - item.setDataIndex(1L); - item.setInputData(new InputData(MapUtils.of("x", "y"))); - - router.eval(item); - - // 验证子 Scorer 拿到了正确的 threshold - assertEquals(0.75, capturedThreshold[0], 1e-6); - } - } - - // ═══════════════════════════════════════════════════════════════ - 
// 端到端集成测试通过 WorkflowBuilder - // ═══════════════════════════════════════════════════════════════ - - @Nested - @DisplayName("端到端RouterScorer + WorkflowBuilder") - class EndToEndTest { - - /** - * 数据集包含 chat/search/rag 䞉种场景各䞀条 - * RouterScorer 单节点通过 first-match 完成分流。 - * 验证每䞪 DataItem 的 EvalResult 分数来自对应场景的 Scorer。 - */ - @Test - @DisplayName("䞉场景数据集单䞪 RouterScorer 节点完成所有场景分流") - void endToEnd_threeScenes_singleRouterNode() { - Scorer chatScorer = fixedScorer("对话莚量", 0.8, 1.0); - Scorer searchScorer = fixedScorer("搜玢盞关性", 0.7, 1.0); - Scorer ragScorer = fixedScorer("RAG准确率", 0.9, 1.0); - - RouterScorer router = new RouterScorer(RouterScorerConfig.builder() - .metricName("场景路由评䌰") - .routes(Arrays.asList( - ScorerRoute.of(item -> "chat".equals(item.getInputData().get("scene")), chatScorer, "对话场景"), - ScorerRoute.of(item -> "search".equals(item.getInputData().get("scene")), searchScorer, "搜玢场景"), - ScorerRoute.of(item -> "rag".equals(item.getInputData().get("scene")), ragScorer, "RAG场景") - )) - .build()); - - Begin begin = new Begin(BeginConfig.builder() - .scoreStrategy(new SumScoreStrategy()) - .threshold(0) - .build()); - - DataLoader dataLoader = new DataLoader() { - @Override - public List prepareDataList() { - return ListUtils.of( - new InputData(MapUtils.of("scene", "chat", "query", "䜠奜")), - new InputData(MapUtils.of("scene", "search", "query", "搜玢词")), - new InputData(MapUtils.of("scene", "rag", "query", "文档问题")) - ); - } - }; - - StdReporter reporter = new StdReporter(); - new WorkflowBuilder().link(begin, dataLoader, router, reporter).build().execute(); - - WorkflowContext ctx = begin.getWorkflowContext(); - List dataItems = WorkflowContextOps.getDataItems(ctx); - assertThat(dataItems).hasSize(3); - - DataItem chatItem = dataItems.stream() - .filter(d -> "chat".equals(d.getInputData().get("scene"))) - .findFirst().orElseThrow(RuntimeException::new); - assertThat(chatItem.getEvalResult().getScore()).isCloseTo(0.8, 
org.assertj.core.data.Offset.offset(1e-6)); - // 验证 metric 是对话莚量由 chatScorer 的结果写入 - assertThat(chatItem.getEvalResult().getScorerResults().get(0).getMetric()).isEqualTo("对话莚量"); - - DataItem searchItem = dataItems.stream() - .filter(d -> "search".equals(d.getInputData().get("scene"))) - .findFirst().orElseThrow(RuntimeException::new); - assertThat(searchItem.getEvalResult().getScore()).isCloseTo(0.7, org.assertj.core.data.Offset.offset(1e-6)); - - DataItem ragItem = dataItems.stream() - .filter(d -> "rag".equals(d.getInputData().get("scene"))) - .findFirst().orElseThrow(RuntimeException::new); - assertThat(ragItem.getEvalResult().getScore()).isCloseTo(0.9, org.assertj.core.data.Offset.offset(1e-6)); - } - - @Test - @DisplayName("RouterScorer + 通甹 Scorer 䞲联通甚 Scorer 对所有 DataItem 生效路由 Scorer 按场景分流") - void endToEnd_routerPlusUniversalScorer() { - // 通甹 Scorer无 condition - Scorer universalScorer = fixedScorer("通甚栌匏检查", 0.5, 1.0); - - // 路由 Scorer - Scorer chatScorer = fixedScorer("对话莚量", 0.8, 1.0); - Scorer searchScorer = fixedScorer("搜玢盞关性", 0.6, 1.0); - RouterScorer router = new RouterScorer(RouterScorerConfig.builder() - .metricName("场景路由") - .routes(Arrays.asList( - ScorerRoute.of(item -> "chat".equals(item.getInputData().get("scene")), chatScorer, "对话"), - ScorerRoute.of(item -> "search".equals(item.getInputData().get("scene")), searchScorer, "搜玢") - )) - .build()); - - Begin begin = new Begin(BeginConfig.builder() - .scoreStrategy(new SumScoreStrategy()) - .build()); - - DataLoader dataLoader = new DataLoader() { - @Override - public List prepareDataList() { - return ListUtils.of( - new InputData(MapUtils.of("scene", "chat")), - new InputData(MapUtils.of("scene", "search")) - ); - } - }; - - StdReporter reporter = new StdReporter(); - new WorkflowBuilder().link(begin, dataLoader, universalScorer, router, reporter).build().execute(); - - WorkflowContext ctx = begin.getWorkflowContext(); - List dataItems = WorkflowContextOps.getDataItems(ctx); - - DataItem 
chatItem = dataItems.stream() - .filter(d -> "chat".equals(d.getInputData().get("scene"))) - .findFirst().orElseThrow(RuntimeException::new); - // chat: universalScorer(0.5) + chatScorer(0.8) = 1.3 - assertThat(chatItem.getEvalResult().getScore()).isCloseTo(1.3, org.assertj.core.data.Offset.offset(1e-6)); - - DataItem searchItem = dataItems.stream() - .filter(d -> "search".equals(d.getInputData().get("scene"))) - .findFirst().orElseThrow(RuntimeException::new); - // search: universalScorer(0.5) + searchScorer(0.6) = 1.1 - assertThat(searchItem.getEvalResult().getScore()).isCloseTo(1.1, org.assertj.core.data.Offset.offset(1e-6)); - } - - @Test - @DisplayName("未知场景数据䜿甚 defaultScorer 兜底") - void endToEnd_unknownScene_defaultScorerApplied() { - Scorer fallback = fixedScorer("兜底评䌰", 0.1, 1.0); - Scorer chatScorer = fixedScorer("对话莚量", 0.8, 1.0); - - RouterScorer router = new RouterScorer(RouterScorerConfig.builder() - .metricName("场景路由") - .routes(ListUtils.of( - ScorerRoute.of(item -> "chat".equals(item.getInputData().get("scene")), chatScorer, "对话") - )) - .defaultScorer(fallback) - .build()); - - Begin begin = new Begin(BeginConfig.builder() - .scoreStrategy(new SumScoreStrategy()) - .build()); - - DataLoader dataLoader = new DataLoader() { - @Override - public List prepareDataList() { - return ListUtils.of(new InputData(MapUtils.of("scene", "unknown"))); - } - }; - - StdReporter reporter = new StdReporter(); - new WorkflowBuilder().link(begin, dataLoader, router, reporter).build().execute(); - - WorkflowContext ctx = begin.getWorkflowContext(); - DataItem item = WorkflowContextOps.getDataItems(ctx).get(0); - // 未知场景呜䞭 defaultScorer分数=0.1 - assertThat(item.getEvalResult().getScore()).isCloseTo(0.1, org.assertj.core.data.Offset.offset(1e-6)); - assertThat(item.getEvalResult().getScorerResults().get(0).getMetric()).isEqualTo("兜底评䌰"); - } - } -} - diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/RubricBasedScorerTest.java 
b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/RubricBasedScorerTest.java index 3173506..7bb27bf 100644 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/RubricBasedScorerTest.java +++ b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/RubricBasedScorerTest.java @@ -19,7 +19,6 @@ import com.evalkit.framework.eval.node.scorer.model.RubricMergeStrategy; import com.evalkit.framework.eval.node.scorer.model.RubricScoreType; import com.evalkit.framework.infra.service.llm.LLMService; -import com.evalkit.framework.infra.utils.DebugUtils; import com.evalkit.framework.workflow.Workflow; import com.evalkit.framework.workflow.WorkflowBuilder; import lombok.extern.slf4j.Slf4j; @@ -36,20 +35,6 @@ import static org.junit.jupiter.api.Assertions.*; -/** - * RubricBasedScorer 单元测试 - *

- * 测试芆盖 - *

    - *
  • 配眮校验validRubricConfig
  • - *
  • 五种合并策略WEIGHTED_AVERAGE / SIMPLE_AVERAGE / LOGICAL_AND / STAR_GATE / COMPLETION_RATE
  • - *
  • 二元分区制纊束BINARY scoreType
  • - *
  • 園䞀化公匏minScore > 0 的区闎園䞀化
  • - *
  • 倚次采样取均倌 + 代衚性采样保留
  • - *
  • extra 字段透䌠
  • - *
  • 采样党倱莥时抛匂垞
  • - *
- */ @Slf4j class RubricBasedScorerTest { @@ -978,10 +963,13 @@ void minScoreGtZero_starGate_triggersZero() { // ==================== 真实铟路 ==================== @Test - @DisplayName("真实铟路") + @DisplayName("真实铟路mock LLM") void realLink() { - LLMService llm = DebugUtils.buildLLMService(); - // LLMService llm = mockLLMSequence(cotJson(1, "最差"), cotJson(5, "最奜")); + // 䜿甚 mock LLMService 替代真实 DeepSeek 服务䞍䟝赖倖郚 token 或 HTTP 请求 + // criteriaBatchSize=2每次 LLM 调甚需返回包含 2 䞪绎床评分结果的 JSON 数组 + // 3 条数据 × 1 次批量调甚2 䞪绎床合并䞺䞀次 = 3 次 LLM 调甚 + String batchCotJson = "[" + cotJson(4, "回倍莚量良奜") + "," + cotJson(5, "内容安党") + "]"; + LLMService llm = mockLLMSequence(batchCotJson, batchCotJson, batchCotJson); // 匀始节点 Begin begin = new Begin(); diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/ScorerConditionTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/ScorerConditionTest.java deleted file mode 100644 index 2002aff..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/ScorerConditionTest.java +++ /dev/null @@ -1,468 +0,0 @@ -package com.evalkit.framework.eval.node.scorer; - -import com.evalkit.framework.common.utils.list.ListUtils; -import com.evalkit.framework.common.utils.map.MapUtils; -import com.evalkit.framework.eval.context.WorkflowContextOps; -import com.evalkit.framework.eval.model.DataItem; -import com.evalkit.framework.eval.model.EvalResult; -import com.evalkit.framework.eval.model.InputData; -import com.evalkit.framework.eval.model.ScorerResult; -import com.evalkit.framework.eval.node.begin.Begin; -import com.evalkit.framework.eval.node.begin.config.BeginConfig; -import com.evalkit.framework.eval.node.dataloader.DataLoader; -import com.evalkit.framework.eval.node.reporter.StdReporter; -import com.evalkit.framework.eval.node.scorer.config.ScorerConfig; -import com.evalkit.framework.eval.node.scorer.strategy.SumScoreStrategy; -import com.evalkit.framework.workflow.WorkflowBuilder; -import 
com.evalkit.framework.workflow.model.WorkflowContext; -import org.junit.jupiter.api.DisplayName; -import org.junit.jupiter.api.Nested; -import org.junit.jupiter.api.Test; - -import java.util.List; - -import static org.assertj.core.api.Assertions.assertThat; -import static org.junit.jupiter.api.Assertions.*; - -/** - * 方案AScorerConfig.condition 场景路由条件的单元测试。 - * - *

测试芆盖 - *

    - *
  • {@link Scorer#shouldEval} 条件䞺 null 时始终执行
  • - *
  • {@link Scorer#shouldEval} 条件呜䞭时执行未呜䞭时跳过
  • - *
  • {@link Scorer#buildSkipResult} 跳过结果的各字段正确性
  • - *
  • 通过 WorkflowBuilder 的端到端集成倚 Scorer 按 scene 字段分流互䞍干扰
  • - *
  • 跳过结果的 totalScore=0 䞍圱响汇总分数
  • - *
  • skipScore 自定义倌被写入跳过结果
  • - *
- *

- */ -@DisplayName("方案A - Scorer condition 场景条件过滀") -class ScorerConditionTest { - - // ───────────────────────── 蟅助 Builder ───────────────────────── - - /** - * 构造䞀䞪固定返回 returnScore 的简单 Scorer可携垊 condition - */ - private Scorer buildScorer(String metric, double returnScore, double totalScore, - java.util.function.Function condition) { - ScorerConfig cfg = ScorerConfig.builder() - .metricName(metric) - .totalScore(totalScore) - .condition(condition) - .build(); - return new Scorer(cfg) { - @Override - public ScorerResult eval(DataItem dataItem) { - return new ScorerResult(metric, returnScore, totalScore, "正垞评䌰结果"); - } - }; - } - - /** - * 构造䞀䞪携垊 scene 字段的 DataItem并泚入 WorkflowContext - */ - private DataItem buildDataItem(long index, String scene, Scorer scorer) { - WorkflowContext ctx = new WorkflowContext(); - WorkflowContextOps.setScorerStrategy(ctx, new SumScoreStrategy()); - WorkflowContextOps.setThreshold(ctx, 0.0); - scorer.setWorkflowContext(ctx); - - InputData inputData = new InputData(index, MapUtils.of("scene", scene)); - DataItem item = new DataItem(); - item.setDataIndex(index); - item.setInputData(inputData); - return item; - } - - // ═══════════════════════════════════════════════════════════════ - // shouldEval 方法 - // ═══════════════════════════════════════════════════════════════ - - @Nested - @DisplayName("shouldEval") - class ShouldEvalTest { - - @Test - @DisplayName("condition=null 时对任意 DataItem 均返回 true") - void condition_null_alwaysEval() { - Scorer scorer = buildScorer("m", 1.0, 1.0, null); - DataItem item = new DataItem(); - item.setDataIndex(1L); - assertTrue(scorer.shouldEval(item)); - } - - @Test - @DisplayName("condition 返回 true 时返回 true") - void condition_matches_returnsTrue() { - Scorer scorer = buildScorer("m", 1.0, 1.0, - item -> "chat".equals(item.getInputData().get("scene"))); - DataItem item = buildDataItem(1L, "chat", scorer); - assertTrue(scorer.shouldEval(item)); - } - - @Test - @DisplayName("condition 返回 false 时返回 false") - 
void condition_notMatches_returnsFalse() { - Scorer scorer = buildScorer("m", 1.0, 1.0, - item -> "chat".equals(item.getInputData().get("scene"))); - DataItem item = buildDataItem(1L, "search", scorer); - assertFalse(scorer.shouldEval(item)); - } - - @Test - @DisplayName("condition 返回 null 时视䞺 false防埡 NPE") - void condition_returnsNull_treatedAsFalse() { - Scorer scorer = buildScorer("m", 1.0, 1.0, item -> null); - DataItem item = new DataItem(); - item.setDataIndex(1L); - assertFalse(scorer.shouldEval(item)); - } - } - - // ═══════════════════════════════════════════════════════════════ - // buildSkipResult 方法 - // ═══════════════════════════════════════════════════════════════ - - @Nested - @DisplayName("buildSkipResult") - class BuildSkipResultTest { - - @Test - @DisplayName("跳过结果的基本字段正确") - void skipResult_basicFields() { - Scorer scorer = buildScorer("指标A", 1.0, 1.0, - item -> "chat".equals(item.getInputData().get("scene"))); - DataItem item = buildDataItem(42L, "search", scorer); - - ScorerResult skipResult = scorer.buildSkipResult(item); - - assertEquals(42L, skipResult.getDataIndex()); - assertEquals("指标A", skipResult.getMetric()); - assertEquals(0.0, skipResult.getScore(), 1e-6); - // totalScore=0 确保䞍圱响汇总分数 - assertEquals(0.0, skipResult.getTotalScore(), 1e-6); - assertEquals("skipped by condition", skipResult.getReason()); - assertTrue(skipResult.isSuccess()); - assertTrue(skipResult.isPass()); // 跳过䞍算倱莥 - } - - @Test - @DisplayName("star 字段固定䞺 false跳过结果䞍觊发䞀祚吊决") - void skipResult_starIsFalse() { - ScorerConfig cfg = ScorerConfig.builder() - .metricName("必过指标") - .star(true) // config 䞭讟眮了 star - .condition(item -> false) - .build(); - Scorer scorer = new Scorer(cfg) { - @Override - public ScorerResult eval(DataItem dataItem) { - return new ScorerResult("必过指标", 1.0, 1.0, ""); - } - }; - DataItem item = new DataItem(); - item.setDataIndex(1L); - - ScorerResult skipResult = scorer.buildSkipResult(item); - // 跳过结果的 star=false䞍䌚觊发䞀祚吊决 - 
assertFalse(skipResult.isStar()); - } - - @Test - @DisplayName("skipScore 自定义倌被写入跳过结果") - void skipResult_customSkipScore() { - ScorerConfig cfg = ScorerConfig.builder() - .metricName("m") - .condition(item -> false) - .skipScore(0.5) - .build(); - Scorer scorer = new Scorer(cfg) { - @Override - public ScorerResult eval(DataItem dataItem) { - return new ScorerResult("m", 1.0, 1.0, ""); - } - }; - DataItem item = new DataItem(); - item.setDataIndex(1L); - - ScorerResult skipResult = scorer.buildSkipResult(item); - assertEquals(0.5, skipResult.getScore(), 1e-6); - } - } - - // ═══════════════════════════════════════════════════════════════ - // evalWrapper 集成 condition 过滀 - // ═══════════════════════════════════════════════════════════════ - - @Nested - @DisplayName("evalWrapper 集成 condition") - class EvalWrapperWithConditionTest { - - @Test - @DisplayName("条件呜䞭时正垞执行评䌰并返回评䌰结果") - void evalWrapper_conditionMatches_executesNormally() { - Scorer scorer = buildScorer("对话莚量", 0.9, 1.0, - item -> "chat".equals(item.getInputData().get("scene"))); - DataItem item = buildDataItem(1L, "chat", scorer); - - ScorerResult result = scorer.evalWrapper(item); - - assertTrue(result.isSuccess()); - assertEquals(0.9, result.getScore(), 1e-6); - assertEquals("正垞评䌰结果", result.getReason()); - } - - @Test - @DisplayName("条件未呜䞭时doExecute 返回跳过结果score=0, totalScore=0") - void evalWrapper_conditionNotMatches_doExecuteReturnsSkipResult() { - // 泚意条件过滀圚 doExecute 的调床层shouldEval ? 
evalWrapper : buildSkipResult - // 䞍圚 evalWrapper 本身。本测试通过 Workflow 端到端验证跳过行䞺。 - Scorer scorer = buildScorer("对话莚量", 0.9, 1.0, - item -> "chat".equals(item.getInputData().get("scene"))); - - Begin begin = new Begin(BeginConfig.builder() - .scoreStrategy(new SumScoreStrategy()) - .build()); - - DataLoader dataLoader = new DataLoader() { - @Override - public List prepareDataList() { - // scene=search䞍满足 condition需芁 chat - return ListUtils.of(new InputData(MapUtils.of("scene", "search"))); - } - }; - - StdReporter reporter = new StdReporter(); - new WorkflowBuilder().link(begin, dataLoader, scorer, reporter).build().execute(); - - WorkflowContext ctx = begin.getWorkflowContext(); - DataItem item = WorkflowContextOps.getDataItems(ctx).get(0); - EvalResult evalResult = item.getEvalResult(); - - // 条件未呜䞭跳过结果score=0, totalScore=0 - ScorerResult skipResult = evalResult.getScorerResults().get(0); - assertTrue(skipResult.isSuccess()); - assertTrue(skipResult.isPass()); - assertEquals(0.0, skipResult.getScore(), 1e-6); - assertEquals(0.0, skipResult.getTotalScore(), 1e-6); - assertEquals("skipped by condition", skipResult.getReason()); - } - - @Test - @DisplayName("condition=null 时行䞺䞎无 condition 完党䞀臎") - void evalWrapper_nullCondition_behavesLikeNormal() { - Scorer scorer = buildScorer("无条件", 1.0, 1.0, null); - DataItem item = buildDataItem(1L, "any_scene", scorer); - - ScorerResult result = scorer.evalWrapper(item); - - assertTrue(result.isSuccess()); - assertEquals(1.0, result.getScore(), 1e-6); - } - } - - // ═══════════════════════════════════════════════════════════════ - // 端到端集成测试倚 Scorer 按 scene 分流通过 WorkflowBuilder - // ═══════════════════════════════════════════════════════════════ - - @Nested - @DisplayName("端到端倚 Scorer 按 scene 场景分流") - class EndToEndMultiSceneTest { - - /** - * 数据集包含 chat/search/rag 䞉种场景各䞀条 - * 䞉䞪 Scorer 分别只倄理对应场景的 DataItem - * 验证每䞪 DataItem 只被对应 Scorer 评䌰跳过结果䞍圱响最终分数。 - */ - @Test - @DisplayName("䞉场景数据集各 Scorer 只倄理对应场景数据") - void 
multiScene_eachScorerHandlesOwnScene() { - Begin begin = new Begin(BeginConfig.builder() - .scoreStrategy(new SumScoreStrategy()) - .threshold(0) - .build()); - - DataLoader dataLoader = new DataLoader() { - @Override - public List prepareDataList() { - return ListUtils.of( - new InputData(MapUtils.of("scene", "chat", "query", "䜠奜")), - new InputData(MapUtils.of("scene", "search", "query", "搜玢词")), - new InputData(MapUtils.of("scene", "rag", "query", "文档问题")) - ); - } - }; - - // chat 评䌰噚只倄理 scene=chat固定埗分 0.8 - Scorer chatScorer = buildScorer("对话莚量", 0.8, 1.0, - item -> "chat".equals(item.getInputData().get("scene"))); - - // search 评䌰噚只倄理 scene=search固定埗分 0.7 - Scorer searchScorer = buildScorer("搜玢盞关性", 0.7, 1.0, - item -> "search".equals(item.getInputData().get("scene"))); - - // rag 评䌰噚只倄理 scene=rag固定埗分 0.9 - Scorer ragScorer = buildScorer("RAG准确率", 0.9, 1.0, - item -> "rag".equals(item.getInputData().get("scene"))); - - StdReporter reporter = new StdReporter(); - - new WorkflowBuilder() - .link(begin, dataLoader, chatScorer, searchScorer, ragScorer, reporter) - .build() - .execute(); - - // 通过 WorkflowContext 获取最终结果 - WorkflowContext ctx = begin.getWorkflowContext(); - List dataItems = WorkflowContextOps.getDataItems(ctx); - assertThat(dataItems).hasSize(3); - - // 扟到 chat 数据项 - DataItem chatItem = dataItems.stream() - .filter(d -> "chat".equals(d.getInputData().get("scene"))) - .findFirst().orElseThrow(RuntimeException::new); - EvalResult chatResult = chatItem.getEvalResult(); - // chat 数据项chatScorer 埗分0.8searchScorer/ragScorer 跳过totalScore=0䞍计入 - // SumScoreStrategy 只计入 success=true 的 scoreskip result score=0 + totalScore=0 - // 最终 score = 0.8 + 0 + 0 = 0.8跳过的 totalScore=0䞍圱响園䞀化基准 - assertThat(chatResult.getScore()).isCloseTo(0.8, org.assertj.core.data.Offset.offset(1e-6)); - // 验证 chat 数据项确实包含 chatScorer 的正垞评䌰结果 - boolean hasChatScore = chatResult.getScorerResults().stream() - .anyMatch(r -> "对话莚量".equals(r.getMetric()) && r.getScore() > 0); - 
assertTrue(hasChatScore, "chat 数据项应包含对话莚量评䌰结果"); - - // 扟到 search 数据项 - DataItem searchItem = dataItems.stream() - .filter(d -> "search".equals(d.getInputData().get("scene"))) - .findFirst().orElseThrow(RuntimeException::new); - EvalResult searchResult = searchItem.getEvalResult(); - assertThat(searchResult.getScore()).isCloseTo(0.7, org.assertj.core.data.Offset.offset(1e-6)); - - // 扟到 rag 数据项 - DataItem ragItem = dataItems.stream() - .filter(d -> "rag".equals(d.getInputData().get("scene"))) - .findFirst().orElseThrow(RuntimeException::new); - EvalResult ragResult = ragItem.getEvalResult(); - assertThat(ragResult.getScore()).isCloseTo(0.9, org.assertj.core.data.Offset.offset(1e-6)); - } - - @Test - @DisplayName("同䞀数据项被倚䞪 Scorer 评䌰时无 condition分数正垞环加") - void noCondition_allScorersEvaluateAllItems() { - Begin begin = new Begin(BeginConfig.builder() - .scoreStrategy(new SumScoreStrategy()) - .build()); - - DataLoader dataLoader = new DataLoader() { - @Override - public List prepareDataList() { - return ListUtils.of(new InputData(MapUtils.of("query", "测试"))); - } - }; - - // 䞀䞪无 condition 的 Scorer分别埗 0.6 和 0.4 - Scorer scorer1 = buildScorer("指标1", 0.6, 1.0, null); - Scorer scorer2 = buildScorer("指标2", 0.4, 1.0, null); - StdReporter reporter = new StdReporter(); - - new WorkflowBuilder() - .link(begin, dataLoader, scorer1, scorer2, reporter) - .build() - .execute(); - - WorkflowContext ctx = begin.getWorkflowContext(); - DataItem item = WorkflowContextOps.getDataItems(ctx).get(0); - // SumScoreStrategy: 0.6 + 0.4 = 1.0 - assertThat(item.getEvalResult().getScore()).isCloseTo(1.0, org.assertj.core.data.Offset.offset(1e-6)); - } - - @Test - @DisplayName("所有 Scorer 均未呜䞭党郚跳过最终分数䞺 0") - void allScorersSkip_finalScoreIsZero() { - Begin begin = new Begin(BeginConfig.builder() - .scoreStrategy(new SumScoreStrategy()) - .build()); - - DataLoader dataLoader = new DataLoader() { - @Override - public List prepareDataList() { - return ListUtils.of(new 
InputData(MapUtils.of("scene", "unknown"))); - } - }; - - Scorer chatScorer = buildScorer("对话莚量", 0.8, 1.0, - item -> "chat".equals(item.getInputData().get("scene"))); - Scorer searchScorer = buildScorer("搜玢盞关性", 0.7, 1.0, - item -> "search".equals(item.getInputData().get("scene"))); - StdReporter reporter = new StdReporter(); - - new WorkflowBuilder() - .link(begin, dataLoader, chatScorer, searchScorer, reporter) - .build() - .execute(); - - WorkflowContext ctx = begin.getWorkflowContext(); - DataItem item = WorkflowContextOps.getDataItems(ctx).get(0); - // 䞀䞪 Scorer 郜跳过score=0+0=0 - assertThat(item.getEvalResult().getScore()).isCloseTo(0.0, org.assertj.core.data.Offset.offset(1e-6)); - } - } - - // ═══════════════════════════════════════════════════════════════ - // AvgScoreRateStrategy 䞋的跳过验证验证 totalScore=0 䞍圱响均倌 - // ═══════════════════════════════════════════════════════════════ - - @Test - @DisplayName("跳过结果totalScore=0䞍圱响敎䜓埗分通过 Workflow 端到端验证") - void skipResult_doesNotInfluenceFinalScore() { - // chat 场景chatScorer 正垞评 1.0searchScorer 跳过doExecute 层返回 totalScore=0 - // 验证最终 EvalResult.score 只包含正垞评䌰的分数 - Scorer chatScorer = buildScorer("对话莚量", 1.0, 1.0, - item -> "chat".equals(item.getInputData().get("scene"))); - Scorer searchScorer = buildScorer("搜玢盞关性", 0.5, 1.0, - item -> "search".equals(item.getInputData().get("scene"))); - - Begin begin = new Begin(BeginConfig.builder() - .scoreStrategy(new SumScoreStrategy()) - .build()); - - DataLoader dataLoader = new DataLoader() { - @Override - public List prepareDataList() { - // 只有 chat 场景的䞀条数据 - return ListUtils.of(new InputData(MapUtils.of("scene", "chat"))); - } - }; - - StdReporter reporter = new StdReporter(); - new WorkflowBuilder().link(begin, dataLoader, chatScorer, searchScorer, reporter).build().execute(); - - WorkflowContext ctx = begin.getWorkflowContext(); - DataItem item = WorkflowContextOps.getDataItems(ctx).get(0); - List scorerResults = item.getEvalResult().getScorerResults(); - 
assertThat(scorerResults).hasSize(2); - - // chatScorer 正垞评䌰score=1.0totalScore=1.0 - ScorerResult chatResult = scorerResults.stream() - .filter(r -> "对话莚量".equals(r.getMetric()) && !"skipped by condition".equals(r.getReason())) - .findFirst().orElseThrow(RuntimeException::new); - assertEquals(1.0, chatResult.getScore(), 1e-6); - assertEquals(1.0, chatResult.getTotalScore(), 1e-6); - - // searchScorer 跳过score=0.0totalScore=0.0䞍计入汇总基准 - ScorerResult skipResult = scorerResults.stream() - .filter(r -> "skipped by condition".equals(r.getReason())) - .findFirst().orElseThrow(RuntimeException::new); - assertEquals(0.0, skipResult.getScore(), 1e-6); - assertEquals(0.0, skipResult.getTotalScore(), 1e-6); - assertTrue(skipResult.isSuccess()); - assertTrue(skipResult.isPass()); - - // SumScoreStrategy 最终分数 = 1.0skip 的 score=0 䞍圱响 - assertThat(item.getEvalResult().getScore()).isCloseTo(1.0, org.assertj.core.data.Offset.offset(1e-6)); - } -} - diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/ScorerTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/ScorerTest.java deleted file mode 100644 index a00651f..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/ScorerTest.java +++ /dev/null @@ -1,402 +0,0 @@ -package com.evalkit.framework.eval.node.scorer; - -import com.evalkit.framework.common.utils.map.MapUtils; -import com.evalkit.framework.eval.context.WorkflowContextOps; -import com.evalkit.framework.eval.model.ApiCompletionResult; -import com.evalkit.framework.eval.model.DataItem; -import com.evalkit.framework.eval.model.InputData; -import com.evalkit.framework.eval.model.ScorerResult; -import com.evalkit.framework.eval.node.scorer.config.ScorerConfig; -import com.evalkit.framework.eval.node.scorer.strategy.AvgScoreRateStrategy; -import com.evalkit.framework.eval.node.scorer.strategy.SumScoreStrategy; -import com.evalkit.framework.workflow.model.WorkflowContext; -import 
org.junit.jupiter.api.Test; - -import java.util.HashMap; -import java.util.Map; - -import static org.assertj.core.api.Assertions.assertThat; -import static org.assertj.core.api.Assertions.assertThatThrownBy; -import static org.junit.jupiter.api.Assertions.*; - -class ScorerTest { - - /** - * 构造䞀䞪最简单的具䜓 Scorer始终返回指定分数 - */ - private Scorer buildScorer(String metric, double totalScore, double threshold, boolean star, double returnScore) { - ScorerConfig cfg = ScorerConfig.builder() - .metricName(metric) - .totalScore(totalScore) - .threshold(threshold) - .star(star) - .build(); - return new Scorer(cfg) { - @Override - public ScorerResult eval(DataItem dataItem) { - return new ScorerResult(metric, returnScore, totalScore, "理由"); - } - }; - } - - /** - * 构造䞀䞪始终抛匂垞的 Scorer - */ - private Scorer buildThrowingScorer(String metric) { - ScorerConfig cfg = ScorerConfig.builder().metricName(metric).build(); - return new Scorer(cfg) { - @Override - public ScorerResult eval(DataItem dataItem) throws Exception { - throw new RuntimeException("故意抛出的匂垞"); - } - }; - } - - /** - * 构建垊䞊䞋文的 DataItem - */ - private DataItem buildDataItem(long dataIndex, Scorer scorer, SumScoreStrategy strategy) { - WorkflowContext ctx = new WorkflowContext(); - WorkflowContextOps.setScorerStrategy(ctx, strategy); - WorkflowContextOps.setThreshold(ctx, 0.5); - scorer.setWorkflowContext(ctx); - - DataItem dataItem = new DataItem(); - dataItem.setDataIndex(dataIndex); - Map input = new HashMap<>(); - input.put("query", "测试查询"); - dataItem.setInputData(new InputData(dataIndex, input)); - ApiCompletionResult result = new ApiCompletionResult(); - result.setSuccess(true); - Map res = new HashMap<>(); - res.put("response", "测试回倍"); - result.setResultItem(res); - dataItem.setApiCompletionResult(result); - return dataItem; - } - - // ─────────────────────────── calcScoreRate ─────────────────────────── - - @Test - void calcScoreRate_normalCase() { - double rate = Scorer.calcScoreRate(0.8, 1.0); - 
assertEquals(0.8, rate, 1e-6); - } - - @Test - void calcScoreRate_totalScoreIsZero_returnsZero() { - double rate = Scorer.calcScoreRate(0.5, 0.0); - assertEquals(0.0, rate, 1e-6); - } - - @Test - void calcScoreRate_fullScore() { - double rate = Scorer.calcScoreRate(3.0, 3.0); - assertEquals(1.0, rate, 1e-6); - } - - @Test - void calcScoreRate_zeroScore() { - double rate = Scorer.calcScoreRate(0.0, 5.0); - assertEquals(0.0, rate, 1e-6); - } - - // ─────────────────────────── validConfig ───────────────────────────── - - @Test - void validConfig_nullConfig_throwsIllegalArgument() { - assertThatThrownBy(() -> buildScorer(null, 1, 0, false, 1)) - .isInstanceOf(IllegalArgumentException.class); - } - - @Test - void validConfig_negativeThreshold_throwsIllegalArgument() { - assertThatThrownBy(() -> { - ScorerConfig cfg = ScorerConfig.builder().metricName("m").threshold(-0.1).build(); - new Scorer(cfg) { - @Override - public ScorerResult eval(DataItem dataItem) { - return null; - } - }; - }).isInstanceOf(IllegalArgumentException.class); - } - - @Test - void validConfig_zeroThreadNum_throwsIllegalArgument() { - assertThatThrownBy(() -> { - ScorerConfig cfg = ScorerConfig.builder().metricName("m").threadNum(0).build(); - new Scorer(cfg) { - @Override - public ScorerResult eval(DataItem dataItem) { - return null; - } - }; - }).isInstanceOf(IllegalArgumentException.class); - } - - // ─────────────────────────── buildErrorResult ──────────────────────── - - @Test - void buildErrorResult_returnsFailedResult() { - Scorer scorer = buildScorer("m", 1.0, 0, false, 1); - DataItem item = new DataItem(); - item.setDataIndex(42L); - RuntimeException ex = new RuntimeException("test error"); - - ScorerResult result = scorer.buildErrorResult(item, ex); - - assertFalse(result.isSuccess()); - assertFalse(result.isPass()); - assertEquals(0, result.getScore(), 1e-6); - assertEquals(42L, result.getDataIndex()); - assertTrue(result.getReason().contains("test error")); - } - - // 
─────────────────────────── evalWrapper ───────────────────────────── - - @Test - void evalWrapper_normalEval_returnsCorrectResult() { - Scorer scorer = buildScorer("准确率", 1.0, 0.5, false, 1.0); - DataItem item = buildDataItem(1L, scorer, new SumScoreStrategy()); - - ScorerResult result = scorer.evalWrapper(item); - - assertTrue(result.isSuccess()); - assertEquals(1.0, result.getScore(), 1e-6); - assertEquals(1.0, result.getScoreRate(), 1e-6); - assertEquals("准确率", result.getMetric()); - } - - @Test - void evalWrapper_exceptionInEval_returnsErrorResult() { - Scorer scorer = buildThrowingScorer("匂垞评䌰噚"); - WorkflowContext ctx = new WorkflowContext(); - WorkflowContextOps.setScorerStrategy(ctx, new SumScoreStrategy()); - scorer.setWorkflowContext(ctx); - DataItem item = new DataItem(); - item.setDataIndex(99L); - item.setInputData(new InputData(99L, new HashMap<>())); - - ScorerResult result = scorer.evalWrapper(item); - - assertFalse(result.isSuccess()); - assertEquals(0, result.getScore(), 1e-6); - assertTrue(result.getReason().contains("故意抛出的匂垞")); - } - - // ─────────────────────────── decidePass (via evalWrapper) ───────────── - - @Test - void decidePass_scoreValueStrategy_pass() { - // SumScoreStrategy is ScoreValueStrategy, threshold=0.5, score=1.0 → pass - Scorer scorer = buildScorer("m", 1.0, 0.5, false, 1.0); - DataItem item = buildDataItem(1L, scorer, new SumScoreStrategy()); - - ScorerResult result = scorer.evalWrapper(item); - assertTrue(result.isPass()); - } - - @Test - void decidePass_scoreValueStrategy_fail() { - // threshold=0.9, score=0.5 → fail - Scorer scorer = buildScorer("m", 1.0, 0.9, false, 0.5); - DataItem item = buildDataItem(2L, scorer, new SumScoreStrategy()); - - ScorerResult result = scorer.evalWrapper(item); - assertFalse(result.isPass()); - } - - @Test - void decidePass_scoreRateStrategy_pass() { - // AvgScoreRateStrategy is ScoreRateStrategy, threshold=0.5, score=0.8/1.0=0.8 → pass - Scorer scorer = buildScorer("m", 1.0, 0.5, false, 
0.8); - WorkflowContext ctx = new WorkflowContext(); - WorkflowContextOps.setScorerStrategy(ctx, new AvgScoreRateStrategy()); - scorer.setWorkflowContext(ctx); - DataItem item = new DataItem(); - item.setDataIndex(3L); - item.setInputData(new InputData(3L, new HashMap<>())); - - ScorerResult result = scorer.evalWrapper(item); - assertTrue(result.isPass()); - } - - // ─────────────────────────── star field propagation ─────────────────── - - @Test - void evalWrapper_starFlag_propagatedToResult() { - Scorer scorer = buildScorer("必过项", 1.0, 0.5, true, 1.0); - DataItem item = buildDataItem(10L, scorer, new SumScoreStrategy()); - - ScorerResult result = scorer.evalWrapper(item); - assertTrue(result.isStar()); - } - - // ─────────────────────────── dynamicTotalScore ─────────────────────── - - @Test - void evalWrapper_dynamicTotalScore_usesResultTotalScore() { - ScorerConfig cfg = ScorerConfig.builder() - .metricName("劚态总分") - .totalScore(1.0) // 配眮总分1 - .dynamicTotalScore(true) - .build(); - Scorer scorer = new Scorer(cfg) { - @Override - public ScorerResult eval(DataItem dataItem) { - // 返回评䌰结果䞭的 totalScore=5分数=4 - return new ScorerResult("劚态总分", 4.0, 5.0, "理由"); - } - }; - WorkflowContext ctx = new WorkflowContext(); - WorkflowContextOps.setScorerStrategy(ctx, new SumScoreStrategy()); - scorer.setWorkflowContext(ctx); - DataItem item = new DataItem(); - item.setDataIndex(5L); - item.setInputData(new InputData(5L, new HashMap<>())); - - ScorerResult result = scorer.evalWrapper(item); - - // totalScore 来自评䌰结果䞭的 5, scoreRate=4/5=0.8 - assertThat(result.getTotalScore()).isCloseTo(5.0, org.assertj.core.data.Offset.offset(1e-6)); - assertThat(result.getScoreRate()).isCloseTo(0.8, org.assertj.core.data.Offset.offset(1e-6)); - } - - // ─────────────────────────── shouldEval条件跳过──────────────────── - - @Test - void shouldEval_nullCondition_alwaysTrue() { - // condition=null 时shouldEval 始终返回 true向前兌容䞍过滀任䜕数据项 - Scorer scorer = buildScorer("m", 1.0, 0, false, 1.0); - 
DataItem item = new DataItem(); - item.setDataIndex(1L); - assertTrue(scorer.shouldEval(item)); - } - - @Test - void shouldEval_conditionMatches_returnsTrue() { - // condition 呜䞭时返回 true本 Scorer 正垞执行 - ScorerConfig cfg = ScorerConfig.builder() - .metricName("m") - .condition(i -> "chat".equals(i.getInputData().get("scene"))) - .build(); - Scorer scorer = new Scorer(cfg) { - @Override - public ScorerResult eval(DataItem d) { - return null; - } - }; - DataItem item = new DataItem(); - item.setInputData(new InputData(MapUtils.of("scene", "chat"))); - assertTrue(scorer.shouldEval(item)); - } - - @Test - void shouldEval_conditionNotMatches_returnsFalse() { - // condition 未呜䞭时返回 falsedoExecute 层将调甚 buildSkipResult 跳过 - ScorerConfig cfg = ScorerConfig.builder() - .metricName("m") - .condition(i -> "chat".equals(i.getInputData().get("scene"))) - .build(); - Scorer scorer = new Scorer(cfg) { - @Override - public ScorerResult eval(DataItem d) { - return null; - } - }; - DataItem item = new DataItem(); - item.setInputData(new InputData(MapUtils.of("scene", "search"))); - assertFalse(scorer.shouldEval(item)); - } - - @Test - void shouldEval_conditionReturnsNull_treatedAsFalse() { - // condition 返回 null 时视䞺 false防止 NPE - ScorerConfig cfg = ScorerConfig.builder() - .metricName("m") - .condition(i -> null) - .build(); - Scorer scorer = new Scorer(cfg) { - @Override - public ScorerResult eval(DataItem d) { - return null; - } - }; - assertFalse(scorer.shouldEval(new DataItem())); - } - - // ─────────────────────────── buildSkipResult跳过结果────────────── - - @Test - void buildSkipResult_fieldsCorrect() { - // 跳过结果的各字段语义success=true、pass=true䞍拉䜎通过率 - // totalScore=0䞍圱响汇总基准reason 固定䞺 "skipped by condition" - Scorer scorer = buildScorer("指标A", 1.0, 0.5, false, 1.0); - DataItem item = new DataItem(); - item.setDataIndex(42L); - - ScorerResult skip = scorer.buildSkipResult(item); - - assertEquals(42L, skip.getDataIndex()); - assertEquals("指标A", skip.getMetric()); - 
assertEquals(0.0, skip.getScore(), 1e-6); - assertEquals(0.0, skip.getTotalScore(), 1e-6); - assertEquals("skipped by condition", skip.getReason()); - assertTrue(skip.isSuccess()); - assertTrue(skip.isPass()); // 跳过䞍算倱莥 - } - - @Test - void buildSkipResult_starIsFalse_noVeto() { - // 即䜿 config äž­ star=true跳过结果的 star 必须䞺 false - // 防止跳过的数据项觊发䞀祚吊决逻蟑 - ScorerConfig cfg = ScorerConfig.builder() - .metricName("必过项") - .star(true) - .condition(i -> false) - .build(); - Scorer scorer = new Scorer(cfg) { - @Override - public ScorerResult eval(DataItem d) { - return null; - } - }; - DataItem item = new DataItem(); - item.setDataIndex(1L); - - assertFalse(scorer.buildSkipResult(item).isStar()); - } - - @Test - void buildSkipResult_customSkipScore_writtenToResult() { - // skipScore 配眮的自定义倌应写入跳过结果的 score 字段 - ScorerConfig cfg = ScorerConfig.builder() - .metricName("m") - .condition(i -> false) - .skipScore(0.5) - .build(); - Scorer scorer = new Scorer(cfg) { - @Override - public ScorerResult eval(DataItem d) { - return null; - } - }; - DataItem item = new DataItem(); - item.setDataIndex(1L); - - assertEquals(0.5, scorer.buildSkipResult(item).getScore(), 1e-6); - } - - @Test - void buildSkipResult_scorerTypePreserved() { - // 跳过结果应携垊 scorerType䟿于报告层区分来源 - Scorer scorer = buildScorer("m", 1.0, 0, false, 1.0); - DataItem item = new DataItem(); - item.setDataIndex(1L); - - ScorerResult skip = scorer.buildSkipResult(item); - assertNotNull(skip.getScorerType()); - assertFalse(skip.getScorerType().isEmpty()); - } -} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/SecurityScorerTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/SecurityScorerTest.java deleted file mode 100644 index 457a22c..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/SecurityScorerTest.java +++ /dev/null @@ -1,21 +0,0 @@ -package com.evalkit.framework.eval.node.scorer; - -import 
com.evalkit.framework.eval.model.ApiCompletionResult; -import com.evalkit.framework.eval.model.InputData; -import com.evalkit.framework.eval.node.scorer.config.PromptBasedScorerConfig; -import com.evalkit.framework.infra.service.llm.LLMServiceFactory; - -class SecurityScorerTest { - void test() { - SecurityScorer securityScorer = new SecurityScorer( - PromptBasedScorerConfig.builder() - .llmService(LLMServiceFactory.createLLMService("test", null)) - .build() - ) { - @Override - public String prepareUserPrompt(InputData inputData, ApiCompletionResult apiCompletionResult) { - return ""; - } - }; - } -} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/VectorSimilarityScorerTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/VectorSimilarityScorerTest.java deleted file mode 100644 index b57e35a..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/VectorSimilarityScorerTest.java +++ /dev/null @@ -1,18 +0,0 @@ -package com.evalkit.framework.eval.node.scorer; - -import com.evalkit.framework.eval.model.DataItem; -import com.evalkit.framework.eval.node.scorer.config.VectorSimilarityScorerConfig; -import org.apache.commons.lang3.tuple.Pair; - -class VectorSimilarityScorerTest { - void test() { - VectorSimilarityScorer vectorSimilarityScorer = new VectorSimilarityScorer( - VectorSimilarityScorerConfig.builder().similarityThreshold(0.8).build() - ) { - @Override - public Pair prepareFieldPair(DataItem dataItem) { - return null; - } - }; - } -} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/checker/AbstractCheckerTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/checker/AbstractCheckerTest.java deleted file mode 100644 index abce9de..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/checker/AbstractCheckerTest.java +++ /dev/null @@ -1,187 +0,0 @@ 
-package com.evalkit.framework.eval.node.scorer.checker; - -import com.evalkit.framework.eval.model.DataItem; -import com.evalkit.framework.eval.model.InputData; -import com.evalkit.framework.eval.node.scorer.checker.config.CheckerConfig; -import com.evalkit.framework.eval.node.scorer.checker.model.CheckItem; -import com.evalkit.framework.eval.node.scorer.checker.strategy.checkitem.SumCheckItemScoreMergeStrategy; -import org.junit.jupiter.api.Test; - -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; - -import static org.assertj.core.api.Assertions.assertThatThrownBy; -import static org.junit.jupiter.api.Assertions.*; - -class AbstractCheckerTest { - - // ─────────────── 蟅助方法 ──────────────────────────────────────── - - private DataItem buildDataItem(long idx) { - DataItem item = new DataItem(); - item.setDataIndex(idx); - item.setInputData(new InputData(idx, new HashMap<>())); - return item; - } - - /** 构建䞀䞪固定检查项分数的简单 Checker */ - private AbstractChecker buildChecker(boolean support, double totalScore, - boolean star, List checkItems) { - CheckerConfig cfg = CheckerConfig.builder() - .name("测试检查噚") - .totalScore(totalScore) - .star(star) - .strategy(new SumCheckItemScoreMergeStrategy()) - .build(); - return new AbstractChecker(cfg) { - @Override - public boolean support(DataItem dataItem) { - return support; - } - - @Override - public double getTotalScore() { - return totalScore; - } - - @Override - protected List prepareCheckItems(DataItem dataItem) { - return checkItems; - } - - @Override - protected void check(DataItem dataItem) { - // 简单赋分 - for (CheckItem ci : checkItems) { - ci.setExecuted(true); - } - } - }; - } - - // ─────────────────── checkWrapper: support=false 时跳过 ────────── - - @Test - void checkWrapper_notSupport_skips() { - CheckItem ci = CheckItem.builder().name("项A").build(); - // 初始分 0 - AbstractChecker checker = buildChecker(false, 1.0, false, Arrays.asList(ci)); - DataItem item = buildDataItem(1L); - 
checker.checkWrapper(item); - // 因䞺 support=falsecheck() 没有执行checkItems 䞺默讀倌builder 里的 empty list - // 只验证䞍抛匂垞 - assertEquals(0.0, checker.getScore(), 1e-6); - } - - // ─────────────────── checkWrapper: 正垞流皋 ────────────────────── - - @Test - void checkWrapper_normalFlow_checkItemsSetAndMerged() { - CheckItem ci = CheckItem.builder().name("语蚀检查").totalScore(1.0).build(); - AbstractChecker checker = buildChecker(true, 1.0, false, Arrays.asList(ci)); - // 圚 check 时手劚讟眮分数 - DataItem item = buildDataItem(2L); - checker.checkWrapper(item); - // check 里只标记 executed䞍讟眮分数score 仍 0 - assertTrue(checker.getConfig().getCheckItems().get(0).isExecuted()); - } - - // ─────────────────── getScore / getReason ──────────────────────── - - @Test - void getScore_sumStrategy() { - CheckItem ci1 = CheckItem.builder().name("A").totalScore(1.0).build(); - CheckItem ci2 = CheckItem.builder().name("B").totalScore(1.0).build(); - ci1.setScore(0.8); - ci2.setScore(0.6); - - CheckerConfig cfg = CheckerConfig.builder() - .name("checker") - .totalScore(2.0) - .strategy(new SumCheckItemScoreMergeStrategy()) - .checkItems(Arrays.asList(ci1, ci2)) - .build(); - - AbstractChecker checker = new AbstractChecker(cfg) { - @Override - public boolean support(DataItem d) { return true; } - @Override - public double getTotalScore() { return 2.0; } - @Override - protected List prepareCheckItems(DataItem d) { return cfg.getCheckItems(); } - @Override - protected void check(DataItem d) {} - }; - - assertEquals(0.8 + 0.6, checker.getScore(), 1e-6); - } - - @Test - void getReason_returnsZeroScoreItemReasons() { - CheckItem pass = CheckItem.builder().name("通过项").build(); - CheckItem fail = CheckItem.builder().name("䞍通过项").build(); - pass.setScore(1.0); - pass.setReason("通过"); - fail.setScore(0.0); - fail.setReason("内容䞍笊合芁求"); - - CheckerConfig cfg = CheckerConfig.builder() - .name("checker") - .strategy(new SumCheckItemScoreMergeStrategy()) - .checkItems(Arrays.asList(pass, fail)) - .build(); - - 
AbstractChecker checker = new AbstractChecker(cfg) { - @Override - public boolean support(DataItem d) { return true; } - @Override - public double getTotalScore() { return 2.0; } - @Override - protected List prepareCheckItems(DataItem d) { return cfg.getCheckItems(); } - @Override - protected void check(DataItem d) {} - }; - - String reason = checker.getReason(); - assertTrue(reason.contains("内容䞍笊合芁求")); - assertFalse(reason.contains("通过")); - } - - // ─────────────────── star 标志 ─────────────────────────────────── - - @Test - void isStar_reflectsConfig() { - CheckItem ci = CheckItem.builder().name("x").build(); - AbstractChecker starChecker = buildChecker(true, 1.0, true, Arrays.asList(ci)); - AbstractChecker normalChecker = buildChecker(true, 1.0, false, Arrays.asList(ci)); - - assertTrue(starChecker.isStar()); - assertFalse(normalChecker.isStar()); - } - - // ─────────────────── checkWrapper: 匂垞䌠播 ────────────────────── - - @Test - void checkWrapper_exceptionPropagates() { - CheckItem ci = CheckItem.builder().name("x").build(); - CheckerConfig cfg = CheckerConfig.builder() - .name("错误检查噚") - .strategy(new SumCheckItemScoreMergeStrategy()) - .build(); - AbstractChecker checker = new AbstractChecker(cfg) { - @Override - public boolean support(DataItem d) { return true; } - @Override - public double getTotalScore() { return 1.0; } - @Override - protected List prepareCheckItems(DataItem d) { return Arrays.asList(ci); } - @Override - protected void check(DataItem d) { throw new RuntimeException("check error"); } - }; - - assertThatThrownBy(() -> checker.checkWrapper(buildDataItem(1L))) - .isInstanceOf(RuntimeException.class) - .hasMessageContaining("check error"); - } -} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/checker/LLMBasedCheckerTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/checker/LLMBasedCheckerTest.java deleted file mode 100644 index 5b95614..0000000 --- 
a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/checker/LLMBasedCheckerTest.java +++ /dev/null @@ -1,44 +0,0 @@ -package com.evalkit.framework.eval.node.scorer.checker; - -import com.evalkit.framework.eval.model.DataItem; -import com.evalkit.framework.eval.node.scorer.checker.config.LLMBasedCheckerConfig; -import com.evalkit.framework.eval.node.scorer.checker.model.CheckItem; -import com.evalkit.framework.infra.service.llm.LLMServiceFactory; - -import java.util.Collections; -import java.util.List; - -class LLMBasedCheckerTest { - void test() { - LLMBasedChecker checker = new LLMBasedChecker( - LLMBasedCheckerConfig.builder() - .llmService(LLMServiceFactory.createLLMService("test", null)) - .build() - ) { - @Override - protected List prepareCheckItems(DataItem dataItem) { - return Collections.emptyList(); - } - - @Override - protected String prepareUserPrompt(DataItem dataItem, int round) { - return ""; - } - - @Override - protected boolean needCheck(DataItem dataItem, int round) { - return false; - } - - @Override - public boolean support(DataItem dataItem) { - return false; - } - - @Override - public double getTotalScore() { - return 0; - } - }; - } -} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/checker/model/CheckItemTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/checker/model/CheckItemTest.java deleted file mode 100644 index 62ebc59..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/checker/model/CheckItemTest.java +++ /dev/null @@ -1,116 +0,0 @@ -package com.evalkit.framework.eval.node.scorer.checker.model; - -import com.evalkit.framework.eval.node.scorer.checker.constants.CheckMethod; -import org.junit.jupiter.api.Test; - -import static org.assertj.core.api.Assertions.assertThatThrownBy; -import static org.junit.jupiter.api.Assertions.*; - -class CheckItemTest { - - // ─────────────────────────── 默讀倌验证 
──────────────────────────── - - @Test - void defaultValues_areCorrect() { - CheckItem item = CheckItem.builder().name("检查项").build(); - assertEquals("检查项", item.getName()); - assertEquals(1.0, item.getTotalScore(), 1e-6); - assertEquals(1.0, item.getWeight(), 1e-6); - assertFalse(item.isStar()); - assertTrue(item.isSupport()); - assertEquals(0.0, item.getDefaultScore(), 1e-6); - assertFalse(item.isExecuted()); - assertEquals(CheckMethod.NONE, item.getCheckMethod()); - } - - // ─────────────────────────── 参数校验 ───────────────────────────── - - @Test - void build_blankName_throwsIllegalArgument() { - assertThatThrownBy(() -> CheckItem.builder().name("").build()) - .isInstanceOf(IllegalArgumentException.class) - .hasMessageContaining("䞍胜䞺空"); - } - - @Test - void build_negativeTotalScore_throwsIllegalArgument() { - assertThatThrownBy(() -> CheckItem.builder().name("x").totalScore(-1).build()) - .isInstanceOf(IllegalArgumentException.class); - } - - @Test - void build_negativeWeight_throwsIllegalArgument() { - assertThatThrownBy(() -> CheckItem.builder().name("x").weight(-0.1).build()) - .isInstanceOf(IllegalArgumentException.class); - } - - @Test - void build_negativeDefaultScore_throwsIllegalArgument() { - assertThatThrownBy(() -> CheckItem.builder().name("x").defaultScore(-1).build()) - .isInstanceOf(IllegalArgumentException.class); - } - - // ─────────────────────────── getWeightScore ─────────────────────── - - @Test - void getWeightScore_normalCase() { - CheckItem item = CheckItem.builder().name("x").weight(2.0).build(); - item.setScore(0.8); - assertEquals(1.6, item.getWeightScore(), 1e-6); - } - - @Test - void getWeightScore_zeroScore() { - CheckItem item = CheckItem.builder().name("x").weight(3.0).build(); - item.setScore(0.0); - assertEquals(0.0, item.getWeightScore(), 1e-6); - } - - // ─────────────────────────── support=false 时初始分数取 defaultScore ─ - - @Test - void support_false_scoreEqualsDefaultScore() { - CheckItem item = CheckItem.builder() - .name("x") 
- .support(false) - .defaultScore(0.5) - .build(); - assertFalse(item.isSupport()); - assertEquals(0.5, item.getScore(), 1e-6); - } - - // ─────────────────────────── star 标志 ──────────────────────────── - - @Test - void star_flag_isSetCorrectly() { - CheckItem item = CheckItem.builder().name("必过项").star(true).build(); - assertTrue(item.isStar()); - } - - // ─────────────────────────── setter/getter ───────────────────────── - - @Test - void setters_workCorrectly() { - CheckItem item = CheckItem.builder().name("item").build(); - item.setScore(0.9); - item.setReason("测试理由"); - item.setExecuted(true); - item.setCheckMethod(CheckMethod.LLM); - - assertEquals(0.9, item.getScore(), 1e-6); - assertEquals("测试理由", item.getReason()); - assertTrue(item.isExecuted()); - assertEquals(CheckMethod.LLM, item.getCheckMethod()); - } - - // ─────────────────────────── checkDescription ───────────────────── - - @Test - void checkDescription_isSetAndRetrieved() { - CheckItem item = CheckItem.builder() - .name("x") - .checkDescription("这是检查描述") - .build(); - assertEquals("这是检查描述", item.getCheckDescription()); - } -} \ No newline at end of file diff --git a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/strategy/ScoreStrategyTest.java b/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/strategy/ScoreStrategyTest.java deleted file mode 100644 index 5b9fb04..0000000 --- a/evalkit-eval/src/test/java/com/evalkit/framework/eval/node/scorer/strategy/ScoreStrategyTest.java +++ /dev/null @@ -1,247 +0,0 @@ -package com.evalkit.framework.eval.node.scorer.strategy; - -import com.evalkit.framework.eval.model.ScorerResult; -import org.junit.jupiter.api.Test; - -import java.util.Arrays; -import java.util.Collections; -import java.util.List; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; - -/** - * 各评䌰分数策略单元测试 - *

- * 芆盖: SumScoreStrategy / AvgScoreStrategy / MinScoreStrategy - * AvgScoreRateStrategy / MaxScoreRateStrategy / MinScoreRateStrategy / SumScoreRateStrategy - */ -class ScoreStrategyTest { - - // ─────────────── 蟅助方法 ──────────────────────────────────────── - - private ScorerResult r(double score, double scoreRate) { - return ScorerResult.builder() - .metric("m") - .score(score) - .scoreRate(scoreRate) - .success(true) - .build(); - } - - // ═══════════════════════════════════════════════════════════════ - // SumScoreStrategy - // ═══════════════════════════════════════════════════════════════ - - @Test - void sumScore_normalCase() { - SumScoreStrategy s = new SumScoreStrategy(); - List rs = Arrays.asList(r(0.8, 0.8), r(0.6, 0.6)); - assertEquals(1.4, s.calScore(rs), 1e-6); - } - - @Test - void sumScore_emptyList_returnsZero() { - SumScoreStrategy s = new SumScoreStrategy(); - assertEquals(0.0, s.calScore(Collections.emptyList()), 1e-6); - } - - @Test - void sumScore_skipsFailedResults() { - // SumScoreStrategy: 仅对 success=true 的结果求和 - ScorerResult failed = ScorerResult.builder().metric("f").score(0.9).success(false).build(); - ScorerResult passed = ScorerResult.builder().metric("p").score(1.0).success(true).build(); - SumScoreStrategy s = new SumScoreStrategy(); - // failed 䞍被计入isSuccess=false 时䞍加 - assertEquals(1.0, s.calScore(Arrays.asList(failed, passed)), 1e-6); - } - - @Test - void sumScore_strategyName() { - assertEquals("分数求和策略", new SumScoreStrategy().getStrategyName()); - } - - // ═══════════════════════════════════════════════════════════════ - // AvgScoreStrategy - // ═══════════════════════════════════════════════════════════════ - - @Test - void avgScore_normalCase() { - AvgScoreStrategy s = new AvgScoreStrategy(); - List rs = Arrays.asList(r(0.8, 0.8), r(0.6, 0.6)); - assertEquals(0.7, s.calScore(rs), 1e-6); - } - - @Test - void avgScore_emptyList_returnsZero() { - assertEquals(0.0, new AvgScoreStrategy().calScore(Collections.emptyList()), 1e-6); - 
} - - @Test - void avgScore_singleElement() { - assertEquals(0.9, new AvgScoreStrategy().calScore(Collections.singletonList(r(0.9, 0.9))), 1e-6); - } - - @Test - void avgScore_skipsNegativeScore() { - // score=-1 的结果被跳过 - AvgScoreStrategy s = new AvgScoreStrategy(); - List rs = Arrays.asList(r(1.0, 1.0), r(-1.0, 0.0)); - // 只有 score=1.0 有效 → 平均 = 1.0/1 = 1.0 - assertEquals(1.0, s.calScore(rs), 1e-6); - } - - @Test - void avgScore_strategyName() { - assertEquals("平均分数策略", new AvgScoreStrategy().getStrategyName()); - } - - // ═══════════════════════════════════════════════════════════════ - // MinScoreStrategy - // ═══════════════════════════════════════════════════════════════ - - @Test - void minScore_normalCase() { - MinScoreStrategy s = new MinScoreStrategy(); - List rs = Arrays.asList(r(0.8, 0.8), r(0.3, 0.3), r(1.0, 1.0)); - assertEquals(0.3, s.calScore(rs), 1e-6); - } - - @Test - void minScore_emptyList_returnsZero() { - assertEquals(0.0, new MinScoreStrategy().calScore(Collections.emptyList()), 1e-6); - } - - @Test - void minScore_singleElement() { - assertEquals(0.7, new MinScoreStrategy().calScore(Collections.singletonList(r(0.7, 0.7))), 1e-6); - } - - @Test - void minScore_strategyName() { - assertEquals("最小分数策略", new MinScoreStrategy().getStrategyName()); - } - - // ═══════════════════════════════════════════════════════════════ - // AvgScoreRateStrategy - // ═══════════════════════════════════════════════════════════════ - - @Test - void avgScoreRate_normalCase() { - AvgScoreRateStrategy s = new AvgScoreRateStrategy(); - // (0.8 + 0.6) / 2 = 0.7 - List rs = Arrays.asList(r(0.8, 0.8), r(0.6, 0.6)); - assertEquals(0.7, s.calScore(rs), 1e-6); - } - - @Test - void avgScoreRate_emptyList_returnsZero() { - assertEquals(0.0, new AvgScoreRateStrategy().calScore(Collections.emptyList()), 1e-6); - } - - @Test - void avgScoreRate_singleElement() { - assertEquals(0.5, new AvgScoreRateStrategy().calScore(Collections.singletonList(r(0.5, 0.5))), 1e-6); - } - - @Test - 
void avgScoreRate_strategyName() { - assertEquals("平均埗分率策略", new AvgScoreRateStrategy().getStrategyName()); - } - - // ═══════════════════════════════════════════════════════════════ - // MaxScoreRateStrategy - // ═══════════════════════════════════════════════════════════════ - - @Test - void maxScoreRate_normalCase() { - MaxScoreRateStrategy s = new MaxScoreRateStrategy(); - List rs = Arrays.asList(r(0.3, 0.3), r(0.9, 0.9), r(0.5, 0.5)); - assertEquals(0.9, s.calScore(rs), 1e-6); - } - - @Test - void maxScoreRate_emptyList_returnsZero() { - assertEquals(0.0, new MaxScoreRateStrategy().calScore(Collections.emptyList()), 1e-6); - } - - @Test - void maxScoreRate_strategyName() { - assertEquals("最倧埗分率策略", new MaxScoreRateStrategy().getStrategyName()); - } - - // ═══════════════════════════════════════════════════════════════ - // MinScoreRateStrategy - // ═══════════════════════════════════════════════════════════════ - - @Test - void minScoreRate_emptyList_returnsZero() { - assertEquals(0.0, new MinScoreRateStrategy().calScore(Collections.emptyList()), 1e-6); - } - - @Test - void minScoreRate_strategyName() { - assertEquals("最小埗分率策略", new MinScoreRateStrategy().getStrategyName()); - } - - // ═══════════════════════════════════════════════════════════════ - // SumScoreRateStrategy - // ═══════════════════════════════════════════════════════════════ - - @Test - void sumScoreRate_normalCase() { - SumScoreRateStrategy s = new SumScoreRateStrategy(); - List rs = Arrays.asList(r(0.5, 0.5), r(0.7, 0.7)); - assertEquals(1.2, s.calScore(rs), 1e-6); - } - - @Test - void sumScoreRate_emptyList_returnsZero() { - assertEquals(0.0, new SumScoreRateStrategy().calScore(Collections.emptyList()), 1e-6); - } - - @Test - void sumScoreRate_strategyName() { - assertEquals("埗分率求和策略", new SumScoreRateStrategy().getStrategyName()); - } - - // ═══════════════════════════════════════════════════════════════ - // ScoreStrategy 类型刀断 - // 
═══════════════════════════════════════════════════════════════ - - @Test - void sumScore_isScoreValueStrategy() { - assertTrue(new SumScoreStrategy() instanceof ScoreValueStrategy); - } - - @Test - void avgScore_isScoreValueStrategy() { - assertTrue(new AvgScoreStrategy() instanceof ScoreValueStrategy); - } - - @Test - void minScore_isScoreValueStrategy() { - assertTrue(new MinScoreStrategy() instanceof ScoreValueStrategy); - } - - @Test - void avgScoreRate_isScoreRateStrategy() { - assertTrue(new AvgScoreRateStrategy() instanceof ScoreRateStrategy); - } - - @Test - void maxScoreRate_isScoreRateStrategy() { - assertTrue(new MaxScoreRateStrategy() instanceof ScoreRateStrategy); - } - - @Test - void minScoreRate_isScoreRateStrategy() { - assertTrue(new MinScoreRateStrategy() instanceof ScoreRateStrategy); - } - - @Test - void sumScoreRate_isScoreRateStrategy() { - assertTrue(new SumScoreRateStrategy() instanceof ScoreRateStrategy); - } -} - diff --git a/evalkit-eval/src/test/resources/dataItems.json b/evalkit-eval/src/test/resources/dataItems.json deleted file mode 100644 index 082c1cb..0000000 --- a/evalkit-eval/src/test/resources/dataItems.json +++ /dev/null @@ -1,174 +0,0 @@ -[ - { - "dataIndex": 0, - "inputData": { - "dataIndex": 0, - "inputItem": { - "query": "hello, 元宵节", - "type": "1" - } - }, - "apiCompletionResult": { - "dataIndex": 0, - "resultItem": { - "response": "Mock response for hello, 元宵节" - }, - "startTime": 1763027533462, - "endTime": 1763027533463, - "timeCost": 1, - "success": true - }, - "evalResult": { - "dataIndex": 0, - "score": 0.0, - "reason": "由于甚户查询信息䞍完敎猺少具䜓日期、目的地等关键芁玠AI助手仅提瀺信息猺倱而未䞻劚掚荐暡糊条件䞋的机祚选项劂党囜䜎价祚或春季航班也未通过亀互匕富甚户补充信息富臎圚信息卡片展瀺、甚户偏奜匹配和有效回倍等所有评䌰绎床均埗分䞺0反映出系统对暡糊查询的倄理胜力䞍足。", - "startTime": 0, - "endTime": 0, - "timeCost": 0, - "scorerResults": [ - { - "dataIndex": 0, - "metric": "匂垞测试", - "score": 0.0, - "scoreRate": 0.0, - "totalScore": 1.0, - "reason": "Error: / by zero", - "extra": null, - "statTime": 0, - "endTime": 0, - 
"timeCost": 0, - "success": false, - "pass": false, - "threshold": 0.0, - "star": false - }, - { - "dataIndex": 0, - "metric": "回倍长床检查", - "score": 1.0, - "scoreRate": 0.0, - "totalScore": 1.0, - "reason": "hello, 元宵节 的回倍长床超过5䞪字笊", - "extra": null, - "statTime": 1763027533557, - "endTime": 1763027533558, - "timeCost": 1, - "success": true, - "pass": true, - "threshold": 0.0, - "star": false - }, - { - "dataIndex": 0, - "metric": "盞䌌床检查level1", - "score": 0.0, - "scoreRate": 0.0, - "totalScore": 1.0, - "reason": "盞䌌床䞺0.0000小于阈倌0.0000", - "extra": { - "similarity": 0.0, - "similarityThreshold": 0.0 - }, - "statTime": 1763027533558, - "endTime": 1763027533900, - "timeCost": 342, - "success": true, - "pass": true, - "threshold": 0.0, - "star": false - } - ], - "success": false, - "pass": false, - "threshold": 1.0, - "scoreStrategyName": "最倧埗分率策略" - }, - "extra": null - }, - { - "dataIndex": 1, - "inputData": { - "dataIndex": 1, - "inputItem": { - "query": "hello, 囜庆节", - "type": "1" - } - }, - "apiCompletionResult": { - "dataIndex": 1, - "resultItem": { - "response": "Mock response for hello, 囜庆节" - }, - "startTime": 1763027533463, - "endTime": 1763027533463, - "timeCost": 0, - "success": true - }, - "evalResult": { - "dataIndex": 1, - "score": 0.0, - "reason": "圚火蜊祚掚荐场景䞭由于甚户仅提䟛出发地和目的地而未明确任䜕偏奜劂蜊次类型、座䜍等级、时闎芁求等AI助手仅默讀掚荐无座/硬座的普通列蜊既未䞻劚匕富甚户补充信息也未展瀺笊合垞规偏奜的掚荐方案劂高铁、卧铺等富臎圚蜊次掚荐、偏奜匹配和有效回倍等所有评䌰绎床均埗分䞺0反映出系统对基础查询的默讀掚荐策略存圚猺陷。", - "startTime": 0, - "endTime": 0, - "timeCost": 0, - "scorerResults": [ - { - "dataIndex": 1, - "metric": "匂垞测试", - "score": 0.0, - "scoreRate": 0.0, - "totalScore": 1.0, - "reason": "Error: / by zero", - "extra": null, - "statTime": 0, - "endTime": 0, - "timeCost": 0, - "success": false, - "pass": false, - "threshold": 0.0, - "star": false - }, - { - "dataIndex": 1, - "metric": "回倍长床检查", - "score": 1.0, - "scoreRate": 0.0, - "totalScore": 1.0, - "reason": "hello, 囜庆节 的回倍长床超过5䞪字笊", - "extra": null, - "statTime": 1763027533980, - "endTime": 
1763027533980, - "timeCost": 0, - "success": true, - "pass": true, - "threshold": 0.0, - "star": false - }, - { - "dataIndex": 1, - "metric": "盞䌌床检查level1", - "score": 0.0, - "scoreRate": 0.0, - "totalScore": 1.0, - "reason": "盞䌌床䞺0.0000小于阈倌0.0000", - "extra": { - "similarity": 0.0, - "similarityThreshold": 0.0 - }, - "statTime": 1763027533900, - "endTime": 1763027533917, - "timeCost": 17, - "success": true, - "pass": true, - "threshold": 0.0, - "star": false - } - ], - "success": false, - "pass": false, - "threshold": 1.0, - "scoreStrategyName": "最倧埗分率策略" - }, - "extra": null - } -] \ No newline at end of file diff --git a/evalkit-eval/src/test/resources/travel_demo/scenario2_config.json b/evalkit-eval/src/test/resources/travel_demo/scenario2_config.json deleted file mode 100644 index 9642655..0000000 --- a/evalkit-eval/src/test/resources/travel_demo/scenario2_config.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "scenarioId": "itinerary_transport_hotel_flow", - "sparqlTemplate": "PREFIX travel: \nPREFIX rdfs: \n\nSELECT ?depCityName ?destCityName ?transportType ?transportNo ?hotelName ?roomName ?attractionName\nWHERE {\n ?depCity rdfs:label ?depCityName .\n\n ?destCity rdfs:label ?destCityName .\n\n ?transport travel:departure ?depCity ;\n travel:destination ?destCity ;\n travel:transportType ?transportType ;\n travel:transportNo ?transportNo .\n\n ?hotel travel:locatedIn ?destCity ;\n travel:hotelName ?hotelName .\n\n ?room travel:roomType ?roomType ;\n travel:roomName ?roomName .\n\n ?attr travel:locatedIn ?destCity ;\n travel:attractionName ?attractionName .\n\n FILTER(?depCity != ?destCity)\n\n FILTER(?depCityName != \"䞊海\")\n}", - "minSimilarity": 0.15, - "maxSimilarity": 0.85, - "goldenCase": { - "kgDataUsed": { - "depCityName": "䞊海", - "destCityName": "成郜", - "transportType": "高铁", - "transportNo": "G321", - "hotelName": "熊猫䞻题客栈", - "roomName": "竹林亲子套房", - "attractionName": "倧熊猫繁育基地" - }, - "dialogue": [ - { - "turn": 1, - "query": "打算垊孩子去成郜玩几倩有什么必打卡景点掚荐吗", - 
"expectedVars": [ - "attractionName" - ] - }, - { - "turn": 2, - "query": "从䞊海出发有什么掚荐的亀通方匏吗", - "expectedVars": [ - "transportNo" - ] - }, - { - "turn": 3, - "query": "到了那蟹晚䞊䜏哪里比蟃方䟿", - "expectedVars": [ - "hotelName" - ] - }, - { - "turn": 4, - "query": "家庭房还有吗。", - "expectedVars": [ - "roomName" - ] - }, - { - "turn": 5, - "query": "垮我把刚才看奜的蜊祚和这䞪亲子房䞀起䞋单吧。", - "expectedVars": [ - "transportNo", - "roomName" - ] - } - ] - } -} \ No newline at end of file diff --git a/evalkit-infra/src/test/java/com/evalkit/framework/infra/service/llm/LLMServiceFactoryTest.java b/evalkit-infra/src/test/java/com/evalkit/framework/infra/service/llm/LLMServiceFactoryTest.java index 181df1d..0626680 100644 --- a/evalkit-infra/src/test/java/com/evalkit/framework/infra/service/llm/LLMServiceFactoryTest.java +++ b/evalkit-infra/src/test/java/com/evalkit/framework/infra/service/llm/LLMServiceFactoryTest.java @@ -1,34 +1,74 @@ package com.evalkit.framework.infra.service.llm; -import com.evalkit.framework.common.utils.runtime.RuntimeEnvUtils; -import com.evalkit.framework.infra.service.llm.config.DeepseekLLMServiceConfig; import com.evalkit.framework.infra.service.llm.config.LLMServiceConfig; import lombok.extern.slf4j.Slf4j; import org.junit.jupiter.api.Test; +import static org.junit.jupiter.api.Assertions.*; + @Slf4j class LLMServiceFactoryTest { - String deepSeekToken = RuntimeEnvUtils.getPropertyFromResource("secret.properties", "deepseek-token"); + /** + * 构造䞀䞪固定回倍的 mock LLMService䞍䟝赖任䜕倖郚服务 + */ + private LLMService mockLLMService(String fixedReply) { + return new LLMService() { + @Override + public String chat(String prompt) { + return fixedReply; + } - @Test - public void test() { - // 泚册DeepSeek_Test倧暡型服务 - LLMServiceFactory.registerLLMService("DeepSeek_Test", new LLMServiceFactory.LLMServiceBuilder() { @Override - public LLMService build(LLMServiceConfig config) { - return new DeepSeekLLMService((DeepseekLLMServiceConfig) config); + public String getModel() { + return 
"mock-model"; } - }); + }; + } + + @Test + void testRegisterAndCreateLLMService() { + // 䜿甚 mock builder 泚册服务䞍䟝赖任䜕倖郚 token 或 HTTP 请求 + LLMServiceFactory.registerLLMService("Mock_Test", + (LLMServiceFactory.LLMServiceBuilder) + config -> mockLLMService("hello from mock")); // 创建服务实䟋 - DeepseekLLMServiceConfig config = DeepseekLLMServiceConfig.builder() - .apiToken(deepSeekToken) - .build(); - LLMService llmService = LLMServiceFactory.createLLMService("DeepSeek_Test", config); - - String query = "hello"; - String reply = llmService.chat(query); - log.info("llm service config:{}, query:{}, reply:{}", config, query, reply); + LLMService llmService = LLMServiceFactory.createLLMService("Mock_Test", + LLMServiceConfig.builder().model("mock-model").build()); + + assertNotNull(llmService, "创建的 LLMService 䞍应䞺 null"); + + // 验证 mock 调甚可以正垞返回而䞍䌚真正发起 HTTP 请求 + String reply = llmService.chat("hello"); + assertEquals("hello from mock", reply, "mock LLMService 应返回预期的固定回倍"); + log.info("llmService model:{}, reply:{}", llmService.getModel(), reply); + } + + @Test + void testCreateUnregisteredServiceThrowsException() { + // 访问未泚册的服务名称应抛出 IllegalArgumentException + assertThrows(IllegalArgumentException.class, + () -> LLMServiceFactory.createLLMService("NonExistentService", null), + "访问未泚册服务应抛出 IllegalArgumentException"); + } + + @Test + void testRegisterOverwriteExistingService() { + // 先泚册䞀䞪返回 "v1" 的服务 + LLMServiceFactory.registerLLMService("Override_Test", + (LLMServiceFactory.LLMServiceBuilder) + config -> mockLLMService("v1")); + LLMService v1 = LLMServiceFactory.createLLMService("Override_Test", + LLMServiceConfig.builder().model("mock").build()); + assertEquals("v1", v1.chat("test")); + + // 芆盖泚册䞺返回 "v2" 的服务 + LLMServiceFactory.registerLLMService("Override_Test", + (LLMServiceFactory.LLMServiceBuilder) + config -> mockLLMService("v2")); + LLMService v2 = LLMServiceFactory.createLLMService("Override_Test", + LLMServiceConfig.builder().model("mock").build()); + 
assertEquals("v2", v2.chat("test"), "芆盖泚册后新服务应返回新的回倍"); } } \ No newline at end of file diff --git a/evalkit-infra/src/test/java/com/evalkit/framework/infra/service/llm/LoadBalanceLLMServiceTest.java b/evalkit-infra/src/test/java/com/evalkit/framework/infra/service/llm/LoadBalanceLLMServiceTest.java index 59482cd..474bba0 100644 --- a/evalkit-infra/src/test/java/com/evalkit/framework/infra/service/llm/LoadBalanceLLMServiceTest.java +++ b/evalkit-infra/src/test/java/com/evalkit/framework/infra/service/llm/LoadBalanceLLMServiceTest.java @@ -1,9 +1,6 @@ package com.evalkit.framework.infra.service.llm; import com.evalkit.framework.common.utils.list.ListUtils; -import com.evalkit.framework.common.utils.runtime.RuntimeEnvUtils; -import com.evalkit.framework.infra.service.llm.config.DeepseekLLMServiceConfig; -import com.evalkit.framework.infra.service.llm.config.LLMServiceConfig; import com.evalkit.framework.infra.service.llm.config.LoadBalanceLLMServiceConfig; import com.evalkit.framework.infra.service.llm.strategy.RoundRobinLoadBalanceStrategy; import lombok.extern.slf4j.Slf4j; @@ -11,34 +8,44 @@ import org.junit.jupiter.api.Test; import java.util.List; +import java.util.concurrent.atomic.AtomicInteger; + +import static org.junit.jupiter.api.Assertions.*; @Slf4j class LoadBalanceLLMServiceTest { LoadBalanceLLMService loadBalanceLLMService; + /** + * 构造䞀䞪固定返回指定内容的 mock LLMService䞍发起任䜕 HTTP 请求 + */ + private LLMService mockLLMService(String model, String fixedReply) { + return new LLMService() { + @Override + public String chat(String prompt) { + return fixedReply; + } + + @Override + public String getModel() { + return model; + } + }; + } + @BeforeEach void setUp() { - String deepSeekToken = RuntimeEnvUtils.getPropertyFromResource("secret.properties", "deepseek-token"); - - // 泚册 - LLMServiceFactory.registerLLMService("DeepSeek_Test1", (LLMServiceFactory.LLMServiceBuilder) config -> new DeepSeekLLMService((DeepseekLLMServiceConfig) config)); - 
LLMServiceFactory.registerLLMService("DeepSeek_Test2", (LLMServiceFactory.LLMServiceBuilder) config -> new DeepSeekLLMService((DeepseekLLMServiceConfig) config)); - - // 创建 - DeepseekLLMServiceConfig config = DeepseekLLMServiceConfig.builder() - .apiToken(deepSeekToken) - .build(); - LLMService llmService10 = LLMServiceFactory.createLLMService("DeepSeek_Test1", config); - LLMService llmService11 = LLMServiceFactory.createLLMService("DeepSeek_Test1", config); - LLMService llmService12 = LLMServiceFactory.createLLMService("DeepSeek_Test1", config); - LLMService llmService13 = LLMServiceFactory.createLLMService("DeepSeek_Test1", config); - LLMService llmService14 = LLMServiceFactory.createLLMService("DeepSeek_Test1", config); - LLMService llmService20 = LLMServiceFactory.createLLMService("DeepSeek_Test2", config); - LLMService llmService21 = LLMServiceFactory.createLLMService("DeepSeek_Test2", config); - - // 莟蜜 - List llmServices = ListUtils.of(llmService10, llmService11, llmService12, llmService13, llmService14, llmService20, llmService21); + // 甹 mock LLMService 替代真实的 DeepSeek 服务䞍䟝赖倖郚 token 或 HTTP + LLMService llmService1 = mockLLMService("mock-model-1", "reply from model-1"); + LLMService llmService2 = mockLLMService("mock-model-1", "reply from model-1"); + LLMService llmService3 = mockLLMService("mock-model-1", "reply from model-1"); + LLMService llmService4 = mockLLMService("mock-model-2", "reply from model-2"); + LLMService llmService5 = mockLLMService("mock-model-2", "reply from model-2"); + + List llmServices = ListUtils.of( + llmService1, llmService2, llmService3, llmService4, llmService5); + loadBalanceLLMService = new LoadBalanceLLMService( LoadBalanceLLMServiceConfig.builder() .llmServices(llmServices) @@ -48,10 +55,35 @@ void setUp() { } @Test - void test() { + void testGetModel() { String model = loadBalanceLLMService.getModel(); + assertNotNull(model, "getModel() 䞍应返回 null"); log.info("models: {}", model); - loadBalanceLLMService.chat("hello,world"); 
- loadBalanceLLMService.chat("今日倩气"); + } + + @Test + void testChatRoundRobin() { + // 验证蜮询策略倚次调甚应分垃圚䞍同服务䞊 + AtomicInteger callCount = new AtomicInteger(0); + for (int i = 0; i < 5; i++) { + String reply = loadBalanceLLMService.chat("test query " + i); + assertNotNull(reply, "chat() 返回䞍应䞺 null"); + callCount.incrementAndGet(); + } + assertEquals(5, callCount.get(), "应成功完成 5 次 chat 调甚"); + log.info("完成 {} 次 chat 调甚莟蜜均衡正垞", callCount.get()); + } + + @Test + void testEmptyLLMServicesThrowsException() { + // 校验空 services 列衚时构造应抛出匂垞 + assertThrows(IllegalArgumentException.class, () -> + new LoadBalanceLLMService( + LoadBalanceLLMServiceConfig.builder() + .llmServices(ListUtils.of()) + .loadBalanceStrategy(new RoundRobinLoadBalanceStrategy()) + .build() + ) + ); } } \ No newline at end of file diff --git a/evalkit-test/pom.xml b/evalkit-test/pom.xml index 9a271c2..eca635b 100644 --- a/evalkit-test/pom.xml +++ b/evalkit-test/pom.xml @@ -40,6 +40,11 @@ HEAD + + + true + + io.github.zendodx diff --git a/evalkit-test/src/test/java/com/evalkit/framework/test/DAGEvalPerformanceTest.java b/evalkit-test/src/test/java/com/evalkit/framework/test/DAGEvalPerformanceTest.java index b0f1e89..cf90394 100644 --- a/evalkit-test/src/test/java/com/evalkit/framework/test/DAGEvalPerformanceTest.java +++ b/evalkit-test/src/test/java/com/evalkit/framework/test/DAGEvalPerformanceTest.java @@ -24,7 +24,7 @@ public class DAGEvalPerformanceTest { private static final Logger logger = LoggerFactory.getLogger(DAGEvalPerformanceTest.class); private static final String tempDir = System.getProperty("java.io.tmpdir"); private static final String fileName = "DAGEvalTest_" + UuidUtils.generateUuid() + ".xlsx"; - public static int caseCount = 10000 * 6; + public static int caseCount = 10000; private static PerformanceMonitor performanceMonitor; /** diff --git a/evalkit-test/src/test/java/com/evalkit/framework/test/DeltaEvalPerformanceTest.java 
b/evalkit-test/src/test/java/com/evalkit/framework/test/DeltaEvalPerformanceTest.java index 1e552e9..eac5a55 100644 --- a/evalkit-test/src/test/java/com/evalkit/framework/test/DeltaEvalPerformanceTest.java +++ b/evalkit-test/src/test/java/com/evalkit/framework/test/DeltaEvalPerformanceTest.java @@ -24,7 +24,7 @@ public class DeltaEvalPerformanceTest { private static final Logger logger = LoggerFactory.getLogger(DeltaEvalPerformanceTest.class); private static final String tempDir = System.getProperty("java.io.tmpdir"); private static final String fileName = "DeltaEvalPerformanceTest_" + UuidUtils.generateUuid() + ".xlsx"; - public static int caseCount = 10000 * 5; + public static int caseCount = 10000; private static PerformanceMonitor performanceMonitor; /** diff --git a/pom.xml b/pom.xml index c335578..d20c037 100644 --- a/pom.xml +++ b/pom.xml @@ -64,6 +64,7 @@ 1.5 3.2.5 1.7.3 + 0.8.12 @@ -127,6 +128,27 @@ + + + org.jacoco + jacoco-maven-plugin + ${jacoco-maven-plugin.version} + + + prepare-agent + + prepare-agent + + + + report + test + + report + + + + org.apache.maven.plugins @@ -139,11 +161,24 @@ - + DeltaEvalFacadeTest OrderedDeltaEvalFacadeTest OrderedDeltaEvalWithinDataInjectTest + + DAGEvalPerformanceTest + DeltaEvalPerformanceTest + + ActiveMQEmbeddedServerTest + MixedEmbeddedServerTest + + RubricBasedScorerTest + + ${argLine} -Dnet.bytebuddy.experimental=true