diff --git a/.gitignore b/.gitignore index fbfeed0..44f71a7 100644 --- a/.gitignore +++ b/.gitignore @@ -17,6 +17,12 @@ test-temp-* .agents/skills .gemini/skills +# skills-test +skills-test/**/* +!skills-test/*/ +!skills-test/*/evals/ +!skills-test/*/evals/evals.json + .vscode/**/* !.vscode/settings.json !.vscode/extensions.json diff --git a/cspell.config.cjs b/cspell.config.cjs index 8661e4b..8a0250c 100644 --- a/cspell.config.cjs +++ b/cspell.config.cjs @@ -17,6 +17,7 @@ module.exports = { 'node_modules', 'pnpm-lock.yaml', 'skills/**/scripts', + 'skills-test/**', ], flagWords: banWords, dictionaries: ['dictionary'], diff --git a/dev-skills/rstack-skill-evaluator/SKILL.md b/dev-skills/rstack-skill-evaluator/SKILL.md new file mode 100644 index 0000000..2ae89ea --- /dev/null +++ b/dev-skills/rstack-skill-evaluator/SKILL.md @@ -0,0 +1,14 @@ +--- +name: rstack-skill-evaluator +description: Benchmark agent skills by generating evaluation cases, comparing skill-guided and baseline runs, and recording the resulting artifacts under skills-test/{skill-name}. +metadata: + dependencies: ['skill-creator'] +--- + +# rstack-skill-evaluator + +Use skill-creator to test the skill. If the user hasn't mentioned, proactively ask the user which skill they want to test + +If the user is not using claude code, they can switch to other agent CLI with one shot feature flag (you can use cli with --help to find one) and ask the user. 
+ +**Make sure to generate the test-related files in the "skills-test/{skill-name}" directory.** diff --git a/skills-lock.yaml b/skills-lock.yaml index 41138a4..b16c514 100644 --- a/skills-lock.yaml +++ b/skills-lock.yaml @@ -2,6 +2,20 @@ lockfileVersion: "0.1" installDir: .agents/skills linkTargets: [] skills: + skill-creator: + specifier: https://github.com/anthropics/skills.git#0f7c287eaf0d4fa511cb871bb55e2a7862251fbb&path:/skills/skill-creator + resolution: + type: git + url: https://github.com/anthropics/skills.git + commit: 0f7c287eaf0d4fa511cb871bb55e2a7862251fbb + path: /skills/skill-creator + digest: sha256-bbef1adbfb025ac90fb1dd769be30ead431d37bdfa7b7e151f9310f01026411b + rstack-skill-evaluator: + specifier: link:./dev-skills/rstack-skill-evaluator + resolution: + type: link + path: dev-skills/rstack-skill-evaluator + digest: sha256-75d5a6eca2ceabb492a4680ae05ef7fdb5b124c39c5d2007ea34cb8a314d7478 pr-creator: specifier: link:./skills/pr-creator resolution: diff --git a/skills-test/migrate-to-rsbuild/eval.json b/skills-test/migrate-to-rsbuild/eval.json new file mode 100644 index 0000000..f0cdf56 --- /dev/null +++ b/skills-test/migrate-to-rsbuild/eval.json @@ -0,0 +1,115 @@ +{ + "skill_name": "migrate-to-rsbuild", + "evals": [ + { + "id": 1, + "prompt": "We have an older React app at /Users/me/workspace/legacy-dashboard that still runs on webpack with webpack.config.js, webpack-dev-server, and a few resolve.alias entries for @app and @shared. 
Please migrate it to Rsbuild with minimal behavior changes, keep the app logic untouched, and tell me how you would verify the migration before removing old config.", + "expected_output": "A webpack-to-Rsbuild migration plan or implementation that preserves behavior, maps config carefully, and validates before cleanup.", + "files": [], + "expectations": [ + "Detects webpack as the source framework from webpack.config.js and webpack-dev-server cues", + "Follows smallest-change-first migration sequencing instead of large speculative rewrites", + "Includes explicit validation steps before removing old dependencies or config" + ] + }, + { + "id": 2, + "prompt": "I need to move a Vite React app to Rsbuild. The repo has vite.config.ts, a couple of define aliases, and some environment-specific proxy setup. Please handle the migration but do not change business logic unless you absolutely have to, and call out any Vite-specific config deltas I should review manually.", + "expected_output": "A Vite-to-Rsbuild migration that preserves runtime behavior and highlights Vite-specific mapping differences.", + "files": [], + "expectations": [ + "Identifies Vite as the source toolchain from vite.config.ts", + "Keeps application logic unchanged unless a change is clearly justified", + "Surfaces Vite-specific migration deltas rather than giving only a generic migration summary" + ] + }, + { + "id": 3, + "prompt": "Can you migrate this Create React App project to Rsbuild? It still uses react-scripts, a setupProxy.js file, and some custom env handling. 
I want a safe baseline migration first, and only after that should we talk about cleaning out the old CRA bits.", + "expected_output": "A CRA-to-Rsbuild migration that prioritizes a safe baseline and defers cleanup until verification passes.", + "files": [], + "expectations": [ + "Detects CRA from react-scripts and related project clues", + "Preserves the validate-before-cleanup rule", + "Summarizes follow-up cleanup only after the new setup is verified" + ] + }, + { + "id": 4, + "prompt": "This app is built with CRACO on top of CRA. We have craco.config.js with webpack aliases and dev server overrides. Please migrate it to Rsbuild, but stage the work so the baseline migration is green before trying to carry over every custom behavior.", + "expected_output": "A CRACO-aware migration plan that separates baseline Rsbuild setup from later custom-config mapping.", + "files": [], + "expectations": [ + "Recognizes CRACO instead of treating the app as plain CRA", + "Separates baseline migration from custom behavior migration", + "Uses an incremental validate-first migration sequence" + ] + }, + { + "id": 5, + "prompt": "Please move this Vue CLI application to Rsbuild. The project still has vue.config.js, uses @vue/cli-service, and has a few devServer and transpileDependencies tweaks. Keep current behavior as close as possible and explain how you would validate both dev and build flows.", + "expected_output": "A Vue CLI to Rsbuild migration with behavior-preserving mapping and clear development plus production verification guidance.", + "files": [], + "expectations": [ + "Identifies Vue CLI from vue.config.js or @vue/cli-service clues", + "Treats behavior preservation as a hard migration constraint", + "Includes both dev-server and production-build verification steps" + ] + }, + { + "id": 6, + "prompt": "I already converted most of this webpack project to Rsbuild, but I am not sure whether the migration is actually complete. 
Please review the remaining gaps, compare the old setup with the new one, and tell me if it is safe to remove the old webpack packages now.", + "expected_output": "A migration review that checks completeness, looks for missing mappings, and only approves cleanup after verification.", + "files": [], + "expectations": [ + "Treats this as migration validation rather than a fresh migration from scratch", + "Looks for missing config mappings between the old and new setups", + "Does not recommend cleanup until the migration is validated" + ] + }, + { + "id": 7, + "prompt": "My webpack setup has several custom loaders and plugin hooks, and some of them are pretty weird. Migrate the project to Rsbuild, but do not touch runtime application code unless it is truly necessary and you explain the reason clearly.", + "expected_output": "A careful migration that respects the no-business-logic-change boundary and treats custom config as something to map incrementally.", + "files": [], + "expectations": [ + "Avoids unnecessary runtime code edits", + "Treats unusual loaders and plugin hooks as custom behavior to map after baseline migration", + "Explains any unavoidable deviation instead of silently changing behavior" + ] + }, + { + "id": 8, + "prompt": "Please migrate this Vite project to Rsbuild and use the right migration guidance for the detected source framework. I do not want a generic bundler migration answer that ignores Vite-specific differences.", + "expected_output": "A framework-specific Vite migration path grounded in the right migration reference rather than a generic answer.", + "files": [], + "expectations": [ + "Selects the Vite-specific migration path", + "Avoids a one-size-fits-all bundler migration answer", + "Uses framework-specific guidance to drive the migration" + ] + }, + { + "id": 9, + "prompt": "I only care about getting to a working Rsbuild setup first. 
Please do not delete webpack packages, config files, or helper scripts until the new dev command starts cleanly and the production build passes.", + "expected_output": "A migration workflow that keeps old artifacts temporarily and sequences cleanup after successful validation.", + "files": [], + "expectations": [ + "Preserves old dependencies or config temporarily when needed", + "Explicitly validates dev and build before cleanup", + "Summarizes which obsolete artifacts can be removed afterward" + ] + }, + { + "id": 10, + "prompt": "Explain what Rsbuild is and list its main features compared with webpack.", + "expected_output": "This is a general product-explanation prompt, not a migration request, and it should serve as a near-miss negative eval for trigger quality.", + "files": [], + "expectations": [ + "Acts as a should-not-trigger style near-miss rather than a migration workflow request", + "Helps test whether the skill description is too broad", + "Shares adjacent vocabulary with migration tasks without actually asking for migration" + ] + } + ] +} diff --git a/skills-test/migrate-to-rsbuild/evals/evals.json b/skills-test/migrate-to-rsbuild/evals/evals.json new file mode 100644 index 0000000..07feb6f --- /dev/null +++ b/skills-test/migrate-to-rsbuild/evals/evals.json @@ -0,0 +1,46 @@ +{ + "skill_name": "migrate-to-rsbuild", + "evals": [ + { + "id": 1, + "eval_name": "webpack-react-migration", + "prompt": "Migrate the project at skills-test/migrate-to-rsbuild/test-projects/webpack-react to Rsbuild. Keep the app logic untouched, preserve the resolve aliases (@app and @shared), and make sure the dev server config is mapped appropriately. Do not remove old webpack config or dependencies until you've verified the migration would work. 
Summarize what you changed and what manual follow-ups remain.", + "expected_output": "A properly configured Rsbuild project with rsbuild.config.js, updated package.json with @rsbuild/core and @rsbuild/plugin-react, aliases preserved, and old config retained for verification.", + "files": ["skills-test/migrate-to-rsbuild/test-projects/webpack-react"], + "assertions": [ + "Created rsbuild.config.js with proper source/alias configuration", + "Added @rsbuild/core and @rsbuild/plugin-react to package.json", + "Preserved @app and @shared aliases from webpack.config.js", + "Did not delete webpack.config.js before verification", + "Provided a summary of changes and follow-ups" + ] + }, + { + "id": 2, + "eval_name": "vite-react-migration", + "prompt": "Migrate the project at skills-test/migrate-to-rsbuild/test-projects/vite-react to Rsbuild. Keep the app logic untouched, preserve the resolve aliases (@ and @components), and map the build/output settings appropriately. Do not remove old Vite config or dependencies until you've verified the migration would work. Summarize what you changed and what manual follow-ups remain.", + "expected_output": "A properly configured Rsbuild project with rsbuild.config.js, updated package.json with @rsbuild/core and @rsbuild/plugin-react, aliases preserved, and old config retained for verification.", + "files": ["skills-test/migrate-to-rsbuild/test-projects/vite-react"], + "assertions": [ + "Created rsbuild.config.js with proper alias configuration", + "Added @rsbuild/core and @rsbuild/plugin-react to package.json", + "Preserved @ and @components aliases from vite.config.js", + "Did not delete vite.config.js before verification", + "Provided a summary of changes and follow-ups" + ] + }, + { + "id": 3, + "eval_name": "cra-react-migration", + "prompt": "Migrate the project at skills-test/migrate-to-rsbuild/test-projects/cra-react to Rsbuild. Keep the app logic untouched. Follow the official CRA migration guide. 
Do not remove react-scripts or old config until you've verified the migration would work. Summarize what you changed and what manual follow-ups remain.", + "expected_output": "A properly configured Rsbuild project with rsbuild.config.js, updated package.json with @rsbuild/core and @rsbuild/plugin-react, and old CRA config retained for verification.", + "files": ["skills-test/migrate-to-rsbuild/test-projects/cra-react"], + "assertions": [ + "Created rsbuild.config.js", + "Added @rsbuild/core and @rsbuild/plugin-react to package.json", + "Did not remove react-scripts or old config before verification", + "Provided a summary of changes and follow-ups" + ] + } + ] +} diff --git a/skills-test/migrate-to-rsbuild/report.md b/skills-test/migrate-to-rsbuild/report.md new file mode 100644 index 0000000..2e292d0 --- /dev/null +++ b/skills-test/migrate-to-rsbuild/report.md @@ -0,0 +1,78 @@ +# migrate-to-rsbuild Skill Evaluation Report + +## Overview + +- **Date**: 2026-04-16 +- **Skill**: `migrate-to-rsbuild` +- **Test cases**: 3 evaluation cases with real project files (webpack, Vite, CRA) +- **Iteration**: 2 (real-project file manipulations) + +## Test Cases + +| Eval | Name | Source Framework | Key Requirements | +| ---- | ----------------------- | ---------------- | ------------------------------------------------------------------ | +| 1 | webpack-react-migration | webpack + React | Preserve `@app`/`@shared` aliases, keep old config until verified | +| 2 | vite-react-migration | Vite + React | Preserve `@`/`@components` aliases, keep old config until verified | +| 3 | cra-react-migration | CRA + React | Follow official CRA guide, keep `react-scripts` until verified | + +## Benchmark Results + +| Metric | With Skill | Without Skill | Delta | +| --------- | ---------------- | ---------------- | ------- | +| Pass Rate | 91.7% | 93.3% | -1.7 pp | +| Time | 198.3s ± 19.7s | 217.9s ± 15.5s | -19.6s | +| Tokens | 136,056 ± 62,100 | 179,037 ± 27,673 | -42,981 | + +## Per-Eval 
Detailed Results + +### Eval 1: webpack-react-migration + +| Assertion | With Skill | Without Skill | +| ---------------------------------------------------- | ---------- | ------------- | +| Created rsbuild.config.js | PASS | PASS | +| Added @rsbuild/core and @rsbuild/plugin-react | PASS | PASS | +| Preserved @app and @shared aliases | PASS | PASS | +| Did not delete webpack.config.js before verification | PASS | PASS | +| Provided MIGRATION_SUMMARY.md | PASS | PASS | + +**Observation**: Both configurations performed well. The with-skill run was slightly faster (182.7s vs 207.8s) and used fewer tokens (170,793 vs 217,442). + +### Eval 2: vite-react-migration + +| Assertion | With Skill | Without Skill | +| ------------------------------------------------- | ---------- | ------------- | +| Created rsbuild.config.js | PASS | PASS | +| Added @rsbuild/core and @rsbuild/plugin-react | PASS | PASS | +| Preserved @ and @components aliases | PASS | PASS | +| Did not delete vite.config.js before verification | PASS | PASS | +| Provided MIGRATION_SUMMARY.md | PASS | FAIL | + +**Observation**: With-skill completed faster (178.9s vs 236.6s) but used more tokens (190,478 vs 159,743). Baseline omitted the migration summary. + +### Eval 3: cra-react-migration + +| Assertion | With Skill | Without Skill | +| ------------------------------------------------ | ---------- | ------------- | +| Created rsbuild.config.js | PASS | PASS | +| Added @rsbuild/core and @rsbuild/plugin-react | PASS | PASS | +| Did not remove react-scripts before verification | FAIL | PASS | +| Provided MIGRATION_SUMMARY.md | PASS | PASS | + +**Observation**: This is the critical failure. The with-skill agent removed `react-scripts` from `package.json` despite the skill explicitly instructing to keep old dependencies until dev/build verification passes. The baseline correctly preserved it. Time was comparable (233.3s vs 209.3s), but with-skill used dramatically fewer tokens (47,896 vs 159,925). 
+ +## Key Findings + +1. **Core migration mechanics are solid**: All runs successfully created valid `rsbuild.config.js`, added correct Rsbuild dependencies, and preserved resolve aliases. + +2. **"Validate before cleanup" is not robust enough**: The most important failure was in evaluation case 3, where the with-skill run prematurely removed `react-scripts`. This indicates the skill's phrasing around keeping old tooling until verification is not strong enough to resist the agent's tendency to clean up eagerly. + +3. **Token efficiency is significantly better with the skill**: With-skill averaged ~43K fewer tokens per run (although eval 2 used more tokens with the skill), suggesting the skill provides useful structure that reduces exploratory tool use. + +4. **Baseline quality is high for simple migrations**: Without the skill, agents can still figure out basic migrations, but they occasionally miss structured outputs (e.g., migration summary in eval-2) or skip verification sequencing. + +## Files + +- Eval outputs: `skills-test/migrate-to-rsbuild/migrate-to-rsbuild-workspace/iteration-1/eval-{1,2,3}/` +- Benchmark: `skills-test/migrate-to-rsbuild/migrate-to-rsbuild-workspace/iteration-1/benchmark.json` + +- Evaluation definitions: `skills-test/migrate-to-rsbuild/evals/evals.json` diff --git a/skills-test/rslib-best-practices/evals/evals.json b/skills-test/rslib-best-practices/evals/evals.json new file mode 100644 index 0000000..3bae992 --- /dev/null +++ b/skills-test/rslib-best-practices/evals/evals.json @@ -0,0 +1,44 @@ +{ + "skill_name": "rslib-best-practices", + "evals": [ + { + "id": 1, + "eval_name": "library-config-review", + "prompt": "Review the Rslib package at packages/rsdoctor-analysis. Audit its rslib.config.ts, package.json, and tsconfig setup against the rslib-best-practices skill. Focus on configuration shape, output expectations, declaration file strategy, and validation workflow. Do not rewrite unrelated source files. 
Summarize concrete recommendations and the commands you would run to validate them.", + "expected_output": "A review that cites Rslib-specific recommendations for configuration, outputs, declaration files, and validation commands such as `rslib inspect`.", + "files": ["packages/rsdoctor-analysis"], + "assertions": [ + "Checks that the project uses `rslib.config.ts` with `defineConfig`", + "Reviews declaration file generation expectations for the library", + "Mentions validating the final config with `rslib inspect`", + "Summarizes concrete follow-up steps without changing unrelated source files" + ] + }, + { + "id": 2, + "eval_name": "dependency-and-output-triage", + "prompt": "A consumer reported a missing dependency after publishing the library in packages/rsdoctor-analysis. Use the rslib-best-practices skill to inspect the package setup and explain how bundled dependencies, externalized dependencies, and package.json exports should be verified before release. Keep the answer focused on actionable checks and minimal changes.", + "expected_output": "A troubleshooting plan that explains Rslib dependency handling, output verification, and how package.json exports should align with generated files.", + "files": ["packages/rsdoctor-analysis"], + "assertions": [ + "Explains the difference between bundled devDependencies and externalized dependencies or peerDependencies", + "Calls out verifying package.json exports against generated JavaScript and declaration outputs", + "Recommends inspecting the build output before release", + "Keeps the suggested remediation scoped to the release issue" + ] + }, + { + "id": 3, + "eval_name": "debugging-checklist", + "prompt": "You need to debug config resolution and plugin behavior for the package in packages/rsdoctor-analysis. 
Apply the rslib-best-practices skill and provide a concise debugging checklist that uses the standard Rslib and Rsbuild tooling, including any environment variables, inspection commands, or generated files that should be checked before making code changes.", + "expected_output": "A concise debugging checklist that uses `DEBUG=rsbuild`, `rslib inspect`, and generated config artifacts under `dist/.rsbuild`.", + "files": ["packages/rsdoctor-analysis"], + "assertions": [ + "Includes running with `DEBUG=rsbuild` when diagnosing config resolution or plugin behavior", + "Includes `rslib inspect` as part of the workflow", + "Directs the reviewer to inspect generated files under `dist/.rsbuild`", + "Frames the response as a minimal debugging checklist" + ] + } + ] +} diff --git a/skills.json b/skills.json index a3d5d2a..97df26b 100644 --- a/skills.json +++ b/skills.json @@ -3,6 +3,8 @@ "installDir": ".agents/skills", "linkTargets": [], "skills": { + "skill-creator": "https://github.com/anthropics/skills.git#0f7c287eaf0d4fa511cb871bb55e2a7862251fbb&path:/skills/skill-creator", + "rstack-skill-evaluator": "link:./dev-skills/rstack-skill-evaluator", "pr-creator": "link:./skills/pr-creator" } }