diff --git a/.agents/evals/academic-vqe-qaoa/BASELINE_COLLECTION.md b/.agents/evals/academic-vqe-qaoa/BASELINE_COLLECTION.md new file mode 100644 index 00000000..f8aef559 --- /dev/null +++ b/.agents/evals/academic-vqe-qaoa/BASELINE_COLLECTION.md @@ -0,0 +1,115 @@ +# Academic VQE/QAOA Baseline Collection + +Use this guide to collect `codex_without_skill.json` responses without +contaminating the baseline with the `cudaq-academic-vqe-qaoa` skill context. + +## Goal + +Create the local run templates: + +```bash +python3 .agents/evals/academic-vqe-qaoa/compare_runs.py init \ + --agent codex \ + --model gpt-5 +``` + +Then fill: + +```text +.agents/evals/academic-vqe-qaoa/runs/codex_without_skill.json +``` + +with real answers from a clean no-skill run. Make sure the paired with-skill +file also contains real with-skill responses before using the comparison as a +before/after result: + +```text +.agents/evals/academic-vqe-qaoa/runs/codex_with_skill.json +``` + +## Clean Baseline Rules + +- Use a fresh Codex thread or another model session that has not read + `.agents/skills/cudaq-academic-vqe-qaoa/SKILL.md`. +- Do not paste the assertions, metrics reference, or with-skill answers into + that no-skill session. +- Ask each prompt exactly once. +- Paste the model's full answer into the matching `response` field. +- Leave duration fields as `null` unless the tool exposes exact values. +- Keep `context_files` empty unless the no-skill agent explicitly read files. + +## Prompts + +### INSTALL01 + +```text +I am new to CUDA-QX Solvers. What should I install and how do I quickly check that I can run VQE or QAOA examples? +``` + +### VQE01 + +```text +Show me a minimal CUDA-Q Solvers VQE example with a small ansatz, a SpinOperator Hamiltonian, and the right optimizer and gradient settings. +``` + +### QAOA01 + +```text +Show me the simplest QAOA MaxCut workflow in CUDA-Q Solvers using a small weighted NetworkX graph. 
+``` + +### QAOA02 + +```text +Why does solvers.qaoa(..., optimizer='lbfgs') fail with a gradient error, and what should a beginner use instead? +``` + +## Score The Baseline + +The `runs/` directory is ignored by git because these files are local +validation artifacts. If the run files do not exist, create them with the +`init` command above. + +To avoid hand-editing JSON, capture pasted answers into the no-skill run file: + +```bash +python3 .agents/evals/academic-vqe-qaoa/record_responses.py \ + .agents/evals/academic-vqe-qaoa/runs/codex_without_skill.json \ + --only-empty +``` + +For each prompt, paste the no-skill model's answer, then finish with a line containing only: + +```text +<<>> +``` + +Next, score both run files from the repo root: + +```bash +python3 .agents/evals/academic-vqe-qaoa/evaluate_metrics.py \ + .agents/evals/academic-vqe-qaoa/runs/codex_with_skill.json \ + .agents/evals/academic-vqe-qaoa/runs/codex_without_skill.json +``` + +Then write the paired summary: + +```bash +python3 .agents/evals/academic-vqe-qaoa/compare_runs.py compare \ + .agents/evals/academic-vqe-qaoa/runs/codex_with_skill.json \ + .agents/evals/academic-vqe-qaoa/runs/codex_without_skill.json \ + --out .agents/evals/academic-vqe-qaoa/runs/codex-comparison-summary.json +``` + +## Local Validation Example + +When the with-skill responses are populated from this workflow, one local +validation run scored: + +```text +[codex:with_skill] pass_rate=100% coverage=21/21 forbidden=0 context_files=8 +``` + +Treat this as an example validation result, not a clean-checkout guarantee. The +no-skill score should only be reported after `codex_without_skill.json` contains +real no-skill responses. 
diff --git a/.agents/evals/academic-vqe-qaoa/assertions.json b/.agents/evals/academic-vqe-qaoa/assertions.json new file mode 100644 index 00000000..09b20333 --- /dev/null +++ b/.agents/evals/academic-vqe-qaoa/assertions.json @@ -0,0 +1,60 @@ +{ + "INSTALL01": { + "must_include": [ + "Brev", + "CPU", + "pip install cudaq-solvers", + "import cudaq_solvers", + "libgfortran" + ], + "must_not_include": [ + "pip install cudaq-solvers[gqe]", + "install qec first", + "Ising" + ] + }, + "VQE01": { + "must_include": [ + "cudaq.kernel", + "spin", + "solvers.vqe", + "optimizer", + "gradient", + "parameter_shift" + ], + "must_not_include": [ + "solvers.qaoa", + "GQE", + "Ising" + ] + }, + "QAOA01": { + "must_include": [ + "networkx", + "get_maxcut_hamiltonian", + "get_num_qaoa_parameters", + "solvers.qaoa", + "cobyla", + "most_probable" + ], + "must_not_include": [ + "empty initial parameters are fine", + "optimizer='lbfgs'", + "optimizer=\"lbfgs\"", + "Ising" + ] + }, + "QAOA02": { + "must_include": [ + "lbfgs", + "requires gradients", + "cobyla", + "jac" + ], + "must_not_include": [ + "silently falls back", + "empty initial parameters are fine", + "Ising" + ] + } +} diff --git a/.agents/evals/academic-vqe-qaoa/compare_runs.py b/.agents/evals/academic-vqe-qaoa/compare_runs.py new file mode 100644 index 00000000..cc23b839 --- /dev/null +++ b/.agents/evals/academic-vqe-qaoa/compare_runs.py @@ -0,0 +1,232 @@ +#!/usr/bin/env python3 +"""Initialize and compare academic VQE/QAOA eval runs.""" + +from __future__ import annotations + +import argparse +import json +import sys +from pathlib import Path +from typing import Any + +from evaluate_metrics import DEFAULT_ASSERTIONS, load_assertions, normalize_run, summarize + +ROOT = Path(__file__).resolve().parent +DEFAULT_PROMPTS = ROOT / "prompts.json" +DEFAULT_RUNS = ROOT / "runs" + +SKILL = ".agents/skills/cudaq-academic-vqe-qaoa/SKILL.md" +REFERENCE_BY_PROMPT = { + "INSTALL01": 
".agents/skills/cudaq-academic-vqe-qaoa/references/install.md", + "VQE01": ".agents/skills/cudaq-academic-vqe-qaoa/references/vqe.md", + "QAOA01": ".agents/skills/cudaq-academic-vqe-qaoa/references/qaoa.md", + "QAOA02": ".agents/skills/cudaq-academic-vqe-qaoa/references/qaoa.md", +} + + +def load_prompts(path: Path) -> list[dict[str, Any]]: + prompts = json.loads(path.read_text()) + if not isinstance(prompts, list): + raise SystemExit(f"Prompts must be a JSON array: {path}") + return prompts + + +def response_template(prompt: dict[str, Any], + with_skill: bool) -> dict[str, Any]: + prompt_id = prompt["id"] + context_files = [] + if with_skill: + context_files = [SKILL, REFERENCE_BY_PROMPT.get(prompt_id, SKILL)] + + return { + "id": prompt_id, + "name": prompt.get("name", ""), + "prompt": prompt["prompt"], + "response": "", + "context_files": context_files, + "duration_ms": None, + } + + +def cmd_init(args: argparse.Namespace) -> int: + prompts = load_prompts(args.prompts) + args.out_dir.mkdir(parents=True, exist_ok=True) + + created: list[Path] = [] + for config in ("with_skill", "without_skill"): + path = args.out_dir / f"{args.agent}_{config}.json" + if path.exists() and not args.force: + print(f"exists, skipping: {path}") + continue + payload = { + "agent": + args.agent, + "model": + args.model, + "config": + config, + "notes": ("Fill response text and runtime/context metrics after " + "running each prompt. Leave unavailable fields null."), + "responses": [ + response_template(p, config == "with_skill") for p in prompts + ], + } + path.write_text(json.dumps(payload, indent=2)) + created.append(path) + + for path in created: + print(f"created: {path}") + if not created: + print("No files created. 
Pass --force to overwrite templates.") + return 0 + + +def numeric_delta(with_value: Any, without_value: Any) -> int | float | None: + if with_value is None or without_value is None: + return None + return with_value - without_value + + +def pair_key(summary: dict[str, Any]) -> tuple[str, str]: + return (summary.get("agent", "unknown"), summary.get("model", "")) + + +def summarize_pairs(summaries: list[dict[str, Any]]) -> list[dict[str, Any]]: + grouped: dict[tuple[str, str], dict[str, dict[str, Any]]] = {} + for summary in summaries: + grouped.setdefault(pair_key(summary), {})[summary["config"]] = summary + + pairs: list[dict[str, Any]] = [] + for (agent, model), configs in sorted(grouped.items()): + with_skill = configs.get("with_skill") + without_skill = configs.get("without_skill") + if not with_skill or not without_skill: + pairs.append({ + "agent": agent, + "model": model, + "status": "missing_pair", + "available_configs": sorted(configs), + }) + continue + + pairs.append({ + "agent": agent, + "model": model, + "status": "paired", + "with_skill_path": with_skill["path"], + "without_skill_path": without_skill["path"], + "delta": { + "pass_rate": + numeric_delta(with_skill["pass_rate"], + without_skill["pass_rate"]), + "coverage_rate": + numeric_delta(with_skill["coverage_rate"], + without_skill["coverage_rate"]), + "forbidden_hits": + numeric_delta(with_skill["forbidden_hits"], + without_skill["forbidden_hits"]), + "context_files": + numeric_delta(with_skill["context_files"], + without_skill["context_files"]), + "duration_ms": + numeric_delta(with_skill["duration_ms"], + without_skill["duration_ms"]), + }, + }) + return pairs + + +def candidate_run_files(run_dir: Path) -> list[Path]: + return sorted(path for path in run_dir.glob("*.json") + if not path.name.endswith("-summary.json") and + path.name != "comparison-summary.json") + + +def cmd_compare(args: argparse.Namespace) -> int: + assertions = load_assertions(args.assertions) + paths = args.responses or 
candidate_run_files(args.run_dir) + if not paths: + raise SystemExit(f"No response JSON files found in {args.run_dir}") + + summaries = [summarize(normalize_run(path), assertions) for path in paths] + result = { + "assertions": str(args.assertions), + "runs": summaries, + "pairs": summarize_pairs(summaries), + } + + if args.out: + args.out.parent.mkdir(parents=True, exist_ok=True) + args.out.write_text(json.dumps(result, indent=2)) + + if args.json: + print(json.dumps(result, indent=2)) + return 0 + + for summary in summaries: + print(f"[{summary['agent']}:{summary['config']}] " + f"pass_rate={summary['pass_rate']:.0%} " + f"coverage={summary['coverage']}/{summary['coverage_max']} " + f"forbidden={summary['forbidden_hits']} " + f"context_files={summary['context_files']}") + + print() + print("Pair deltas (with_skill - without_skill):") + for pair in result["pairs"]: + label = f"{pair['agent']}/{pair['model'] or 'model-unset'}" + if pair["status"] != "paired": + print( + f" {label}: missing pair; available={pair['available_configs']}" + ) + continue + delta = pair["delta"] + print( + f" {label}: pass_rate={delta['pass_rate']:+.0%} " + f"coverage={delta['coverage_rate']:+.0%} " + f"forbidden={delta['forbidden_hits']:+} " + f"context_files={delta['context_files']:+} " + f"duration_ms={delta['duration_ms'] if delta['duration_ms'] is not None else 'n/a'}" + ) + + if args.out: + print(f"Wrote {args.out}") + return 0 + + +def main() -> int: + parser = argparse.ArgumentParser(description=__doc__) + sub = parser.add_subparsers(dest="cmd", required=True) + + init = sub.add_parser("init", help="Create editable run JSON templates.") + init.add_argument("--agent", + required=True, + help="codex, claude, cursor, etc.") + init.add_argument("--model", default="", help="Model label if known.") + init.add_argument("--prompts", type=Path, default=DEFAULT_PROMPTS) + init.add_argument("--out-dir", type=Path, default=DEFAULT_RUNS) + init.add_argument("--force", + action="store_true", + 
help="Overwrite templates.") + init.set_defaults(func=cmd_init) + + compare = sub.add_parser("compare", + help="Score runs and compute pair deltas.") + compare.add_argument( + "responses", + nargs="*", + type=Path, + help="Specific run JSON files. Defaults to run-dir/*.json.") + compare.add_argument("--run-dir", type=Path, default=DEFAULT_RUNS) + compare.add_argument("--assertions", type=Path, default=DEFAULT_ASSERTIONS) + compare.add_argument("--out", + type=Path, + default=DEFAULT_RUNS / "comparison-summary.json") + compare.add_argument("--json", action="store_true") + compare.set_defaults(func=cmd_compare) + + args = parser.parse_args() + return args.func(args) + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/.agents/evals/academic-vqe-qaoa/evaluate_metrics.py b/.agents/evals/academic-vqe-qaoa/evaluate_metrics.py new file mode 100644 index 00000000..6150ed71 --- /dev/null +++ b/.agents/evals/academic-vqe-qaoa/evaluate_metrics.py @@ -0,0 +1,179 @@ +#!/usr/bin/env python3 +"""Evaluate academic VQE/QAOA skill responses. + +This is intentionally small and deterministic. It scores answer text against +substring assertions and rolls up context/runtime metrics when a run file +contains them. 
+""" + +from __future__ import annotations + +import argparse +import json +import sys +from pathlib import Path +from typing import Any + +ROOT = Path(__file__).resolve().parent +DEFAULT_ASSERTIONS = ROOT / "assertions.json" + + +def load_assertions(path: Path) -> dict[str, dict[str, list[str]]]: + data = json.loads(path.read_text()) + if not isinstance(data, dict): + raise SystemExit(f"Assertions must be a JSON object: {path}") + return data + + +def normalize_run(path: Path) -> dict[str, Any]: + payload = json.loads(path.read_text()) + config = payload.get("config") if isinstance(payload, dict) else None + config = config or path.stem + agent = payload.get("agent") if isinstance(payload, dict) else None + model = payload.get("model") if isinstance(payload, dict) else None + + records: dict[str, dict[str, Any]] = {} + if isinstance(payload, dict) and isinstance(payload.get("responses"), list): + for item in payload["responses"]: + if not isinstance(item, dict) or "id" not in item: + continue + records[str(item["id"])] = dict(item) + elif isinstance(payload, dict) and isinstance(payload.get("responses"), + dict): + for key, value in payload["responses"].items(): + if isinstance(value, dict): + rec = dict(value) + rec.setdefault("id", key) + records[key] = rec + else: + records[key] = {"id": key, "response": str(value)} + elif isinstance(payload, dict): + for key, value in payload.items(): + if key in {"agent", "model", "config", "metrics", "notes"}: + continue + records[key] = {"id": key, "response": str(value)} + else: + raise SystemExit(f"Unsupported response JSON shape: {path}") + + return { + "path": str(path), + "agent": agent or "unknown", + "model": model or "", + "config": config, + "records": records, + } + + +def contains(text: str, needle: str) -> bool: + return needle.lower() in text.lower() + + +def score_record(prompt_id: str, record: dict[str, Any], + spec: dict[str, list[str]]) -> dict[str, Any]: + text = str(record.get("response", "")) + must = 
spec.get("must_include", []) + must_not = spec.get("must_not_include", []) + missing = [item for item in must if not contains(text, item)] + forbidden = [item for item in must_not if contains(text, item)] + + context_files = record.get("context_files", []) + if not isinstance(context_files, list): + context_files = [] + + return { + "id": prompt_id, + "passed": not missing and not forbidden, + "coverage": len(must) - len(missing), + "coverage_max": len(must), + "missing": missing, + "forbidden": forbidden, + "context_files": len(context_files), + "duration_ms": record.get("duration_ms"), + } + + +def summarize(run: dict[str, Any], assertions: dict[str, + dict]) -> dict[str, Any]: + scores = [] + for prompt_id, spec in assertions.items(): + record = run["records"].get(prompt_id, { + "id": prompt_id, + "response": "" + }) + scores.append(score_record(prompt_id, record, spec)) + + coverage = sum(s["coverage"] for s in scores) + coverage_max = sum(s["coverage_max"] for s in scores) + forbidden_hits = sum(len(s["forbidden"]) for s in scores) + + def sum_known(field: str) -> int | None: + values = [s[field] for s in scores if s.get(field) is not None] + return sum(int(v) for v in values) if values else None + + return { + "agent": run.get("agent", "unknown"), + "model": run.get("model", ""), + "config": run["config"], + "path": run["path"], + "prompt_count": len(scores), + "passed": sum(1 for s in scores if s["passed"]), + "pass_rate": (sum(1 for s in scores if s["passed"]) / + len(scores) if scores else 0.0), + "coverage": coverage, + "coverage_max": coverage_max, + "coverage_rate": coverage / coverage_max if coverage_max else 0.0, + "forbidden_hits": forbidden_hits, + "context_files": sum(s["context_files"] for s in scores), + "duration_ms": sum_known("duration_ms"), + "scores": scores, + } + + +def main() -> int: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("responses", + nargs="+", + type=Path, + help="One or more response JSON files 
to score.") + parser.add_argument("--assertions", + type=Path, + default=DEFAULT_ASSERTIONS, + help="Assertions JSON path.") + parser.add_argument("--out", type=Path, default=None, help="Write JSON.") + parser.add_argument("--json", + action="store_true", + help="Print full JSON instead of text summary.") + args = parser.parse_args() + + assertions = load_assertions(args.assertions) + summaries = [ + summarize(normalize_run(path), assertions) for path in args.responses + ] + result = {"assertions": str(args.assertions), "runs": summaries} + + if args.out: + args.out.parent.mkdir(parents=True, exist_ok=True) + args.out.write_text(json.dumps(result, indent=2)) + + if args.json: + print(json.dumps(result, indent=2)) + return 0 + + for summary in summaries: + print(f"[{summary['agent']}:{summary['config']}] " + f"pass_rate={summary['pass_rate']:.0%} " + f"coverage={summary['coverage']}/{summary['coverage_max']} " + f"forbidden={summary['forbidden_hits']} " + f"context_files={summary['context_files']}") + for score in summary["scores"]: + if not score["passed"]: + print(f" - {score['id']}: missing={score['missing']} " + f"forbidden={score['forbidden']}") + + if args.out: + print(f"Wrote {args.out}") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/.agents/evals/academic-vqe-qaoa/prompts.json b/.agents/evals/academic-vqe-qaoa/prompts.json new file mode 100644 index 00000000..797edf79 --- /dev/null +++ b/.agents/evals/academic-vqe-qaoa/prompts.json @@ -0,0 +1,22 @@ +[ + { + "id": "INSTALL01", + "name": "install-and-smoke-test", + "prompt": "I am new to CUDA-QX Solvers. What should I install and how do I quickly check that I can run VQE or QAOA examples?" + }, + { + "id": "VQE01", + "name": "minimal-vqe", + "prompt": "Show me a minimal CUDA-Q Solvers VQE example with a small ansatz, a SpinOperator Hamiltonian, and the right optimizer and gradient settings." 
END_MARKER = "<<>>"


def load_run(path: Path) -> dict[str, Any]:
    """Load a run JSON file that carries a `responses` list.

    Raises:
        SystemExit: if the payload is not an object with a `responses` list.
    """
    payload = json.loads(path.read_text(encoding="utf-8"))
    if not isinstance(payload, dict) or not isinstance(payload.get("responses"),
                                                       list):
        raise SystemExit(f"Expected a run JSON with a responses list: {path}")
    return payload


def read_multiline(prompt_id: str) -> str:
    """Read pasted lines from stdin until the end marker or end of input.

    Returns the collected lines joined with newlines and stripped of
    leading/trailing whitespace.
    """
    print(
        f"Paste response for {prompt_id}. Finish with a line containing {END_MARKER}."
    )
    lines: list[str] = []
    while True:
        try:
            line = input()
        except EOFError:
            break
        # Compare the stripped line so a marker pasted with stray spaces or a
        # trailing carriage return still ends the capture.
        if line.strip() == END_MARKER:
            break
        lines.append(line)
    return "\n".join(lines).strip()


def main() -> int:
    """Prompt for each response in the run file and save the updated file."""
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("run", type=Path, help="Run JSON file to update.")
    parser.add_argument(
        "--only-empty",
        action="store_true",
        help="Skip responses that already contain text.",
    )
    args = parser.parse_args()

    payload = load_run(args.run)
    for item in payload["responses"]:
        prompt_id = item.get("id", "unknown")
        existing = item.get("response")
        # A JSON null counts as empty; str(None) would otherwise read as the
        # non-empty text "None" and the prompt would be skipped.
        if args.only_empty and existing is not None and str(existing).strip():
            continue

        print()
        print(f"== {prompt_id}: {item.get('name', '')} ==")
        print(item.get("prompt", ""))
        print()
        response = read_multiline(prompt_id)
        # An empty paste leaves any existing answer untouched.
        if response:
            item["response"] = response

    args.run.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8")
    print(f"Updated {args.run}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
Load exactly one reference file unless the user asks for comparison. +3. Answer with a minimal runnable path first, then mention the source files for + users who want to inspect the implementation. +4. When evaluating before/after behavior, use the metrics reference and the + deterministic evaluator. + +## Intent Routing + +| User intent | Read | +| --- | --- | +| Install or smoke test CUDA-QX Solvers | `references/install.md` | +| Build a first VQE example | `references/vqe.md` | +| Build a first QAOA or MaxCut example | `references/qaoa.md` | +| Compare with-skill vs without-skill answers | `references/metrics.md` | + +## Response Contract + +For install questions, include: + +- the provided Brev environment as the recommended workshop path +- a note that CPU execution is acceptable for the small VQE/QAOA learning examples +- `pip install cudaq-solvers` +- an import smoke test for `cudaq` and `cudaq_solvers` +- the `libgfortran` note for classical optimizers + +For VQE questions, include: + +- `cudaq.kernel` +- `cudaq.spin` +- `solvers.vqe` +- initial parameters +- optimizer/gradient guidance + +For QAOA questions, include: + +- `networkx` +- `solvers.get_maxcut_hamiltonian` +- `solvers.get_num_qaoa_parameters` +- `solvers.qaoa` +- non-empty initial parameters +- `optimizer="cobyla"` as the beginner-safe default + +For evaluation questions, report: + +- pass rate against deterministic assertions +- required concept coverage +- forbidden concept hits +- context files loaded +- runtime when available + +## Source Of Truth + +Prefer these repo files for API details: + +- `docs/sphinx/quickstart/installation.rst` +- `docs/sphinx/examples/solvers/python/uccsd_vqe.py` +- `docs/sphinx/examples/solvers/python/molecular_docking_qaoa.py` +- `libs/solvers/python/tests/test_vqe.py` +- `libs/solvers/python/tests/test_qaoa.py` +- `libs/solvers/python/bindings/solvers/py_solvers.cpp` diff --git a/.agents/skills/cudaq-academic-vqe-qaoa/references/install.md 
b/.agents/skills/cudaq-academic-vqe-qaoa/references/install.md new file mode 100644 index 00000000..ad3e53a9 --- /dev/null +++ b/.agents/skills/cudaq-academic-vqe-qaoa/references/install.md @@ -0,0 +1,102 @@ +# Install And Smoke Test + +Use this for beginner installation questions before VQE or QAOA. + +## Recommended Workshop Path + +For students in a workshop, start from the provided Brev environment when one +is available. This avoids spending class time on local CUDA, Linux, driver, or +compiler setup and gives everyone the same baseline. + +GPU acceleration is useful for larger experiments, but it is not required for +the small VQE and QAOA learning examples in this skill. Students can validate +the install and prototype the examples on CPU, then use the provided Brev/GPU +setup when the class experiment needs acceleration or a standardized runtime. + +Inside the Brev environment, install the Solvers package: + +```bash +python3 -m pip install cudaq-solvers +``` + +Then verify the two imports the examples need: + +```bash +python3 - <<'PY' +import cudaq +import cudaq_solvers as solvers +print("cudaq:", cudaq.__name__) +print("cudaq_solvers:", solvers.__name__) +PY +``` + +If those imports work, students are ready to run the small VQE and QAOA +examples. + +## Local Linux Path + +For students using their own Linux machine, including CPU-only machines: + +```bash +python3 -m pip install cudaq-solvers +``` + +Then verify the two imports the examples need: + +```bash +python3 - <<'PY' +import cudaq +import cudaq_solvers as solvers +print("cudaq:", cudaq.__name__) +print("cudaq_solvers:", solvers.__name__) +PY +``` + +If the user wants both CUDA-QX libraries, use: + +```bash +python3 -m pip install cudaq-qec cudaq-solvers +``` + +Do not suggest `cudaq-solvers[gqe]` for this academic VQE/QAOA path. The +`[gqe]` extra pulls in PyTorch-oriented dependencies for Generative Quantum +Eigensolver workflows, which are out of scope here. 
+ +## Common Install Note + +CUDA-Q Solvers uses classical optimizers. On Linux, missing `libgfortran` can +break optimizer-backed workflows. On Debian-style systems: + +```bash +sudo apt-get install gfortran +``` + +## Docker Path + +For Mac, Windows, or anyone who cannot use Brev but wants a prebuilt +environment: + +```bash +docker pull ghcr.io/nvidia/cudaqx +docker run --gpus all -it ghcr.io/nvidia/cudaqx +``` + +Omit `--gpus all` if the machine has no NVIDIA GPU. + +## Quick Decision Guide + +- Workshop student: use the provided Brev environment; CPU is fine for the + small examples, and GPU is helpful for larger class experiments. +- Linux machine: use the local pip path, even on CPU-only machines, and install + `libgfortran`/`gfortran` if optimizer-backed workflows fail. +- Mac or Windows: prefer Brev; use Docker only if the user is already + comfortable with containers. +- No GPU: run the small workshop examples on CPU; use Brev when the workshop + needs the standard class setup. + +## Source Paths + +- Installation docs: `docs/sphinx/quickstart/installation.rst` +- Solvers package config: `libs/solvers/pyproject.toml.cu12`, + `libs/solvers/pyproject.toml.cu13` +- Wheel validation: `scripts/ci/test_wheels.sh` diff --git a/.agents/skills/cudaq-academic-vqe-qaoa/references/metrics.md b/.agents/skills/cudaq-academic-vqe-qaoa/references/metrics.md new file mode 100644 index 00000000..1b10623c --- /dev/null +++ b/.agents/skills/cudaq-academic-vqe-qaoa/references/metrics.md @@ -0,0 +1,113 @@ +# Evaluation Metrics + +Use this when comparing with-skill and without-skill answers for the academic +VQE/QAOA workshop. 
+ +## What To Measure + +Minimum objective metrics: + +- `agent` and `model`: recorded labels for the tool/model used +- `config`: `with_skill` or `without_skill` +- `pass_rate`: fraction of prompts that pass every deterministic assertion (printed as a percent) +- `coverage_rate`: fraction of required (`must_include`) concepts present in the answers +- `forbidden_hits`: count of known wrong or out-of-scope (`must_not_include`) phrases found +- `context_files`: number of files loaded by the agent, if available +- `duration_ms`: total end-to-end runtime across prompts, if available + +Compare Codex with-skill against Codex without-skill, Claude against Claude, +and Cursor against Cursor before making cross-tool claims. + +## Step-By-Step Evaluation + +1. Create templates for one agent/model: + +```bash +python3 .agents/evals/academic-vqe-qaoa/compare_runs.py init \ + --agent codex \ + --model gpt-5 +``` + +2. Fill both generated files under `.agents/evals/academic-vqe-qaoa/runs/`. + This directory is ignored by git because run files are local validation + artifacts: + +- `codex_with_skill.json`: run each prompt with this skill available. +- `codex_without_skill.json`: run each prompt without loading this skill. + +3. For every prompt response, paste the answer into `response`. Add + `duration_ms` and `context_files` when the agent exposes them. Leave + unavailable fields as `null`. + +4. Score and compare: + +```bash +python3 .agents/evals/academic-vqe-qaoa/compare_runs.py compare \ + .agents/evals/academic-vqe-qaoa/runs/codex_with_skill.json \ + .agents/evals/academic-vqe-qaoa/runs/codex_without_skill.json +``` + +5. Use the generated summary under `.agents/evals/academic-vqe-qaoa/runs/` for + local workshop/report notes. Force-add run artifacts only when a PR + intentionally needs to preserve a validation snapshot. 
+ +## Response File Shape + +The evaluator accepts either a simple mapping: + +```json +{ + "INSTALL01": "answer text", + "VQE01": "answer text", + "QAOA01": "answer text" +} +``` + +or a metric-bearing structure: + +```json +{ + "agent": "codex", + "model": "gpt-5", + "config": "with_skill", + "responses": [ + { + "id": "QAOA01", + "response": "answer text", + "context_files": [ + ".agents/skills/cudaq-academic-vqe-qaoa/SKILL.md", + ".agents/skills/cudaq-academic-vqe-qaoa/references/qaoa.md" + ], + "duration_ms": 3100 + } + ] +} +``` + +## Run + +```bash +python3 .agents/evals/academic-vqe-qaoa/compare_runs.py init \ + --agent codex \ + --model gpt-5 + +python3 .agents/evals/academic-vqe-qaoa/compare_runs.py compare \ + .agents/evals/academic-vqe-qaoa/runs/codex_with_skill.json \ + .agents/evals/academic-vqe-qaoa/runs/codex_without_skill.json +``` + +Use the summary to talk about context efficiency: + +- fewer context files loaded +- similar or better answer pass rate +- fewer forbidden hits + +## Prompt Set + +Prompts live at: + +`.agents/evals/academic-vqe-qaoa/prompts.json` + +Assertions live at: + +`.agents/evals/academic-vqe-qaoa/assertions.json` diff --git a/.agents/skills/cudaq-academic-vqe-qaoa/references/qaoa.md b/.agents/skills/cudaq-academic-vqe-qaoa/references/qaoa.md new file mode 100644 index 00000000..b0769e6b --- /dev/null +++ b/.agents/skills/cudaq-academic-vqe-qaoa/references/qaoa.md @@ -0,0 +1,62 @@ +# Minimal QAOA / MaxCut Path + +Use this for a first QAOA example. Keep the answer anchored on MaxCut because +the helper API is easy to explain and easy to test. 
+ +## Teaching Example + +```python +import numpy as np +import networkx as nx +import cudaq_solvers as solvers + +graph = nx.Graph() +graph.add_weighted_edges_from([ + (0, 1, 1.0), + (1, 2, 2.0), + (0, 2, 0.5), +]) + +hamiltonian = solvers.get_maxcut_hamiltonian(graph) +num_layers = 1 +num_parameters = solvers.get_num_qaoa_parameters(hamiltonian, num_layers) +initial_parameters = np.zeros(num_parameters) + +result = solvers.qaoa( + hamiltonian, + num_layers, + initial_parameters, + optimizer="cobyla", +) + +optimal_value, optimal_parameters, sample_result = result +print("MaxCut value:", -optimal_value) +print("Best bitstring:", sample_result.most_probable()) +print("Parameters:", optimal_parameters) +``` + +## Beginner Defaults + +- Use a NetworkX graph for MaxCut. +- Use `solvers.get_maxcut_hamiltonian(graph)`. +- Use `solvers.get_num_qaoa_parameters(...)` instead of guessing the parameter + count. +- Use non-empty initial parameters. +- Use `optimizer="cobyla"` as the safe beginner default. +- `QAOAResult` can be tuple-unpacked as + `(optimal_value, optimal_parameters, sample_result)`. +- QAOA minimizes the Hamiltonian. For MaxCut, print `-optimal_value` as the + cut value. + +## Pitfall + +`optimizer="lbfgs"` requires gradients. The QAOA path does not provide a +gradient instance by default, so beginners should use `cobyla` unless they are +passing a compatible SciPy optimizer with a `jac=` argument. 
+ +## Source Paths + +- Python example: `docs/sphinx/examples/solvers/python/molecular_docking_qaoa.py` +- Python tests: `libs/solvers/python/tests/test_qaoa.py` +- C++ API: `libs/solvers/include/cudaq/solvers/qaoa.h` +- Python bindings: `libs/solvers/python/bindings/solvers/py_solvers.cpp` diff --git a/.agents/skills/cudaq-academic-vqe-qaoa/references/vqe.md b/.agents/skills/cudaq-academic-vqe-qaoa/references/vqe.md new file mode 100644 index 00000000..90446aec --- /dev/null +++ b/.agents/skills/cudaq-academic-vqe-qaoa/references/vqe.md @@ -0,0 +1,62 @@ +# Minimal VQE Path + +Use this for a first VQE example. Keep the answer focused on the algorithm +shape, not chemistry setup. + +## Teaching Example + +```python +import cudaq +from cudaq import spin +import cudaq_solvers as solvers + + +@cudaq.kernel +def ansatz(theta: float): + q = cudaq.qvector(2) + x(q[0]) + ry(theta, q[1]) + x.ctrl(q[1], q[0]) + + +hamiltonian = ( + 5.907 + - 2.1433 * spin.x(0) * spin.x(1) + - 2.1433 * spin.y(0) * spin.y(1) + + 0.21829 * spin.z(0) + - 6.125 * spin.z(1) +) + +energy, params, history = solvers.vqe( + lambda thetas: ansatz(thetas[0]), + hamiltonian, + [0.0], + optimizer="lbfgs", + gradient="parameter_shift", + tol=1e-7, +) + +print("energy:", energy) +print("params:", params) +``` + +## Beginner Defaults + +- Use a non-empty initial parameter list. +- Use `optimizer="lbfgs"` with `gradient="parameter_shift"`. +- Or omit optimizer/gradient and let the default optimizer path run. +- Return value is `(energy, params, history)`. + +## Self Check + +- The ansatz argument count matches how `solvers.vqe` calls it. +- Initial parameters are not empty. +- Gradient-based optimizers have a gradient setting. +- The answer mentions `cudaq.kernel`, `spin`, and `solvers.vqe`. 
+ +## Source Paths + +- Python example: `docs/sphinx/examples/solvers/python/uccsd_vqe.py` +- Python tests: `libs/solvers/python/tests/test_vqe.py` +- C++ API: `libs/solvers/include/cudaq/solvers/vqe.h` +- Python bindings: `libs/solvers/python/bindings/solvers/py_solvers.cpp` diff --git a/.gitignore b/.gitignore index 2026b894..5426ab63 100644 --- a/.gitignore +++ b/.gitignore @@ -119,3 +119,6 @@ libs/*/pyproject.toml # This file is cloned from the qec dir; do not commit after cloning. libs/solvers/python/metapackages/setup.py + +# Academic agent skill evaluation runtime artifacts. +.agents/evals/academic-vqe-qaoa/runs/ diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 00000000..bf363fc6 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,25 @@ +# CUDA-QX Agent Skills + +This branch carries a small academic-facing skill slice for CUDA-QX Solvers. +It is intentionally independent from the larger skills architecture PR. + +## Skills + +| Skill | Purpose | +| --- | --- | +| `cudaq-academic-vqe-qaoa` | Basic install, VQE, and QAOA workflows for academic workshop examples | + +The skill source lives under `.agents/skills/`. Evaluation prompts, assertions, +and lightweight metrics tooling live under `.agents/evals/academic-vqe-qaoa/`. + +## Scope + +Keep this branch focused on: + +- installing and smoke-testing `cudaq-solvers` +- a minimal VQE workflow +- a minimal QAOA / MaxCut workflow +- objective before/after metrics for agent responses + +Avoid expanding this PR into QEC, GQE, chemistry active-space design, custom +operators, or the full multi-agent mirror infrastructure.