From 73fe1f053f85d6f0e1cff343076b1a7fa24dbff0 Mon Sep 17 00:00:00 2001 From: Taeil Ma Date: Sat, 20 Jun 2026 20:56:20 +0900 Subject: [PATCH] feat(v1.10.0): asset hand-off (asset_sources lifts the code-only ceiling) + restore lost v1.9.0 SKILL prose MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The f1 probe ran option (a) — supply the authored assets v1.9.0 flagged human_required. asset_ceiling()/asset_ceiling_hit() + config.asset_sources: once the operator supplies authored assets, the ceiling_without_assets lifts and the loop resumes leaping. Also RESTORED the v1.9.0 SKILL.md engine prose (5-G benchmark anchoring, 3-K technique menu, mega-leap, asset-ceiling rule) that the v1.9.0 commit never staged and a hard-reset discarded — scripts shipped but the spec didn't. verify.py 63. plugin 1.9.0→1.10.0. Co-Authored-By: Claude Opus 4.8 --- .claude-plugin/plugin.json | 2 +- CHANGELOG.md | 26 ++++++++++++++++++++ config.example.json | 4 +++- scripts/rubric_score.py | 22 +++++++++++++++++ skills/evolve/SKILL.md | 49 ++++++++++++++++++++++++++++++++------ tests/verify.py | 16 +++++++++++++ 6 files changed, 110 insertions(+), 9 deletions(-) diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json index 3d1298d..4de84ff 100644 --- a/.claude-plugin/plugin.json +++ b/.claude-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "ooda-loop", "displayName": "OODA-loop", - "version": "1.9.0", + "version": "1.10.0", "description": "An autonomous operations layer for your live side project. It watches, re-orients from which PRs you merge and reject, and opens small revertible PRs — bounded by a HALT file, protected paths, and a hard cost cap. Built on Boyd's OODA loop. You stay in command.", "author": { "name": "Taeil Ma", diff --git a/CHANGELOG.md b/CHANGELOG.md index a94d949..8ac10bd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,32 @@ independently. Bump there signals migration work for downstream projects. 
--- +## [v1.10.0] — 2026-06-20 + +### Added — asset hand-off; restored v1.9.0 engine prose (probe-found gap) + +The f1 probe ran option (a): supply the authored assets v1.9.0 flagged as +`human_required`. Two findings → fixes: + +- **Asset hand-off (`asset_sources`).** A dimension's `ceiling_without_assets` + caps CODE-ONLY work; once the operator lists `asset_sources` (CDN/repo paths to + glTF models, textures, HDRIs, audio), `rubric_score.asset_ceiling()` returns + None — the ceiling lifts, the `human_required` skill_gap resolves, and the loop + resumes leaping the dimension toward `bar_coast` integrating the assets. + Demonstrated: the f1 box-car ceiling (~0.21) was broken by supplying a real + glTF car + an HDRI. New `asset_ceiling()`/`asset_ceiling_hit()` (deterministic), + config `asset_sources`, evolve 5-G/2-G rule. verify.py 62 → 63. +- **Restored lost v1.9.0 SKILL.md prose (probe-found bug).** The v1.9.0 commit + staged scripts/config/tests but NOT `skills/evolve/SKILL.md`, then a hard-reset + discarded the engine-prose edits — so the 5-G benchmark-anchoring critic prompt, + the 3-K technique-menu injection, the mega-leap step, and the asset-ceiling rule + were missing from `main` even though their scripts/config shipped. Re-applied + here (the dual-bar/anchor/technique/mega-leap behaviour is now actually in the + engine spec, matching the v1.9.0 scripts). Lesson: stage the engine spec with + its scripts. + +plugin 1.9.0→1.10.0. + ## [v1.9.0] — 2026-06-19 ### Added — "Ambition": let the loop make RADICAL jumps, not prototype plateaus (config schema 1.5.0) diff --git a/config.example.json b/config.example.json index 08f98c9..37e3c98 100644 --- a/config.example.json +++ b/config.example.json @@ -289,7 +289,9 @@ "score_0.90": "authored models, motion blur, SSAO, weather, near-photoreal" }, "ceiling_without_assets": 0.35, - "ceiling_note": "Procedural geometry + CDN Three.js addons (post-processing, PBR/IBL, particles, Sky shader) top out ~0.35. 
Above that needs authored glTF models / textures / HDRIs — the loop should record a human_required skill_gap, not keep leaping.", + "ceiling_note": "Procedural geometry + CDN Three.js addons (post-processing, PBR/IBL, particles, Sky shader) top out ~0.35. Above that needs authored glTF models / textures / HDRIs — the loop records a human_required skill_gap instead of continuing to leap.", + "__asset_sources_doc__": "v1.10.0 hand-off — the OPERATOR supplies authored assets here (CDN URLs or repo paths to glTF models, textures, HDRIs, audio) once the code-only ceiling is hit. Non-empty asset_sources LIFTS ceiling_without_assets (rubric_score.asset_ceiling → None): the human_required gap resolves and the loop resumes leaping toward bar_coast, now integrating the assets. The f1 probe drove this: the loop flagged the box-car ceiling at ~0.21; a real glTF car + an HDRI were supplied here and leaping resumed.", + "asset_sources": [], "techniques": [ "EffectComposer: RenderPass + UnrealBloomPass + OutputPass (ACES tone map)", "PMREMGenerator from RoomEnvironment or Sky → scene.environment (IBL)", diff --git a/scripts/rubric_score.py b/scripts/rubric_score.py index 486afc4..bdec07c 100644 --- a/scripts/rubric_score.py +++ b/scripts/rubric_score.py @@ -265,6 +265,28 @@ def failed_leaps(outcomes: list, dimension: str, min_delta: float) -> int: return n +def asset_ceiling(dimension: dict): + """v1.10.0 asset hand-off: the score above which CODE-ONLY work can't climb. + Returns None (no ceiling) once the OPERATOR has supplied `asset_sources` for the + dimension — the v1.9.0 `human_required` hand-off is fulfilled, so the loop may + keep leaping toward bar_coast with the authored assets (models/textures/HDRIs/ + audio). Otherwise returns `ceiling_without_assets` (or None if unset). 
This is + what closes the v1.9.0 ceiling concept: the loop stops at the code-only limit, + flags human_required, and — once assets arrive — resumes.""" + if dimension.get("asset_sources"): + return None + c = dimension.get("ceiling_without_assets") + return float(c) if isinstance(c, (int, float)) else None + + +def asset_ceiling_hit(dimension: dict, score) -> bool: + """True iff a code-only leap has topped out: at/above the ceiling AND no assets + supplied yet. evolve 2-G records a `human_required` skill_gap instead of leaping + again; supplying `asset_sources` clears it and re-opens leaping.""" + ceil = asset_ceiling(dimension) + return ceil is not None and score is not None and score >= ceil + + def lock_target(outcomes: list, rubric: dict, leap_target: str | None) -> str | None: """v1.8.0 dimension-lock: after a SUCCESSFUL leap whose target is still below (bar − eps), return that target so evolve 2-G keeps the plateau active on it diff --git a/skills/evolve/SKILL.md b/skills/evolve/SKILL.md index e4097b6..9b25848 100644 --- a/skills/evolve/SKILL.md +++ b/skills/evolve/SKILL.md @@ -830,12 +830,26 @@ if orient.plateau.active AND not plateau_leap_blocked AND not dry_run: ``` When `cycle_mode == "leap"`, Step 4-B passes `leap_mode=true`, -`targeted_dimension`, and `config.leap.max_lines` (a larger size budget than -`max_lines_per_pr`) to the build skill (dev-cycle), instructing it to make a -**step-change on the targeted dimension** (overhaul / rebuild / refactor-for- -cohesion) rather than pick the top-RICE *feature*. Steps 3-G (complexity level) -and 3-F (confidence) STILL apply — a leap is not exempt from safety gates. The -leap's verification is the 5-G artifact gate (4-C2), not only the unit test. 
+`targeted_dimension`, `config.leap.max_lines` (a larger size budget than +`max_lines_per_pr`), AND — **v1.9.0** — the targeted dimension's `techniques` + +`technique_cdns` menu + any `asset_sources`, to the build skill (dev-cycle), +instructing it to make a **step-change on the targeted dimension** (overhaul / +rebuild / refactor-for-cohesion) rather than pick the top-RICE *feature*. The +technique menu is the fix for "the loop reached for more BoxGeometry instead of +EffectComposer": the leap instruction is *"pick the ONE technique from this menu +(or integrate the supplied asset_sources) most likely to move the score toward +bar_coast and implement it completely; do not add game features."* Steps 3-G +(complexity level) and 3-F (confidence) STILL apply. The leap's verification is the +5-G artifact gate (4-C2), not only the unit test. + +**Mega-leap (v1.9.0, optional).** When a dimension can't be moved by a normal leap +(the 2-G thrashing guard would HALT), an operator may author `config.leap.mega_leap` ++ an approved `mega_leap_plan.json` to unlock a multi-cycle RE-PLATFORM: a much +larger budget (`mega_leap.max_lines`) across up to `max_cycles`, NO per-cycle +revert, only a final-cycle gate (revert ALL cycles if cumulative artifact delta < +`min_artifact_delta_at_completion`). `requires_human_plan_approval` keeps the loop +from self-authorising a rewrite — this is how the loop makes a genuinely RADICAL +jump (replace a whole pipeline) instead of only incremental overhauls. ### 3-G: Progressive Complexity Filter (apply FIRST) @@ -1886,11 +1900,32 @@ verdict = critic( or api output). Cite concrete evidence per axis. Be harsh; default low when unsure. A good HUD cannot compensate for a broken core. For a metrics axis, judge against the axis description's targets, not vibes. 
+ --- BENCHMARK ANCHORING (v1.9.0, critical): if the axis has `reference` + anchors, score ONLY against those named real-product levels — NOT + relative to this artifact's past or to other prototype work. Name the + anchor the artifact most resembles before the number. A flat/primitive/ + 'it exists and works' result is ~0.10 (score_0.10), NOT 0.5+. Worse than + score_0.10 → below 0.10. Do not grade on a curve where 'a decent + prototype' = good. --- Score null only if the evidence is null. Output {dimension_scores:{axis:score|null}, weakest_dimension, critique(<=30 words)}.", - input: { mission, rubric.dimensions, evidence: dim_artifact } + input: { mission, rubric.dimensions (with reference anchors), evidence: dim_artifact } ) -> { dimension_scores, weakest_dimension, critique } +-- ASSET CEILING + HAND-OFF (v1.9.0 → v1.10.0). For the targeted dimension, after +-- the leap is scored: if rubric_score.asset_ceiling_hit(dimension, score) — i.e. +-- a CODE-ONLY leap reached `ceiling_without_assets` AND no `asset_sources` are +-- supplied — record a skill_gap { type:"human_required", name:"asset_ceiling_{dim}", +-- detail: dimension.ceiling_note } and STOP leaping that dimension (further gain +-- needs authored models/textures/audio the loop can't make). When the operator +-- adds `dimension.asset_sources` (CDN/local authored assets), asset_ceiling() +-- returns None → the ceiling lifts, the human_required gap is resolved, and the +-- loop resumes leaping the dimension toward bar_coast WITH the assets. (The f1 +-- probe drove this: the loop flagged the box-car ceiling; a real glTF car + +-- HDRI were supplied as asset_sources; leaping resumed.) NOTE: assets often load +-- async — integrate with a "rebuild on asset-ready" step so a cycle isn't scored +-- before the asset is in the frame. 
+ -- AGGREGATE deterministically (scripts/rubric_score.py — pure, no model): agg = rubric_score.aggregate(verdict.dimension_scores, rubric) artifact_score = agg.artifact_score -- weighted mean diff --git a/tests/verify.py b/tests/verify.py index 760f237..9752881 100644 --- a/tests/verify.py +++ b/tests/verify.py @@ -736,6 +736,22 @@ def _mod(name, fn): f"good(0.88)→leap={R.detect_plateau(good, rub19)['plateau']}, legacy bars={legacy['bar_leap']}/{legacy['bar_coast']}", ) + # 12) v1.10.0 asset hand-off: a code-only dimension is ceiling-capped until the + # operator supplies asset_sources, after which the ceiling lifts (the + # human_required hand-off the f1 probe drove — supply a glTF model → keep leaping). + code_only = {"name": "visual_fidelity", "ceiling_without_assets": 0.35} + with_assets = {"name": "visual_fidelity", "ceiling_without_assets": 0.35, + "asset_sources": ["cdn://ferrari.glb", "cdn://venice.hdr"]} + r.check( + "artifact-axis: asset ceiling caps code-only work, lifts once assets supplied (v1.10.0)", + R.asset_ceiling(code_only) == 0.35 + and R.asset_ceiling_hit(code_only, 0.36) is True + and R.asset_ceiling(with_assets) is None + and R.asset_ceiling_hit(with_assets, 0.36) is False, + f"code-only ceiling={R.asset_ceiling(code_only)} hit@0.36={R.asset_ceiling_hit(code_only,0.36)}; " + f"with-assets ceiling={R.asset_ceiling(with_assets)} hit@0.36={R.asset_ceiling_hit(with_assets,0.36)}", + ) + def main() -> int: r = Runner()