From 731746c4a56f8491add2f97d093f879de2291c2d Mon Sep 17 00:00:00 2001 From: NuoJin <1430093320@qq.com> Date: Wed, 25 Mar 2026 13:24:02 +0800 Subject: [PATCH 01/31] chore: ignore local worktrees --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 168a428..3ca7334 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,7 @@ __pycache__/ *.pyd .venv/ +.worktrees/ .pytest_cache/ .mypy_cache/ From 44c9a242e86f28103b692b9a358d6b1b530928b3 Mon Sep 17 00:00:00 2001 From: NuoJin <1430093320@qq.com> Date: Wed, 25 Mar 2026 13:27:37 +0800 Subject: [PATCH 02/31] docs: add long-task continuity suite spec --- docs/long-task-suite.md | 96 +++++++++++++++++++++++++++++++++++ docs/long-task-suite.zh-CN.md | 96 +++++++++++++++++++++++++++++++++++ evals/cases.csv | 9 ++++ 3 files changed, 201 insertions(+) create mode 100644 docs/long-task-suite.md create mode 100644 docs/long-task-suite.zh-CN.md create mode 100644 evals/cases.csv diff --git a/docs/long-task-suite.md b/docs/long-task-suite.md new file mode 100644 index 0000000..cfa735f --- /dev/null +++ b/docs/long-task-suite.md @@ -0,0 +1,96 @@ +# Long-Task Continuity Suite + +## Problem Statement + +Long threads usually do not fail all at once. They fail by degrees: the shared state gets stale, the workflow loses its shape, and handoffs become too thin for the next agent to trust. + +This suite exists to make those failure modes explicit. It treats long-task degradation as three separate problems: + +- state drift, where the working picture no longer matches reality +- workflow drift, where the task stops following a deliberate sequence of phases and checkpoints +- handoff friction, where another agent cannot resume without guessing + +The goal is not to add more ceremony. The goal is to make continuity measurable so that long tasks stay resumable, inspectable, and transferable. 
+ +## State Drift, Workflow Drift, And Handoff Friction + +These three failure modes overlap, but they are not the same thing. + +State drift appears when summaries, context, or task memory lag behind the actual work. The risk is silent divergence: the thread sounds confident while carrying the wrong assumptions. + +Workflow drift appears when a task that needs staged execution starts behaving like a single shot. The work may still move forward, but it loses checkpoints, decision points, and clear boundaries. + +Handoff friction appears when a pause or transfer leaves too little signal for the next agent. The work is not necessarily wrong, just expensive to resume. + +The suite uses these distinctions to decide which package should trigger and which one should stay out of the way. + +## Package Map + +| Package | Responsibility | Trigger Shape | +| --- | --- | --- | +| `skill-context-keeper` | Preserve and reconstruct working state across long threads, especially after interruptions or stale summaries. | Resume, refresh, or reconcile context. | +| `skill-phase-gate` | Decide when work needs phase boundaries, checkpoints, or a deliberate pause before execution continues. | Split, gate, or stage the work. | +| `skill-handoff-summary` | Produce a clean transfer note when work is paused or handed to another agent. | Summarize status, blockers, and next steps. | +| `skill-task-continuity` | Orchestrate the three atomic packages when the task itself is about maintaining long-thread continuity. | Bootstrap the suite, coordinate boundaries, and keep the flow coherent. | + +## Repository Boundary Rules + +This repository is a public installable skill library, so the suite docs must stay reader-facing and maintainable. + +- Keep the suite spec in `docs/`, not in live agent state files. +- Do not create a root `AGENTS.md`, `.agent-state/`, or public-package `.agents/skills` content for this task. 
+- Treat `evals/cases.csv` as the source of truth for trigger coverage, but keep the prose docs understandable on their own. +- Describe package boundaries in plain language; do not require readers to open package implementation files first. +- Prefer the narrowest package that matches the prompt. The composition package should not steal work that belongs to an atomic package. +- Make ambiguity explicit in the matrix so maintainers can see when a keyword match is not a real trigger. + +## Success Criteria + +### Outcome + +- Long-thread work can be resumed, paused, or transferred without losing intent. +- The suite catches both false positives and false negatives for the four target packages. +- A maintainer can understand the architecture and boundaries without opening package source files. + +### Process + +- The eval matrix includes positive trigger cases and negative trigger cases for every atomic package. +- The matrix includes at least one composition-package bootstrap case and one boundary-protection case. +- Each case records the expected artifacts and the expected workflow event or command shape when that matters. + +### Style + +- The docs stay concise, public-reader-friendly, and easy to scan. +- English and Chinese versions share the same major section order. +- Trigger notes read like maintainer guidance, not like internal scratchpad text. + +### Efficiency + +- Maintainers can validate the suite from the docs and CSV without reverse-engineering package code. +- The matrix is small enough to extend without becoming noisy. +- Ambiguous prompts are documented once, then reused as regression coverage. + +## Initial Evaluation Matrix + +The seed matrix lives in `evals/cases.csv`. The table below shows the initial coverage shape and the kinds of expected artifacts and workflow events to look for. 
+ +| Case | Package | Trigger | Prompt Shape | Expected Artifacts | Expected Events | +| --- | --- | --- | --- | --- | --- | +| `context_resume` | `skill-context-keeper` | Yes | Resume the last known state and carry forward unresolved work. | State snapshot, continuity note, or refreshed context summary. | Reload prior state, rebuild active facts, emit continuity summary. | +| `context_resume_not_needed` | `skill-context-keeper` | No | Answer a one-off question with no continuity risk. | None. | Direct answer only; no state reload. | +| `phase_gate_before_multi_step` | `skill-phase-gate` | Yes | Split a multi-step task into phases before coding starts. | Phase plan, checkpoint list, stop/go criteria. | Create phase boundaries and checkpoints. | +| `tiny_edit_not_gate` | `skill-phase-gate` | No | Make a tiny local edit with no staged workflow. | None. | Skip gating for the small change. | +| `handoff_before_pause` | `skill-handoff-summary` | Yes | Pause work and hand it to another agent. | Handoff summary, blockers, next actions. | Capture transfer state and mark the pause point. | +| `handoff_not_needed` | `skill-handoff-summary` | No | Give a final answer without transfer notes. | None. | No handoff workflow event. | +| `suite_bootstrap` | `skill-task-continuity` | Yes | Coordinate the long-task suite across the atomic packages. | Suite spec, package map, evaluation matrix. | Bootstrap downstream guidance and align package boundaries. | +| `suite_boundary_clean` | `skill-task-continuity` | No | A trivial edit that merely mentions all the keywords. | None. | Do not promote a keyword match into suite orchestration. | + +## Phase Plan + +The current task is the bootstrap phase: define the suite, seed the matrix, and make the boundaries legible. + +Phase 1 should keep the documentation stable while the package implementations are still being shaped. That means adding new cases only when they improve coverage, not when they repeat the same trigger in different words. 
+ +Phase 2 should expand the matrix with more realistic long-thread scenarios, especially ones where the wrong package could plausibly trigger. + +Phase 3 should use the suite as a regression harness for future package changes, so trigger behavior stays narrow and intentional. diff --git a/docs/long-task-suite.zh-CN.md b/docs/long-task-suite.zh-CN.md new file mode 100644 index 0000000..ec42867 --- /dev/null +++ b/docs/long-task-suite.zh-CN.md @@ -0,0 +1,96 @@ +# 长任务连续性套件 + +## 问题陈述 + +长线程通常不是一次性坏掉的,而是逐步退化:共享状态开始过时,工作流失去节奏,交接信息也变得不足以让下一位执行者放心接手。 + +这套文档要解决的,就是把这些退化模式说清楚。我们把长任务中的失效拆成三类: + +- 状态漂移,也就是工作认知和真实进展开始脱节 +- 流程漂移,也就是本来需要分阶段推进的任务变成了没有检查点的直冲 +- 交接摩擦,也就是换人之后需要靠猜才能继续 + +目标不是增加仪式感,而是让连续性变成可观察、可验证、可维护的东西。只有这样,长任务才更容易恢复、审阅和转交。 + +## 状态漂移、流程漂移与交接摩擦 + +这三种问题彼此相关,但并不相同。 + +状态漂移通常表现为摘要、上下文或任务记忆已经跟不上真实工作进度。它最大的风险是悄悄偏离:线程表面上很顺,实际上已经带着错误前提继续往前走。 + +流程漂移则发生在需要分阶段推进的任务被当成一次性动作处理。工作可能还在继续,但检查点、决策点和边界感都会变弱。 + +交接摩擦出现在暂停或转交之后,下一位执行者拿到的信息不够完整,必须重新猜测当前状态。工作本身不一定有错,但恢复成本一定会增加。 + +这个套件就是用这三种差异来决定:哪些包应该触发,哪些包不该被卷进来。 + +## 包结构图 + +| 包 | 职责 | 触发形态 | +| --- | --- | --- | +| `skill-context-keeper` | 在长线程中保存和重建工作状态,尤其适合处理中断或过时摘要后的恢复。 | 恢复、刷新或对齐上下文。 | +| `skill-phase-gate` | 判断任务是否需要阶段边界、检查点,或在继续执行前先停一下。 | 拆分、门控或分阶段推进。 | +| `skill-handoff-summary` | 在任务暂停或转交时产出清晰的交接说明。 | 总结状态、阻塞点和下一步。 | +| `skill-task-continuity` | 当任务本身就是为了维持长线程连续性时,统筹前三个原子包。 | 启动套件、协调边界、保持流程连贯。 | + +## 仓库边界规则 + +这个仓库是公开可安装的 skill 库,所以套件文档必须尽量面向读者,且便于维护。 + +- 套件规范放在 `docs/`,不要写进运行中的 agent 状态文件。 +- 本任务不要创建根目录 `AGENTS.md`、`.agent-state/`,也不要写入 public-package 的 `.agents/skills` 内容。 +- `evals/cases.csv` 是触发覆盖的事实来源,但正文也要能独立读懂。 +- 先用通俗语言解释包边界,不要要求读者先去看实现文件。 +- 能匹配原子包的就优先匹配原子包,组合包不应该抢走本该由原子包处理的工作。 +- 把模糊情况写进矩阵,方便维护者看到“只是关键词出现”并不等于真正触发。 + +## 成功标准 + +### 结果 + +- 长任务可以在不丢失意图的情况下恢复、暂停或转交。 +- 套件能覆盖四个目标包的误触发和漏触发情况。 +- 维护者不看包源码,也能理解整体架构和边界。 + +### 过程 + +- 每个原子包都要有正触发和负触发用例。 +- 矩阵里要有组合包的启动用例,也要有边界保护用例。 +- 每个案例都要写清楚期望产物,以及必要时对应的工作流事件或命令形态。 + +### 风格 + +- 文档要简洁、面向读者、易于扫读。 +- 英文版和中文版保持相同的主要章节顺序。 +- 触发说明要像维护建议,不要像内部草稿。 + +### 效率 + +- 
维护者可以只看文档和 CSV 就完成验证,不需要反推包实现。 +- 矩阵要足够小,方便持续扩展,而不是越写越乱。 +- 模糊提示只需要定义一次,之后就能作为回归覆盖重复使用。 + +## 初始评估矩阵 + +种子矩阵存放在 `evals/cases.csv`。下面这张表展示了初始覆盖的形状,以及在什么情况下应该关注哪些产物和流程事件。 + +| 用例 | 包 | 是否触发 | 提示形态 | 期望产物 | 期望事件 | +| --- | --- | --- | --- | --- | --- | +| `context_resume` | `skill-context-keeper` | 是 | 恢复最后已知状态,并把未完成工作带下去。 | 状态快照、连续性说明、或刷新后的上下文摘要。 | 重新载入旧状态、重建活动事实、输出连续性摘要。 | +| `context_resume_not_needed` | `skill-context-keeper` | 否 | 回答一个一次性问题,没有连续性风险。 | 无。 | 直接回答,不重载状态。 | +| `phase_gate_before_multi_step` | `skill-phase-gate` | 是 | 在开始编码前先把多步骤任务拆成阶段。 | 阶段计划、检查点列表、停/走标准。 | 创建阶段边界并定义检查点。 | +| `tiny_edit_not_gate` | `skill-phase-gate` | 否 | 只做一个很小的本地修改,不需要分阶段。 | 无。 | 对小改动跳过门控。 | +| `handoff_before_pause` | `skill-handoff-summary` | 是 | 暂停工作并交给另一个执行者。 | 交接摘要、阻塞点、下一步。 | 记录转交状态并标记暂停点。 | +| `handoff_not_needed` | `skill-handoff-summary` | 否 | 直接给最终答案,不需要转交说明。 | 无。 | 不产生交接事件。 | +| `suite_bootstrap` | `skill-task-continuity` | 是 | 协调长任务套件,让三个原子包一起工作。 | 套件规范、包结构图、评估矩阵。 | 启动下游指导并对齐包边界。 | +| `suite_boundary_clean` | `skill-task-continuity` | 否 | 一个很小的编辑,只是碰巧提到了所有关键词。 | 无。 | 不要把关键词命中升级成套件编排。 | + +## 阶段计划 + +当前任务本身就是引导阶段:先定义套件、先种下矩阵、先把边界讲清楚。 + +第一阶段要先把文档稳定下来,等包实现逐步成形后,再按需补充新的用例。重点不是堆数量,而是补能提高覆盖质量的案例。 + +第二阶段可以扩展成更接近真实长线程的场景,尤其是那些容易让错误包误触发的情况。 + +第三阶段则是把这套矩阵变成后续改动的回归护栏,确保触发行为一直保持窄而明确。 diff --git a/evals/cases.csv b/evals/cases.csv new file mode 100644 index 0000000..fb55daf --- /dev/null +++ b/evals/cases.csv @@ -0,0 +1,9 @@ +case_id,package,scenario_type,should_trigger,user_prompt,expected_artifacts,expected_events,notes +context_resume,skill-context-keeper,positive,yes,"We’ve been iterating for a while; please resume from the last known state, summarize what changed, and carry forward unresolved TODOs.","state snapshot or continuity note; updated task context","reload prior working state; reconstruct active facts; emit continuity summary","atomic positive trigger for state reconstruction" +context_resume_not_needed,skill-context-keeper,negative,no,"Please answer this one-off 
punctuation question in the README and do nothing else.","none","answer directly without loading or rewriting working state","atomic negative trigger; no long-thread drift" +phase_gate_before_multi_step,skill-phase-gate,positive,yes,"We need to refactor the installer and update docs; help me split the work into phases before coding.","phase plan; checkpoint list; stop/go criteria","create phase boundaries; define checkpoints before implementation starts","atomic positive trigger for workflow gating" +tiny_edit_not_gate,skill-phase-gate,negative,no,"Please rename this heading and nothing else.","none","skip phase gating for a tiny local edit","required example" +handoff_before_pause,skill-handoff-summary,positive,yes,"I need to stop for today; please write a handoff with open questions, blockers, and next steps.","handoff summary; blocker list; next actions","capture transfer state; mark pause point; surface outstanding work","required example" +handoff_not_needed,skill-handoff-summary,negative,no,"Just give me the final answer; no status note or handoff is needed.","none","do not create transfer notes when no handoff is happening","atomic negative trigger" +suite_bootstrap,skill-task-continuity,positive,yes,"Set up the long-task continuity suite and coordinate the context keeper, phase gate, and handoff packages.","suite spec; package map; evaluation matrix","bootstrap downstream guidance; coordinate package boundaries; seed coverage matrix","composition-package positive trigger" +suite_boundary_clean,skill-task-continuity,negative,no,"This is just a one-line README fix, but please also resume context, gate the phases, and write a handoff.","none","avoid pulling the composition suite into a trivial single-step edit","cross-package ambiguity case; boundaries should stay clean" From f87d846dc9fc5527c17ed62a7e56dad60c27c765 Mon Sep 17 00:00:00 2001 From: NuoJin <1430093320@qq.com> Date: Wed, 25 Mar 2026 13:32:31 +0800 Subject: [PATCH 03/31] docs: normalize long-task 
eval tokens --- docs/long-task-suite.md | 18 ++++++++++-------- docs/long-task-suite.zh-CN.md | 18 ++++++++++-------- evals/cases.csv | 16 ++++++++-------- 3 files changed, 28 insertions(+), 24 deletions(-) diff --git a/docs/long-task-suite.md b/docs/long-task-suite.md index cfa735f..6edd5fa 100644 --- a/docs/long-task-suite.md +++ b/docs/long-task-suite.md @@ -24,6 +24,8 @@ Handoff friction appears when a pause or transfer leaves too little signal for t The suite uses these distinctions to decide which package should trigger and which one should stay out of the way. +The evaluation matrix uses normalized artifact paths and event tokens so the runner can validate results consistently. + ## Package Map | Package | Responsibility | Trigger Shape | @@ -76,14 +78,14 @@ The seed matrix lives in `evals/cases.csv`. The table below shows the initial co | Case | Package | Trigger | Prompt Shape | Expected Artifacts | Expected Events | | --- | --- | --- | --- | --- | --- | -| `context_resume` | `skill-context-keeper` | Yes | Resume the last known state and carry forward unresolved work. | State snapshot, continuity note, or refreshed context summary. | Reload prior state, rebuild active facts, emit continuity summary. | -| `context_resume_not_needed` | `skill-context-keeper` | No | Answer a one-off question with no continuity risk. | None. | Direct answer only; no state reload. | -| `phase_gate_before_multi_step` | `skill-phase-gate` | Yes | Split a multi-step task into phases before coding starts. | Phase plan, checkpoint list, stop/go criteria. | Create phase boundaries and checkpoints. | -| `tiny_edit_not_gate` | `skill-phase-gate` | No | Make a tiny local edit with no staged workflow. | None. | Skip gating for the small change. | -| `handoff_before_pause` | `skill-handoff-summary` | Yes | Pause work and hand it to another agent. | Handoff summary, blockers, next actions. | Capture transfer state and mark the pause point. 
| -| `handoff_not_needed` | `skill-handoff-summary` | No | Give a final answer without transfer notes. | None. | No handoff workflow event. | -| `suite_bootstrap` | `skill-task-continuity` | Yes | Coordinate the long-task suite across the atomic packages. | Suite spec, package map, evaluation matrix. | Bootstrap downstream guidance and align package boundaries. | -| `suite_boundary_clean` | `skill-task-continuity` | No | A trivial edit that merely mentions all the keywords. | None. | Do not promote a keyword match into suite orchestration. | +| `context_resume` | `skill-context-keeper` | Yes | Resume the last known state and carry forward unresolved work. | `state/context.snapshot`, `state/continuity.note` | `context:reload`, `context:reconstruct`, `context:summary` | +| `context_resume_not_needed` | `skill-context-keeper` | No | Answer a one-off question with no continuity risk. | `none` | `context:skip`, `direct:answer` | +| `phase_gate_before_multi_step` | `skill-phase-gate` | Yes | Split a multi-step task into phases before coding starts. | `plan/phase.plan`, `plan/checkpoints.md`, `plan/exit-criteria.md` | `phase:split`, `phase:checkpoint`, `phase:gate` | +| `tiny_edit_not_gate` | `skill-phase-gate` | No | Make a tiny local edit with no staged workflow. | `none` | `phase:skip`, `direct:edit` | +| `handoff_before_pause` | `skill-handoff-summary` | Yes | Pause work and hand it to another agent. | `handoff/HANDOFF.md`, `handoff/blockers.md`, `handoff/next-steps.md` | `handoff:capture`, `handoff:pause`, `handoff:transfer` | +| `handoff_not_needed` | `skill-handoff-summary` | No | Give a final answer without transfer notes. | `none` | `handoff:skip`, `direct:answer` | +| `suite_bootstrap` | `skill-task-continuity` | Yes | Coordinate the long-task suite across the atomic packages. 
| `AGENTS.md`, `.agent-state/TASK_STATE.md`, `.agent-state/HANDOFF.md` | `bootstrap:agents_md`, `bootstrap:task_state`, `bootstrap:handoff` | +| `suite_boundary_clean` | `skill-task-continuity` | No | A trivial edit that merely mentions all the keywords. | `none` | `bootstrap:skip`, `direct:edit` | ## Phase Plan diff --git a/docs/long-task-suite.zh-CN.md b/docs/long-task-suite.zh-CN.md index ec42867..8fe1de1 100644 --- a/docs/long-task-suite.zh-CN.md +++ b/docs/long-task-suite.zh-CN.md @@ -24,6 +24,8 @@ 这个套件就是用这三种差异来决定:哪些包应该触发,哪些包不该被卷进来。 +评估矩阵里的产物和事件都使用了规范化的路径和值,这样后续执行器就能稳定校验,而不是依赖自然语言猜测。 + ## 包结构图 | 包 | 职责 | 触发形态 | @@ -76,14 +78,14 @@ | 用例 | 包 | 是否触发 | 提示形态 | 期望产物 | 期望事件 | | --- | --- | --- | --- | --- | --- | -| `context_resume` | `skill-context-keeper` | 是 | 恢复最后已知状态,并把未完成工作带下去。 | 状态快照、连续性说明、或刷新后的上下文摘要。 | 重新载入旧状态、重建活动事实、输出连续性摘要。 | -| `context_resume_not_needed` | `skill-context-keeper` | 否 | 回答一个一次性问题,没有连续性风险。 | 无。 | 直接回答,不重载状态。 | -| `phase_gate_before_multi_step` | `skill-phase-gate` | 是 | 在开始编码前先把多步骤任务拆成阶段。 | 阶段计划、检查点列表、停/走标准。 | 创建阶段边界并定义检查点。 | -| `tiny_edit_not_gate` | `skill-phase-gate` | 否 | 只做一个很小的本地修改,不需要分阶段。 | 无。 | 对小改动跳过门控。 | -| `handoff_before_pause` | `skill-handoff-summary` | 是 | 暂停工作并交给另一个执行者。 | 交接摘要、阻塞点、下一步。 | 记录转交状态并标记暂停点。 | -| `handoff_not_needed` | `skill-handoff-summary` | 否 | 直接给最终答案,不需要转交说明。 | 无。 | 不产生交接事件。 | -| `suite_bootstrap` | `skill-task-continuity` | 是 | 协调长任务套件,让三个原子包一起工作。 | 套件规范、包结构图、评估矩阵。 | 启动下游指导并对齐包边界。 | -| `suite_boundary_clean` | `skill-task-continuity` | 否 | 一个很小的编辑,只是碰巧提到了所有关键词。 | 无。 | 不要把关键词命中升级成套件编排。 | +| `context_resume` | `skill-context-keeper` | 是 | 恢复最后已知状态,并把未完成工作带下去。 | `state/context.snapshot`、`state/continuity.note` | `context:reload`、`context:reconstruct`、`context:summary` | +| `context_resume_not_needed` | `skill-context-keeper` | 否 | 回答一个一次性问题,没有连续性风险。 | `none` | `context:skip`、`direct:answer` | +| `phase_gate_before_multi_step` | `skill-phase-gate` | 是 | 在开始编码前先把多步骤任务拆成阶段。 | 
`plan/phase.plan`、`plan/checkpoints.md`、`plan/exit-criteria.md` | `phase:split`、`phase:checkpoint`、`phase:gate` | +| `tiny_edit_not_gate` | `skill-phase-gate` | 否 | 只做一个很小的本地修改,不需要分阶段。 | `none` | `phase:skip`、`direct:edit` | +| `handoff_before_pause` | `skill-handoff-summary` | 是 | 暂停工作并交给另一个执行者。 | `handoff/HANDOFF.md`、`handoff/blockers.md`、`handoff/next-steps.md` | `handoff:capture`、`handoff:pause`、`handoff:transfer` | +| `handoff_not_needed` | `skill-handoff-summary` | 否 | 直接给最终答案,不需要转交说明。 | `none` | `handoff:skip`、`direct:answer` | +| `suite_bootstrap` | `skill-task-continuity` | 是 | 协调长任务套件,让三个原子包一起工作。 | `AGENTS.md`、`.agent-state/TASK_STATE.md`、`.agent-state/HANDOFF.md` | `bootstrap:agents_md`、`bootstrap:task_state`、`bootstrap:handoff` | +| `suite_boundary_clean` | `skill-task-continuity` | 否 | 一个很小的编辑,只是碰巧提到了所有关键词。 | `none` | `bootstrap:skip`、`direct:edit` | ## 阶段计划 diff --git a/evals/cases.csv b/evals/cases.csv index fb55daf..20fb9dc 100644 --- a/evals/cases.csv +++ b/evals/cases.csv @@ -1,9 +1,9 @@ case_id,package,scenario_type,should_trigger,user_prompt,expected_artifacts,expected_events,notes -context_resume,skill-context-keeper,positive,yes,"We’ve been iterating for a while; please resume from the last known state, summarize what changed, and carry forward unresolved TODOs.","state snapshot or continuity note; updated task context","reload prior working state; reconstruct active facts; emit continuity summary","atomic positive trigger for state reconstruction" -context_resume_not_needed,skill-context-keeper,negative,no,"Please answer this one-off punctuation question in the README and do nothing else.","none","answer directly without loading or rewriting working state","atomic negative trigger; no long-thread drift" -phase_gate_before_multi_step,skill-phase-gate,positive,yes,"We need to refactor the installer and update docs; help me split the work into phases before coding.","phase plan; checkpoint list; stop/go criteria","create phase boundaries; define 
checkpoints before implementation starts","atomic positive trigger for workflow gating" -tiny_edit_not_gate,skill-phase-gate,negative,no,"Please rename this heading and nothing else.","none","skip phase gating for a tiny local edit","required example" -handoff_before_pause,skill-handoff-summary,positive,yes,"I need to stop for today; please write a handoff with open questions, blockers, and next steps.","handoff summary; blocker list; next actions","capture transfer state; mark pause point; surface outstanding work","required example" -handoff_not_needed,skill-handoff-summary,negative,no,"Just give me the final answer; no status note or handoff is needed.","none","do not create transfer notes when no handoff is happening","atomic negative trigger" -suite_bootstrap,skill-task-continuity,positive,yes,"Set up the long-task continuity suite and coordinate the context keeper, phase gate, and handoff packages.","suite spec; package map; evaluation matrix","bootstrap downstream guidance; coordinate package boundaries; seed coverage matrix","composition-package positive trigger" -suite_boundary_clean,skill-task-continuity,negative,no,"This is just a one-line README fix, but please also resume context, gate the phases, and write a handoff.","none","avoid pulling the composition suite into a trivial single-step edit","cross-package ambiguity case; boundaries should stay clean" +context_resume,skill-context-keeper,positive,yes,"We’ve been iterating for a while; please resume from the last known state, summarize what changed, and carry forward unresolved TODOs.","state/context.snapshot|state/continuity.note","context:reload|context:reconstruct|context:summary","atomic positive trigger for state reconstruction" +context_resume_not_needed,skill-context-keeper,negative,no,"Please answer this one-off punctuation question in the README and do nothing else.","none","context:skip|direct:answer","atomic negative trigger; no long-thread drift" 
+phase_gate_before_multi_step,skill-phase-gate,positive,yes,"We need to refactor the installer and update docs; help me split the work into phases before coding.","plan/phase.plan|plan/checkpoints.md|plan/exit-criteria.md","phase:split|phase:checkpoint|phase:gate","atomic positive trigger for workflow gating" +tiny_edit_not_gate,skill-phase-gate,negative,no,"Please rename this heading and nothing else.","none","phase:skip|direct:edit","required example" +handoff_before_pause,skill-handoff-summary,positive,yes,"I need to stop for today; please write a handoff with open questions, blockers, and next steps.","handoff/HANDOFF.md|handoff/blockers.md|handoff/next-steps.md","handoff:capture|handoff:pause|handoff:transfer","required example" +handoff_not_needed,skill-handoff-summary,negative,no,"Just give me the final answer; no status note or handoff is needed.","none","handoff:skip|direct:answer","atomic negative trigger" +suite_bootstrap,skill-task-continuity,positive,yes,"Set up the long-task continuity suite and coordinate the context keeper, phase gate, and handoff packages.","AGENTS.md|.agent-state/TASK_STATE.md|.agent-state/HANDOFF.md","bootstrap:agents_md|bootstrap:task_state|bootstrap:handoff","composition-package positive trigger" +suite_boundary_clean,skill-task-continuity,negative,no,"This is just a one-line README fix, but please also resume context, gate the phases, and write a handoff.","none","bootstrap:skip|direct:edit","cross-package ambiguity case; boundaries should stay clean" From d5bc7729248d7dce2bf1a91598220442508de4a1 Mon Sep 17 00:00:00 2001 From: NuoJin <1430093320@qq.com> Date: Wed, 25 Mar 2026 13:37:36 +0800 Subject: [PATCH 04/31] feat: scaffold long-task continuity skill packages --- CHANGELOG.md | 8 ++++ README.md | 12 ++++++ README.zh-CN.md | 12 ++++++ skills/README.md | 4 ++ skills/README.zh-CN.md | 4 ++ skills/skill-context-keeper/README.md | 42 +++++++++++++++++++ skills/skill-context-keeper/README.zh-CN.md | 42 +++++++++++++++++++ 
skills/skill-context-keeper/SKILL.md | 24 +++++++++++ .../skill-context-keeper/agents/openai.yaml | 4 ++ skills/skill-context-keeper/assets/.gitkeep | 1 + .../skill-context-keeper/references/.gitkeep | 1 + skills/skill-context-keeper/scripts/.gitkeep | 1 + skills/skill-context-keeper/tests/.gitkeep | 1 + skills/skill-handoff-summary/README.md | 42 +++++++++++++++++++ skills/skill-handoff-summary/README.zh-CN.md | 42 +++++++++++++++++++ skills/skill-handoff-summary/SKILL.md | 24 +++++++++++ .../skill-handoff-summary/agents/openai.yaml | 4 ++ skills/skill-handoff-summary/assets/.gitkeep | 1 + .../skill-handoff-summary/references/.gitkeep | 1 + skills/skill-handoff-summary/scripts/.gitkeep | 1 + skills/skill-handoff-summary/tests/.gitkeep | 1 + skills/skill-phase-gate/README.md | 42 +++++++++++++++++++ skills/skill-phase-gate/README.zh-CN.md | 42 +++++++++++++++++++ skills/skill-phase-gate/SKILL.md | 24 +++++++++++ skills/skill-phase-gate/agents/openai.yaml | 4 ++ skills/skill-phase-gate/assets/.gitkeep | 1 + skills/skill-phase-gate/references/.gitkeep | 1 + skills/skill-phase-gate/scripts/.gitkeep | 1 + skills/skill-phase-gate/tests/.gitkeep | 1 + skills/skill-task-continuity/README.md | 42 +++++++++++++++++++ skills/skill-task-continuity/README.zh-CN.md | 42 +++++++++++++++++++ skills/skill-task-continuity/SKILL.md | 24 +++++++++++ .../skill-task-continuity/agents/openai.yaml | 4 ++ skills/skill-task-continuity/assets/.gitkeep | 1 + .../skill-task-continuity/references/.gitkeep | 1 + skills/skill-task-continuity/scripts/.gitkeep | 1 + skills/skill-task-continuity/tests/.gitkeep | 1 + 37 files changed, 504 insertions(+) create mode 100644 skills/skill-context-keeper/README.md create mode 100644 skills/skill-context-keeper/README.zh-CN.md create mode 100644 skills/skill-context-keeper/SKILL.md create mode 100644 skills/skill-context-keeper/agents/openai.yaml create mode 100644 skills/skill-context-keeper/assets/.gitkeep create mode 100644 
skills/skill-context-keeper/references/.gitkeep create mode 100644 skills/skill-context-keeper/scripts/.gitkeep create mode 100644 skills/skill-context-keeper/tests/.gitkeep create mode 100644 skills/skill-handoff-summary/README.md create mode 100644 skills/skill-handoff-summary/README.zh-CN.md create mode 100644 skills/skill-handoff-summary/SKILL.md create mode 100644 skills/skill-handoff-summary/agents/openai.yaml create mode 100644 skills/skill-handoff-summary/assets/.gitkeep create mode 100644 skills/skill-handoff-summary/references/.gitkeep create mode 100644 skills/skill-handoff-summary/scripts/.gitkeep create mode 100644 skills/skill-handoff-summary/tests/.gitkeep create mode 100644 skills/skill-phase-gate/README.md create mode 100644 skills/skill-phase-gate/README.zh-CN.md create mode 100644 skills/skill-phase-gate/SKILL.md create mode 100644 skills/skill-phase-gate/agents/openai.yaml create mode 100644 skills/skill-phase-gate/assets/.gitkeep create mode 100644 skills/skill-phase-gate/references/.gitkeep create mode 100644 skills/skill-phase-gate/scripts/.gitkeep create mode 100644 skills/skill-phase-gate/tests/.gitkeep create mode 100644 skills/skill-task-continuity/README.md create mode 100644 skills/skill-task-continuity/README.zh-CN.md create mode 100644 skills/skill-task-continuity/SKILL.md create mode 100644 skills/skill-task-continuity/agents/openai.yaml create mode 100644 skills/skill-task-continuity/assets/.gitkeep create mode 100644 skills/skill-task-continuity/references/.gitkeep create mode 100644 skills/skill-task-continuity/scripts/.gitkeep create mode 100644 skills/skill-task-continuity/tests/.gitkeep diff --git a/CHANGELOG.md b/CHANGELOG.md index 95e5ade..8bf7c32 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,14 @@ All notable changes to `codex-skill-library` should be documented in this file. 
+## [Unreleased] + +### Added + +- Add scaffolded package roots for `skill-context-keeper`, `skill-phase-gate`, `skill-handoff-summary`, and `skill-task-continuity`, including bilingual README landing pages, routing-first `SKILL.md` files, OpenAI agent metadata, and placeholder `assets/`, `references/`, `scripts/`, and `tests/` directories. +- Add downstream bootstrap support for the long-task continuity suite through initial package metadata, install entry points, and repository index coverage. +- Add evaluation scaffolding support by reserving package directories for future references, scripts, tests, and template assets tied to the long-task continuity suite. + ## [0.5.1] - 2026-03-25 ### Changed diff --git a/README.md b/README.md index 8fdc155..adb2495 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,10 @@ This repository is designed for people who want to: | Skill | Best For | Docs | | --- | --- | --- | | `skill-governance` | Governing skill assets with task-first add, enable, doctor, repair, audit, and document flows | [EN](skills/skill-governance/README.md) / [中文](skills/skill-governance/README.zh-CN.md) | +| `skill-context-keeper` | Refreshing or reconstructing long-task state without turning it into phase planning or a final handoff | [EN](skills/skill-context-keeper/README.md) / [中文](skills/skill-context-keeper/README.zh-CN.md) | +| `skill-phase-gate` | Splitting multi-step coding work into explicit phases, checkpoints, and exit criteria | [EN](skills/skill-phase-gate/README.md) / [中文](skills/skill-phase-gate/README.zh-CN.md) | +| `skill-handoff-summary` | Writing pause or transfer summaries with status, blockers, and next steps | [EN](skills/skill-handoff-summary/README.md) / [中文](skills/skill-handoff-summary/README.zh-CN.md) | +| `skill-task-continuity` | Coordinating the long-task continuity suite across the three narrower continuity packages | [EN](skills/skill-task-continuity/README.md) / [中文](skills/skill-task-continuity/README.zh-CN.md) | ## Quick Start @@ -65,6 +69,10 @@ python3 
/scripts/install-skill-from-github.py \ - English skill index: [skills/README.md](skills/README.md) - 中文技能索引: [skills/README.zh-CN.md](skills/README.zh-CN.md) - `skill-governance` package: [EN](skills/skill-governance/README.md) / [中文](skills/skill-governance/README.zh-CN.md) +- `skill-context-keeper` package: [EN](skills/skill-context-keeper/README.md) / [中文](skills/skill-context-keeper/README.zh-CN.md) +- `skill-phase-gate` package: [EN](skills/skill-phase-gate/README.md) / [中文](skills/skill-phase-gate/README.zh-CN.md) +- `skill-handoff-summary` package: [EN](skills/skill-handoff-summary/README.md) / [中文](skills/skill-handoff-summary/README.zh-CN.md) +- `skill-task-continuity` package: [EN](skills/skill-task-continuity/README.md) / [中文](skills/skill-task-continuity/README.zh-CN.md) - Repository publishing guide: [docs/publishing.md](docs/publishing.md) - 中文发布说明: [docs/publishing.zh-CN.md](docs/publishing.zh-CN.md) @@ -80,6 +88,10 @@ codex-skill-library/ README.md README.zh-CN.md skill-governance/ + skill-context-keeper/ + skill-phase-gate/ + skill-handoff-summary/ + skill-task-continuity/ ``` ## For Maintainers diff --git a/README.zh-CN.md b/README.zh-CN.md index 0cf5cd3..76f41d9 100644 --- a/README.zh-CN.md +++ b/README.zh-CN.md @@ -26,6 +26,10 @@ | Skill | 适用场景 | 文档 | | --- | --- | --- | | `skill-governance` | 用任务式入口治理 skill 资产,包括新增、启用、体检、修复、审计和补文档 | [EN](skills/skill-governance/README.md) / [中文](skills/skill-governance/README.zh-CN.md) | +| `skill-context-keeper` | 在长任务中刷新或重建当前工作状态,不扩展成阶段规划或最终交接 | [EN](skills/skill-context-keeper/README.md) / [中文](skills/skill-context-keeper/README.zh-CN.md) | +| `skill-phase-gate` | 把多步骤编码任务拆成明确阶段、检查点和退出条件 | [EN](skills/skill-phase-gate/README.md) / [中文](skills/skill-phase-gate/README.zh-CN.md) | +| `skill-handoff-summary` | 在暂停或转交时整理状态、阻塞点和下一步摘要 | [EN](skills/skill-handoff-summary/README.md) / [中文](skills/skill-handoff-summary/README.zh-CN.md) | +| `skill-task-continuity` | 在三个连续性原子包之间做套件级协调与边界控制 | 
[EN](skills/skill-task-continuity/README.md) / [中文](skills/skill-task-continuity/README.zh-CN.md) | ## 快速开始 @@ -65,6 +69,10 @@ python3 /scripts/install-skill-from-github.py \ - English skill index: [skills/README.md](skills/README.md) - 中文技能索引: [skills/README.zh-CN.md](skills/README.zh-CN.md) - `skill-governance` 包说明: [EN](skills/skill-governance/README.md) / [中文](skills/skill-governance/README.zh-CN.md) +- `skill-context-keeper` 包说明: [EN](skills/skill-context-keeper/README.md) / [中文](skills/skill-context-keeper/README.zh-CN.md) +- `skill-phase-gate` 包说明: [EN](skills/skill-phase-gate/README.md) / [中文](skills/skill-phase-gate/README.zh-CN.md) +- `skill-handoff-summary` 包说明: [EN](skills/skill-handoff-summary/README.md) / [中文](skills/skill-handoff-summary/README.zh-CN.md) +- `skill-task-continuity` 包说明: [EN](skills/skill-task-continuity/README.md) / [中文](skills/skill-task-continuity/README.zh-CN.md) - English publishing guide: [docs/publishing.md](docs/publishing.md) - 中文发布说明: [docs/publishing.zh-CN.md](docs/publishing.zh-CN.md) @@ -80,6 +88,10 @@ codex-skill-library/ README.md README.zh-CN.md skill-governance/ + skill-context-keeper/ + skill-phase-gate/ + skill-handoff-summary/ + skill-task-continuity/ ``` ## 给维护者 diff --git a/skills/README.md b/skills/README.md index cee2a24..a8e9979 100644 --- a/skills/README.md +++ b/skills/README.md @@ -15,6 +15,10 @@ This directory contains the installable skill packages published by `codex-skill | Skill | Best For | Docs | | --- | --- | --- | | `skill-governance` | Governing Codex skill assets with add, enable, doctor, repair, audit, and document tasks | [EN](skill-governance/README.md) / [中文](skill-governance/README.zh-CN.md) | +| `skill-context-keeper` | Refreshing or reconstructing long-task state without drifting into phase planning or handoff writing | [EN](skill-context-keeper/README.md) / [中文](skill-context-keeper/README.zh-CN.md) | +| `skill-phase-gate` | Adding phase boundaries, checkpoints, and exit criteria to 
multi-step coding work | [EN](skill-phase-gate/README.md) / [中文](skill-phase-gate/README.zh-CN.md) | +| `skill-handoff-summary` | Producing pause and transfer summaries with status, blockers, and next steps | [EN](skill-handoff-summary/README.md) / [中文](skill-handoff-summary/README.zh-CN.md) | +| `skill-task-continuity` | Coordinating the continuity suite when context, phases, and handoff concerns must stay aligned | [EN](skill-task-continuity/README.md) / [中文](skill-task-continuity/README.zh-CN.md) | ## Package Conventions diff --git a/skills/README.zh-CN.md b/skills/README.zh-CN.md index 70ceb43..921c7a0 100644 --- a/skills/README.zh-CN.md +++ b/skills/README.zh-CN.md @@ -15,6 +15,10 @@ | Skill | 适用场景 | 文档 | | --- | --- | --- | | `skill-governance` | 用任务式入口治理 skill 资产,包括新增、启用、体检、修复、审计和补文档 | [EN](skill-governance/README.md) / [中文](skill-governance/README.zh-CN.md) | +| `skill-context-keeper` | 在长任务中刷新或重建状态,不延伸到阶段规划或交接说明 | [EN](skill-context-keeper/README.md) / [中文](skill-context-keeper/README.zh-CN.md) | +| `skill-phase-gate` | 为多步骤编码任务增加阶段边界、检查点和退出条件 | [EN](skill-phase-gate/README.md) / [中文](skill-phase-gate/README.zh-CN.md) | +| `skill-handoff-summary` | 在暂停或转交时生成包含状态、阻塞点和下一步的摘要 | [EN](skill-handoff-summary/README.md) / [中文](skill-handoff-summary/README.zh-CN.md) | +| `skill-task-continuity` | 在上下文、阶段和交接需求同时出现时统筹连续性套件 | [EN](skill-task-continuity/README.md) / [中文](skill-task-continuity/README.zh-CN.md) | ## 包结构约定 diff --git a/skills/skill-context-keeper/README.md b/skills/skill-context-keeper/README.md new file mode 100644 index 0000000..80f81c6 --- /dev/null +++ b/skills/skill-context-keeper/README.md @@ -0,0 +1,42 @@ +# skill-context-keeper + +[简体中文](README.zh-CN.md) + +## Overview + +`skill-context-keeper` is the narrow package for recovering and refreshing structured task state during a long coding thread. +It helps the next turn start from the best known picture of the work without expanding into phase control or end-of-task handoff writing. 
+ +## Best For + +- resuming a paused task after the working context has drifted +- rebuilding the current state before making more code changes +- refreshing open TODOs, assumptions, and recent changes in one place +- reconciling what the thread believes with what the repository now shows + +## What It Is Not For + +- breaking a task into staged execution phases +- deciding checkpoint rules or phase exit criteria +- writing a final pause or transfer handoff for another agent +- bootstrapping the full long-task continuity suite + +## Install + +Install `skill-context-keeper` from this repository with the standard package path for published Codex skills. + +You can ask Codex in natural language: + +- `Use skill-installer to install skill-context-keeper from Golden-Promise/codex-skill-library at skills/skill-context-keeper.` +- `Use skill-installer to install skill-context-keeper from Golden-Promise/codex-skill-library at skills/skill-context-keeper using the release or ref I specify.` + +## How To Use + +Start when the task needs a reliable state refresh before execution continues. +Describe what looks stale or missing, then ask the skill to reconstruct the current task picture, carry forward unresolved work, and keep the summary narrow to ongoing state. 
+ +## References + +- `SKILL.md` for trigger routing and package boundaries +- `references/` for future public examples and prompt patterns +- `assets/` for future state snapshot and continuity note templates diff --git a/skills/skill-context-keeper/README.zh-CN.md b/skills/skill-context-keeper/README.zh-CN.md new file mode 100644 index 0000000..8d1c939 --- /dev/null +++ b/skills/skill-context-keeper/README.zh-CN.md @@ -0,0 +1,42 @@ +# skill-context-keeper + +[English](README.md) + +## Overview + +`skill-context-keeper` 是一个边界明确的包,专门用于在长编码线程里恢复和刷新结构化任务状态。 +它帮助下一轮从当前最可信的工作图景继续,而不会扩展到阶段门控或最终交接说明。 + +## Best For + +- 任务暂停后重新进入,且上下文已经开始漂移 +- 在继续改代码前先重建当前任务状态 +- 用一个稳定入口刷新待办、假设和最近变更 +- 对齐线程认知与仓库现状,避免带着旧前提继续推进 + +## What It Is Not For + +- 把任务拆成分阶段执行 +- 决定检查点规则或阶段退出条件 +- 为另一个执行者撰写暂停或转交说明 +- 启动整套长任务连续性套件 + +## Install + +可通过本仓库中的标准发布路径安装 `skill-context-keeper`。 + +你也可以直接这样对 Codex 说: + +- `请用 skill-installer 从 Golden-Promise/codex-skill-library 的 skills/skill-context-keeper 安装 skill-context-keeper。` +- `请用 skill-installer 从 Golden-Promise/codex-skill-library 的 skills/skill-context-keeper 安装 skill-context-keeper,并使用我指定的 release 或 ref。` + +## How To Use + +当任务在继续执行前需要一次可靠的状态刷新时,就从这个包开始。 +说明哪些摘要、待办或上下文已经过时,然后让它重建当前任务图景、延续未完成工作,并把输出限制在“当前状态”而不是流程规划或交接总结。 + +## References + +- `SKILL.md`:触发路由与包边界 +- `references/`:后续面向读者的示例与提示词模式 +- `assets/`:后续的状态快照与连续性笔记模板 diff --git a/skills/skill-context-keeper/SKILL.md b/skills/skill-context-keeper/SKILL.md new file mode 100644 index 0000000..0884478 --- /dev/null +++ b/skills/skill-context-keeper/SKILL.md @@ -0,0 +1,24 @@ +--- +name: skill-context-keeper +description: Use when the user needs to preserve or refresh structured long-task state for an ongoing coding task without running workflow gates or generating a final handoff. +--- + +# Skill Context Keeper + +## Overview + +Preserve and refresh working state for long-running coding tasks. 
+Use this skill when the thread needs trustworthy context reconstruction, not phase planning or final transfer notes. + +## Use This Skill When + +- resuming a task after an interruption, stale summary, or context loss +- rebuilding the last known task state before new work continues +- refreshing TODOs, decisions, or working assumptions for an ongoing task +- reconciling what changed since the last stable checkpoint + +## References + +- `README.md` and `README.zh-CN.md`: package overview and usage guidance +- `references/`: future public examples for state refresh and continuity prompts +- `assets/`: future state snapshot templates and continuity note templates diff --git a/skills/skill-context-keeper/agents/openai.yaml b/skills/skill-context-keeper/agents/openai.yaml new file mode 100644 index 0000000..e719af9 --- /dev/null +++ b/skills/skill-context-keeper/agents/openai.yaml @@ -0,0 +1,4 @@ +interface: + display_name: "Skill Context Keeper" + short_description: "Refresh and reconstruct long-task state without taking over phase control or handoff writing" + default_prompt: "Use $skill-context-keeper when the task needs a reliable long-thread state refresh: reconstruct the current context, carry forward unresolved work, and keep the output focused on ongoing task state rather than phase gates or final handoff notes." 
diff --git a/skills/skill-context-keeper/assets/.gitkeep b/skills/skill-context-keeper/assets/.gitkeep new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/skills/skill-context-keeper/assets/.gitkeep @@ -0,0 +1 @@ + diff --git a/skills/skill-context-keeper/references/.gitkeep b/skills/skill-context-keeper/references/.gitkeep new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/skills/skill-context-keeper/references/.gitkeep @@ -0,0 +1 @@ + diff --git a/skills/skill-context-keeper/scripts/.gitkeep b/skills/skill-context-keeper/scripts/.gitkeep new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/skills/skill-context-keeper/scripts/.gitkeep @@ -0,0 +1 @@ + diff --git a/skills/skill-context-keeper/tests/.gitkeep b/skills/skill-context-keeper/tests/.gitkeep new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/skills/skill-context-keeper/tests/.gitkeep @@ -0,0 +1 @@ + diff --git a/skills/skill-handoff-summary/README.md b/skills/skill-handoff-summary/README.md new file mode 100644 index 0000000..ef11374 --- /dev/null +++ b/skills/skill-handoff-summary/README.md @@ -0,0 +1,42 @@ +# skill-handoff-summary + +[简体中文](README.zh-CN.md) + +## Overview + +`skill-handoff-summary` is the package for writing a clean pause or transfer summary when long-running coding work needs to stop and resume later. +It packages status, blockers, and next steps so the next agent does not have to reconstruct intent from scattered thread history. 
+ +## Best For + +- pausing work at the end of a session with open tasks still pending +- transferring a task to another agent that needs a trusted restart note +- capturing blockers, decisions, and next actions before context goes stale +- reducing the cost of resuming a thread after a handoff + +## What It Is Not For + +- rebuilding the current task state before work continues +- deciding whether a task needs staged phases or checkpoints +- coordinating the atomic packages as one suite-level workflow +- replacing the final user-facing answer when no handoff is needed + +## Install + +Install `skill-handoff-summary` from this repository with the standard package path for published Codex skills. + +You can ask Codex in natural language: + +- `Use skill-installer to install skill-handoff-summary from Golden-Promise/codex-skill-library at skills/skill-handoff-summary.` +- `Use skill-installer to install skill-handoff-summary from Golden-Promise/codex-skill-library at skills/skill-handoff-summary using the release or ref I specify.` + +## How To Use + +Reach for this package when execution is about to pause or move to another owner. +Describe the current status, unresolved questions, blockers, and the very next actions, then have the skill turn that into a concise transfer note rather than a full re-plan. 
+ +## References + +- `SKILL.md` for trigger routing and package boundaries +- `references/` for future public examples and prompt patterns +- `assets/` for future handoff, blocker, and next-step templates diff --git a/skills/skill-handoff-summary/README.zh-CN.md b/skills/skill-handoff-summary/README.zh-CN.md new file mode 100644 index 0000000..32430c8 --- /dev/null +++ b/skills/skill-handoff-summary/README.zh-CN.md @@ -0,0 +1,42 @@ +# skill-handoff-summary + +[English](README.md) + +## Overview + +`skill-handoff-summary` 用于在长时间编码任务需要暂停或转交时,生成清晰的暂停说明或交接摘要。 +它把状态、阻塞点和下一步整理成可直接接手的材料,避免下一位执行者从零翻线程历史。 + +## Best For + +- 一次工作结束时还有未完成事项,需要先暂停 +- 把任务交给另一位执行者,并提供可信的重启说明 +- 在上下文继续变旧之前,先记录阻塞点、已做决定和下一步动作 +- 降低长线程在交接后的恢复成本 + +## What It Is Not For + +- 在继续工作前重建当前任务状态 +- 决定一个任务是否需要分阶段或检查点 +- 以套件级工作流统筹多个原子包 +- 在根本不需要交接时替代最终用户答复 + +## Install + +可通过本仓库中的标准发布路径安装 `skill-handoff-summary`。 + +你也可以直接这样对 Codex 说: + +- `请用 skill-installer 从 Golden-Promise/codex-skill-library 的 skills/skill-handoff-summary 安装 skill-handoff-summary。` +- `请用 skill-installer 从 Golden-Promise/codex-skill-library 的 skills/skill-handoff-summary 安装 skill-handoff-summary,并使用我指定的 release 或 ref。` + +## How To Use + +当执行即将暂停,或者任务要移交给另一个负责人时,就使用这个包。 +说明当前状态、未解决问题、阻塞点以及最先要做的下一步,让它输出一个简洁、可交接的摘要,而不是重新规划整个流程。 + +## References + +- `SKILL.md`:触发路由与包边界 +- `references/`:后续面向读者的示例与提示词模式 +- `assets/`:后续的交接说明、阻塞点和下一步模板 diff --git a/skills/skill-handoff-summary/SKILL.md b/skills/skill-handoff-summary/SKILL.md new file mode 100644 index 0000000..6949869 --- /dev/null +++ b/skills/skill-handoff-summary/SKILL.md @@ -0,0 +1,24 @@ +--- +name: skill-handoff-summary +description: Use when the user needs a structured pause or transfer summary for an ongoing coding task, including status, blockers, and next steps, without re-planning the whole workflow. +--- + +# Skill Handoff Summary + +## Overview + +Capture a clear pause or transfer summary for a coding task that will be resumed later by the same person or another agent. 
+Use this skill for handoff quality, not for rebuilding stale context or deciding phase gates. + +## Use This Skill When + +- pausing work and needing a clean summary for later resumption +- transferring a task to another agent with open questions and blockers +- capturing current status, risks, and next steps in one place +- reducing restart cost after a stop point in a long thread + +## References + +- `README.md` and `README.zh-CN.md`: package overview and usage guidance +- `references/`: future public examples for handoff prompts and pause summaries +- `assets/`: future templates for handoff notes, blockers, and next-step summaries diff --git a/skills/skill-handoff-summary/agents/openai.yaml b/skills/skill-handoff-summary/agents/openai.yaml new file mode 100644 index 0000000..54c6e93 --- /dev/null +++ b/skills/skill-handoff-summary/agents/openai.yaml @@ -0,0 +1,4 @@ +interface: + display_name: "Skill Handoff Summary" + short_description: "Write concise pause and transfer notes for long-running coding work" + default_prompt: "Use $skill-handoff-summary when the task is pausing or changing hands: capture current status, blockers, open questions, and next steps in a concise handoff without turning it into a new workflow plan." 
diff --git a/skills/skill-handoff-summary/assets/.gitkeep b/skills/skill-handoff-summary/assets/.gitkeep new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/skills/skill-handoff-summary/assets/.gitkeep @@ -0,0 +1 @@ + diff --git a/skills/skill-handoff-summary/references/.gitkeep b/skills/skill-handoff-summary/references/.gitkeep new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/skills/skill-handoff-summary/references/.gitkeep @@ -0,0 +1 @@ + diff --git a/skills/skill-handoff-summary/scripts/.gitkeep b/skills/skill-handoff-summary/scripts/.gitkeep new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/skills/skill-handoff-summary/scripts/.gitkeep @@ -0,0 +1 @@ + diff --git a/skills/skill-handoff-summary/tests/.gitkeep b/skills/skill-handoff-summary/tests/.gitkeep new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/skills/skill-handoff-summary/tests/.gitkeep @@ -0,0 +1 @@ + diff --git a/skills/skill-phase-gate/README.md b/skills/skill-phase-gate/README.md new file mode 100644 index 0000000..245a28e --- /dev/null +++ b/skills/skill-phase-gate/README.md @@ -0,0 +1,42 @@ +# skill-phase-gate + +[简体中文](README.zh-CN.md) + +## Overview + +`skill-phase-gate` is the package for deciding when a coding task needs explicit phases, checkpoints, and exit criteria before execution continues. +It keeps staged work intentional so a complex thread does not blur into an unreviewed one-shot run. 
+ +## Best For + +- splitting a multi-step task before implementation starts +- adding checkpoints to risky refactors or migrations +- making phase boundaries visible when several changes depend on each other +- deciding whether a task is small enough to proceed directly or large enough to gate + +## What It Is Not For + +- reconstructing stale or missing task context +- summarizing the current state after an interruption +- writing a pause note or transfer package for another agent +- orchestrating the whole long-task continuity suite + +## Install + +Install `skill-phase-gate` from this repository with the standard package path for published Codex skills. + +You can ask Codex in natural language: + +- `Use skill-installer to install skill-phase-gate from Golden-Promise/codex-skill-library at skills/skill-phase-gate.` +- `Use skill-installer to install skill-phase-gate from Golden-Promise/codex-skill-library at skills/skill-phase-gate using the release or ref I specify.` + +## How To Use + +Use this package before the task drifts into implementation. +Describe the multi-step goal, the points that need review or verification, and where the work should pause before moving forward, then have the skill turn that into a narrow staged plan. 
+ +## References + +- `SKILL.md` for trigger routing and package boundaries +- `references/` for future public examples and prompt patterns +- `assets/` for future phase-plan, checkpoint, and exit-criteria templates diff --git a/skills/skill-phase-gate/README.zh-CN.md b/skills/skill-phase-gate/README.zh-CN.md new file mode 100644 index 0000000..950c788 --- /dev/null +++ b/skills/skill-phase-gate/README.zh-CN.md @@ -0,0 +1,42 @@ +# skill-phase-gate + +[English](README.md) + +## Overview + +`skill-phase-gate` 用于判断一个编码任务是否需要在继续执行前先建立明确的阶段、检查点和退出条件。 +它让分阶段工作保持清晰和可审阅,避免复杂线程被当成一次性动作直接冲过去。 + +## Best For + +- 在开始实现前拆分一个多步骤任务 +- 为高风险重构或迁移增加检查点 +- 当多个改动互相依赖时,先把阶段边界讲清楚 +- 判断一个任务是可以直接做,还是应该先门控分阶段 + +## What It Is Not For + +- 重建过时或缺失的任务上下文 +- 在中断后刷新当前状态 +- 为另一个执行者撰写暂停或转交说明 +- 统筹整套长任务连续性套件 + +## Install + +可通过本仓库中的标准发布路径安装 `skill-phase-gate`。 + +你也可以直接这样对 Codex 说: + +- `请用 skill-installer 从 Golden-Promise/codex-skill-library 的 skills/skill-phase-gate 安装 skill-phase-gate。` +- `请用 skill-installer 从 Golden-Promise/codex-skill-library 的 skills/skill-phase-gate 安装 skill-phase-gate,并使用我指定的 release 或 ref。` + +## How To Use + +在任务还没有滑进实现细节之前,就可以先调用这个包。 +描述你的多步骤目标、哪些位置需要审阅或验证、以及哪些边界应该先停一下,再让它把这些内容收敛成一个窄而清晰的分阶段计划。 + +## References + +- `SKILL.md`:触发路由与包边界 +- `references/`:后续面向读者的示例与提示词模式 +- `assets/`:后续的阶段计划、检查点和退出条件模板 diff --git a/skills/skill-phase-gate/SKILL.md b/skills/skill-phase-gate/SKILL.md new file mode 100644 index 0000000..ec77948 --- /dev/null +++ b/skills/skill-phase-gate/SKILL.md @@ -0,0 +1,24 @@ +--- +name: skill-phase-gate +description: Use when the user needs to decide whether an ongoing coding task should be split into explicit phases, checkpoints, or exit criteria before more execution continues. +--- + +# Skill Phase Gate + +## Overview + +Add clear phase boundaries to multi-step coding work when the task should not be treated as a single uninterrupted run. +Use this skill for staged execution decisions, not for state refresh or pause handoffs. 
+ +## Use This Skill When + +- a task needs explicit phases before coding or migration work continues +- the next step depends on checkpoints, review gates, or exit criteria +- a thread is drifting into ad hoc execution and needs deliberate structure +- you need to decide whether the work is large enough to stage at all + +## References + +- `README.md` and `README.zh-CN.md`: package overview and usage guidance +- `references/`: future public examples for phase splits, checkpoints, and gate prompts +- `assets/`: future templates for phase plans, checkpoint lists, and exit criteria diff --git a/skills/skill-phase-gate/agents/openai.yaml b/skills/skill-phase-gate/agents/openai.yaml new file mode 100644 index 0000000..f6963a8 --- /dev/null +++ b/skills/skill-phase-gate/agents/openai.yaml @@ -0,0 +1,4 @@ +interface: + display_name: "Skill Phase Gate" + short_description: "Add explicit phases, checkpoints, and exit criteria to long coding tasks" + default_prompt: "Use $skill-phase-gate when the task should be staged before more execution continues: decide whether the work needs explicit phases, define checkpoints and exit criteria, and keep the output focused on workflow boundaries rather than context refresh or handoff notes." 
diff --git a/skills/skill-phase-gate/assets/.gitkeep b/skills/skill-phase-gate/assets/.gitkeep new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/skills/skill-phase-gate/assets/.gitkeep @@ -0,0 +1 @@ + diff --git a/skills/skill-phase-gate/references/.gitkeep b/skills/skill-phase-gate/references/.gitkeep new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/skills/skill-phase-gate/references/.gitkeep @@ -0,0 +1 @@ + diff --git a/skills/skill-phase-gate/scripts/.gitkeep b/skills/skill-phase-gate/scripts/.gitkeep new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/skills/skill-phase-gate/scripts/.gitkeep @@ -0,0 +1 @@ + diff --git a/skills/skill-phase-gate/tests/.gitkeep b/skills/skill-phase-gate/tests/.gitkeep new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/skills/skill-phase-gate/tests/.gitkeep @@ -0,0 +1 @@ + diff --git a/skills/skill-task-continuity/README.md b/skills/skill-task-continuity/README.md new file mode 100644 index 0000000..821a793 --- /dev/null +++ b/skills/skill-task-continuity/README.md @@ -0,0 +1,42 @@ +# skill-task-continuity + +[简体中文](README.zh-CN.md) + +## Overview + +`skill-task-continuity` is the composition package for work that is explicitly about managing long-task continuity as a coordinated system. +It bootstraps or routes across context refresh, phase gating, and handoff writing while keeping the atomic package boundaries visible. 
+ +## Best For + +- setting up the long-task continuity suite as a coherent package family +- coordinating which atomic continuity package should act first +- handling requests that truly span state refresh, staged execution, and handoff behavior +- keeping suite-level boundaries clear when prompts mention several continuity concerns at once + +## What It Is Not For + +- replacing `skill-context-keeper` for ordinary state refresh work +- replacing `skill-phase-gate` for a normal staged-plan decision +- replacing `skill-handoff-summary` for a simple pause or transfer note +- stealing one-package tasks just because the prompt contains many keywords + +## Install + +Install `skill-task-continuity` from this repository with the standard package path for published Codex skills. + +You can ask Codex in natural language: + +- `Use skill-installer to install skill-task-continuity from Golden-Promise/codex-skill-library at skills/skill-task-continuity.` +- `Use skill-installer to install skill-task-continuity from Golden-Promise/codex-skill-library at skills/skill-task-continuity using the release or ref I specify.` + +## How To Use + +Start here only when the task itself is about suite bootstrap or multi-package continuity coordination. +Describe which continuity problems are in play, which atomic packages are expected to cooperate, and what boundaries must stay narrow, then let the package route or scaffold the suite-level workflow. 
+ +## References + +- `SKILL.md` for trigger routing and package boundaries +- `references/` for future public examples and prompt patterns +- `assets/` for future suite bootstrap and coordination templates diff --git a/skills/skill-task-continuity/README.zh-CN.md b/skills/skill-task-continuity/README.zh-CN.md new file mode 100644 index 0000000..bed9935 --- /dev/null +++ b/skills/skill-task-continuity/README.zh-CN.md @@ -0,0 +1,42 @@ +# skill-task-continuity + +[English](README.md) + +## Overview + +`skill-task-continuity` 是一个组合包,面向那些“本身就在处理长任务连续性体系”的工作。 +它负责在上下文刷新、阶段门控和交接说明之间做套件级协调,同时保持原子包边界清晰可见。 + +## Best For + +- 把长任务连续性套件作为一个完整包族进行搭建 +- 协调应该先触发哪个原子连续性包 +- 处理那些确实同时涉及状态刷新、分阶段执行和交接行为的请求 +- 当提示同时提到多个连续性问题时,仍然保持套件边界清楚 + +## What It Is Not For + +- 用来替代 `skill-context-keeper` 的普通状态刷新 +- 用来替代 `skill-phase-gate` 的常规分阶段决策 +- 用来替代 `skill-handoff-summary` 的简单暂停或转交说明 +- 仅仅因为提示里关键词很多,就抢走本该由单一包处理的任务 + +## Install + +可通过本仓库中的标准发布路径安装 `skill-task-continuity`。 + +你也可以直接这样对 Codex 说: + +- `请用 skill-installer 从 Golden-Promise/codex-skill-library 的 skills/skill-task-continuity 安装 skill-task-continuity。` +- `请用 skill-installer 从 Golden-Promise/codex-skill-library 的 skills/skill-task-continuity 安装 skill-task-continuity,并使用我指定的 release 或 ref。` + +## How To Use + +只有当任务本身是在做套件启动或多包连续性协调时,才从这里开始。 +说明当前涉及哪些连续性问题、预期哪些原子包协同工作、以及哪些边界必须保持收敛,然后让这个包去路由或搭建套件级流程。 + +## References + +- `SKILL.md`:触发路由与包边界 +- `references/`:后续面向读者的示例与提示词模式 +- `assets/`:后续的套件启动与协调模板 diff --git a/skills/skill-task-continuity/SKILL.md b/skills/skill-task-continuity/SKILL.md new file mode 100644 index 0000000..bd296ab --- /dev/null +++ b/skills/skill-task-continuity/SKILL.md @@ -0,0 +1,24 @@ +--- +name: skill-task-continuity +description: Use when the user is explicitly bootstrapping or coordinating the long-task continuity suite across context refresh, phase gating, and handoff boundaries for an ongoing coding effort. 
+--- + +# Skill Task Continuity + +## Overview + +Coordinate the long-task continuity suite when the task is about the continuity workflow itself rather than a single atomic continuity action. +Use this package to bootstrap or orchestrate the suite while keeping the atomic package boundaries clear. + +## Use This Skill When + +- bootstrapping the long-task continuity suite in a project or workflow +- coordinating context refresh, phase gates, and handoff behavior together +- deciding which atomic package should trigger first in a suite-shaped request +- protecting package boundaries when a prompt mentions multiple continuity concerns + +## References + +- `README.md` and `README.zh-CN.md`: package overview and usage guidance +- `references/`: future public examples for suite bootstrap and orchestration prompts +- `assets/`: future suite bootstrap templates and continuity coordination assets diff --git a/skills/skill-task-continuity/agents/openai.yaml b/skills/skill-task-continuity/agents/openai.yaml new file mode 100644 index 0000000..d8391a5 --- /dev/null +++ b/skills/skill-task-continuity/agents/openai.yaml @@ -0,0 +1,4 @@ +interface: + display_name: "Skill Task Continuity" + short_description: "Coordinate the long-task continuity suite across state, phase, and handoff boundaries" + default_prompt: "Use $skill-task-continuity when the task is explicitly about bootstrapping or coordinating the long-task continuity suite: route across context refresh, phase gates, and handoff concerns while keeping atomic package boundaries clear." 
diff --git a/skills/skill-task-continuity/assets/.gitkeep b/skills/skill-task-continuity/assets/.gitkeep new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/skills/skill-task-continuity/assets/.gitkeep @@ -0,0 +1 @@ + diff --git a/skills/skill-task-continuity/references/.gitkeep b/skills/skill-task-continuity/references/.gitkeep new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/skills/skill-task-continuity/references/.gitkeep @@ -0,0 +1 @@ + diff --git a/skills/skill-task-continuity/scripts/.gitkeep b/skills/skill-task-continuity/scripts/.gitkeep new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/skills/skill-task-continuity/scripts/.gitkeep @@ -0,0 +1 @@ + diff --git a/skills/skill-task-continuity/tests/.gitkeep b/skills/skill-task-continuity/tests/.gitkeep new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/skills/skill-task-continuity/tests/.gitkeep @@ -0,0 +1 @@ + From 5dee7d405e0d64501b190d2e60bc84c63e264f42 Mon Sep 17 00:00:00 2001 From: NuoJin <1430093320@qq.com> Date: Wed, 25 Mar 2026 13:46:16 +0800 Subject: [PATCH 05/31] docs: add continuity reference placeholders --- CHANGELOG.md | 4 ++-- README.md | 2 +- README.zh-CN.md | 2 +- skills/README.md | 2 +- skills/README.zh-CN.md | 2 +- skills/skill-context-keeper/README.md | 2 +- skills/skill-context-keeper/README.zh-CN.md | 2 +- skills/skill-context-keeper/references/.gitkeep | 1 - skills/skill-context-keeper/references/README.md | 7 +++++++ skills/skill-context-keeper/references/README.zh-CN.md | 7 +++++++ skills/skill-handoff-summary/README.md | 2 +- skills/skill-handoff-summary/README.zh-CN.md | 2 +- skills/skill-handoff-summary/references/.gitkeep | 1 - skills/skill-handoff-summary/references/README.md | 7 +++++++ skills/skill-handoff-summary/references/README.zh-CN.md | 7 +++++++ skills/skill-phase-gate/README.md | 2 +- skills/skill-phase-gate/README.zh-CN.md | 2 +- skills/skill-phase-gate/references/.gitkeep | 1 - 
skills/skill-phase-gate/references/README.md | 7 +++++++ skills/skill-phase-gate/references/README.zh-CN.md | 7 +++++++ skills/skill-task-continuity/README.md | 2 +- skills/skill-task-continuity/README.zh-CN.md | 2 +- skills/skill-task-continuity/references/.gitkeep | 1 - skills/skill-task-continuity/references/README.md | 7 +++++++ skills/skill-task-continuity/references/README.zh-CN.md | 7 +++++++ 25 files changed, 70 insertions(+), 18 deletions(-) delete mode 100644 skills/skill-context-keeper/references/.gitkeep create mode 100644 skills/skill-context-keeper/references/README.md create mode 100644 skills/skill-context-keeper/references/README.zh-CN.md delete mode 100644 skills/skill-handoff-summary/references/.gitkeep create mode 100644 skills/skill-handoff-summary/references/README.md create mode 100644 skills/skill-handoff-summary/references/README.zh-CN.md delete mode 100644 skills/skill-phase-gate/references/.gitkeep create mode 100644 skills/skill-phase-gate/references/README.md create mode 100644 skills/skill-phase-gate/references/README.zh-CN.md delete mode 100644 skills/skill-task-continuity/references/.gitkeep create mode 100644 skills/skill-task-continuity/references/README.md create mode 100644 skills/skill-task-continuity/references/README.zh-CN.md diff --git a/CHANGELOG.md b/CHANGELOG.md index 8bf7c32..fbd8b3b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,8 +7,8 @@ All notable changes to `codex-skill-library` should be documented in this file. ### Added - Add scaffolded package roots for `skill-context-keeper`, `skill-phase-gate`, `skill-handoff-summary`, and `skill-task-continuity`, including bilingual README landing pages, routing-first `SKILL.md` files, OpenAI agent metadata, and visible first-pass package directories. -- Add downstream bootstrap support for the long-task continuity suite through initial package metadata, install entry points, and repository index coverage. 
-- Add evaluation scaffolding support by reserving package directories for future references, scripts, tests, and template assets tied to the long-task continuity suite. +- Add bootstrap groundwork for the long-task continuity suite through initial package metadata, install entry points, and repository index coverage. +- Add evaluation scaffolding groundwork by reserving package directories for future references, scripts, tests, and template assets tied to the long-task continuity suite. ## [0.5.1] - 2026-03-25 diff --git a/README.md b/README.md index adb2495..a58991d 100644 --- a/README.md +++ b/README.md @@ -36,7 +36,7 @@ This repository is designed for people who want to: 1. Open the package list in [skills/README.md](skills/README.md). 2. Choose a skill and read its package `README.md`. 3. Install it with `skill-installer`, usually into the default Codex shared library. -4. Use the package references for examples, prompts, and deeper guidance. +4. Use the package reference pages for boundary notes now, and later for examples, prompts, and deeper guidance. ## Install Example diff --git a/README.zh-CN.md b/README.zh-CN.md index 76f41d9..600c44e 100644 --- a/README.zh-CN.md +++ b/README.zh-CN.md @@ -36,7 +36,7 @@ 1. 先看 [skills/README.zh-CN.md](skills/README.zh-CN.md) 浏览当前可用 skill。 2. 进入具体 skill 包的 `README.md` 了解它是否适合你的场景。 3. 使用 `skill-installer` 进行安装,通常直接安装到默认的 Codex 共享库。 -4. 需要更详细示例时,继续阅读该包下的 `references/`。 +4. 现在可先阅读该包下的参考页了解边界说明,后续阶段再继续使用其中补充的示例与提示词资料。 ## 安装示例 diff --git a/skills/README.md b/skills/README.md index a8e9979..9684726 100644 --- a/skills/README.md +++ b/skills/README.md @@ -8,7 +8,7 @@ This directory contains the installable skill packages published by `codex-skill 1. Scan the table below to find the skill that matches your task. 2. Open the package README in your preferred language. -3. Use the package references when you need examples or prompt wording. +3. 
Use the package reference pages for boundary notes now, and later for examples or prompt wording. ## Published Packages diff --git a/skills/README.zh-CN.md b/skills/README.zh-CN.md index 921c7a0..ba51cf7 100644 --- a/skills/README.zh-CN.md +++ b/skills/README.zh-CN.md @@ -8,7 +8,7 @@ 1. 先看下面的表格,找到与你任务最匹配的 skill。 2. 打开该包对应语言的 README。 -3. 需要示例、提示词或更细说明时,再继续看包内 `references/`。 +3. 现在先看包内参考页了解边界说明,后续阶段再继续使用其中补充的示例、提示词或更细说明。 ## 已发布包 diff --git a/skills/skill-context-keeper/README.md b/skills/skill-context-keeper/README.md index 80f81c6..3cc0a9e 100644 --- a/skills/skill-context-keeper/README.md +++ b/skills/skill-context-keeper/README.md @@ -38,5 +38,5 @@ Describe what looks stale or missing, then ask the skill to reconstruct the curr ## References - `SKILL.md` for trigger routing and package boundaries -- `references/` for future public examples and prompt patterns +- [references/README.md](references/README.md) for the package boundary and the planned reader-facing reference scope - `assets/` for future state snapshot and continuity note templates diff --git a/skills/skill-context-keeper/README.zh-CN.md b/skills/skill-context-keeper/README.zh-CN.md index 8d1c939..1573511 100644 --- a/skills/skill-context-keeper/README.zh-CN.md +++ b/skills/skill-context-keeper/README.zh-CN.md @@ -38,5 +38,5 @@ ## References - `SKILL.md`:触发路由与包边界 -- `references/`:后续面向读者的示例与提示词模式 +- [references/README.zh-CN.md](references/README.zh-CN.md):包边界说明,以及后续面向读者参考资料的范围 - `assets/`:后续的状态快照与连续性笔记模板 diff --git a/skills/skill-context-keeper/references/.gitkeep b/skills/skill-context-keeper/references/.gitkeep deleted file mode 100644 index 8b13789..0000000 --- a/skills/skill-context-keeper/references/.gitkeep +++ /dev/null @@ -1 +0,0 @@ - diff --git a/skills/skill-context-keeper/references/README.md b/skills/skill-context-keeper/references/README.md new file mode 100644 index 0000000..d6cd604 --- /dev/null +++ b/skills/skill-context-keeper/references/README.md @@ -0,0 +1,7 @@ +# References for 
skill-context-keeper + +This references area explains the public boundary of `skill-context-keeper`. +It is reserved for reader-facing examples, state refresh patterns, and continuity prompts that stay focused on reconstructing ongoing task state. + +Later phases will add concrete examples and reusable templates here. +For now, use the package [README.md](../README.md) for installation and entry guidance, and [SKILL.md](../SKILL.md) for the routing-first trigger summary. diff --git a/skills/skill-context-keeper/references/README.zh-CN.md b/skills/skill-context-keeper/references/README.zh-CN.md new file mode 100644 index 0000000..7b9c2a6 --- /dev/null +++ b/skills/skill-context-keeper/references/README.zh-CN.md @@ -0,0 +1,7 @@ +# skill-context-keeper 参考资料 + +这个 `references/` 目录用于说明 `skill-context-keeper` 的公开边界。 +后续这里会放面向读者的示例、状态刷新模式,以及专注于“重建当前任务状态”的连续性提示词资料。 + +更具体的示例会在后续阶段补充。 +目前请先阅读包内 [README.md](../README.md) 了解安装与入口说明,再结合 [SKILL.md](../SKILL.md) 查看路由优先的触发摘要。 diff --git a/skills/skill-handoff-summary/README.md b/skills/skill-handoff-summary/README.md index ef11374..0d5de02 100644 --- a/skills/skill-handoff-summary/README.md +++ b/skills/skill-handoff-summary/README.md @@ -38,5 +38,5 @@ Describe the current status, unresolved questions, blockers, and the very next a ## References - `SKILL.md` for trigger routing and package boundaries -- `references/` for future public examples and prompt patterns +- [references/README.md](references/README.md) for the package boundary and the planned reader-facing reference scope - `assets/` for future handoff, blocker, and next-step templates diff --git a/skills/skill-handoff-summary/README.zh-CN.md b/skills/skill-handoff-summary/README.zh-CN.md index 32430c8..b7afed4 100644 --- a/skills/skill-handoff-summary/README.zh-CN.md +++ b/skills/skill-handoff-summary/README.zh-CN.md @@ -38,5 +38,5 @@ ## References - `SKILL.md`:触发路由与包边界 -- `references/`:后续面向读者的示例与提示词模式 +- 
[references/README.zh-CN.md](references/README.zh-CN.md):包边界说明,以及后续面向读者参考资料的范围 - `assets/`:后续的交接说明、阻塞点和下一步模板 diff --git a/skills/skill-handoff-summary/references/.gitkeep b/skills/skill-handoff-summary/references/.gitkeep deleted file mode 100644 index 8b13789..0000000 --- a/skills/skill-handoff-summary/references/.gitkeep +++ /dev/null @@ -1 +0,0 @@ - diff --git a/skills/skill-handoff-summary/references/README.md b/skills/skill-handoff-summary/references/README.md new file mode 100644 index 0000000..035d5f3 --- /dev/null +++ b/skills/skill-handoff-summary/references/README.md @@ -0,0 +1,7 @@ +# References for skill-handoff-summary + +This references area explains the public boundary of `skill-handoff-summary`. +It is reserved for reader-facing examples, pause-note formats, and handoff patterns that capture status, blockers, and next steps without taking over task planning. + +Later phases will add concrete examples and reusable templates here. +For now, use the package [README.md](../README.md) for installation and entry guidance, and [SKILL.md](../SKILL.md) for the routing-first trigger summary. 
diff --git a/skills/skill-handoff-summary/references/README.zh-CN.md b/skills/skill-handoff-summary/references/README.zh-CN.md new file mode 100644 index 0000000..8cd209c --- /dev/null +++ b/skills/skill-handoff-summary/references/README.zh-CN.md @@ -0,0 +1,7 @@ +# skill-handoff-summary 参考资料 + +这个 `references/` 目录用于说明 `skill-handoff-summary` 的公开边界。 +后续这里会放面向读者的示例、暂停说明格式,以及围绕状态、阻塞点和下一步的交接模式资料,同时避免接管整个任务规划。 + +更具体的示例会在后续阶段补充。 +目前请先阅读包内 [README.md](../README.md) 了解安装与入口说明,再结合 [SKILL.md](../SKILL.md) 查看路由优先的触发摘要。 diff --git a/skills/skill-phase-gate/README.md b/skills/skill-phase-gate/README.md index 245a28e..c0a9b4b 100644 --- a/skills/skill-phase-gate/README.md +++ b/skills/skill-phase-gate/README.md @@ -38,5 +38,5 @@ Describe the multi-step goal, the points that need review or verification, and w ## References - `SKILL.md` for trigger routing and package boundaries -- `references/` for future public examples and prompt patterns +- [references/README.md](references/README.md) for the package boundary and the planned reader-facing reference scope - `assets/` for future phase-plan, checkpoint, and exit-criteria templates diff --git a/skills/skill-phase-gate/README.zh-CN.md b/skills/skill-phase-gate/README.zh-CN.md index 950c788..aaea419 100644 --- a/skills/skill-phase-gate/README.zh-CN.md +++ b/skills/skill-phase-gate/README.zh-CN.md @@ -38,5 +38,5 @@ ## References - `SKILL.md`:触发路由与包边界 -- `references/`:后续面向读者的示例与提示词模式 +- [references/README.zh-CN.md](references/README.zh-CN.md):包边界说明,以及后续面向读者参考资料的范围 - `assets/`:后续的阶段计划、检查点和退出条件模板 diff --git a/skills/skill-phase-gate/references/.gitkeep b/skills/skill-phase-gate/references/.gitkeep deleted file mode 100644 index 8b13789..0000000 --- a/skills/skill-phase-gate/references/.gitkeep +++ /dev/null @@ -1 +0,0 @@ - diff --git a/skills/skill-phase-gate/references/README.md b/skills/skill-phase-gate/references/README.md new file mode 100644 index 0000000..3d2baf4 --- /dev/null +++ b/skills/skill-phase-gate/references/README.md @@ 
-0,0 +1,7 @@ +# References for skill-phase-gate + +This references area explains the public boundary of `skill-phase-gate`. +It is reserved for reader-facing examples, checkpoint patterns, and phase-planning material that help stage multi-step work without turning into context refresh or handoff writing. + +Later phases will add concrete examples and reusable templates here. +For now, use the package [README.md](../README.md) for installation and entry guidance, and [SKILL.md](../SKILL.md) for the routing-first trigger summary. diff --git a/skills/skill-phase-gate/references/README.zh-CN.md b/skills/skill-phase-gate/references/README.zh-CN.md new file mode 100644 index 0000000..a0bd71e --- /dev/null +++ b/skills/skill-phase-gate/references/README.zh-CN.md @@ -0,0 +1,7 @@ +# skill-phase-gate 参考资料 + +这个 `references/` 目录用于说明 `skill-phase-gate` 的公开边界。 +后续这里会放面向读者的示例、检查点模式,以及帮助多步骤任务分阶段推进的资料,同时避免扩展成上下文刷新或交接说明。 + +更具体的示例会在后续阶段补充。 +目前请先阅读包内 [README.md](../README.md) 了解安装与入口说明,再结合 [SKILL.md](../SKILL.md) 查看路由优先的触发摘要。 diff --git a/skills/skill-task-continuity/README.md b/skills/skill-task-continuity/README.md index 821a793..dde33b9 100644 --- a/skills/skill-task-continuity/README.md +++ b/skills/skill-task-continuity/README.md @@ -38,5 +38,5 @@ Describe which continuity problems are in play, which atomic packages are expect ## References - `SKILL.md` for trigger routing and package boundaries -- `references/` for future public examples and prompt patterns +- [references/README.md](references/README.md) for the package boundary and the planned reader-facing reference scope - `assets/` for future suite bootstrap and coordination templates diff --git a/skills/skill-task-continuity/README.zh-CN.md b/skills/skill-task-continuity/README.zh-CN.md index bed9935..b46ddf9 100644 --- a/skills/skill-task-continuity/README.zh-CN.md +++ b/skills/skill-task-continuity/README.zh-CN.md @@ -38,5 +38,5 @@ ## References - `SKILL.md`:触发路由与包边界 -- `references/`:后续面向读者的示例与提示词模式 +- 
[references/README.zh-CN.md](references/README.zh-CN.md):包边界说明,以及后续面向读者参考资料的范围 - `assets/`:后续的套件启动与协调模板 diff --git a/skills/skill-task-continuity/references/.gitkeep b/skills/skill-task-continuity/references/.gitkeep deleted file mode 100644 index 8b13789..0000000 --- a/skills/skill-task-continuity/references/.gitkeep +++ /dev/null @@ -1 +0,0 @@ - diff --git a/skills/skill-task-continuity/references/README.md b/skills/skill-task-continuity/references/README.md new file mode 100644 index 0000000..67f99ff --- /dev/null +++ b/skills/skill-task-continuity/references/README.md @@ -0,0 +1,7 @@ +# References for skill-task-continuity + +This references area explains the public boundary of `skill-task-continuity`. +It is reserved for reader-facing suite-bootstrap examples, coordination patterns, and boundary guidance for requests that truly span context refresh, phase gates, and handoff concerns. + +Later phases will add concrete examples and reusable templates here. +For now, use the package [README.md](../README.md) for installation and entry guidance, and [SKILL.md](../SKILL.md) for the routing-first trigger summary. 
diff --git a/skills/skill-task-continuity/references/README.zh-CN.md b/skills/skill-task-continuity/references/README.zh-CN.md new file mode 100644 index 0000000..95468d4 --- /dev/null +++ b/skills/skill-task-continuity/references/README.zh-CN.md @@ -0,0 +1,7 @@ +# skill-task-continuity 参考资料 + +这个 `references/` 目录用于说明 `skill-task-continuity` 的公开边界。 +后续这里会放面向读者的套件启动示例、协调模式,以及那些确实同时涉及上下文刷新、阶段门控和交接问题时的边界说明。 + +更具体的示例会在后续阶段补充。 +目前请先阅读包内 [README.md](../README.md) 了解安装与入口说明,再结合 [SKILL.md](../SKILL.md) 查看路由优先的触发摘要。 From 48a4a7f3d82cab231d78035d90ac20194fe3c69a Mon Sep 17 00:00:00 2001 From: NuoJin <1430093320@qq.com> Date: Wed, 25 Mar 2026 13:55:44 +0800 Subject: [PATCH 06/31] test: add skill-context-keeper contract checks --- .../tests/test_package_contract.py | 49 +++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 skills/skill-context-keeper/tests/test_package_contract.py diff --git a/skills/skill-context-keeper/tests/test_package_contract.py b/skills/skill-context-keeper/tests/test_package_contract.py new file mode 100644 index 0000000..60964ce --- /dev/null +++ b/skills/skill-context-keeper/tests/test_package_contract.py @@ -0,0 +1,49 @@ +from pathlib import Path +import unittest + + +ROOT = Path(__file__).resolve().parents[1] + + +class ContextKeeperPackageTests(unittest.TestCase): + def test_skill_frontmatter_name(self): + text = (ROOT / "SKILL.md").read_text(encoding="utf-8") + self.assertIn("name: skill-context-keeper", text) + + def test_readmes_exist(self): + self.assertTrue((ROOT / "README.md").exists()) + self.assertTrue((ROOT / "README.zh-CN.md").exists()) + + def test_task_state_template_has_required_sections(self): + text = (ROOT / "assets" / "TASK_STATE.template.md").read_text(encoding="utf-8") + for heading in [ + "## Current Objective", + "## Scope / Non-Goals", + "## Hard Constraints", + "## Current Codebase Facts", + "## Completed Work", + "## Open Issues / Risks", + "## Next Recommended Action", + "## Verification Still Needed", + 
"## Recent Decisions", + "## Resume Checklist", + ]: + self.assertIn(heading, text) + + def test_reference_guides_include_trigger_sections(self): + for relative_path in [ + ROOT / "references" / "use-cases.md", + ROOT / "references" / "use-cases.zh-CN.md", + ]: + text = relative_path.read_text(encoding="utf-8") + self.assertRegex(text, r"(?im)^## .*positive trigger") + self.assertRegex(text, r"(?im)^## .*negative trigger") + + def test_readme_spells_out_package_boundary(self): + text = (ROOT / "README.md").read_text(encoding="utf-8") + self.assertIn("does not own workflow gating", text) + self.assertIn("does not own final handoffs", text) + + +if __name__ == "__main__": + unittest.main() From a28c2c416db5fdae6b1b106d5e2e7161a2102ac5 Mon Sep 17 00:00:00 2001 From: NuoJin <1430093320@qq.com> Date: Wed, 25 Mar 2026 13:55:59 +0800 Subject: [PATCH 07/31] feat: add skill-context-keeper package --- skills/skill-context-keeper/README.md | 23 +++++++++- skills/skill-context-keeper/README.zh-CN.md | 23 +++++++++- skills/skill-context-keeper/SKILL.md | 29 +++++++++--- .../skill-context-keeper/agents/openai.yaml | 4 +- skills/skill-context-keeper/assets/.gitkeep | 1 - .../assets/TASK_STATE.template.md | 41 +++++++++++++++++ .../references/prompt-templates.en.md | 44 ++++++++++++++++++ .../references/prompt-templates.zh-CN.md | 43 +++++++++++++++++ .../references/use-cases.md | 46 +++++++++++++++++++ .../references/use-cases.zh-CN.md | 46 +++++++++++++++++++ skills/skill-context-keeper/tests/.gitkeep | 1 - 11 files changed, 287 insertions(+), 14 deletions(-) delete mode 100644 skills/skill-context-keeper/assets/.gitkeep create mode 100644 skills/skill-context-keeper/assets/TASK_STATE.template.md create mode 100644 skills/skill-context-keeper/references/prompt-templates.en.md create mode 100644 skills/skill-context-keeper/references/prompt-templates.zh-CN.md create mode 100644 skills/skill-context-keeper/references/use-cases.md create mode 100644 
skills/skill-context-keeper/references/use-cases.zh-CN.md delete mode 100644 skills/skill-context-keeper/tests/.gitkeep diff --git a/skills/skill-context-keeper/README.md b/skills/skill-context-keeper/README.md index 3cc0a9e..4f9ec40 100644 --- a/skills/skill-context-keeper/README.md +++ b/skills/skill-context-keeper/README.md @@ -6,6 +6,8 @@ `skill-context-keeper` is the narrow package for recovering and refreshing structured task state during a long coding thread. It helps the next turn start from the best known picture of the work without expanding into phase control or end-of-task handoff writing. +This package maintains structured long-task state only. +It assumes downstream state files can live at paths such as `.agent-state/TASK_STATE.md`, but it does not own workflow gating and it does not own final handoffs. ## Best For @@ -21,6 +23,19 @@ It helps the next turn start from the best known picture of the work without exp - writing a final pause or transfer handoff for another agent - bootstrapping the full long-task continuity suite +## Package Boundary + +Use this package when the task needs a current-state refresh. +It is responsible for reconstructing verified codebase facts, preserving open issues, and updating a compact task-state artifact for downstream work. + +Keep the boundary narrow: + +- maintain structured state for an ongoing task +- refresh or rewrite artifacts such as `.agent-state/TASK_STATE.md` +- separate facts, assumptions, and decisions clearly + +This package does not run phase gates, does not own workflow gating, and does not own final handoffs. + ## Install Install `skill-context-keeper` from this repository with the standard package path for published Codex skills. @@ -34,9 +49,13 @@ You can ask Codex in natural language: Start when the task needs a reliable state refresh before execution continues. 
Describe what looks stale or missing, then ask the skill to reconstruct the current task picture, carry forward unresolved work, and keep the summary narrow to ongoing state. +If you want a concrete downstream artifact, say so explicitly, for example: `Refresh the current task state and update .agent-state/TASK_STATE.md.` ## References - `SKILL.md` for trigger routing and package boundaries -- [references/README.md](references/README.md) for the package boundary and the planned reader-facing reference scope -- `assets/` for future state snapshot and continuity note templates +- [references/use-cases.md](references/use-cases.md) for positive and negative trigger examples +- [references/use-cases.zh-CN.md](references/use-cases.zh-CN.md) for Chinese trigger examples +- [references/prompt-templates.en.md](references/prompt-templates.en.md) for reusable refresh prompts +- [references/prompt-templates.zh-CN.md](references/prompt-templates.zh-CN.md) for Chinese refresh prompts +- [assets/TASK_STATE.template.md](assets/TASK_STATE.template.md) for the compact task-state artifact template diff --git a/skills/skill-context-keeper/README.zh-CN.md b/skills/skill-context-keeper/README.zh-CN.md index 1573511..bf06d9e 100644 --- a/skills/skill-context-keeper/README.zh-CN.md +++ b/skills/skill-context-keeper/README.zh-CN.md @@ -6,6 +6,8 @@ `skill-context-keeper` 是一个边界明确的包,专门用于在长编码线程里恢复和刷新结构化任务状态。 它帮助下一轮从当前最可信的工作图景继续,而不会扩展到阶段门控或最终交接说明。 +这个包只负责维护结构化长任务状态。 +它默认下游状态文件可能位于 `.agent-state/TASK_STATE.md` 之类的路径,但不负责流程门控,也不负责最终交接。 ## Best For @@ -21,6 +23,19 @@ - 为另一个执行者撰写暂停或转交说明 - 启动整套长任务连续性套件 +## Package Boundary + +当任务需要刷新“当前状态”时,就应该使用这个包。 +它负责重建已验证的代码库事实、保留未解决问题,并为下游继续执行更新一个紧凑的任务状态产物。 + +边界应保持收敛: + +- 维护进行中任务的结构化状态 +- 刷新或重写 `.agent-state/TASK_STATE.md` 这类产物 +- 明确区分事实、假设和决策 + +这个包不运行阶段门控,不负责流程门控,也不负责最终交接。 + ## Install 可通过本仓库中的标准发布路径安装 `skill-context-keeper`。 @@ -34,9 +49,13 @@ 当任务在继续执行前需要一次可靠的状态刷新时,就从这个包开始。 说明哪些摘要、待办或上下文已经过时,然后让它重建当前任务图景、延续未完成工作,并把输出限制在“当前状态”而不是流程规划或交接总结。 
+如果你希望产出明确落到某个文件,可以直接说:`请刷新当前任务状态,并更新 .agent-state/TASK_STATE.md。` ## References - `SKILL.md`:触发路由与包边界 -- [references/README.zh-CN.md](references/README.zh-CN.md):包边界说明,以及后续面向读者参考资料的范围 -- `assets/`:后续的状态快照与连续性笔记模板 +- [references/use-cases.md](references/use-cases.md):正向与反向触发示例 +- [references/use-cases.zh-CN.md](references/use-cases.zh-CN.md):中文触发示例 +- [references/prompt-templates.en.md](references/prompt-templates.en.md):英文状态刷新提示词模板 +- [references/prompt-templates.zh-CN.md](references/prompt-templates.zh-CN.md):中文状态刷新提示词模板 +- [assets/TASK_STATE.template.md](assets/TASK_STATE.template.md):紧凑任务状态模板 diff --git a/skills/skill-context-keeper/SKILL.md b/skills/skill-context-keeper/SKILL.md index 0884478..5bc4d00 100644 --- a/skills/skill-context-keeper/SKILL.md +++ b/skills/skill-context-keeper/SKILL.md @@ -7,18 +7,35 @@ description: Use when the user needs to preserve or refresh structured long-task ## Overview -Preserve and refresh working state for long-running coding tasks. -Use this skill when the thread needs trustworthy context reconstruction, not phase planning or final transfer notes. +Maintain structured working state for long-running coding tasks. +Use this skill when the thread needs trustworthy context reconstruction or a refreshed task-state artifact such as `.agent-state/TASK_STATE.md`, not phase planning or final transfer notes. 
## Use This Skill When - resuming a task after an interruption, stale summary, or context loss - rebuilding the last known task state before new work continues -- refreshing TODOs, decisions, or working assumptions for an ongoing task +- refreshing TODOs, verified facts, decisions, or working assumptions for an ongoing task - reconciling what changed since the last stable checkpoint +- updating a downstream state file such as `.agent-state/TASK_STATE.md` + +## Do Not Use This Skill When + +- the user needs workflow phases, checkpoints, or exit criteria +- the task is about deciding whether work should be gated before implementation +- the thread needs a final pause summary or transfer handoff +- the request is asking one package to coordinate the full continuity suite + +## Core Rules + +1. Keep the output focused on current task state, not future workflow control. +2. Distinguish verified facts from assumptions and from decisions already made. +3. Assume downstream path examples such as `.agent-state/TASK_STATE.md` unless the user specifies another target. +4. Do not run phase gates. +5. Do not generate final handoffs. 
## References -- `README.md` and `README.zh-CN.md`: package overview and usage guidance -- `references/`: future public examples for state refresh and continuity prompts -- `assets/`: future state snapshot templates and continuity note templates +- `README.md` and `README.zh-CN.md`: package overview, installation, and boundary guidance +- `references/use-cases.md` and `references/use-cases.zh-CN.md`: reader-facing trigger examples +- `references/prompt-templates.en.md` and `references/prompt-templates.zh-CN.md`: reusable refresh prompts +- `assets/TASK_STATE.template.md`: compact task-state artifact template diff --git a/skills/skill-context-keeper/agents/openai.yaml b/skills/skill-context-keeper/agents/openai.yaml index e719af9..d8f3cbe 100644 --- a/skills/skill-context-keeper/agents/openai.yaml +++ b/skills/skill-context-keeper/agents/openai.yaml @@ -1,4 +1,4 @@ interface: display_name: "Skill Context Keeper" - short_description: "Refresh and reconstruct long-task state without taking over phase control or handoff writing" - default_prompt: "Use $skill-context-keeper when the task needs a reliable long-thread state refresh: reconstruct the current context, carry forward unresolved work, and keep the output focused on ongoing task state rather than phase gates or final handoff notes." + short_description: "Maintain structured long-task state without taking over workflow gates or final handoffs" + default_prompt: "Use $skill-context-keeper when the task needs a reliable long-thread state refresh: reconstruct the current context, keep facts separate from assumptions and decisions, update a downstream artifact such as .agent-state/TASK_STATE.md when requested, and keep the output focused on ongoing task state rather than phase gates or final handoff notes." 
diff --git a/skills/skill-context-keeper/assets/.gitkeep b/skills/skill-context-keeper/assets/.gitkeep deleted file mode 100644 index 8b13789..0000000 --- a/skills/skill-context-keeper/assets/.gitkeep +++ /dev/null @@ -1 +0,0 @@ - diff --git a/skills/skill-context-keeper/assets/TASK_STATE.template.md b/skills/skill-context-keeper/assets/TASK_STATE.template.md new file mode 100644 index 0000000..0362343 --- /dev/null +++ b/skills/skill-context-keeper/assets/TASK_STATE.template.md @@ -0,0 +1,41 @@ +# Task State + +## Current Objective +- State the active task in one sentence. +- Mark assumptions as assumptions, not facts. + +## Scope / Non-Goals +- List what this refresh covers. +- Call out work that belongs to phase gates or final handoffs elsewhere. + +## Hard Constraints +- Capture fixed requirements, approvals, and safety limits. +- Separate must-follow constraints from preferences. + +## Current Codebase Facts +- Record verified repository facts only. +- Note the source if a fact came from tests, files, or commands. + +## Completed Work +- Summarize what is already done and still valid. +- Keep each item specific enough to avoid rework. + +## Open Issues / Risks +- List blockers, uncertainties, and fragile areas. +- Label each item as a fact, assumption, or risk. + +## Next Recommended Action +- Name the next concrete step for the downstream agent. +- Keep it narrow and immediately actionable. + +## Verification Still Needed +- List tests, checks, or reviews that have not run yet. +- Distinguish missing verification from failed verification. + +## Recent Decisions +- Capture decisions with a short reason. +- Separate decisions from open options. + +## Resume Checklist +- Give a short restart checklist in execution order. +- End with the first command, file, or prompt to use next. 
diff --git a/skills/skill-context-keeper/references/prompt-templates.en.md b/skills/skill-context-keeper/references/prompt-templates.en.md new file mode 100644 index 0000000..f75d781 --- /dev/null +++ b/skills/skill-context-keeper/references/prompt-templates.en.md @@ -0,0 +1,44 @@ +# skill-context-keeper Prompt Templates + +Use these templates when you want `skill-context-keeper` to refresh structured long-task state without drifting into workflow gating or final handoffs. + +## Positive Trigger Prompts + +- `Use skill-context-keeper to refresh the current task state from the repository before we continue.` +- `Reconcile the existing summary with the codebase and rewrite .agent-state/TASK_STATE.md.` +- `Refresh the current task state, keep facts separate from assumptions, and list the next recommended action.` + +## Negative Trigger Prompts + +- `Decide the project phases and gate the work before implementation starts.` +- `Generate the final handoff note for the next agent and close out the thread.` +- `Handle planning, state refresh, and the final transfer as one combined workflow.` + +## Refresh the Current Task State + +Use this phrasing when you want a straightforward state refresh: + +```text +Use skill-context-keeper to refresh the current task state. +Check the repository, keep verified facts separate from assumptions and decisions, +update .agent-state/TASK_STATE.md, and end with the next recommended action plus verification still needed. +Do not add workflow gates or a final handoff. +``` + +## Compact Resume Prompt + +```text +Refresh the current task state for this long-running coding task. +Assume the downstream artifact lives at .agent-state/TASK_STATE.md. +Capture current objective, scope, hard constraints, verified codebase facts, +completed work, open risks, recent decisions, and the next recommended action. +Keep the package boundary narrow: state maintenance only. 
+``` + +## Facts, Assumptions, and Decisions Prompt + +```text +Rebuild the task state and explicitly label each item as Fact, Assumption, or Decision. +Use repository evidence for facts, keep assumptions short, and record only decisions that are already made. +Write the refreshed state to .agent-state/TASK_STATE.md without generating a phase gate or final handoff. +``` diff --git a/skills/skill-context-keeper/references/prompt-templates.zh-CN.md b/skills/skill-context-keeper/references/prompt-templates.zh-CN.md new file mode 100644 index 0000000..47a2084 --- /dev/null +++ b/skills/skill-context-keeper/references/prompt-templates.zh-CN.md @@ -0,0 +1,43 @@ +# skill-context-keeper 提示词模板 + +当你希望 `skill-context-keeper` 刷新结构化长任务状态,而且不要偏离到流程门控或最终交接时,可直接复用下面这些模板。 + +## Positive Trigger Prompts + +- `请使用 skill-context-keeper 在继续之前根据仓库刷新当前任务状态。` +- `请把现有摘要与代码库现状对齐,并重写 .agent-state/TASK_STATE.md。` +- `刷新当前任务状态,明确区分事实、假设和决策,并给出推荐的下一步动作。` + +## Negative Trigger Prompts + +- `先决定项目阶段,并在实现前设置阶段门。` +- `为下一位代理生成最终交接说明,并结束这个线程。` +- `把规划、状态刷新和最终转交流程合并成一个整体工作流来处理。` + +## 刷新当前任务状态 + +当你想直接表达“刷新状态”时,可以这样说: + +```text +请使用 skill-context-keeper 刷新当前任务状态。 +检查仓库,把已验证事实与假设、决策分开, +更新 .agent-state/TASK_STATE.md,并在结尾写出推荐的下一步动作和仍需完成的验证。 +不要加入流程门控,也不要生成最终交接。 +``` + +## 紧凑续做模板 + +```text +请刷新这个长编码任务的当前状态。 +假设下游状态文件路径是 .agent-state/TASK_STATE.md。 +记录当前目标、范围、硬约束、已验证代码库事实、已完成工作、开放风险、最近决策,以及推荐的下一步动作。 +保持包边界收敛:只做状态维护。 +``` + +## 事实 / 假设 / 决策模板 + +```text +请重建任务状态,并把每一项明确标注为 Fact、Assumption 或 Decision。 +事实必须来自仓库证据,假设保持简短,只记录已经做出的决策。 +将刷新后的状态写入 .agent-state/TASK_STATE.md,不要生成阶段门控或最终交接。 +``` diff --git a/skills/skill-context-keeper/references/use-cases.md b/skills/skill-context-keeper/references/use-cases.md new file mode 100644 index 0000000..acab71e --- /dev/null +++ b/skills/skill-context-keeper/references/use-cases.md @@ -0,0 +1,46 @@ +# skill-context-keeper Use Cases + +`skill-context-keeper` is for maintaining structured long-task state while work is still in progress. 
+Use it when the thread needs a reliable refresh of the current task picture, not when it needs staged workflow control or a final handoff. + +## Positive Trigger Prompts + +- `Use skill-context-keeper to refresh the current task state before we continue coding.` +- `Rebuild the current task picture from the repo and update .agent-state/TASK_STATE.md.` +- `The summary is stale. Reconstruct facts, open risks, and the next action for this task.` +- `Refresh the current task state and carry forward unresolved work without making a handoff note.` +- `Before we continue, update the structured state file at .agent-state/TASK_STATE.md with verified codebase facts.` + +## Negative Trigger Prompts + +- `Split this migration into phases with checkpoints before we start implementation.` +- `Write the final handoff summary for the next agent taking over this task.` +- `Create a release checklist and decide which phase gate we need to pass next.` +- `Prepare the final user-facing completion note and wrap up the task.` +- `Coordinate the whole continuity suite for planning, state refresh, and handoff generation.` + +## Refresh Wording Patterns + +Use wording like this when the request is specifically about refreshing state: + +- `Refresh the current task state.` +- `Update the working state snapshot for this task.` +- `Reconcile the current summary with the repository and rewrite the task state file.` +- `Bring .agent-state/TASK_STATE.md up to date before more implementation.` + +## Facts vs Assumptions vs Decisions Example + +Example for `.agent-state/TASK_STATE.md`: + +- Fact: `tests/test_package_contract.py` exists and currently checks for trigger sections in both use-case references. +- Assumption: The next agent will continue using `.agent-state/TASK_STATE.md` as the downstream path because the package examples point there. +- Decision: Keep this package focused on state maintenance only, so phase gates and final handoffs stay in sibling packages. 
+ +## Typical Output Shape + +The package usually refreshes a compact state artifact such as `.agent-state/TASK_STATE.md` with: + +- verified facts from the codebase +- unresolved risks and open questions +- the next recommended action +- verification still needed before claiming completion diff --git a/skills/skill-context-keeper/references/use-cases.zh-CN.md b/skills/skill-context-keeper/references/use-cases.zh-CN.md new file mode 100644 index 0000000..950dcd2 --- /dev/null +++ b/skills/skill-context-keeper/references/use-cases.zh-CN.md @@ -0,0 +1,46 @@ +# skill-context-keeper 使用场景 + +`skill-context-keeper` 只负责维护长任务进行中的结构化状态。 +当线程需要可靠地刷新“当前任务图景”时使用它;如果需求是阶段门控或最终交接,就不该由这个包接管。 + +## Positive Trigger Prompts + +- `请使用 skill-context-keeper 刷新当前任务状态,然后我们再继续编码。` +- `根据仓库现状重建当前任务图景,并更新 .agent-state/TASK_STATE.md。` +- `现在的摘要已经过时了,请重建事实、未决风险和下一步动作。` +- `刷新当前任务状态,并延续未完成工作,但不要写成交接说明。` +- `继续实现前,请先把 .agent-state/TASK_STATE.md 更新为包含已验证代码库事实的版本。` + +## Negative Trigger Prompts + +- `在开始实现前,把这次迁移拆成带检查点的阶段计划。` +- `为下一位接手任务的代理写一份最终交接摘要。` +- `创建 release 检查清单,并决定下一个要通过的阶段门。` +- `准备最终给用户的完成说明,并结束这个任务。` +- `把规划、状态刷新和交接生成都作为一套连续性流程来协调。` + +## Refresh Wording Patterns + +如果请求的重点就是“刷新状态”,可以直接这样表达: + +- `刷新当前任务状态。` +- `更新这个任务的工作状态快照。` +- `根据仓库现状对齐当前摘要,并重写任务状态文件。` +- `在继续实现前,把 .agent-state/TASK_STATE.md 刷新到最新。` + +## Facts vs Assumptions vs Decisions Example + +以 `.agent-state/TASK_STATE.md` 为例: + +- Fact:`tests/test_package_contract.py` 已存在,并会检查两份 use-case 参考文档里都有触发与非触发章节。 +- Assumption:由于包内示例路径使用 `.agent-state/TASK_STATE.md`,下游代理会继续沿用这个状态文件位置。 +- Decision:这个包只负责状态维护,因此阶段门控和最终交接继续留给同套件中的兄弟包处理。 + +## Typical Output Shape + +这个包通常会刷新一个紧凑的状态产物,例如 `.agent-state/TASK_STATE.md`,内容包括: + +- 来自代码库的已验证事实 +- 未解决风险与开放问题 +- 推荐的下一步动作 +- 在宣告完成前仍需补做的验证 diff --git a/skills/skill-context-keeper/tests/.gitkeep b/skills/skill-context-keeper/tests/.gitkeep deleted file mode 100644 index 8b13789..0000000 --- a/skills/skill-context-keeper/tests/.gitkeep +++ /dev/null @@ -1 +0,0 @@ - From 
f29454aee4fdb541f36ecacd837817b69a0e17f3 Mon Sep 17 00:00:00 2001 From: NuoJin <1430093320@qq.com> Date: Wed, 25 Mar 2026 14:00:17 +0800 Subject: [PATCH 08/31] fix: tighten skill-context-keeper package docs --- .../skill-context-keeper/references/README.md | 12 ++++++-- .../references/README.zh-CN.md | 12 ++++++-- .../references/use-cases.zh-CN.md | 10 +++---- .../tests/test_package_contract.py | 30 +++++++++++++------ 4 files changed, 44 insertions(+), 20 deletions(-) diff --git a/skills/skill-context-keeper/references/README.md b/skills/skill-context-keeper/references/README.md index d6cd604..bd04b0c 100644 --- a/skills/skill-context-keeper/references/README.md +++ b/skills/skill-context-keeper/references/README.md @@ -1,7 +1,13 @@ # References for skill-context-keeper This references area explains the public boundary of `skill-context-keeper`. -It is reserved for reader-facing examples, state refresh patterns, and continuity prompts that stay focused on reconstructing ongoing task state. +It contains reader-facing examples, state refresh patterns, and continuity prompts that stay focused on reconstructing ongoing task state. -Later phases will add concrete examples and reusable templates here. -For now, use the package [README.md](../README.md) for installation and entry guidance, and [SKILL.md](../SKILL.md) for the routing-first trigger summary. +Available guides in this directory: + +- [use-cases.md](use-cases.md) for positive and negative trigger examples +- [use-cases.zh-CN.md](use-cases.zh-CN.md) for Chinese trigger examples +- [prompt-templates.en.md](prompt-templates.en.md) for English state-refresh prompts +- [prompt-templates.zh-CN.md](prompt-templates.zh-CN.md) for Chinese state-refresh prompts + +Use the package [README.md](../README.md) for installation and entry guidance, and [SKILL.md](../SKILL.md) for the routing-first trigger summary. 
diff --git a/skills/skill-context-keeper/references/README.zh-CN.md b/skills/skill-context-keeper/references/README.zh-CN.md index 7b9c2a6..1e32db1 100644 --- a/skills/skill-context-keeper/references/README.zh-CN.md +++ b/skills/skill-context-keeper/references/README.zh-CN.md @@ -1,7 +1,13 @@ # skill-context-keeper 参考资料 这个 `references/` 目录用于说明 `skill-context-keeper` 的公开边界。 -后续这里会放面向读者的示例、状态刷新模式,以及专注于“重建当前任务状态”的连续性提示词资料。 +这里已经提供面向读者的示例、状态刷新模式,以及专注于“重建当前任务状态”的连续性提示词资料。 -更具体的示例会在后续阶段补充。 -目前请先阅读包内 [README.md](../README.md) 了解安装与入口说明,再结合 [SKILL.md](../SKILL.md) 查看路由优先的触发摘要。 +当前可用资料包括: + +- [use-cases.md](use-cases.md):正向与反向触发示例 +- [use-cases.zh-CN.md](use-cases.zh-CN.md):中文触发示例 +- [prompt-templates.en.md](prompt-templates.en.md):英文状态刷新提示词模板 +- [prompt-templates.zh-CN.md](prompt-templates.zh-CN.md):中文状态刷新提示词模板 + +如需安装与入口说明,请阅读包内 [README.md](../README.md);如需查看路由优先的触发摘要,请结合 [SKILL.md](../SKILL.md)。 diff --git a/skills/skill-context-keeper/references/use-cases.zh-CN.md b/skills/skill-context-keeper/references/use-cases.zh-CN.md index 950dcd2..7d2240e 100644 --- a/skills/skill-context-keeper/references/use-cases.zh-CN.md +++ b/skills/skill-context-keeper/references/use-cases.zh-CN.md @@ -3,7 +3,7 @@ `skill-context-keeper` 只负责维护长任务进行中的结构化状态。 当线程需要可靠地刷新“当前任务图景”时使用它;如果需求是阶段门控或最终交接,就不该由这个包接管。 -## Positive Trigger Prompts +## 适用触发示例 - `请使用 skill-context-keeper 刷新当前任务状态,然后我们再继续编码。` - `根据仓库现状重建当前任务图景,并更新 .agent-state/TASK_STATE.md。` @@ -11,7 +11,7 @@ - `刷新当前任务状态,并延续未完成工作,但不要写成交接说明。` - `继续实现前,请先把 .agent-state/TASK_STATE.md 更新为包含已验证代码库事实的版本。` -## Negative Trigger Prompts +## 不适用触发示例 - `在开始实现前,把这次迁移拆成带检查点的阶段计划。` - `为下一位接手任务的代理写一份最终交接摘要。` @@ -19,7 +19,7 @@ - `准备最终给用户的完成说明,并结束这个任务。` - `把规划、状态刷新和交接生成都作为一套连续性流程来协调。` -## Refresh Wording Patterns +## “刷新当前任务状态”表达方式 如果请求的重点就是“刷新状态”,可以直接这样表达: @@ -28,7 +28,7 @@ - `根据仓库现状对齐当前摘要,并重写任务状态文件。` - `在继续实现前,把 .agent-state/TASK_STATE.md 刷新到最新。` -## Facts vs Assumptions vs Decisions Example +## 事实 / 假设 / 决策示例 以 `.agent-state/TASK_STATE.md` 为例: @@ -36,7 
+36,7 @@ - Assumption:由于包内示例路径使用 `.agent-state/TASK_STATE.md`,下游代理会继续沿用这个状态文件位置。 - Decision:这个包只负责状态维护,因此阶段门控和最终交接继续留给同套件中的兄弟包处理。 -## Typical Output Shape +## 常见输出形态 这个包通常会刷新一个紧凑的状态产物,例如 `.agent-state/TASK_STATE.md`,内容包括: diff --git a/skills/skill-context-keeper/tests/test_package_contract.py b/skills/skill-context-keeper/tests/test_package_contract.py index 60964ce..c0b4bb2 100644 --- a/skills/skill-context-keeper/tests/test_package_contract.py +++ b/skills/skill-context-keeper/tests/test_package_contract.py @@ -10,9 +10,12 @@ def test_skill_frontmatter_name(self): text = (ROOT / "SKILL.md").read_text(encoding="utf-8") self.assertIn("name: skill-context-keeper", text) - def test_readmes_exist(self): + def test_core_package_files_exist(self): self.assertTrue((ROOT / "README.md").exists()) self.assertTrue((ROOT / "README.zh-CN.md").exists()) + self.assertTrue((ROOT / "agents" / "openai.yaml").exists()) + self.assertTrue((ROOT / "references" / "prompt-templates.en.md").exists()) + self.assertTrue((ROOT / "references" / "prompt-templates.zh-CN.md").exists()) def test_task_state_template_has_required_sections(self): text = (ROOT / "assets" / "TASK_STATE.template.md").read_text(encoding="utf-8") @@ -30,20 +33,29 @@ def test_task_state_template_has_required_sections(self): ]: self.assertIn(heading, text) - def test_reference_guides_include_trigger_sections(self): - for relative_path in [ - ROOT / "references" / "use-cases.md", - ROOT / "references" / "use-cases.zh-CN.md", - ]: - text = relative_path.read_text(encoding="utf-8") - self.assertRegex(text, r"(?im)^## .*positive trigger") - self.assertRegex(text, r"(?im)^## .*negative trigger") + def test_english_use_cases_include_trigger_sections(self): + text = (ROOT / "references" / "use-cases.md").read_text(encoding="utf-8") + self.assertRegex(text, r"(?im)^## +Positive Trigger Prompts$") + self.assertRegex(text, r"(?im)^## +Negative Trigger Prompts$") + + def test_chinese_use_cases_include_trigger_sections(self): + text = 
(ROOT / "references" / "use-cases.zh-CN.md").read_text(encoding="utf-8") + self.assertRegex(text, r"(?m)^## +适用触发示例$") + self.assertRegex(text, r"(?m)^## +不适用触发示例$") def test_readme_spells_out_package_boundary(self): text = (ROOT / "README.md").read_text(encoding="utf-8") self.assertIn("does not own workflow gating", text) self.assertIn("does not own final handoffs", text) + def test_reference_indexes_point_to_published_files(self): + english = (ROOT / "references" / "README.md").read_text(encoding="utf-8") + chinese = (ROOT / "references" / "README.zh-CN.md").read_text(encoding="utf-8") + + for text in [english, chinese]: + self.assertIn("use-cases", text) + self.assertIn("prompt-templates", text) + if __name__ == "__main__": unittest.main() From 62ffd7c541d9e377ff0ddfec69e3f319a16a070d Mon Sep 17 00:00:00 2001 From: NuoJin <1430093320@qq.com> Date: Wed, 25 Mar 2026 14:10:08 +0800 Subject: [PATCH 09/31] test: add skill-phase-gate contract checks --- .../tests/test_package_contract.py | 41 +++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 skills/skill-phase-gate/tests/test_package_contract.py diff --git a/skills/skill-phase-gate/tests/test_package_contract.py b/skills/skill-phase-gate/tests/test_package_contract.py new file mode 100644 index 0000000..0935f87 --- /dev/null +++ b/skills/skill-phase-gate/tests/test_package_contract.py @@ -0,0 +1,41 @@ +from pathlib import Path +import unittest + + +ROOT = Path(__file__).resolve().parents[1] + + +class SkillPhaseGatePackageTests(unittest.TestCase): + def test_checklist_assets_exist(self): + self.assertTrue((ROOT / "assets" / "PREFLIGHT.template.md").exists()) + self.assertTrue((ROOT / "assets" / "POSTFLIGHT.template.md").exists()) + + def test_preflight_template_has_required_sections(self): + text = (ROOT / "assets" / "PREFLIGHT.template.md").read_text(encoding="utf-8") + for heading in [ + "## Current Goal", + "## Current Constraints", + "## Expected Files / Modules To Change", + "## Files / 
Modules Explicitly Not Changing", + "## Verification Plan", + ]: + self.assertIn(heading, text) + + def test_postflight_template_has_required_sections(self): + text = (ROOT / "assets" / "POSTFLIGHT.template.md").read_text(encoding="utf-8") + for heading in [ + "## Actual Files / Modules Changed", + "## Actual Validations Run", + "## Remaining Risks", + "## Handoff Recommended?", + ]: + self.assertIn(heading, text) + + def test_readme_rules_out_trivial_or_explanation_only_usage(self): + text = (ROOT / "README.md").read_text(encoding="utf-8").lower() + self.assertIn("not for trivial one-line edits", text) + self.assertIn("not for pure explanation tasks", text) + + +if __name__ == "__main__": + unittest.main() From 553b3d07dbc4a9ad140b5e77306a2f5a323112be Mon Sep 17 00:00:00 2001 From: NuoJin <1430093320@qq.com> Date: Wed, 25 Mar 2026 14:10:20 +0800 Subject: [PATCH 10/31] feat: add skill-phase-gate package --- skills/skill-phase-gate/README.md | 35 ++++++++++---- skills/skill-phase-gate/README.zh-CN.md | 35 ++++++++++---- skills/skill-phase-gate/SKILL.md | 42 ++++++++++++---- skills/skill-phase-gate/agents/openai.yaml | 4 +- skills/skill-phase-gate/assets/.gitkeep | 1 - .../assets/POSTFLIGHT.template.md | 14 ++++++ .../assets/PREFLIGHT.template.md | 16 +++++++ skills/skill-phase-gate/references/README.md | 10 ++-- .../references/README.zh-CN.md | 10 ++-- .../references/prompt-templates.en.md | 42 ++++++++++++++++ .../references/prompt-templates.zh-CN.md | 42 ++++++++++++++++ .../skill-phase-gate/references/use-cases.md | 48 +++++++++++++++++++ .../references/use-cases.zh-CN.md | 48 +++++++++++++++++++ skills/skill-phase-gate/tests/.gitkeep | 1 - 14 files changed, 308 insertions(+), 40 deletions(-) delete mode 100644 skills/skill-phase-gate/assets/.gitkeep create mode 100644 skills/skill-phase-gate/assets/POSTFLIGHT.template.md create mode 100644 skills/skill-phase-gate/assets/PREFLIGHT.template.md create mode 100644 
skills/skill-phase-gate/references/prompt-templates.en.md create mode 100644 skills/skill-phase-gate/references/prompt-templates.zh-CN.md create mode 100644 skills/skill-phase-gate/references/use-cases.md create mode 100644 skills/skill-phase-gate/references/use-cases.zh-CN.md delete mode 100644 skills/skill-phase-gate/tests/.gitkeep diff --git a/skills/skill-phase-gate/README.md b/skills/skill-phase-gate/README.md index c0a9b4b..1650f1b 100644 --- a/skills/skill-phase-gate/README.md +++ b/skills/skill-phase-gate/README.md @@ -4,21 +4,31 @@ ## Overview -`skill-phase-gate` is the package for deciding when a coding task needs explicit phases, checkpoints, and exit criteria before execution continues. -It keeps staged work intentional so a complex thread does not blur into an unreviewed one-shot run. +`skill-phase-gate` adds compact preflight and postflight checkpoints around meaningful coding work. +It keeps risky execution intentional without taking over long-term task state, generic planning, or final handoffs. 
## Best For -- splitting a multi-step task before implementation starts -- adding checkpoints to risky refactors or migrations -- making phase boundaries visible when several changes depend on each other -- deciding whether a task is small enough to proceed directly or large enough to gate +- preflight before a refactor, migration, or other multi-file change +- postflight after a meaningful edit when you want to verify what actually changed +- risky edits where expected files, explicit non-goals, and a verification plan should be clear +- pre-commit checkpoints when the work deserves one more deliberate pass + +## Meaningful Checkpoint Bar + +Use this package when the checkpoint itself is valuable: + +- good fit: refactors, multi-file changes, risky edits, or pre-commit checkpoints +- bad fit: typo fixes, tiny one-line changes, pure explanation tasks, or generic planning requests ## What It Is Not For +- not for trivial one-line edits +- not for pure explanation tasks - reconstructing stale or missing task context - summarizing the current state after an interruption - writing a pause note or transfer package for another agent +- owning long-term state that belongs to `skill-context-keeper` - orchestrating the whole long-task continuity suite ## Install @@ -32,11 +42,16 @@ You can ask Codex in natural language: ## How To Use -Use this package before the task drifts into implementation. -Describe the multi-step goal, the points that need review or verification, and where the work should pause before moving forward, then have the skill turn that into a narrow staged plan. +Use this package at a meaningful checkpoint before or after implementation work. +Describe the current goal, the key constraints, the expected files, the files you are explicitly not changing, and the verification plan for a preflight gate, or ask for a postflight gate that records actual changes, validations, remaining risks, and whether handoff is recommended. 
+ +If you also need current task state to persist across a long-running thread, keep that state in `skill-context-keeper`; this package only frames the immediate checkpoint. ## References - `SKILL.md` for trigger routing and package boundaries -- [references/README.md](references/README.md) for the package boundary and the planned reader-facing reference scope -- `assets/` for future phase-plan, checkpoint, and exit-criteria templates +- [references/README.md](references/README.md) for the reader-facing reference index +- [references/use-cases.md](references/use-cases.md) for positive and negative trigger examples +- [references/prompt-templates.en.md](references/prompt-templates.en.md) for ready-to-paste prompts +- [assets/PREFLIGHT.template.md](assets/PREFLIGHT.template.md) for the preflight checklist +- [assets/POSTFLIGHT.template.md](assets/POSTFLIGHT.template.md) for the postflight checklist diff --git a/skills/skill-phase-gate/README.zh-CN.md b/skills/skill-phase-gate/README.zh-CN.md index aaea419..ec506bf 100644 --- a/skills/skill-phase-gate/README.zh-CN.md +++ b/skills/skill-phase-gate/README.zh-CN.md @@ -4,21 +4,31 @@ ## Overview -`skill-phase-gate` 用于判断一个编码任务是否需要在继续执行前先建立明确的阶段、检查点和退出条件。 -它让分阶段工作保持清晰和可审阅,避免复杂线程被当成一次性动作直接冲过去。 +`skill-phase-gate` 用于在有分量的编码工作前后加入紧凑的 preflight / postflight 检查点。 +它帮助高风险执行保持清晰,但不会接管长期任务状态、泛化规划或最终交接。 ## Best For -- 在开始实现前拆分一个多步骤任务 -- 为高风险重构或迁移增加检查点 -- 当多个改动互相依赖时,先把阶段边界讲清楚 -- 判断一个任务是可以直接做,还是应该先门控分阶段 +- 在重构、迁移或多文件改动前做 preflight +- 在完成一次有分量的修改后做 postflight,核对实际改动和验证结果 +- 对高风险编辑先明确预期修改范围、明确不改动的范围以及验证计划 +- 在提交前增加一次有意义的检查点 + +## Meaningful Checkpoint Bar + +只有当“加一道检查点”本身有价值时才适合使用: + +- 适合:重构、多文件修改、高风险编辑、提交前检查点 +- 不适合:typo 修复、极小的一行改动、纯说明类请求、泛化规划 ## What It Is Not For +- 不适合琐碎的一行改动 +- 不适合纯解释或纯讲解任务 - 重建过时或缺失的任务上下文 - 在中断后刷新当前状态 - 为另一个执行者撰写暂停或转交说明 +- 持有本应由 `skill-context-keeper` 维护的长期状态 - 统筹整套长任务连续性套件 ## Install @@ -32,11 +42,16 @@ ## How To Use -在任务还没有滑进实现细节之前,就可以先调用这个包。 -描述你的多步骤目标、哪些位置需要审阅或验证、以及哪些边界应该先停一下,再让它把这些内容收敛成一个窄而清晰的分阶段计划。 
+在实现前后的关键检查点调用这个包。 +如果是 preflight,请说明当前目标、关键约束、预期会改动的文件、明确不改动的文件,以及验证计划;如果是 postflight,请让它记录实际改动、实际验证、剩余风险,以及是否建议交接。 + +如果你还需要跨长线程保存持续状态,请继续使用 `skill-context-keeper`;这个包只负责当前这一道检查点。 ## References - `SKILL.md`:触发路由与包边界 -- [references/README.zh-CN.md](references/README.zh-CN.md):包边界说明,以及后续面向读者参考资料的范围 -- `assets/`:后续的阶段计划、检查点和退出条件模板 +- [references/README.zh-CN.md](references/README.zh-CN.md):面向读者的参考索引 +- [references/use-cases.zh-CN.md](references/use-cases.zh-CN.md):适用与不适用示例 +- [references/prompt-templates.zh-CN.md](references/prompt-templates.zh-CN.md):可直接粘贴的提示词模板 +- [assets/PREFLIGHT.template.md](assets/PREFLIGHT.template.md):preflight 清单 +- [assets/POSTFLIGHT.template.md](assets/POSTFLIGHT.template.md):postflight 清单 diff --git a/skills/skill-phase-gate/SKILL.md b/skills/skill-phase-gate/SKILL.md index ec77948..da65060 100644 --- a/skills/skill-phase-gate/SKILL.md +++ b/skills/skill-phase-gate/SKILL.md @@ -1,24 +1,46 @@ --- name: skill-phase-gate -description: Use when the user needs to decide whether an ongoing coding task should be split into explicit phases, checkpoints, or exit criteria before more execution continues. +description: Use when a meaningful coding checkpoint is needed before or after risky execution, such as a multi-file refactor, migration, or pre-commit pause. --- # Skill Phase Gate ## Overview -Add clear phase boundaries to multi-step coding work when the task should not be treated as a single uninterrupted run. -Use this skill for staged execution decisions, not for state refresh or pause handoffs. +Use `skill-phase-gate` to add a compact preflight or postflight checkpoint around meaningful coding work. +It is for operational checkpoints before or after execution, not for task-state ownership, generic planning, or final handoff writing. 
## Use This Skill When -- a task needs explicit phases before coding or migration work continues -- the next step depends on checkpoints, review gates, or exit criteria -- a thread is drifting into ad hoc execution and needs deliberate structure -- you need to decide whether the work is large enough to stage at all +- you are about to start a refactor, migration, or multi-file change and want a brief preflight gate +- you have finished a meaningful edit and need a postflight checkpoint before commit or handoff +- the task is risky enough that expected files, explicit non-goals, and a verification plan should be stated out loud +- you want a pre-commit checkpoint for work that is substantial enough to deserve one more deliberate review pass + +## Do Not Use This Skill When + +- the request is a typo fix, tiny one-line edit, or similarly trivial change +- the user only wants an explanation, walkthrough, or analysis with no checkpoint artifact +- the main need is reconstructing current task state or preserving long-running state ownership +- the main need is generating a full transfer packet or final handoff summary + +## Meaningful Checkpoint Bar + +This package is a good fit when the checkpoint serves a meaningful workflow boundary: + +- good fit: refactors, multi-file changes, risky edits, migration checkpoints, or pre-commit review passes +- bad fit: typo fixes, tiny one-line changes, pure explanation tasks, or generic up-front planning + +## Package Boundary + +- `skill-phase-gate` can mention current task state only to support the immediate checkpoint +- `skill-context-keeper` remains the owner of structured ongoing state and refreshes +- this package does not replace planning packages, and it does not become a handoff generator ## References -- `README.md` and `README.zh-CN.md`: package overview and usage guidance -- `references/`: future public examples for phase splits, checkpoints, and gate prompts -- `assets/`: future templates for phase plans, checkpoint 
lists, and exit criteria +- `README.md` and `README.zh-CN.md`: install guidance, trigger boundaries, and non-goals +- `assets/PREFLIGHT.template.md`: compact pre-execution gate +- `assets/POSTFLIGHT.template.md`: compact post-execution gate +- `references/use-cases.md` and `references/use-cases.zh-CN.md`: reader-facing positive and negative examples +- `references/prompt-templates.en.md` and `references/prompt-templates.zh-CN.md`: ready-to-paste prompt patterns diff --git a/skills/skill-phase-gate/agents/openai.yaml b/skills/skill-phase-gate/agents/openai.yaml index f6963a8..640ba21 100644 --- a/skills/skill-phase-gate/agents/openai.yaml +++ b/skills/skill-phase-gate/agents/openai.yaml @@ -1,4 +1,4 @@ interface: display_name: "Skill Phase Gate" - short_description: "Add explicit phases, checkpoints, and exit criteria to long coding tasks" - default_prompt: "Use $skill-phase-gate when the task should be staged before more execution continues: decide whether the work needs explicit phases, define checkpoints and exit criteria, and keep the output focused on workflow boundaries rather than context refresh or handoff notes." + short_description: "Add meaningful preflight or postflight checkpoints to substantial coding work" + default_prompt: "Use $skill-phase-gate when the task needs a meaningful checkpoint before or after execution. Start from the task at hand, choose either a compact preflight or postflight gate, keep the output checklist-oriented, and avoid using this package for trivial one-line edits, pure explanation requests, long-term state ownership, or final handoff generation." 
diff --git a/skills/skill-phase-gate/assets/.gitkeep b/skills/skill-phase-gate/assets/.gitkeep deleted file mode 100644 index 8b13789..0000000 --- a/skills/skill-phase-gate/assets/.gitkeep +++ /dev/null @@ -1 +0,0 @@ - diff --git a/skills/skill-phase-gate/assets/POSTFLIGHT.template.md b/skills/skill-phase-gate/assets/POSTFLIGHT.template.md new file mode 100644 index 0000000..a6cbd4c --- /dev/null +++ b/skills/skill-phase-gate/assets/POSTFLIGHT.template.md @@ -0,0 +1,14 @@ +# Postflight Gate + +## Actual Files / Modules Changed +- + +## Actual Validations Run +- + +## Remaining Risks +- + +## Handoff Recommended? +- Yes / No: +- Why: diff --git a/skills/skill-phase-gate/assets/PREFLIGHT.template.md b/skills/skill-phase-gate/assets/PREFLIGHT.template.md new file mode 100644 index 0000000..30f3c8e --- /dev/null +++ b/skills/skill-phase-gate/assets/PREFLIGHT.template.md @@ -0,0 +1,16 @@ +# Preflight Gate + +## Current Goal +- + +## Current Constraints +- + +## Expected Files / Modules To Change +- + +## Files / Modules Explicitly Not Changing +- + +## Verification Plan +- diff --git a/skills/skill-phase-gate/references/README.md b/skills/skill-phase-gate/references/README.md index 3d2baf4..14fca5d 100644 --- a/skills/skill-phase-gate/references/README.md +++ b/skills/skill-phase-gate/references/README.md @@ -1,7 +1,11 @@ # References for skill-phase-gate This references area explains the public boundary of `skill-phase-gate`. -It is reserved for reader-facing examples, checkpoint patterns, and phase-planning material that help stage multi-step work without turning into context refresh or handoff writing. +It is reserved for reader-facing checkpoint examples and prompt patterns that stay focused on meaningful preflight and postflight gates. -Later phases will add concrete examples and reusable templates here. -For now, use the package [README.md](../README.md) for installation and entry guidance, and [SKILL.md](../SKILL.md) for the routing-first trigger summary. 
+Use these files for concrete examples: + +- [use-cases.md](use-cases.md): when the package is and is not a good fit +- [prompt-templates.en.md](prompt-templates.en.md): ready-to-paste English prompts + +For install and boundary guidance, see [README.md](../README.md) and [SKILL.md](../SKILL.md). diff --git a/skills/skill-phase-gate/references/README.zh-CN.md b/skills/skill-phase-gate/references/README.zh-CN.md index a0bd71e..48f7b5f 100644 --- a/skills/skill-phase-gate/references/README.zh-CN.md +++ b/skills/skill-phase-gate/references/README.zh-CN.md @@ -1,7 +1,11 @@ # skill-phase-gate 参考资料 这个 `references/` 目录用于说明 `skill-phase-gate` 的公开边界。 -后续这里会放面向读者的示例、检查点模式,以及帮助多步骤任务分阶段推进的资料,同时避免扩展成上下文刷新或交接说明。 +这里放的是面向读者的检查点示例和提示词模式,重点是有分量的 preflight / postflight 门控。 -更具体的示例会在后续阶段补充。 -目前请先阅读包内 [README.md](../README.md) 了解安装与入口说明,再结合 [SKILL.md](../SKILL.md) 查看路由优先的触发摘要。 +可先阅读这些文件: + +- [use-cases.zh-CN.md](use-cases.zh-CN.md):适用与不适用示例 +- [prompt-templates.zh-CN.md](prompt-templates.zh-CN.md):可直接粘贴的中文提示词 + +安装和边界说明仍以 [README.zh-CN.md](../README.zh-CN.md) 和 [SKILL.md](../SKILL.md) 为准。 diff --git a/skills/skill-phase-gate/references/prompt-templates.en.md b/skills/skill-phase-gate/references/prompt-templates.en.md new file mode 100644 index 0000000..b7c133d --- /dev/null +++ b/skills/skill-phase-gate/references/prompt-templates.en.md @@ -0,0 +1,42 @@ +# skill-phase-gate Prompt Templates + +Use these prompts when you want a crisp checkpoint around meaningful coding work. +If you need durable task-state ownership too, keep that with `skill-context-keeper`. 
+ +## Positive Trigger Prompts + +- `Use skill-phase-gate to create a preflight gate before this multi-file change.` +- `Use skill-phase-gate to create a postflight gate for this meaningful edit before I commit.` + +## Negative Trigger Prompts + +- `Skip the checkpoint and just fix this one-line typo.` +- `Explain the code only; do not create a checkpoint artifact.` + +## Preflight Template Prompt + +```text +Use skill-phase-gate to create a preflight gate for this meaningful coding task. +Capture the current goal, current constraints, expected files or modules to change, +files or modules explicitly not changing, and the verification plan. +Keep it brief, checklist-oriented, and task-first. +Do not turn this into generic planning or long-term state tracking. +If durable task-state ownership is needed, leave that to skill-context-keeper. +``` + +## Postflight Template Prompt + +```text +Use skill-phase-gate to create a postflight gate for this meaningful coding task. +Capture the actual files or modules changed, the actual validations run, +remaining risks, and whether handoff is recommended. +Keep it operational and brief. +Do not turn this into a final handoff package, and do not take over state ownership from skill-context-keeper. +``` + +## Trivial Change Anti-Example + +```text +This is only a one-line typo fix. Do not use skill-phase-gate. +Make the edit directly without adding a preflight or postflight checkpoint. 
+``` diff --git a/skills/skill-phase-gate/references/prompt-templates.zh-CN.md b/skills/skill-phase-gate/references/prompt-templates.zh-CN.md new file mode 100644 index 0000000..a1f02fd --- /dev/null +++ b/skills/skill-phase-gate/references/prompt-templates.zh-CN.md @@ -0,0 +1,42 @@ +# skill-phase-gate 提示词模板 + +当你想在有分量的编码工作前后加一道简洁检查点时,可直接使用这些模板。 +如果还需要持续持有任务状态,请继续交给 `skill-context-keeper`。 + +## 适用触发示例 + +- `请用 skill-phase-gate 在这次多文件修改前生成一个 preflight gate。` +- `请用 skill-phase-gate 在我提交前为这次有分量的修改生成一个 postflight gate。` + +## 不适用触发示例 + +- `别加检查点了,直接把这一行 typo 改掉。` +- `只解释代码,不要生成检查点产物。` + +## Preflight 模板提示词 + +```text +请用 skill-phase-gate 为这次有分量的编码任务生成一个 preflight gate。 +内容要覆盖当前目标、当前约束、预期会改动的文件或模块、 +明确不会改动的文件或模块,以及验证计划。 +保持简短、可执行、清单化。 +不要把它扩展成泛化规划,也不要接管长期状态跟踪。 +如果需要持续状态归属,请交给 skill-context-keeper。 +``` + +## Postflight 模板提示词 + +```text +请用 skill-phase-gate 为这次有分量的编码任务生成一个 postflight gate。 +内容要覆盖实际改动的文件或模块、实际运行的验证、 +剩余风险,以及是否建议交接。 +保持简短、偏执行视角。 +不要把它扩展成最终交接包,也不要从 skill-context-keeper 手里接管状态归属。 +``` + +## 琐碎改动反例 + +```text +这只是一个一行 typo 修复,不要使用 skill-phase-gate。 +直接修改即可,不需要 preflight 或 postflight 检查点。 +``` diff --git a/skills/skill-phase-gate/references/use-cases.md b/skills/skill-phase-gate/references/use-cases.md new file mode 100644 index 0000000..a120199 --- /dev/null +++ b/skills/skill-phase-gate/references/use-cases.md @@ -0,0 +1,48 @@ +# skill-phase-gate Use Cases + +`skill-phase-gate` is for meaningful checkpoints around substantial coding work. +Use it for a compact preflight before risky execution or a compact postflight after a meaningful edit, while leaving long-term task state with `skill-context-keeper`. + +## Positive Trigger Prompts + +- `Use skill-phase-gate to write a preflight gate before this multi-file refactor.` +- `Before I touch the migration, add a checkpoint with expected files, non-goals, and a verification plan.` +- `We just finished a meaningful edit. 
Run a postflight gate that records actual changes, validations, and remaining risks.` +- `Give me a pre-commit checkpoint for this risky edit so we can sanity-check scope and verification.` + +## Negative Trigger Prompts + +- `Fix this typo in one line and move on.` +- `Explain how this package works without generating any checkpoint artifact.` +- `Refresh the current task state and keep an ongoing summary for the next hour of work.` +- `Write the final handoff package for the next agent taking over.` + +## Common Use Cases + +### Preflight Before a Multi-File Change + +Use the preflight gate when you want a fast statement of: + +- the current goal +- the active constraints +- which files or modules you expect to touch +- which files or modules you are explicitly not changing +- how you plan to verify the work + +This is especially useful before refactors, migrations, or edits where it is easy for scope to drift. + +### Postflight After a Meaningful Edit + +Use the postflight gate when implementation is done and you want to capture: + +- which files or modules actually changed +- which validations you actually ran +- what risks remain +- whether a handoff is recommended + +The postflight gate is a checkpoint, not a replacement for a final handoff package. + +## State Ownership Boundary + +If the thread also needs durable task-state ownership, keep that with `skill-context-keeper`. +`skill-phase-gate` may mention the current state briefly inside the checkpoint, but it does not own the running task record. 
diff --git a/skills/skill-phase-gate/references/use-cases.zh-CN.md b/skills/skill-phase-gate/references/use-cases.zh-CN.md new file mode 100644 index 0000000..da78df9 --- /dev/null +++ b/skills/skill-phase-gate/references/use-cases.zh-CN.md @@ -0,0 +1,48 @@ +# skill-phase-gate 使用场景 + +`skill-phase-gate` 适用于围绕有分量的编码工作增加检查点。 +它可以在高风险执行前提供紧凑的 preflight,也可以在完成一次有意义的修改后提供紧凑的 postflight;长期任务状态仍由 `skill-context-keeper` 持有。 + +## 适用触发示例 + +- `请用 skill-phase-gate 在这次多文件重构前写一个 preflight gate。` +- `在我开始改这个迁移之前,加一道检查点,写清楚预期文件、非目标和验证计划。` +- `这次有分量的修改已经做完了,请跑一个 postflight gate,记录实际改动、实际验证和剩余风险。` +- `给这次高风险编辑补一个提交前检查点,先核对范围和验证方式。` + +## 不适用触发示例 + +- `把这个 typo 改掉就行,只有一行。` +- `只解释一下这个包怎么用,不要生成任何检查点产物。` +- `刷新当前任务状态,并在接下来一小时持续维护摘要。` +- `给下一个接手的 agent 写最终交接包。` + +## 常见使用方式 + +### 多文件修改前的 Preflight + +适合先快速讲清楚: + +- 当前目标 +- 当前约束 +- 预期会改动哪些文件或模块 +- 明确不会改动哪些文件或模块 +- 打算如何验证 + +这对重构、迁移或容易范围失控的修改尤其有用。 + +### 有分量修改完成后的 Postflight + +适合在实现完成后补一层检查,记录: + +- 实际改动了哪些文件或模块 +- 实际运行了哪些验证 +- 还剩哪些风险 +- 是否建议交接 + +postflight gate 只是检查点,不替代最终交接包。 + +## 状态归属边界 + +如果线程还需要持续维护任务状态,请继续交给 `skill-context-keeper`。 +`skill-phase-gate` 可以在检查点里简短提到当前状态,但不拥有长期任务记录。 diff --git a/skills/skill-phase-gate/tests/.gitkeep b/skills/skill-phase-gate/tests/.gitkeep deleted file mode 100644 index 8b13789..0000000 --- a/skills/skill-phase-gate/tests/.gitkeep +++ /dev/null @@ -1 +0,0 @@ - From fa6eb4b330ac35caa7c0d7f0e65e6f9b04025364 Mon Sep 17 00:00:00 2001 From: NuoJin <1430093320@qq.com> Date: Wed, 25 Mar 2026 14:13:39 +0800 Subject: [PATCH 11/31] test: cover skill-phase-gate package surface --- .../skill-phase-gate/tests/test_package_contract.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/skills/skill-phase-gate/tests/test_package_contract.py b/skills/skill-phase-gate/tests/test_package_contract.py index 0935f87..757fd1f 100644 --- a/skills/skill-phase-gate/tests/test_package_contract.py +++ b/skills/skill-phase-gate/tests/test_package_contract.py @@ -6,6 +6,19 @@ class 
SkillPhaseGatePackageTests(unittest.TestCase): + def test_core_package_files_exist(self): + for path in [ + ROOT / "README.md", + ROOT / "README.zh-CN.md", + ROOT / "SKILL.md", + ROOT / "agents" / "openai.yaml", + ROOT / "references" / "use-cases.md", + ROOT / "references" / "use-cases.zh-CN.md", + ROOT / "references" / "prompt-templates.en.md", + ROOT / "references" / "prompt-templates.zh-CN.md", + ]: + self.assertTrue(path.exists(), f"expected package file to exist: {path}") + def test_checklist_assets_exist(self): self.assertTrue((ROOT / "assets" / "PREFLIGHT.template.md").exists()) self.assertTrue((ROOT / "assets" / "POSTFLIGHT.template.md").exists()) From b08ba0831f076b18f229c700ad0aab37a3cdfec9 Mon Sep 17 00:00:00 2001 From: NuoJin <1430093320@qq.com> Date: Wed, 25 Mar 2026 14:22:05 +0800 Subject: [PATCH 12/31] test: add skill-handoff-summary contract checks --- skills/skill-handoff-summary/tests/.gitkeep | 1 - .../tests/test_package_contract.py | 60 +++++++++++++++++++ 2 files changed, 60 insertions(+), 1 deletion(-) delete mode 100644 skills/skill-handoff-summary/tests/.gitkeep create mode 100644 skills/skill-handoff-summary/tests/test_package_contract.py diff --git a/skills/skill-handoff-summary/tests/.gitkeep b/skills/skill-handoff-summary/tests/.gitkeep deleted file mode 100644 index 8b13789..0000000 --- a/skills/skill-handoff-summary/tests/.gitkeep +++ /dev/null @@ -1 +0,0 @@ - diff --git a/skills/skill-handoff-summary/tests/test_package_contract.py b/skills/skill-handoff-summary/tests/test_package_contract.py new file mode 100644 index 0000000..10e9306 --- /dev/null +++ b/skills/skill-handoff-summary/tests/test_package_contract.py @@ -0,0 +1,60 @@ +from pathlib import Path +import unittest + + +ROOT = Path(__file__).resolve().parents[1] + + +class SkillHandoffSummaryPackageTests(unittest.TestCase): + def test_core_package_files_exist(self): + for path in [ + ROOT / "README.md", + ROOT / "README.zh-CN.md", + ROOT / "SKILL.md", + ROOT / "agents" / 
"openai.yaml", + ROOT / "references" / "use-cases.md", + ROOT / "references" / "use-cases.zh-CN.md", + ROOT / "references" / "prompt-templates.en.md", + ROOT / "references" / "prompt-templates.zh-CN.md", + ROOT / "assets" / "HANDOFF.template.md", + ]: + self.assertTrue(path.exists(), f"expected package file to exist: {path}") + + def test_handoff_template_has_required_sections(self): + text = (ROOT / "assets" / "HANDOFF.template.md").read_text(encoding="utf-8") + for heading in [ + "# Handoff Summary", + "## Task Summary", + "## Current Status", + "## What Changed In This Session", + "## Hard Constraints To Preserve", + "## Files / Modules Of Interest", + "## Open Problems", + "## Exact Next Action", + "## Resume Prompt", + ]: + self.assertIn(heading, text) + + def test_readme_describes_continuation_boundary(self): + text = (ROOT / "README.md").read_text(encoding="utf-8").lower() + self.assertIn("continuation-oriented", text) + self.assertIn("not whole-project documentation", text) + self.assertIn(".agent-state/handoff.md", text) + + def test_bilingual_references_include_positive_and_negative_examples(self): + english = (ROOT / "references" / "use-cases.md").read_text(encoding="utf-8") + chinese = (ROOT / "references" / "use-cases.zh-CN.md").read_text(encoding="utf-8") + + self.assertRegex(english, r"(?im)^## +Positive Trigger Prompts$") + self.assertRegex(english, r"(?im)^## +Negative Trigger Prompts$") + self.assertRegex(chinese, r"(?m)^## +适用触发示例$") + self.assertRegex(chinese, r"(?m)^## +不适用触发示例$") + + def test_resume_prompt_is_explicitly_present_in_template(self): + text = (ROOT / "assets" / "HANDOFF.template.md").read_text(encoding="utf-8") + self.assertIn("Resume Prompt", text) + self.assertIn("Resume this task from .agent-state/HANDOFF.md.", text) + + +if __name__ == "__main__": + unittest.main() From b7ad09c7fcf3aec86266b290e1ec27f6ddc8efb0 Mon Sep 17 00:00:00 2001 From: NuoJin <1430093320@qq.com> Date: Wed, 25 Mar 2026 14:22:28 +0800 Subject: [PATCH 13/31] 
feat: add skill-handoff-summary package --- skills/skill-handoff-summary/README.md | 31 +++++++++++++--- skills/skill-handoff-summary/README.zh-CN.md | 31 +++++++++++++--- skills/skill-handoff-summary/SKILL.md | 20 ++++++----- .../skill-handoff-summary/agents/openai.yaml | 4 +-- skills/skill-handoff-summary/assets/.gitkeep | 1 - .../assets/HANDOFF.template.md | 25 +++++++++++++ .../references/README.md | 10 ++++-- .../references/README.zh-CN.md | 10 ++++-- .../references/prompt-templates.en.md | 35 ++++++++++++++++++ .../references/prompt-templates.zh-CN.md | 35 ++++++++++++++++++ .../references/use-cases.md | 36 +++++++++++++++++++ .../references/use-cases.zh-CN.md | 36 +++++++++++++++++++ 12 files changed, 246 insertions(+), 28 deletions(-) delete mode 100644 skills/skill-handoff-summary/assets/.gitkeep create mode 100644 skills/skill-handoff-summary/assets/HANDOFF.template.md create mode 100644 skills/skill-handoff-summary/references/prompt-templates.en.md create mode 100644 skills/skill-handoff-summary/references/prompt-templates.zh-CN.md create mode 100644 skills/skill-handoff-summary/references/use-cases.md create mode 100644 skills/skill-handoff-summary/references/use-cases.zh-CN.md diff --git a/skills/skill-handoff-summary/README.md b/skills/skill-handoff-summary/README.md index 0d5de02..9907a21 100644 --- a/skills/skill-handoff-summary/README.md +++ b/skills/skill-handoff-summary/README.md @@ -4,8 +4,9 @@ ## Overview -`skill-handoff-summary` is the package for writing a clean pause or transfer summary when long-running coding work needs to stop and resume later. -It packages status, blockers, and next steps so the next agent does not have to reconstruct intent from scattered thread history. +`skill-handoff-summary` is the narrow package for writing continuation-oriented pause or transfer summaries when long-running coding work needs to stop and resume later. 
+It packages status, blockers, preserved constraints, and the exact next action so the next session does not have to reconstruct intent from scattered thread history. +It is for compact handoffs such as `.agent-state/HANDOFF.md`, not whole-project documentation. ## Best For @@ -19,8 +20,23 @@ It packages status, blockers, and next steps so the next agent does not have to - rebuilding the current task state before work continues - deciding whether a task needs staged phases or checkpoints - coordinating the atomic packages as one suite-level workflow +- maintaining long-term state across the whole task +- producing whole-project documentation or repository tours - replacing the final user-facing answer when no handoff is needed +## Package Boundary + +Use this package when the work is pausing or changing hands and the next session needs a fast restart note. +Keep the output continuation-oriented, compact, and immediately actionable. + +This package specializes in handoff creation only: + +- write or refresh a concise artifact such as `.agent-state/HANDOFF.md` +- preserve the task summary, current status, hard constraints, open problems, and the exact next action +- include a reusable resume prompt for the next session + +This package does not own long-term state, does not own workflow gating, and should not expand into whole-project documentation. + ## Install Install `skill-handoff-summary` from this repository with the standard package path for published Codex skills. @@ -33,10 +49,15 @@ You can ask Codex in natural language: ## How To Use Reach for this package when execution is about to pause or move to another owner. -Describe the current status, unresolved questions, blockers, and the very next actions, then have the skill turn that into a concise transfer note rather than a full re-plan. 
+Describe the current status, unresolved questions, blockers, hard constraints, and the very next action, then have the skill turn that into a concise continuation-oriented transfer note rather than a full re-plan. +If you want a concrete downstream artifact, say so explicitly, for example: `Write the handoff to .agent-state/HANDOFF.md and end with a resume prompt for the next session.` ## References - `SKILL.md` for trigger routing and package boundaries -- [references/README.md](references/README.md) for the package boundary and the planned reader-facing reference scope -- `assets/` for future handoff, blocker, and next-step templates +- [references/README.md](references/README.md) for the reader-facing reference index +- [references/use-cases.md](references/use-cases.md) for positive and negative trigger examples +- [references/use-cases.zh-CN.md](references/use-cases.zh-CN.md) for Chinese trigger examples +- [references/prompt-templates.en.md](references/prompt-templates.en.md) for reusable handoff and resume prompts +- [references/prompt-templates.zh-CN.md](references/prompt-templates.zh-CN.md) for reusable Chinese handoff and resume prompts +- [assets/HANDOFF.template.md](assets/HANDOFF.template.md) for the compact handoff artifact template diff --git a/skills/skill-handoff-summary/README.zh-CN.md b/skills/skill-handoff-summary/README.zh-CN.md index b7afed4..33f1d7f 100644 --- a/skills/skill-handoff-summary/README.zh-CN.md +++ b/skills/skill-handoff-summary/README.zh-CN.md @@ -4,8 +4,9 @@ ## Overview -`skill-handoff-summary` 用于在长时间编码任务需要暂停或转交时,生成清晰的暂停说明或交接摘要。 -它把状态、阻塞点和下一步整理成可直接接手的材料,避免下一位执行者从零翻线程历史。 +`skill-handoff-summary` 是一个边界明确的包,用于在长时间编码任务需要暂停或转交时,生成面向续做的简洁交接摘要。 +它把状态、阻塞点、需保留的硬约束,以及“下一步到底做什么”整理成可直接接手的材料,避免下一位执行者从零翻线程历史。 +它面向诸如 `.agent-state/HANDOFF.md` 这样的紧凑交接产物,而不是整项目文档。 ## Best For @@ -19,8 +20,23 @@ - 在继续工作前重建当前任务状态 - 决定一个任务是否需要分阶段或检查点 - 以套件级工作流统筹多个原子包 +- 维护覆盖整个任务的长期状态 +- 生成整项目说明或仓库导览 - 在根本不需要交接时替代最终用户答复 +## Package Boundary + 
+当工作即将暂停,或者任务要换人接手,而下一次会话需要一个快速可复用的重启说明时,就使用这个包。 +输出应保持紧凑、面向续做、且拿来就能行动。 + +这个包只负责交接摘要本身: + +- 写入或刷新诸如 `.agent-state/HANDOFF.md` 的紧凑产物 +- 保留任务摘要、当前状态、硬约束、开放问题,以及精确的下一步动作 +- 给下一次会话附上一段可直接复用的 resume prompt + +这个包不拥有长期状态,不拥有工作流门控,也不应膨胀成整项目文档包。 + ## Install 可通过本仓库中的标准发布路径安装 `skill-handoff-summary`。 @@ -33,10 +49,15 @@ ## How To Use 当执行即将暂停,或者任务要移交给另一个负责人时,就使用这个包。 -说明当前状态、未解决问题、阻塞点以及最先要做的下一步,让它输出一个简洁、可交接的摘要,而不是重新规划整个流程。 +说明当前状态、未解决问题、阻塞点、硬约束,以及最先要做的下一步,让它输出一个简洁、面向续做的交接摘要,而不是重新规划整个流程。 +如果你希望落到具体文件上,可以明确写出,例如:`把交接写到 .agent-state/HANDOFF.md,并在结尾附上下一次会话可直接复用的 resume prompt。` ## References - `SKILL.md`:触发路由与包边界 -- [references/README.zh-CN.md](references/README.zh-CN.md):包边界说明,以及后续面向读者参考资料的范围 -- `assets/`:后续的交接说明、阻塞点和下一步模板 +- [references/README.zh-CN.md](references/README.zh-CN.md):面向读者的参考索引 +- [references/use-cases.zh-CN.md](references/use-cases.zh-CN.md):适用与不适用触发示例 +- [references/use-cases.md](references/use-cases.md):英文触发示例 +- [references/prompt-templates.zh-CN.md](references/prompt-templates.zh-CN.md):可复用的中文交接与续做提示词 +- [references/prompt-templates.en.md](references/prompt-templates.en.md):可复用的英文交接与续做提示词 +- [assets/HANDOFF.template.md](assets/HANDOFF.template.md):紧凑交接模板 diff --git a/skills/skill-handoff-summary/SKILL.md b/skills/skill-handoff-summary/SKILL.md index 6949869..430db18 100644 --- a/skills/skill-handoff-summary/SKILL.md +++ b/skills/skill-handoff-summary/SKILL.md @@ -1,24 +1,26 @@ --- name: skill-handoff-summary -description: Use when the user needs a structured pause or transfer summary for an ongoing coding task, including status, blockers, and next steps, without re-planning the whole workflow. +description: Use when an ongoing coding task needs a compact continuation-oriented handoff summary, especially before pausing or transferring ownership, without turning it into full-project documentation or workflow control. 
--- # Skill Handoff Summary ## Overview -Capture a clear pause or transfer summary for a coding task that will be resumed later by the same person or another agent. -Use this skill for handoff quality, not for rebuilding stale context or deciding phase gates. +Capture a concise continuation-oriented handoff for a coding task that will resume later in the same thread or a new one. +This package specializes in compact handoff creation only. +It can target downstream artifacts such as `.agent-state/HANDOFF.md`, but it does not own long-term state and it does not own workflow gating. ## Use This Skill When -- pausing work and needing a clean summary for later resumption -- transferring a task to another agent with open questions and blockers -- capturing current status, risks, and next steps in one place +- pausing work and needing a clean continuation note for later resumption +- transferring a task to another agent with open questions, constraints, and an exact next action +- writing or refreshing a compact artifact such as `.agent-state/HANDOFF.md` - reducing restart cost after a stop point in a long thread ## References -- `README.md` and `README.zh-CN.md`: package overview and usage guidance -- `references/`: future public examples for handoff prompts and pause summaries -- `assets/`: future templates for handoff notes, blockers, and next-step summaries +- `README.md` and `README.zh-CN.md`: package overview, boundary guidance, and install notes +- `references/use-cases.md` and `references/use-cases.zh-CN.md`: positive and negative trigger examples +- `references/prompt-templates.en.md` and `references/prompt-templates.zh-CN.md`: reusable pause and resume wording +- `assets/HANDOFF.template.md`: compact template for `.agent-state/HANDOFF.md` diff --git a/skills/skill-handoff-summary/agents/openai.yaml b/skills/skill-handoff-summary/agents/openai.yaml index 54c6e93..45a6b0d 100644 --- a/skills/skill-handoff-summary/agents/openai.yaml +++ 
b/skills/skill-handoff-summary/agents/openai.yaml @@ -1,4 +1,4 @@ interface: display_name: "Skill Handoff Summary" - short_description: "Write concise pause and transfer notes for long-running coding work" - default_prompt: "Use $skill-handoff-summary when the task is pausing or changing hands: capture current status, blockers, open questions, and next steps in a concise handoff without turning it into a new workflow plan." + short_description: "Write compact continuation handoffs for paused or transferred coding work" + default_prompt: "Use $skill-handoff-summary when a coding task is pausing or changing hands: write a compact continuation-oriented handoff, optionally to .agent-state/HANDOFF.md, with current status, session changes, preserved constraints, open problems, the exact next action, and a reusable resume prompt. Do not turn it into whole-project documentation, long-term state management, or workflow gating." diff --git a/skills/skill-handoff-summary/assets/.gitkeep b/skills/skill-handoff-summary/assets/.gitkeep deleted file mode 100644 index 8b13789..0000000 --- a/skills/skill-handoff-summary/assets/.gitkeep +++ /dev/null @@ -1 +0,0 @@ - diff --git a/skills/skill-handoff-summary/assets/HANDOFF.template.md b/skills/skill-handoff-summary/assets/HANDOFF.template.md new file mode 100644 index 0000000..062bcf5 --- /dev/null +++ b/skills/skill-handoff-summary/assets/HANDOFF.template.md @@ -0,0 +1,25 @@ +# Handoff Summary + +## Task Summary +One paragraph on the active task and why this handoff exists. + +## Current Status +State what is done, what is in progress, and what remains. + +## What Changed In This Session +List the meaningful edits or verified findings from this session only. + +## Hard Constraints To Preserve +Call out non-negotiable requirements, boundaries, or decisions the next session must keep. + +## Files / Modules Of Interest +Name the files, modules, or artifacts the next session should open first. 
+ +## Open Problems +List blockers, unresolved questions, or risks that still need work. + +## Exact Next Action +Write the single next action that should happen first when work resumes. + +## Resume Prompt +Resume this task from .agent-state/HANDOFF.md. Continue from the recorded status, preserve the listed constraints, inspect the files of interest, resolve the open problems in priority order, perform the exact next action first, and update the handoff if anything material changes. diff --git a/skills/skill-handoff-summary/references/README.md b/skills/skill-handoff-summary/references/README.md index 035d5f3..4a08c10 100644 --- a/skills/skill-handoff-summary/references/README.md +++ b/skills/skill-handoff-summary/references/README.md @@ -1,7 +1,11 @@ # References for skill-handoff-summary This references area explains the public boundary of `skill-handoff-summary`. -It is reserved for reader-facing examples, pause-note formats, and handoff patterns that capture status, blockers, and next steps without taking over task planning. +It is reserved for reader-facing examples, pause-note formats, and handoff patterns that capture continuation-oriented status, blockers, and the exact next step without taking over task planning or long-term state. -Later phases will add concrete examples and reusable templates here. -For now, use the package [README.md](../README.md) for installation and entry guidance, and [SKILL.md](../SKILL.md) for the routing-first trigger summary. +Use these files for concrete examples: + +- [use-cases.md](use-cases.md): when the package is and is not a good fit +- [prompt-templates.en.md](prompt-templates.en.md): ready-to-paste English handoff and resume prompts + +For install and boundary guidance, see [README.md](../README.md) and [SKILL.md](../SKILL.md). 
diff --git a/skills/skill-handoff-summary/references/README.zh-CN.md b/skills/skill-handoff-summary/references/README.zh-CN.md index 8cd209c..669c6df 100644 --- a/skills/skill-handoff-summary/references/README.zh-CN.md +++ b/skills/skill-handoff-summary/references/README.zh-CN.md @@ -1,7 +1,11 @@ # skill-handoff-summary 参考资料 这个 `references/` 目录用于说明 `skill-handoff-summary` 的公开边界。 -后续这里会放面向读者的示例、暂停说明格式,以及围绕状态、阻塞点和下一步的交接模式资料,同时避免接管整个任务规划。 +这里放的是面向读者的示例、暂停说明格式,以及围绕续做状态、阻塞点和精确下一步动作的交接模式资料,同时避免接管任务规划或长期状态维护。 -更具体的示例会在后续阶段补充。 -目前请先阅读包内 [README.md](../README.md) 了解安装与入口说明,再结合 [SKILL.md](../SKILL.md) 查看路由优先的触发摘要。 +可先阅读这些文件: + +- [use-cases.zh-CN.md](use-cases.zh-CN.md):适用与不适用示例 +- [prompt-templates.zh-CN.md](prompt-templates.zh-CN.md):可直接粘贴的中文交接与续做提示词 + +安装和边界说明仍以 [README.zh-CN.md](../README.zh-CN.md) 和 [SKILL.md](../SKILL.md) 为准。 diff --git a/skills/skill-handoff-summary/references/prompt-templates.en.md b/skills/skill-handoff-summary/references/prompt-templates.en.md new file mode 100644 index 0000000..59971e6 --- /dev/null +++ b/skills/skill-handoff-summary/references/prompt-templates.en.md @@ -0,0 +1,35 @@ +# skill-handoff-summary Prompt Templates + +Use these templates when you want `skill-handoff-summary` to create a compact continuation handoff without drifting into whole-project documentation, long-term state management, or workflow gating. 
+ +## Positive Trigger Prompts + +- `Use skill-handoff-summary to capture a continuation handoff before we pause this task.` +- `Write .agent-state/HANDOFF.md with current status, preserved constraints, open problems, and the exact next action.` +- `Prepare a concise transfer note for the next agent and end with a reusable resume prompt.` + +## Negative Trigger Prompts + +- `Give me a short status update in chat and do not write any durable artifact.` +- `Document the whole repository for future maintainers.` +- `Rebuild the current task state and decide the next workflow gate.` + +## Write A Compact Handoff + +```text +Use skill-handoff-summary to write a compact continuation-oriented handoff. +Target .agent-state/HANDOFF.md. +Summarize the task, current status, changes from this session, hard constraints to preserve, +files or modules of interest, open problems, and the exact next action. +End with a resume prompt the next session can reuse immediately. +Do not turn it into whole-project documentation, long-term state, or workflow gating. +``` + +## Resume Prompt + +```text +Resume this task from .agent-state/HANDOFF.md. +Continue from the recorded status, preserve the listed constraints, inspect the files of interest, +resolve the open problems in priority order, perform the exact next action first, +and update the handoff if anything material changes. 
+``` diff --git a/skills/skill-handoff-summary/references/prompt-templates.zh-CN.md b/skills/skill-handoff-summary/references/prompt-templates.zh-CN.md new file mode 100644 index 0000000..16d22af --- /dev/null +++ b/skills/skill-handoff-summary/references/prompt-templates.zh-CN.md @@ -0,0 +1,35 @@ +# skill-handoff-summary 提示词模板 + +当你希望 `skill-handoff-summary` 生成一份紧凑、面向续做的交接摘要,而且不要偏离到整项目文档、长期状态管理或工作流门控时,可以直接复用下面这些模板。 + +## 适用触发示例 + +- `请使用 skill-handoff-summary 在暂停这个任务前生成一份续做交接。` +- `请把当前状态、需保留的约束、开放问题和精确下一步写入 .agent-state/HANDOFF.md。` +- `请为下一位代理准备一份简洁的转交说明,并在结尾附上可复用的 resume prompt。` + +## 不适用触发示例 + +- `直接在聊天里给我一个简短状态更新,不要写任何持久文件。` +- `为未来维护者把整个仓库写成完整文档。` +- `重建当前任务状态,并决定下一个工作流 gate。` + +## 生成紧凑交接 + +```text +请使用 skill-handoff-summary 生成一份紧凑、面向续做的交接摘要。 +目标路径是 .agent-state/HANDOFF.md。 +总结任务、当前状态、本次会话中的变更、需要保留的硬约束、 +相关文件或模块、开放问题,以及精确的下一步动作。 +最后附上一段下一次会话可直接复用的 resume prompt。 +不要把它扩展成整项目文档、长期状态或工作流门控。 +``` + +## Resume Prompt + +```text +Resume this task from .agent-state/HANDOFF.md. +Continue from the recorded status, preserve the listed constraints, inspect the files of interest, +resolve the open problems in priority order, perform the exact next action first, +and update the handoff if anything material changes. +``` diff --git a/skills/skill-handoff-summary/references/use-cases.md b/skills/skill-handoff-summary/references/use-cases.md new file mode 100644 index 0000000..48c2f02 --- /dev/null +++ b/skills/skill-handoff-summary/references/use-cases.md @@ -0,0 +1,36 @@ +# skill-handoff-summary Use Cases + +`skill-handoff-summary` is for compact continuation-oriented handoffs when coding work is about to pause or move to another owner. +Use it when the next session needs a trusted restart note, not when the conversation only needs a casual status update or a whole-project writeup. 
+ +## Positive Trigger Prompts + +- `Use skill-handoff-summary to write a compact handoff before we pause for today.` +- `Write .agent-state/HANDOFF.md so the next session can resume this task without rereading the whole thread.` +- `Capture the current status, hard constraints, open problems, and exact next action for the next agent taking over.` +- `We are handing this thread to another coder. Prepare a concise continuation handoff with a resume prompt.` +- `Before stopping, turn this session into a short transfer summary instead of a full project recap.` + +## Negative Trigger Prompts + +- `Give me a quick status update in chat about what changed today.` +- `Write full project documentation for the repository so a new team can onboard.` +- `Refresh the current task state from the repo before we continue coding right now.` +- `Decide the next phase gate and tell me whether implementation can start.` +- `Create one big summary that covers planning, long-term state, and final handoff together.` + +## When To Write A Handoff + +Write the handoff before pausing work for a meaningful break or before handing the task to another thread or agent. +The package is especially useful when the next session would otherwise need to reconstruct blockers, preserved constraints, or the exact next move from scattered history. + +## When Not To Use This Skill + +Do not use this package for a simple in-chat status update where no durable artifact is needed. +Do not use it to produce full-project documentation, rebuild long-term task state, or decide workflow gates. + +## Reusable Resume Prompt Wording + +Use wording like this when you want the next session to restart immediately: + +`Resume this task from .agent-state/HANDOFF.md. 
Continue from the recorded status, preserve the listed constraints, inspect the files of interest, resolve the open problems in priority order, perform the exact next action first, and update the handoff if anything material changes.` diff --git a/skills/skill-handoff-summary/references/use-cases.zh-CN.md b/skills/skill-handoff-summary/references/use-cases.zh-CN.md new file mode 100644 index 0000000..af0ea84 --- /dev/null +++ b/skills/skill-handoff-summary/references/use-cases.zh-CN.md @@ -0,0 +1,36 @@ +# skill-handoff-summary 使用场景 + +`skill-handoff-summary` 适用于在编码工作即将暂停,或即将转交给另一位执行者时,生成紧凑、面向续做的交接摘要。 +当下一次会话需要一个可信的重启说明时使用它;如果只是随手在聊天里汇报进度,或者要写成整项目文档,就不该由这个包接管。 + +## 适用触发示例 + +- `请使用 skill-handoff-summary 在今天暂停前写一份紧凑交接。` +- `请生成 .agent-state/HANDOFF.md,让下一次会话不用重读整个线程也能继续。` +- `把当前状态、必须保留的硬约束、开放问题和精确下一步整理给下一位接手的人。` +- `这个线程要转给另一位开发者了,请准备一份附带 resume prompt 的简洁续做交接。` +- `在停下来之前,把这次会话整理成短交接说明,而不是完整项目回顾。` + +## 不适用触发示例 + +- `直接在聊天里给我一句今天做了什么的状态更新。` +- `请为整个仓库写一套完整项目文档,方便新团队成员上手。` +- `继续编码前,先根据仓库刷新当前任务状态。` +- `决定下一阶段的 gate,并告诉我现在能不能开始实现。` +- `把规划、长期状态和最终交接合成一份大总结。` + +## 什么时候该写交接 + +当工作要暂停一段有意义的时间,或者任务即将交给另一条线程、另一位代理时,就该先写交接。 +如果下一次会话否则就得从零重建阻塞点、需保留的约束,或者“第一步究竟做什么”,这个包就很合适。 + +## 什么时候不该用这个技能 + +如果只是想在聊天里简单汇报一下当前进度,而不需要留下可复用的持久产物,就不要用这个包。 +也不要把它用于整项目文档、长期任务状态重建,或工作流门控判断。 + +## 可复用的 Resume Prompt 表达 + +当你希望下一次会话拿来就继续做时,可以直接使用这句: + +`Resume this task from .agent-state/HANDOFF.md. 
Continue from the recorded status, preserve the listed constraints, inspect the files of interest, resolve the open problems in priority order, perform the exact next action first, and update the handoff if anything material changes.` From 9b4c08ef7c262ccb710e10e55fd682f1de673854 Mon Sep 17 00:00:00 2001 From: NuoJin <1430093320@qq.com> Date: Wed, 25 Mar 2026 14:26:53 +0800 Subject: [PATCH 14/31] fix: tighten handoff-summary reference contract --- .../references/prompt-templates.zh-CN.md | 8 ++++---- .../references/use-cases.zh-CN.md | 2 +- .../tests/test_package_contract.py | 10 ++++++++++ 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/skills/skill-handoff-summary/references/prompt-templates.zh-CN.md b/skills/skill-handoff-summary/references/prompt-templates.zh-CN.md index 16d22af..c58ab26 100644 --- a/skills/skill-handoff-summary/references/prompt-templates.zh-CN.md +++ b/skills/skill-handoff-summary/references/prompt-templates.zh-CN.md @@ -28,8 +28,8 @@ ## Resume Prompt ```text -Resume this task from .agent-state/HANDOFF.md. -Continue from the recorded status, preserve the listed constraints, inspect the files of interest, -resolve the open problems in priority order, perform the exact next action first, -and update the handoff if anything material changes. +请从 .agent-state/HANDOFF.md 继续这个任务。 +沿着已记录的当前状态往下推进,保留其中列出的约束,先检查提到的相关文件, +再按优先级处理开放问题,优先执行“精确下一步动作”, +并在出现任何重要变化时更新这份交接。 ``` diff --git a/skills/skill-handoff-summary/references/use-cases.zh-CN.md b/skills/skill-handoff-summary/references/use-cases.zh-CN.md index af0ea84..0a2ddd5 100644 --- a/skills/skill-handoff-summary/references/use-cases.zh-CN.md +++ b/skills/skill-handoff-summary/references/use-cases.zh-CN.md @@ -33,4 +33,4 @@ 当你希望下一次会话拿来就继续做时,可以直接使用这句: -`Resume this task from .agent-state/HANDOFF.md. 
Continue from the recorded status, preserve the listed constraints, inspect the files of interest, resolve the open problems in priority order, perform the exact next action first, and update the handoff if anything material changes.` +`请从 .agent-state/HANDOFF.md 继续这个任务。沿着已记录的当前状态往下推进,保留其中列出的约束,先检查提到的相关文件,再按优先级处理开放问题,优先执行“精确下一步动作”,并在出现任何重要变化时更新这份交接。` diff --git a/skills/skill-handoff-summary/tests/test_package_contract.py b/skills/skill-handoff-summary/tests/test_package_contract.py index 10e9306..73910a4 100644 --- a/skills/skill-handoff-summary/tests/test_package_contract.py +++ b/skills/skill-handoff-summary/tests/test_package_contract.py @@ -12,6 +12,8 @@ def test_core_package_files_exist(self): ROOT / "README.zh-CN.md", ROOT / "SKILL.md", ROOT / "agents" / "openai.yaml", + ROOT / "references" / "README.md", + ROOT / "references" / "README.zh-CN.md", ROOT / "references" / "use-cases.md", ROOT / "references" / "use-cases.zh-CN.md", ROOT / "references" / "prompt-templates.en.md", @@ -55,6 +57,14 @@ def test_resume_prompt_is_explicitly_present_in_template(self): self.assertIn("Resume Prompt", text) self.assertIn("Resume this task from .agent-state/HANDOFF.md.", text) + def test_chinese_references_use_natural_resume_prompt_wording(self): + for path in [ + ROOT / "references" / "use-cases.zh-CN.md", + ROOT / "references" / "prompt-templates.zh-CN.md", + ]: + text = path.read_text(encoding="utf-8") + self.assertIn("请从 .agent-state/HANDOFF.md 继续这个任务。", text) + if __name__ == "__main__": unittest.main() From d9534836935a7e9decff724a7443268b480dcbc2 Mon Sep 17 00:00:00 2001 From: NuoJin <1430093320@qq.com> Date: Wed, 25 Mar 2026 14:30:38 +0800 Subject: [PATCH 15/31] test: add skill-task-continuity bootstrap checks --- skills/skill-task-continuity/tests/.gitkeep | 1 - .../tests/test_bootstrap_suite.py | 119 ++++++++++++++++++ 2 files changed, 119 insertions(+), 1 deletion(-) delete mode 100644 skills/skill-task-continuity/tests/.gitkeep create mode 100644 
skills/skill-task-continuity/tests/test_bootstrap_suite.py diff --git a/skills/skill-task-continuity/tests/.gitkeep b/skills/skill-task-continuity/tests/.gitkeep deleted file mode 100644 index 8b13789..0000000 --- a/skills/skill-task-continuity/tests/.gitkeep +++ /dev/null @@ -1 +0,0 @@ - diff --git a/skills/skill-task-continuity/tests/test_bootstrap_suite.py b/skills/skill-task-continuity/tests/test_bootstrap_suite.py new file mode 100644 index 0000000..a84bb13 --- /dev/null +++ b/skills/skill-task-continuity/tests/test_bootstrap_suite.py @@ -0,0 +1,119 @@ +import subprocess +import tempfile +import unittest +from pathlib import Path + + +ROOT = Path(__file__).resolve().parents[1] +SCRIPT = ROOT / "scripts" / "bootstrap_suite.py" +LIBRARY_ROOT = ROOT.parents[1] +TEMPLATE_MAP = { + "AGENTS.md": "assets/AGENTS.repo-template.md", + ".agent-state/TASK_STATE.md": "assets/agent-state/TASK_STATE.template.md", + ".agent-state/HANDOFF.md": "assets/agent-state/HANDOFF.template.md", + ".agent-state/DECISIONS.md": "assets/agent-state/DECISIONS.template.md", + ".agent-state/RUN_LOG.md": "assets/agent-state/RUN_LOG.template.md", +} + + +class BootstrapSuiteTests(unittest.TestCase): + def run_bootstrap(self, target, *extra_args): + return subprocess.run( + ["python3", str(SCRIPT), "--target", str(target), *extra_args], + check=False, + text=True, + capture_output=True, + ) + + def test_bootstrap_copies_expected_files(self): + with tempfile.TemporaryDirectory() as tmpdir: + target = Path(tmpdir) / "demo-repo" + target.mkdir() + + result = self.run_bootstrap(target) + + self.assertEqual(result.returncode, 0, result.stderr) + for relative_path in TEMPLATE_MAP: + self.assertTrue( + (target / relative_path).exists(), + f"expected downstream file to exist: {relative_path}", + ) + + def test_bootstrap_never_mutates_public_library_root(self): + for relative_path in TEMPLATE_MAP: + self.assertFalse( + (LIBRARY_ROOT / relative_path).exists(), + f"library root must stay untouched: 
{relative_path}", + ) + + with tempfile.TemporaryDirectory() as tmpdir: + target = Path(tmpdir) / "consumer-repo" + target.mkdir() + + result = self.run_bootstrap(target) + + self.assertEqual(result.returncode, 0, result.stderr) + for relative_path in TEMPLATE_MAP: + self.assertFalse( + (LIBRARY_ROOT / relative_path).exists(), + f"bootstrap must not write into the library root: {relative_path}", + ) + + def test_bootstrap_preserves_existing_files_without_force(self): + with tempfile.TemporaryDirectory() as tmpdir: + target = Path(tmpdir) / "consumer-repo" + target.mkdir() + + existing = target / ".agent-state" / "RUN_LOG.md" + existing.parent.mkdir() + existing.write_text("keep me\n", encoding="utf-8") + + result = self.run_bootstrap(target) + + self.assertEqual(result.returncode, 0, result.stderr) + self.assertEqual(existing.read_text(encoding="utf-8"), "keep me\n") + + def test_bootstrap_force_overwrites_existing_files(self): + with tempfile.TemporaryDirectory() as tmpdir: + target = Path(tmpdir) / "consumer-repo" + target.mkdir() + + existing = target / ".agent-state" / "RUN_LOG.md" + existing.parent.mkdir() + existing.write_text("old value\n", encoding="utf-8") + + result = self.run_bootstrap(target, "--force") + + self.assertEqual(result.returncode, 0, result.stderr) + self.assertNotEqual(existing.read_text(encoding="utf-8"), "old value\n") + + def test_task_state_and_handoff_templates_match_atomic_assets(self): + suite_task_state = ROOT / "assets" / "agent-state" / "TASK_STATE.template.md" + atomic_task_state = ( + ROOT.parents[1] + / "skill-context-keeper" + / "assets" + / "TASK_STATE.template.md" + ) + suite_handoff = ROOT / "assets" / "agent-state" / "HANDOFF.template.md" + atomic_handoff = ( + ROOT.parents[1] + / "skill-handoff-summary" + / "assets" + / "HANDOFF.template.md" + ) + + self.assertTrue(suite_task_state.exists()) + self.assertTrue(suite_handoff.exists()) + self.assertEqual( + suite_task_state.read_text(encoding="utf-8"), + 
atomic_task_state.read_text(encoding="utf-8"), + ) + self.assertEqual( + suite_handoff.read_text(encoding="utf-8"), + atomic_handoff.read_text(encoding="utf-8"), + ) + + +if __name__ == "__main__": + unittest.main() From 9c5851422f1f4997bd049993113cef747f046389 Mon Sep 17 00:00:00 2001 From: NuoJin <1430093320@qq.com> Date: Wed, 25 Mar 2026 14:34:22 +0800 Subject: [PATCH 16/31] feat: add skill-task-continuity package --- skills/skill-task-continuity/README.md | 74 +++++++++--- skills/skill-task-continuity/README.zh-CN.md | 74 +++++++++--- skills/skill-task-continuity/SKILL.md | 37 +++++- .../skill-task-continuity/agents/openai.yaml | 4 +- skills/skill-task-continuity/assets/.gitkeep | 1 - .../assets/AGENTS.repo-template.md | 61 ++++++++++ .../assets/agent-state/DECISIONS.template.md | 23 ++++ .../assets/agent-state/HANDOFF.template.md | 25 ++++ .../assets/agent-state/RUN_LOG.template.md | 24 ++++ .../assets/agent-state/TASK_STATE.template.md | 41 +++++++ .../references/composition-guide.md | 58 +++++++++ .../references/composition-guide.zh-CN.md | 58 +++++++++ .../references/install-playbook.md | 66 +++++++++++ .../references/install-playbook.zh-CN.md | 66 +++++++++++ skills/skill-task-continuity/scripts/.gitkeep | 1 - .../scripts/bootstrap_suite.py | 110 ++++++++++++++++++ .../tests/test_bootstrap_suite.py | 4 +- 17 files changed, 687 insertions(+), 40 deletions(-) delete mode 100644 skills/skill-task-continuity/assets/.gitkeep create mode 100644 skills/skill-task-continuity/assets/AGENTS.repo-template.md create mode 100644 skills/skill-task-continuity/assets/agent-state/DECISIONS.template.md create mode 100644 skills/skill-task-continuity/assets/agent-state/HANDOFF.template.md create mode 100644 skills/skill-task-continuity/assets/agent-state/RUN_LOG.template.md create mode 100644 skills/skill-task-continuity/assets/agent-state/TASK_STATE.template.md create mode 100644 skills/skill-task-continuity/references/composition-guide.md create mode 100644 
skills/skill-task-continuity/references/composition-guide.zh-CN.md create mode 100644 skills/skill-task-continuity/references/install-playbook.md create mode 100644 skills/skill-task-continuity/references/install-playbook.zh-CN.md delete mode 100644 skills/skill-task-continuity/scripts/.gitkeep create mode 100644 skills/skill-task-continuity/scripts/bootstrap_suite.py diff --git a/skills/skill-task-continuity/README.md b/skills/skill-task-continuity/README.md index dde33b9..b8d6813 100644 --- a/skills/skill-task-continuity/README.md +++ b/skills/skill-task-continuity/README.md @@ -4,22 +4,23 @@ ## Overview -`skill-task-continuity` is the composition package for work that is explicitly about managing long-task continuity as a coordinated system. -It bootstraps or routes across context refresh, phase gating, and handoff writing while keeping the atomic package boundaries visible. +`skill-task-continuity` is the suite entry point for long-task continuity. +It explains how the continuity packages fit together and ships a bootstrap helper that copies downstream templates into a consumer repository. +It does not replace the atomic skills. 
-## Best For +## Package Role -- setting up the long-task continuity suite as a coherent package family -- coordinating which atomic continuity package should act first -- handling requests that truly span state refresh, staged execution, and handoff behavior -- keeping suite-level boundaries clear when prompts mention several continuity concerns at once +- explain the composition of `skill-context-keeper`, `skill-phase-gate`, and `skill-handoff-summary` +- bootstrap downstream files such as `AGENTS.md` and `.agent-state/*.md` +- route suite-shaped requests to the atomic package that owns the next action +- keep the package boundary explicit: templates are for downstream consumers only ## What It Is Not For - replacing `skill-context-keeper` for ordinary state refresh work -- replacing `skill-phase-gate` for a normal staged-plan decision +- replacing `skill-phase-gate` for a normal checkpoint - replacing `skill-handoff-summary` for a simple pause or transfer note -- stealing one-package tasks just because the prompt contains many keywords +- turning this public library checkout into a consumer repo ## Install @@ -30,13 +31,58 @@ You can ask Codex in natural language: - `Use skill-installer to install skill-task-continuity from Golden-Promise/codex-skill-library at skills/skill-task-continuity.` - `Use skill-installer to install skill-task-continuity from Golden-Promise/codex-skill-library at skills/skill-task-continuity using the release or ref I specify.` -## How To Use +## Bootstrap A Downstream Repo -Start here only when the task itself is about suite bootstrap or multi-package continuity coordination. -Describe which continuity problems are in play, which atomic packages are expected to cooperate, and what boundaries must stay narrow, then let the package route or scaffold the suite-level workflow. 
+Preview the downstream file operations first: + +```bash +python3 skills/skill-task-continuity/scripts/bootstrap_suite.py --target /path/to/downstream-repo --dry-run +``` + +Then copy the templates for real: + +```bash +python3 skills/skill-task-continuity/scripts/bootstrap_suite.py --target /path/to/downstream-repo +``` + +Use `--force` only when you intentionally want to overwrite an existing downstream file. +The script requires an explicit `--target` and refuses to bootstrap inside this public skill library checkout. + +## Recommended Downstream Layout + +```text +AGENTS.md +.agent-state/ + TASK_STATE.md + HANDOFF.md + DECISIONS.md + RUN_LOG.md +``` + +`TASK_STATE.md` and `HANDOFF.md` are duplicated copies of the atomic package templates for downstream convenience. +The atomic packages remain the source of truth for their behavior and wording. + +## How The Suite Composes + +Use the atomic skills directly in downstream work: + +- `Use $skill-context-keeper to refresh .agent-state/TASK_STATE.md before more implementation work.` +- `Use $skill-phase-gate for a preflight before this risky multi-file change.` +- `Use $skill-handoff-summary to write .agent-state/HANDOFF.md before we pause.` + +The recommended long-task loop is: + +1. Read the state files before resuming. +2. Gate meaningful changes when the checkpoint adds value. +3. Refresh task state after meaningful work. +4. Write a handoff when pausing or transferring ownership. + +Repo-local `.agents/skills/` wrappers or examples are optional. +If you add them, keep them thin and point back to the atomic skills instead of replacing them. 
## References - `SKILL.md` for trigger routing and package boundaries -- [references/README.md](references/README.md) for the package boundary and the planned reader-facing reference scope -- `assets/` for future suite bootstrap and coordination templates +- [references/composition-guide.md](references/composition-guide.md) for suite composition and explicit invocation wording +- [references/install-playbook.md](references/install-playbook.md) for a downstream bootstrap walkthrough +- `assets/` for downstream-only templates copied by the bootstrap helper diff --git a/skills/skill-task-continuity/README.zh-CN.md b/skills/skill-task-continuity/README.zh-CN.md index b46ddf9..b96504c 100644 --- a/skills/skill-task-continuity/README.zh-CN.md +++ b/skills/skill-task-continuity/README.zh-CN.md @@ -4,22 +4,23 @@ ## Overview -`skill-task-continuity` 是一个组合包,面向那些“本身就在处理长任务连续性体系”的工作。 -它负责在上下文刷新、阶段门控和交接说明之间做套件级协调,同时保持原子包边界清晰可见。 +`skill-task-continuity` 是长任务连续性套件的入口包。 +它负责解释各个连续性包如何组合,并提供一个启动脚本,把下游仓库需要的模板复制进去。 +它不会取代原子技能包。 -## Best For +## 包职责 -- 把长任务连续性套件作为一个完整包族进行搭建 -- 协调应该先触发哪个原子连续性包 -- 处理那些确实同时涉及状态刷新、分阶段执行和交接行为的请求 -- 当提示同时提到多个连续性问题时,仍然保持套件边界清楚 +- 解释 `skill-context-keeper`、`skill-phase-gate`、`skill-handoff-summary` 三个原子包如何组合 +- 为下游仓库启动 `AGENTS.md` 与 `.agent-state/*.md` 等文件 +- 在套件级请求中,把工作路由到真正拥有下一步动作的原子包 +- 明确包边界:模板只服务于下游消费者仓库 ## What It Is Not For - 用来替代 `skill-context-keeper` 的普通状态刷新 -- 用来替代 `skill-phase-gate` 的常规分阶段决策 +- 用来替代 `skill-phase-gate` 的常规检查点 - 用来替代 `skill-handoff-summary` 的简单暂停或转交说明 -- 仅仅因为提示里关键词很多,就抢走本该由单一包处理的任务 +- 把这个公共技能库工作区改造成消费者仓库 ## Install @@ -30,13 +31,58 @@ - `请用 skill-installer 从 Golden-Promise/codex-skill-library 的 skills/skill-task-continuity 安装 skill-task-continuity。` - `请用 skill-installer 从 Golden-Promise/codex-skill-library 的 skills/skill-task-continuity 安装 skill-task-continuity,并使用我指定的 release 或 ref。` -## How To Use +## 启动下游仓库 -只有当任务本身是在做套件启动或多包连续性协调时,才从这里开始。 -说明当前涉及哪些连续性问题、预期哪些原子包协同工作、以及哪些边界必须保持收敛,然后让这个包去路由或搭建套件级流程。 +先用 dry run 预览即将写入的文件: + 
+```bash +python3 skills/skill-task-continuity/scripts/bootstrap_suite.py --target /path/to/downstream-repo --dry-run +``` + +确认无误后再真正复制模板: + +```bash +python3 skills/skill-task-continuity/scripts/bootstrap_suite.py --target /path/to/downstream-repo +``` + +只有在你明确要覆盖已有下游文件时,才使用 `--force`。 +脚本要求显式传入 `--target`,并且会拒绝在这个公共技能库工作区内部执行启动。 + +## 推荐的下游布局 + +```text +AGENTS.md +.agent-state/ + TASK_STATE.md + HANDOFF.md + DECISIONS.md + RUN_LOG.md +``` + +`TASK_STATE.md` 与 `HANDOFF.md` 是从原子包复制过来的重复模板,目的是方便下游仓库一次性完成启动。 +它们的真实来源和行为边界仍然属于对应的原子包。 + +## 套件如何组合 + +在下游工作中,仍然直接调用原子技能: + +- `Use $skill-context-keeper to refresh .agent-state/TASK_STATE.md before more implementation work.` +- `Use $skill-phase-gate for a preflight before this risky multi-file change.` +- `Use $skill-handoff-summary to write .agent-state/HANDOFF.md before we pause.` + +推荐的长任务循环如下: + +1. 恢复工作前先读取状态文件。 +2. 当检查点本身有价值时,为重要改动加上 gate。 +3. 完成有意义的工作后刷新任务状态。 +4. 暂停或转交时写出 handoff。 + +仓库本地的 `.agents/skills/` 包装或示例是可选的。 +如果使用,建议保持轻量,并继续回指原子技能,而不是自行替代它们。 ## References - `SKILL.md`:触发路由与包边界 -- [references/README.zh-CN.md](references/README.zh-CN.md):包边界说明,以及后续面向读者参考资料的范围 -- `assets/`:后续的套件启动与协调模板 +- [references/composition-guide.zh-CN.md](references/composition-guide.zh-CN.md):套件组合方式与原子技能调用措辞 +- [references/install-playbook.zh-CN.md](references/install-playbook.zh-CN.md):下游仓库启动流程 +- `assets/`:仅供下游消费者复制使用的模板 diff --git a/skills/skill-task-continuity/SKILL.md b/skills/skill-task-continuity/SKILL.md index bd296ab..5077463 100644 --- a/skills/skill-task-continuity/SKILL.md +++ b/skills/skill-task-continuity/SKILL.md @@ -8,17 +8,42 @@ description: Use when the user is explicitly bootstrapping or coordinating the l ## Overview Coordinate the long-task continuity suite when the task is about the continuity workflow itself rather than a single atomic continuity action. -Use this package to bootstrap or orchestrate the suite while keeping the atomic package boundaries clear. 
+Use this package as the suite entry point and downstream bootstrap helper while keeping the atomic package boundaries clear. ## Use This Skill When -- bootstrapping the long-task continuity suite in a project or workflow -- coordinating context refresh, phase gates, and handoff behavior together +- bootstrapping the long-task continuity suite in a downstream repository +- explaining how context refresh, phase gates, and handoff behavior fit together - deciding which atomic package should trigger first in a suite-shaped request - protecting package boundaries when a prompt mentions multiple continuity concerns +## Do Not Use This Skill When + +- the task only needs `skill-context-keeper` to refresh state +- the task only needs `skill-phase-gate` for a meaningful checkpoint +- the task only needs `skill-handoff-summary` for a pause or transfer note +- the user is asking this package to replace the atomic skills instead of composing them + +## Composition Boundary + +This package explains the suite and bootstraps downstream templates. +It does not replace the three atomic skills: + +- `skill-context-keeper` owns ongoing task state such as `.agent-state/TASK_STATE.md` +- `skill-phase-gate` owns meaningful preflight and postflight checkpoints +- `skill-handoff-summary` owns `.agent-state/HANDOFF.md` and continuation-oriented handoffs + +Use `skill-task-continuity` to route into the right atomic package or to install the downstream starter files that make the suite easy to adopt. + +## Downstream Templates + +The templates in `assets/` are for downstream consumer repositories only. +They are duplicated here so a consumer repo can bootstrap the suite from one package. +Do not use this package to mutate the public library root into a consumer repo. 
+ ## References -- `README.md` and `README.zh-CN.md`: package overview and usage guidance -- `references/`: future public examples for suite bootstrap and orchestration prompts -- `assets/`: future suite bootstrap templates and continuity coordination assets +- `README.md` and `README.zh-CN.md`: package overview, bootstrap command, and suite entry guidance +- `references/composition-guide.md`: how the suite pieces fit together and when to invoke each atomic skill +- `references/install-playbook.md`: downstream bootstrap walkthrough and recommended layout +- `assets/`: downstream-only templates copied by `scripts/bootstrap_suite.py` diff --git a/skills/skill-task-continuity/agents/openai.yaml b/skills/skill-task-continuity/agents/openai.yaml index d8391a5..9312425 100644 --- a/skills/skill-task-continuity/agents/openai.yaml +++ b/skills/skill-task-continuity/agents/openai.yaml @@ -1,4 +1,4 @@ interface: display_name: "Skill Task Continuity" - short_description: "Coordinate the long-task continuity suite across state, phase, and handoff boundaries" - default_prompt: "Use $skill-task-continuity when the task is explicitly about bootstrapping or coordinating the long-task continuity suite: route across context refresh, phase gates, and handoff concerns while keeping atomic package boundaries clear." + short_description: "Explain and bootstrap the long-task continuity suite without replacing the atomic skills" + default_prompt: "Use $skill-task-continuity when the task is explicitly about the long-task continuity suite itself: explain how `skill-context-keeper`, `skill-phase-gate`, and `skill-handoff-summary` compose, bootstrap downstream templates such as AGENTS.md and .agent-state/*.md when requested, and keep the templates scoped to downstream consumer repos rather than the public skill library root." 
diff --git a/skills/skill-task-continuity/assets/.gitkeep b/skills/skill-task-continuity/assets/.gitkeep deleted file mode 100644 index 8b13789..0000000 --- a/skills/skill-task-continuity/assets/.gitkeep +++ /dev/null @@ -1 +0,0 @@ - diff --git a/skills/skill-task-continuity/assets/AGENTS.repo-template.md b/skills/skill-task-continuity/assets/AGENTS.repo-template.md new file mode 100644 index 0000000..3c5722d --- /dev/null +++ b/skills/skill-task-continuity/assets/AGENTS.repo-template.md @@ -0,0 +1,61 @@ +# Agent Workflow + +This repository uses the long-task continuity suite as a set of narrow, composable skills. +Invoke the atomic skills directly when you need continuity support; this `AGENTS.md` file explains when each one should take over. + +## Atomic Skills + +### Refresh task state with `skill-context-keeper` + +Use wording like: + +- `Use $skill-context-keeper to refresh .agent-state/TASK_STATE.md before more edits.` +- `Rebuild the current task state, separate facts from assumptions, and update .agent-state/TASK_STATE.md.` + +Reach for this when the thread is stale, the repo has changed since the last turn, or the next agent needs a current snapshot before acting. + +### Gate meaningful changes with `skill-phase-gate` + +Use wording like: + +- `Use $skill-phase-gate for a preflight before this multi-file refactor.` +- `Run a postflight gate for the files I changed and record remaining risks.` + +Use it only when the checkpoint itself adds value, such as before or after a risky edit, refactor, migration, or pre-commit review. + +### Generate a handoff with `skill-handoff-summary` + +Use wording like: + +- `Use $skill-handoff-summary to write .agent-state/HANDOFF.md before we pause.` +- `Create a continuation-oriented handoff with the exact next action and a resume prompt.` + +Use it when work is pausing, changing owners, or needs a trusted restart note for the next session. + +## Recommended Long-Task Loop + +1. 
Read `.agent-state/TASK_STATE.md`, `.agent-state/HANDOFF.md`, and any recent notes in `.agent-state/` before resuming work. +2. If the next change is meaningful or risky, run `skill-phase-gate` first so scope, non-goals, and validation stay explicit. +3. Execute the work, then refresh `.agent-state/TASK_STATE.md` with `skill-context-keeper` so the next turn inherits verified state. +4. Record durable choices in `.agent-state/DECISIONS.md` and append session-level evidence to `.agent-state/RUN_LOG.md` when it helps future resumes. +5. If work stops or ownership changes, use `skill-handoff-summary` to refresh `.agent-state/HANDOFF.md` with status, blockers, and the exact next action. + +## Recommended Downstream Layout + +```text +AGENTS.md +.agent-state/ + TASK_STATE.md + HANDOFF.md + DECISIONS.md + RUN_LOG.md +``` + +These files are downstream consumer artifacts. +They are not meant to modify the public skill library itself. + +## Optional Repo-Local Wrappers + +You may add repo-local helper prompts or examples under `.agents/skills/` if they make invocation easier for your team. +That pattern is optional, not required. +Keep those wrappers thin: they should point back to the atomic skills above instead of replacing them. diff --git a/skills/skill-task-continuity/assets/agent-state/DECISIONS.template.md b/skills/skill-task-continuity/assets/agent-state/DECISIONS.template.md new file mode 100644 index 0000000..eb85714 --- /dev/null +++ b/skills/skill-task-continuity/assets/agent-state/DECISIONS.template.md @@ -0,0 +1,23 @@ +# Decisions Log + +Record durable decisions that future sessions should preserve. +Add a new entry for each meaningful choice rather than rewriting history. + +## Entry Template + +### Metadata +- Date: +- Owner: +- Status: + +### Context +- What problem or tradeoff required a decision? + +### Decision +- What was chosen? + +### Why +- Why was this option selected over the alternatives?
+ +### Follow-Up +- What should the next session verify, document, or revisit? diff --git a/skills/skill-task-continuity/assets/agent-state/HANDOFF.template.md b/skills/skill-task-continuity/assets/agent-state/HANDOFF.template.md new file mode 100644 index 0000000..062bcf5 --- /dev/null +++ b/skills/skill-task-continuity/assets/agent-state/HANDOFF.template.md @@ -0,0 +1,25 @@ +# Handoff Summary + +## Task Summary +One paragraph on the active task and why this handoff exists. + +## Current Status +State what is done, what is in progress, and what remains. + +## What Changed In This Session +List the meaningful edits or verified findings from this session only. + +## Hard Constraints To Preserve +Call out non-negotiable requirements, boundaries, or decisions the next session must keep. + +## Files / Modules Of Interest +Name the files, modules, or artifacts the next session should open first. + +## Open Problems +List blockers, unresolved questions, or risks that still need work. + +## Exact Next Action +Write the single next action that should happen first when work resumes. + +## Resume Prompt +Resume this task from .agent-state/HANDOFF.md. Continue from the recorded status, preserve the listed constraints, inspect the files of interest, resolve the open problems in priority order, perform the exact next action first, and update the handoff if anything material changes. diff --git a/skills/skill-task-continuity/assets/agent-state/RUN_LOG.template.md b/skills/skill-task-continuity/assets/agent-state/RUN_LOG.template.md new file mode 100644 index 0000000..3e1f21a --- /dev/null +++ b/skills/skill-task-continuity/assets/agent-state/RUN_LOG.template.md @@ -0,0 +1,24 @@ +# Run Log + +Append one entry per meaningful work session. +Capture commands, checks, and notable observations that would save the next session time. + +## Entry Template + +### Session +- Date: +- Owner: +- Goal: + +### Commands / Checks +- List the commands you ran. 
+- Note pass, fail, or skipped outcomes. + +### Files Touched +- List the files changed or inspected. + +### Notes +- Record noteworthy findings, blockers, or follow-up ideas. + +### Next Session +- State the next action if work resumes from this log entry. diff --git a/skills/skill-task-continuity/assets/agent-state/TASK_STATE.template.md b/skills/skill-task-continuity/assets/agent-state/TASK_STATE.template.md new file mode 100644 index 0000000..0362343 --- /dev/null +++ b/skills/skill-task-continuity/assets/agent-state/TASK_STATE.template.md @@ -0,0 +1,41 @@ +# Task State + +## Current Objective +- State the active task in one sentence. +- Mark assumptions as assumptions, not facts. + +## Scope / Non-Goals +- List what this refresh covers. +- Call out work that belongs to phase gates or final handoffs elsewhere. + +## Hard Constraints +- Capture fixed requirements, approvals, and safety limits. +- Separate must-follow constraints from preferences. + +## Current Codebase Facts +- Record verified repository facts only. +- Note the source if a fact came from tests, files, or commands. + +## Completed Work +- Summarize what is already done and still valid. +- Keep each item specific enough to avoid rework. + +## Open Issues / Risks +- List blockers, uncertainties, and fragile areas. +- Label each item as a fact, assumption, or risk. + +## Next Recommended Action +- Name the next concrete step for the downstream agent. +- Keep it narrow and immediately actionable. + +## Verification Still Needed +- List tests, checks, or reviews that have not run yet. +- Distinguish missing verification from failed verification. + +## Recent Decisions +- Capture decisions with a short reason. +- Separate decisions from open options. + +## Resume Checklist +- Give a short restart checklist in execution order. +- End with the first command, file, or prompt to use next. 
diff --git a/skills/skill-task-continuity/references/composition-guide.md b/skills/skill-task-continuity/references/composition-guide.md new file mode 100644 index 0000000..6a8e611 --- /dev/null +++ b/skills/skill-task-continuity/references/composition-guide.md @@ -0,0 +1,58 @@ +# Composition Guide + +`skill-task-continuity` is the suite entry point for downstream teams that want the long-task continuity workflow to feel coherent without collapsing the atomic package boundaries. +Use it to explain how the pieces fit together and to bootstrap downstream files, not to replace the atomic skills. + +## How The Suite Fits Together + +- `skill-context-keeper` refreshes current task state and keeps facts, assumptions, and decisions separated. +- `skill-phase-gate` adds meaningful preflight or postflight checkpoints around substantial edits. +- `skill-handoff-summary` writes the continuation-oriented handoff when work pauses or changes owners. + +The suite package exists so a downstream repository can install one entry point, learn the system from one place, and copy starter files into its own repo. + +## Explicit Invocation Wording + +Use the atomic skills with direct prompts like these: + +- `Use $skill-context-keeper to refresh .agent-state/TASK_STATE.md before more implementation work.` +- `Use $skill-phase-gate for a preflight before this risky multi-file change.` +- `Use $skill-phase-gate for a postflight gate now that the refactor is done.` +- `Use $skill-handoff-summary to write .agent-state/HANDOFF.md before we pause.` + +If the request is about the suite itself, start with `skill-task-continuity` and then route into the atomic skill that owns the next action. + +## Recommended Downstream Layout + +```text +AGENTS.md +.agent-state/ + TASK_STATE.md + HANDOFF.md + DECISIONS.md + RUN_LOG.md +``` + +This layout is intentionally small. +The suite ships templates for these files so the downstream repo can standardize how agents resume work. 
+ +## Recommended Long-Task Loop + +1. Read `.agent-state/TASK_STATE.md` and `.agent-state/HANDOFF.md` before taking over the task. +2. If the next edit is substantial or risky, run a `skill-phase-gate` preflight. +3. Implement the change. +4. Refresh `.agent-state/TASK_STATE.md` with `skill-context-keeper`. +5. Update `.agent-state/DECISIONS.md` or `.agent-state/RUN_LOG.md` if you made a durable choice or ran checks worth preserving. +6. If work stops, refresh `.agent-state/HANDOFF.md` with `skill-handoff-summary`. + +## Optional Repo-Local Wrapper Pattern + +Some teams like to add repo-local helper prompts or examples under `.agents/skills/`. +That can be useful, but it is optional. +Keep wrappers thin and explicit, for example: + +- `.agents/skills/refresh-task-state.md` that says to invoke `skill-context-keeper` for `.agent-state/TASK_STATE.md` +- `.agents/skills/preflight-risky-change.md` that says to invoke `skill-phase-gate` +- `.agents/skills/write-handoff.md` that says to invoke `skill-handoff-summary` + +Those wrappers should help local adoption without forking the public skill behavior. 
diff --git a/skills/skill-task-continuity/references/composition-guide.zh-CN.md b/skills/skill-task-continuity/references/composition-guide.zh-CN.md new file mode 100644 index 0000000..ead2b8b --- /dev/null +++ b/skills/skill-task-continuity/references/composition-guide.zh-CN.md @@ -0,0 +1,58 @@ +# 组合指南 + +`skill-task-continuity` 是长任务连续性套件的入口包,方便下游仓库用一个清晰的入口理解整套工作流,同时不打破原子技能包的边界。 +它负责解释这些包如何组合,以及如何为下游仓库引导初始化文件,而不是取代原子技能。 + +## 套件如何组合 + +- `skill-context-keeper` 负责刷新当前任务状态,并清楚区分事实、假设和决策。 +- `skill-phase-gate` 负责在较大或较高风险的改动前后增加紧凑的检查点。 +- `skill-handoff-summary` 负责在暂停或交接时写出面向续做的交接摘要。 + +这个组合包的价值在于:下游仓库可以从一个入口安装、从一个入口学习套件结构,并把启动模板复制到自己的仓库里。 + +## 每个原子技能的明确调用措辞 + +可以直接使用下面这类提示词: + +- `Use $skill-context-keeper to refresh .agent-state/TASK_STATE.md before more implementation work.` +- `Use $skill-phase-gate for a preflight before this risky multi-file change.` +- `Use $skill-phase-gate for a postflight gate now that the refactor is done.` +- `Use $skill-handoff-summary to write .agent-state/HANDOFF.md before we pause.` + +如果需求本身就是在搭建或协调整套连续性流程,则先从 `skill-task-continuity` 开始,再路由到真正拥有下一步动作的原子技能。 + +## 推荐的下游布局 + +```text +AGENTS.md +.agent-state/ + TASK_STATE.md + HANDOFF.md + DECISIONS.md + RUN_LOG.md +``` + +这个布局刻意保持精简。 +套件会提供这些文件的模板,帮助下游仓库标准化“如何恢复工作”的约定。 + +## 推荐的长任务循环 + +1. 接手任务前先阅读 `.agent-state/TASK_STATE.md` 和 `.agent-state/HANDOFF.md`。 +2. 如果下一步改动较大或风险较高,先运行一次 `skill-phase-gate` 预检。 +3. 执行实现工作。 +4. 用 `skill-context-keeper` 刷新 `.agent-state/TASK_STATE.md`。 +5. 如果本轮产生了需要长期保留的决策或验证记录,则补充 `.agent-state/DECISIONS.md` 或 `.agent-state/RUN_LOG.md`。 +6. 
如果工作要暂停,则用 `skill-handoff-summary` 刷新 `.agent-state/HANDOFF.md`。 + +## 可选的仓库本地包装模式 + +有些团队会在 `.agents/skills/` 下放一些本地辅助提示或示例。 +这是一种可选模式,不是强制要求。 +建议保持这些包装非常薄,例如: + +- `.agents/skills/refresh-task-state.md`:提示调用 `skill-context-keeper` 来刷新 `.agent-state/TASK_STATE.md` +- `.agents/skills/preflight-risky-change.md`:提示调用 `skill-phase-gate` +- `.agents/skills/write-handoff.md`:提示调用 `skill-handoff-summary` + +这些本地包装的目标是帮助仓库内部采用套件,而不是分叉或替代公共技能包的行为。 diff --git a/skills/skill-task-continuity/references/install-playbook.md b/skills/skill-task-continuity/references/install-playbook.md new file mode 100644 index 0000000..80a486d --- /dev/null +++ b/skills/skill-task-continuity/references/install-playbook.md @@ -0,0 +1,66 @@ +# Install Playbook + +Use this playbook when a downstream repository wants the full long-task continuity suite starter kit. +The suite package only installs downstream-facing templates and docs. +It must not be pointed back at the public skill library root. + +## What Gets Bootstrapped + +The bootstrap helper copies these templates into the downstream repo: + +- `AGENTS.md` +- `.agent-state/TASK_STATE.md` +- `.agent-state/HANDOFF.md` +- `.agent-state/DECISIONS.md` +- `.agent-state/RUN_LOG.md` + +`TASK_STATE.md` and `HANDOFF.md` are duplicated from the atomic packages so consumers can bootstrap from one package while the atomic packages remain the source of truth. + +## Bootstrap Walkthrough + +1. Install or vendor this package into your tooling environment. +2. Pick the downstream repository root you want to prepare. +3. Preview the file operations: + +```bash +python3 skills/skill-task-continuity/scripts/bootstrap_suite.py --target /path/to/downstream-repo --dry-run +``` + +4. Run the real bootstrap when the preview looks correct: + +```bash +python3 skills/skill-task-continuity/scripts/bootstrap_suite.py --target /path/to/downstream-repo +``` + +5. Re-run with `--force` only when you intentionally want to overwrite an existing downstream file. 
+ +## Expected Downstream Layout + +```text +AGENTS.md +.agent-state/ + TASK_STATE.md + HANDOFF.md + DECISIONS.md + RUN_LOG.md +``` + +The helper creates missing parent directories automatically. +Without `--force`, existing files are preserved. + +## Recommended First Use + +After bootstrapping: + +1. Read `AGENTS.md` in the downstream repo. +2. Adjust repo-local wording only if your team needs thin wrappers or examples. +3. Start invoking the atomic skills directly for real work: + - `skill-context-keeper` for state refresh + - `skill-phase-gate` for meaningful checkpoints + - `skill-handoff-summary` for pauses and transfers + +## Optional Repo-Local Wrappers + +If the downstream repo wants helper prompts, add them under `.agents/skills/`. +Keep them optional and lightweight. +They should point back to the atomic skills rather than copying their public docs into repo-specific forks. diff --git a/skills/skill-task-continuity/references/install-playbook.zh-CN.md b/skills/skill-task-continuity/references/install-playbook.zh-CN.md new file mode 100644 index 0000000..fe5bd3c --- /dev/null +++ b/skills/skill-task-continuity/references/install-playbook.zh-CN.md @@ -0,0 +1,66 @@ +# 安装操作手册 + +当下游仓库希望一次性引入整套长任务连续性启动材料时,可以使用这份手册。 +这个组合包只负责安装面向下游消费者的模板和说明文档。 +不要把它指回公共技能库根目录运行。 + +## 会引导生成哪些文件 + +启动脚本会把以下模板复制到下游仓库: + +- `AGENTS.md` +- `.agent-state/TASK_STATE.md` +- `.agent-state/HANDOFF.md` +- `.agent-state/DECISIONS.md` +- `.agent-state/RUN_LOG.md` + +其中 `TASK_STATE.md` 与 `HANDOFF.md` 是从原子包复制过来的重复模板,这样下游仓库可以从一个包完成初始化,而原子包仍然是这些模板的真实来源。 + +## 启动流程 + +1. 先在你的工具环境中安装或引入这个组合包。 +2. 选定需要初始化的下游仓库根目录。 +3. 先用 dry run 预览文件操作: + +```bash +python3 skills/skill-task-continuity/scripts/bootstrap_suite.py --target /path/to/downstream-repo --dry-run +``` + +4. 确认预览结果正确后,再执行真正的启动: + +```bash +python3 skills/skill-task-continuity/scripts/bootstrap_suite.py --target /path/to/downstream-repo +``` + +5. 
只有在你明确要覆盖已有下游文件时,才加上 `--force`。 + +## 期望的下游布局 + +```text +AGENTS.md +.agent-state/ + TASK_STATE.md + HANDOFF.md + DECISIONS.md + RUN_LOG.md +``` + +脚本会自动创建缺失的父目录。 +如果不加 `--force`,已有文件会被保留。 + +## 推荐的首次使用方式 + +完成启动后: + +1. 先阅读下游仓库中的 `AGENTS.md`。 +2. 只有当团队确实需要本地辅助提示或示例时,再做轻量调整。 +3. 真正执行工作时,仍然直接调用原子技能: + - `skill-context-keeper`:刷新任务状态 + - `skill-phase-gate`:执行有意义的检查点 + - `skill-handoff-summary`:写暂停或交接摘要 + +## 可选的仓库本地包装 + +如果下游仓库需要本地辅助提示,可以放在 `.agents/skills/` 下。 +这是一种可选模式,建议保持轻量。 +这些文件应该回指原子技能,而不是在仓库里复制出一套分叉的公共文档。 diff --git a/skills/skill-task-continuity/scripts/.gitkeep b/skills/skill-task-continuity/scripts/.gitkeep deleted file mode 100644 index 8b13789..0000000 --- a/skills/skill-task-continuity/scripts/.gitkeep +++ /dev/null @@ -1 +0,0 @@ - diff --git a/skills/skill-task-continuity/scripts/bootstrap_suite.py b/skills/skill-task-continuity/scripts/bootstrap_suite.py new file mode 100644 index 0000000..a4d41e4 --- /dev/null +++ b/skills/skill-task-continuity/scripts/bootstrap_suite.py @@ -0,0 +1,110 @@ +#!/usr/bin/env python3 + +import argparse +import shutil +import sys +from pathlib import Path + + +PACKAGE_ROOT = Path(__file__).resolve().parents[1] +LIBRARY_ROOT = PACKAGE_ROOT.parents[1] +TEMPLATE_MAP = { + "AGENTS.md": "assets/AGENTS.repo-template.md", + ".agent-state/TASK_STATE.md": "assets/agent-state/TASK_STATE.template.md", + ".agent-state/HANDOFF.md": "assets/agent-state/HANDOFF.template.md", + ".agent-state/DECISIONS.md": "assets/agent-state/DECISIONS.template.md", + ".agent-state/RUN_LOG.md": "assets/agent-state/RUN_LOG.template.md", +} + + +def parse_args(): + parser = argparse.ArgumentParser( + description="Copy long-task continuity starter files into a downstream repository." 
+ ) + parser.add_argument( + "--target", + required=True, + help="Path to the downstream repository root that should receive the templates.", + ) + parser.add_argument( + "--force", + action="store_true", + help="Overwrite existing downstream files instead of preserving them.", + ) + parser.add_argument( + "--dry-run", + action="store_true", + help="Print the operations without writing files.", + ) + return parser.parse_args() + + +def is_within(path, root): + try: + path.relative_to(root) + return True + except ValueError: + return False + + +def validate_target(target): + if is_within(target, LIBRARY_ROOT): + raise ValueError( + "Refusing to bootstrap inside the public skill library. " + "Choose a downstream repository outside this checkout." + ) + + +def iter_operations(target, force): + for destination_rel, source_rel in TEMPLATE_MAP.items(): + source = PACKAGE_ROOT / source_rel + destination = target / destination_rel + action = "overwrite" if destination.exists() and force else "skip" if destination.exists() else "create" + yield action, source, destination + + +def apply_operations(target, force, dry_run): + validate_target(target) + created = 0 + overwritten = 0 + skipped = 0 + + for action, source, destination in iter_operations(target, force): + if action == "skip": + skipped += 1 + print(f"[skip] {destination}") + continue + + if action == "create": + created += 1 + else: + overwritten += 1 + + verb = "would copy" if dry_run else "copy" + print(f"[{action}] {verb} {source} -> {destination}") + if dry_run: + continue + + destination.parent.mkdir(parents=True, exist_ok=True) + shutil.copyfile(source, destination) + + print( + f"Summary: {created} create, {overwritten} overwrite, {skipped} skip" + + (" (dry-run)" if dry_run else "") + ) + + +def main(): + args = parse_args() + target = Path(args.target).expanduser().resolve() + + try: + apply_operations(target=target, force=args.force, dry_run=args.dry_run) + except ValueError as error: + print(f"error: 
{error}", file=sys.stderr) + return 2 + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/skills/skill-task-continuity/tests/test_bootstrap_suite.py b/skills/skill-task-continuity/tests/test_bootstrap_suite.py index a84bb13..e6254a2 100644 --- a/skills/skill-task-continuity/tests/test_bootstrap_suite.py +++ b/skills/skill-task-continuity/tests/test_bootstrap_suite.py @@ -90,14 +90,14 @@ def test_bootstrap_force_overwrites_existing_files(self): def test_task_state_and_handoff_templates_match_atomic_assets(self): suite_task_state = ROOT / "assets" / "agent-state" / "TASK_STATE.template.md" atomic_task_state = ( - ROOT.parents[1] + ROOT.parents[0] / "skill-context-keeper" / "assets" / "TASK_STATE.template.md" ) suite_handoff = ROOT / "assets" / "agent-state" / "HANDOFF.template.md" atomic_handoff = ( - ROOT.parents[1] + ROOT.parents[0] / "skill-handoff-summary" / "assets" / "HANDOFF.template.md" From 7e5ad0839089b31a420de0962695394b9f5234a8 Mon Sep 17 00:00:00 2001 From: NuoJin <1430093320@qq.com> Date: Wed, 25 Mar 2026 15:59:26 +0800 Subject: [PATCH 17/31] test: cover downstream installed bootstrap layout --- .../tests/test_bootstrap_suite.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/skills/skill-task-continuity/tests/test_bootstrap_suite.py b/skills/skill-task-continuity/tests/test_bootstrap_suite.py index e6254a2..1ea338d 100644 --- a/skills/skill-task-continuity/tests/test_bootstrap_suite.py +++ b/skills/skill-task-continuity/tests/test_bootstrap_suite.py @@ -1,3 +1,4 @@ +import shutil import subprocess import tempfile import unittest @@ -25,6 +26,23 @@ def run_bootstrap(self, target, *extra_args): capture_output=True, ) + def test_bootstrap_allows_downstream_repo_with_installed_package_layout(self): + with tempfile.TemporaryDirectory() as tmpdir: + downstream_root = Path(tmpdir) / "consumer-repo" + installed_package = downstream_root / "skills" / "skill-task-continuity" + shutil.copytree(ROOT, 
installed_package) + + installed_script = installed_package / "scripts" / "bootstrap_suite.py" + result = subprocess.run( + ["python3", str(installed_script), "--target", str(downstream_root)], + check=False, + text=True, + capture_output=True, + ) + + self.assertEqual(result.returncode, 0, result.stderr) + self.assertTrue((downstream_root / "AGENTS.md").exists()) + def test_bootstrap_copies_expected_files(self): with tempfile.TemporaryDirectory() as tmpdir: target = Path(tmpdir) / "demo-repo" From 746767e349a0bfd2498a8e22c3aa95d3b59ef191 Mon Sep 17 00:00:00 2001 From: NuoJin <1430093320@qq.com> Date: Wed, 25 Mar 2026 16:00:21 +0800 Subject: [PATCH 18/31] fix: narrow skill-task-continuity bootstrap guard --- .../scripts/bootstrap_suite.py | 31 ++++++++++++++++--- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/skills/skill-task-continuity/scripts/bootstrap_suite.py b/skills/skill-task-continuity/scripts/bootstrap_suite.py index a4d41e4..452f7cb 100644 --- a/skills/skill-task-continuity/scripts/bootstrap_suite.py +++ b/skills/skill-task-continuity/scripts/bootstrap_suite.py @@ -2,12 +2,14 @@ import argparse import shutil +import subprocess import sys from pathlib import Path PACKAGE_ROOT = Path(__file__).resolve().parents[1] LIBRARY_ROOT = PACKAGE_ROOT.parents[1] +PUBLIC_REPO_SLUG = "Golden-Promise/codex-skill-library" TEMPLATE_MAP = { "AGENTS.md": "assets/AGENTS.repo-template.md", ".agent-state/TASK_STATE.md": "assets/agent-state/TASK_STATE.template.md", @@ -38,17 +40,36 @@ def parse_args(): ) return parser.parse_args() +def looks_like_public_library_checkout(root): + if root != LIBRARY_ROOT: + return False + + readme_path = root / "README.md" + if not readme_path.exists(): + return False + + readme_text = readme_path.read_text(encoding="utf-8") + if not readme_text.startswith("# codex-skill-library"): + return False -def is_within(path, root): try: - path.relative_to(root) - return True - except ValueError: + result = subprocess.run( + ["git", "-C", 
str(root), "config", "--get", "remote.origin.url"], + check=False, + text=True, + capture_output=True, + ) + except OSError: + return False + + if result.returncode != 0: return False + return PUBLIC_REPO_SLUG in result.stdout.strip() + def validate_target(target): - if is_within(target, LIBRARY_ROOT): + if looks_like_public_library_checkout(target): raise ValueError( "Refusing to bootstrap inside the public skill library. " "Choose a downstream repository outside this checkout." From 5c2908647e8e68a2c008123b9e654f7235b6136a Mon Sep 17 00:00:00 2001 From: NuoJin <1430093320@qq.com> Date: Wed, 25 Mar 2026 16:02:32 +0800 Subject: [PATCH 19/31] test: cover copied public library bootstrap guard --- .../tests/test_bootstrap_suite.py | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/skills/skill-task-continuity/tests/test_bootstrap_suite.py b/skills/skill-task-continuity/tests/test_bootstrap_suite.py index 1ea338d..4e85f25 100644 --- a/skills/skill-task-continuity/tests/test_bootstrap_suite.py +++ b/skills/skill-task-continuity/tests/test_bootstrap_suite.py @@ -8,6 +8,7 @@ ROOT = Path(__file__).resolve().parents[1] SCRIPT = ROOT / "scripts" / "bootstrap_suite.py" LIBRARY_ROOT = ROOT.parents[1] +REPO_ROOT = ROOT.parents[2] TEMPLATE_MAP = { "AGENTS.md": "assets/AGENTS.repo-template.md", ".agent-state/TASK_STATE.md": "assets/agent-state/TASK_STATE.template.md", @@ -43,6 +44,47 @@ def test_bootstrap_allows_downstream_repo_with_installed_package_layout(self): self.assertEqual(result.returncode, 0, result.stderr) self.assertTrue((downstream_root / "AGENTS.md").exists()) + def test_bootstrap_rejects_copied_public_library_style_tree_without_git_metadata(self): + with tempfile.TemporaryDirectory() as tmpdir: + copied_root = Path(tmpdir) / "codex-skill-library-copy" + installed_package = copied_root / "skills" / "skill-task-continuity" + shutil.copytree(ROOT, installed_package) + + for relative_path in [ + "README.md", + "skills/README.md", + 
"skills/README.zh-CN.md", + "docs/publishing.md", + "docs/publishing.zh-CN.md", + ]: + source = REPO_ROOT / relative_path + destination = copied_root / relative_path + destination.parent.mkdir(parents=True, exist_ok=True) + shutil.copyfile(source, destination) + + for package_name in [ + "skill-context-keeper", + "skill-phase-gate", + "skill-handoff-summary", + ]: + package_dir = copied_root / "skills" / package_name + package_dir.mkdir(parents=True) + (package_dir / "README.md").write_text( + f"# {package_name}\n", + encoding="utf-8", + ) + + installed_script = installed_package / "scripts" / "bootstrap_suite.py" + result = subprocess.run( + ["python3", str(installed_script), "--target", str(copied_root)], + check=False, + text=True, + capture_output=True, + ) + + self.assertNotEqual(result.returncode, 0) + self.assertIn("Refusing to bootstrap inside the public skill library", result.stderr) + def test_bootstrap_copies_expected_files(self): with tempfile.TemporaryDirectory() as tmpdir: target = Path(tmpdir) / "demo-repo" From 4d7d202943f87fbe9dae3941d885ee31f4f9b7c0 Mon Sep 17 00:00:00 2001 From: NuoJin <1430093320@qq.com> Date: Wed, 25 Mar 2026 16:02:38 +0800 Subject: [PATCH 20/31] test: define long-task eval contract --- evals/test_run_evals.py | 112 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 112 insertions(+) create mode 100644 evals/test_run_evals.py diff --git a/evals/test_run_evals.py b/evals/test_run_evals.py new file mode 100644 index 0000000..f4d775f --- /dev/null +++ b/evals/test_run_evals.py @@ -0,0 +1,112 @@ +import csv +import importlib.util +import tempfile +import sys +import unittest +from pathlib import Path + + +ROOT = Path(__file__).resolve().parents[1] +RUNNER = ROOT / "evals" / "run_evals.py" +CASES_CSV = ROOT / "evals" / "cases.csv" + + +def load_runner_module(): + spec = importlib.util.spec_from_file_location("run_evals", RUNNER) + module = importlib.util.module_from_spec(spec) + assert spec is not None + assert spec.loader is 
not None + sys.modules[spec.name] = module + spec.loader.exec_module(module) + return module + + +class RunEvalsTests(unittest.TestCase): + def test_seed_cases_csv_has_expected_columns(self): + self.assertTrue(CASES_CSV.exists(), "seed cases file should exist") + + with CASES_CSV.open(encoding="utf-8", newline="") as handle: + rows = list(csv.DictReader(handle)) + + self.assertEqual(len(rows), 8) + self.assertEqual( + list(rows[0].keys()), + [ + "case_id", + "package", + "scenario_type", + "should_trigger", + "user_prompt", + "expected_artifacts", + "expected_events", + "notes", + ], + ) + + def test_load_cases_normalizes_seed_matrix(self): + self.assertTrue(RUNNER.exists(), "evaluation runner should exist") + module = load_runner_module() + + cases = module.load_cases(CASES_CSV) + + self.assertEqual(len(cases), 8) + context_resume = next(case for case in cases if case.case_id == "context_resume") + self.assertTrue(context_resume.should_trigger) + self.assertEqual( + context_resume.expected_artifacts, + ["state/context.snapshot", "state/continuity.note"], + ) + self.assertEqual( + context_resume.expected_events, + ["context:reload", "context:reconstruct", "context:summary"], + ) + + suite_boundary_clean = next( + case for case in cases if case.case_id == "suite_boundary_clean" + ) + self.assertFalse(suite_boundary_clean.should_trigger) + + def test_run_evaluations_produces_case_and_dimension_results(self): + self.assertTrue(RUNNER.exists(), "evaluation runner should exist") + module = load_runner_module() + + result = module.run_evaluations(ROOT, CASES_CSV) + + self.assertEqual(result["summary"]["cases"], 8) + self.assertIn("passed", result["summary"]) + self.assertIn("failed", result["summary"]) + self.assertIn("routing_quality", result["dimensions"]) + self.assertIn("artifact_presence", result["dimensions"]) + self.assertIn("workflow_completeness", result["dimensions"]) + self.assertIn("docs_clarity", result["dimensions"]) + + case = next(item for item in 
result["cases"] if item["case_id"] == "suite_bootstrap") + self.assertIn("routing_quality", case["dimensions"]) + self.assertIn("artifact_presence", case["dimensions"]) + self.assertIn("workflow_completeness", case["dimensions"]) + self.assertIn("docs_clarity", case["dimensions"]) + + def test_optional_guardrail_columns_are_supported(self): + self.assertTrue(RUNNER.exists(), "evaluation runner should exist") + module = load_runner_module() + + with tempfile.TemporaryDirectory() as tmpdir: + csv_path = Path(tmpdir) / "cases.csv" + csv_path.write_text( + "\n".join( + [ + "case_id,package,scenario_type,should_trigger,user_prompt,expected_artifacts,expected_events,notes,max_commands,max_verbosity", + "demo,skill-context-keeper,positive,yes,Refresh state,assets/TASK_STATE.template.md,context:reload,optional guardrails,3,low", + ] + ) + + "\n", + encoding="utf-8", + ) + + cases = module.load_cases(csv_path) + self.assertEqual(cases[0].max_commands, 3) + self.assertEqual(cases[0].max_verbosity, "low") + + +if __name__ == "__main__": + unittest.main() From 40706c4d4febe6382589616727a82cd54351d955 Mon Sep 17 00:00:00 2001 From: NuoJin <1430093320@qq.com> Date: Wed, 25 Mar 2026 16:02:42 +0800 Subject: [PATCH 21/31] feat: add static long-task eval harness --- CHANGELOG.md | 1 + docs/publishing.md | 8 + docs/publishing.zh-CN.md | 8 + evals/README.md | 37 ++++ evals/run_evals.py | 435 +++++++++++++++++++++++++++++++++++++++ 5 files changed, 489 insertions(+) create mode 100644 evals/README.md create mode 100644 evals/run_evals.py diff --git a/CHANGELOG.md b/CHANGELOG.md index fbd8b3b..11064f0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ All notable changes to `codex-skill-library` should be documented in this file. 
- Add scaffolded package roots for `skill-context-keeper`, `skill-phase-gate`, `skill-handoff-summary`, and `skill-task-continuity`, including bilingual README landing pages, routing-first `SKILL.md` files, OpenAI agent metadata, and visible first-pass package directories. - Add bootstrap groundwork for the long-task continuity suite through initial package metadata, install entry points, and repository index coverage. - Add evaluation scaffolding groundwork by reserving package directories for future references, scripts, tests, and template assets tied to the long-task continuity suite. +- Add a static CSV-driven evaluation harness under `evals/` with repo-local reporting for trigger routing, artifact expectations, boundary language, and future guardrail columns. ## [0.5.1] - 2026-03-25 diff --git a/docs/publishing.md b/docs/publishing.md index 29b1dbb..ec8174a 100644 --- a/docs/publishing.md +++ b/docs/publishing.md @@ -33,6 +33,13 @@ python3 scripts/manage_skill.py --validate-only python3 -m unittest discover -s tests -p 'test_*.py' -v ``` +Long-task continuity suite validation: + +```bash +python3 evals/run_evals.py +python3 -m unittest discover -s evals -p 'test_*.py' -v +``` + ## Versioning Rules - Use repository-level tags such as `v0.1.0`, `v0.2.0`, and `v1.0.0`. @@ -61,4 +68,5 @@ python3 /scripts/install-skill-from-github.py \ - Keep repository-wide guidance in the root `README.md` and `docs/`. - Keep runtime guidance inside the skill package itself. +- Keep static suite checks in `evals/` so maintainers can validate trigger coverage and package boundaries without executing a model. - Prefer bilingual key docs when the repository is meant for public sharing. 
diff --git a/docs/publishing.zh-CN.md b/docs/publishing.zh-CN.md index 34bdb12..1e9eae9 100644 --- a/docs/publishing.zh-CN.md +++ b/docs/publishing.zh-CN.md @@ -33,6 +33,13 @@ python3 scripts/manage_skill.py --validate-only python3 -m unittest discover -s tests -p 'test_*.py' -v ``` +长任务连续性套件的校验方式: + +```bash +python3 evals/run_evals.py +python3 -m unittest discover -s evals -p 'test_*.py' -v +``` + ## 版本规则 - 使用仓库级 tag,例如 `v0.1.0`、`v0.2.0`、`v1.0.0` @@ -61,4 +68,5 @@ python3 /scripts/install-skill-from-github.py \ - 仓库级说明放在根目录 `README.md` 和 `docs/` - 运行时说明保留在具体 skill 包内部 +- 静态套件校验放在 `evals/`,方便维护者在不运行模型的情况下验证触发覆盖和包边界 - 如果仓库面向公开分享,关键说明建议提供中英文两个版本 diff --git a/evals/README.md b/evals/README.md new file mode 100644 index 0000000..9d058b8 --- /dev/null +++ b/evals/README.md @@ -0,0 +1,37 @@ +# Long-Task Continuity Evals + +This directory contains the static, repo-driven evaluation harness for the long-task continuity suite. +It uses `evals/cases.csv` as the seed matrix and checks the published repository shape rather than executing a model. + +## What It Checks + +- CSV parsing and normalization +- should-trigger vs should-not-trigger coverage +- expected artifact templates for each package +- routing hints in `SKILL.md` +- boundary language and non-overlap guidance in package READMEs +- suite-level downstream templates and assets for `skill-task-continuity` +- optional command-count or verbosity guardrails when future cases add them + +## Run It + +```bash +python3 evals/run_evals.py +``` + +For machine-readable output: + +```bash +python3 evals/run_evals.py --format json +``` + +Run the tests directly when you change the harness: + +```bash +python3 -m unittest discover -s evals -p 'test_*.py' -v +``` + +## Adding Cases + +Add a new row to `evals/cases.csv`, then rerun the harness. +If the case needs optional guardrails later, add the relevant CSV columns and the runner will normalize them without changing the API. 
diff --git a/evals/run_evals.py b/evals/run_evals.py new file mode 100644 index 0000000..e1a29c5 --- /dev/null +++ b/evals/run_evals.py @@ -0,0 +1,435 @@ +#!/usr/bin/env python3 +"""Static evaluation harness for the long-task continuity suite.""" + +from __future__ import annotations + +import argparse +import csv +import json +from dataclasses import dataclass +from pathlib import Path +from typing import Any + + +DIMENSIONS = ( + "routing_quality", + "artifact_presence", + "workflow_completeness", + "docs_clarity", + "guardrails", +) + + +@dataclass(frozen=True) +class EvalCase: + case_id: str + package: str + scenario_type: str + should_trigger: bool + user_prompt: str + expected_artifacts: list[str] + expected_events: list[str] + notes: str + max_commands: int | None = None + max_verbosity: str | None = None + + +PACKAGE_RULES: dict[str, dict[str, Any]] = { + "skill-context-keeper": { + "required_files": [ + "README.md", + "README.zh-CN.md", + "SKILL.md", + "references/use-cases.md", + "references/use-cases.zh-CN.md", + "references/prompt-templates.en.md", + "references/prompt-templates.zh-CN.md", + "assets/TASK_STATE.template.md", + ], + "routing_phrases": [ + "use this skill when", + "resuming a task", + "rebuilding the last known task state", + "updating a downstream state file such as `.agent-state/task_state.md`", + ], + "boundary_phrases": [ + "do not run phase gates", + "do not generate final handoffs", + "does not own workflow gating", + "does not own final handoffs", + ], + "artifact_map": { + "state/context.snapshot": "assets/TASK_STATE.template.md", + "state/continuity.note": "assets/TASK_STATE.template.md", + }, + "workflow_files": [ + "references/use-cases.md", + "references/use-cases.zh-CN.md", + "references/prompt-templates.en.md", + "references/prompt-templates.zh-CN.md", + ], + }, + "skill-phase-gate": { + "required_files": [ + "README.md", + "README.zh-CN.md", + "SKILL.md", + "references/README.md", + "references/README.zh-CN.md", + 
"references/use-cases.md", + "references/use-cases.zh-CN.md", + "references/prompt-templates.en.md", + "references/prompt-templates.zh-CN.md", + "assets/PREFLIGHT.template.md", + "assets/POSTFLIGHT.template.md", + ], + "routing_phrases": [ + "meaningful coding checkpoint", + "preflight or postflight", + "meaningful checkpoint bar", + "use this skill when", + ], + "boundary_phrases": [ + "not for trivial one-line edits", + "not for pure explanation tasks", + "does not replace planning packages", + "does not become a handoff generator", + ], + "artifact_map": { + "plan/phase.plan": "assets/PREFLIGHT.template.md", + "plan/checkpoints.md": "assets/PREFLIGHT.template.md", + "plan/exit-criteria.md": "assets/POSTFLIGHT.template.md", + }, + "workflow_files": [ + "references/README.md", + "references/README.zh-CN.md", + "references/use-cases.md", + "references/use-cases.zh-CN.md", + "assets/PREFLIGHT.template.md", + "assets/POSTFLIGHT.template.md", + ], + }, + "skill-handoff-summary": { + "required_files": [ + "README.md", + "README.zh-CN.md", + "SKILL.md", + "references/README.md", + "references/README.zh-CN.md", + "references/use-cases.md", + "references/use-cases.zh-CN.md", + "references/prompt-templates.en.md", + "references/prompt-templates.zh-CN.md", + "assets/HANDOFF.template.md", + ], + "routing_phrases": [ + "continuation-oriented", + "handoff", + "use this skill when", + "writing or refreshing a compact artifact such as `.agent-state/handoff.md`", + ], + "boundary_phrases": [ + "does not own long-term state", + "does not own workflow gating", + "not a full-project documentation", + "replacing the final user-facing answer when no handoff is needed", + ], + "artifact_map": { + "handoff/HANDOFF.md": "assets/HANDOFF.template.md", + "handoff/blockers.md": "assets/HANDOFF.template.md", + "handoff/next-steps.md": "assets/HANDOFF.template.md", + }, + "workflow_files": [ + "references/README.md", + "references/README.zh-CN.md", + "references/use-cases.md", + 
"references/use-cases.zh-CN.md", + "assets/HANDOFF.template.md", + ], + }, + "skill-task-continuity": { + "required_files": [ + "README.md", + "README.zh-CN.md", + "SKILL.md", + "references/composition-guide.md", + "references/install-playbook.md", + "assets/AGENTS.repo-template.md", + "assets/agent-state/TASK_STATE.template.md", + "assets/agent-state/HANDOFF.template.md", + "assets/agent-state/DECISIONS.template.md", + "assets/agent-state/RUN_LOG.template.md", + ], + "routing_phrases": [ + "bootstrapping the long-task continuity suite", + "deciding which atomic package should trigger first", + "composition boundary", + "use this skill when", + ], + "boundary_phrases": [ + "does not replace the three atomic skills", + "do not use this package to mutate the public library root into a consumer repo", + "downstream templates", + "route suite-shaped requests to the atomic package", + ], + "artifact_map": { + "AGENTS.md": "assets/AGENTS.repo-template.md", + ".agent-state/TASK_STATE.md": "assets/agent-state/TASK_STATE.template.md", + ".agent-state/HANDOFF.md": "assets/agent-state/HANDOFF.template.md", + }, + "workflow_files": [ + "references/composition-guide.md", + "references/install-playbook.md", + "assets/AGENTS.repo-template.md", + "assets/agent-state/TASK_STATE.template.md", + "assets/agent-state/HANDOFF.template.md", + ], + }, +} + + +def _normalize_text(text: str) -> str: + return " ".join(text.lower().split()) + + +def _split_tokens(raw: str) -> list[str]: + value = raw.strip() + if not value or value.lower() == "none": + return [] + return [part.strip() for part in value.split("|") if part.strip()] + + +def _parse_bool(raw: str) -> bool: + value = raw.strip().lower() + if value in {"yes", "true", "1"}: + return True + if value in {"no", "false", "0"}: + return False + raise ValueError(f"invalid boolean value: {raw!r}") + + +def _optional_int(raw: str | None) -> int | None: + if raw is None: + return None + value = raw.strip() + if not value: + return None + 
return int(value) + + +def load_cases(csv_path: Path) -> list[EvalCase]: + with csv_path.open(encoding="utf-8", newline="") as handle: + rows = list(csv.DictReader(handle)) + + cases: list[EvalCase] = [] + for row in rows: + cases.append( + EvalCase( + case_id=row["case_id"].strip(), + package=row["package"].strip(), + scenario_type=row["scenario_type"].strip(), + should_trigger=_parse_bool(row["should_trigger"]), + user_prompt=row["user_prompt"].strip(), + expected_artifacts=_split_tokens(row["expected_artifacts"]), + expected_events=_split_tokens(row["expected_events"]), + notes=row["notes"].strip(), + max_commands=_optional_int(row.get("max_commands")), + max_verbosity=(row.get("max_verbosity") or "").strip() or None, + ) + ) + return cases + + +def _package_dir(repo_root: Path, package: str) -> Path: + return repo_root / "skills" / package + + +def _read_text(path: Path) -> str: + return path.read_text(encoding="utf-8") + + +def _check_required_files(package_root: Path, required_files: list[str]) -> list[str]: + missing = [] + for relative in required_files: + if not (package_root / relative).exists(): + missing.append(relative) + return missing + + +def _contains_any(text: str, phrases: list[str]) -> bool: + normalized = _normalize_text(text) + return any(phrase in normalized for phrase in phrases) + + +def _artifact_targets(case: EvalCase, package_rules: dict[str, Any]) -> list[Path]: + artifact_map: dict[str, str] = package_rules["artifact_map"] + targets = [] + for artifact in case.expected_artifacts: + mapped = artifact_map.get(artifact) + if mapped is not None: + targets.append(mapped) + return [Path(target) for target in targets] + + +def evaluate_case(repo_root: Path, case: EvalCase) -> dict[str, Any]: + package_rules = PACKAGE_RULES.get(case.package) + package_root = _package_dir(repo_root, case.package) + result: dict[str, Any] = { + "case_id": case.case_id, + "package": case.package, + "scenario_type": case.scenario_type, + "should_trigger": 
case.should_trigger, + "dimensions": {}, + "details": {}, + } + + if package_rules is None: + result["dimensions"] = { + dimension: {"status": "fail", "reason": f"unknown package: {case.package}"} + for dimension in DIMENSIONS + } + result["details"]["unknown_package"] = case.package + return result + + missing_files = _check_required_files(package_root, package_rules["required_files"]) + if missing_files: + docs_status = "fail" + docs_reason = f"missing required files: {', '.join(missing_files)}" + else: + docs_status = "pass" + docs_reason = "required docs and assets are present" + + sk_text = _read_text(package_root / "SKILL.md") + readme_text = _read_text(package_root / "README.md") + readme_zh_text = _read_text(package_root / "README.zh-CN.md") + routing_status = "pass" if _contains_any(sk_text + "\n" + readme_text, package_rules["routing_phrases"]) else "fail" + routing_reason = "routing hints present" if routing_status == "pass" else "missing routing hints" + + boundary_sources = readme_text + "\n" + readme_zh_text + "\n" + sk_text + workflow_status = "pass" if _contains_any(boundary_sources, package_rules["boundary_phrases"]) else "fail" + workflow_reason = "workflow and boundary language present" if workflow_status == "pass" else "missing workflow boundary language" + + artifact_targets = [package_root / target for target in _artifact_targets(case, package_rules)] + artifact_missing = [str(path.relative_to(package_root)) for path in artifact_targets if not path.exists()] + if artifact_missing: + artifact_status = "fail" + artifact_reason = f"missing mapped artifacts: {', '.join(artifact_missing)}" + else: + artifact_status = "pass" + artifact_reason = "expected artifact templates are present" + + if case.max_commands is None and case.max_verbosity is None: + guardrail_status = "skipped" + guardrail_reason = "no optional guardrails configured" + else: + guardrail_status = "pass" + guardrail_reason = "optional guardrails parsed" + + result["dimensions"] = { 
+ "routing_quality": {"status": routing_status, "reason": routing_reason}, + "artifact_presence": {"status": artifact_status, "reason": artifact_reason}, + "workflow_completeness": {"status": workflow_status, "reason": workflow_reason}, + "docs_clarity": {"status": docs_status, "reason": docs_reason}, + "guardrails": {"status": guardrail_status, "reason": guardrail_reason}, + } + + result["details"] = { + "required_files": package_rules["required_files"], + "expected_events": list(case.expected_events), + "expected_artifacts": list(case.expected_artifacts), + } + return result + + +def _summarize_cases(case_results: list[dict[str, Any]]) -> dict[str, Any]: + summary = { + "cases": len(case_results), + "passed": 0, + "failed": 0, + "dimensions": {name: {"pass": 0, "fail": 0, "skipped": 0} for name in DIMENSIONS}, + } + + for case in case_results: + dimension_statuses = [item["status"] for item in case["dimensions"].values()] + if any(status == "fail" for status in dimension_statuses): + summary["failed"] += 1 + else: + summary["passed"] += 1 + for name, dimension in case["dimensions"].items(): + summary["dimensions"][name][dimension["status"]] += 1 + return summary + + +def run_evaluations(repo_root: Path, cases_csv: Path) -> dict[str, Any]: + cases = load_cases(cases_csv) + case_results = [evaluate_case(repo_root, case) for case in cases] + return { + "repo_root": str(repo_root), + "cases_csv": str(cases_csv), + "cases": case_results, + "summary": _summarize_cases(case_results), + "dimensions": list(DIMENSIONS), + } + + +def render_text_report(report: dict[str, Any]) -> str: + lines = [] + lines.append(f"Repo: {report['repo_root']}") + lines.append(f"Cases: {report['summary']['cases']} Passed: {report['summary']['passed']} Failed: {report['summary']['failed']}") + lines.append("") + lines.append("By case:") + for case in report["cases"]: + statuses = ", ".join( + f"{name}={dimension['status']}" + for name, dimension in case["dimensions"].items() + ) + lines.append( 
+ f"- {case['case_id']} [{case['package']}] should_trigger={str(case['should_trigger']).lower()} -> {statuses}" + ) + for name, dimension in case["dimensions"].items(): + lines.append(f" - {name}: {dimension['status']} ({dimension['reason']})") + lines.append("") + lines.append("By dimension:") + for name, counts in report["summary"]["dimensions"].items(): + lines.append( + f"- {name}: pass={counts['pass']} fail={counts['fail']} skipped={counts['skipped']}" + ) + return "\n".join(lines) + + +def build_arg_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + "--repo-root", + type=Path, + default=Path(__file__).resolve().parents[1], + help="Repository root to evaluate.", + ) + parser.add_argument( + "--cases", + type=Path, + default=Path(__file__).resolve().with_name("cases.csv"), + help="Path to the seed cases CSV.", + ) + parser.add_argument( + "--format", + choices=("text", "json"), + default="text", + help="Output format for the report.", + ) + return parser + + +def main(argv: list[str] | None = None) -> int: + parser = build_arg_parser() + args = parser.parse_args(argv) + report = run_evaluations(args.repo_root, args.cases) + if args.format == "json": + print(json.dumps(report, indent=2, sort_keys=True)) + else: + print(render_text_report(report)) + return 0 if report["summary"]["failed"] == 0 else 1 + + +if __name__ == "__main__": + raise SystemExit(main()) From 7135ae209b61b870d7159712643f883ff6575e43 Mon Sep 17 00:00:00 2001 From: NuoJin <1430093320@qq.com> Date: Wed, 25 Mar 2026 16:02:47 +0800 Subject: [PATCH 22/31] test: fix copied public library regression fixture --- skills/skill-task-continuity/tests/test_bootstrap_suite.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skills/skill-task-continuity/tests/test_bootstrap_suite.py b/skills/skill-task-continuity/tests/test_bootstrap_suite.py index 4e85f25..e079ecb 100644 --- 
a/skills/skill-task-continuity/tests/test_bootstrap_suite.py +++ b/skills/skill-task-continuity/tests/test_bootstrap_suite.py @@ -8,7 +8,7 @@ ROOT = Path(__file__).resolve().parents[1] SCRIPT = ROOT / "scripts" / "bootstrap_suite.py" LIBRARY_ROOT = ROOT.parents[1] -REPO_ROOT = ROOT.parents[2] +REPO_ROOT = ROOT.parents[1] TEMPLATE_MAP = { "AGENTS.md": "assets/AGENTS.repo-template.md", ".agent-state/TASK_STATE.md": "assets/agent-state/TASK_STATE.template.md", From e896dfa2ffdebaaacc1f92eaf7e4ef2b991a8095 Mon Sep 17 00:00:00 2001 From: NuoJin <1430093320@qq.com> Date: Wed, 25 Mar 2026 16:03:23 +0800 Subject: [PATCH 23/31] fix: detect public library bootstrap targets by footprint --- .../scripts/bootstrap_suite.py | 40 ++++++++++--------- 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/skills/skill-task-continuity/scripts/bootstrap_suite.py b/skills/skill-task-continuity/scripts/bootstrap_suite.py index 452f7cb..5b3ffc4 100644 --- a/skills/skill-task-continuity/scripts/bootstrap_suite.py +++ b/skills/skill-task-continuity/scripts/bootstrap_suite.py @@ -2,14 +2,12 @@ import argparse import shutil -import subprocess import sys from pathlib import Path PACKAGE_ROOT = Path(__file__).resolve().parents[1] LIBRARY_ROOT = PACKAGE_ROOT.parents[1] -PUBLIC_REPO_SLUG = "Golden-Promise/codex-skill-library" TEMPLATE_MAP = { "AGENTS.md": "assets/AGENTS.repo-template.md", ".agent-state/TASK_STATE.md": "assets/agent-state/TASK_STATE.template.md", @@ -40,32 +38,38 @@ def parse_args(): ) return parser.parse_args() + +PUBLIC_LIBRARY_MARKER_FILES = [ + "README.md", + "skills/README.md", + "skills/README.zh-CN.md", + "docs/publishing.md", + "docs/publishing.zh-CN.md", +] +PUBLIC_LIBRARY_MARKER_PACKAGES = [ + "skill-context-keeper", + "skill-phase-gate", + "skill-handoff-summary", + "skill-task-continuity", +] + + def looks_like_public_library_checkout(root): if root != LIBRARY_ROOT: return False - readme_path = root / "README.md" - if not readme_path.exists(): + if any(not 
(root / relative_path).exists() for relative_path in PUBLIC_LIBRARY_MARKER_FILES): return False - readme_text = readme_path.read_text(encoding="utf-8") + readme_text = (root / "README.md").read_text(encoding="utf-8") if not readme_text.startswith("# codex-skill-library"): return False - try: - result = subprocess.run( - ["git", "-C", str(root), "config", "--get", "remote.origin.url"], - check=False, - text=True, - capture_output=True, - ) - except OSError: - return False - - if result.returncode != 0: - return False + for package_name in PUBLIC_LIBRARY_MARKER_PACKAGES: + if not (root / "skills" / package_name / "README.md").exists(): + return False - return PUBLIC_REPO_SLUG in result.stdout.strip() + return True def validate_target(target): From c10522e6febda2bc3a2aa1be2a4cb38dbe766605 Mon Sep 17 00:00:00 2001 From: NuoJin <1430093320@qq.com> Date: Wed, 25 Mar 2026 16:07:35 +0800 Subject: [PATCH 24/31] test: tighten eval scoring contract --- evals/test_run_evals.py | 165 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 156 insertions(+), 9 deletions(-) diff --git a/evals/test_run_evals.py b/evals/test_run_evals.py index f4d775f..8e15e67 100644 --- a/evals/test_run_evals.py +++ b/evals/test_run_evals.py @@ -21,6 +21,30 @@ def load_runner_module(): return module +def write_cases_csv(path: Path, rows: list[dict[str, str]]) -> None: + fieldnames = [ + "case_id", + "package", + "scenario_type", + "should_trigger", + "user_prompt", + "expected_artifacts", + "expected_events", + "notes", + ] + extra_fields = [] + for row in rows: + for key in row: + if key not in fieldnames and key not in extra_fields: + extra_fields.append(key) + fieldnames.extend(extra_fields) + + with path.open("w", encoding="utf-8", newline="") as handle: + writer = csv.DictWriter(handle, fieldnames=fieldnames) + writer.writeheader() + writer.writerows(rows) + + class RunEvalsTests(unittest.TestCase): def test_seed_cases_csv_has_expected_columns(self): self.assertTrue(CASES_CSV.exists(), "seed 
cases file should exist") @@ -92,21 +116,144 @@ def test_optional_guardrail_columns_are_supported(self): with tempfile.TemporaryDirectory() as tmpdir: csv_path = Path(tmpdir) / "cases.csv" - csv_path.write_text( - "\n".join( - [ - "case_id,package,scenario_type,should_trigger,user_prompt,expected_artifacts,expected_events,notes,max_commands,max_verbosity", - "demo,skill-context-keeper,positive,yes,Refresh state,assets/TASK_STATE.template.md,context:reload,optional guardrails,3,low", - ] - ) - + "\n", - encoding="utf-8", + write_cases_csv( + csv_path, + [ + { + "case_id": "demo", + "package": "skill-context-keeper", + "scenario_type": "positive", + "should_trigger": "yes", + "user_prompt": "Refresh state", + "expected_artifacts": "assets/TASK_STATE.template.md", + "expected_events": "context:reload", + "notes": "optional guardrails", + "max_commands": "3", + "max_verbosity": "low", + } + ], ) cases = module.load_cases(csv_path) self.assertEqual(cases[0].max_commands, 3) self.assertEqual(cases[0].max_verbosity, "low") + def test_positive_case_requires_trigger_cues_to_pass(self): + self.assertTrue(RUNNER.exists(), "evaluation runner should exist") + module = load_runner_module() + + with tempfile.TemporaryDirectory() as tmpdir: + csv_path = Path(tmpdir) / "cases.csv" + write_cases_csv( + csv_path, + [ + { + "case_id": "bad_positive", + "package": "skill-context-keeper", + "scenario_type": "positive", + "should_trigger": "yes", + "user_prompt": "Please answer this one-off punctuation question in the README and do nothing else.", + "expected_artifacts": "state/context.snapshot", + "expected_events": "context:reload", + "notes": "should fail if routing ignores polarity", + } + ], + ) + + report = module.run_evaluations(ROOT, csv_path) + + case = report["cases"][0] + self.assertEqual(report["summary"]["failed"], 1) + self.assertEqual(case["dimensions"]["routing_quality"]["status"], "fail") + self.assertIn("trigger cues", case["dimensions"]["routing_quality"]["reason"]) + 
+ def test_negative_case_requires_suppress_cues_to_block_routing(self): + self.assertTrue(RUNNER.exists(), "evaluation runner should exist") + module = load_runner_module() + + with tempfile.TemporaryDirectory() as tmpdir: + csv_path = Path(tmpdir) / "cases.csv" + write_cases_csv( + csv_path, + [ + { + "case_id": "bad_negative", + "package": "skill-handoff-summary", + "scenario_type": "negative", + "should_trigger": "no", + "user_prompt": "Please write a handoff with blockers and next steps for the pause.", + "expected_artifacts": "none", + "expected_events": "handoff:skip|direct:answer", + "notes": "should fail if negative polarity is ignored", + } + ], + ) + + report = module.run_evaluations(ROOT, csv_path) + + case = report["cases"][0] + self.assertEqual(report["summary"]["failed"], 1) + self.assertEqual(case["dimensions"]["routing_quality"]["status"], "fail") + self.assertIn("suppress cues", case["dimensions"]["routing_quality"]["reason"]) + + def test_expected_events_must_match_package_namespace(self): + self.assertTrue(RUNNER.exists(), "evaluation runner should exist") + module = load_runner_module() + + with tempfile.TemporaryDirectory() as tmpdir: + csv_path = Path(tmpdir) / "cases.csv" + write_cases_csv( + csv_path, + [ + { + "case_id": "bad_events", + "package": "skill-phase-gate", + "scenario_type": "positive", + "should_trigger": "yes", + "user_prompt": "We need to split this multi-step refactor into phases before coding.", + "expected_artifacts": "plan/phase.plan", + "expected_events": "context:reload", + "notes": "should fail if expected events are not scored", + } + ], + ) + + report = module.run_evaluations(ROOT, csv_path) + + case = report["cases"][0] + self.assertEqual(report["summary"]["failed"], 1) + self.assertEqual(case["dimensions"]["workflow_completeness"]["status"], "fail") + self.assertIn("event", case["dimensions"]["workflow_completeness"]["reason"]) + + def test_unmapped_expected_artifacts_fail_strict_mapping(self): + 
self.assertTrue(RUNNER.exists(), "evaluation runner should exist") + module = load_runner_module() + + with tempfile.TemporaryDirectory() as tmpdir: + csv_path = Path(tmpdir) / "cases.csv" + write_cases_csv( + csv_path, + [ + { + "case_id": "bad_artifacts", + "package": "skill-handoff-summary", + "scenario_type": "positive", + "should_trigger": "yes", + "user_prompt": "I need to stop for today; please write a handoff with blockers and next steps.", + "expected_artifacts": "handoff/HANDOFF.md|handoff/missing.md", + "expected_events": "handoff:capture", + "notes": "should fail if unmapped artifact tokens are ignored", + } + ], + ) + + report = module.run_evaluations(ROOT, csv_path) + + case = report["cases"][0] + self.assertEqual(report["summary"]["failed"], 1) + self.assertEqual(case["dimensions"]["artifact_presence"]["status"], "fail") + self.assertIn("unmapped", case["dimensions"]["artifact_presence"]["reason"]) + if __name__ == "__main__": unittest.main() From f847028e6de8aaf1ab338912e0aece40f3411d6d Mon Sep 17 00:00:00 2001 From: NuoJin <1430093320@qq.com> Date: Wed, 25 Mar 2026 16:07:39 +0800 Subject: [PATCH 25/31] feat: make long-task evals case-aware --- CHANGELOG.md | 2 +- docs/publishing.md | 4 +- docs/publishing.zh-CN.md | 4 +- evals/README.md | 8 +- evals/run_evals.py | 220 +++++++++++++++++++++++++++++++-------- 5 files changed, 189 insertions(+), 49 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 11064f0..02860c8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ All notable changes to `codex-skill-library` should be documented in this file. - Add scaffolded package roots for `skill-context-keeper`, `skill-phase-gate`, `skill-handoff-summary`, and `skill-task-continuity`, including bilingual README landing pages, routing-first `SKILL.md` files, OpenAI agent metadata, and visible first-pass package directories. 
- Add bootstrap groundwork for the long-task continuity suite through initial package metadata, install entry points, and repository index coverage. - Add evaluation scaffolding groundwork by reserving package directories for future references, scripts, tests, and template assets tied to the long-task continuity suite. -- Add a static CSV-driven evaluation harness under `evals/` with repo-local reporting for trigger routing, artifact expectations, boundary language, and future guardrail columns. +- Add a static CSV-driven evaluation harness under `evals/` with repo-local reporting for trigger routing, artifact expectations, event namespaces, boundary language, and future guardrail columns. ## [0.5.1] - 2026-03-25 diff --git a/docs/publishing.md b/docs/publishing.md index ec8174a..56205e3 100644 --- a/docs/publishing.md +++ b/docs/publishing.md @@ -40,6 +40,8 @@ python3 evals/run_evals.py python3 -m unittest discover -s evals -p 'test_*.py' -v ``` +The suite runner now scores prompt polarity, event namespaces, and strict artifact mapping in addition to repository shape checks. + ## Versioning Rules - Use repository-level tags such as `v0.1.0`, `v0.2.0`, and `v1.0.0`. @@ -68,5 +70,5 @@ python3 /scripts/install-skill-from-github.py \ - Keep repository-wide guidance in the root `README.md` and `docs/`. - Keep runtime guidance inside the skill package itself. -- Keep static suite checks in `evals/` so maintainers can validate trigger coverage and package boundaries without executing a model. +- Keep static suite checks in `evals/` so maintainers can validate trigger coverage, package boundaries, event namespaces, and artifact mapping without executing a model. - Prefer bilingual key docs when the repository is meant for public sharing. 
diff --git a/docs/publishing.zh-CN.md b/docs/publishing.zh-CN.md index 1e9eae9..dd5c520 100644 --- a/docs/publishing.zh-CN.md +++ b/docs/publishing.zh-CN.md @@ -40,6 +40,8 @@ python3 evals/run_evals.py python3 -m unittest discover -s evals -p 'test_*.py' -v ``` +套件运行器现在除了仓库形状检查之外,还会按提示词正负向、事件命名空间和严格产物映射来评分。 + ## 版本规则 - 使用仓库级 tag,例如 `v0.1.0`、`v0.2.0`、`v1.0.0` @@ -68,5 +70,5 @@ python3 /scripts/install-skill-from-github.py \ - 仓库级说明放在根目录 `README.md` 和 `docs/` - 运行时说明保留在具体 skill 包内部 -- 静态套件校验放在 `evals/`,方便维护者在不运行模型的情况下验证触发覆盖和包边界 +- 静态套件校验放在 `evals/`,方便维护者在不运行模型的情况下验证触发覆盖、包边界、事件命名空间和产物映射 - 如果仓库面向公开分享,关键说明建议提供中英文两个版本 diff --git a/evals/README.md b/evals/README.md index 9d058b8..e2299c0 100644 --- a/evals/README.md +++ b/evals/README.md @@ -6,10 +6,11 @@ It uses `evals/cases.csv` as the seed matrix and checks the published repository ## What It Checks - CSV parsing and normalization -- should-trigger vs should-not-trigger coverage -- expected artifact templates for each package -- routing hints in `SKILL.md` +- should-trigger vs should-not-trigger coverage from prompt polarity +- expected artifact templates for each package, with strict token-to-template mapping +- routing hints in `SKILL.md` and package READMEs - boundary language and non-overlap guidance in package READMEs +- expected event tokens by package namespace and case polarity - suite-level downstream templates and assets for `skill-task-continuity` - optional command-count or verbosity guardrails when future cases add them @@ -35,3 +36,4 @@ python3 -m unittest discover -s evals -p 'test_*.py' -v Add a new row to `evals/cases.csv`, then rerun the harness. If the case needs optional guardrails later, add the relevant CSV columns and the runner will normalize them without changing the API. +For polarity-sensitive cases, make sure the prompt cues, expected events, and artifact tokens all match the package’s routing boundary. 
diff --git a/evals/run_evals.py b/evals/run_evals.py index e1a29c5..432e47f 100644 --- a/evals/run_evals.py +++ b/evals/run_evals.py @@ -46,15 +46,21 @@ class EvalCase: "references/prompt-templates.zh-CN.md", "assets/TASK_STATE.template.md", ], - "routing_phrases": [ + "trigger_cues": [ "use this skill when", "resuming a task", + "resume from the last known state", + "refresh state", "rebuilding the last known task state", - "updating a downstream state file such as `.agent-state/task_state.md`", + "carry forward unresolved todo", + "carry forward unresolved todos", + "update .agent-state/task_state.md", ], - "boundary_phrases": [ - "do not run phase gates", - "do not generate final handoffs", + "suppress_cues": [ + "one-off", + "do nothing else", + "just answer", + "punctuation question", "does not own workflow gating", "does not own final handoffs", ], @@ -62,6 +68,8 @@ class EvalCase: "state/context.snapshot": "assets/TASK_STATE.template.md", "state/continuity.note": "assets/TASK_STATE.template.md", }, + "positive_event_prefixes": ["context:"], + "negative_event_prefixes": ["context:skip", "direct:answer"], "workflow_files": [ "references/use-cases.md", "references/use-cases.zh-CN.md", @@ -83,14 +91,22 @@ class EvalCase: "assets/PREFLIGHT.template.md", "assets/POSTFLIGHT.template.md", ], - "routing_phrases": [ + "trigger_cues": [ "meaningful coding checkpoint", - "preflight or postflight", - "meaningful checkpoint bar", + "preflight", + "postflight", + "split", + "phases", + "multi-step", + "refactor", + "before coding", "use this skill when", ], - "boundary_phrases": [ + "suppress_cues": [ "not for trivial one-line edits", + "tiny edit", + "rename this heading", + "one-line change", "not for pure explanation tasks", "does not replace planning packages", "does not become a handoff generator", @@ -100,6 +116,8 @@ class EvalCase: "plan/checkpoints.md": "assets/PREFLIGHT.template.md", "plan/exit-criteria.md": "assets/POSTFLIGHT.template.md", }, + 
"positive_event_prefixes": ["phase:"], + "negative_event_prefixes": ["phase:skip", "direct:edit"], "workflow_files": [ "references/README.md", "references/README.zh-CN.md", @@ -122,13 +140,20 @@ class EvalCase: "references/prompt-templates.zh-CN.md", "assets/HANDOFF.template.md", ], - "routing_phrases": [ + "trigger_cues": [ "continuation-oriented", "handoff", + "write a handoff", + "pause", + "transfer", "use this skill when", "writing or refreshing a compact artifact such as `.agent-state/handoff.md`", ], - "boundary_phrases": [ + "suppress_cues": [ + "final answer", + "no handoff", + "just give me", + "do nothing else", "does not own long-term state", "does not own workflow gating", "not a full-project documentation", @@ -139,6 +164,8 @@ class EvalCase: "handoff/blockers.md": "assets/HANDOFF.template.md", "handoff/next-steps.md": "assets/HANDOFF.template.md", }, + "positive_event_prefixes": ["handoff:"], + "negative_event_prefixes": ["handoff:skip", "direct:answer"], "workflow_files": [ "references/README.md", "references/README.zh-CN.md", @@ -160,16 +187,20 @@ class EvalCase: "assets/agent-state/DECISIONS.template.md", "assets/agent-state/RUN_LOG.template.md", ], - "routing_phrases": [ + "trigger_cues": [ "bootstrapping the long-task continuity suite", + "continuity suite", + "set up the long-task continuity suite", + "coordinate", "deciding which atomic package should trigger first", "composition boundary", "use this skill when", ], - "boundary_phrases": [ - "does not replace the three atomic skills", + "suppress_cues": [ + "one-line readme fix", + "trivial edit", "do not use this package to mutate the public library root into a consumer repo", - "downstream templates", + "does not replace the three atomic skills", "route suite-shaped requests to the atomic package", ], "artifact_map": { @@ -177,6 +208,8 @@ class EvalCase: ".agent-state/TASK_STATE.md": "assets/agent-state/TASK_STATE.template.md", ".agent-state/HANDOFF.md": 
"assets/agent-state/HANDOFF.template.md", }, + "positive_event_prefixes": ["bootstrap:"], + "negative_event_prefixes": ["bootstrap:skip", "direct:edit"], "workflow_files": [ "references/composition-guide.md", "references/install-playbook.md", @@ -256,19 +289,119 @@ def _check_required_files(package_root: Path, required_files: list[str]) -> list return missing -def _contains_any(text: str, phrases: list[str]) -> bool: +def _matched_phrases(text: str, phrases: list[str]) -> list[str]: normalized = _normalize_text(text) - return any(phrase in normalized for phrase in phrases) + return [phrase for phrase in phrases if phrase in normalized] -def _artifact_targets(case: EvalCase, package_rules: dict[str, Any]) -> list[Path]: +def _artifact_targets(case: EvalCase, package_rules: dict[str, Any]) -> tuple[list[Path], list[str]]: artifact_map: dict[str, str] = package_rules["artifact_map"] targets = [] + unmapped = [] for artifact in case.expected_artifacts: mapped = artifact_map.get(artifact) if mapped is not None: targets.append(mapped) - return [Path(target) for target in targets] + else: + unmapped.append(artifact) + return [Path(target) for target in targets], unmapped + + +def _score_routing(case: EvalCase, package_rules: dict[str, Any]) -> dict[str, Any]: + trigger_hits = _matched_phrases(case.user_prompt, package_rules["trigger_cues"]) + suppress_hits = _matched_phrases(case.user_prompt, package_rules["suppress_cues"]) + + if case.should_trigger: + status = "pass" if trigger_hits and not suppress_hits else "fail" + if status == "pass": + reason = f"trigger cues matched: {', '.join(trigger_hits)}" + else: + reason = ( + "missing trigger cues or conflicting suppress cues: " + f"trigger={trigger_hits or ['none']}, suppress={suppress_hits or ['none']}" + ) + else: + status = "pass" if suppress_hits else "fail" + if status == "pass": + reason = f"suppress cues matched: {', '.join(suppress_hits)}" + else: + reason = "missing suppress cues for a should-not-trigger case" + + 
return { + "status": status, + "reason": reason, + "trigger_hits": trigger_hits, + "suppress_hits": suppress_hits, + } + + +def _score_events(case: EvalCase, package_rules: dict[str, Any]) -> dict[str, Any]: + allowed_prefixes = ( + package_rules["positive_event_prefixes"] + if case.should_trigger + else package_rules["negative_event_prefixes"] + ) + matching = [] + mismatched = [] + for event in case.expected_events: + if any(event.startswith(prefix) for prefix in allowed_prefixes): + matching.append(event) + else: + mismatched.append(event) + + if not case.expected_events: + status = "fail" + reason = "expected events were not provided" + elif mismatched: + status = "fail" + reason = ( + "event tokens do not match the expected namespace: " + f"allowed={allowed_prefixes}, mismatched={mismatched}" + ) + else: + status = "pass" + reason = f"event namespace matched: {', '.join(matching)}" + + return { + "status": status, + "reason": reason, + "matching": matching, + "mismatched": mismatched, + } + + +def _score_docs(package_root: Path, package_rules: dict[str, Any]) -> dict[str, Any]: + missing_files = _check_required_files(package_root, package_rules["required_files"]) + missing_workflow_docs = _check_required_files(package_root, package_rules["workflow_files"]) + boundary_sources = ( + _read_text(package_root / "README.md") + + "\n" + + _read_text(package_root / "README.zh-CN.md") + + "\n" + + _read_text(package_root / "SKILL.md") + ) + boundary_hits = _matched_phrases(boundary_sources, package_rules["suppress_cues"]) + + if missing_files: + status = "fail" + reason = f"missing required files: {', '.join(missing_files)}" + elif missing_workflow_docs: + status = "fail" + reason = f"missing workflow docs: {', '.join(missing_workflow_docs)}" + elif not boundary_hits: + status = "fail" + reason = "missing boundary language in README and SKILL docs" + else: + status = "pass" + reason = "required docs, workflow docs, and boundary language are present" + + return { + 
"status": status, + "reason": reason, + "missing_files": missing_files, + "missing_workflow_docs": missing_workflow_docs, + "boundary_hits": boundary_hits, + } def evaluate_case(repo_root: Path, case: EvalCase) -> dict[str, Any]: @@ -291,27 +424,16 @@ def evaluate_case(repo_root: Path, case: EvalCase) -> dict[str, Any]: result["details"]["unknown_package"] = case.package return result - missing_files = _check_required_files(package_root, package_rules["required_files"]) - if missing_files: - docs_status = "fail" - docs_reason = f"missing required files: {', '.join(missing_files)}" - else: - docs_status = "pass" - docs_reason = "required docs and assets are present" - - sk_text = _read_text(package_root / "SKILL.md") - readme_text = _read_text(package_root / "README.md") - readme_zh_text = _read_text(package_root / "README.zh-CN.md") - routing_status = "pass" if _contains_any(sk_text + "\n" + readme_text, package_rules["routing_phrases"]) else "fail" - routing_reason = "routing hints present" if routing_status == "pass" else "missing routing hints" - - boundary_sources = readme_text + "\n" + readme_zh_text + "\n" + sk_text - workflow_status = "pass" if _contains_any(boundary_sources, package_rules["boundary_phrases"]) else "fail" - workflow_reason = "workflow and boundary language present" if workflow_status == "pass" else "missing workflow boundary language" - - artifact_targets = [package_root / target for target in _artifact_targets(case, package_rules)] - artifact_missing = [str(path.relative_to(package_root)) for path in artifact_targets if not path.exists()] - if artifact_missing: + routing = _score_routing(case, package_rules) + events = _score_events(case, package_rules) + docs = _score_docs(package_root, package_rules) + + artifact_targets, unmapped_artifacts = _artifact_targets(case, package_rules) + artifact_missing = [str((package_root / path).relative_to(package_root)) for path in artifact_targets if not (package_root / path).exists()] + if 
unmapped_artifacts: + artifact_status = "fail" + artifact_reason = f"unmapped expected artifacts: {', '.join(unmapped_artifacts)}" + elif artifact_missing: artifact_status = "fail" artifact_reason = f"missing mapped artifacts: {', '.join(artifact_missing)}" else: @@ -326,17 +448,29 @@ def evaluate_case(repo_root: Path, case: EvalCase) -> dict[str, Any]: guardrail_reason = "optional guardrails parsed" result["dimensions"] = { - "routing_quality": {"status": routing_status, "reason": routing_reason}, + "routing_quality": { + "status": routing["status"], + "reason": routing["reason"], + }, "artifact_presence": {"status": artifact_status, "reason": artifact_reason}, - "workflow_completeness": {"status": workflow_status, "reason": workflow_reason}, - "docs_clarity": {"status": docs_status, "reason": docs_reason}, + "workflow_completeness": { + "status": events["status"], + "reason": events["reason"], + }, + "docs_clarity": {"status": docs["status"], "reason": docs["reason"]}, "guardrails": {"status": guardrail_status, "reason": guardrail_reason}, } result["details"] = { "required_files": package_rules["required_files"], + "workflow_files": package_rules["workflow_files"], "expected_events": list(case.expected_events), "expected_artifacts": list(case.expected_artifacts), + "trigger_hits": routing["trigger_hits"], + "suppress_hits": routing["suppress_hits"], + "matching_events": events["matching"], + "mismatched_events": events["mismatched"], + "unmapped_artifacts": unmapped_artifacts, } return result From 8a8419113a89e5a8c30b528099ebb1590913578f Mon Sep 17 00:00:00 2001 From: NuoJin <1430093320@qq.com> Date: Wed, 25 Mar 2026 16:20:27 +0800 Subject: [PATCH 26/31] test: tighten long-task eval coverage --- evals/test_run_evals.py | 106 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 106 insertions(+) diff --git a/evals/test_run_evals.py b/evals/test_run_evals.py index 8e15e67..a356195 100644 --- a/evals/test_run_evals.py +++ b/evals/test_run_evals.py @@ -1,8 +1,10 
@@ import csv import importlib.util +import re import tempfile import sys import unittest +import shutil from pathlib import Path @@ -254,6 +256,110 @@ def test_unmapped_expected_artifacts_fail_strict_mapping(self): self.assertEqual(case["dimensions"]["artifact_presence"]["status"], "fail") self.assertIn("unmapped", case["dimensions"]["artifact_presence"]["reason"]) + def test_routing_quality_fails_when_trigger_docs_are_missing(self): + self.assertTrue(RUNNER.exists(), "evaluation runner should exist") + module = load_runner_module() + + with tempfile.TemporaryDirectory() as tmpdir: + repo_root = Path(tmpdir) / "repo" + package_root = repo_root / "skills" / "skill-context-keeper" + shutil.copytree(ROOT / "skills" / "skill-context-keeper", package_root) + for relative in ["README.md", "README.zh-CN.md", "SKILL.md"]: + path = package_root / relative + text = path.read_text(encoding="utf-8") + for index, cue in enumerate(module.PACKAGE_RULES["skill-context-keeper"]["trigger_cues"]): + text = re.sub( + re.escape(cue), + f"REMOVED_TRIGGER_{index}", + text, + flags=re.IGNORECASE, + ) + path.write_text(text, encoding="utf-8") + + csv_path = Path(tmpdir) / "cases.csv" + write_cases_csv( + csv_path, + [ + { + "case_id": "context_resume", + "package": "skill-context-keeper", + "scenario_type": "positive", + "should_trigger": "yes", + "user_prompt": "We’ve been iterating for a while; please resume from the last known state, summarize what changed, and carry forward unresolved TODOs.", + "expected_artifacts": "state/context.snapshot|state/continuity.note", + "expected_events": "context:reload|context:reconstruct|context:summary", + "notes": "trigger docs should be required", + } + ], + ) + + report = module.run_evaluations(repo_root, csv_path) + + case = next(item for item in report["cases"] if item["case_id"] == "context_resume") + self.assertEqual(report["summary"]["failed"], 1) + self.assertEqual(case["dimensions"]["routing_quality"]["status"], "fail") + 
self.assertIn("routing docs", case["dimensions"]["routing_quality"]["reason"]) + + def test_workflow_completeness_rejects_bogus_same_namespace_token(self): + self.assertTrue(RUNNER.exists(), "evaluation runner should exist") + module = load_runner_module() + + with tempfile.TemporaryDirectory() as tmpdir: + csv_path = Path(tmpdir) / "cases.csv" + write_cases_csv( + csv_path, + [ + { + "case_id": "bad_workflow", + "package": "skill-phase-gate", + "scenario_type": "positive", + "should_trigger": "yes", + "user_prompt": "We need to split this multi-step refactor into phases before coding.", + "expected_artifacts": "plan/phase.plan", + "expected_events": "phase:anything", + "notes": "should fail when same-namespace nonsense tokens are used", + } + ], + ) + + report = module.run_evaluations(ROOT, csv_path) + + case = report["cases"][0] + self.assertEqual(report["summary"]["failed"], 1) + self.assertEqual(case["dimensions"]["workflow_completeness"]["status"], "fail") + self.assertIn("allowed tokens", case["dimensions"]["workflow_completeness"]["reason"]) + + def test_guardrails_fail_when_invalid_metadata_values_are_present(self): + self.assertTrue(RUNNER.exists(), "evaluation runner should exist") + module = load_runner_module() + + with tempfile.TemporaryDirectory() as tmpdir: + csv_path = Path(tmpdir) / "cases.csv" + write_cases_csv( + csv_path, + [ + { + "case_id": "bad_guardrails", + "package": "skill-task-continuity", + "scenario_type": "positive", + "should_trigger": "yes", + "user_prompt": "Set up the long-task continuity suite and coordinate the context keeper, phase gate, and handoff packages.", + "expected_artifacts": "AGENTS.md", + "expected_events": "bootstrap:agents_md", + "notes": "invalid guardrail metadata should fail", + "max_commands": "0", + "max_verbosity": "extreme", + } + ], + ) + + report = module.run_evaluations(ROOT, csv_path) + + case = report["cases"][0] + self.assertEqual(report["summary"]["failed"], 1) + 
self.assertEqual(case["dimensions"]["guardrails"]["status"], "fail") + self.assertIn("guardrail", case["dimensions"]["guardrails"]["reason"]) + if __name__ == "__main__": unittest.main() From 3155a09f60530035687d69a21073e922ad01ce94 Mon Sep 17 00:00:00 2001 From: NuoJin <1430093320@qq.com> Date: Wed, 25 Mar 2026 16:20:30 +0800 Subject: [PATCH 27/31] feat: tighten long-task eval scoring --- CHANGELOG.md | 1 + docs/publishing.md | 2 + docs/publishing.zh-CN.md | 2 + evals/README.md | 10 +-- evals/run_evals.py | 128 ++++++++++++++++++++++++++------------- 5 files changed, 97 insertions(+), 46 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 02860c8..9c01d78 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ All notable changes to `codex-skill-library` should be documented in this file. - Add bootstrap groundwork for the long-task continuity suite through initial package metadata, install entry points, and repository index coverage. - Add evaluation scaffolding groundwork by reserving package directories for future references, scripts, tests, and template assets tied to the long-task continuity suite. - Add a static CSV-driven evaluation harness under `evals/` with repo-local reporting for trigger routing, artifact expectations, event namespaces, boundary language, and future guardrail columns. +- Tighten the long-task continuity eval harness so routing now depends on published trigger guidance, workflow tokens match exact package/polarity contracts, and optional guardrail metadata is validated rather than just parsed. ## [0.5.1] - 2026-03-25 diff --git a/docs/publishing.md b/docs/publishing.md index 56205e3..1afa61a 100644 --- a/docs/publishing.md +++ b/docs/publishing.md @@ -41,6 +41,7 @@ python3 -m unittest discover -s evals -p 'test_*.py' -v ``` The suite runner now scores prompt polarity, event namespaces, and strict artifact mapping in addition to repository shape checks. 
+Routing quality also requires trigger guidance to remain visible in published `SKILL.md` and README files, and guardrail metadata is validated statically when optional columns are present. ## Versioning Rules @@ -71,4 +72,5 @@ python3 /scripts/install-skill-from-github.py \ - Keep repository-wide guidance in the root `README.md` and `docs/`. - Keep runtime guidance inside the skill package itself. - Keep static suite checks in `evals/` so maintainers can validate trigger coverage, package boundaries, event namespaces, and artifact mapping without executing a model. +- Treat `max_commands` as a positive integer contract and `max_verbosity` as a `low` / `medium` / `high` metadata check when those columns appear in `evals/cases.csv`. - Prefer bilingual key docs when the repository is meant for public sharing. diff --git a/docs/publishing.zh-CN.md b/docs/publishing.zh-CN.md index dd5c520..1dca627 100644 --- a/docs/publishing.zh-CN.md +++ b/docs/publishing.zh-CN.md @@ -41,6 +41,7 @@ python3 -m unittest discover -s evals -p 'test_*.py' -v ``` 套件运行器现在除了仓库形状检查之外,还会按提示词正负向、事件命名空间和严格产物映射来评分。 +路由评分还要求已发布的 `SKILL.md` 和 README 中保留触发提示,而可选的 guardrail 字段会以静态元数据合同来校验。 ## 版本规则 @@ -71,4 +72,5 @@ python3 /scripts/install-skill-from-github.py \ - 仓库级说明放在根目录 `README.md` 和 `docs/` - 运行时说明保留在具体 skill 包内部 - 静态套件校验放在 `evals/`,方便维护者在不运行模型的情况下验证触发覆盖、包边界、事件命名空间和产物映射 +- 当 `evals/cases.csv` 出现可选列时,把 `max_commands` 视为正整数合同,把 `max_verbosity` 视为 `low` / `medium` / `high` 的元数据检查 - 如果仓库面向公开分享,关键说明建议提供中英文两个版本 diff --git a/evals/README.md b/evals/README.md index e2299c0..62df1a8 100644 --- a/evals/README.md +++ b/evals/README.md @@ -8,11 +8,11 @@ It uses `evals/cases.csv` as the seed matrix and checks the published repository - CSV parsing and normalization - should-trigger vs should-not-trigger coverage from prompt polarity - expected artifact templates for each package, with strict token-to-template mapping -- routing hints in `SKILL.md` and package READMEs +- routing hints in `SKILL.md` and 
package READMEs, plus prompt polarity evidence - boundary language and non-overlap guidance in package READMEs -- expected event tokens by package namespace and case polarity +- expected event tokens by exact package and polarity contract - suite-level downstream templates and assets for `skill-task-continuity` -- optional command-count or verbosity guardrails when future cases add them +- static guardrail metadata validation when optional columns are present ## Run It @@ -35,5 +35,5 @@ python3 -m unittest discover -s evals -p 'test_*.py' -v ## Adding Cases Add a new row to `evals/cases.csv`, then rerun the harness. -If the case needs optional guardrails later, add the relevant CSV columns and the runner will normalize them without changing the API. -For polarity-sensitive cases, make sure the prompt cues, expected events, and artifact tokens all match the package’s routing boundary. +If the case needs optional guardrails later, add the relevant CSV columns and the runner will validate `max_commands > 0` and `max_verbosity` in `low|medium|high`. +For polarity-sensitive cases, make sure the prompt cues, routing docs, expected events, and artifact tokens all match the package’s routing boundary. 
diff --git a/evals/run_evals.py b/evals/run_evals.py index 432e47f..89893bd 100644 --- a/evals/run_evals.py +++ b/evals/run_evals.py @@ -68,8 +68,8 @@ class EvalCase: "state/context.snapshot": "assets/TASK_STATE.template.md", "state/continuity.note": "assets/TASK_STATE.template.md", }, - "positive_event_prefixes": ["context:"], - "negative_event_prefixes": ["context:skip", "direct:answer"], + "positive_events": ["context:reload", "context:reconstruct", "context:summary"], + "negative_events": ["context:skip", "direct:answer"], "workflow_files": [ "references/use-cases.md", "references/use-cases.zh-CN.md", @@ -116,8 +116,8 @@ class EvalCase: "plan/checkpoints.md": "assets/PREFLIGHT.template.md", "plan/exit-criteria.md": "assets/POSTFLIGHT.template.md", }, - "positive_event_prefixes": ["phase:"], - "negative_event_prefixes": ["phase:skip", "direct:edit"], + "positive_events": ["phase:split", "phase:checkpoint", "phase:gate"], + "negative_events": ["phase:skip", "direct:edit"], "workflow_files": [ "references/README.md", "references/README.zh-CN.md", @@ -164,8 +164,8 @@ class EvalCase: "handoff/blockers.md": "assets/HANDOFF.template.md", "handoff/next-steps.md": "assets/HANDOFF.template.md", }, - "positive_event_prefixes": ["handoff:"], - "negative_event_prefixes": ["handoff:skip", "direct:answer"], + "positive_events": ["handoff:capture", "handoff:pause", "handoff:transfer"], + "negative_events": ["handoff:skip", "direct:answer"], "workflow_files": [ "references/README.md", "references/README.zh-CN.md", @@ -208,8 +208,8 @@ class EvalCase: ".agent-state/TASK_STATE.md": "assets/agent-state/TASK_STATE.template.md", ".agent-state/HANDOFF.md": "assets/agent-state/HANDOFF.template.md", }, - "positive_event_prefixes": ["bootstrap:"], - "negative_event_prefixes": ["bootstrap:skip", "direct:edit"], + "positive_events": ["bootstrap:agents_md", "bootstrap:task_state", "bootstrap:handoff"], + "negative_events": ["bootstrap:skip", "direct:edit"], "workflow_files": [ 
"references/composition-guide.md", "references/install-playbook.md", @@ -307,66 +307,86 @@ def _artifact_targets(case: EvalCase, package_rules: dict[str, Any]) -> tuple[li return [Path(target) for target in targets], unmapped -def _score_routing(case: EvalCase, package_rules: dict[str, Any]) -> dict[str, Any]: +def _score_routing(package_root: Path, case: EvalCase, package_rules: dict[str, Any]) -> dict[str, Any]: trigger_hits = _matched_phrases(case.user_prompt, package_rules["trigger_cues"]) suppress_hits = _matched_phrases(case.user_prompt, package_rules["suppress_cues"]) + docs_sources = ( + _read_text(package_root / "README.md") + + "\n" + + _read_text(package_root / "README.zh-CN.md") + + "\n" + + _read_text(package_root / "SKILL.md") + ) + docs_trigger_hits = _matched_phrases(docs_sources, package_rules["trigger_cues"]) + docs_suppress_hits = _matched_phrases(docs_sources, package_rules["suppress_cues"]) + + docs_ok = bool(docs_trigger_hits and docs_suppress_hits) if case.should_trigger: - status = "pass" if trigger_hits and not suppress_hits else "fail" + status = "pass" if trigger_hits and not suppress_hits and docs_ok else "fail" if status == "pass": - reason = f"trigger cues matched: {', '.join(trigger_hits)}" + reason = ( + f"trigger cues matched: {', '.join(trigger_hits)}; " + f"routing docs matched: {', '.join(docs_trigger_hits)}" + ) else: reason = ( - "missing trigger cues or conflicting suppress cues: " - f"trigger={trigger_hits or ['none']}, suppress={suppress_hits or ['none']}" + "missing trigger cues, conflicting suppress cues, or routing docs guidance: " + f"trigger={trigger_hits or ['none']}, suppress={suppress_hits or ['none']}, " + f"docs_trigger={docs_trigger_hits or ['none']}, docs_suppress={docs_suppress_hits or ['none']}" ) else: - status = "pass" if suppress_hits else "fail" + status = "pass" if suppress_hits and docs_ok else "fail" if status == "pass": - reason = f"suppress cues matched: {', '.join(suppress_hits)}" + reason = ( + 
f"suppress cues matched: {', '.join(suppress_hits)}; " + f"routing docs matched: {', '.join(docs_suppress_hits)}" + ) else: - reason = "missing suppress cues for a should-not-trigger case" + reason = ( + "missing suppress cues, or routing docs guidance is incomplete: " + f"trigger={trigger_hits or ['none']}, suppress={suppress_hits or ['none']}, " + f"docs_trigger={docs_trigger_hits or ['none']}, docs_suppress={docs_suppress_hits or ['none']}" + ) return { "status": status, "reason": reason, "trigger_hits": trigger_hits, "suppress_hits": suppress_hits, + "docs_trigger_hits": docs_trigger_hits, + "docs_suppress_hits": docs_suppress_hits, } def _score_events(case: EvalCase, package_rules: dict[str, Any]) -> dict[str, Any]: - allowed_prefixes = ( - package_rules["positive_event_prefixes"] + allowed_events = ( + package_rules["positive_events"] if case.should_trigger - else package_rules["negative_event_prefixes"] + else package_rules["negative_events"] ) - matching = [] - mismatched = [] - for event in case.expected_events: - if any(event.startswith(prefix) for prefix in allowed_prefixes): - matching.append(event) - else: - mismatched.append(event) + expected_events = list(case.expected_events) + allowed_set = set(allowed_events) + expected_set = set(expected_events) - if not case.expected_events: + if not expected_events: status = "fail" reason = "expected events were not provided" - elif mismatched: + elif expected_set != allowed_set or len(expected_events) != len(allowed_events): status = "fail" reason = ( - "event tokens do not match the expected namespace: " - f"allowed={allowed_prefixes}, mismatched={mismatched}" + "expected events do not match the allowed tokens for this package and polarity: " + f"allowed={allowed_events}, actual={expected_events}" ) else: status = "pass" - reason = f"event namespace matched: {', '.join(matching)}" + reason = f"expected events matched exactly: {', '.join(expected_events)}" return { "status": status, "reason": reason, - 
"matching": matching, - "mismatched": mismatched, + "matching": expected_events if status == "pass" else [], + "mismatched": [] if status == "pass" else expected_events, } @@ -404,6 +424,34 @@ def _score_docs(package_root: Path, package_rules: dict[str, Any]) -> dict[str, } +def _score_guardrails(case: EvalCase) -> dict[str, Any]: + if case.max_commands is None and case.max_verbosity is None: + return { + "status": "skipped", + "reason": "no optional guardrails configured", + "invalid": [], + } + + invalid = [] + if case.max_commands is not None and case.max_commands <= 0: + invalid.append("max_commands must be a positive integer") + if case.max_verbosity is not None and case.max_verbosity not in {"low", "medium", "high"}: + invalid.append("max_verbosity must be one of low, medium, or high") + + if invalid: + return { + "status": "fail", + "reason": f"invalid guardrail metadata: {', '.join(invalid)}", + "invalid": invalid, + } + + return { + "status": "pass", + "reason": "optional guardrail metadata is valid", + "invalid": [], + } + + def evaluate_case(repo_root: Path, case: EvalCase) -> dict[str, Any]: package_rules = PACKAGE_RULES.get(case.package) package_root = _package_dir(repo_root, case.package) @@ -424,7 +472,7 @@ def evaluate_case(repo_root: Path, case: EvalCase) -> dict[str, Any]: result["details"]["unknown_package"] = case.package return result - routing = _score_routing(case, package_rules) + routing = _score_routing(package_root, case, package_rules) events = _score_events(case, package_rules) docs = _score_docs(package_root, package_rules) @@ -440,12 +488,7 @@ def evaluate_case(repo_root: Path, case: EvalCase) -> dict[str, Any]: artifact_status = "pass" artifact_reason = "expected artifact templates are present" - if case.max_commands is None and case.max_verbosity is None: - guardrail_status = "skipped" - guardrail_reason = "no optional guardrails configured" - else: - guardrail_status = "pass" - guardrail_reason = "optional guardrails parsed" + 
guardrails = _score_guardrails(case) result["dimensions"] = { "routing_quality": { @@ -458,7 +501,7 @@ def evaluate_case(repo_root: Path, case: EvalCase) -> dict[str, Any]: "reason": events["reason"], }, "docs_clarity": {"status": docs["status"], "reason": docs["reason"]}, - "guardrails": {"status": guardrail_status, "reason": guardrail_reason}, + "guardrails": {"status": guardrails["status"], "reason": guardrails["reason"]}, } result["details"] = { @@ -468,9 +511,12 @@ def evaluate_case(repo_root: Path, case: EvalCase) -> dict[str, Any]: "expected_artifacts": list(case.expected_artifacts), "trigger_hits": routing["trigger_hits"], "suppress_hits": routing["suppress_hits"], + "docs_trigger_hits": routing["docs_trigger_hits"], + "docs_suppress_hits": routing["docs_suppress_hits"], "matching_events": events["matching"], "mismatched_events": events["mismatched"], "unmapped_artifacts": unmapped_artifacts, + "guardrail_invalid": guardrails["invalid"], } return result From 48886212a1a15f9f726334bf54f2c0b824a1ee43 Mon Sep 17 00:00:00 2001 From: NuoJin <1430093320@qq.com> Date: Wed, 25 Mar 2026 17:03:11 +0800 Subject: [PATCH 28/31] ci: add pull request checks for published packages --- .github/workflows/pull-request-checks.yml | 31 +++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 .github/workflows/pull-request-checks.yml diff --git a/.github/workflows/pull-request-checks.yml b/.github/workflows/pull-request-checks.yml new file mode 100644 index 0000000..dc6f2a5 --- /dev/null +++ b/.github/workflows/pull-request-checks.yml @@ -0,0 +1,31 @@ +name: pull-request-checks + +on: + pull_request: + workflow_dispatch: + +jobs: + tests: + runs-on: ubuntu-latest + + steps: + - name: Check out repository + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - name: Run package test suites + run: | + set -eu + for test_dir in skills/*/tests; do + python3 -m unittest discover -s "$test_dir" -p 
'test_*.py' -v + done + + - name: Run eval unit tests + run: python3 -m unittest discover -s evals -p 'test_*.py' -v + + - name: Run eval seed cases + run: python3 evals/run_evals.py From d08eea0111ac96214f49e452adfdedb8e2e32d80 Mon Sep 17 00:00:00 2001 From: NuoJin <1430093320@qq.com> Date: Wed, 25 Mar 2026 17:05:53 +0800 Subject: [PATCH 29/31] docs: harden long-task suite release flow --- CHANGELOG.md | 18 +- README.md | 36 +- README.zh-CN.md | 34 +- docs/publishing.md | 101 +++- docs/publishing.zh-CN.md | 101 +++- docs/release-checklist-long-task-suite.md | 137 +++++ ...release-checklist-long-task-suite.zh-CN.md | 137 +++++ ...03-25-long-task-suite-release-hardening.md | 499 ++++++++++++++++++ skills/README.md | 9 +- skills/README.zh-CN.md | 9 +- skills/skill-context-keeper/README.md | 17 + skills/skill-context-keeper/README.zh-CN.md | 17 + skills/skill-governance/README.md | 2 +- skills/skill-governance/README.zh-CN.md | 2 +- .../docs/publishing-with-skill-installer.md | 2 +- .../publishing-with-skill-installer.zh-CN.md | 2 +- .../skill-governance/references/use-cases.md | 2 +- .../references/use-cases.zh-CN.md | 2 +- skills/skill-handoff-summary/README.md | 17 + skills/skill-handoff-summary/README.zh-CN.md | 17 + skills/skill-phase-gate/README.md | 17 + skills/skill-phase-gate/README.zh-CN.md | 17 + skills/skill-task-continuity/README.md | 17 + skills/skill-task-continuity/README.zh-CN.md | 17 + 24 files changed, 1145 insertions(+), 84 deletions(-) create mode 100644 docs/release-checklist-long-task-suite.md create mode 100644 docs/release-checklist-long-task-suite.zh-CN.md create mode 100644 docs/superpowers/plans/2026-03-25-long-task-suite-release-hardening.md diff --git a/CHANGELOG.md b/CHANGELOG.md index 9c01d78..cc554b5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,11 +6,19 @@ All notable changes to `codex-skill-library` should be documented in this file. 
### Added -- Add scaffolded package roots for `skill-context-keeper`, `skill-phase-gate`, `skill-handoff-summary`, and `skill-task-continuity`, including bilingual README landing pages, routing-first `SKILL.md` files, OpenAI agent metadata, and visible first-pass package directories. -- Add bootstrap groundwork for the long-task continuity suite through initial package metadata, install entry points, and repository index coverage. -- Add evaluation scaffolding groundwork by reserving package directories for future references, scripts, tests, and template assets tied to the long-task continuity suite. -- Add a static CSV-driven evaluation harness under `evals/` with repo-local reporting for trigger routing, artifact expectations, event namespaces, boundary language, and future guardrail columns. -- Tighten the long-task continuity eval harness so routing now depends on published trigger guidance, workflow tokens match exact package/polarity contracts, and optional guardrail metadata is validated rather than just parsed. +- Publish four long-task continuity packages under `skills/`: `skill-context-keeper`, `skill-phase-gate`, `skill-handoff-summary`, and `skill-task-continuity`. +- Add bilingual package entry docs, routing-first `SKILL.md` files, OpenAI agent metadata, reader-facing references, downstream template assets, and package contract tests for the new continuity packages. +- Add the continuity-suite bootstrap helper and downstream template set for `AGENTS.md` plus `.agent-state/*.md` files without turning the repository root into a consumer repo. +- Add `docs/long-task-suite.md` and `docs/long-task-suite.zh-CN.md` so maintainers and readers can understand the suite architecture without opening package internals. +- Add a static continuity eval harness under `evals/` with seed cases, per-package artifact checks, routing checks, exact workflow-token checks, and optional guardrail metadata validation. 
+- Add a pull-request workflow for published package tests plus continuity eval checks. +- Add bilingual release checklist guidance for the continuity-suite publication flow. + +### Changed + +- Update root docs, skills indexes, and publishing guides so all four continuity packages are discoverable, install guidance stays aligned with `skill-installer`, and maintainers can find smoke-test and release-checklist steps quickly. +- Tighten continuity package README install guidance with direct, copyable install examples for `main` and the planned `v0.6.0` release. +- Treat the continuity eval contract as a release-facing surface: routing now depends on published trigger guidance, workflow tokens must match exact package and polarity contracts, and optional guardrail metadata must be valid when present. ## [0.5.1] - 2026-03-25 diff --git a/README.md b/README.md index a58991d..c546dac 100644 --- a/README.md +++ b/README.md @@ -26,44 +26,47 @@ This repository is designed for people who want to: | Skill | Best For | Docs | | --- | --- | --- | | `skill-governance` | Governing skill assets with task-first add, enable, doctor, repair, audit, and document flows | [EN](skills/skill-governance/README.md) / [中文](skills/skill-governance/README.zh-CN.md) | -| `skill-context-keeper` | Refreshing or reconstructing long-task state without turning it into phase planning or a final handoff | [EN](skills/skill-context-keeper/README.md) / [中文](skills/skill-context-keeper/README.zh-CN.md) | -| `skill-phase-gate` | Splitting multi-step coding work into explicit phases, checkpoints, and exit criteria | [EN](skills/skill-phase-gate/README.md) / [中文](skills/skill-phase-gate/README.zh-CN.md) | -| `skill-handoff-summary` | Writing pause or transfer summaries with status, blockers, and next steps | [EN](skills/skill-handoff-summary/README.md) / [中文](skills/skill-handoff-summary/README.zh-CN.md) | -| `skill-task-continuity` | Coordinating the long-task continuity suite across the three narrower 
continuity packages | [EN](skills/skill-task-continuity/README.md) / [中文](skills/skill-task-continuity/README.zh-CN.md) | +| `skill-context-keeper` | Refreshing current task state without drifting into checkpoints or handoff ownership | [EN](skills/skill-context-keeper/README.md) / [中文](skills/skill-context-keeper/README.zh-CN.md) | +| `skill-phase-gate` | Adding preflight and postflight checkpoints around meaningful edits | [EN](skills/skill-phase-gate/README.md) / [中文](skills/skill-phase-gate/README.zh-CN.md) | +| `skill-handoff-summary` | Writing compact continuation handoffs when work pauses or changes owners | [EN](skills/skill-handoff-summary/README.md) / [中文](skills/skill-handoff-summary/README.zh-CN.md) | +| `skill-task-continuity` | Bootstrapping and composing the continuity suite without replacing the atomic packages | [EN](skills/skill-task-continuity/README.md) / [中文](skills/skill-task-continuity/README.zh-CN.md) | ## Quick Start 1. Open the package list in [skills/README.md](skills/README.md). 2. Choose a skill and read its package `README.md`. -3. Install it with `skill-installer`, usually into the default Codex shared library. -4. Use the package reference pages for boundary notes now, and later for examples, prompts, and deeper guidance. +3. Install the package you want with `skill-installer`, using either `main` or a tagged release. +4. For the continuity workflow, start with `skill-task-continuity` when you need suite bootstrap or composition guidance, or install the narrower atomic package directly. +5. Use the package reference pages for boundary notes now, and later for examples, prompts, and deeper guidance. 
-## Install Example +## Install Examples -Install `skill-governance` from this repository: +Install the suite entry package from the repository default branch: ```bash python3 /scripts/install-skill-from-github.py \ --repo Golden-Promise/codex-skill-library \ - --path skills/skill-governance + --path skills/skill-task-continuity ``` -Install the current release: +Pin the upcoming continuity-suite release: ```bash python3 /scripts/install-skill-from-github.py \ --repo Golden-Promise/codex-skill-library \ - --path skills/skill-governance \ - --ref v0.5.1 + --path skills/skill-task-continuity \ + --ref v0.6.0 ``` -Install from a GitHub tree URL: +Install from a GitHub tree URL when you want the public package page directly: ```bash python3 /scripts/install-skill-from-github.py \ - --url https://github.com/Golden-Promise/codex-skill-library/tree/main/skills/skill-governance + --url https://github.com/Golden-Promise/codex-skill-library/tree/main/skills/skill-task-continuity ``` +For maintainer smoke-test commands covering all four continuity packages, use [docs/publishing.md](docs/publishing.md). + ## Reading Guide - English skill index: [skills/README.md](skills/README.md) @@ -75,6 +78,8 @@ python3 /scripts/install-skill-from-github.py \ - `skill-task-continuity` package: [EN](skills/skill-task-continuity/README.md) / [中文](skills/skill-task-continuity/README.zh-CN.md) - Repository publishing guide: [docs/publishing.md](docs/publishing.md) - 中文发布说明: [docs/publishing.zh-CN.md](docs/publishing.zh-CN.md) +- Release checklist for the continuity suite: [docs/release-checklist-long-task-suite.md](docs/release-checklist-long-task-suite.md) +- 中文连续性套件发布清单: [docs/release-checklist-long-task-suite.zh-CN.md](docs/release-checklist-long-task-suite.zh-CN.md) ## Repository Layout @@ -97,4 +102,5 @@ codex-skill-library/ ## For Maintainers Repository versioning, release flow, and validation steps are documented in [docs/publishing.md](docs/publishing.md). 
-If you are publishing this repository for the first time, start there instead of the package runtime docs. +The continuity-suite release checklist lives in [docs/release-checklist-long-task-suite.md](docs/release-checklist-long-task-suite.md). +If you are publishing this repository for the first time, start with those maintainer docs instead of the package runtime docs. diff --git a/README.zh-CN.md b/README.zh-CN.md index 600c44e..1fd9bda 100644 --- a/README.zh-CN.md +++ b/README.zh-CN.md @@ -26,44 +26,47 @@ | Skill | 适用场景 | 文档 | | --- | --- | --- | | `skill-governance` | 用任务式入口治理 skill 资产,包括新增、启用、体检、修复、审计和补文档 | [EN](skills/skill-governance/README.md) / [中文](skills/skill-governance/README.zh-CN.md) | -| `skill-context-keeper` | 在长任务中刷新或重建当前工作状态,不扩展成阶段规划或最终交接 | [EN](skills/skill-context-keeper/README.md) / [中文](skills/skill-context-keeper/README.zh-CN.md) | -| `skill-phase-gate` | 把多步骤编码任务拆成明确阶段、检查点和退出条件 | [EN](skills/skill-phase-gate/README.md) / [中文](skills/skill-phase-gate/README.zh-CN.md) | -| `skill-handoff-summary` | 在暂停或转交时整理状态、阻塞点和下一步摘要 | [EN](skills/skill-handoff-summary/README.md) / [中文](skills/skill-handoff-summary/README.zh-CN.md) | -| `skill-task-continuity` | 在三个连续性原子包之间做套件级协调与边界控制 | [EN](skills/skill-task-continuity/README.md) / [中文](skills/skill-task-continuity/README.zh-CN.md) | +| `skill-context-keeper` | 刷新当前任务状态,不扩展成检查点或交接职责 | [EN](skills/skill-context-keeper/README.md) / [中文](skills/skill-context-keeper/README.zh-CN.md) | +| `skill-phase-gate` | 在有分量的改动前后加入 preflight / postflight 检查点 | [EN](skills/skill-phase-gate/README.md) / [中文](skills/skill-phase-gate/README.zh-CN.md) | +| `skill-handoff-summary` | 在暂停或换人时生成紧凑、面向续做的交接摘要 | [EN](skills/skill-handoff-summary/README.md) / [中文](skills/skill-handoff-summary/README.zh-CN.md) | +| `skill-task-continuity` | 负责连续性套件的启动与组合,但不替代原子包 | [EN](skills/skill-task-continuity/README.md) / [中文](skills/skill-task-continuity/README.zh-CN.md) | ## 快速开始 1. 
先看 [skills/README.zh-CN.md](skills/README.zh-CN.md) 浏览当前可用 skill。 2. 进入具体 skill 包的 `README.md` 了解它是否适合你的场景。 -3. 使用 `skill-installer` 进行安装,通常直接安装到默认的 Codex 共享库。 -4. 现在可先阅读该包下的参考页了解边界说明,后续阶段再继续使用其中补充的示例与提示词资料。 +3. 使用 `skill-installer` 安装目标包,可直接跟踪 `main`,也可固定到某个 tag。 +4. 如果你需要整套连续性流程的启动或组合说明,先从 `skill-task-continuity` 开始;如果只需要单个动作,直接安装对应的原子包。 +5. 现在可先阅读该包下的参考页了解边界说明,后续阶段再继续使用其中补充的示例与提示词资料。 ## 安装示例 -从当前仓库安装 `skill-governance`: +从仓库默认分支安装连续性套件入口包: ```bash python3 /scripts/install-skill-from-github.py \ --repo Golden-Promise/codex-skill-library \ - --path skills/skill-governance + --path skills/skill-task-continuity ``` -固定安装当前发布版本: +固定安装即将发布的连续性套件版本: ```bash python3 /scripts/install-skill-from-github.py \ --repo Golden-Promise/codex-skill-library \ - --path skills/skill-governance \ - --ref v0.5.1 + --path skills/skill-task-continuity \ + --ref v0.6.0 ``` -也可以直接使用 GitHub tree URL: +也可以直接使用 GitHub tree URL 指向公开包页面: ```bash python3 /scripts/install-skill-from-github.py \ - --url https://github.com/Golden-Promise/codex-skill-library/tree/main/skills/skill-governance + --url https://github.com/Golden-Promise/codex-skill-library/tree/main/skills/skill-task-continuity ``` +如果你要做四个连续性包的维护者 smoke test,请直接看 [docs/publishing.zh-CN.md](docs/publishing.zh-CN.md)。 + ## 阅读入口 - English skill index: [skills/README.md](skills/README.md) @@ -75,6 +78,8 @@ python3 /scripts/install-skill-from-github.py \ - `skill-task-continuity` 包说明: [EN](skills/skill-task-continuity/README.md) / [中文](skills/skill-task-continuity/README.zh-CN.md) - English publishing guide: [docs/publishing.md](docs/publishing.md) - 中文发布说明: [docs/publishing.zh-CN.md](docs/publishing.zh-CN.md) +- English continuity-suite release checklist: [docs/release-checklist-long-task-suite.md](docs/release-checklist-long-task-suite.md) +- 中文连续性套件发布清单: [docs/release-checklist-long-task-suite.zh-CN.md](docs/release-checklist-long-task-suite.zh-CN.md) ## 仓库结构 @@ -97,4 +102,5 @@ codex-skill-library/ ## 给维护者 仓库级的版本、发布流程和校验说明统一放在 
[docs/publishing.zh-CN.md](docs/publishing.zh-CN.md)。 -如果你是第一次发布这个仓库,建议先看那份文档,而不是直接从包内运行时说明开始。 +连续性套件发布清单在 [docs/release-checklist-long-task-suite.zh-CN.md](docs/release-checklist-long-task-suite.zh-CN.md)。 +如果你是第一次发布这个仓库,建议先看这些维护者文档,而不是直接从包内运行时说明开始。 diff --git a/docs/publishing.md b/docs/publishing.md index 1afa61a..eac99ab 100644 --- a/docs/publishing.md +++ b/docs/publishing.md @@ -16,57 +16,122 @@ Use this guide if you are maintaining the repository itself rather than using on ## Recommended Release Flow -1. Review the package README and repository docs for clarity. -2. Update [CHANGELOG.md](../CHANGELOG.md) with reader-visible changes. -3. Run validation and tests inside each published skill package. -4. Commit the release state and create a Git tag such as `v0.1.0`. -5. Push the tag and optionally create a GitHub Release. -6. Verify installation from GitHub with a real `skill-installer` command. +1. Review the package README files, repository indexes, and publishing docs for release clarity. +2. Update [CHANGELOG.md](../CHANGELOG.md) with reader-visible changes and confirm the intended tag for this release. +3. Run local package tests, eval tests, and the continuity seed matrix before opening or merging the PR. +4. Let the PR checks workflow confirm the same core package and eval contracts. +5. Run install smoke tests from a pushed release branch or from `main` before tagging. +6. Create the release tag and GitHub Release once the branch is merge-ready. +7. Re-run the smoke tests against the tagged release and record any follow-up. 
## Validation Commands -Current package validation: +Repository package tests: ```bash -cd skills/skill-governance -python3 scripts/manage_skill.py --validate-only -python3 -m unittest discover -s tests -p 'test_*.py' -v +for test_dir in skills/*/tests; do + python3 -m unittest discover -s "$test_dir" -p 'test_*.py' -v +done +``` + +Additional packaging sanity for `skill-governance`: + +```bash +(cd skills/skill-governance && python3 scripts/manage_skill.py --validate-only) ``` Long-task continuity suite validation: ```bash -python3 evals/run_evals.py python3 -m unittest discover -s evals -p 'test_*.py' -v +python3 evals/run_evals.py ``` The suite runner now scores prompt polarity, event namespaces, and strict artifact mapping in addition to repository shape checks. Routing quality also requires trigger guidance to remain visible in published `SKILL.md` and README files, and guardrail metadata is validated statically when optional columns are present. +## Pull Request Checks + +Pull requests should run [.github/workflows/pull-request-checks.yml](../.github/workflows/pull-request-checks.yml). +That workflow is intentionally small: it runs package test directories under `skills/*/tests`, then the continuity eval unit tests, then the continuity seed matrix. + ## Versioning Rules - Use repository-level tags such as `v0.1.0`, `v0.2.0`, and `v1.0.0`. - Increase the minor version for backward-compatible additions. - Increase the major version when package layout or workflow changes in a breaking way. - Keep `skills/skill-governance` stable as the public install path. +- For the current long-task continuity publication pass, the expected next minor release is `v0.6.0` unless other user-visible scope lands first. 
-## Install Verification +## Install Smoke Tests For The Continuity Packages -Verify installation from the repository path: +If you want to smoke-test a pushed but untagged release branch, add `--ref <release-branch>` and point the installer at an isolated temp directory: ```bash -python3 /scripts/install-skill-from-github.py \ - --repo Golden-Promise/codex-skill-library \ - --path skills/skill-governance +tmpdir="$(mktemp -d)" + +for path in \ + skills/skill-context-keeper \ + skills/skill-phase-gate \ + skills/skill-handoff-summary \ + skills/skill-task-continuity +do + python3 /scripts/install-skill-from-github.py \ + --repo Golden-Promise/codex-skill-library \ + --path "$path" \ + --ref <release-branch> \ + --dest "$tmpdir" +done ``` -Verify installation from a GitHub tree URL: +If you want to smoke-test the current `main`, use the same loop without `--ref`: + +```bash +tmpdir="$(mktemp -d)" + +for path in \ + skills/skill-context-keeper \ + skills/skill-phase-gate \ + skills/skill-handoff-summary \ + skills/skill-task-continuity +do + python3 /scripts/install-skill-from-github.py \ + --repo Golden-Promise/codex-skill-library \ + --path "$path" \ + --dest "$tmpdir" +done +``` + +Repeat the same smoke tests against the release tag after publishing: + +```bash +tmpdir="$(mktemp -d)" + +for path in \ + skills/skill-context-keeper \ + skills/skill-phase-gate \ + skills/skill-handoff-summary \ + skills/skill-task-continuity +do + python3 /scripts/install-skill-from-github.py \ + --repo Golden-Promise/codex-skill-library \ + --path "$path" \ + --ref v0.6.0 \ + --dest "$tmpdir" +done +``` + +If you want to inspect the public package page directly, use a GitHub tree URL such as: ```bash python3 /scripts/install-skill-from-github.py \ - --url https://github.com/Golden-Promise/codex-skill-library/tree/main/skills/skill-governance + --url https://github.com/Golden-Promise/codex-skill-library/tree/main/skills/skill-task-continuity ``` +## Release Checklist + +Use 
[docs/release-checklist-long-task-suite.md](release-checklist-long-task-suite.md) for the full continuity-suite release checklist, including tag creation, GitHub release steps, and post-release smoke verification. + ## Maintainer Notes - Keep repository-wide guidance in the root `README.md` and `docs/`. diff --git a/docs/publishing.zh-CN.md b/docs/publishing.zh-CN.md index 1dca627..f91c9c4 100644 --- a/docs/publishing.zh-CN.md +++ b/docs/publishing.zh-CN.md @@ -16,57 +16,122 @@ ## 推荐发布流程 -1. 检查 skill 包 README 和仓库级文档是否清晰可读。 -2. 在 [CHANGELOG.md](../CHANGELOG.md) 中更新对读者可见的改动。 -3. 在每个已发布 skill 包内运行校验和测试。 -4. 提交发布状态,并创建如 `v0.1.0` 这样的 Git tag。 -5. 推送 tag,并按需创建 GitHub Release。 -6. 用真实的 `skill-installer` 命令验证 GitHub 安装流程。 +1. 检查各个 skill 包 README、仓库索引页和发布文档是否清晰、可发布。 +2. 在 [CHANGELOG.md](../CHANGELOG.md) 中更新对读者可见的改动,并确认本次发布的目标 tag。 +3. 在打开或合并 PR 之前,先跑完包级测试、eval 测试和连续性种子矩阵。 +4. 让 PR checks workflow 再次验证同一组核心包合同和 eval 合同。 +5. 在打 tag 之前,先从已推送的发布分支或 `main` 跑一遍安装 smoke test。 +6. 分支达到可合并状态后,创建发布 tag 和 GitHub Release。 +7. 
发布后再对 tag 版本重复一次 smoke test,并记录后续事项。 ## 校验命令 -当前包的校验方式: +仓库中所有已发布包的测试: ```bash -cd skills/skill-governance -python3 scripts/manage_skill.py --validate-only -python3 -m unittest discover -s tests -p 'test_*.py' -v +for test_dir in skills/*/tests; do + python3 -m unittest discover -s "$test_dir" -p 'test_*.py' -v +done +``` + +`skill-governance` 的额外打包健全性检查: + +```bash +(cd skills/skill-governance && python3 scripts/manage_skill.py --validate-only) ``` 长任务连续性套件的校验方式: ```bash -python3 evals/run_evals.py python3 -m unittest discover -s evals -p 'test_*.py' -v +python3 evals/run_evals.py ``` 套件运行器现在除了仓库形状检查之外,还会按提示词正负向、事件命名空间和严格产物映射来评分。 路由评分还要求已发布的 `SKILL.md` 和 README 中保留触发提示,而可选的 guardrail 字段会以静态元数据合同来校验。 +## Pull Request Checks + +Pull request 应运行 [.github/workflows/pull-request-checks.yml](../.github/workflows/pull-request-checks.yml)。 +这个 workflow 故意保持简单:先跑 `skills/*/tests` 下的包级测试,再跑连续性 eval 单测,最后跑连续性种子矩阵。 + ## 版本规则 - 使用仓库级 tag,例如 `v0.1.0`、`v0.2.0`、`v1.0.0` - 向后兼容的增强提升次版本 - 如果包结构或工作流有破坏性变化,则提升主版本 - 对外安装路径尽量保持稳定,例如 `skills/skill-governance` +- 对当前这次长任务连续性发布加固来说,如果没有新的用户可见范围插入,下一次次版本发布应为 `v0.6.0` -## 安装验证 +## 连续性包的安装 Smoke Test -通过仓库路径验证安装: +如果你要对一个“已推送但还没打 tag”的发布分支做 smoke test,请加上 `--ref <release-branch>`,并把安装输出导向隔离的临时目录: ```bash -python3 /scripts/install-skill-from-github.py \ - --repo Golden-Promise/codex-skill-library \ - --path skills/skill-governance +tmpdir="$(mktemp -d)" + +for path in \ + skills/skill-context-keeper \ + skills/skill-phase-gate \ + skills/skill-handoff-summary \ + skills/skill-task-continuity +do + python3 /scripts/install-skill-from-github.py \ + --repo Golden-Promise/codex-skill-library \ + --path "$path" \ + --ref <release-branch> \ + --dest "$tmpdir" +done ``` -通过 GitHub tree URL 验证安装: +如果你要对当前 `main` 做 smoke test,就使用同一组命令,但去掉 `--ref`: + +```bash +tmpdir="$(mktemp -d)" + +for path in \ + skills/skill-context-keeper \ + skills/skill-phase-gate \ + skills/skill-handoff-summary \ + skills/skill-task-continuity +do + python3 
/scripts/install-skill-from-github.py \ + --repo 
Golden-Promise/codex-skill-library \ + --path "$path" \ + --dest "$tmpdir" +done +``` + +发布后,再对 tag 版本重复同一组 smoke test: + +```bash +tmpdir="$(mktemp -d)" + +for path in \ + skills/skill-context-keeper \ + skills/skill-phase-gate \ + skills/skill-handoff-summary \ + skills/skill-task-continuity +do + python3 /scripts/install-skill-from-github.py \ + --repo Golden-Promise/codex-skill-library \ + --path "$path" \ + --ref v0.6.0 \ + --dest "$tmpdir" +done +``` + +如果你要直接检查公开包页面,也可以用 GitHub tree URL,例如: ```bash python3 /scripts/install-skill-from-github.py \ - --url https://github.com/Golden-Promise/codex-skill-library/tree/main/skills/skill-governance + --url https://github.com/Golden-Promise/codex-skill-library/tree/main/skills/skill-task-continuity ``` +## 发布清单 + +完整的连续性套件发布清单见 [docs/release-checklist-long-task-suite.zh-CN.md](release-checklist-long-task-suite.zh-CN.md),其中包含打 tag、GitHub Release 和 post-release smoke verification 步骤。 + ## 维护说明 - 仓库级说明放在根目录 `README.md` 和 `docs/` diff --git a/docs/release-checklist-long-task-suite.md b/docs/release-checklist-long-task-suite.md new file mode 100644 index 0000000..10b2804 --- /dev/null +++ b/docs/release-checklist-long-task-suite.md @@ -0,0 +1,137 @@ +# Long-Task Continuity Suite Release Checklist + +[简体中文](release-checklist-long-task-suite.zh-CN.md) + +Use this checklist when preparing the long-task continuity suite for merge, tag creation, and GitHub release publication. + +## 1. Pre-Release Sanity + +- Confirm the worktree is clean: `git status --short` +- Confirm the release target is still `v0.6.0`; if more user-visible scope landed, recalculate before tagging. +- Re-read the repository entry docs and indexes: + - [README.md](../README.md) + - [README.zh-CN.md](../README.zh-CN.md) + - [skills/README.md](../skills/README.md) + - [skills/README.zh-CN.md](../skills/README.zh-CN.md) +- Confirm [CHANGELOG.md](../CHANGELOG.md) matches the release scope and does not describe unfinished work. + +## 2. 
Local Validation + +Run all published package tests: + +```bash +for test_dir in skills/*/tests; do + python3 -m unittest discover -s "$test_dir" -p 'test_*.py' -v +done +``` + +Run the additional `skill-governance` packaging sanity check: + +```bash +(cd skills/skill-governance && python3 scripts/manage_skill.py --validate-only) +``` + +Run the continuity eval checks: + +```bash +python3 -m unittest discover -s evals -p 'test_*.py' -v +python3 evals/run_evals.py +``` + +## 3. Docs And Index Verification + +- Confirm all four continuity packages are still discoverable from the root and `skills/` indexes. +- Confirm package descriptions remain non-overlapping: + - `skill-context-keeper` = state refresh only + - `skill-phase-gate` = checkpoint only + - `skill-handoff-summary` = handoff only + - `skill-task-continuity` = suite bootstrap and composition only +- Confirm package `README.md` install sections still point to the published `skills/<package>/` paths. +- Confirm the publishing guides still link to this checklist and to the continuity smoke-test commands. + +## 4. Install Smoke Tests From A Pushed Branch Or `main` + +If the release branch is already pushed to GitHub, smoke-test that exact ref before the tag exists: + +```bash +tmpdir="$(mktemp -d)" + +for path in \ + skills/skill-context-keeper \ + skills/skill-phase-gate \ + skills/skill-handoff-summary \ + skills/skill-task-continuity +do + python3 /scripts/install-skill-from-github.py \ + --repo Golden-Promise/codex-skill-library \ + --path "$path" \ + --ref <release-branch> \ + --dest "$tmpdir" +done +``` + +If you want to verify the current `main`, repeat the same loop without `--ref`: + +```bash +tmpdir="$(mktemp -d)" + +for path in \ + skills/skill-context-keeper \ + skills/skill-phase-gate \ + skills/skill-handoff-summary \ + skills/skill-task-continuity +do + python3 /scripts/install-skill-from-github.py \ + --repo Golden-Promise/codex-skill-library \ + --path "$path" \ + --dest "$tmpdir" +done +``` + +## 5. 
PR Readiness + +- Open or update the draft PR. +- Confirm `.github/workflows/pull-request-checks.yml` runs successfully on the PR. +- Confirm the proposed PR title and summary still match the actual release contents. + +## 6. Changelog And Version Confirmation + +- Confirm the next release is still `v0.6.0`. +- Confirm pinned install examples that reference a tag use `v0.6.0`. +- Confirm `CHANGELOG.md` is ready to ship with minimal or no editing. + +## 7. Tag And GitHub Release + +- Merge the release PR. +- Create the tag: + +```bash +git tag v0.6.0 +git push origin v0.6.0 +``` + +- Create the GitHub Release using the prepared release notes draft. + +## 8. Post-Release Tagged Smoke Verification + +Repeat the continuity install smoke tests against the published tag: + +```bash +tmpdir="$(mktemp -d)" + +for path in \ + skills/skill-context-keeper \ + skills/skill-phase-gate \ + skills/skill-handoff-summary \ + skills/skill-task-continuity +do + python3 /scripts/install-skill-from-github.py \ + --repo Golden-Promise/codex-skill-library \ + --path "$path" \ + --ref v0.6.0 \ + --dest "$tmpdir" +done +``` + +- Confirm the tagged installs resolve to the expected package paths. +- Confirm the public GitHub release page points readers to the four published package directories under `skills/`. diff --git a/docs/release-checklist-long-task-suite.zh-CN.md b/docs/release-checklist-long-task-suite.zh-CN.md new file mode 100644 index 0000000..e44016c --- /dev/null +++ b/docs/release-checklist-long-task-suite.zh-CN.md @@ -0,0 +1,137 @@ +# 长任务连续性套件发布清单 + +[English](release-checklist-long-task-suite.md) + +当你要把长任务连续性套件准备到可合并、可打 tag、可发布 GitHub Release 的状态时,请按这份清单执行。 + +## 1. 
发布前基本检查 + +- 确认工作树干净:`git status --short` +- 确认目标版本仍然是 `v0.6.0`;如果又插入了新的用户可见范围,先重新判断版本再打 tag。 +- 重新检查仓库入口和索引页: + - [README.md](../README.md) + - [README.zh-CN.md](../README.zh-CN.md) + - [skills/README.md](../skills/README.md) + - [skills/README.zh-CN.md](../skills/README.zh-CN.md) +- 确认 [CHANGELOG.md](../CHANGELOG.md) 与本次发布范围一致,没有描述尚未完成的内容。 + +## 2. 本地校验 + +运行所有已发布包的测试: + +```bash +for test_dir in skills/*/tests; do + python3 -m unittest discover -s "$test_dir" -p 'test_*.py' -v +done +``` + +运行 `skill-governance` 的额外打包健全性检查: + +```bash +(cd skills/skill-governance && python3 scripts/manage_skill.py --validate-only) +``` + +运行连续性 eval 检查: + +```bash +python3 -m unittest discover -s evals -p 'test_*.py' -v +python3 evals/run_evals.py +``` + +## 3. 文档与索引核对 + +- 确认四个连续性包仍然能从根 README 和 `skills/` 索引页找到。 +- 确认四个包的职责仍然不重叠: + - `skill-context-keeper` = 只负责状态刷新 + - `skill-phase-gate` = 只负责检查点 + - `skill-handoff-summary` = 只负责交接摘要 + - `skill-task-continuity` = 只负责套件启动与组合 +- 确认各包 `README.md` 里的安装段落仍然指向公开发布路径 `skills/<package>/`。 +- 确认发布说明文档仍然链接到这份清单和连续性包的 smoke-test 命令。 + +## 4. 从已推送分支或 `main` 运行安装 Smoke Test + +如果发布分支已经推送到 GitHub,先对这个精确 ref 做 smoke test: + +```bash +tmpdir="$(mktemp -d)" + +for path in \ + skills/skill-context-keeper \ + skills/skill-phase-gate \ + skills/skill-handoff-summary \ + skills/skill-task-continuity +do + python3 /scripts/install-skill-from-github.py \ + --repo Golden-Promise/codex-skill-library \ + --path "$path" \ + --ref <release-branch> \ + --dest "$tmpdir" +done +``` + +如果你想验证当前 `main`,就用同一组命令,但去掉 `--ref`: + +```bash +tmpdir="$(mktemp -d)" + +for path in \ + skills/skill-context-keeper \ + skills/skill-phase-gate \ + skills/skill-handoff-summary \ + skills/skill-task-continuity +do + python3 /scripts/install-skill-from-github.py \ + --repo Golden-Promise/codex-skill-library \ + --path "$path" \ + --dest "$tmpdir" +done +``` + +## 5. PR 就绪检查 + +- 打开或更新 draft PR。 +- 确认 `.github/workflows/pull-request-checks.yml` 在 PR 上通过。 +- 确认 PR 标题和摘要仍然准确描述本次发布内容。 + +## 6. 
变更日志与版本确认 + +- 确认下一次发布仍然是 `v0.6.0`。 +- 确认所有固定 tag 的安装示例都使用 `v0.6.0`。 +- 确认 `CHANGELOG.md` 已经达到“基本不用再改就能发”的状态。 + +## 7. 打 Tag 与 GitHub Release + +- 合并发布 PR。 +- 创建并推送 tag: + +```bash +git tag v0.6.0 +git push origin v0.6.0 +``` + +- 使用准备好的 release notes draft 创建 GitHub Release。 + +## 8. 发布后 Tag 版 Smoke Verification + +对已发布 tag 再做一遍连续性包安装 smoke test: + +```bash +tmpdir="$(mktemp -d)" + +for path in \ + skills/skill-context-keeper \ + skills/skill-phase-gate \ + skills/skill-handoff-summary \ + skills/skill-task-continuity +do + python3 /scripts/install-skill-from-github.py \ + --repo Golden-Promise/codex-skill-library \ + --path "$path" \ + --ref v0.6.0 \ + --dest "$tmpdir" +done +``` + +- 确认 tag 安装解析到预期的包路径。 +- 确认 GitHub Release 页面能把读者正确带到 `skills/` 下的四个已发布包。 diff --git a/docs/superpowers/plans/2026-03-25-long-task-suite-release-hardening.md b/docs/superpowers/plans/2026-03-25-long-task-suite-release-hardening.md new file mode 100644 index 0000000..6b07dac --- /dev/null +++ b/docs/superpowers/plans/2026-03-25-long-task-suite-release-hardening.md @@ -0,0 +1,499 @@ +# Long-Task Suite Release Hardening Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Prepare the long-task continuity suite for a clean draft PR, merge, and GitHub release flow without expanding product scope. + +**Architecture:** Treat this as repository release hardening, not new feature work. Keep the four published packages unchanged unless a real release-facing issue appears, move maintainer guidance into `docs/`, keep user entry points in package `README.md`, and add one simple PR-facing CI workflow that validates package contracts plus the continuity eval harness. 
+ +**Tech Stack:** Markdown, YAML (GitHub Actions), Python `unittest`, existing `evals/run_evals.py`, existing `skill-installer` install flow + +--- + +## File Map + +### Create + +- `.github/workflows/pull-request-checks.yml` + Purpose: Run repository release-confidence checks on pull requests with a simple Python workflow. +- `docs/release-checklist-long-task-suite.md` + Purpose: Maintainer-facing English checklist for pre-release, tag, and post-release validation. +- `docs/release-checklist-long-task-suite.zh-CN.md` + Purpose: Maintainer-facing Simplified Chinese checklist aligned with the English checklist. +- `docs/superpowers/plans/2026-03-25-long-task-suite-release-hardening.md` + Purpose: Execution plan for this release-hardening pass. + +### Modify + +- `README.md` + Purpose: Keep repository entry docs release-ready, make all four long-task packages discoverable, and point readers to maintainer release docs. +- `README.zh-CN.md` + Purpose: Keep bilingual root docs aligned with the English repository entry docs. +- `skills/README.md` + Purpose: Keep the published package index crisp, non-overlapping, and release-ready. +- `skills/README.zh-CN.md` + Purpose: Keep the Chinese package index aligned with the English index. +- `CHANGELOG.md` + Purpose: Turn the current unreleased continuity notes into clean, reader-visible release notes for the next minor release. +- `docs/publishing.md` + Purpose: Add PR CI guidance, smoke-test guidance, and release-checklist links for maintainers. +- `docs/publishing.zh-CN.md` + Purpose: Keep the Chinese publishing guide aligned with the English guide. +- `skills/skill-context-keeper/README.md` + Purpose: Fix only real release-facing clarity issues around installation wording, boundaries, and downstream artifact references if found. +- `skills/skill-context-keeper/README.zh-CN.md` + Purpose: Keep bilingual package guidance aligned if English package wording changes. 
+- `skills/skill-phase-gate/README.md` + Purpose: Fix only real release-facing clarity issues if found during the consistency pass. +- `skills/skill-phase-gate/README.zh-CN.md` + Purpose: Keep bilingual package guidance aligned if English package wording changes. +- `skills/skill-handoff-summary/README.md` + Purpose: Fix only real release-facing clarity issues if found during the consistency pass. +- `skills/skill-handoff-summary/README.zh-CN.md` + Purpose: Keep bilingual package guidance aligned if English package wording changes. +- `skills/skill-task-continuity/README.md` + Purpose: Fix only real release-facing clarity issues around bootstrap usage, downstream templates, and suite composition if found. +- `skills/skill-task-continuity/README.zh-CN.md` + Purpose: Keep bilingual package guidance aligned if English package wording changes. + +### Verify + +- `skills/skill-context-keeper/tests/test_package_contract.py` +- `skills/skill-phase-gate/tests/test_package_contract.py` +- `skills/skill-handoff-summary/tests/test_package_contract.py` +- `skills/skill-task-continuity/tests/test_bootstrap_suite.py` +- `evals/test_run_evals.py` +- `evals/run_evals.py` + +## Implementation Notes + +- Keep the release target conservative: propose `v0.6.0` because `CHANGELOG.md` currently ends at `v0.5.1` and this suite adds four new published packages plus repo-level eval/CI guidance. +- Do not add new runtime assets at the repository root. 
+- Keep install guidance in the repository docs aligned with the existing `skill-installer` command shape: + +```bash +python3 <path-to-skill-installer>/scripts/install-skill-from-github.py \ + --repo Golden-Promise/codex-skill-library \ + --path skills/<skill-name> +``` + +- For tagged-release examples, use the next proposed version string consistently once `CHANGELOG.md` is updated: + +```bash +python3 <path-to-skill-installer>/scripts/install-skill-from-github.py \ + --repo Golden-Promise/codex-skill-library \ + --path skills/<skill-name> \ + --ref v0.6.0 +``` + +- Keep CI simple: one workflow, one Ubuntu job, Python setup, package test loop, eval tests, then `python3 evals/run_evals.py`. + +### Task 1: Audit Repository Docs And Indexes + +**Files:** +- Modify: `README.md` +- Modify: `README.zh-CN.md` +- Modify: `skills/README.md` +- Modify: `skills/README.zh-CN.md` +- Modify: `CHANGELOG.md` +- Modify: `docs/publishing.md` +- Modify: `docs/publishing.zh-CN.md` + +- [ ] **Step 1: Capture the current doc gaps** + +Run: + +```bash +sed -n '1,260p' README.md +sed -n '1,260p' README.zh-CN.md +sed -n '1,220p' skills/README.md +sed -n '1,220p' skills/README.zh-CN.md +sed -n '1,260p' CHANGELOG.md +sed -n '1,260p' docs/publishing.md +sed -n '1,260p' docs/publishing.zh-CN.md +``` + +Expected: +- Root docs still center install examples on `skill-governance` +- Publishing docs do not yet show four-package smoke tests or PR CI +- `CHANGELOG.md` has continuity notes, but the `Unreleased` wording still needs release-ready polish + +- [ ] **Step 2: Write the failing release-doc contract mentally before editing** + +Treat the following as the doc contract to satisfy: + +```text +1. All four continuity packages are visible from root and skills indexes. +2. Package blur is minimized with short, non-overlapping descriptions. +3. Maintainers can find CI, smoke-test, and release-checklist guidance from docs/publishing*. +4. Release examples consistently use the same next-version placeholder/value. +5. English and Chinese docs stay structurally aligned. 
+``` + +Expected: +- Current docs do not yet satisfy all five points + +- [ ] **Step 3: Apply minimal doc edits** + +Edit the files so they: +- keep the four continuity packages visible from the two indexes +- replace single-package release examples with concise multi-package release guidance +- point maintainers to the release checklist docs +- align the next release reference to `v0.6.0` +- keep the Chinese docs structurally parallel to the English docs + +- [ ] **Step 4: Re-read the edited docs** + +Run: + +```bash +sed -n '1,260p' README.md +sed -n '1,260p' README.zh-CN.md +sed -n '1,220p' skills/README.md +sed -n '1,220p' skills/README.zh-CN.md +sed -n '1,260p' CHANGELOG.md +sed -n '1,260p' docs/publishing.md +sed -n '1,260p' docs/publishing.zh-CN.md +``` + +Expected: +- Descriptions read cleanly and do not overlap badly +- Release/install references are consistent +- Root docs do not imply the repo itself is a downstream consumer repo + +- [ ] **Step 5: Commit the doc/index audit pass** + +```bash +git add README.md README.zh-CN.md skills/README.md skills/README.zh-CN.md CHANGELOG.md docs/publishing.md docs/publishing.zh-CN.md docs/superpowers/plans/2026-03-25-long-task-suite-release-hardening.md +git commit -m "docs: harden release-facing continuity docs" +``` + +### Task 2: Add Pull-Request CI Coverage + +**Files:** +- Create: `.github/workflows/pull-request-checks.yml` +- Test: `skills/skill-context-keeper/tests/test_package_contract.py` +- Test: `skills/skill-phase-gate/tests/test_package_contract.py` +- Test: `skills/skill-handoff-summary/tests/test_package_contract.py` +- Test: `skills/skill-task-continuity/tests/test_bootstrap_suite.py` +- Test: `evals/test_run_evals.py` + +- [ ] **Step 1: Write the intended CI contract before adding the workflow** + +The workflow should effectively enforce: + +```text +1. Pull requests run on Ubuntu with Python 3. +2. Every published package test directory under skills/*/tests is executed. +3. eval unit tests run. +4. 
eval seed cases run. +5. The workflow is understandable from a single YAML file. +``` + +Expected: +- No current workflow exists, so this contract is failing by absence + +- [ ] **Step 2: Add a simple GitHub Actions workflow** + +Implement a single workflow file with: + +```yaml +name: pull-request-checks + +on: + pull_request: + workflow_dispatch: + +jobs: + tests: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + - name: Run package tests + run: | + set -eu + for test_dir in skills/*/tests; do + python3 -m unittest discover -s "$test_dir" -p 'test_*.py' -v + done + - name: Run eval unit tests + run: python3 -m unittest discover -s evals -p 'test_*.py' -v + - name: Run eval seed cases + run: python3 evals/run_evals.py +``` + +- [ ] **Step 3: Sanity-check the workflow content locally** + +Run: + +```bash +sed -n '1,220p' .github/workflows/pull-request-checks.yml +``` + +Expected: +- Triggers and commands are easy to read +- No package-specific path is hard-coded beyond the simple `skills/*/tests` loop + +- [ ] **Step 4: Commit the CI workflow** + +```bash +git add .github/workflows/pull-request-checks.yml +git commit -m "ci: add pull request checks for published packages" +``` + +### Task 3: Add Release Checklist Docs + +**Files:** +- Create: `docs/release-checklist-long-task-suite.md` +- Create: `docs/release-checklist-long-task-suite.zh-CN.md` +- Modify: `docs/publishing.md` +- Modify: `docs/publishing.zh-CN.md` + +- [ ] **Step 1: Write the checklist structure** + +Include sections for: + +```text +- preflight repo status +- package tests +- `skill-governance` validate-only packaging sanity +- eval tests and seed cases +- root/index doc verification +- install smoke tests from a pushed branch or `main` +- changelog and version confirmation +- tag and GitHub release steps +- post-release tagged smoke verification +``` + +- [ ] **Step 2: Add the English checklist** + +Use concise 
checklist items with copyable commands such as: + +```bash +git status --short +(cd skills/skill-governance && python3 scripts/manage_skill.py --validate-only) +for test_dir in skills/*/tests; do + python3 -m unittest discover -s "$test_dir" -p 'test_*.py' -v +done +python3 -m unittest discover -s evals -p 'test_*.py' -v +python3 evals/run_evals.py +``` + +- [ ] **Step 3: Add the Chinese checklist** + +Mirror the English checklist structure closely so maintainers can compare the two versions line-by-line. + +- [ ] **Step 4: Link the new checklist docs from the publishing guides** + +Re-read: + +```bash +sed -n '1,260p' docs/release-checklist-long-task-suite.md +sed -n '1,260p' docs/release-checklist-long-task-suite.zh-CN.md +sed -n '1,260p' docs/publishing.md +sed -n '1,260p' docs/publishing.zh-CN.md +``` + +Expected: +- Maintainers can discover the checklist from `docs/publishing*` +- The checklist is release-focused and does not expand into unrelated process docs + +- [ ] **Step 5: Commit the checklist docs** + +```bash +git add docs/release-checklist-long-task-suite.md docs/release-checklist-long-task-suite.zh-CN.md docs/publishing.md docs/publishing.zh-CN.md +git commit -m "docs: add long-task suite release checklist" +``` + +### Task 4: Add Install Smoke-Test Guidance + +**Files:** +- Modify: `docs/publishing.md` +- Modify: `docs/publishing.zh-CN.md` +- Modify: `README.md` +- Modify: `README.zh-CN.md` + +- [ ] **Step 1: Define the smoke-test command set** + +Use a pushed branch ref when you want to smoke-test an unreleased PR branch, use `main` when you want the default public branch, and keep one pinned tagged-release pattern: + +```bash +python3 /scripts/install-skill-from-github.py --repo Golden-Promise/codex-skill-library --path skills/skill-context-keeper --ref +python3 /scripts/install-skill-from-github.py --repo Golden-Promise/codex-skill-library --path skills/skill-phase-gate +python3 /scripts/install-skill-from-github.py --repo 
Golden-Promise/codex-skill-library --path skills/skill-handoff-summary +python3 /scripts/install-skill-from-github.py --repo Golden-Promise/codex-skill-library --path skills/skill-task-continuity --ref v0.6.0 +``` + +- [ ] **Step 2: Add a concise maintainer-facing smoke-test section** + +Place the full command examples in `docs/publishing*`, and only add short pointers from root `README*` if needed for discoverability. Make the unreleased-branch example use `--ref <branch>` explicitly so it does not read like the same command as `main`. + +- [ ] **Step 3: Re-read the smoke-test guidance** + +Run: + +```bash +rg -n "skill-context-keeper|skill-phase-gate|skill-handoff-summary|skill-task-continuity|v0.6.0|smoke" README.md README.zh-CN.md docs/publishing.md docs/publishing.zh-CN.md +``` + +Expected: +- All four packages appear +- Both pushed-branch / `main` and tagged examples are present where appropriate +- Commands stay concise and copyable + +- [ ] **Step 4: Commit the smoke-test guidance** + +```bash +git add README.md README.zh-CN.md docs/publishing.md docs/publishing.zh-CN.md +git commit -m "docs: add install smoke tests for continuity packages" +``` + +### Task 5: Audit Package Contract Clarity + +**Files:** +- Modify if needed: `skills/skill-context-keeper/README.md` +- Modify if needed: `skills/skill-context-keeper/README.zh-CN.md` +- Modify if needed: `skills/skill-phase-gate/README.md` +- Modify if needed: `skills/skill-phase-gate/README.zh-CN.md` +- Modify if needed: `skills/skill-handoff-summary/README.md` +- Modify if needed: `skills/skill-handoff-summary/README.zh-CN.md` +- Modify if needed: `skills/skill-task-continuity/README.md` +- Modify if needed: `skills/skill-task-continuity/README.zh-CN.md` + +- [ ] **Step 1: Audit the four package READMEs for release-facing clarity only** + +Run: + +```bash +sed -n '1,260p' skills/skill-context-keeper/README.md +sed -n '1,260p' skills/skill-phase-gate/README.md +sed -n '1,260p' skills/skill-handoff-summary/README.md 
+sed -n '1,320p' skills/skill-task-continuity/README.md +``` + +Look only for: +- contradictory ownership language +- duplicated responsibilities +- inconsistent install wording +- missing references to downstream templates or assets where users would need them +- broken or missing cross-links + +- [ ] **Step 2: Write the failing contract mentally** + +Package docs should satisfy: + +```text +1. Context keeper = state only. +2. Phase gate = checkpoints only. +3. Handoff summary = handoff only. +4. Task continuity = suite bootstrap/composition only. +5. Install wording is parallel across packages. +6. Downstream template references are clear where relevant. +``` + +Expected: +- If any package violates one of the six points, it needs a minimal fix + +- [ ] **Step 3: Apply only the real fixes** + +Do not rewrite for style. +Touch only the package README files that have a concrete release-facing issue. + +- [ ] **Step 4: Re-run package contract tests** + +Run: + +```bash +python3 -m unittest discover -s skills/skill-context-keeper/tests -p 'test_*.py' -v +python3 -m unittest discover -s skills/skill-phase-gate/tests -p 'test_*.py' -v +python3 -m unittest discover -s skills/skill-handoff-summary/tests -p 'test_*.py' -v +python3 -m unittest discover -s skills/skill-task-continuity/tests -p 'test_*.py' -v +``` + +Expected: +- All package contract tests pass + +- [ ] **Step 5: Commit any package clarity fixes** + +```bash +git add skills/skill-context-keeper/README.md skills/skill-context-keeper/README.zh-CN.md skills/skill-phase-gate/README.md skills/skill-phase-gate/README.zh-CN.md skills/skill-handoff-summary/README.md skills/skill-handoff-summary/README.zh-CN.md skills/skill-task-continuity/README.md skills/skill-task-continuity/README.zh-CN.md +git commit -m "docs: tighten continuity package release guidance" +``` + +### Task 6: Final Verification And Release Notes Synthesis + +**Files:** +- Verify: `.github/workflows/pull-request-checks.yml` +- Verify: 
`docs/release-checklist-long-task-suite.md` +- Verify: `docs/release-checklist-long-task-suite.zh-CN.md` +- Verify: `docs/publishing.md` +- Verify: `docs/publishing.zh-CN.md` +- Verify: `CHANGELOG.md` + +- [ ] **Step 1: Run the full verification set** + +```bash +(cd skills/skill-governance && python3 scripts/manage_skill.py --validate-only) +for test_dir in skills/*/tests; do + python3 -m unittest discover -s "$test_dir" -p 'test_*.py' -v +done +python3 -m unittest discover -s evals -p 'test_*.py' -v +python3 evals/run_evals.py +python3 <path-to-skill-installer>/scripts/install-skill-from-github.py --repo Golden-Promise/codex-skill-library --path skills/skill-task-continuity --ref <branch> --dest "$(mktemp -d)" +python3 <path-to-skill-installer>/scripts/install-skill-from-github.py --repo Golden-Promise/codex-skill-library --path skills/skill-task-continuity --ref v0.6.0 --dest "$(mktemp -d)" +git status --short +``` + +Expected: +- All tests pass +- `evals/run_evals.py` reports the full seed matrix passing +- Smoke-test commands are either executed against reachable GitHub refs or called out explicitly as pending if the branch/tag is not yet published +- The worktree is clean after final commits + +- [ ] **Step 2: Gather release-note inputs** + +Use the edited docs plus `CHANGELOG.md` to prepare: + +```text +Proposed version: v0.6.0 +Draft PR title: release: publish long-task continuity suite +Draft release title: v0.6.0 - long-task continuity suite +``` + +PR / release bullets should mention: +- the long-task reliability problem they solve: state drift, workflow drift, and handoff friction +- four new published packages +- downstream install paths under `skills/` +- maintainer release-checklist and smoke-test flow +- CI coverage for package contracts and eval contracts +- eval harness strictness around routing docs, event tokens, and guardrail metadata + +- [ ] **Step 2.5: Draft the full PR and release text** + +Prepare: + +```text +- Draft PR description with summary, validation, and maintainer notes +- Draft GitHub 
release notes body with install guidance and eval-contract notes +``` + +- [ ] **Step 3: Commit any final doc or workflow touch-ups** + +```bash +git add .github/workflows/pull-request-checks.yml docs/release-checklist-long-task-suite.md docs/release-checklist-long-task-suite.zh-CN.md docs/publishing.md docs/publishing.zh-CN.md README.md README.zh-CN.md skills/README.md skills/README.zh-CN.md CHANGELOG.md +git commit -m "docs: finalize long-task suite release readiness" +``` + +- [ ] **Step 4: Prepare the final release-readiness report** + +The final handoff back to the human should include: +- changed files +- CI workflow added +- checklist docs added +- proposed next version `v0.6.0` +- draft PR title and description +- draft GitHub release title and notes +- blockers, if any +- non-blocking follow-up suggestions, if any diff --git a/skills/README.md b/skills/README.md index 9684726..1ca8337 100644 --- a/skills/README.md +++ b/skills/README.md @@ -15,10 +15,10 @@ This directory contains the installable skill packages published by `codex-skill | Skill | Best For | Docs | | --- | --- | --- | | `skill-governance` | Governing Codex skill assets with add, enable, doctor, repair, audit, and document tasks | [EN](skill-governance/README.md) / [中文](skill-governance/README.zh-CN.md) | -| `skill-context-keeper` | Refreshing or reconstructing long-task state without drifting into phase planning or handoff writing | [EN](skill-context-keeper/README.md) / [中文](skill-context-keeper/README.zh-CN.md) | -| `skill-phase-gate` | Adding phase boundaries, checkpoints, and exit criteria to multi-step coding work | [EN](skill-phase-gate/README.md) / [中文](skill-phase-gate/README.zh-CN.md) | -| `skill-handoff-summary` | Producing pause and transfer summaries with status, blockers, and next steps | [EN](skill-handoff-summary/README.md) / [中文](skill-handoff-summary/README.zh-CN.md) | -| `skill-task-continuity` | Coordinating the continuity suite when context, phases, and handoff concerns 
must stay aligned | [EN](skill-task-continuity/README.md) / [中文](skill-task-continuity/README.zh-CN.md) | +| `skill-context-keeper` | Refreshing current task state without taking over checkpoints or handoffs | [EN](skill-context-keeper/README.md) / [中文](skill-context-keeper/README.zh-CN.md) | +| `skill-phase-gate` | Adding preflight and postflight checkpoints around substantial edits | [EN](skill-phase-gate/README.md) / [中文](skill-phase-gate/README.zh-CN.md) | +| `skill-handoff-summary` | Producing compact continuation handoffs with status, blockers, and exact next steps | [EN](skill-handoff-summary/README.md) / [中文](skill-handoff-summary/README.zh-CN.md) | +| `skill-task-continuity` | Bootstrapping and composing the continuity suite while preserving atomic package boundaries | [EN](skill-task-continuity/README.md) / [中文](skill-task-continuity/README.zh-CN.md) | ## Package Conventions @@ -26,3 +26,4 @@ This directory contains the installable skill packages published by `codex-skill - The directory name should match the `name` field in `SKILL.md`. - Package `README.md` files are the main entry point for users. - `references/` is for reader-facing material; `docs/` is for maintainer notes when needed. +- For the long-task continuity workflow, start with `skill-task-continuity` only when you need suite bootstrap or composition guidance; otherwise install the atomic package that owns the next action. 
diff --git a/skills/README.zh-CN.md b/skills/README.zh-CN.md index ba51cf7..a01f424 100644 --- a/skills/README.zh-CN.md +++ b/skills/README.zh-CN.md @@ -15,10 +15,10 @@ | Skill | 适用场景 | 文档 | | --- | --- | --- | | `skill-governance` | 用任务式入口治理 skill 资产,包括新增、启用、体检、修复、审计和补文档 | [EN](skill-governance/README.md) / [中文](skill-governance/README.zh-CN.md) | -| `skill-context-keeper` | 在长任务中刷新或重建状态,不延伸到阶段规划或交接说明 | [EN](skill-context-keeper/README.md) / [中文](skill-context-keeper/README.zh-CN.md) | -| `skill-phase-gate` | 为多步骤编码任务增加阶段边界、检查点和退出条件 | [EN](skill-phase-gate/README.md) / [中文](skill-phase-gate/README.zh-CN.md) | -| `skill-handoff-summary` | 在暂停或转交时生成包含状态、阻塞点和下一步的摘要 | [EN](skill-handoff-summary/README.md) / [中文](skill-handoff-summary/README.zh-CN.md) | -| `skill-task-continuity` | 在上下文、阶段和交接需求同时出现时统筹连续性套件 | [EN](skill-task-continuity/README.md) / [中文](skill-task-continuity/README.zh-CN.md) | +| `skill-context-keeper` | 刷新当前任务状态,但不接管检查点或交接职责 | [EN](skill-context-keeper/README.md) / [中文](skill-context-keeper/README.zh-CN.md) | +| `skill-phase-gate` | 在较大改动前后增加 preflight / postflight 检查点 | [EN](skill-phase-gate/README.md) / [中文](skill-phase-gate/README.zh-CN.md) | +| `skill-handoff-summary` | 生成紧凑、面向续做的交接摘要,保留状态、阻塞点和下一步 | [EN](skill-handoff-summary/README.md) / [中文](skill-handoff-summary/README.zh-CN.md) | +| `skill-task-continuity` | 在保持原子包边界的前提下启动并组合连续性套件 | [EN](skill-task-continuity/README.md) / [中文](skill-task-continuity/README.zh-CN.md) | ## 包结构约定 @@ -26,3 +26,4 @@ - 目录名应与 `SKILL.md` 中的 `name` 字段保持一致。 - 包内 `README.md` 是给使用者最主要的入口。 - `references/` 主要放读者资料,`docs/` 主要放维护者说明。 +- 对长任务连续性流程来说,只有在需要套件级启动或组合说明时才从 `skill-task-continuity` 开始;如果只需要某个具体动作,直接安装对应的原子包。 diff --git a/skills/skill-context-keeper/README.md b/skills/skill-context-keeper/README.md index 4f9ec40..63df232 100644 --- a/skills/skill-context-keeper/README.md +++ b/skills/skill-context-keeper/README.md @@ -45,6 +45,23 @@ You can ask Codex in natural language: - `Use skill-installer to install 
skill-context-keeper from Golden-Promise/codex-skill-library at skills/skill-context-keeper.` - `Use skill-installer to install skill-context-keeper from Golden-Promise/codex-skill-library at skills/skill-context-keeper using the release or ref I specify.` +Or run `skill-installer` directly: + +```bash +python3 /scripts/install-skill-from-github.py \ + --repo Golden-Promise/codex-skill-library \ + --path skills/skill-context-keeper +``` + +Pin the planned continuity-suite release: + +```bash +python3 /scripts/install-skill-from-github.py \ + --repo Golden-Promise/codex-skill-library \ + --path skills/skill-context-keeper \ + --ref v0.6.0 +``` + ## How To Use Start when the task needs a reliable state refresh before execution continues. diff --git a/skills/skill-context-keeper/README.zh-CN.md b/skills/skill-context-keeper/README.zh-CN.md index bf06d9e..af9305f 100644 --- a/skills/skill-context-keeper/README.zh-CN.md +++ b/skills/skill-context-keeper/README.zh-CN.md @@ -45,6 +45,23 @@ - `请用 skill-installer 从 Golden-Promise/codex-skill-library 的 skills/skill-context-keeper 安装 skill-context-keeper。` - `请用 skill-installer 从 Golden-Promise/codex-skill-library 的 skills/skill-context-keeper 安装 skill-context-keeper,并使用我指定的 release 或 ref。` +如果你想直接运行 `skill-installer`,可使用: + +```bash +python3 /scripts/install-skill-from-github.py \ + --repo Golden-Promise/codex-skill-library \ + --path skills/skill-context-keeper +``` + +固定到本次连续性套件计划发布版本: + +```bash +python3 /scripts/install-skill-from-github.py \ + --repo Golden-Promise/codex-skill-library \ + --path skills/skill-context-keeper \ + --ref v0.6.0 +``` + ## How To Use 当任务在继续执行前需要一次可靠的状态刷新时,就从这个包开始。 diff --git a/skills/skill-governance/README.md b/skills/skill-governance/README.md index ab4816e..1485077 100644 --- a/skills/skill-governance/README.md +++ b/skills/skill-governance/README.md @@ -37,7 +37,7 @@ To install `skill-governance`, use the standard package path in your Codex works You can ask Codex in natural language: - 
`Use skill-installer to install skill-governance from Golden-Promise/codex-skill-library at skills/skill-governance.` -- `Use skill-installer to install skill-governance from Golden-Promise/codex-skill-library at skills/skill-governance using ref v0.5.1.` +- `Use skill-installer to install skill-governance from Golden-Promise/codex-skill-library at skills/skill-governance using ref v0.6.0.` For command patterns and advanced installation notes, see [references/use-cases.md](references/use-cases.md). diff --git a/skills/skill-governance/README.zh-CN.md b/skills/skill-governance/README.zh-CN.md index 1d6b06f..944482f 100644 --- a/skills/skill-governance/README.zh-CN.md +++ b/skills/skill-governance/README.zh-CN.md @@ -37,7 +37,7 @@ 你也可以直接这样对 Codex 说: - `请用 skill-installer 从 Golden-Promise/codex-skill-library 的 skills/skill-governance 安装 skill-governance。` -- `请用 skill-installer 从 Golden-Promise/codex-skill-library 的 skills/skill-governance 安装 skill-governance,并使用 ref v0.5.1。` +- `请用 skill-installer 从 Golden-Promise/codex-skill-library 的 skills/skill-governance 安装 skill-governance,并使用 ref v0.6.0。` 关于命令模式和进阶说明,请查看 [references/use-cases.zh-CN.md](references/use-cases.zh-CN.md)。 diff --git a/skills/skill-governance/docs/publishing-with-skill-installer.md b/skills/skill-governance/docs/publishing-with-skill-installer.md index 45215f2..b601f02 100644 --- a/skills/skill-governance/docs/publishing-with-skill-installer.md +++ b/skills/skill-governance/docs/publishing-with-skill-installer.md @@ -53,7 +53,7 @@ If you want to pin the current release, use: python3 /scripts/install-skill-from-github.py \ --repo Golden-Promise/codex-skill-library \ --path skills/skill-governance \ - --ref v0.5.1 + --ref v0.6.0 ``` ## Repository Files In This Repo diff --git a/skills/skill-governance/docs/publishing-with-skill-installer.zh-CN.md b/skills/skill-governance/docs/publishing-with-skill-installer.zh-CN.md index 4c84246..45c0e85 100644 --- 
a/skills/skill-governance/docs/publishing-with-skill-installer.zh-CN.md +++ b/skills/skill-governance/docs/publishing-with-skill-installer.zh-CN.md @@ -53,7 +53,7 @@ python3 /scripts/install-skill-from-github.py \ python3 /scripts/install-skill-from-github.py \ --repo Golden-Promise/codex-skill-library \ --path skills/skill-governance \ - --ref v0.5.1 + --ref v0.6.0 ``` ## 当前仓库中的根文件 diff --git a/skills/skill-governance/references/use-cases.md b/skills/skill-governance/references/use-cases.md index 28ee995..cbbceb5 100644 --- a/skills/skill-governance/references/use-cases.md +++ b/skills/skill-governance/references/use-cases.md @@ -22,7 +22,7 @@ Install a specific tagged release: python3 /scripts/install-skill-from-github.py \ --repo Golden-Promise/codex-skill-library \ --path skills/skill-governance \ - --ref v0.5.1 + --ref v0.6.0 ``` ## Commands diff --git a/skills/skill-governance/references/use-cases.zh-CN.md b/skills/skill-governance/references/use-cases.zh-CN.md index e5da611..c0dabd5 100644 --- a/skills/skill-governance/references/use-cases.zh-CN.md +++ b/skills/skill-governance/references/use-cases.zh-CN.md @@ -22,7 +22,7 @@ python3 /scripts/install-skill-from-github.py \ python3 /scripts/install-skill-from-github.py \ --repo Golden-Promise/codex-skill-library \ --path skills/skill-governance \ - --ref v0.5.1 + --ref v0.6.0 ``` ## 命令总览 diff --git a/skills/skill-handoff-summary/README.md b/skills/skill-handoff-summary/README.md index 9907a21..b7dc599 100644 --- a/skills/skill-handoff-summary/README.md +++ b/skills/skill-handoff-summary/README.md @@ -46,6 +46,23 @@ You can ask Codex in natural language: - `Use skill-installer to install skill-handoff-summary from Golden-Promise/codex-skill-library at skills/skill-handoff-summary.` - `Use skill-installer to install skill-handoff-summary from Golden-Promise/codex-skill-library at skills/skill-handoff-summary using the release or ref I specify.` +Or run `skill-installer` directly: + +```bash +python3 
/scripts/install-skill-from-github.py \ + --repo Golden-Promise/codex-skill-library \ + --path skills/skill-handoff-summary +``` + +Pin the planned continuity-suite release: + +```bash +python3 /scripts/install-skill-from-github.py \ + --repo Golden-Promise/codex-skill-library \ + --path skills/skill-handoff-summary \ + --ref v0.6.0 +``` + ## How To Use Reach for this package when execution is about to pause or move to another owner. diff --git a/skills/skill-handoff-summary/README.zh-CN.md b/skills/skill-handoff-summary/README.zh-CN.md index 33f1d7f..2b85ed8 100644 --- a/skills/skill-handoff-summary/README.zh-CN.md +++ b/skills/skill-handoff-summary/README.zh-CN.md @@ -46,6 +46,23 @@ - `请用 skill-installer 从 Golden-Promise/codex-skill-library 的 skills/skill-handoff-summary 安装 skill-handoff-summary。` - `请用 skill-installer 从 Golden-Promise/codex-skill-library 的 skills/skill-handoff-summary 安装 skill-handoff-summary,并使用我指定的 release 或 ref。` +如果你想直接运行 `skill-installer`,可使用: + +```bash +python3 /scripts/install-skill-from-github.py \ + --repo Golden-Promise/codex-skill-library \ + --path skills/skill-handoff-summary +``` + +固定到本次连续性套件计划发布版本: + +```bash +python3 /scripts/install-skill-from-github.py \ + --repo Golden-Promise/codex-skill-library \ + --path skills/skill-handoff-summary \ + --ref v0.6.0 +``` + ## How To Use 当执行即将暂停,或者任务要移交给另一个负责人时,就使用这个包。 diff --git a/skills/skill-phase-gate/README.md b/skills/skill-phase-gate/README.md index 1650f1b..fabb6e7 100644 --- a/skills/skill-phase-gate/README.md +++ b/skills/skill-phase-gate/README.md @@ -40,6 +40,23 @@ You can ask Codex in natural language: - `Use skill-installer to install skill-phase-gate from Golden-Promise/codex-skill-library at skills/skill-phase-gate.` - `Use skill-installer to install skill-phase-gate from Golden-Promise/codex-skill-library at skills/skill-phase-gate using the release or ref I specify.` +Or run `skill-installer` directly: + +```bash +python3 /scripts/install-skill-from-github.py \ + --repo 
Golden-Promise/codex-skill-library \ + --path skills/skill-phase-gate +``` + +Pin the planned continuity-suite release: + +```bash +python3 /scripts/install-skill-from-github.py \ + --repo Golden-Promise/codex-skill-library \ + --path skills/skill-phase-gate \ + --ref v0.6.0 +``` + ## How To Use Use this package at a meaningful checkpoint before or after implementation work. diff --git a/skills/skill-phase-gate/README.zh-CN.md b/skills/skill-phase-gate/README.zh-CN.md index ec506bf..2bcd713 100644 --- a/skills/skill-phase-gate/README.zh-CN.md +++ b/skills/skill-phase-gate/README.zh-CN.md @@ -40,6 +40,23 @@ - `请用 skill-installer 从 Golden-Promise/codex-skill-library 的 skills/skill-phase-gate 安装 skill-phase-gate。` - `请用 skill-installer 从 Golden-Promise/codex-skill-library 的 skills/skill-phase-gate 安装 skill-phase-gate,并使用我指定的 release 或 ref。` +如果你想直接运行 `skill-installer`,可使用: + +```bash +python3 /scripts/install-skill-from-github.py \ + --repo Golden-Promise/codex-skill-library \ + --path skills/skill-phase-gate +``` + +固定到本次连续性套件计划发布版本: + +```bash +python3 /scripts/install-skill-from-github.py \ + --repo Golden-Promise/codex-skill-library \ + --path skills/skill-phase-gate \ + --ref v0.6.0 +``` + ## How To Use 在实现前后的关键检查点调用这个包。 diff --git a/skills/skill-task-continuity/README.md b/skills/skill-task-continuity/README.md index b8d6813..a2ff3ef 100644 --- a/skills/skill-task-continuity/README.md +++ b/skills/skill-task-continuity/README.md @@ -31,6 +31,23 @@ You can ask Codex in natural language: - `Use skill-installer to install skill-task-continuity from Golden-Promise/codex-skill-library at skills/skill-task-continuity.` - `Use skill-installer to install skill-task-continuity from Golden-Promise/codex-skill-library at skills/skill-task-continuity using the release or ref I specify.` +Or run `skill-installer` directly: + +```bash +python3 /scripts/install-skill-from-github.py \ + --repo Golden-Promise/codex-skill-library \ + --path skills/skill-task-continuity +``` + 
+Pin the planned continuity-suite release: + +```bash +python3 /scripts/install-skill-from-github.py \ + --repo Golden-Promise/codex-skill-library \ + --path skills/skill-task-continuity \ + --ref v0.6.0 +``` + ## Bootstrap A Downstream Repo Preview the downstream file operations first: diff --git a/skills/skill-task-continuity/README.zh-CN.md b/skills/skill-task-continuity/README.zh-CN.md index b96504c..dd3d695 100644 --- a/skills/skill-task-continuity/README.zh-CN.md +++ b/skills/skill-task-continuity/README.zh-CN.md @@ -31,6 +31,23 @@ - `请用 skill-installer 从 Golden-Promise/codex-skill-library 的 skills/skill-task-continuity 安装 skill-task-continuity。` - `请用 skill-installer 从 Golden-Promise/codex-skill-library 的 skills/skill-task-continuity 安装 skill-task-continuity,并使用我指定的 release 或 ref。` +如果你想直接运行 `skill-installer`,可使用: + +```bash +python3 /scripts/install-skill-from-github.py \ + --repo Golden-Promise/codex-skill-library \ + --path skills/skill-task-continuity +``` + +固定到本次连续性套件计划发布版本: + +```bash +python3 /scripts/install-skill-from-github.py \ + --repo Golden-Promise/codex-skill-library \ + --path skills/skill-task-continuity \ + --ref v0.6.0 +``` + ## 启动下游仓库 先用 dry run 预览即将写入的文件: From 8aacf78ae2067eb92197f12e001418f76ceab3cb Mon Sep 17 00:00:00 2001 From: NuoJin <1430093320@qq.com> Date: Wed, 25 Mar 2026 17:38:02 +0800 Subject: [PATCH 30/31] ci: retrigger pull request checks From 38d0ff91a789a0e48086f673a7c3c4b309b530e2 Mon Sep 17 00:00:00 2001 From: NuoJin <1430093320@qq.com> Date: Wed, 25 Mar 2026 17:49:16 +0800 Subject: [PATCH 31/31] test: isolate governance helpers from host CI env --- .../tests/test_manage_skill.py | 74 ++++++++++++++++++- 1 file changed, 71 insertions(+), 3 deletions(-) diff --git a/skills/skill-governance/tests/test_manage_skill.py b/skills/skill-governance/tests/test_manage_skill.py index 9051e03..4f0b93f 100644 --- a/skills/skill-governance/tests/test_manage_skill.py +++ b/skills/skill-governance/tests/test_manage_skill.py @@ -73,12 
+73,20 @@ class ManageSkillTests(unittest.TestCase): def setUpClass(cls): cls.module = load_manage_skill_module() + def child_env(self, overrides: dict[str, str] | None = None) -> dict[str, str]: + env = dict(os.environ) + env.pop("CI", None) + if overrides: + env.update(overrides) + return env + def run_script(self, *args: str) -> subprocess.CompletedProcess[str]: return subprocess.run( ["python3", str(SCRIPT_PATH), *args], check=False, text=True, capture_output=True, + env=self.child_env(), ) def run_script_with_env( @@ -86,14 +94,12 @@ def run_script_with_env( env: dict[str, str], *args: str, ) -> subprocess.CompletedProcess[str]: - merged_env = dict(os.environ) - merged_env.update(env) return subprocess.run( ["python3", str(SCRIPT_PATH), *args], check=False, text=True, capture_output=True, - env=merged_env, + env=self.child_env(env), ) def run_script_from_path(self, script_path: Path, cwd: Path, *args: str) -> subprocess.CompletedProcess[str]: @@ -103,6 +109,7 @@ def run_script_from_path(self, script_path: Path, cwd: Path, *args: str) -> subp text=True, capture_output=True, cwd=str(cwd), + env=self.child_env(), ) def test_default_prompt_generation_is_consistent(self): @@ -131,6 +138,67 @@ def test_default_prompt_generation_is_consistent(self): prompt, ) + def test_run_script_ignores_host_ci_by_default(self): + with tempfile.TemporaryDirectory() as tmpdir: + library_root = Path(tmpdir) / "_skill-library" + project_root = Path(tmpdir) / "demo-project" + canonical_dir = library_root / "demo-skill" + write_skill_dir(canonical_dir, skill_name="demo-skill") + project_root.mkdir(parents=True, exist_ok=True) + + previous_ci = os.environ.get("CI") + os.environ["CI"] = "1" + try: + result = self.run_script( + "enable", + "demo-skill", + "--library-root", + str(library_root), + "--project-root", + str(project_root), + ) + finally: + if previous_ci is None: + os.environ.pop("CI", None) + else: + os.environ["CI"] = previous_ci + + project_exposure = project_root / ".agents" 
/ "skills" / "demo-skill" + + self.assertEqual(result.returncode, 0, result.stderr) + self.assertTrue(project_exposure.is_symlink()) + + def test_run_script_from_path_ignores_host_ci_by_default(self): + with tempfile.TemporaryDirectory() as tmpdir: + project_root = Path(tmpdir) / "demo-project" + source_dir = project_root / "skill-governance" + scripts_dir = source_dir / "scripts" + write_skill_dir(source_dir, skill_name="skill-governance") + scripts_dir.mkdir(parents=True, exist_ok=True) + script_copy = scripts_dir / "manage_skill.py" + shutil.copy2(SCRIPT_PATH, script_copy) + shutil.copytree(ROOT_DIR / "scripts" / "skill_governance", scripts_dir / "skill_governance") + project_root.mkdir(parents=True, exist_ok=True) + + previous_ci = os.environ.get("CI") + os.environ["CI"] = "1" + try: + result = self.run_script_from_path( + script_copy, + project_root, + "--bootstrap-project-layout", + ) + finally: + if previous_ci is None: + os.environ.pop("CI", None) + else: + os.environ["CI"] = previous_ci + + project_link = project_root / ".agents" / "skills" / "skill-governance" + + self.assertEqual(result.returncode, 0, result.stderr) + self.assertTrue(project_link.is_symlink()) + def test_validate_only_defaults_to_runtime_skill_dir(self): result = self.run_script("--validate-only") self.assertEqual(result.returncode, 0, result.stderr)