From f8c0fa88db575bd4f428a09d688e904c8770a436 Mon Sep 17 00:00:00 2001
From: Khaliq
Date: Fri, 10 Apr 2026 19:05:27 +0200
Subject: [PATCH] feat: add workflow hardening investigation workflow

---
Review notes (this area is not part of the commit message):
The verify-diff step now gates on "git status --porcelain" so that staged and
untracked files count as changes; .agent-relay and .trajectories are excluded.
The workflow file was edited after this patch was generated, so the blob id on
its "index" line has not been regenerated.

 workflows/PLAN-workflow-hardening.md          |  46 ++++++
 workflows/workflow-hardening-investigation.ts | 153 +++++++++++++++++++
 2 files changed, 199 insertions(+)
 create mode 100644 workflows/PLAN-workflow-hardening.md
 create mode 100644 workflows/workflow-hardening-investigation.ts

diff --git a/workflows/PLAN-workflow-hardening.md b/workflows/PLAN-workflow-hardening.md
new file mode 100644
index 000000000..54c73f614
--- /dev/null
+++ b/workflows/PLAN-workflow-hardening.md
@@ -0,0 +1,46 @@
+# PLAN — workflow hardening and diagnosis
+
+## Goal
+Create a workflow that identifies, reproduces, and helps iron out workflow execution problems discovered during real runs.
+
+## Problems to target
+1. Agent planning fragility
+   - Claude plan steps can fail, idle, or return low-quality output.
+   - Workflows should support deterministic plan docs or strict validation gates.
+
+2. Active checkout vs hard-coded path issues
+   - Agents/workflow steps must operate against the current checkout/worktree, not fixed absolute repo paths.
+
+3. Missing workflow assets
+   - Plan docs and helper files must be present and validated early.
+
+4. Opaque validation/build phases
+   - Large monolithic rebuild steps hide the real failing sub-step.
+   - Steps should be split for observability.
+
+5. Environment drift / local state problems
+   - stale `.agent-relay/`
+   - PATH shadowing
+   - tracked `.trajectories` causing false dirty states
+   - SSH/fetch issues that affect reruns
+
+6. Build-tooling assumptions
+   - package builds that rely on ambient tool resolution instead of deterministic invocation
+
+## Desired outcome
+A workflow that:
+- uses Claude for plan/research
+- uses Codex for implementation
+- records environment diagnostics up front
+- validates required workflow assets before agent work begins
+- verifies the active checkout/worktree path before implementation
+- splits build/validation into explicit steps
+- produces review output with actionable distinctions:
+  - workflow flaw
+  - repo/tooling flaw
+  - environment-specific issue
+
+## Acceptance criteria
+- Workflow file added to repo
+- Supporting deterministic plan/research doc added
+- New PR opened
diff --git a/workflows/workflow-hardening-investigation.ts b/workflows/workflow-hardening-investigation.ts
new file mode 100644
index 000000000..d18575b35
--- /dev/null
+++ b/workflows/workflow-hardening-investigation.ts
@@ -0,0 +1,153 @@
+import { workflow } from '@agent-relay/sdk/workflows';
+import { ClaudeModels } from '@agent-relay/sdk';
+
+// Pipeline: capture-env + read-plan-doc -> plan -> implement -> verify-diff -> review.
+// Deterministic steps run shell commands; agent steps run on the CLIs declared below.
+await workflow('workflow-hardening-investigation')
+  .description('Diagnose and harden workflow execution issues across planning, checkout scoping, environment drift, and validation/build observability.')
+  .pattern('dag')
+  .channel('wf-workflow-hardening')
+  .maxConcurrency(3)
+  .timeout(3600000) // presumably milliseconds (one hour); confirm against the SDK
+
+  .agent('planner', {
+    cli: 'claude',
+    preset: 'lead',
+    role: 'Workflow planning and failure-analysis researcher',
+    model: ClaudeModels.SONNET,
+    retries: 2,
+  })
+  .agent('implementer', {
+    cli: 'codex',
+    preset: 'worker',
+    role: 'Workflow hardening implementer',
+    retries: 2,
+  })
+  .agent('reviewer', {
+    cli: 'codex',
+    preset: 'reviewer',
+    role: 'Workflow hardening reviewer',
+    retries: 1,
+  })
+
+  // Record the shell environment before any agent runs. Variables are quoted so
+  // that paths containing whitespace are printed exactly.
+  .step('capture-env', {
+    type: 'deterministic',
+    command: `
+      set -e
+      echo "PWD=$PWD"
+      echo "PATH=$PATH"
+      echo 'agent-relay versions:'
+      which -a agent-relay || true
+      agent-relay --version || true
+      echo 'git branch:'
+      git rev-parse --abbrev-ref HEAD
+      echo 'dirty:'
+      git status --short || true
+      echo 'has .agent-relay?'
+      [ -d .agent-relay ] && echo yes || echo no
+      echo 'has .trajectories?'
+      [ -d .trajectories ] && echo yes || echo no
+    `,
+    captureOutput: true,
+    failOnError: true,
+  })
+
+  // Fail early when the plan doc is missing; its text is fed to the planner prompt.
+  .step('read-plan-doc', {
+    type: 'deterministic',
+    command: 'cat workflows/PLAN-workflow-hardening.md',
+    captureOutput: true,
+    failOnError: true,
+  })
+
+  // The planner must end its output with PLAN_COMPLETE to pass verification.
+  .step('plan', {
+    agent: 'planner',
+    dependsOn: ['capture-env', 'read-plan-doc'],
+    task: `Create a concise workflow-hardening plan for this repo.
+
+Plan doc:
+{{steps.read-plan-doc.output}}
+
+Current environment:
+{{steps.capture-env.output}}
+
+Return sections:
+1. WORKFLOW_FLAWS
+2. ENVIRONMENT_SPECIFIC_ISSUES
+3. REPO_TOOLING_ISSUES
+4. IMPLEMENTATION_PLAN
+5. VALIDATION_PLAN
+
+End with PLAN_COMPLETE.`,
+    verification: { type: 'output_contains', value: 'PLAN_COMPLETE' },
+    retries: 2,
+  })
+
+  // The implementer edits the checkout directly; verify-diff below checks the result.
+  .step('implement', {
+    agent: 'implementer',
+    dependsOn: ['plan'],
+    task: `Implement the workflow hardening plan in the current checkout/worktree.
+
+Plan:
+{{steps.plan.output}}
+
+Requirements:
+- keep edits focused on workflow reliability, diagnostics, and validation clarity
+- prefer current-checkout semantics over hard-coded paths
+- add/adjust files needed to make workflow runs easier to debug and more deterministic
+- write code/files to disk
+- end by printing CHANGES_COMPLETE`,
+    verification: { type: 'exit_code' },
+    retries: 2,
+  })
+
+  // Gate: fail unless the implementer changed the checkout. "git status --porcelain"
+  // also reports staged and untracked files, which a bare "git diff" does not show.
+  // .agent-relay and .trajectories are excluded because the plan doc treats them as
+  // local state that causes false dirty results.
+  .step('verify-diff', {
+    type: 'deterministic',
+    dependsOn: ['implement'],
+    command: `
+      set -e
+      changes="$(git status --porcelain -- . ':(exclude).agent-relay' ':(exclude).trajectories')"
+      if [ -z "$changes" ]; then
+        echo NO_CHANGES_DETECTED
+        exit 1
+      fi
+      echo "$changes"
+      git diff --stat HEAD -- . ':(exclude).agent-relay' ':(exclude).trajectories'
+    `,
+    captureOutput: true,
+    failOnError: true,
+  })
+
+  // The reviewer receives the plan and the change summary captured by verify-diff.
+  .step('review', {
+    agent: 'reviewer',
+    dependsOn: ['plan', 'verify-diff'],
+    task: `Review the workflow hardening changes.
+
+Plan:
+{{steps.plan.output}}
+
+Diff summary:
+{{steps.verify-diff.output}}
+
+Return:
+- PASS_FAIL
+- what workflow flaws were addressed
+- what environment-specific issues remain out of scope
+- what repo/tooling follow-ups still remain
+
+End with REVIEW_COMPLETE.`,
+    verification: { type: 'output_contains', value: 'REVIEW_COMPLETE' },
+    retries: 1,
+  })
+
+  // Run with the current working directory of this Node process.
+  .run({ cwd: process.cwd() });