From f8c0fa88db575bd4f428a09d688e904c8770a436 Mon Sep 17 00:00:00 2001
From: Khaliq <khaliqgant@gmail.com>
Date: Fri, 10 Apr 2026 19:05:27 +0200
Subject: [PATCH] feat: add workflow hardening investigation workflow

---
 workflows/PLAN-workflow-hardening.md          |  46 ++++++
 workflows/workflow-hardening-investigation.ts | 138 ++++++++++++++++++
 2 files changed, 184 insertions(+)
 create mode 100644 workflows/PLAN-workflow-hardening.md
 create mode 100644 workflows/workflow-hardening-investigation.ts

diff --git a/workflows/PLAN-workflow-hardening.md b/workflows/PLAN-workflow-hardening.md
new file mode 100644
index 000000000..54c73f614
--- /dev/null
+++ b/workflows/PLAN-workflow-hardening.md
@@ -0,0 +1,46 @@
+# PLAN — workflow hardening and diagnosis
+
+## Goal
+Create a workflow that identifies, reproduces, and helps iron out workflow execution problems discovered during real runs.
+
+## Problems to target
+1. Agent planning fragility
+   - Claude plan steps can fail, idle, or return low-quality output.
+   - Workflows should support deterministic plan docs or strict validation gates.
+
+2. Active checkout vs hard-coded path issues
+   - Agents/workflow steps must operate against the current checkout/worktree, not fixed absolute repo paths.
+
+3. Missing workflow assets
+   - Plan docs and helper files must be present and validated early.
+
+4. Opaque validation/build phases
+   - Large monolithic rebuild steps hide the real failing sub-step.
+   - Steps should be split for observability.
+
+5. Environment drift / local state problems
+   - stale `.agent-relay/`
+   - PATH shadowing
+   - tracked `.trajectories` causing false dirty states
+   - SSH/fetch issues that affect reruns
+
+6. Build-tooling assumptions
+   - package builds that rely on ambient tool resolution instead of deterministic invocation
+
+## Desired outcome
+A workflow that:
+- uses Claude for plan/research
+- uses Codex for implementation
+- records environment diagnostics up front
+- validates required workflow assets before agent work begins
+- verifies the active checkout/worktree path before implementation
+- splits build/validation into explicit steps
+- produces review output with actionable distinctions:
+  - workflow flaw
+  - repo/tooling flaw
+  - environment-specific issue
+
+## Acceptance criteria
+- Workflow file added to repo
+- Supporting deterministic plan/research doc added
+- New PR opened
diff --git a/workflows/workflow-hardening-investigation.ts b/workflows/workflow-hardening-investigation.ts
new file mode 100644
index 000000000..d18575b35
--- /dev/null
+++ b/workflows/workflow-hardening-investigation.ts
@@ -0,0 +1,138 @@
+import { workflow } from '@agent-relay/sdk/workflows';
+import { ClaudeModels } from '@agent-relay/sdk';
+
+await workflow('workflow-hardening-investigation') // The whole workflow is one awaited builder chain that ends in .run().
+  .description('Diagnose and harden workflow execution issues across planning, checkout scoping, environment drift, and validation/build observability.')
+  .pattern('dag') // Steps in this chain declare their ordering through dependsOn.
+  .channel('wf-workflow-hardening')
+  .maxConcurrency(3)
+  .timeout(3600000) // 3,600,000 — presumably milliseconds (one hour); TODO confirm the unit against the SDK.
+
+  .agent('planner', { // Runs on the 'claude' CLI; referenced by the 'plan' step.
+    cli: 'claude',
+    preset: 'lead',
+    role: 'Workflow planning and failure-analysis researcher',
+    model: ClaudeModels.SONNET, // Only the planner pins a model; the codex agents leave it unset.
+    retries: 2, // NOTE(review): the 'plan' step also sets retries — confirm how agent and step retries combine.
+  })
+  .agent('implementer', { // Runs on the 'codex' CLI; referenced by the 'implement' step.
+    cli: 'codex',
+    preset: 'worker',
+    role: 'Workflow hardening implementer',
+    retries: 2,
+  })
+  .agent('reviewer', { // Runs on the 'codex' CLI; referenced by the 'review' step.
+    cli: 'codex',
+    preset: 'reviewer',
+    role: 'Workflow hardening reviewer',
+    retries: 1,
+  })
+
+  .step('capture-env', { // Deterministic snapshot of shell and git state; its output is injected into the 'plan' task.
+    type: 'deterministic', // Expansions below are double-quoted so PWD/PATH are printed verbatim (no word splitting or globbing).
+    command: `
+      set -e
+      echo "PWD=$PWD"
+      echo "PATH=$PATH"
+      echo 'agent-relay versions:'
+      which -a agent-relay || true
+      agent-relay --version || true
+      echo 'git branch:'
+      git rev-parse --abbrev-ref HEAD
+      echo 'dirty:'
+      git status --short || true
+      echo 'has .agent-relay?'
+      [ -d .agent-relay ] && echo yes || echo no
+      echo 'has .trajectories?'
+      [ -d .trajectories ] && echo yes || echo no
+    `,
+    captureOutput: true,
+    failOnError: true, // With set -e, the unguarded git rev-parse makes the command exit non-zero outside a usable git checkout.
+  })
+
+  .step('read-plan-doc', { // Reads the plan doc so its text can be injected into the 'plan' task.
+    type: 'deterministic',
+    command: 'cat workflows/PLAN-workflow-hardening.md', // Relative path; presumably resolved against the cwd given to .run() — confirm.
+    captureOutput: true,
+    failOnError: true, // cat exits non-zero when the file is missing; presumably that fails the step — confirm SDK semantics.
+  })
+
+  .step('plan', { // Agent step: turns the plan doc and the environment snapshot into a sectioned plan.
+    agent: 'planner',
+    dependsOn: ['capture-env', 'read-plan-doc'], // Both outputs are referenced by placeholders in the task text.
+    task: `Create a concise workflow-hardening plan for this repo.
+
+Plan doc:
+{{steps.read-plan-doc.output}}
+
+Current environment:
+{{steps.capture-env.output}}
+
+Return sections:
+1. WORKFLOW_FLAWS
+2. ENVIRONMENT_SPECIFIC_ISSUES
+3. REPO_TOOLING_ISSUES
+4. IMPLEMENTATION_PLAN
+5. VALIDATION_PLAN
+
+End with PLAN_COMPLETE.`,
+    verification: { type: 'output_contains', value: 'PLAN_COMPLETE' }, // Checks for the sentinel the prompt asks the agent to end with.
+    retries: 2, // NOTE(review): the 'planner' agent also sets retries: 2 — confirm how step and agent retries combine.
+  })
+
+  .step('implement', { // Agent step: applies the plan produced by the 'plan' step to the working tree.
+    agent: 'implementer',
+    dependsOn: ['plan'], // The plan text is injected through the {{steps.plan.output}} placeholder.
+    task: `Implement the workflow hardening plan in the current checkout/worktree.
+
+Plan:
+{{steps.plan.output}}
+
+Requirements:
+- keep edits focused on workflow reliability, diagnostics, and validation clarity
+- prefer current-checkout semantics over hard-coded paths
+- add/adjust files needed to make workflow runs easier to debug and more deterministic
+- write code/files to disk
+- end by printing CHANGES_COMPLETE`,
+    verification: { type: 'exit_code' }, // NOTE(review): the prompt requests CHANGES_COMPLETE, but this does not check for it — confirm intent.
+    retries: 2,
+  })
+
+  .step('verify-diff', { // Gate: fail the run when the 'implement' step left nothing changed in the checkout.
+    type: 'deterministic', // Uses git status --porcelain because plain "git diff --quiet" ignores untracked and staged files.
+    dependsOn: ['implement'], // Untracked files honour .gitignore, so ignored local state does not count as a change.
+    command: `
+      set -e
+      if [ -z "$(git status --porcelain)" ]; then
+        echo NO_CHANGES_DETECTED
+        exit 1
+      fi
+      git status --short; git diff --stat
+    `,
+    captureOutput: true, // Captured text is injected into the 'review' task as the diff summary.
+    failOnError: true,
+  })
+
+  .step('review', { // Agent step: assesses the changes against the plan and classifies what remains.
+    agent: 'reviewer',
+    dependsOn: ['plan', 'verify-diff'], // Both outputs are referenced by placeholders in the task text.
+    task: `Review the workflow hardening changes.
+
+Plan:
+{{steps.plan.output}}
+
+Diff summary:
+{{steps.verify-diff.output}}
+
+Return:
+- PASS_FAIL
+- what workflow flaws were addressed
+- what environment-specific issues remain out of scope
+- what repo/tooling follow-ups still remain
+
+End with REVIEW_COMPLETE.`,
+    verification: { type: 'output_contains', value: 'REVIEW_COMPLETE' }, // NOTE(review): only the sentinel is checked, not the PASS_FAIL verdict — confirm intent.
+    retries: 1,
+  })
+
+  .run({ cwd: process.cwd() }); // Passes the launching process's working directory instead of a fixed absolute path.