From a368875e137d441a225f69d42444f35b738c58c3 Mon Sep 17 00:00:00 2001 From: "IM.codes" Date: Sun, 10 May 2026 10:32:05 +0800 Subject: [PATCH 01/90] Implement smart P2P workflow upgrade --- server/src/p2p-workflow-sanitize.ts | 824 ++++++++ server/src/routes/discussions.ts | 31 +- server/src/ws/bridge.ts | 338 +++- server/test/bridge.test.ts | 433 +++++ server/test/p2p-workflow-sanitize.test.ts | 428 +++++ shared/cron-types.ts | 19 + shared/p2p-advanced.ts | 63 +- shared/p2p-modes.ts | 29 + shared/p2p-workflow-artifacts.ts | 186 ++ shared/p2p-workflow-compiler.ts | 277 +++ shared/p2p-workflow-constants.ts | 189 ++ shared/p2p-workflow-diagnostics.ts | 149 ++ shared/p2p-workflow-logic-evaluator.ts | 176 ++ shared/p2p-workflow-materialize.ts | 105 + shared/p2p-workflow-messages.ts | 315 +++ shared/p2p-workflow-policy.ts | 87 + shared/p2p-workflow-projection.ts | 17 + shared/p2p-workflow-prompt.ts | 98 + shared/p2p-workflow-redaction.ts | 34 + shared/p2p-workflow-script.ts | 246 +++ shared/p2p-workflow-types.ts | 378 ++++ shared/p2p-workflow-validators.ts | 680 +++++++ shared/test-session-guard.ts | 6 + src/daemon/command-handler.ts | 767 +++++++- src/daemon/cron-executor.ts | 124 +- src/daemon/p2p-discussion-writer.ts | 165 ++ src/daemon/p2p-orchestrator.ts | 1206 +++++++++++- src/daemon/p2p-workflow-artifact-runtime.ts | 1005 ++++++++++ src/daemon/p2p-workflow-bind.ts | 140 ++ src/daemon/p2p-workflow-discussion-offsets.ts | 258 +++ src/daemon/p2p-workflow-policy-recheck.ts | 141 ++ src/daemon/p2p-workflow-restart.ts | 62 + src/daemon/p2p-workflow-script-concurrency.ts | 43 + src/daemon/p2p-workflow-script-runner.ts | 569 ++++++ src/daemon/p2p-workflow-static-policy.ts | 126 ++ src/daemon/server-link.ts | 61 + src/daemon/supervision-automation.ts | 84 +- test/daemon/p2p-adapter-topology.test.ts | 175 ++ .../p2p-artifact-identity-persistence.test.ts | 129 ++ ...p2p-artifact-persistence-hardening.test.ts | 129 ++ test/daemon/p2p-discussion-list.test.ts | 204 +- .../p2p-discussion-writer-queue.test.ts | 90 + test/daemon/p2p-parser.test.ts | 38 +- test/daemon/p2p-prototype-pollution.test.ts | 80 + test/daemon/p2p-script-runner-sandbox.test.ts | 66 + .../p2p-workflow-allowlist-loader.test.ts | 195 ++ test/daemon/p2p-workflow-artifacts.test.ts | 437 +++++ .../p2p-workflow-discussion-offsets.test.ts | 186 ++ .../daemon/p2p-workflow-launch-wiring.test.ts | 28 + test/daemon/p2p-workflow-runtime.test.ts | 707 +++++++ test/daemon/p2p-workflow-script.test.ts | 429 +++++ test/daemon/supervision-automation.test.ts | 67 +- test/e2e/p2p-workflow-launch.test.ts | 412 ++++ test/shared/p2p-workflow-artifacts.test.ts | 99 + test/shared/p2p-workflow-compiler.test.ts | 199 ++ .../p2p-workflow-logic-evaluator.test.ts | 165 ++ test/shared/p2p-workflow-materialize.test.ts | 46 + test/shared/p2p-workflow-prompt.test.ts | 57 + test/shared/p2p-workflow-protocol.test.ts | 137 ++ test/shared/p2p-workflow-redaction.test.ts | 30 + test/shared/p2p-workflow-script.test.ts | 142 ++ test/shared/p2p-workflow-validators.test.ts | 244 +++ test/shared/test-session-guard.test.ts | 5 + test/spec/p2p-workflow-regression.test.ts | 1705 +++++++++++++++++ web/src/app.tsx | 39 +- .../AdvancedWorkflowCanvasEditor.tsx | 676 +++++++ web/src/components/P2pConfigPanel.tsx | 564 +++++- web/src/components/P2pProgressCard.tsx | 24 + web/src/components/SessionControls.tsx | 96 +- web/src/i18n/locales/en.json | 71 + web/src/i18n/locales/es.json | 71 + web/src/i18n/locales/ja.json | 71 + web/src/i18n/locales/ko.json | 71 + web/src/i18n/locales/ru.json | 71 + web/src/i18n/locales/zh-CN.json | 71 + web/src/i18n/locales/zh-TW.json | 71 + web/src/p2p-run-mapping.ts | 57 + web/src/pages/DiscussionsPage.tsx | 40 +- web/src/ws-client.ts | 214 ++- .../AdvancedWorkflowCanvasEditor.test.tsx | 241 +++ web/test/components/P2pConfigPanel.test.tsx | 633 +++++- web/test/components/SessionControls.test.tsx | 54 +- .../i18n/p2p-workflow-diagnostics.test.ts | 30 + web/test/p2p-run-mapping.test.ts | 99 + web/test/pages/DiscussionsPage.test.tsx | 67 +- web/test/ws-client.test.ts | 100 + 86 files changed, 19234 insertions(+), 257 deletions(-) create mode 100644 server/src/p2p-workflow-sanitize.ts create mode 100644 server/test/p2p-workflow-sanitize.test.ts create mode 100644 shared/p2p-workflow-artifacts.ts create mode 100644 shared/p2p-workflow-compiler.ts create mode 100644 shared/p2p-workflow-constants.ts create mode 100644 shared/p2p-workflow-diagnostics.ts create mode 100644 shared/p2p-workflow-logic-evaluator.ts create mode 100644 shared/p2p-workflow-materialize.ts create mode 100644 shared/p2p-workflow-messages.ts create mode 100644 shared/p2p-workflow-policy.ts create mode 100644 shared/p2p-workflow-projection.ts create mode 100644 shared/p2p-workflow-prompt.ts create mode 100644 shared/p2p-workflow-redaction.ts create mode 100644 shared/p2p-workflow-script.ts create mode 100644 shared/p2p-workflow-types.ts create mode 100644 shared/p2p-workflow-validators.ts create mode 100644 src/daemon/p2p-discussion-writer.ts create mode 100644 src/daemon/p2p-workflow-artifact-runtime.ts create mode 100644 src/daemon/p2p-workflow-bind.ts create mode 100644 src/daemon/p2p-workflow-discussion-offsets.ts create mode 100644 src/daemon/p2p-workflow-policy-recheck.ts create mode 100644 src/daemon/p2p-workflow-restart.ts create mode 100644 src/daemon/p2p-workflow-script-concurrency.ts create mode 100644 src/daemon/p2p-workflow-script-runner.ts create mode 100644 src/daemon/p2p-workflow-static-policy.ts create mode 100644 test/daemon/p2p-adapter-topology.test.ts create mode 100644 test/daemon/p2p-artifact-identity-persistence.test.ts create mode 100644 test/daemon/p2p-artifact-persistence-hardening.test.ts create mode 100644 test/daemon/p2p-discussion-writer-queue.test.ts create mode 100644 test/daemon/p2p-prototype-pollution.test.ts create mode 100644 test/daemon/p2p-script-runner-sandbox.test.ts create mode 100644 test/daemon/p2p-workflow-allowlist-loader.test.ts create mode 100644 test/daemon/p2p-workflow-artifacts.test.ts create mode 100644 test/daemon/p2p-workflow-discussion-offsets.test.ts create mode 100644 test/daemon/p2p-workflow-launch-wiring.test.ts create mode 100644 test/daemon/p2p-workflow-runtime.test.ts create mode 100644 test/daemon/p2p-workflow-script.test.ts create mode 100644 test/e2e/p2p-workflow-launch.test.ts create mode 100644 test/shared/p2p-workflow-artifacts.test.ts create mode 100644 test/shared/p2p-workflow-compiler.test.ts create mode 100644 test/shared/p2p-workflow-logic-evaluator.test.ts create mode 100644 test/shared/p2p-workflow-materialize.test.ts create mode 100644 test/shared/p2p-workflow-prompt.test.ts create mode 100644 test/shared/p2p-workflow-protocol.test.ts create mode 100644 test/shared/p2p-workflow-redaction.test.ts create mode 100644 test/shared/p2p-workflow-script.test.ts create mode 100644 test/shared/p2p-workflow-validators.test.ts create mode 100644 test/spec/p2p-workflow-regression.test.ts create mode 100644 web/src/components/AdvancedWorkflowCanvasEditor.tsx create mode 100644 web/test/components/AdvancedWorkflowCanvasEditor.test.tsx create mode 100644 web/test/i18n/p2p-workflow-diagnostics.test.ts diff --git a/server/src/p2p-workflow-sanitize.ts b/server/src/p2p-workflow-sanitize.ts new file mode 100644 index 000000000..7d038e7d3 --- /dev/null +++ b/server/src/p2p-workflow-sanitize.ts @@ -0,0 +1,824 @@ +import { + P2P_FORBIDDEN_ENVELOPE_FIELD_NAMES, + P2P_SANITIZE_MAX_ARRAY_ITEMS, + P2P_SANITIZE_MAX_DEPTH, + P2P_SANITIZE_MAX_OBJECT_KEYS, + P2P_SANITIZE_MAX_STRING_BYTES, + P2P_SANITIZE_MAX_TOTAL_BYTES, + P2P_WORKFLOW_PROJECTION_VERSION, +} from '../../shared/p2p-workflow-constants.js'; +import { + P2P_WORKFLOW_DIAGNOSTIC_CODES, + P2P_WORKFLOW_DIAGNOSTIC_PHASES, + P2P_WORKFLOW_DIAGNOSTIC_SEVERITIES, + makeP2pWorkflowDiagnostic, + type P2pWorkflowDiagnostic, + type P2pWorkflowDiagnosticCode, + type P2pWorkflowDiagnosticPhase, + type P2pWorkflowDiagnosticSeverity, +} from '../../shared/p2p-workflow-diagnostics.js'; +import { buildPersistedSnapshotFromProjection } from '../../shared/p2p-workflow-projection.js'; +import type { P2pPersistedWorkflowSnapshot, P2pWorkflowStatusProjection } from '../../shared/p2p-workflow-types.js'; + +const FORBIDDEN_KEYS = new Set([ + '__proto__', + 'constructor', + 'prototype', + ...P2P_FORBIDDEN_ENVELOPE_FIELD_NAMES, +]); + +const PROJECTION_STATUSES = new Set([ + 'queued', + 'running', + 'blocked', + 'completed', + 'failed', + 'cancelled', + 'stale', +]); + +const WORKFLOW_DIAGNOSTIC_CODES = new Set(P2P_WORKFLOW_DIAGNOSTIC_CODES); +const WORKFLOW_DIAGNOSTIC_PHASES = new Set(P2P_WORKFLOW_DIAGNOSTIC_PHASES); +const WORKFLOW_DIAGNOSTIC_SEVERITIES = new Set(P2P_WORKFLOW_DIAGNOSTIC_SEVERITIES); +const SERVER_SIDE_SANITIZE_CODES = new Set([ + 'private_projection_field_dropped', + 'legacy_progress_snapshot_sanitized', +]); + +type BoundedCloneContext = { + remainingBytes: number; + truncated: boolean; + seen: Set; +}; + +export type SanitizedP2pOrchestrationRun = { + id: string; + discussion_id: string; + server_id: string; + main_session: string; + initiator_session: string; + current_target_session: string | null; + final_return_session: string; + remaining_targets: string; + mode_key: string; + status: string; + request_message_id: string | null; + callback_message_id: string | null; + context_ref: string; + timeout_ms: number; + result_summary: string | null; + error: string | null; + progress_snapshot: string; + created_at: string; + updated_at: string; + completed_at: string | null; + workflow_projection: P2pWorkflowStatusProjection; + current_round?: number; + total_rounds?: number; + total_count?: number; + total_hops?: number; + completed_hops_count?: number; + active_hop_number?: number | null; + active_round_hop_number?: number | null; + active_phase?: string; + current_round_mode?: string; + current_target_label?: string | null; + initiator_label?: string | null; + run_phase?: string; + summary_phase?: string | null; + hop_states?: Array>; + hop_counts?: Record; + all_nodes?: Array>; + advanced_p2p_enabled?: boolean; + current_round_id?: string | null; + advanced_nodes?: Array>; +}; + +export type SanitizedP2pRunUpdate = SanitizedP2pOrchestrationRun & Record; + +const SAFE_LEGACY_RUN_UPDATE_FIELDS = [ + 'current_round_mode', + 'current_round', + 'total_rounds', + 'total_count', + 'total_hops', + 'remaining_count', + 'completed_hops_count', + 'completed_round_hops_count', + 'skipped_hops', + 'active_phase', + 'hop_started_at', + 'active_hop_number', + 'active_round_hop_number', + 'current_target_label', + 'initiator_label', + 'hop_states', + 'hop_counts', + 'terminal_reason', + 'advanced_p2p_enabled', + 'current_round_id', + 'current_execution_step', + 'current_round_attempt', + 'round_attempt_counts', + 'round_jump_counts', + 'routing_history', + 'helper_diagnostics', + 'advanced_nodes', + 'run_phase', + 'summary_phase', +] as const; + +function isRecord(value: unknown): value is Record { + return !!value && typeof value === 'object' && !Array.isArray(value); +} + +function stringValue(value: unknown): string | null { + return typeof value === 'string' ? value : null; +} + +function requiredString(value: unknown, fallback: string): string { + const resolved = stringValue(value); + return resolved && resolved.trim() ? boundedString(resolved) : fallback; +} + +function nullableString(value: unknown): string | null { + return typeof value === 'string' ? boundedString(value) : null; +} + +function numberValue(value: unknown, fallback: number): number { + return typeof value === 'number' && Number.isFinite(value) ? value : fallback; +} + +function optionalNumber(value: unknown): number | undefined { + return typeof value === 'number' && Number.isFinite(value) ? value : undefined; +} + +function nullableNumber(value: unknown): number | null | undefined { + if (value === null) return null; + return optionalNumber(value); +} + +function truncateUtf8String(value: string, maxBytes: number): { value: string; truncated: boolean } { + if (Buffer.byteLength(value, 'utf8') <= maxBytes) return { value, truncated: false }; + if (maxBytes <= 0) return { value: '', truncated: true }; + const chars = Array.from(value); + let low = 0; + let high = chars.length; + while (low < high) { + const mid = Math.ceil((low + high) / 2); + if (Buffer.byteLength(chars.slice(0, mid).join(''), 'utf8') <= maxBytes) low = mid; + else high = mid - 1; + } + return { value: chars.slice(0, low).join(''), truncated: true }; +} + +function boundedString(value: string, ctx?: BoundedCloneContext): string { + const stringCap = P2P_SANITIZE_MAX_STRING_BYTES; + const byteCap = ctx ? Math.min(stringCap, Math.max(0, ctx.remainingBytes)) : stringCap; + const truncated = truncateUtf8String(value, byteCap); + if (truncated.truncated && ctx) ctx.truncated = true; + if (ctx) { + ctx.remainingBytes = Math.max(0, ctx.remainingBytes - Buffer.byteLength(truncated.value, 'utf8')); + } + return truncated.value; +} + +function chargeBytes(ctx: BoundedCloneContext, value: string): boolean { + const bytes = Buffer.byteLength(value, 'utf8'); + if (bytes > ctx.remainingBytes) { + ctx.truncated = true; + return false; + } + ctx.remainingBytes -= bytes; + return true; +} + +function jsonObjectString(value: unknown): string { + if (!isRecord(value) && !Array.isArray(value)) return '{}'; + const cloned = cloneSafePublicValue(value); + try { + if (cloned.truncated || cloned.value === undefined) { + return JSON.stringify(cloned.value ?? {}); + } + return JSON.stringify(cloned.value); + } catch { + return '{}'; + } +} + +function isoTimestamp(value: unknown): string { + if (typeof value === 'string' && !Number.isNaN(Date.parse(value))) return value; + if (typeof value === 'number' && Number.isFinite(value)) return new Date(value).toISOString(); + return new Date().toISOString(); +} + +function normalizeProjectionStatus(value: unknown): P2pWorkflowStatusProjection['status'] { + if (typeof value !== 'string') return 'running'; + if (PROJECTION_STATUSES.has(value as P2pWorkflowStatusProjection['status'])) { + return value as P2pWorkflowStatusProjection['status']; + } + if (value === 'dispatched' || value === 'awaiting_next_hop' || value === 'timed_out') return 'running'; + return 'failed'; +} + +function stringArray(value: unknown): string[] { + if (!Array.isArray(value)) return []; + return value + .slice(0, P2P_SANITIZE_MAX_ARRAY_ITEMS) + .filter((item): item is string => typeof item === 'string') + .map((item) => boundedString(item)); +} + +function sanitizeDiagnosticString(value: unknown): string | undefined { + return typeof value === 'string' && value.trim() + ? boundedString(value) + : undefined; +} + +/** + * Retain diagnostics produced by workflow parse/compile/bind/execute/project + * phases. The daemon/server may provide the raw object, but the bridge owns the + * public shape: known code only, trusted messageKey recomputed, scalar context + * fields bounded. Unknown codes are dropped rather than surfaced to web. + */ +export function sanitizeWorkflowDiagnosticForRetention( + raw: unknown, + fallbackRunId?: string, +): P2pWorkflowDiagnostic | null { + if (!isRecord(raw)) return null; + const code = stringValue(raw.code); + if (!code || !WORKFLOW_DIAGNOSTIC_CODES.has(code)) return null; + + const rawPhase = stringValue(raw.phase); + const phase = rawPhase && WORKFLOW_DIAGNOSTIC_PHASES.has(rawPhase) + ? rawPhase as P2pWorkflowDiagnosticPhase + : undefined; + + const diagnostic = makeP2pWorkflowDiagnostic(code as P2pWorkflowDiagnosticCode, phase, { + summary: sanitizeDiagnosticString(raw.summary), + nodeId: sanitizeDiagnosticString(raw.nodeId), + runId: sanitizeDiagnosticString(raw.runId) ?? fallbackRunId, + fieldPath: sanitizeDiagnosticString(raw.fieldPath), + }); + + const rawSeverity = stringValue(raw.severity); + if (rawSeverity && WORKFLOW_DIAGNOSTIC_SEVERITIES.has(rawSeverity)) { + diagnostic.severity = rawSeverity as P2pWorkflowDiagnosticSeverity; + } + // Never trust raw.messageKey; makeP2pWorkflowDiagnostic derives it from code. + return diagnostic; +} + +/** + * Diagnostics generated by the sanitizer itself remain restricted to sanitize + * codes. Use this only for server-side generated sanitize events, not for + * retaining workflow diagnostics from a valid projection/snapshot. + */ +export function sanitizeServerSideDiagnostic( + raw: unknown, + fallbackRunId?: string, +): P2pWorkflowDiagnostic | null { + const retained = sanitizeWorkflowDiagnosticForRetention(raw, fallbackRunId); + if (!retained || !SERVER_SIDE_SANITIZE_CODES.has(retained.code)) return null; + return makeP2pWorkflowDiagnostic(retained.code, 'sanitize', { + summary: retained.summary, + nodeId: retained.nodeId, + runId: retained.runId ?? fallbackRunId, + fieldPath: retained.fieldPath, + }); +} + +function collectForbiddenFieldDiagnostics(raw: unknown, runId: string): P2pWorkflowDiagnostic[] { + const diagnostics: P2pWorkflowDiagnostic[] = []; + const seen = new Set(); + const visit = (value: unknown, path: string, depth: number) => { + if (diagnostics.length >= 20 || depth > 5 || !isRecord(value) || seen.has(value)) return; + seen.add(value); + const keys = Object.keys(value); + if (keys.length > P2P_SANITIZE_MAX_OBJECT_KEYS) { + diagnostics.push(makeP2pWorkflowDiagnostic('private_projection_field_dropped', 'sanitize', { + runId, + summary: 'Sanitized oversized workflow payload', + })); + } + for (const key of keys.slice(0, P2P_SANITIZE_MAX_OBJECT_KEYS)) { + const fieldPath = path ? `${path}.${key}` : key; + if (FORBIDDEN_KEYS.has(key)) { + diagnostics.push(makeP2pWorkflowDiagnostic('private_projection_field_dropped', 'sanitize', { + runId, + summary: 'Dropped private field from daemon projection', + })); + continue; + } + visit(value[key], fieldPath, depth + 1); + } + }; + visit(raw, '', 0); + return diagnostics; +} + +function sanitizedRecordArray(value: unknown, allowedKeys: readonly string[]): Array> | undefined { + if (!Array.isArray(value)) return undefined; + const allowed = new Set(allowedKeys); + const rows: Array> = []; + for (const item of value.slice(0, P2P_SANITIZE_MAX_ARRAY_ITEMS)) { + if (!isRecord(item)) continue; + const out: Record = {}; + for (const key of Object.keys(item).slice(0, P2P_SANITIZE_MAX_OBJECT_KEYS)) { + if (!allowed.has(key)) continue; + const field = item[key]; + if ( + typeof field === 'number' + || typeof field === 'boolean' + || field === null + ) { + out[key] = field; + } else if (typeof field === 'string') { + out[key] = boundedString(field); + } + } + rows.push(out); + } + return rows; +} + +function sanitizedNumberRecord(value: unknown): Record | undefined { + if (!isRecord(value)) return undefined; + const out: Record = {}; + for (const key of Object.keys(value).slice(0, P2P_SANITIZE_MAX_OBJECT_KEYS)) { + const field = value[key]; + if (typeof field === 'number' && Number.isFinite(field)) out[key] = field; + } + return out; +} + +function addOptional( + target: T, + key: K, + value: unknown, +): void { + if (value !== undefined) { + (target as Record)[key] = value; + } +} + +function cloneSafePublicValue(value: unknown): { value: unknown; truncated: boolean } { + const ctx: BoundedCloneContext = { + remainingBytes: P2P_SANITIZE_MAX_TOTAL_BYTES, + truncated: false, + seen: new Set(), + }; + return { value: cloneSafePublicValueInner(value, ctx, 0), truncated: ctx.truncated }; +} + +function cloneSafePublicValueInner(value: unknown, ctx: BoundedCloneContext, depth: number): unknown { + if (ctx.remainingBytes <= 0) { + ctx.truncated = true; + return undefined; + } + if (value === null) { + if (!chargeBytes(ctx, 'null')) return undefined; + return null; + } + if (typeof value === 'string') return boundedString(value, ctx); + if (typeof value === 'number') { + if (!Number.isFinite(value) || !chargeBytes(ctx, String(value))) return undefined; + return value; + } + if (typeof value === 'boolean') { + if (!chargeBytes(ctx, value ? 'true' : 'false')) return undefined; + return value; + } + if (depth >= P2P_SANITIZE_MAX_DEPTH) { + ctx.truncated = true; + return undefined; + } + if (Array.isArray(value)) { + if (ctx.seen.has(value)) { + ctx.truncated = true; + return undefined; + } + ctx.seen.add(value); + if (value.length > P2P_SANITIZE_MAX_ARRAY_ITEMS) ctx.truncated = true; + const output: unknown[] = []; + for (const entry of value.slice(0, P2P_SANITIZE_MAX_ARRAY_ITEMS)) { + const cloned = cloneSafePublicValueInner(entry, ctx, depth + 1); + if (cloned !== undefined) output.push(cloned); + } + ctx.seen.delete(value); + return output; + } + if (isRecord(value)) { + if (ctx.seen.has(value)) { + ctx.truncated = true; + return undefined; + } + ctx.seen.add(value); + const output: Record = {}; + const keys = Object.keys(value); + if (keys.length > P2P_SANITIZE_MAX_OBJECT_KEYS) ctx.truncated = true; + for (const key of keys.slice(0, P2P_SANITIZE_MAX_OBJECT_KEYS)) { + if (FORBIDDEN_KEYS.has(key)) { + ctx.truncated = true; + continue; + } + if (!chargeBytes(ctx, key)) break; + const cloned = cloneSafePublicValueInner(value[key], ctx, depth + 1); + if (cloned !== undefined) output[key] = cloned; + } + ctx.seen.delete(value); + return output; + } + return undefined; +} + +function sanitizeCapabilitySnapshot(raw: unknown): P2pWorkflowStatusProjection['capabilitySnapshot'] | undefined { + if (!isRecord(raw)) return undefined; + const daemonId = stringValue(raw.daemonId); + const helloEpoch = numberValue(raw.helloEpoch, Number.NaN); + const sentAt = numberValue(raw.sentAt, Number.NaN); + if (!daemonId || !Number.isFinite(helloEpoch) || !Number.isFinite(sentAt)) return undefined; + return { + daemonId, + capabilities: stringArray(raw.capabilities), + helloEpoch, + sentAt, + }; +} + +export function sanitizeP2pWorkflowStatusProjection( + raw: unknown, + diagnosticSource: unknown = raw, +): P2pWorkflowStatusProjection { + const source = isRecord(raw) ? raw : {}; + const runId = requiredString(source.runId ?? source.id, 'unknown'); + const workflowId = requiredString(source.workflowId ?? source.workflow_id ?? source.mode_key, 'legacy'); + const currentNodeId = stringValue(source.currentNodeId ?? source.current_node_id ?? source.current_round_id ?? source.current_target_session); + const updatedAt = isoTimestamp(source.updatedAt ?? source.updated_at); + const rawDiagnostics = Array.isArray(source.diagnostics) + ? source.diagnostics.slice(0, P2P_SANITIZE_MAX_ARRAY_ITEMS) + : []; + const diagnostics = rawDiagnostics + .map((item) => sanitizeWorkflowDiagnosticForRetention(item, runId)) + .filter((item): item is P2pWorkflowDiagnostic => item !== null); + diagnostics.push(...collectForbiddenFieldDiagnostics(diagnosticSource, runId)); + + return { + projectionVersion: P2P_WORKFLOW_PROJECTION_VERSION, + runId, + workflowId, + status: normalizeProjectionStatus(source.status), + ...(currentNodeId ? { currentNodeId } : {}), + completedNodeIds: stringArray(source.completedNodeIds ?? source.completed_node_ids), + diagnostics, + ...(sanitizeCapabilitySnapshot(source.capabilitySnapshot) ? { capabilitySnapshot: sanitizeCapabilitySnapshot(source.capabilitySnapshot) } : {}), + updatedAt, + }; +} + +export function sanitizeP2pPersistedWorkflowSnapshot(raw: unknown): P2pPersistedWorkflowSnapshot { + return buildPersistedSnapshotFromProjection(sanitizeP2pWorkflowStatusProjection(raw)); +} + +export type LegacyProgressSnapshotSanitizeResult = { + projection: P2pWorkflowStatusProjection; + snapshot: P2pPersistedWorkflowSnapshot; + diagnostic: P2pWorkflowDiagnostic | null; +}; + +/** + * Detect whether a parsed object is already a valid persisted-projection snapshot. + * Avoids re-sanitizing rows that were written by the new projection-only path. + */ +function isValidPersistedSnapshotShape(value: unknown): value is P2pPersistedWorkflowSnapshot { + if (!isRecord(value)) return false; + if (value.projectionVersion !== P2P_WORKFLOW_PROJECTION_VERSION) return false; + if (typeof value.runId !== 'string' || value.runId === '') return false; + if (typeof value.workflowId !== 'string' || value.workflowId === '') return false; + if (typeof value.updatedAt !== 'string' || value.updatedAt === '') return false; + if (!PROJECTION_STATUSES.has(value.status as P2pWorkflowStatusProjection['status'])) return false; + if (!Array.isArray(value.completedNodeIds)) return false; + if (value.completedNodeIds.some((id) => typeof id !== 'string' || id === '')) return false; + if (!Array.isArray(value.diagnostics)) return false; + if (value.currentNodeId !== undefined && typeof value.currentNodeId !== 'string') return false; + // Persisted snapshots are projection-only and must NOT carry projection-extra fields. + if (value.capabilitySnapshot !== undefined) return false; + if (value.artifactSummaries !== undefined) return false; + if (value.nodeSummaries !== undefined) return false; + // Reject any forbidden private keys at the top level. + for (const key of Object.keys(value)) { + if (FORBIDDEN_KEYS.has(key)) return false; + } + return true; +} + +function emptyValidLegacyProjection(runId: string, workflowId: string): P2pWorkflowStatusProjection { + return { + projectionVersion: P2P_WORKFLOW_PROJECTION_VERSION, + runId: runId || 'unknown', + workflowId: workflowId || 'legacy', + status: 'stale', + completedNodeIds: [], + diagnostics: [], + updatedAt: new Date().toISOString(), + }; +} + +/** Optional row-level context for `sanitizeLegacyP2pProgressSnapshot`. When the + * caller has the real `id` / `discussion_id` of the DB row being read, they + * should be passed here so legacy diagnostics can be traced back to a + * concrete row instead of the placeholder `'unknown'` / `'legacy'`. */ +export type SanitizeLegacyP2pProgressSnapshotContext = { + runId?: string; + workflowId?: string; +}; + +function isEmptyPlaceholderObject(value: unknown): value is Record { + if (!isRecord(value)) return false; + return Object.keys(value).length === 0; +} + +/** + * Read-time sanitizer for legacy `progress_snapshot` JSON strings stored in + * `discussion_orchestration_runs.progress_snapshot`. + * + * Behavior: + * - Parses the JSON; on parse failure returns a safe empty projection plus a + * `legacy_progress_snapshot_sanitized` diagnostic. + * - If the parsed value is the empty placeholder `{}` (the migration default + * for newly-created rows that have not yet written a projection), returns a + * safe empty projection WITHOUT a diagnostic — these rows are not "legacy", + * they are simply uninitialized. + * - If the parsed object is already a valid `P2pPersistedWorkflowSnapshot` + * (correct projection version, required fields, no private/forbidden keys), + * it is returned unchanged with no diagnostic. + * - Otherwise, treats the row as legacy and projects it through the + * allowlist sanitizer, attaches `legacy_progress_snapshot_sanitized`, and + * returns the new projection + persisted snapshot. + * + * Optional `context` lets callers supply the real DB `runId` / `workflowId` so + * legacy diagnostics retain audit traceability to the originating row instead + * of falling back to the `'unknown'` / `'legacy'` placeholders. + * + * This function MUST NOT mutate any DB row; it is a read-time projection only. + */ +export function sanitizeLegacyP2pProgressSnapshot( + rawSnapshotJson: string, + context?: SanitizeLegacyP2pProgressSnapshotContext, +): LegacyProgressSnapshotSanitizeResult { + const safeRunId = context?.runId && context.runId !== '' ? context.runId : 'unknown'; + const safeWorkflowId = context?.workflowId && context.workflowId !== '' ? context.workflowId : 'legacy'; + + let parsed: unknown; + try { + parsed = JSON.parse(rawSnapshotJson); + } catch { + const projection = emptyValidLegacyProjection(safeRunId, safeWorkflowId); + const diagnostic = makeP2pWorkflowDiagnostic('legacy_progress_snapshot_sanitized', 'sanitize', { + runId: projection.runId, + summary: 'Sanitized malformed legacy progress snapshot', + }); + projection.diagnostics.push(diagnostic); + return { + projection, + snapshot: buildPersistedSnapshotFromProjection(projection), + diagnostic, + }; + } + + // Empty placeholder ({}): this is the default value of the migration column + // for freshly-created rows that have not yet emitted a projection. They are + // NOT legacy and must not pollute metrics or the UI with a sanitize + // diagnostic — return a quiet empty projection. + if (isEmptyPlaceholderObject(parsed)) { + const projection = emptyValidLegacyProjection(safeRunId, safeWorkflowId); + return { + projection, + snapshot: buildPersistedSnapshotFromProjection(projection), + diagnostic: null, + }; + } + + if (isValidPersistedSnapshotShape(parsed)) { + // Already projection-shaped; return unchanged. We still re-build the + // persisted snapshot through the canonical builder so callers can rely + // on a consistent return shape, but no sanitize diagnostic is emitted. + const projection: P2pWorkflowStatusProjection = { + projectionVersion: P2P_WORKFLOW_PROJECTION_VERSION, + runId: parsed.runId, + workflowId: parsed.workflowId, + status: parsed.status, + ...(parsed.currentNodeId ? { currentNodeId: parsed.currentNodeId } : {}), + completedNodeIds: [...parsed.completedNodeIds], + diagnostics: parsed.diagnostics + .map((item) => sanitizeWorkflowDiagnosticForRetention(item, parsed.runId)) + .filter((diagnostic): diagnostic is P2pWorkflowDiagnostic => diagnostic !== null), + updatedAt: parsed.updatedAt, + }; + return { + projection, + snapshot: buildPersistedSnapshotFromProjection(projection), + diagnostic: null, + }; + } + + // Legacy or otherwise non-conforming payload: project through the allowlist + // sanitizer, which already drops `__proto__`, `constructor`, `compiledWorkflow`, + // `rawPrompt`, `scriptRawOutputs`, `artifactBaselines`, env/token-like keys, etc. + const projection = sanitizeP2pWorkflowStatusProjection(parsed); + // Restore audit-traceable IDs from the row context: the inner sanitizer can + // only see what was inside the JSON blob (often missing or wrong for legacy + // rows). Prefer the real DB ids that the caller passed in. + if (context?.runId && context.runId !== '' && (projection.runId === 'unknown' || projection.runId === '')) { + projection.runId = context.runId; + } + if (context?.workflowId && context.workflowId !== '' && (projection.workflowId === 'legacy' || projection.workflowId === '')) { + projection.workflowId = context.workflowId; + } + const diagnostic = makeP2pWorkflowDiagnostic('legacy_progress_snapshot_sanitized', 'sanitize', { + runId: projection.runId, + summary: 'Sanitized legacy progress snapshot at read time', + }); + projection.diagnostics.push(diagnostic); + return { + projection, + snapshot: buildPersistedSnapshotFromProjection(projection), + diagnostic, + }; +} + +export function sanitizeP2pOrchestrationRunForBridge(raw: unknown, overrides: { + serverId: string; + status?: string; + completedAt?: string | null; + updatedAt?: string; +}): SanitizedP2pOrchestrationRun { + const source = isRecord(raw) ? raw : {}; + const updatedAt = overrides.updatedAt ?? isoTimestamp(source.updated_at ?? source.updatedAt); + const runForProjection = { + id: source.id, + runId: source.runId, + workflowId: source.workflowId, + workflow_id: source.workflow_id, + mode_key: source.mode_key, + status: overrides.status ?? source.status, + currentNodeId: source.currentNodeId, + current_node_id: source.current_node_id, + current_round_id: source.current_round_id, + current_target_session: source.current_target_session, + completedNodeIds: source.completedNodeIds, + completed_node_ids: source.completed_node_ids, + diagnostics: source.diagnostics, + capabilitySnapshot: source.capabilitySnapshot, + updated_at: updatedAt, + }; + const projection = sanitizeP2pWorkflowStatusProjection(runForProjection, raw); + const snapshot = buildPersistedSnapshotFromProjection(projection); + + const sanitized: SanitizedP2pOrchestrationRun = { + id: requiredString(source.id ?? source.runId, projection.runId), + discussion_id: requiredString(source.discussion_id, ''), + server_id: overrides.serverId, + main_session: requiredString(source.main_session, ''), + initiator_session: requiredString(source.initiator_session, ''), + current_target_session: nullableString(source.current_target_session), + final_return_session: requiredString(source.final_return_session, ''), + remaining_targets: jsonObjectString(source.remaining_targets), + mode_key: requiredString(source.mode_key, projection.workflowId), + status: overrides.status ?? requiredString(source.status, projection.status), + request_message_id: nullableString(source.request_message_id), + callback_message_id: nullableString(source.callback_message_id), + context_ref: jsonObjectString(source.context_ref), + timeout_ms: numberValue(source.timeout_ms, 0), + result_summary: nullableString(source.result_summary), + error: nullableString(source.error), + progress_snapshot: JSON.stringify(snapshot), + created_at: isoTimestamp(source.created_at), + updated_at: updatedAt, + completed_at: overrides.completedAt === undefined ? nullableString(source.completed_at) : overrides.completedAt, + workflow_projection: projection, + }; + addOptional(sanitized, 'current_round', optionalNumber(source.current_round)); + addOptional(sanitized, 'total_rounds', optionalNumber(source.total_rounds)); + addOptional(sanitized, 'total_count', optionalNumber(source.total_count)); + addOptional(sanitized, 'total_hops', optionalNumber(source.total_hops)); + addOptional(sanitized, 'completed_hops_count', optionalNumber(source.completed_hops_count)); + addOptional(sanitized, 'active_hop_number', nullableNumber(source.active_hop_number)); + addOptional(sanitized, 'active_round_hop_number', nullableNumber(source.active_round_hop_number)); + addOptional(sanitized, 'active_phase', stringValue(source.active_phase) ?? undefined); + addOptional(sanitized, 'current_round_mode', stringValue(source.current_round_mode) ?? undefined); + addOptional(sanitized, 'current_target_label', nullableString(source.current_target_label)); + addOptional(sanitized, 'initiator_label', nullableString(source.initiator_label)); + addOptional(sanitized, 'run_phase', stringValue(source.run_phase) ?? undefined); + addOptional(sanitized, 'summary_phase', nullableString(source.summary_phase)); + addOptional(sanitized, 'hop_states', sanitizedRecordArray(source.hop_states, [ + 'hop_index', + 'round_index', + 'session', + 'mode', + 'status', + 'started_at', + 'completed_at', + 'error', + ])); + addOptional(sanitized, 'hop_counts', sanitizedNumberRecord(source.hop_counts)); + addOptional(sanitized, 'all_nodes', sanitizedRecordArray(source.all_nodes, [ + 'session', + 'label', + 'displayLabel', + 'display_label', + 'agentType', + 'ccPreset', + 'cc_preset', + 'mode', + 'phase', + 'status', + ])); + addOptional(sanitized, 'advanced_p2p_enabled', typeof source.advanced_p2p_enabled === 'boolean' ? source.advanced_p2p_enabled : undefined); + addOptional(sanitized, 'current_round_id', nullableString(source.current_round_id)); + addOptional(sanitized, 'advanced_nodes', sanitizedRecordArray(source.advanced_nodes, [ + 'id', + 'title', + 'preset', + 'status', + 'attempt', + 'step', + ])); + return sanitized; +} + +export function sanitizeP2pRunUpdateForBroadcast(raw: unknown, overrides: { + serverId: string; + status?: string; + completedAt?: string | null; + updatedAt?: string; +}): SanitizedP2pRunUpdate { + const source = isRecord(raw) ? raw : {}; + const run = sanitizeP2pOrchestrationRunForBridge(source, overrides) as SanitizedP2pRunUpdate; + let legacyPayloadTruncated = false; + for (const field of SAFE_LEGACY_RUN_UPDATE_FIELDS) { + if (!Object.prototype.hasOwnProperty.call(source, field)) continue; + const cloned = cloneSafePublicValue(source[field]); + if (cloned.truncated) legacyPayloadTruncated = true; + if (cloned.value !== undefined) (run as Record)[field] = cloned.value; + } + if (legacyPayloadTruncated) { + run.workflow_projection.diagnostics.push(makeP2pWorkflowDiagnostic('private_projection_field_dropped', 'sanitize', { + runId: run.id, + summary: 'Sanitized oversized workflow payload', + })); + } + return run; +} + +/** + * Canonical single-pass sanitize for P2P run RUN_SAVE/RUN_COMPLETE/RUN_ERROR + * paths. Produces ONE projection that is shared between the DB upsert payload + * and the broadcast payload. Both `persisted` and `broadcast` reference the same + * `workflow_projection` object (and same `progress_snapshot` JSON), so the set + * of diagnostic codes the browser sees is byte-identical to what is written to + * the DB row. + * + * The DB-bound `persisted` payload deliberately omits legacy public fields like + * `hop_states`, `routing_history` etc.; those are broadcast-only (the columns + * used by `upsertOrchestrationRun` already form a strict subset of + * `SanitizedP2pOrchestrationRun`). The broadcast payload re-uses the same + * sanitized base and layers the legacy public fields on top. + */ +export function sanitizeP2pRunForPersistAndBroadcast(raw: unknown, overrides: { + serverId: string; + status?: string; + completedAt?: string | null; + updatedAt?: string; +}): { persisted: SanitizedP2pOrchestrationRun; broadcast: SanitizedP2pRunUpdate } { + const source = isRecord(raw) ? raw : {}; + const persisted = sanitizeP2pOrchestrationRunForBridge(source, overrides); + + // Broadcast shares the SAME projection object (and progress_snapshot string) + // as the persisted payload, but adds legacy public fields. Mutating + // `broadcast.workflow_projection.diagnostics` (e.g. for truncation) therefore + // also updates `persisted.workflow_projection.diagnostics` — the DB and the + // browser stay in sync by construction. + const broadcast: SanitizedP2pRunUpdate = { ...persisted } as SanitizedP2pRunUpdate; + broadcast.workflow_projection = persisted.workflow_projection; + + let legacyPayloadTruncated = false; + for (const field of SAFE_LEGACY_RUN_UPDATE_FIELDS) { + if (!Object.prototype.hasOwnProperty.call(source, field)) continue; + const cloned = cloneSafePublicValue(source[field]); + if (cloned.truncated) legacyPayloadTruncated = true; + if (cloned.value !== undefined) (broadcast as Record)[field] = cloned.value; + } + if (legacyPayloadTruncated) { + const truncationDiagnostic = makeP2pWorkflowDiagnostic('private_projection_field_dropped', 'sanitize', { + runId: persisted.id, + summary: 'Sanitized oversized workflow payload', + }); + persisted.workflow_projection.diagnostics.push(truncationDiagnostic); + // Re-serialize the persisted snapshot string so the DB column reflects the + // truncation diagnostic too. This keeps the DB and broadcast bytes aligned. + const refreshed = buildPersistedSnapshotFromProjection(persisted.workflow_projection); + persisted.progress_snapshot = JSON.stringify(refreshed); + } + return { persisted, broadcast }; +} diff --git a/server/src/routes/discussions.ts b/server/src/routes/discussions.ts index d837e4368..b9efca66c 100644 --- a/server/src/routes/discussions.ts +++ b/server/src/routes/discussions.ts @@ -7,9 +7,34 @@ import { getOrchestrationRunsByDiscussion, getOrchestrationRunById, getRecentOrchestrationRuns, + type DbOrchestrationRun, } from '../db/queries.js'; +import { sanitizeLegacyP2pProgressSnapshot } from '../p2p-workflow-sanitize.js'; import { requireAuth, resolveServerRole } from '../security/authorization.js'; +type SanitizedDbOrchestrationRun = DbOrchestrationRun & { + progress_snapshot_diagnostics: string[]; +}; + +/** + * Sanitize a single DB row's `progress_snapshot` JSON string at read time + * (read-only — does not mutate the row in the database). Replaces the row's + * `progress_snapshot` field with the sanitized persisted snapshot JSON, and + * attaches a sibling `progress_snapshot_diagnostics: string[]` listing any + * diagnostic codes (currently only `legacy_progress_snapshot_sanitized`). + */ +function sanitizeRunRow(row: DbOrchestrationRun): SanitizedDbOrchestrationRun { + const result = sanitizeLegacyP2pProgressSnapshot(row.progress_snapshot ?? '', { + runId: row.id, + workflowId: row.discussion_id, + }); + return { + ...row, + progress_snapshot: JSON.stringify(result.snapshot), + progress_snapshot_diagnostics: result.diagnostic ? [result.diagnostic.code] : [], + }; +} + export const discussionRoutes = new Hono<{ Bindings: Env; Variables: { userId: string; role: string } }>(); discussionRoutes.use('/*', requireAuth()); @@ -51,7 +76,7 @@ discussionRoutes.get('/:id/discussions/:discussionId/runs', async (c) => { if (role === 'none') return c.json({ error: 'forbidden' }, 403); const runs = await getOrchestrationRunsByDiscussion(c.env.DB, discussionId, serverId); - return c.json({ runs }); + return c.json({ runs: runs.map(sanitizeRunRow) }); }); /** GET /api/server/:id/p2p/runs — list recent P2P orchestration runs */ @@ -62,7 +87,7 @@ discussionRoutes.get('/:id/p2p/runs', async (c) => { if (role === 'none') return c.json({ error: 'forbidden' }, 403); const runs = await getRecentOrchestrationRuns(c.env.DB, serverId, 50); - return c.json({ runs }); + return c.json({ runs: runs.map(sanitizeRunRow) }); }); /** GET /api/server/:id/p2p/runs/:runId — get single orchestration run */ @@ -77,5 +102,5 @@ discussionRoutes.get('/:id/p2p/runs/:runId', async (c) => { if (!run) { return c.json({ error: 'not_found' }, 404); } - return c.json({ run }); + return c.json({ run: sanitizeRunRow(run) }); }); diff --git a/server/src/ws/bridge.ts b/server/src/ws/bridge.ts index fae57e6b1..ae55bc462 100644 --- a/server/src/ws/bridge.ts +++ b/server/src/ws/bridge.ts @@ -93,7 +93,25 @@ import { PUSH_TIMELINE_EVENT_MAX_AGE_MS, TIMELINE_SUPPRESS_PUSH_FIELD } from '.. import { DAEMON_UPGRADE_DELIVERY_STATUS, } from '../../../shared/daemon-upgrade.js'; +import { + P2P_WORKFLOW_MSG, + isP2pWorkflowRequestId, + parseP2pWorkflowMessageType, + type P2pWorkflowMessageDescriptor, + type P2pWorkflowMessageType, +} from '../../../shared/p2p-workflow-messages.js'; +import { + P2P_BRIDGE_ERROR_CODES, + P2P_BRIDGE_PENDING_REQUEST_TIMEOUT_MS, + P2P_BRIDGE_PENDING_REQUESTS_GLOBAL, + P2P_BRIDGE_PENDING_REQUESTS_PER_SOCKET, + P2P_CAPABILITY_FRESHNESS_TTL_MS, +} from '../../../shared/p2p-workflow-constants.js'; import { DaemonUpgradeCoordinator, type DaemonUpgradeSource, type RequestDaemonUpgradeResult } from './daemon-upgrade-coordinator.js'; +import { + sanitizeP2pRunForPersistAndBroadcast, + sanitizeP2pRunUpdateForBroadcast, +} from '../p2p-workflow-sanitize.js'; const AUTH_TIMEOUT_MS = 5000; const MAX_QUEUE_SIZE = 100; @@ -231,6 +249,14 @@ type WatchActiveSubSessionRow = { label?: string; }; +interface DaemonP2pWorkflowCapabilities { + daemonId: string; + capabilities: string[]; + helloEpoch: number; + sentAt: number; + receivedAt: number; +} + type PendingPreviewRequest = { readable: ReadableStream; controller: ReadableStreamDefaultController | null; @@ -243,6 +269,14 @@ type PendingPreviewRequest = { rejectStart: (err: Error) => void; }; +type PendingP2pWorkflowRequest = { + socket: WebSocket; + timer: ReturnType; + requestType: P2pWorkflowMessageType; + expectedResponseType: P2pWorkflowMessageType; + createdAt: number; +}; + // ── WS tunnel state ─────────────────────────────────────────────────────────── interface WsTunnelState { @@ -349,6 +383,8 @@ export class WsBridge { /** Cached provider connection status — pushed to browsers on connect, persisted to DB. */ private providerStatus = new Map(); + /** Cached advanced P2P capabilities for the current authenticated daemon socket. */ + private daemonP2pWorkflowCapabilities: DaemonP2pWorkflowCapabilities | null = null; /** Cached remote sessions from providers — pushed to browsers on connect, persisted to DB. */ private providerRemoteSessions = new Map(); @@ -377,6 +413,9 @@ export class WsBridge { /** Per-request timeline.history / timeline.replay pending map — routes responses via requestId unicast. */ private pendingTimelineRequests = new Map }>(); + /** Per-request P2P workflow pending map — routes request-scoped responses via requestId unicast. */ + private pendingP2pWorkflowRequests = new Map(); + /** Per-request memory management pending map — routes sensitive admin responses via requestId unicast. */ private pendingMemoryManagementRequests = new Map }>(); @@ -992,6 +1031,11 @@ export class WsBridge { return; } + if (msg.type === P2P_WORKFLOW_MSG.DAEMON_HELLO) { + this.handleDaemonP2pWorkflowHello(msg); + return; + } + if (msg.type === 'heartbeat') { const heartbeatDaemonVersion = typeof msg.daemonVersion === 'string' ? msg.daemonVersion @@ -1058,6 +1102,7 @@ export class WsBridge { this.broadcastToBrowsers(JSON.stringify({ type: TRANSPORT_MSG.PROVIDER_STATUS, providerId, connected: false })); } this.providerStatus.clear(); + this.daemonP2pWorkflowCapabilities = null; this.broadcastToBrowsers(JSON.stringify({ type: DAEMON_MSG.DISCONNECTED })); void clearProviderStatus(db, this.serverId).catch(() => {}); updateServerStatus(db, this.serverId, 'offline').catch((err) => @@ -1160,6 +1205,34 @@ export class WsBridge { return; } + const p2pBrowserMessage = parseP2pWorkflowMessageType(msg.type); + if (p2pBrowserMessage.kind === 'drop' && p2pBrowserMessage.reason === 'unknown_p2p_message') { + incrementCounter('p2p.bridge.unknown_message_drop', { direction: 'browser_to_daemon' }); + logger.warn({ serverId: this.serverId, type: msg.type }, 'unknown browser p2p message — dropped'); + return; + } + if (p2pBrowserMessage.kind === 'known') { + const descriptor = p2pBrowserMessage.descriptor; + if ( + !descriptor.allowedIngress.includes('browser') + || descriptor.response + || descriptor.serverHandling !== 'forward_to_daemon' + ) { + incrementCounter('p2p.bridge.wrong_peer_drop', { direction: 'browser_to_daemon', type: msg.type }); + logger.warn({ serverId: this.serverId, type: msg.type }, 'browser attempted disallowed p2p route — dropped'); + safeSend(ws, JSON.stringify({ + type: 'error', + code: P2P_BRIDGE_ERROR_CODES.WRONG_PEER, + originalType: msg.type, + requestId: msg.requestId, + })); + return; + } + if (descriptor.requestScoped && !this.registerP2pWorkflowRequest(ws, msg, descriptor)) { + return; + } + } + if (this.isBrowserForbiddenDaemonCommandType(msg.type)) { logger.warn({ serverId: this.serverId, type: msg.type }, 'Browser attempted server-only daemon command — rejected'); safeSend(ws, JSON.stringify({ @@ -1308,6 +1381,89 @@ export class WsBridge { }); } + private registerP2pWorkflowRequest( + ws: WebSocket, + msg: Record, + descriptor: P2pWorkflowMessageDescriptor, + ): boolean { + if (!isP2pWorkflowRequestId(msg.requestId)) { + incrementCounter('p2p.bridge.invalid_request_id_drop', { type: descriptor.type }); + logger.warn({ serverId: this.serverId, type: descriptor.type }, 'p2p request missing valid requestId — dropped'); + safeSend(ws, JSON.stringify({ + type: 'error', + code: P2P_BRIDGE_ERROR_CODES.INVALID_REQUEST_ID, + originalType: descriptor.type, + requestId: msg.requestId, + })); + return false; + } + const expectedResponseType = descriptor.expectedResponseType; + if (!expectedResponseType) { + incrementCounter('p2p.bridge.route_policy_drop', { direction: 'browser_to_daemon', type: descriptor.type }); + logger.warn({ serverId: this.serverId, type: descriptor.type }, 'p2p request missing expected response policy — dropped'); + safeSend(ws, JSON.stringify({ + type: 'error', + code: P2P_BRIDGE_ERROR_CODES.ROUTE_POLICY_ERROR, + originalType: descriptor.type, + requestId: msg.requestId, + })); + return false; + } + + const requestId = msg.requestId; + const existing = this.pendingP2pWorkflowRequests.get(requestId); + if (existing) { + incrementCounter('p2p.bridge.duplicate_request_id_drop', { type: descriptor.type }); + logger.warn({ serverId: this.serverId, type: descriptor.type, requestId }, 'p2p duplicate active requestId — dropped'); + safeSend(ws, JSON.stringify({ + type: 'error', + code: P2P_BRIDGE_ERROR_CODES.DUPLICATE_REQUEST_ID, + originalType: descriptor.type, + requestId, + })); + return false; + } + + let socketPendingCount = 0; + for (const pending of this.pendingP2pWorkflowRequests.values()) { + if (pending.socket === ws) socketPendingCount += 1; + } + if (socketPendingCount >= P2P_BRIDGE_PENDING_REQUESTS_PER_SOCKET) { + incrementCounter('p2p.bridge.pending_request_cap_drop', { scope: 'socket', type: descriptor.type }); + logger.warn({ serverId: this.serverId, type: descriptor.type, requestId }, 'p2p per-socket pending cap exceeded — dropped'); + safeSend(ws, JSON.stringify({ + type: 'error', + code: P2P_BRIDGE_ERROR_CODES.PENDING_LIMIT_EXCEEDED, + scope: 'socket', + originalType: descriptor.type, + requestId, + })); + return false; + } + if (this.pendingP2pWorkflowRequests.size >= P2P_BRIDGE_PENDING_REQUESTS_GLOBAL) { + incrementCounter('p2p.bridge.pending_request_cap_drop', { scope: 'global', type: descriptor.type }); + logger.warn({ serverId: this.serverId, type: descriptor.type, requestId }, 'p2p global pending cap exceeded — dropped'); + safeSend(ws, JSON.stringify({ + type: 'error', + code: P2P_BRIDGE_ERROR_CODES.PENDING_LIMIT_EXCEEDED, + scope: 'global', + originalType: descriptor.type, + requestId, + })); + return false; + } + + const timer = setTimeout(() => this.pendingP2pWorkflowRequests.delete(requestId), P2P_BRIDGE_PENDING_REQUEST_TIMEOUT_MS); + this.pendingP2pWorkflowRequests.set(requestId, { + socket: ws, + timer, + requestType: descriptor.type, + expectedResponseType, + createdAt: Date.now(), + }); + return true; + } + // ── Relay helpers ────────────────────────────────────────────────────────── /** @@ -1319,6 +1475,49 @@ export class WsBridge { private relayToBrowsers(msg: Record): void { const type = msg.type as string; + const p2pDaemonMessage = parseP2pWorkflowMessageType(type); + if (p2pDaemonMessage.kind === 'known' && !p2pDaemonMessage.descriptor.allowedIngress.includes('daemon')) { + incrementCounter('p2p.bridge.wrong_peer_drop', { direction: 'daemon_to_browser', type }); + logger.warn({ serverId: this.serverId, type }, 'daemon attempted disallowed p2p route — dropped'); + return; + } + if (p2pDaemonMessage.kind === 'known' && p2pDaemonMessage.descriptor.response && p2pDaemonMessage.descriptor.requestScoped) { + const requestId = msg.requestId; + if (!isP2pWorkflowRequestId(requestId)) { + incrementCounter('p2p.bridge.unrouted_response_drop', { type }); + logger.warn({ serverId: this.serverId, type, requestId }, 'p2p response missing valid requestId — dropped'); + return; + } + const pending = this.pendingP2pWorkflowRequests.get(requestId); + if (!pending) { + incrementCounter('p2p.bridge.unrouted_response_drop', { type }); + logger.warn({ serverId: this.serverId, type, requestId }, 'p2p response missing pending request — dropped'); + return; + } + if (pending.expectedResponseType !== type) { + incrementCounter('p2p.bridge.response_type_mismatch_drop', { + expected: pending.expectedResponseType, + received: type, + requestType: pending.requestType, + }); + logger.warn({ + serverId: this.serverId, + requestId, + requestType: pending.requestType, + expectedResponseType: pending.expectedResponseType, + receivedResponseType: type, + createdAt: pending.createdAt, + }, 'p2p response type mismatch — dropped without clearing pending request'); + return; + } + clearTimeout(pending.timer); + this.pendingP2pWorkflowRequests.delete(requestId); + if (pending.socket.readyState === WebSocket.OPEN) { + pending.socket.send(JSON.stringify(msg)); + } + return; + } + // ── Preview WS tunnel control messages ────────────────────────────────── if (type === PREVIEW_MSG.WS_OPENED) { this.resolvePreviewWsOpened(msg as unknown as PreviewWsOpenedMessage); @@ -1775,7 +1974,7 @@ export class WsBridge { } // ── P2P conflict → broadcast to browsers ──────────────────────────────── - if (type === 'p2p.conflict') { + if (type === P2P_WORKFLOW_MSG.CONFLICT) { this.broadcastToBrowsers(JSON.stringify(msg)); return; } @@ -1826,27 +2025,70 @@ export class WsBridge { } // ── P2P orchestration run persistence + broadcast ──────────────────────── - if (type === 'p2p.run_save' && this.db) { - const run = { ...(msg.run as Record), progress_snapshot: JSON.stringify(msg.run) }; - void upsertOrchestrationRun(this.db, run as any).catch(() => {}); - this.broadcastToBrowsers(JSON.stringify({ type: 'p2p.run_update', run: msg.run })); + // For RUN_SAVE/RUN_COMPLETE/RUN_ERROR we sanitize ONCE and reuse the same + // workflow_projection (and the same JSON progress_snapshot bytes) for both + // the DB upsert and the browser broadcast. This guarantees the diagnostic + // code set the browser sees matches what gets persisted. + if (type === P2P_WORKFLOW_MSG.RUN_SAVE) { + const { persisted, broadcast } = sanitizeP2pRunForPersistAndBroadcast(msg.run, { serverId: this.serverId }); + if (this.db) void upsertOrchestrationRun(this.db, persisted).catch(() => {}); + this.broadcastToBrowsers(JSON.stringify({ + type: P2P_WORKFLOW_MSG.RUN_UPDATE, + run: broadcast, + })); + return; + } + if (type === P2P_WORKFLOW_MSG.RUN_COMPLETE) { + const completedAt = new Date().toISOString(); + const overrides = { + serverId: this.serverId, + status: 'completed', + completedAt, + updatedAt: completedAt, + }; + const { persisted, broadcast } = sanitizeP2pRunForPersistAndBroadcast(msg.run, overrides); + if (this.db) void upsertOrchestrationRun(this.db, persisted).catch(() => {}); + this.broadcastToBrowsers(JSON.stringify({ + type: P2P_WORKFLOW_MSG.RUN_UPDATE, + run: broadcast, + })); + return; + } + if (type === P2P_WORKFLOW_MSG.RUN_ERROR) { + const updatedAt = new Date().toISOString(); + const overrides = { + serverId: this.serverId, + updatedAt, + }; + const { persisted, broadcast } = sanitizeP2pRunForPersistAndBroadcast(msg.run, overrides); + if (this.db) void upsertOrchestrationRun(this.db, persisted).catch(() => {}); + this.broadcastToBrowsers(JSON.stringify({ + type: P2P_WORKFLOW_MSG.RUN_UPDATE, + run: broadcast, + })); + return; + } + if (type === P2P_WORKFLOW_MSG.RUN_UPDATE) { + const run = sanitizeP2pRunUpdateForBroadcast(msg.run, { serverId: this.serverId }); + this.broadcastToBrowsers(JSON.stringify({ type: P2P_WORKFLOW_MSG.RUN_UPDATE, run })); return; } - if (type === 'p2p.run_complete' && this.db) { - const run = msg.run as any; - run.status = 'completed'; - run.completed_at = new Date().toISOString(); - run.progress_snapshot = JSON.stringify(run); - void upsertOrchestrationRun(this.db, run).catch(() => {}); - this.broadcastToBrowsers(JSON.stringify({ type: 'p2p.run_update', run })); + if ( + p2pDaemonMessage.kind === 'known' + && p2pDaemonMessage.descriptor.serverHandling === 'broadcast_to_browsers' + && p2pDaemonMessage.descriptor.browserDelivery === 'broadcast' + ) { + this.broadcastToBrowsers(JSON.stringify(msg)); return; } - if (type === 'p2p.run_error' && this.db) { - const run = msg.run as any; - run.updated_at = new Date().toISOString(); - run.progress_snapshot = JSON.stringify(run); - void upsertOrchestrationRun(this.db, run).catch(() => {}); - this.broadcastToBrowsers(JSON.stringify({ type: 'p2p.run_update', run })); + if (p2pDaemonMessage.kind === 'drop' && p2pDaemonMessage.reason === 'unknown_p2p_message') { + incrementCounter('p2p.bridge.unknown_message_drop', { direction: 'daemon_to_browser' }); + logger.warn({ serverId: this.serverId, type }, 'unknown daemon p2p message — dropped'); + return; + } + if (p2pDaemonMessage.kind === 'known') { + incrementCounter('p2p.bridge.route_policy_drop', { direction: 'daemon_to_browser', type }); + logger.warn({ serverId: this.serverId, type }, 'known daemon p2p message had no bridge route — dropped'); return; } @@ -1916,6 +2158,48 @@ export class WsBridge { this.broadcastToBrowsers(JSON.stringify(msg)); } + private handleDaemonP2pWorkflowHello(msg: Record): void { + const daemonId = typeof msg.daemonId === 'string' ? msg.daemonId : null; + const helloEpoch = typeof msg.helloEpoch === 'number' && Number.isFinite(msg.helloEpoch) + ? msg.helloEpoch + : null; + const sentAt = typeof msg.sentAt === 'number' && Number.isFinite(msg.sentAt) + ? msg.sentAt + : null; + const capabilities = Array.isArray(msg.capabilities) + ? msg.capabilities.filter((capability): capability is string => typeof capability === 'string') + : null; + if (!daemonId || helloEpoch === null || sentAt === null || !capabilities) { + incrementCounter('p2p.bridge.invalid_daemon_hello_drop'); + logger.warn({ serverId: this.serverId }, 'invalid daemon.hello — dropped'); + return; + } + const existing = this.daemonP2pWorkflowCapabilities; + if (existing && helloEpoch < existing.helloEpoch) { + incrementCounter('p2p.bridge.stale_daemon_hello_drop'); + logger.warn({ serverId: this.serverId, helloEpoch, currentEpoch: existing.helloEpoch }, 'stale daemon.hello — dropped'); + return; + } + const sortedCapabilities = [...new Set(capabilities)].sort(); + this.daemonP2pWorkflowCapabilities = { + daemonId, + capabilities: sortedCapabilities, + helloEpoch, + sentAt, + receivedAt: Date.now(), + }; + // Forward a sanitized snapshot to all browsers connected to this serverId + // so the web capability gate can react to missing/stale/downgraded caps. + // Per the message registry this is `browserDelivery: 'broadcast'`. + this.broadcastToBrowsers(JSON.stringify({ + type: P2P_WORKFLOW_MSG.DAEMON_HELLO, + daemonId, + capabilities: sortedCapabilities, + helloEpoch, + sentAt, + })); + } + private routeBinaryFrame(data: Buffer): void { // WS_DATA frames (type 0x04) are handled separately — parsePreviewBinaryFrame returns null for them. if (data.length > 0 && data[0] === PREVIEW_BINARY_FRAME.WS_DATA) { @@ -2238,6 +2522,12 @@ export class WsBridge { this.pendingMemoryManagementRequests.delete(reqId); } } + for (const [reqId, pending] of this.pendingP2pWorkflowRequests) { + if (pending.socket === ws) { + clearTimeout(pending.timer); + this.pendingP2pWorkflowRequests.delete(reqId); + } + } } /** @@ -2577,6 +2867,7 @@ export class WsBridge { try { this.daemonWs.close(4001, 'token_rotated'); } catch { /* ignore */ } this.daemonWs = null; this.authenticated = false; + this.daemonP2pWorkflowCapabilities = null; } } @@ -3398,4 +3689,15 @@ export class WsBridge { get isAuthenticated(): boolean { return this.authenticated; } + + getDaemonP2pWorkflowCapabilities(now = Date.now()): DaemonP2pWorkflowCapabilities | null { + if (!this.daemonP2pWorkflowCapabilities) return null; + if (now - this.daemonP2pWorkflowCapabilities.receivedAt > P2P_CAPABILITY_FRESHNESS_TTL_MS) { + return null; + } + return { + ...this.daemonP2pWorkflowCapabilities, + capabilities: [...this.daemonP2pWorkflowCapabilities.capabilities], + }; + } } diff --git a/server/test/bridge.test.ts b/server/test/bridge.test.ts index cc7c4d459..8c7a36421 100644 --- a/server/test/bridge.test.ts +++ b/server/test/bridge.test.ts @@ -7,6 +7,17 @@ import { } from '../src/ws/daemon-upgrade-publication-gate.js'; import * as dbQueries from '../src/db/queries.js'; import { PUSH_TIMELINE_EVENT_MAX_AGE_MS, TIMELINE_SUPPRESS_PUSH_FIELD } from '../../shared/push-notifications.js'; +import { P2P_WORKFLOW_MSG } from '../../shared/p2p-workflow-messages.js'; +import { P2P_CONFIG_MSG } from '../../shared/p2p-config-events.js'; +import { + P2P_BRIDGE_ERROR_CODES, + P2P_BRIDGE_PENDING_REQUESTS_GLOBAL, + P2P_BRIDGE_PENDING_REQUESTS_PER_SOCKET, + P2P_CAPABILITY_FRESHNESS_TTL_MS, + P2P_SANITIZE_MAX_STRING_BYTES, + P2P_WORKFLOW_CAPABILITY_V1, + P2P_WORKFLOW_SCRIPT_ARGV_CAPABILITY_V1, +} from '../../shared/p2p-workflow-constants.js'; // ── Mock WebSocket ───────────────────────────────────────────────────────────── @@ -1793,6 +1804,428 @@ describe('WsBridge', () => { expect(browserWs.sentStrings.length).toBeGreaterThan(0); expect(JSON.parse(browserWs.sentStrings[0]).type).toBe('p2p.conflict'); }); + + it('drops unknown p2p messages from daemon instead of broadcasting', async () => { + const { daemonWs, browserWs } = await setupAuthBridge(); + browserWs.sent.length = 0; + + daemonWs.emit('message', JSON.stringify({ + type: 'p2p.future_secret', + rawPrompt: 'do not leak', + })); + await flushAsync(); + + expect(browserWs.sentStrings).toHaveLength(0); + }); + + it('requires valid requestId before forwarding request-scoped p2p browser messages', async () => { + const { daemonWs, browserWs } = await setupAuthBridge(); + daemonWs.sent.length = 0; + + browserWs.emit('message', JSON.stringify({ + type: P2P_WORKFLOW_MSG.STATUS, + requestId: 'é', + })); + await flushAsync(); + + expect(daemonWs.sentStrings.some((raw) => JSON.parse(raw).type === P2P_WORKFLOW_MSG.STATUS)).toBe(false); + expect(browserWs.sentStrings.some((raw) => JSON.parse(raw).code === P2P_BRIDGE_ERROR_CODES.INVALID_REQUEST_ID)).toBe(true); + }); + + it('rejects browser p2p messages that are daemon-only or responses', async () => { + const { daemonWs, browserWs } = await setupAuthBridge(); + daemonWs.sent.length = 0; + + browserWs.emit('message', JSON.stringify({ + type: P2P_WORKFLOW_MSG.RUN_UPDATE, + run: { rawPrompt: 'do not forward' }, + })); + browserWs.emit('message', JSON.stringify({ + type: P2P_WORKFLOW_MSG.STATUS_RESPONSE, + requestId: 'p2p-response-from-browser', + runs: [], + })); + await flushAsync(); + + expect(daemonWs.sentStrings.some((raw) => JSON.parse(raw).type === P2P_WORKFLOW_MSG.RUN_UPDATE)).toBe(false); + expect(daemonWs.sentStrings.some((raw) => JSON.parse(raw).type === P2P_WORKFLOW_MSG.STATUS_RESPONSE)).toBe(false); + expect(browserWs.sentStrings.filter((raw) => JSON.parse(raw).code === P2P_BRIDGE_ERROR_CODES.WRONG_PEER)).toHaveLength(2); + }); + + it('single-casts request-scoped p2p responses to the pending requester only', async () => { + const bridge = WsBridge.get(serverId); + const daemonWs = new MockWs(); + bridge.handleDaemonConnection(daemonWs as never, makeDb('valid-hash'), {} as never); + daemonWs.emit('message', JSON.stringify({ type: 'auth', serverId, token: 't' })); + await flushAsync(); + + const browser1 = new MockWs(); + const browser2 = new MockWs(); + bridge.handleBrowserConnection(browser1 as never, 'user-1', makeDb('valid-hash')); + bridge.handleBrowserConnection(browser2 as never, 'user-2', makeDb('valid-hash')); + + browser1.emit('message', JSON.stringify({ + type: P2P_WORKFLOW_MSG.READ_DISCUSSION, + requestId: 'p2p-read-1', + id: 'discussion-1', + })); + await flushAsync(); + browser1.sent.length = 0; + browser2.sent.length = 0; + + daemonWs.emit('message', JSON.stringify({ + type: P2P_WORKFLOW_MSG.READ_DISCUSSION_RESPONSE, + requestId: 'p2p-read-1', + id: 'discussion-1', + content: 'private discussion', + })); + await flushAsync(); + + expect(browser1.sentStrings).toHaveLength(1); + expect(browser2.sentStrings).toHaveLength(0); + expect(JSON.parse(browser1.sentStrings[0])).toMatchObject({ + type: P2P_WORKFLOW_MSG.READ_DISCUSSION_RESPONSE, + requestId: 'p2p-read-1', + }); + }); + + it('drops mismatched p2p response types without clearing the pending request', async () => { + const { daemonWs, browserWs } = await setupAuthBridge(); + + browserWs.emit('message', JSON.stringify({ + type: P2P_WORKFLOW_MSG.STATUS, + requestId: 'p2p-status-1', + })); + await flushAsync(); + browserWs.sent.length = 0; + + daemonWs.emit('message', JSON.stringify({ + type: P2P_WORKFLOW_MSG.LIST_DISCUSSIONS_RESPONSE, + requestId: 'p2p-status-1', + discussions: [], + })); + await flushAsync(); + + expect(browserWs.sentStrings).toHaveLength(0); + + daemonWs.emit('message', JSON.stringify({ + type: P2P_WORKFLOW_MSG.STATUS_RESPONSE, + requestId: 'p2p-status-1', + runs: [], + })); + await flushAsync(); + + expect(browserWs.sentStrings).toHaveLength(1); + expect(JSON.parse(browserWs.sentStrings[0]).type).toBe(P2P_WORKFLOW_MSG.STATUS_RESPONSE); + }); + + it('rejects duplicate active p2p requestIds without replacing the original requester', async () => { + const bridge = WsBridge.get(serverId); + const daemonWs = new MockWs(); + bridge.handleDaemonConnection(daemonWs as never, makeDb('valid-hash'), {} as never); + daemonWs.emit('message', JSON.stringify({ type: 'auth', serverId, token: 't' })); + await flushAsync(); + + const browser1 = new MockWs(); + const browser2 = new MockWs(); + bridge.handleBrowserConnection(browser1 as never, 'user-1', makeDb('valid-hash')); + bridge.handleBrowserConnection(browser2 as never, 'user-2', makeDb('valid-hash')); + + browser1.emit('message', JSON.stringify({ + type: P2P_WORKFLOW_MSG.STATUS, + requestId: 'p2p-duplicate-1', + })); + browser2.emit('message', JSON.stringify({ + type: P2P_WORKFLOW_MSG.STATUS, + requestId: 'p2p-duplicate-1', + })); + await flushAsync(); + + expect(browser2.sentStrings.some((raw) => JSON.parse(raw).code === P2P_BRIDGE_ERROR_CODES.DUPLICATE_REQUEST_ID)).toBe(true); + browser1.sent.length = 0; + browser2.sent.length = 0; + + daemonWs.emit('message', JSON.stringify({ + type: P2P_WORKFLOW_MSG.STATUS_RESPONSE, + requestId: 'p2p-duplicate-1', + runs: [], + })); + await flushAsync(); + + expect(browser1.sentStrings).toHaveLength(1); + expect(browser2.sentStrings).toHaveLength(0); + }); + + it('drops request-scoped p2p responses without a pending requester', async () => { + const { daemonWs, browserWs } = await setupAuthBridge(); + browserWs.sent.length = 0; + + daemonWs.emit('message', JSON.stringify({ + type: P2P_WORKFLOW_MSG.LIST_DISCUSSIONS_RESPONSE, + requestId: 'p2p-missing', + discussions: [], + })); + await flushAsync(); + + expect(browserWs.sentStrings).toHaveLength(0); + }); + + it('enforces per-socket pending caps before forwarding p2p requests', async () => { + const { daemonWs, browserWs } = await setupAuthBridge(); + daemonWs.sent.length = 0; + + for (let i = 0; i < P2P_BRIDGE_PENDING_REQUESTS_PER_SOCKET + 1; i += 1) { + browserWs.emit('message', JSON.stringify({ + type: P2P_WORKFLOW_MSG.STATUS, + requestId: `p2p-cap-${i}`, + })); + } + await flushAsync(); + + const forwarded = daemonWs.sentStrings + .map((raw) => JSON.parse(raw)) + .filter((msg) => msg.type === P2P_WORKFLOW_MSG.STATUS); + expect(forwarded).toHaveLength(P2P_BRIDGE_PENDING_REQUESTS_PER_SOCKET); + expect(browserWs.sentStrings.some((raw) => JSON.parse(raw).code === P2P_BRIDGE_ERROR_CODES.PENDING_LIMIT_EXCEEDED)).toBe(true); + }); + + it('enforces the global pending cap before forwarding p2p requests', async () => { + const bridge = WsBridge.get(serverId); + const daemonWs = new MockWs(); + bridge.handleDaemonConnection(daemonWs as never, makeDb('valid-hash'), {} as never); + daemonWs.emit('message', JSON.stringify({ type: 'auth', serverId, token: 't' })); + await flushAsync(); + daemonWs.sent.length = 0; + + const socketCount = Math.ceil(P2P_BRIDGE_PENDING_REQUESTS_GLOBAL / P2P_BRIDGE_PENDING_REQUESTS_PER_SOCKET); + for (let socketIndex = 0; socketIndex < socketCount; socketIndex += 1) { + const browserWs = new MockWs(); + bridge.handleBrowserConnection(browserWs as never, `user-${socketIndex}`, makeDb('valid-hash')); + browserWs.sent.length = 0; + for (let requestIndex = 0; requestIndex < P2P_BRIDGE_PENDING_REQUESTS_PER_SOCKET; requestIndex += 1) { + browserWs.emit('message', JSON.stringify({ + type: P2P_WORKFLOW_MSG.STATUS, + requestId: `p2p-global-${socketIndex}-${requestIndex}`, + })); + } + } + await flushAsync(); + + const extraBrowser = new MockWs(); + bridge.handleBrowserConnection(extraBrowser as never, 'user-extra', makeDb('valid-hash')); + extraBrowser.sent.length = 0; + extraBrowser.emit('message', JSON.stringify({ + type: P2P_WORKFLOW_MSG.STATUS, + requestId: 'p2p-global-overflow', + })); + await flushAsync(); + + const forwarded = daemonWs.sentStrings + .map((raw) => JSON.parse(raw)) + .filter((msg) => msg.type === P2P_WORKFLOW_MSG.STATUS); + expect(forwarded).toHaveLength(P2P_BRIDGE_PENDING_REQUESTS_GLOBAL); + expect(extraBrowser.sentStrings.some((raw) => { + const msg = JSON.parse(raw); + return msg.code === P2P_BRIDGE_ERROR_CODES.PENDING_LIMIT_EXCEEDED && msg.scope === 'global'; + })).toBe(true); + }); + + it('handles p2p.run_complete and p2p.run_error as registered daemon messages', async () => { + const { daemonWs, browserWs } = await setupAuthBridge(); + + daemonWs.emit('message', JSON.stringify({ + type: P2P_WORKFLOW_MSG.RUN_COMPLETE, + run: { id: 'run-complete', status: 'running', mode_key: 'audit' }, + })); + daemonWs.emit('message', JSON.stringify({ + type: P2P_WORKFLOW_MSG.RUN_ERROR, + run: { id: 'run-error', status: 'failed', mode_key: 'audit', error: 'failed' }, + })); + await flushAsync(); + + const updates = browserWs.sentStrings.map((raw) => JSON.parse(raw)); + expect(updates.filter((msg) => msg.type === P2P_WORKFLOW_MSG.RUN_UPDATE)).toHaveLength(2); + expect(updates.find((msg) => msg.run.id === 'run-complete')?.run.status).toBe('completed'); + expect(updates.find((msg) => msg.run.id === 'run-error')?.run.error).toBe('failed'); + }); + + it('writes the same diagnostic code set to DB upsert and to the browser broadcast', async () => { + // Regression for PR-D: the canonical sanitize result must be shared + // between the DB-bound `upsertOrchestrationRun` payload and the + // broadcast payload so the diagnostic code set the browser sees is + // byte-identical to what the DB row records. + const upsertSpy = vi.spyOn(dbQueries, 'upsertOrchestrationRun').mockResolvedValue(); + try { + const { daemonWs, browserWs } = await setupAuthBridge(); + + // Force the bridge into the truncation branch via oversized routing_history. + const oversized = 'x'.repeat(P2P_SANITIZE_MAX_STRING_BYTES + 100); + daemonWs.emit('message', JSON.stringify({ + type: P2P_WORKFLOW_MSG.RUN_SAVE, + run: { + id: 'run-parity', + discussion_id: 'disc-1', + mode_key: 'audit', + status: 'running', + diagnostics: [ + { code: 'daemon_busy', phase: 'bind', severity: 'error', summary: 'busy' }, + { code: 'missing_required_capability', phase: 'execute', summary: 'missing cap' }, + ], + routing_history: Array.from({ length: 80 }, (_, idx) => ({ + step: idx, + nested: { value: oversized }, + })), + }, + })); + await flushAsync(); + + expect(upsertSpy).toHaveBeenCalledTimes(1); + const persistedArg = upsertSpy.mock.calls[0]?.[1] as { + progress_snapshot: string; + workflow_projection: { diagnostics: Array<{ code: string }> }; + }; + const persistedSnap = JSON.parse(persistedArg.progress_snapshot) as { + diagnostics: Array<{ code: string }>; + }; + + const broadcasts = browserWs.sentStrings + .map((raw) => JSON.parse(raw)) + .filter((msg) => msg.type === P2P_WORKFLOW_MSG.RUN_UPDATE); + expect(broadcasts).toHaveLength(1); + const broadcastDiagnostics = broadcasts[0].run.workflow_projection.diagnostics as Array<{ code: string }>; + + const persistedCodes = [...persistedArg.workflow_projection.diagnostics.map((d) => d.code)].sort(); + const persistedSnapCodes = [...persistedSnap.diagnostics.map((d) => d.code)].sort(); + const broadcastCodes = [...broadcastDiagnostics.map((d) => d.code)].sort(); + + expect(broadcastCodes).toEqual(persistedCodes); + expect(broadcastCodes).toEqual(persistedSnapCodes); + expect(broadcastCodes).toContain('daemon_busy'); + expect(broadcastCodes).toContain('missing_required_capability'); + expect(broadcastCodes).toContain('private_projection_field_dropped'); + } finally { + upsertSpy.mockRestore(); + } + }); + + it('caches daemon.hello capabilities and clears stale/disconnected snapshots', async () => { + const bridge = WsBridge.get(serverId); + const daemonWs = new MockWs(); + bridge.handleDaemonConnection(daemonWs as never, makeDb('valid-hash'), {} as never); + daemonWs.emit('message', JSON.stringify({ type: 'auth', serverId, token: 't' })); + await flushAsync(); + + daemonWs.emit('message', JSON.stringify({ + type: P2P_WORKFLOW_MSG.DAEMON_HELLO, + daemonId: serverId, + capabilities: [P2P_WORKFLOW_SCRIPT_ARGV_CAPABILITY_V1, P2P_WORKFLOW_CAPABILITY_V1], + helloEpoch: 2, + sentAt: 123, + })); + await flushAsync(); + + expect(bridge.getDaemonP2pWorkflowCapabilities()?.capabilities).toEqual([ + P2P_WORKFLOW_SCRIPT_ARGV_CAPABILITY_V1, + P2P_WORKFLOW_CAPABILITY_V1, + ].sort()); + expect(bridge.getDaemonP2pWorkflowCapabilities(Date.now() + P2P_CAPABILITY_FRESHNESS_TTL_MS + 1)).toBeNull(); + + daemonWs.close(); + await flushAsync(); + + expect(bridge.getDaemonP2pWorkflowCapabilities()).toBeNull(); + }); + + it('forwards p2p.config.save from browser to daemon and registers a pending response', async () => { + // PR-E: p2p.config.save must be registered alongside workflow messages + // so the bridge default-deny no longer drops it. The browser ingress + // forwards via the generic forward_to_daemon path, and a pending entry + // is created so the SAVE_RESPONSE can be singlecast back. + const { daemonWs, browserWs } = await setupAuthBridge(); + daemonWs.sent.length = 0; + + browserWs.emit('message', JSON.stringify({ + type: P2P_CONFIG_MSG.SAVE, + requestId: 'p2p-config-save-1', + scopeSession: 'deck_demo_brain', + config: { participants: [] }, + })); + await flushAsync(); + + const forwarded = daemonWs.sentStrings + .map((raw) => JSON.parse(raw)) + .filter((msg) => msg.type === P2P_CONFIG_MSG.SAVE); + expect(forwarded).toHaveLength(1); + expect(forwarded[0]).toMatchObject({ + type: P2P_CONFIG_MSG.SAVE, + requestId: 'p2p-config-save-1', + scopeSession: 'deck_demo_brain', + }); + // Browser must not receive any error code (route policy / wrong peer / unknown). + expect(browserWs.sentStrings.some((raw) => 'code' in JSON.parse(raw))).toBe(false); + }); + + it('singlecasts p2p.config.save_response to the requesting browser only', async () => { + // PR-E: SAVE_RESPONSE flows through the generic singlecast_response + // handler — only the browser that registered the requestId receives it. + const bridge = WsBridge.get(serverId); + const daemonWs = new MockWs(); + bridge.handleDaemonConnection(daemonWs as never, makeDb('valid-hash'), {} as never); + daemonWs.emit('message', JSON.stringify({ type: 'auth', serverId, token: 't' })); + await flushAsync(); + + const browser1 = new MockWs(); + const browser2 = new MockWs(); + bridge.handleBrowserConnection(browser1 as never, 'user-1', makeDb('valid-hash')); + bridge.handleBrowserConnection(browser2 as never, 'user-2', makeDb('valid-hash')); + + browser1.emit('message', JSON.stringify({ + type: P2P_CONFIG_MSG.SAVE, + requestId: 'p2p-config-save-singlecast', + scopeSession: 'deck_demo_brain', + config: { participants: [] }, + })); + await flushAsync(); + browser1.sent.length = 0; + browser2.sent.length = 0; + + daemonWs.emit('message', JSON.stringify({ + type: P2P_CONFIG_MSG.SAVE_RESPONSE, + requestId: 'p2p-config-save-singlecast', + scopeSession: 'deck_demo_brain', + ok: true, + })); + await flushAsync(); + + expect(browser1.sentStrings).toHaveLength(1); + expect(browser2.sentStrings).toHaveLength(0); + expect(JSON.parse(browser1.sentStrings[0])).toMatchObject({ + type: P2P_CONFIG_MSG.SAVE_RESPONSE, + requestId: 'p2p-config-save-singlecast', + ok: true, + }); + }); + + it('keeps unknown p2p.* messages dropped after registering p2p.config.*', async () => { + // Default-deny safeguard: registering p2p.config.* must NOT widen the + // bridge to forward arbitrary p2p.* messages. Any unregistered p2p.* + // type from the daemon still drops, no broadcast. + const { daemonWs, browserWs } = await setupAuthBridge(); + browserWs.sent.length = 0; + + daemonWs.emit('message', JSON.stringify({ + type: 'p2p.future_secret', + rawPrompt: 'do not leak', + })); + daemonWs.emit('message', JSON.stringify({ + type: 'p2p.config.future_secret', + scopeSession: 'deck_demo_brain', + ok: true, + })); + await flushAsync(); + + expect(browserWs.sentStrings).toHaveLength(0); + }); }); describe('push notifications', () => { diff --git a/server/test/p2p-workflow-sanitize.test.ts b/server/test/p2p-workflow-sanitize.test.ts new file mode 100644 index 000000000..08d256c79 --- /dev/null +++ b/server/test/p2p-workflow-sanitize.test.ts @@ -0,0 +1,428 @@ +import { describe, expect, it } from 'vitest'; +import { + P2P_SANITIZE_MAX_ARRAY_ITEMS, + P2P_SANITIZE_MAX_STRING_BYTES, + P2P_WORKFLOW_PROJECTION_VERSION, +} from '../../shared/p2p-workflow-constants.js'; +import { validateP2pWorkflowStatusProjection } from '../../shared/p2p-workflow-validators.js'; +import { + sanitizeLegacyP2pProgressSnapshot, + sanitizeP2pOrchestrationRunForBridge, + sanitizeP2pRunForPersistAndBroadcast, + sanitizeP2pRunUpdateForBroadcast, + sanitizeP2pWorkflowStatusProjection, + sanitizeServerSideDiagnostic, + sanitizeWorkflowDiagnosticForRetention, +} from '../src/p2p-workflow-sanitize.js'; + +describe('p2p workflow server sanitizer', () => { + it('constructs an allowlisted projection and persisted snapshot', () => { + const run = sanitizeP2pOrchestrationRunForBridge({ + id: 'run-1', + discussion_id: 'disc-1', + server_id: 'wrong-server', + mode_key: 'audit', + status: 'running', + compiledWorkflow: { secret: true }, + rawPrompt: 'do not persist', + env: { API_KEY: 'secret' }, + diagnostics: [{ code: 'private_projection_field_dropped', summary: 'existing' }], + }, { serverId: 'server-1' }); + + expect(run.id).toBe('run-1'); + expect(run.server_id).toBe('server-1'); + expect(run.workflow_projection.diagnostics.map((diagnostic) => diagnostic.code)).toContain('private_projection_field_dropped'); + expect(run.progress_snapshot).not.toContain('compiledWorkflow'); + expect(run.progress_snapshot).not.toContain('rawPrompt'); + expect(run.progress_snapshot).not.toContain('API_KEY'); + }); + + it('drops malicious and private keys from browser run_update while preserving safe legacy fields', () => { + const poisoned = JSON.parse('{"id":"run-2","status":"running","mode_key":"audit","active_phase":"hop","hop_counts":{"completed":1},"nested":{"constructor":{"polluted":true}},"token":"secret"}'); + const run = sanitizeP2pRunUpdateForBroadcast(poisoned, { serverId: 'server-1' }); + + expect(run.active_phase).toBe('hop'); + expect(run.hop_counts).toEqual({ completed: 1 }); + expect('token' in run).toBe(false); + expect('nested' in run).toBe(false); + expect(Object.prototype).not.toHaveProperty('polluted'); + }); + + it('normalizes malformed status projection safely', () => { + const projection = sanitizeP2pWorkflowStatusProjection({ + runId: 'run-3', + workflowId: 'wf-1', + status: 'not-a-status', + capabilitySnapshot: { daemonId: 'daemon-1', helloEpoch: 2, sentAt: 3, capabilities: ['p2p.workflow.v1'] }, + }); + + expect(projection.status).toBe('failed'); + expect(projection.capabilitySnapshot?.daemonId).toBe('daemon-1'); + }); + + it('bounds oversized broadcast payloads and records a sanitize diagnostic', () => { + const run = sanitizeP2pRunUpdateForBroadcast({ + id: 'run-oversized', + status: 'running', + mode_key: 'audit', + active_phase: 'x'.repeat(P2P_SANITIZE_MAX_STRING_BYTES + 100), + routing_history: Array.from({ length: P2P_SANITIZE_MAX_ARRAY_ITEMS + 10 }, (_, index) => ({ + step: index, + nested: { value: 'y'.repeat(P2P_SANITIZE_MAX_STRING_BYTES + 100) }, + })), + hop_states: Array.from({ length: P2P_SANITIZE_MAX_ARRAY_ITEMS + 10 }, (_, index) => ({ + session: 's'.repeat(P2P_SANITIZE_MAX_STRING_BYTES + 100), + hop_index: index, + })), + }, { serverId: 'server-1' }); + + expect(run.active_phase.length).toBeLessThanOrEqual(P2P_SANITIZE_MAX_STRING_BYTES); + expect(Array.isArray(run.routing_history)).toBe(true); + expect((run.routing_history as unknown[]).length).toBeLessThanOrEqual(P2P_SANITIZE_MAX_ARRAY_ITEMS); + expect(run.hop_states?.length).toBeLessThanOrEqual(P2P_SANITIZE_MAX_ARRAY_ITEMS); + expect(String(run.hop_states?.[0]?.session).length).toBeLessThanOrEqual(P2P_SANITIZE_MAX_STRING_BYTES); + expect(run.workflow_projection.diagnostics).toEqual(expect.arrayContaining([ + expect.objectContaining({ + code: 'private_projection_field_dropped', + phase: 'sanitize', + summary: 'Sanitized oversized workflow payload', + }), + ])); + }); +}); + +describe('sanitizeLegacyP2pProgressSnapshot (read-time legacy sanitizer)', () => { + it('returns an already-valid persisted snapshot unchanged with no diagnostic', () => { + const validSnapshot = { + projectionVersion: P2P_WORKFLOW_PROJECTION_VERSION, + runId: 'run-valid', + workflowId: 'wf-valid', + status: 'completed' as const, + currentNodeId: 'node-1', + completedNodeIds: ['node-0', 'node-1'], + diagnostics: [], + updatedAt: '2026-01-01T00:00:00.000Z', + }; + const result = sanitizeLegacyP2pProgressSnapshot(JSON.stringify(validSnapshot)); + + expect(result.diagnostic).toBeNull(); + expect(result.projection.runId).toBe('run-valid'); + expect(result.projection.workflowId).toBe('wf-valid'); + expect(result.projection.status).toBe('completed'); + expect(result.projection.currentNodeId).toBe('node-1'); + expect(result.projection.completedNodeIds).toEqual(['node-0', 'node-1']); + expect(result.projection.diagnostics).toEqual([]); + expect(result.projection.updatedAt).toBe('2026-01-01T00:00:00.000Z'); + expect(result.snapshot.runId).toBe('run-valid'); + expect(result.snapshot.workflowId).toBe('wf-valid'); + const snapshotKeys = Object.keys(result.snapshot); + expect(snapshotKeys).not.toContain('capabilitySnapshot'); + expect(snapshotKeys).not.toContain('artifactSummaries'); + expect(snapshotKeys).not.toContain('nodeSummaries'); + const validation = validateP2pWorkflowStatusProjection(result.projection); + expect(validation.ok).toBe(true); + }); + + it('strips compiledWorkflow / rawPrompt / scriptRawOutputs from a legacy snapshot and emits the diagnostic', () => { + const legacy = { + runId: 'run-legacy', + workflowId: 'audit', + status: 'running', + currentNodeId: 'node-x', + completedNodeIds: ['node-a'], + diagnostics: [], + updatedAt: '2025-06-01T00:00:00.000Z', + compiledWorkflow: { secret: true, dangerousField: 'do-not-leak' }, + rawPrompt: 'system prompt that must never be persisted', + scriptRawOutputs: ['stdout content with secret-token'], + artifactBaselines: [{ path: 'src/x', sha256: 'aaaa' }], + env: { OPENAI_API_KEY: 'sk-leak' }, + token: 'oauth-secret', + }; + const result = sanitizeLegacyP2pProgressSnapshot(JSON.stringify(legacy)); + + expect(result.diagnostic).not.toBeNull(); + expect(result.diagnostic?.code).toBe('legacy_progress_snapshot_sanitized'); + expect(result.diagnostic?.phase).toBe('sanitize'); + expect(result.projection.diagnostics.map((d) => d.code)) + .toContain('legacy_progress_snapshot_sanitized'); + + const projectionJson = JSON.stringify(result.projection); + const snapshotJson = JSON.stringify(result.snapshot); + for (const json of [projectionJson, snapshotJson]) { + expect(json).not.toContain('compiledWorkflow'); + expect(json).not.toContain('rawPrompt'); + expect(json).not.toContain('scriptRawOutputs'); + expect(json).not.toContain('artifactBaselines'); + expect(json).not.toContain('OPENAI_API_KEY'); + expect(json).not.toContain('sk-leak'); + expect(json).not.toContain('oauth-secret'); + expect(json).not.toContain('do-not-leak'); + } + + expect(result.projection.runId).toBe('run-legacy'); + expect(result.projection.workflowId).toBe('audit'); + expect(result.projection.status).toBe('running'); + + const validation = validateP2pWorkflowStatusProjection(result.projection); + expect(validation.ok).toBe(true); + }); + + it('returns a safe empty projection plus the sanitized diagnostic on malformed JSON', () => { + const result = sanitizeLegacyP2pProgressSnapshot('{not-json'); + + expect(result.diagnostic?.code).toBe('legacy_progress_snapshot_sanitized'); + expect(result.projection.runId).toBe('unknown'); + expect(result.projection.workflowId).toBe('legacy'); + expect(result.projection.status).toBe('stale'); + expect(result.projection.completedNodeIds).toEqual([]); + expect(result.projection.diagnostics.map((d) => d.code)) + .toContain('legacy_progress_snapshot_sanitized'); + expect(result.projection.updatedAt).toMatch(/T/); + const validation = validateP2pWorkflowStatusProjection(result.projection); + expect(validation.ok).toBe(true); + }); + + it('also sanitizes the empty-string case as malformed input', () => { + const result = sanitizeLegacyP2pProgressSnapshot(''); + expect(result.diagnostic?.code).toBe('legacy_progress_snapshot_sanitized'); + expect(result.projection.status).toBe('stale'); + }); + + it('never lets __proto__ / constructor keys reach the output projection or snapshot', () => { + // Use raw JSON.parse so the malicious keys actually appear as own properties + // rather than being silently coerced by an object literal. + const poisoned = '{"runId":"run-p","workflowId":"audit","status":"running","completedNodeIds":[],"diagnostics":[],"updatedAt":"2026-01-01T00:00:00.000Z","__proto__":{"polluted":true},"constructor":{"polluted":true},"nested":{"__proto__":{"polluted":true}}}'; + const result = sanitizeLegacyP2pProgressSnapshot(poisoned); + + const projectionJson = JSON.stringify(result.projection); + const snapshotJson = JSON.stringify(result.snapshot); + expect(projectionJson).not.toContain('__proto__'); + expect(projectionJson).not.toContain('"constructor"'); + expect(projectionJson).not.toContain('polluted'); + expect(snapshotJson).not.toContain('__proto__'); + expect(snapshotJson).not.toContain('"constructor"'); + expect(snapshotJson).not.toContain('polluted'); + + // Object.prototype must remain pristine. + expect(({} as Record).polluted).toBeUndefined(); + expect(Object.prototype).not.toHaveProperty('polluted'); + + // Even the legacy-shaped poisoned payload should produce a valid projection. + const validation = validateP2pWorkflowStatusProjection(result.projection); + expect(validation.ok).toBe(true); + }); + + it("'{}' placeholder does not emit legacy_progress_snapshot_sanitized", () => { + // Migration `032_p2p_progress_snapshot.sql` defaults this column to '{}'. + // Newly created rows that have not yet emitted a projection MUST NOT be + // marked as legacy — that would pollute every fresh /p2p/runs response + // with a sanitize diagnostic. + const result = sanitizeLegacyP2pProgressSnapshot('{}'); + expect(result.diagnostic).toBeNull(); + expect(result.projection.diagnostics).toEqual([]); + // Empty placeholder maps to the canonical "no real status yet" projection. + expect(result.projection.status).toBe('stale'); + // Snapshot is still a valid persisted shape so consumers stay + // schema-compatible. + const snapshotJson = JSON.stringify(result.snapshot); + expect(snapshotJson).not.toContain('legacy_progress_snapshot_sanitized'); + expect(result.snapshot.projectionVersion).toBe(P2P_WORKFLOW_PROJECTION_VERSION); + }); + + it('legacy diagnostic uses real runId from row context when provided', () => { + // Clearly-legacy payload (`compiledWorkflow` is on the forbidden list) so + // the function falls into the legacy-projection branch. Without context + // the diagnostic would say `runId: 'unknown'`; with context it MUST + // surface the originating row id so audits can trace it back. + const legacyJson = JSON.stringify({ + compiledWorkflow: { secret: true }, + status: 'failed', + }); + const result = sanitizeLegacyP2pProgressSnapshot(legacyJson, { + runId: 'real-row-id', + workflowId: 'discussion-x', + }); + expect(result.diagnostic).not.toBeNull(); + expect(result.diagnostic?.code).toBe('legacy_progress_snapshot_sanitized'); + expect(result.diagnostic?.runId).toBe('real-row-id'); + expect(result.projection.runId).toBe('real-row-id'); + expect(result.projection.workflowId).toBe('discussion-x'); + }); +}); + +describe('sanitizeWorkflowDiagnosticForRetention / sanitizeServerSideDiagnostic', () => { + it('preserves known workflow diagnostics in live projection', () => { + // The daemon emits parse/compile/bind/execute-phase diagnostics. The + // bridge MUST surface every code in P2P_WORKFLOW_DIAGNOSTIC_CODES, not + // just the two server-side sanitize codes. + const projection = sanitizeP2pWorkflowStatusProjection({ + runId: 'run-known', + workflowId: 'audit', + status: 'running', + diagnostics: [ + { code: 'daemon_busy', phase: 'bind', severity: 'error', messageKey: 'should-be-ignored', summary: 'busy' }, + { code: 'missing_required_capability', phase: 'execute', severity: 'error', summary: 'missing cap' }, + { code: 'loop_budget_exhausted', phase: 'execute', severity: 'error' }, + { code: 'script_machine_output_invalid', phase: 'execute', severity: 'warning' }, + ], + }); + + const codes = projection.diagnostics.map((d) => d.code); + expect(codes).toEqual(expect.arrayContaining([ + 'daemon_busy', + 'missing_required_capability', + 'loop_budget_exhausted', + 'script_machine_output_invalid', + ])); + // messageKey must be RECOMPUTED from the code, never trusted from raw input. + const daemonBusy = projection.diagnostics.find((d) => d.code === 'daemon_busy'); + expect(daemonBusy?.messageKey).toBe('p2p.workflow.diagnostics.daemon_busy'); + expect(daemonBusy?.summary).toBe('busy'); + expect(daemonBusy?.phase).toBe('bind'); + // Severity preserved from raw input when valid. + expect(daemonBusy?.severity).toBe('error'); + }); + + it('preserves valid persisted snapshot diagnostics on read', () => { + // Round-trip an already-valid persisted snapshot that contains + // daemon_busy. The valid-snapshot branch must NOT add a + // legacy_progress_snapshot_sanitized noise diagnostic, and it must + // preserve the workflow diagnostic intact. + const validSnapshot = { + projectionVersion: P2P_WORKFLOW_PROJECTION_VERSION, + runId: 'run-persisted', + workflowId: 'audit', + status: 'running' as const, + currentNodeId: 'node-x', + completedNodeIds: ['node-0'], + diagnostics: [ + { code: 'daemon_busy', phase: 'bind', severity: 'error', messageKey: 'p2p.workflow.diagnostics.daemon_busy', summary: 'busy' }, + ], + updatedAt: '2026-02-01T00:00:00.000Z', + }; + const result = sanitizeLegacyP2pProgressSnapshot(JSON.stringify(validSnapshot)); + + expect(result.diagnostic).toBeNull(); + const codes = result.projection.diagnostics.map((d) => d.code); + expect(codes).toContain('daemon_busy'); + expect(codes).not.toContain('legacy_progress_snapshot_sanitized'); + const preserved = result.projection.diagnostics.find((d) => d.code === 'daemon_busy'); + expect(preserved?.messageKey).toBe('p2p.workflow.diagnostics.daemon_busy'); + expect(preserved?.summary).toBe('busy'); + }); + + it('drops unknown diagnostic codes but keeps known sanitize diagnostics', () => { + const projection = sanitizeP2pWorkflowStatusProjection({ + runId: 'run-mixed', + workflowId: 'audit', + status: 'running', + diagnostics: [ + { code: 'private_projection_field_dropped', phase: 'sanitize', summary: 'dropped one' }, + { code: 'totally_made_up_code', phase: 'execute', summary: 'should-be-dropped' }, + { code: 'forbidden_envelope_field', phase: 'parse', summary: 'forbidden' }, + { code: '', summary: 'empty code' }, + { code: 'daemon_busy', phase: 'bind' }, + ], + }); + + const codes = projection.diagnostics.map((d) => d.code); + expect(codes).toContain('private_projection_field_dropped'); + expect(codes).toContain('forbidden_envelope_field'); + expect(codes).toContain('daemon_busy'); + expect(codes).not.toContain('totally_made_up_code'); + }); + + it('preserves warning severity for lenient script diagnostics', () => { + const projection = sanitizeP2pWorkflowStatusProjection({ + runId: 'run-warning', + workflowId: 'audit', + status: 'running', + diagnostics: [ + { code: 'script_machine_output_invalid', phase: 'execute', severity: 'warning', summary: 'lenient parser warning' }, + ], + }); + + const warning = projection.diagnostics.find((d) => d.code === 'script_machine_output_invalid'); + expect(warning).toBeDefined(); + expect(warning?.severity).toBe('warning'); + }); + + it('sanitizeServerSideDiagnostic still rejects non-sanitize codes', () => { + // sanitizeServerSideDiagnostic is used ONLY for server-emitted sanitize + // events. It must reject daemon-origin codes like daemon_busy. + const accepted = sanitizeServerSideDiagnostic( + { code: 'private_projection_field_dropped', summary: 'ok' }, + 'run-x', + ); + expect(accepted?.code).toBe('private_projection_field_dropped'); + expect(accepted?.runId).toBe('run-x'); + + const rejected = sanitizeServerSideDiagnostic({ code: 'daemon_busy', summary: 'no' }, 'run-x'); + expect(rejected).toBeNull(); + }); + + it('sanitizeWorkflowDiagnosticForRetention bounds string fields', () => { + const oversized = 'x'.repeat(P2P_SANITIZE_MAX_STRING_BYTES + 100); + const diag = sanitizeWorkflowDiagnosticForRetention({ + code: 'daemon_busy', + phase: 'bind', + severity: 'error', + summary: oversized, + fieldPath: oversized, + nodeId: oversized, + runId: oversized, + }); + expect(diag).not.toBeNull(); + expect(diag!.summary!.length).toBeLessThanOrEqual(P2P_SANITIZE_MAX_STRING_BYTES); + expect(diag!.fieldPath!.length).toBeLessThanOrEqual(P2P_SANITIZE_MAX_STRING_BYTES); + expect(diag!.nodeId!.length).toBeLessThanOrEqual(P2P_SANITIZE_MAX_STRING_BYTES); + expect(diag!.runId!.length).toBeLessThanOrEqual(P2P_SANITIZE_MAX_STRING_BYTES); + }); +}); + +describe('sanitizeP2pRunForPersistAndBroadcast — DB ↔ broadcast diagnostic parity', () => { + it('produces identical diagnostic code sets between DB upsert and broadcast', () => { + const oversized = 'x'.repeat(P2P_SANITIZE_MAX_STRING_BYTES + 100); + const { persisted, broadcast } = sanitizeP2pRunForPersistAndBroadcast({ + id: 'run-parity', + discussion_id: 'disc-1', + mode_key: 'audit', + status: 'running', + diagnostics: [ + { code: 'daemon_busy', phase: 'bind', severity: 'error', summary: 'busy' }, + { code: 'missing_required_capability', phase: 'execute' }, + ], + // Triggers truncation diagnostic via legacyPayloadTruncated path. + routing_history: Array.from({ length: P2P_SANITIZE_MAX_ARRAY_ITEMS + 10 }, (_, idx) => ({ + step: idx, + nested: { value: oversized }, + })), + }, { serverId: 'server-1' }); + + const persistedCodes = [...persisted.workflow_projection.diagnostics.map((d) => d.code)].sort(); + const broadcastCodes = [...broadcast.workflow_projection.diagnostics.map((d) => d.code)].sort(); + expect(broadcastCodes).toEqual(persistedCodes); + // Specifically include the daemon-emitted code AND the truncation code. + expect(persistedCodes).toContain('daemon_busy'); + expect(persistedCodes).toContain('missing_required_capability'); + expect(persistedCodes).toContain('private_projection_field_dropped'); + + // The serialized DB column must reflect the same diagnostics so that + // subsequent /p2p/runs reads see the same set. + const persistedSnap = JSON.parse(persisted.progress_snapshot) as { diagnostics: Array<{ code: string }> }; + const persistedSnapCodes = persistedSnap.diagnostics.map((d) => d.code).sort(); + expect(persistedSnapCodes).toEqual(persistedCodes); + }); + + it('shares the same projection object reference between persisted and broadcast', () => { + const { persisted, broadcast } = sanitizeP2pRunForPersistAndBroadcast({ + id: 'run-share', + mode_key: 'audit', + status: 'running', + diagnostics: [{ code: 'daemon_busy', phase: 'bind' }], + }, { serverId: 'server-1' }); + + expect(persisted.workflow_projection).toBe(broadcast.workflow_projection); + }); +}); diff --git a/shared/cron-types.ts b/shared/cron-types.ts index 9fcc53108..291e66557 100644 --- a/shared/cron-types.ts +++ b/shared/cron-types.ts @@ -21,6 +21,25 @@ export interface CronP2pAction { /** Discriminated participant list — supports both roles and direct session names. */ participantEntries?: CronParticipant[]; rounds?: number; + /** + * Audit:R3 hardening / task 10.2 — when present, the cron dispatcher routes + * this job through the daemon's advanced-workflow envelope path + * (`prepareAdvancedWorkflowLaunch`) instead of the legacy `startP2pRun` + * fallback. Carries the same shape as web-side + * `p2pWorkflowLaunchEnvelope`. Stored in DB as JSON; daemon validates + + * compiles + binds at dispatch time. v1a compatibility: legacy cron rows + * without this field continue to use the direct legacy path. + */ + workflowLaunchEnvelope?: Record; + /** + * Bounded retry budget for `daemon_busy` — `dispatchAttempts` total tries + * (default 3), `retryDelayMs` between each. After exhaustion the cron run + * is marked failed with a stable diagnostic. Task 10.3. + */ + daemonBusyRetry?: { + attempts: number; + delayMs: number; + }; } export type CronAction = CronCommandAction | CronP2pAction; diff --git a/shared/p2p-advanced.ts b/shared/p2p-advanced.ts index 1f8543c5e..e645ab704 100644 --- a/shared/p2p-advanced.ts +++ b/shared/p2p-advanced.ts @@ -1,4 +1,15 @@ import { isTransportSessionAgentType } from './agent-types.js'; +import type { P2pNodeKind } from './p2p-workflow-constants.js'; +// `p2p-workflow-types.ts` imports `P2pAdvancedRound` from this file. To avoid +// a typed import cycle while still preserving structural information on the +// adapter carriers, we use type-only imports for the compiled-node shapes. +// TypeScript resolves type-only cycles cleanly because nothing is emitted +// at runtime. +import type { + P2pRoutingAuthority, + P2pScriptNodeContract, +} from './p2p-workflow-types.js'; +import type { P2pWorkflowDiagnostic } from './p2p-workflow-diagnostics.js'; const LEGACY_MODE_KEYS = new Set(['audit', 'review', 'plan', 'brainstorm', 'discuss']); const COMBO_SEPARATOR = '>'; @@ -43,6 +54,18 @@ export interface P2pAdvancedRound { promptAppend?: string; verdictPolicy?: P2pRoundVerdictPolicy; jumpRule?: P2pAdvancedJumpRule; + /** + * R3 PR-α (A1 / W3): the legacy `P2pAdvancedRound` model previously dropped + * envelope-only fields when adapting `P2pCompiledNode` → legacy round shape. + * Adding optional carriers preserves the compiled node semantics so + * orchestrator dispatch / dangerous-node recheck / artifact judging can read + * authoritative values without a sidecar `bound.compiled.nodes.find(...)` + * lookup. All fields are OPTIONAL to keep oldAdvanced fixtures unchanged. + */ + nodeKind?: P2pNodeKind; + script?: P2pScriptNodeContract; + routingAuthority?: P2pRoutingAuthority; + artifactConvention?: 'none' | 'explicit' | 'openspec_convention'; } export interface P2pParticipantSnapshotEntry { @@ -57,13 +80,23 @@ export interface P2pHelperDiagnostic { | 'P2P_HELPER_FALLBACK_FAILED' | 'P2P_HELPER_CLEANUP_FAILED' | 'P2P_COMPRESSION_SKIPPED_NO_FALLBACK' - | 'P2P_VERDICT_MISSING'; + | 'P2P_VERDICT_MISSING' + | 'P2P_DANGEROUS_NODE_RECHECK_FAILED' + | 'P2P_DISCUSSION_WRITE_FAILED' + | 'P2P_SCRIPT_SLOT_EXHAUSTED'; attempt: number; sourceSession?: string | null; templateSession?: string | null; fallbackSession?: string | null; timestamp: number; message?: string; + /** + * R3 PR-α (B5 / D-O4) — preserve the structured workflow diagnostic so the + * 32-code closed enum survives the helper-diagnostic path. Web i18n, + * monitoring, and reverse-regression assertions can match on + * `workflowDiagnostic.code` instead of parsing free-form messages. + */ + workflowDiagnostic?: P2pWorkflowDiagnostic; } export interface P2pResolvedRound { @@ -87,6 +120,17 @@ export interface P2pResolvedRound { allowRouting: boolean; artifactOutputs: string[]; artifactConvention: 'none' | 'explicit' | 'openspec_convention'; + /** + * R3 PR-α (A1 / W3 / A4) — compiled-node fields propagated from + * `P2pAdvancedRound` after adapter widening. `nodeKind` enables + * `isRoundDangerous` to recognise script nodes regardless of permission + * scope; `script` lets `dispatchScriptRoundOrFail` read its contract + * without a sidecar `bound.compiled.nodes.find(...)` lookup; + * `routingAuthority` is plumbed for PR-β (envelope_compiled executor). + */ + nodeKind?: P2pNodeKind; + script?: P2pScriptNodeContract; + routingAuthority?: P2pRoutingAuthority; } export interface ResolveP2pRoundPlanOptions { @@ -244,9 +288,16 @@ function defaultArtifactConvention(round: P2pAdvancedRound): 'none' | 'explicit' function normalizeAdvancedRound(round: P2pAdvancedRound): P2pResolvedRound { const verdictPolicy = round.verdictPolicy ?? 'none'; - const artifactConvention = defaultArtifactConvention(round); + // R3 PR-α (W3) — when the adapter preserves `artifactConvention` from a + // compiled node, prefer the carrier value over the preset-inferred default. + // This keeps user-declared `'explicit_paths'` artifacts from being silently + // upgraded to `'openspec_convention'` because the round preset happens to + // be `openspec_propose`. + const artifactConvention = round.artifactConvention ?? defaultArtifactConvention(round); const artifactOutputs = artifactConvention === 'openspec_convention' - ? ['openspec/changes'] + ? (round.artifactOutputs && round.artifactOutputs.length > 0 + ? [...round.artifactOutputs] + : ['openspec/changes']) : [...(round.artifactOutputs ?? [])]; const synthesisStyle: P2pSynthesisStyle = round.executionMode === 'multi_dispatch' ? 'initiator_summary' : 'none'; const requiresVerdict = verdictPolicy !== 'none'; @@ -275,6 +326,12 @@ function normalizeAdvancedRound(round: P2pAdvancedRound): P2pResolvedRound { allowRouting, artifactOutputs, artifactConvention, + // R3 PR-α (A1 / A4) — propagate the compiled-node carrier so executor / + // dangerous-node recheck can read authoritative values without sidecar + // lookups against `bound.compiled.nodes`. + ...(round.nodeKind ? { nodeKind: round.nodeKind } : {}), + ...(round.script ? { script: round.script } : {}), + ...(round.routingAuthority ? { routingAuthority: round.routingAuthority } : {}), }; } diff --git a/shared/p2p-modes.ts b/shared/p2p-modes.ts index 2a3e2ef42..c5904dc65 100644 --- a/shared/p2p-modes.ts +++ b/shared/p2p-modes.ts @@ -1,5 +1,6 @@ /** P2P Quick Discussion mode configuration. */ import type { P2pAdvancedPresetKey, P2pAdvancedRound, P2pContextReducerConfig } from './p2p-advanced.js'; +import type { P2pWorkflowDraft, P2pWorkflowLaunchEnvelope } from './p2p-workflow-types.js'; /** The "config" meta-mode — each session uses its own saved default mode. */ export const P2P_CONFIG_MODE = 'config' as const; @@ -29,6 +30,22 @@ export interface P2pSavedConfig { advancedRunTimeoutMinutes?: number; /** Optional context compression/helper config for advanced workflows. */ contextReducer?: P2pContextReducerConfig; + /** Versioned advanced workflow draft for smart P2P workflow v1+. */ + workflowDraft?: P2pWorkflowDraft; + /** Optional saved launch envelope for scheduled/supervised advanced workflow launch. */ + workflowLaunchEnvelope?: P2pWorkflowLaunchEnvelope; + /** + * R3 PR-α follow-up — UI-managed allowlist of executable absolute paths + * (or `PATH`-relative basenames) that script nodes in this config's + * advanced workflow are permitted to spawn. Maintained in + * `P2pConfigPanel` → "Allowed executables" and round-tripped through + * the launch envelope (`P2pWorkflowLaunchEnvelope.allowedExecutables`). + * + * Empty list means script bind rejects every executable with + * `script_executable_denied`. Per-entry constraints (visible-ASCII, + * ≤256 bytes, ≤64 entries) live in `validateP2pWorkflowLaunchEnvelope`. + */ + allowedExecutables?: string[]; } @@ -56,6 +73,8 @@ export function isP2pSavedConfig(value: unknown): value is P2pSavedConfig { advancedRounds?: unknown; advancedRunTimeoutMinutes?: unknown; contextReducer?: unknown; + workflowDraft?: unknown; + workflowLaunchEnvelope?: unknown; }; if (!record.sessions || typeof record.sessions !== 'object' || Array.isArray(record.sessions)) return false; if (typeof record.rounds !== 'number' || !Number.isFinite(record.rounds)) return false; @@ -66,6 +85,16 @@ export function isP2pSavedConfig(value: unknown): value is P2pSavedConfig { if (record.advancedRounds != null && !Array.isArray(record.advancedRounds)) return false; if (record.advancedRunTimeoutMinutes != null && (typeof record.advancedRunTimeoutMinutes !== 'number' || !Number.isFinite(record.advancedRunTimeoutMinutes))) return false; if (record.contextReducer != null && typeof record.contextReducer !== 'object') return false; + if (record.workflowDraft != null && (typeof record.workflowDraft !== 'object' || Array.isArray(record.workflowDraft))) return false; + if (record.workflowLaunchEnvelope != null && (typeof record.workflowLaunchEnvelope !== 'object' || Array.isArray(record.workflowLaunchEnvelope))) return false; + // R3 PR-α follow-up — UI-managed allowedExecutables. We perform only a + // shape check here; per-entry validation lives in + // `validateP2pWorkflowLaunchEnvelope` so the same rules apply on launch. + const allowedRaw = (record as { allowedExecutables?: unknown }).allowedExecutables; + if (allowedRaw != null) { + if (!Array.isArray(allowedRaw)) return false; + if (allowedRaw.some((entry) => typeof entry !== 'string')) return false; + } return Object.values(record.sessions as Record).every(isP2pSessionEntry); } diff --git a/shared/p2p-workflow-artifacts.ts b/shared/p2p-workflow-artifacts.ts new file mode 100644 index 000000000..6e5122cf8 --- /dev/null +++ b/shared/p2p-workflow-artifacts.ts @@ -0,0 +1,186 @@ +import { createHash } from 'node:crypto'; + +import { + P2P_WORKFLOW_ARTIFACT_MAX_DEPTH, + P2P_WORKFLOW_ARTIFACT_MAX_FILE_BYTES, + P2P_WORKFLOW_ARTIFACT_MAX_FILES, + P2P_WORKFLOW_ARTIFACT_MAX_TOTAL_BYTES, +} from './p2p-workflow-constants.js'; +import { makeP2pWorkflowDiagnostic, type P2pWorkflowDiagnostic } from './p2p-workflow-diagnostics.js'; +import { canonicalize, stableStringify } from './p2p-workflow-policy.js'; +import type { P2pJsonValue } from './p2p-workflow-types.js'; + +export type P2pArtifactPathValidationResult = + | { ok: true; path: string; diagnostics: P2pWorkflowDiagnostic[] } + | { ok: false; diagnostics: P2pWorkflowDiagnostic[] }; + +export interface P2pArtifactFileBaseline { + path: string; + sha256: string; + sizeBytes?: number; + fileType?: P2pArtifactFileType; + metadata?: Record; +} + +export type P2pArtifactFileType = 'file' | 'directory' | 'symlink' | 'other'; + +export interface P2pArtifactBaselineHashInput { + files: P2pArtifactFileBaseline[]; +} + +export type P2pArtifactBaselineValidationResult = + | { ok: true; baseline: P2pArtifactBaselineHashInput; diagnostics: P2pWorkflowDiagnostic[] } + | { ok: false; diagnostics: P2pWorkflowDiagnostic[] }; + +export function validateP2pArtifactRelativePath(input: unknown, fieldPath = 'artifact.path'): P2pArtifactPathValidationResult { + if (typeof input !== 'string') { + return { + ok: false, + diagnostics: [makeP2pWorkflowDiagnostic('unsafe_artifact_path', 'compile', { fieldPath, summary: 'Artifact path must be a string.' })], + }; + } + if (!isP2pArtifactRelativePath(input)) { + return { + ok: false, + diagnostics: [makeP2pWorkflowDiagnostic('unsafe_artifact_path', 'compile', { fieldPath })], + }; + } + return { ok: true, path: input, diagnostics: [] }; +} + +export function isP2pArtifactRelativePath(path: string): boolean { + if (path === '' || path.includes('\0')) return false; + if (path.startsWith('/') || path.startsWith('~') || path.includes('\\')) return false; + if (/^[a-zA-Z]:/.test(path) || path.startsWith('//')) return false; + return path.split('/').every((segment) => segment !== '' && segment !== '.' && segment !== '..'); +} + +export function getP2pArtifactPathDepth(path: string): number { + return path.split('/').filter(Boolean).length; +} + +export function validateP2pArtifactBaseline(input: unknown): P2pArtifactBaselineValidationResult { + if (!isRecord(input) || !Array.isArray(input.files)) { + return invalidArtifactBaseline('artifactBaseline.files'); + } + + const diagnostics: P2pWorkflowDiagnostic[] = []; + const files: P2pArtifactFileBaseline[] = []; + let totalBytes = 0; + const seen = new Set(); + + if (input.files.length > P2P_WORKFLOW_ARTIFACT_MAX_FILES) { + diagnostics.push(makeP2pWorkflowDiagnostic('artifact_baseline_too_large', 'bind', { + fieldPath: 'artifactBaseline.files', + summary: `Artifact baseline exceeds file cap (${input.files.length}/${P2P_WORKFLOW_ARTIFACT_MAX_FILES}).`, + })); + } + + for (const [index, rawFile] of input.files.entries()) { + const fieldPath = `artifactBaseline.files[${index}]`; + if (!isRecord(rawFile)) { + diagnostics.push(makeP2pWorkflowDiagnostic('artifact_baseline_mismatch', 'execute', { fieldPath })); + continue; + } + + const path = rawFile.path; + const sha256 = rawFile.sha256; + const sizeBytes = rawFile.sizeBytes; + const fileType = rawFile.fileType; + + const validPath = typeof path === 'string' && isP2pArtifactRelativePath(path); + if (!validPath) { + diagnostics.push(makeP2pWorkflowDiagnostic('unsafe_artifact_path', 'bind', { fieldPath: `${fieldPath}.path` })); + } else if (getP2pArtifactPathDepth(path) > P2P_WORKFLOW_ARTIFACT_MAX_DEPTH) { + diagnostics.push(makeP2pWorkflowDiagnostic('unsafe_artifact_path', 'bind', { + fieldPath: `${fieldPath}.path`, + summary: `Artifact path exceeds depth cap (${P2P_WORKFLOW_ARTIFACT_MAX_DEPTH}).`, + })); + } + if (validPath && seen.has(path)) { + diagnostics.push(makeP2pWorkflowDiagnostic('artifact_baseline_mismatch', 'execute', { + fieldPath: `${fieldPath}.path`, + summary: 'Duplicate artifact baseline path.', + })); + } + if (validPath) seen.add(path); + + if (typeof sha256 !== 'string' || !/^[a-f0-9]{64}$/i.test(sha256)) { + diagnostics.push(makeP2pWorkflowDiagnostic('artifact_baseline_mismatch', 'execute', { fieldPath: `${fieldPath}.sha256` })); + } + if (!Number.isInteger(sizeBytes) || (sizeBytes as number) < 0) { + diagnostics.push(makeP2pWorkflowDiagnostic('artifact_baseline_mismatch', 'execute', { fieldPath: `${fieldPath}.sizeBytes` })); + } else { + if ((sizeBytes as number) > P2P_WORKFLOW_ARTIFACT_MAX_FILE_BYTES) { + diagnostics.push(makeP2pWorkflowDiagnostic('artifact_baseline_too_large', 'bind', { fieldPath: `${fieldPath}.sizeBytes` })); + } + totalBytes += sizeBytes as number; + } + if (!isP2pArtifactFileType(fileType)) { + diagnostics.push(makeP2pWorkflowDiagnostic('artifact_baseline_mismatch', 'execute', { fieldPath: `${fieldPath}.fileType` })); + } + + if (!validPath) continue; + files.push({ + path: path as string, + sha256: typeof sha256 === 'string' ? sha256.toLowerCase() : '', + sizeBytes: typeof sizeBytes === 'number' ? sizeBytes : undefined, + fileType: isP2pArtifactFileType(fileType) ? fileType : undefined, + ...(isRecord(rawFile.metadata) ? { metadata: rawFile.metadata } : {}), + }); + } + + if (totalBytes > P2P_WORKFLOW_ARTIFACT_MAX_TOTAL_BYTES) { + diagnostics.push(makeP2pWorkflowDiagnostic('artifact_baseline_too_large', 'bind', { + fieldPath: 'artifactBaseline.files', + summary: `Artifact baseline exceeds total byte cap (${totalBytes}/${P2P_WORKFLOW_ARTIFACT_MAX_TOTAL_BYTES}).`, + })); + } + + return diagnostics.some((diagnostic) => diagnostic.severity === 'error') + ? { ok: false, diagnostics } + : { ok: true, baseline: { files }, diagnostics }; +} + +export function hashP2pArtifactBaseline(input: P2pArtifactBaselineHashInput): string { + return `sha256:${sha256Hex(stableStringify(canonicalizeP2pArtifactBaseline(input)))}`; +} + +export function areP2pArtifactBaselinesEqual(left: P2pArtifactBaselineHashInput, right: P2pArtifactBaselineHashInput): boolean { + return hashP2pArtifactBaseline(left) === hashP2pArtifactBaseline(right); +} + +export function canonicalizeP2pArtifactBaseline(input: P2pArtifactBaselineHashInput): P2pJsonValue { + const files = input.files + .map((file) => ({ + path: file.path, + sha256: file.sha256, + sizeBytes: file.sizeBytes, + fileType: file.fileType, + metadata: canonicalizeArtifactMetadata(file.metadata), + })) + .sort((left, right) => left.path.localeCompare(right.path)); + return canonicalize({ files }); +} + +function canonicalizeArtifactMetadata(metadata: Record | undefined): P2pJsonValue { + if (!metadata) return {}; + const { capturedAt: _capturedAt, ...rest } = metadata; + return canonicalize(rest); +} + +function isRecord(value: unknown): value is Record { + return !!value && typeof value === 'object' && !Array.isArray(value); +} + +function isP2pArtifactFileType(value: unknown): value is P2pArtifactFileType { + return value === 'file' || value === 'directory' || value === 'symlink' || value === 'other'; +} + +function invalidArtifactBaseline(fieldPath: string): P2pArtifactBaselineValidationResult { + return { ok: false, diagnostics: [makeP2pWorkflowDiagnostic('artifact_baseline_mismatch', 'execute', { fieldPath })] }; +} + +function sha256Hex(input: string): string { + return createHash('sha256').update(input).digest('hex'); +} diff --git a/shared/p2p-workflow-compiler.ts b/shared/p2p-workflow-compiler.ts new file mode 100644 index 000000000..f63f2d653 --- /dev/null +++ b/shared/p2p-workflow-compiler.ts @@ -0,0 +1,277 @@ +import { + P2P_WORKFLOW_CAPABILITY_V1, + P2P_WORKFLOW_IMPLEMENTATION_CAPABILITY_V1, + P2P_WORKFLOW_OPENSPEC_ARTIFACTS_CAPABILITY_V1, + P2P_WORKFLOW_SCHEMA_VERSION, + P2P_WORKFLOW_SCRIPT_ARGV_CAPABILITY_V1, + P2P_WORKFLOW_SCRIPT_INTERPRETER_CAPABILITY_V1, +} from './p2p-workflow-constants.js'; +import { makeP2pWorkflowDiagnostic, type P2pWorkflowDiagnostic } from './p2p-workflow-diagnostics.js'; +import { hashP2pStaticPolicy, stableHash, stableStringify } from './p2p-workflow-policy.js'; +import type { + P2pCompiledNode, + P2pCompiledWorkflow, + P2pRoutingAuthority, + P2pStaticPolicy, + P2pWorkflowDraft, + P2pWorkflowEdgeDraft, + P2pWorkflowNodeDraft, +} from './p2p-workflow-types.js'; +import { validateP2pWorkflowDraft, validateP2pWorkflowVariables } from './p2p-workflow-validators.js'; +import { validateP2pLogicContract } from './p2p-workflow-logic-evaluator.js'; + +export type P2pWorkflowCompileResult = + | { ok: true; workflow: P2pCompiledWorkflow; diagnostics: P2pWorkflowDiagnostic[] } + | { ok: false; diagnostics: P2pWorkflowDiagnostic[] }; + +export function compileP2pWorkflowDraft(draft: P2pWorkflowDraft, staticPolicy: P2pStaticPolicy): P2pWorkflowCompileResult { + const diagnostics: P2pWorkflowDiagnostic[] = []; + const draftValidation = validateP2pWorkflowDraft(draft); + diagnostics.push(...draftValidation.diagnostics); + if (!draftValidation.ok) return { ok: false, diagnostics }; + + if (draft.nodes.length > staticPolicy.maxNodes || draft.edges.length > staticPolicy.maxEdges) { + diagnostics.push(makeP2pWorkflowDiagnostic('invalid_workflow_graph', 'compile', { summary: 'Workflow exceeds static policy size limits.' })); + } + + diagnostics.push(...validateGraphShape(draft)); + diagnostics.push(...validateP2pWorkflowVariables(draft.variables ?? [])); + diagnostics.push(...validateLoopBudgets(draft)); + if (diagnostics.some((diagnostic) => diagnostic.severity === 'error')) { + return { ok: false, diagnostics }; + } + + const nodes = [...draft.nodes] + .sort((left, right) => left.id.localeCompare(right.id)) + .map((node) => compileNode(node, draft.edges)); + const edges = [...draft.edges].sort((left, right) => left.id.localeCompare(right.id)); + const variables = [...(draft.variables ?? [])].sort((left, right) => left.name.localeCompare(right.name)); + const staticPolicyHash = hashP2pStaticPolicy(staticPolicy); + const derivedRequiredCapabilities = deriveRequiredCapabilities(nodes); + const rootNodeId = draft.rootNodeId ?? findRootNodeId(draft)!; + const contractInput = { + schemaVersion: P2P_WORKFLOW_SCHEMA_VERSION, + workflowId: draft.id, + rootNodeId, + nodes, + edges, + variables, + loopBudgets: sortedRecord(draft.loopBudgets ?? {}), + derivedRequiredCapabilities, + staticPolicyHash, + }; + const workflow: P2pCompiledWorkflow = { + ...contractInput, + diagnostics: [], + workflowContractHash: stableHash(stableStringify(contractInput)), + }; + return { ok: true, workflow, diagnostics }; +} + +function compileNode(node: P2pWorkflowNodeDraft, edges: P2pWorkflowEdgeDraft[]): P2pCompiledNode { + return { + id: node.id, + ...(node.title ? { title: node.title } : {}), + nodeKind: node.nodeKind, + preset: node.preset, + ...(node.dispatchStyle ? { dispatchStyle: node.dispatchStyle } : {}), + permissionScope: node.permissionScope ?? 'analysis_only', + ...(node.promptAppend ? { promptAppend: node.promptAppend } : {}), + routingAuthority: node.routingAuthority ?? deriveRoutingAuthority(node, edges), + ...(node.script ? { script: node.script } : {}), + // R3 v1b follow-up — pass logic contract through unchanged so the + // executor can evaluate it against the run's variable state. + ...(node.logic ? { logic: node.logic } : {}), + artifacts: [...(node.artifacts ?? [])], + }; +} + +function deriveRoutingAuthority(node: P2pWorkflowNodeDraft, edges: P2pWorkflowEdgeDraft[]): P2pRoutingAuthority { + const conditionalEdges = edges.filter((edge) => edge.fromNodeId === node.id && edge.edgeKind === 'conditional'); + if (conditionalEdges.length === 0) return { kind: 'none' }; + if (node.nodeKind === 'script') { + return { + kind: 'script_routing_key', + allowedKeys: conditionalEdges.map((edge) => edge.condition?.equals).filter((value): value is string => !!value).sort(), + }; + } + if (node.nodeKind === 'logic') { + return { + kind: 'logic_marker', + allowedMarkers: conditionalEdges.map((edge) => edge.condition?.equals).filter((value): value is string => !!value).sort(), + }; + } + return { + kind: 'audit_verdict_marker', + allowedMarkers: conditionalEdges.map((edge) => edge.condition?.equals).filter((value): value is string => !!value).sort(), + }; +} + +function deriveRequiredCapabilities(nodes: P2pCompiledNode[]): string[] { + const capabilities = new Set([P2P_WORKFLOW_CAPABILITY_V1]); + // Audit:R3 PR-β / V-5 — script nodes always require argv capability; nodes + // with `commandKind: 'interpreter'` ADDITIONALLY require the interpreter + // capability. Spec `Interpreter script requires interpreter capability` + // scenario; daemon must advertise BOTH caps to bind such workflows. + if (nodes.some((node) => node.nodeKind === 'script')) capabilities.add(P2P_WORKFLOW_SCRIPT_ARGV_CAPABILITY_V1); + if (nodes.some((node) => node.nodeKind === 'script' && node.script?.commandKind === 'interpreter')) { + capabilities.add(P2P_WORKFLOW_SCRIPT_INTERPRETER_CAPABILITY_V1); + } + if (nodes.some((node) => node.artifacts.some((artifact) => artifact.convention === 'openspec_convention'))) { + capabilities.add(P2P_WORKFLOW_OPENSPEC_ARTIFACTS_CAPABILITY_V1); + } + if (nodes.some((node) => node.permissionScope === 'implementation')) capabilities.add(P2P_WORKFLOW_IMPLEMENTATION_CAPABILITY_V1); + return [...capabilities].sort(); +} + +function validateGraphShape(draft: P2pWorkflowDraft): P2pWorkflowDiagnostic[] { + const diagnostics: P2pWorkflowDiagnostic[] = []; + const nodeIds = new Set(); + // R3 v2 PR-ζ (Cx1-A3 / O1-a / ζ-15) — A workflow MAY declare at most + // one `openspec_convention` artifact contract. The daemon's + // `runArtifactRootCache` keys by `runId` only and `getOrFreezeRunArtifactRoot` + // takes the first matching contract; multi-contract workflows would + // silently use the first node's frozen identity for every other + // node's verify step. Reject at compile time so authors see the + // problem immediately instead of debugging false missing-file + // diagnostics later. + const openspecNodeIds = draft.nodes + .filter((node) => Array.isArray(node.artifacts) && node.artifacts.some((artifact) => artifact.convention === 'openspec_convention')) + .map((node) => node.id); + if (openspecNodeIds.length > 1) { + diagnostics.push(makeP2pWorkflowDiagnostic('invalid_workflow_graph', 'compile', { + summary: `At most one node may declare an openspec_convention artifact contract per workflow (found: ${openspecNodeIds.join(', ')}).`, + })); + } + for (const node of draft.nodes) { + if (nodeIds.has(node.id)) diagnostics.push(makeP2pWorkflowDiagnostic('invalid_workflow_graph', 'compile', { fieldPath: `nodes.${node.id}`, summary: 'Duplicate node id.' })); + nodeIds.add(node.id); + } + const edgeIds = new Set(); + for (const edge of draft.edges) { + if (edgeIds.has(edge.id)) diagnostics.push(makeP2pWorkflowDiagnostic('invalid_workflow_graph', 'compile', { fieldPath: `edges.${edge.id}`, summary: 'Duplicate edge id.' })); + edgeIds.add(edge.id); + if (!nodeIds.has(edge.fromNodeId) || !nodeIds.has(edge.toNodeId)) { + diagnostics.push(makeP2pWorkflowDiagnostic('invalid_workflow_graph', 'compile', { fieldPath: `edges.${edge.id}`, summary: 'Edge points to missing node.' })); + } + } + const rootNodeId = draft.rootNodeId ?? findRootNodeId(draft); + if (!rootNodeId || !nodeIds.has(rootNodeId)) { + diagnostics.push(makeP2pWorkflowDiagnostic('invalid_workflow_graph', 'compile', { fieldPath: 'rootNodeId', summary: 'Workflow must have exactly one root.' })); + } + for (const node of draft.nodes) { + // R3 v1b follow-up — logic node MUST declare a `logic` contract; non-logic + // nodes MUST NOT carry one (the executor only evaluates `logic` for + // `nodeKind === 'logic'`). + if (node.nodeKind === 'logic') { + if (!node.logic) { + diagnostics.push(makeP2pWorkflowDiagnostic('invalid_workflow_graph', 'compile', { + fieldPath: `nodes.${node.id}.logic`, + summary: 'Logic node MUST declare a `logic` contract.', + })); + } else { + for (const issue of validateP2pLogicContract(node.logic, `nodes.${node.id}.logic`)) { + diagnostics.push(makeP2pWorkflowDiagnostic('invalid_workflow_graph', 'compile', { + fieldPath: issue.fieldPath, + summary: issue.summary, + })); + } + } + } else if (node.logic !== undefined) { + diagnostics.push(makeP2pWorkflowDiagnostic('invalid_workflow_graph', 'compile', { + fieldPath: `nodes.${node.id}.logic`, + summary: 'Only nodeKind: \'logic\' nodes may declare a `logic` contract.', + })); + } + const defaultOutgoing = draft.edges.filter((edge) => edge.fromNodeId === node.id && edge.edgeKind === 'default'); + if (defaultOutgoing.length > 1) { + diagnostics.push(makeP2pWorkflowDiagnostic('invalid_workflow_graph', 'compile', { fieldPath: `nodes.${node.id}`, summary: 'Multiple default edges are not supported.' })); + } + const conditionalOutgoing = draft.edges.filter((edge) => edge.fromNodeId === node.id && edge.edgeKind === 'conditional'); + if (conditionalOutgoing.length > 0) { + // R3 PR-γ (W4) — v1 cap: at most ONE conditional outgoing edge per + // node. The legacy adapter projection (`compiledWorkflowToLegacyAdvancedRounds`) + // only carries a single `jumpRule` per round so additional conditional + // edges would be silently dropped on the legacy executor; the new + // envelope_compiled executor (PR-β) walks `compiled.edges` directly + // but selects the FIRST matching condition. Either way the v1 + // semantics require uniqueness — the compiler enforces it here so + // authoring tools fail closed instead of silently misrouting. + if (conditionalOutgoing.length > 1) { + diagnostics.push(makeP2pWorkflowDiagnostic('invalid_workflow_graph', 'compile', { + fieldPath: `nodes.${node.id}`, + summary: `Multiple conditional outgoing edges (${conditionalOutgoing.length}) are not supported in v1; declare at most one per node.`, + })); + } + const authority = node.routingAuthority ?? deriveRoutingAuthority(node, draft.edges); + if (authority.kind === 'none') { + diagnostics.push(makeP2pWorkflowDiagnostic('invalid_routing_authority', 'compile', { fieldPath: `nodes.${node.id}.routingAuthority` })); + } + for (const edge of conditionalOutgoing) { + if (!edge.condition) { + diagnostics.push(makeP2pWorkflowDiagnostic('invalid_edge_condition', 'compile', { fieldPath: `edges.${edge.id}.condition` })); + } + } + } + } + if (rootNodeId) { + const reachable = collectReachable(rootNodeId, draft.edges); + for (const node of draft.nodes) { + if (!reachable.has(node.id)) { + diagnostics.push(makeP2pWorkflowDiagnostic('invalid_workflow_graph', 'compile', { fieldPath: `nodes.${node.id}`, summary: 'Unreachable node.' })); + } + } + } + return diagnostics; +} + +function findRootNodeId(draft: P2pWorkflowDraft): string | null { + if (draft.rootNodeId) return draft.rootNodeId; + const targets = new Set(draft.edges.map((edge) => edge.toNodeId)); + const roots = draft.nodes.map((node) => node.id).filter((id) => !targets.has(id)); + return roots.length === 1 ? roots[0]! : null; +} + +function collectReachable(rootNodeId: string, edges: P2pWorkflowEdgeDraft[]): Set { + const reachable = new Set([rootNodeId]); + let changed = true; + while (changed) { + changed = false; + for (const edge of edges) { + if (reachable.has(edge.fromNodeId) && !reachable.has(edge.toNodeId)) { + reachable.add(edge.toNodeId); + changed = true; + } + } + } + return reachable; +} + +function validateLoopBudgets(draft: P2pWorkflowDraft): P2pWorkflowDiagnostic[] { + const diagnostics: P2pWorkflowDiagnostic[] = []; + const nodeOrder = new Map(draft.nodes.map((node, index) => [node.id, index])); + for (const edge of draft.edges) { + const fromIndex = nodeOrder.get(edge.fromNodeId); + const toIndex = nodeOrder.get(edge.toNodeId); + if (fromIndex === undefined || toIndex === undefined) continue; + if (toIndex <= fromIndex && draft.loopBudgets?.[edge.id] === undefined) { + diagnostics.push(makeP2pWorkflowDiagnostic('invalid_workflow_graph', 'compile', { fieldPath: `loopBudgets.${edge.id}`, summary: 'Backward edges require edge-scoped loop budgets.' })); + } + const budget = draft.loopBudgets?.[edge.id]; + if (budget !== undefined && (!Number.isInteger(budget) || budget < 0)) { + diagnostics.push(makeP2pWorkflowDiagnostic('invalid_workflow_graph', 'compile', { fieldPath: `loopBudgets.${edge.id}`, summary: 'Loop budget must be a non-negative integer.' })); + } + } + for (const key of Object.keys(draft.loopBudgets ?? {})) { + if (!draft.edges.some((edge) => edge.id === key)) { + diagnostics.push(makeP2pWorkflowDiagnostic('invalid_workflow_graph', 'compile', { fieldPath: `loopBudgets.${key}`, summary: 'Loop budgets must be keyed by edge id.' })); + } + } + return diagnostics; +} + +function sortedRecord(input: Record): Record { + const output: Record = {}; + for (const key of Object.keys(input).sort()) output[key] = input[key]!; + return output; +} diff --git a/shared/p2p-workflow-constants.ts b/shared/p2p-workflow-constants.ts new file mode 100644 index 000000000..5ee465946 --- /dev/null +++ b/shared/p2p-workflow-constants.ts @@ -0,0 +1,189 @@ +export const P2P_WORKFLOW_SCHEMA_VERSION = 1 as const; +export const P2P_WORKFLOW_KNOWN_SCHEMA_MAX = 1 as const; +export const P2P_WORKFLOW_PROJECTION_VERSION = 1 as const; + +export const P2P_CAPABILITY_FRESHNESS_TTL_MS = 30_000 as const; +export const P2P_WORKFLOW_MAX_ACTIVE_RUNS = 2 as const; +export const P2P_WORKFLOW_MAX_ACTIVE_SCRIPTS = 4 as const; + +/** + * R3 v1b follow-up — Default maximum attempts for transient script + * failures. Counted via `run.roundAttemptCounts[round.id]`. The first + * attempt is `1`; retries are attempts `2…N`. + */ +export const P2P_SCRIPT_RETRY_DEFAULT_ATTEMPTS = 3 as const; + +/** + * R3 v2 PR-ζ (B1 / A5) — Workflow variable identifier pattern. + * Re-exported so the orchestrator's runtime write-path validation + * matches the parser / draft validator and stays one place to change. + * Lowercase + digits + underscore only ⇒ structurally rejects + * `__proto__` / `constructor` / `prototype` keys. + */ +export const P2P_WORKFLOW_VARIABLE_NAME_PATTERN = /^[a-z][a-z0-9_]{0,63}$/; + +/** + * R3 v2 PR-ζ (B5) — Per-element byte cap for script-emitted variable + * arrays. Per-array element count cap is `P2P_WORKFLOW_VARIABLE_ARRAY_MAX_ELEMENTS`. + * The total `JSON.stringify` byte budget per variable is bounded by + * `P2P_WORKFLOW_MAX_VARIABLE_BYTES` already; the new caps prevent a + * runaway `[ "A".repeat(N), … ]` from driving daemon RSS through the + * variable surface even when the encoded byte sum stays under cap. + */ +export const P2P_WORKFLOW_VARIABLE_ARRAY_MAX_ELEMENTS = 64; +export const P2P_WORKFLOW_VARIABLE_ARRAY_MAX_ELEMENT_BYTES = 8 * 1024; + +/** + * R3 v2 PR-ζ (Cx1-A6 / ζ-14) — Allowed executable path pattern + cap. + * Reuses visible-ASCII charset of `P2P_REQUEST_ID_ASCII_PATTERN` but + * removes the 128-char length limit so absolute paths up to 256 bytes + * (matching the documented spec) are accepted. The byte length cap is + * applied via `TextEncoder` separately. + */ +export const P2P_ALLOWED_EXECUTABLE_PATTERN = /^[\x21-\x7e]+$/; +export const P2P_ALLOWED_EXECUTABLE_MAX_BYTES = 256; + +/** + * R3 v1b follow-up — Diagnostic codes that the script runner classifies as + * TRANSIENT (worth retrying) vs deterministic. Order matters for the + * registry-style check in `isRetriableScriptDiagnostic`. + */ +export const P2P_SCRIPT_RETRIABLE_DIAGNOSTIC_CODES = [ + 'script_timeout', + 'daemon_busy', +] as const; + +export const P2P_WORKFLOW_MAX_NODES = 64 as const; +export const P2P_WORKFLOW_MAX_EDGES = 128 as const; +export const P2P_WORKFLOW_MAX_VARIABLES = 64 as const; +export const P2P_WORKFLOW_MAX_VARIABLE_BYTES = 8 * 1024; +export const P2P_WORKFLOW_MAX_PROMPT_APPEND_BYTES = 16 * 1024; +export const P2P_WORKFLOW_MAX_DIAGNOSTICS = 100 as const; +export const P2P_WORKFLOW_MAX_DIAGNOSTIC_BYTES = 64 * 1024; + +export const P2P_WORKFLOW_ARTIFACT_MAX_FILES = 200 as const; +export const P2P_WORKFLOW_ARTIFACT_MAX_TOTAL_BYTES = 64 * 1024 * 1024; +export const P2P_WORKFLOW_ARTIFACT_MAX_FILE_BYTES = 8 * 1024 * 1024; +export const P2P_WORKFLOW_ARTIFACT_MAX_DEPTH = 8 as const; + +export const P2P_SCRIPT_DEFAULT_STDIN_MAX_BYTES = 64 * 1024; +export const P2P_SCRIPT_DEFAULT_STDOUT_MAX_BYTES = 256 * 1024; +export const P2P_SCRIPT_DEFAULT_STDERR_MAX_BYTES = 128 * 1024; +export const P2P_SCRIPT_DEFAULT_MACHINE_OUTPUT_MAX_BYTES = 128 * 1024; +export const P2P_SCRIPT_MACHINE_OUTPUT_KIND = 'p2p_script_machine_output_v1' as const; + +export const P2P_WORKFLOW_CAPABILITY_V1 = 'p2p.workflow.v1' as const; +export const P2P_WORKFLOW_SCRIPT_ARGV_CAPABILITY_V1 = 'p2p.workflow.script.argv.v1' as const; +export const P2P_WORKFLOW_SCRIPT_INTERPRETER_CAPABILITY_V1 = 'p2p.workflow.script.interpreter.v1' as const; +export const P2P_WORKFLOW_OPENSPEC_ARTIFACTS_CAPABILITY_V1 = 'p2p.workflow.openspec-artifacts.v1' as const; +export const P2P_WORKFLOW_IMPLEMENTATION_CAPABILITY_V1 = 'p2p.workflow.implementation.v1' as const; + +export const P2P_WORKFLOW_CAPABILITIES = [ + P2P_WORKFLOW_CAPABILITY_V1, + P2P_WORKFLOW_SCRIPT_ARGV_CAPABILITY_V1, + P2P_WORKFLOW_SCRIPT_INTERPRETER_CAPABILITY_V1, + P2P_WORKFLOW_OPENSPEC_ARTIFACTS_CAPABILITY_V1, + P2P_WORKFLOW_IMPLEMENTATION_CAPABILITY_V1, +] as const; + +export type P2pWorkflowCapability = (typeof P2P_WORKFLOW_CAPABILITIES)[number]; + +export const P2P_WORKFLOW_KINDS = ['legacy', 'combo', 'advanced'] as const; +export type P2pWorkflowKind = (typeof P2P_WORKFLOW_KINDS)[number]; + +export const P2P_NODE_KINDS = ['llm', 'logic', 'script'] as const; +export type P2pNodeKind = (typeof P2P_NODE_KINDS)[number]; + +export const P2P_PRESET_KEYS = [ + 'brainstorm', + 'discuss', + 'audit', + 'review', + 'plan', + 'openspec_propose', + 'proposal_audit', + 'implementation', + 'implementation_audit', + 'custom', +] as const; +export type P2pPresetKey = (typeof P2P_PRESET_KEYS)[number]; + +export const P2P_NODE_DISPATCH_STYLES = ['single_main', 'multi_dispatch'] as const; +export type P2pNodeDispatchStyle = (typeof P2P_NODE_DISPATCH_STYLES)[number]; + +export const P2P_EDGE_KINDS = ['default', 'conditional'] as const; +export type P2pEdgeKind = (typeof P2P_EDGE_KINDS)[number]; + +export const P2P_EDGE_CONDITION_KINDS = [ + 'routing_key_equals', + 'verdict_marker_equals', + 'logic_marker_equals', +] as const; +export type P2pEdgeConditionKind = (typeof P2P_EDGE_CONDITION_KINDS)[number]; + +export const P2P_PERMISSION_SCOPES = [ + 'analysis_only', + 'artifact_generation', + 'implementation', +] as const; +export type P2pPermissionScope = (typeof P2P_PERMISSION_SCOPES)[number]; + +export const P2P_ARTIFACT_CONVENTIONS = [ + 'none', + 'explicit_paths', + 'openspec_convention', +] as const; +export type P2pArtifactConvention = (typeof P2P_ARTIFACT_CONVENTIONS)[number]; + +export const P2P_ARTIFACT_PHASES = ['freeze', 'create', 'validate', 'baseline'] as const; +export type P2pArtifactPhase = (typeof P2P_ARTIFACT_PHASES)[number]; + +export const P2P_START_CONTEXT_SOURCE_KINDS = [ + 'current_prompt', + 'associated_discussion_file', + 'recent_discussion_history', + 'file_reference', +] as const; +export type P2pStartContextSourceKind = (typeof P2P_START_CONTEXT_SOURCE_KINDS)[number]; + +export const P2P_FORBIDDEN_ENVELOPE_FIELD_NAMES = [ + 'compiledWorkflow', + 'boundWorkflow', + 'privateRuntimeState', + 'runtimePrivateState', + 'rawPrompt', + 'rawPromptText', + 'scriptRawOutputs', + 'rawScriptOutput', + 'artifactBaselines', + 'privateArtifactBaselines', + 'editorCache', + 'hiddenEditorCache', + 'env', + 'environment', + 'token', + 'accessToken', + 'refreshToken', + 'apiKey', + 'secret', +] as const; + +export const P2P_REQUEST_ID_MAX_BYTES = 128 as const; +export const P2P_REQUEST_ID_ASCII_PATTERN = /^[\x21-\x7e]{1,128}$/; + +export const P2P_BRIDGE_PENDING_REQUEST_TIMEOUT_MS = 30_000 as const; +export const P2P_BRIDGE_PENDING_REQUESTS_PER_SOCKET = 16 as const; +export const P2P_BRIDGE_PENDING_REQUESTS_GLOBAL = 512 as const; +export const P2P_BRIDGE_ERROR_CODES = { + INVALID_REQUEST_ID: 'invalid_request_id', + DUPLICATE_REQUEST_ID: 'duplicate_request_id', + WRONG_PEER: 'p2p_wrong_peer', + ROUTE_POLICY_ERROR: 'p2p_route_policy_error', + PENDING_LIMIT_EXCEEDED: 'p2p_pending_limit_exceeded', +} as const; + +export const P2P_SANITIZE_MAX_STRING_BYTES = 4096 as const; +export const P2P_SANITIZE_MAX_ARRAY_ITEMS = 64 as const; +export const P2P_SANITIZE_MAX_OBJECT_KEYS = 64 as const; +export const P2P_SANITIZE_MAX_DEPTH = 6 as const; +export const P2P_SANITIZE_MAX_TOTAL_BYTES = 64 * 1024; diff --git a/shared/p2p-workflow-diagnostics.ts b/shared/p2p-workflow-diagnostics.ts new file mode 100644 index 000000000..d107c8de8 --- /dev/null +++ b/shared/p2p-workflow-diagnostics.ts @@ -0,0 +1,149 @@ +export const P2P_WORKFLOW_DIAGNOSTIC_CODES = [ + 'forbidden_envelope_field', + 'unsupported_schema_version', + 'unknown_future_schema_read_only', + 'mixed_advanced_schema_fields', + 'static_policy_mismatch_recompiled', + 'missing_required_capability', + 'capability_stale', + 'capability_downgraded_during_run', + 'invalid_launch_envelope', + 'invalid_workflow_graph', + 'invalid_routing_authority', + 'invalid_edge_condition', + 'loop_budget_exhausted', + 'invalid_workflow_variable', + 'invalid_prompt_append', + 'missing_context_source', + 'context_source_too_large', + 'unsafe_artifact_path', + 'artifact_identity_collision_resolved', + 'artifact_baseline_too_large', + 'artifact_baseline_mismatch', + 'artifact_contract_not_satisfied', + 'invalid_script_contract', + 'script_executable_denied', + 'script_machine_output_invalid', + 'script_timeout', + 'script_cancelled', + 'daemon_busy', + 'workflow_stale_after_restart', + 'private_projection_field_dropped', + 'legacy_progress_snapshot_sanitized', + 'unknown_p2p_message', + /** + * R3 v2 PR-η — envelope_compiled executor exit reason. Emitted when a + * round's outgoing edges include conditional but NONE match the + * round's route (script routingKey / verdict marker / logic marker) + * AND no default edge exists. Defends against the v1b array-order + * fallback that silently executed sibling nodes regardless of route. + */ + 'unmatched_edge_route', +] as const; + +export type P2pWorkflowDiagnosticCode = (typeof P2P_WORKFLOW_DIAGNOSTIC_CODES)[number]; + +export const P2P_WORKFLOW_DIAGNOSTIC_PHASES = [ + 'parse', + 'compile', + 'bind', + 'execute', + 'project', + 'sanitize', + 'server_ingress', + 'web_validate', +] as const; + +export type P2pWorkflowDiagnosticPhase = (typeof P2P_WORKFLOW_DIAGNOSTIC_PHASES)[number]; + +export const P2P_WORKFLOW_DIAGNOSTIC_SEVERITIES = ['info', 'warning', 'error'] as const; +export type P2pWorkflowDiagnosticSeverity = (typeof P2P_WORKFLOW_DIAGNOSTIC_SEVERITIES)[number]; + +export interface P2pWorkflowDiagnostic { + code: P2pWorkflowDiagnosticCode; + phase: P2pWorkflowDiagnosticPhase; + severity: P2pWorkflowDiagnosticSeverity; + messageKey: `p2p.workflow.diagnostics.${P2pWorkflowDiagnosticCode}`; + summary?: string; + nodeId?: string; + runId?: string; + fieldPath?: string; +} + +export const P2P_WORKFLOW_DIAGNOSTIC_PHASE_MATRIX: Record = { + forbidden_envelope_field: ['parse'], + unsupported_schema_version: ['parse', 'web_validate'], + unknown_future_schema_read_only: ['web_validate'], + mixed_advanced_schema_fields: ['parse', 'web_validate'], + static_policy_mismatch_recompiled: ['bind'], + missing_required_capability: ['bind', 'execute', 'web_validate'], + capability_stale: ['bind', 'web_validate'], + capability_downgraded_during_run: ['execute'], + invalid_launch_envelope: ['parse'], + invalid_workflow_graph: ['compile'], + invalid_routing_authority: ['compile'], + invalid_edge_condition: ['compile'], + loop_budget_exhausted: ['execute'], + invalid_workflow_variable: ['compile', 'execute'], + invalid_prompt_append: ['compile'], + missing_context_source: ['bind', 'execute'], + context_source_too_large: ['bind', 'execute'], + unsafe_artifact_path: ['compile', 'bind', 'execute'], + artifact_identity_collision_resolved: ['bind'], + artifact_baseline_too_large: ['bind'], + artifact_baseline_mismatch: ['execute'], + artifact_contract_not_satisfied: ['execute'], + invalid_script_contract: ['compile', 'bind'], + script_executable_denied: ['bind', 'execute'], + script_machine_output_invalid: ['execute'], + script_timeout: ['execute'], + script_cancelled: ['execute'], + daemon_busy: ['bind'], + workflow_stale_after_restart: ['bind', 'execute'], + private_projection_field_dropped: ['sanitize'], + legacy_progress_snapshot_sanitized: ['sanitize'], + unknown_p2p_message: ['server_ingress'], + unmatched_edge_route: ['execute'], +}; + +const WARNING_CODES = new Set([ + 'artifact_identity_collision_resolved', + 'static_policy_mismatch_recompiled', + 'private_projection_field_dropped', + 'legacy_progress_snapshot_sanitized', +]); + +export function makeP2pWorkflowDiagnostic( + code: P2pWorkflowDiagnosticCode, + phase?: P2pWorkflowDiagnosticPhase, + extras: Omit, 'code' | 'phase' | 'messageKey' | 'severity'> = {}, +): P2pWorkflowDiagnostic { + const phases = P2P_WORKFLOW_DIAGNOSTIC_PHASE_MATRIX[code]; + const resolvedPhase = phase ?? phases[0]; + return { + code, + phase: resolvedPhase, + severity: WARNING_CODES.has(code) ? 'warning' : 'error', + messageKey: `p2p.workflow.diagnostics.${code}`, + ...extras, + }; +} + +export function makeP2pWorkflowWarning( + code: P2pWorkflowDiagnosticCode, + phase?: P2pWorkflowDiagnosticPhase, + extras: Omit, 'code' | 'phase' | 'messageKey' | 'severity'> = {}, +): P2pWorkflowDiagnostic { + return { + ...makeP2pWorkflowDiagnostic(code, phase, extras), + severity: 'warning', + }; +} + +export function assertP2pDiagnosticMatrixComplete(): void { + for (const code of P2P_WORKFLOW_DIAGNOSTIC_CODES) { + if (!P2P_WORKFLOW_DIAGNOSTIC_PHASE_MATRIX[code]?.length) { + throw new Error(`Missing P2P workflow diagnostic phase mapping: ${code}`); + } + } +} diff --git a/shared/p2p-workflow-logic-evaluator.ts b/shared/p2p-workflow-logic-evaluator.ts new file mode 100644 index 000000000..0a06da178 --- /dev/null +++ b/shared/p2p-workflow-logic-evaluator.ts @@ -0,0 +1,176 @@ +/** + * P2P logic-node evaluator. + * + * R3 v1b follow-up — pure, sandboxed evaluation of `P2pLogicNodeContract` + * rules against the run's variable state. The evaluator is deliberately + * tiny: + * - No expression interpreter, no `eval`, no template strings. + * - Rules are checked in declaration order; the first match wins. + * - `if: undefined` is an always-match rule. + * - When no rule matches, `default` is emitted. + * + * The shared evaluator is consumed both by the daemon executor (to drive + * `logic_marker_equals` routing) and by the compiler validator (to reject + * obviously-broken contracts at author time). + */ + +import type { + P2pLogicNodeContract, + P2pLogicRule, + P2pWorkflowVariableValue, +} from './p2p-workflow-types.js'; + +export const P2P_LOGIC_MAX_RULES = 32; +export const P2P_LOGIC_MAX_MARKER_BYTES = 128; +export const P2P_LOGIC_VISIBLE_ASCII = /^[\x21-\x7e]+$/; + +export interface LogicEvalResult { + /** Marker emitted (matched rule's `emit` or `contract.default`). */ + marker: string; + /** Index of the matched rule, or -1 when fell through to `default`. */ + matchedRuleIndex: number; +} + +export type LogicVariableSnapshot = Record; + +/** + * Evaluate the contract against the given variables snapshot. Throws only + * when `contract` is structurally invalid (caller should validate ahead of + * time via {@link validateP2pLogicContract}). + */ +export function evaluateP2pLogic( + contract: P2pLogicNodeContract, + variables: LogicVariableSnapshot, +): LogicEvalResult { + for (let index = 0; index < contract.rules.length; index += 1) { + const rule = contract.rules[index]; + if (matchRule(rule, variables)) { + return { marker: rule.emit, matchedRuleIndex: index }; + } + } + return { marker: contract.default, matchedRuleIndex: -1 }; +} + +/** + * R3 v2 PR-ζ (B6 / A5 defence-in-depth) — Reject prototype-pollution + * key names at evaluator read time. Logic identifier validator already + * uses `[A-Za-z_][A-Za-z0-9_]*` which would let `__proto__` pass; this + * extra check ensures the read silently mismatches even if a hostile + * contract slips past compile-time validation. + */ +const PROTOTYPE_POLLUTION_KEYS = new Set(['__proto__', 'constructor', 'prototype']); + +function matchRule(rule: P2pLogicRule, variables: LogicVariableSnapshot): boolean { + if (rule.if === undefined) return true; + if (PROTOTYPE_POLLUTION_KEYS.has(rule.if.name)) return false; + const value = variables[rule.if.name]; + if (rule.if.kind === 'variable_present') { + return value !== undefined && value !== null; + } + if (rule.if.kind === 'variable_truthy') { + if (value === undefined || value === null) return false; + if (typeof value === 'string') return value.length > 0; + if (typeof value === 'number') return value !== 0 && !Number.isNaN(value); + if (typeof value === 'boolean') return value; + if (Array.isArray(value)) return value.length > 0; + return false; + } + if (rule.if.kind === 'variable_equals') { + if (value === undefined || value === null) return false; + return stringifyVariable(value) === rule.if.equals; + } + return false; +} + +/** + * R3 v2 PR-ζ (M5 / ζ-13) — Stable, injection-safe stringification. + * Previously `Array.isArray(value) ? value.join(',') : ...` allowed + * `['a,b']` and `['a','b']` to compare equal under `variable_equals`. + * `JSON.stringify` is unambiguous and the canonical encoding the + * compiler/parser already use elsewhere. + */ +function stringifyVariable(value: P2pWorkflowVariableValue): string { + if (typeof value === 'string') return value; + if (typeof value === 'number') return String(value); + if (typeof value === 'boolean') return value ? 'true' : 'false'; + if (Array.isArray(value)) return JSON.stringify(value); + return ''; +} + +export interface LogicValidationDiagnostic { + fieldPath: string; + summary: string; +} + +/** + * Pure structural validation for a logic contract. Returns an array of + * diagnostics; an empty array means the contract is valid. The compiler + * wraps these into `invalid_logic_contract` workflow diagnostics. + */ +export function validateP2pLogicContract(contract: unknown, basePath = 'logic'): LogicValidationDiagnostic[] { + const diagnostics: LogicValidationDiagnostic[] = []; + if (!contract || typeof contract !== 'object' || Array.isArray(contract)) { + diagnostics.push({ fieldPath: basePath, summary: 'Logic contract must be an object.' }); + return diagnostics; + } + const obj = contract as Record; + if (!Array.isArray(obj.rules)) { + diagnostics.push({ fieldPath: `${basePath}.rules`, summary: 'rules must be an array.' }); + return diagnostics; + } + if (obj.rules.length > P2P_LOGIC_MAX_RULES) { + diagnostics.push({ fieldPath: `${basePath}.rules`, summary: `Logic node may declare at most ${P2P_LOGIC_MAX_RULES} rules.` }); + } + if (typeof obj.default !== 'string' || !isValidMarker(obj.default)) { + diagnostics.push({ fieldPath: `${basePath}.default`, summary: `default marker must be visible-ASCII (1–${P2P_LOGIC_MAX_MARKER_BYTES} bytes).` }); + } + obj.rules.forEach((rule, index) => { + diagnostics.push(...validateRule(rule, `${basePath}.rules[${index}]`)); + }); + return diagnostics; +} + +function validateRule(rule: unknown, path: string): LogicValidationDiagnostic[] { + const diagnostics: LogicValidationDiagnostic[] = []; + if (!rule || typeof rule !== 'object' || Array.isArray(rule)) { + diagnostics.push({ fieldPath: path, summary: 'Rule must be an object.' }); + return diagnostics; + } + const obj = rule as Record; + if (typeof obj.emit !== 'string' || !isValidMarker(obj.emit)) { + diagnostics.push({ fieldPath: `${path}.emit`, summary: `emit must be visible-ASCII (1–${P2P_LOGIC_MAX_MARKER_BYTES} bytes).` }); + } + if (obj.if === undefined) return diagnostics; + if (!obj.if || typeof obj.if !== 'object' || Array.isArray(obj.if)) { + diagnostics.push({ fieldPath: `${path}.if`, summary: 'if clause must be an object when present.' }); + return diagnostics; + } + const cond = obj.if as Record; + if (cond.kind !== 'variable_equals' && cond.kind !== 'variable_present' && cond.kind !== 'variable_truthy') { + diagnostics.push({ fieldPath: `${path}.if.kind`, summary: `Unsupported condition kind: ${String(cond.kind)}.` }); + return diagnostics; + } + if ( + typeof cond.name !== 'string' + || cond.name.length === 0 + || cond.name.length > 64 + || !/^[A-Za-z_][A-Za-z0-9_]*$/.test(cond.name) + // R3 v2 PR-ζ (B6 / A5) — Block prototype-pollution key names at + // compile time. Defence-in-depth alongside evaluator read-time skip + // and orchestrator write-path lowercase regex. + || PROTOTYPE_POLLUTION_KEYS.has(cond.name) + ) { + diagnostics.push({ fieldPath: `${path}.if.name`, summary: 'name must be a non-empty identifier (≤64 chars, [A-Za-z_][A-Za-z0-9_]*) and not a prototype-pollution key.' }); + } + if (cond.kind === 'variable_equals') { + if (typeof cond.equals !== 'string' || cond.equals.length > P2P_LOGIC_MAX_MARKER_BYTES) { + diagnostics.push({ fieldPath: `${path}.if.equals`, summary: `equals must be a string ≤${P2P_LOGIC_MAX_MARKER_BYTES} bytes.` }); + } + } + return diagnostics; +} + +function isValidMarker(value: string): boolean { + if (value.length === 0 || value.length > P2P_LOGIC_MAX_MARKER_BYTES) return false; + return P2P_LOGIC_VISIBLE_ASCII.test(value); +} diff --git a/shared/p2p-workflow-materialize.ts b/shared/p2p-workflow-materialize.ts new file mode 100644 index 000000000..ac9c82415 --- /dev/null +++ b/shared/p2p-workflow-materialize.ts @@ -0,0 +1,105 @@ +import { BUILT_IN_ADVANCED_PRESETS, type P2pAdvancedRound } from './p2p-advanced.js'; +import { P2P_WORKFLOW_SCHEMA_VERSION, type P2pPermissionScope, type P2pPresetKey } from './p2p-workflow-constants.js'; +import type { P2pWorkflowDraft, P2pWorkflowEdgeDraft, P2pWorkflowNodeDraft } from './p2p-workflow-types.js'; + +export interface P2pOldAdvancedMaterializeInput { + advancedPresetKey?: string | null; + advancedRounds?: P2pAdvancedRound[] | null; + advancedRunTimeoutMinutes?: number | null; +} + +function normalizeOldPreset(preset: string): P2pPresetKey { + if (preset === 'discussion') return 'discuss'; + if ( + preset === 'openspec_propose' || + preset === 'proposal_audit' || + preset === 'implementation' || + preset === 'implementation_audit' || + preset === 'custom' + ) { + return preset; + } + return 'custom'; +} + +function nodeKindForRound(round: P2pAdvancedRound): P2pWorkflowNodeDraft['nodeKind'] { + return round.permissionScope === 'implementation' && round.preset === 'custom' ? 'script' : 'llm'; +} + +function permissionScopeForRound(scope: string): P2pPermissionScope { + if (scope === 'artifact_generation' || scope === 'implementation') return scope; + return 'analysis_only'; +} + +function cloneRounds(rounds: P2pAdvancedRound[]): P2pAdvancedRound[] { + return JSON.parse(JSON.stringify(rounds)) as P2pAdvancedRound[]; +} + +export function materializeOldAdvancedConfigToWorkflowDraft( + input: P2pOldAdvancedMaterializeInput, +): P2pWorkflowDraft { + const rounds = input.advancedRounds?.length + ? cloneRounds(input.advancedRounds) + : input.advancedPresetKey === 'openspec' + ? cloneRounds(BUILT_IN_ADVANCED_PRESETS.openspec) + : []; + if (rounds.length === 0) { + throw new Error('Old advanced P2P materialization requires advancedPresetKey or advancedRounds'); + } + + const nodes: P2pWorkflowNodeDraft[] = rounds.map((round) => ({ + id: round.id, + title: round.title, + nodeKind: nodeKindForRound(round), + preset: normalizeOldPreset(round.preset), + dispatchStyle: round.executionMode === 'single_main' ? 'single_main' : 'multi_dispatch', + permissionScope: permissionScopeForRound(round.permissionScope), + ...(round.promptAppend ? { promptAppend: round.promptAppend } : {}), + ...(round.timeoutMinutes ? { timeoutMs: round.timeoutMinutes * 60_000 } : {}), + artifacts: round.permissionScope === 'artifact_generation' + ? [{ + convention: round.preset === 'openspec_propose' ? 'openspec_convention' : 'explicit_paths', + paths: round.artifactOutputs?.length ? [...round.artifactOutputs].sort() : ['openspec/changes'], + permissionScope: 'artifact_generation', + symlinkPolicy: 'reject_all', + }] + : [], + })); + + const edges: P2pWorkflowEdgeDraft[] = []; + for (let index = 0; index < rounds.length - 1; index += 1) { + edges.push({ + id: `edge_${rounds[index]!.id}_to_${rounds[index + 1]!.id}`, + fromNodeId: rounds[index]!.id, + toNodeId: rounds[index + 1]!.id, + edgeKind: 'default', + }); + } + const loopBudgets: Record = {}; + for (const round of rounds) { + if (!round.jumpRule) continue; + const edgeId = `edge_${round.id}_to_${round.jumpRule.targetRoundId}_rework`; + edges.push({ + id: edgeId, + fromNodeId: round.id, + toNodeId: round.jumpRule.targetRoundId, + edgeKind: 'conditional', + condition: { + kind: 'verdict_marker_equals', + equals: round.jumpRule.marker ?? 'REWORK', + }, + }); + loopBudgets[edgeId] = round.jumpRule.maxTriggers; + } + + return { + schemaVersion: P2P_WORKFLOW_SCHEMA_VERSION, + id: input.advancedPresetKey ? `old_${input.advancedPresetKey}` : 'old_custom_advanced', + title: input.advancedPresetKey ? `Old advanced preset: ${input.advancedPresetKey}` : 'Old advanced workflow', + nodes, + edges, + rootNodeId: nodes[0]!.id, + variables: [], + loopBudgets, + }; +} diff --git a/shared/p2p-workflow-messages.ts b/shared/p2p-workflow-messages.ts new file mode 100644 index 000000000..015f246a3 --- /dev/null +++ b/shared/p2p-workflow-messages.ts @@ -0,0 +1,315 @@ +import { P2P_REQUEST_ID_ASCII_PATTERN } from './p2p-workflow-constants.js'; +import type { P2pWorkflowDiagnosticCode } from './p2p-workflow-diagnostics.js'; +import { P2P_CONFIG_MSG, type P2pConfigMsgType } from './p2p-config-events.js'; + +export const P2P_WORKFLOW_MSG = { + STATUS: 'p2p.status', + STATUS_RESPONSE: 'p2p.status_response', + LIST_DISCUSSIONS: 'p2p.list_discussions', + LIST_DISCUSSIONS_RESPONSE: 'p2p.list_discussions_response', + READ_DISCUSSION: 'p2p.read_discussion', + READ_DISCUSSION_RESPONSE: 'p2p.read_discussion_response', + RUN_START: 'p2p.run_start', + RUN_STARTED: 'p2p.run_started', + RUN_UPDATE: 'p2p.run_update', + RUN_SAVE: 'p2p.run_save', + RUN_COMPLETE: 'p2p.run_complete', + RUN_ERROR: 'p2p.run_error', + CANCEL: 'p2p.cancel', + CANCEL_RESPONSE: 'p2p.cancel_response', + CONFLICT: 'p2p.conflict', + DAEMON_HELLO: 'daemon.hello', +} as const; + +/** + * Category of a P2P protocol message. + * - `'workflow'`: messages that drive the smart-p2p-rounds workflow protocol + * (status, list/read discussions, run start/update/complete/error/cancel, + * conflicts, daemon hello capability handshake). + * - `'config'`: persisted P2P participant config CRUD between web and daemon + * (`p2p.config.save` / `p2p.config.save_response`). Distinct protocol family + * from workflow but shares the bridge route policy (registry-driven default- + * deny, request-scoped singlecast). Workflow-only consumers may filter by + * `category === 'workflow'` if needed. + */ +export type P2pProtocolCategory = 'workflow' | 'config'; + +/** + * Union of all P2P protocol message types registered in + * `P2P_WORKFLOW_MESSAGE_REGISTRY`. The historical name retains "Workflow" for + * back-compat with existing imports; the registry covers both workflow and + * config categories so the bridge default-deny excludes registered config + * messages and unknown `p2p.*` still drop. + */ +export type P2pWorkflowMessageType = + | (typeof P2P_WORKFLOW_MSG)[keyof typeof P2P_WORKFLOW_MSG] + | P2pConfigMsgType; + +export interface P2pWorkflowMessageDescriptor { + type: P2pWorkflowMessageType; + category: P2pProtocolCategory; + direction: 'browser_to_server' | 'server_to_browser' | 'daemon_to_server' | 'server_to_daemon' | 'bidirectional'; + allowedIngress: readonly P2pWorkflowIngressPeer[]; + serverHandling: P2pWorkflowServerHandling; + browserDelivery: P2pWorkflowBrowserDelivery; + responseTo?: P2pWorkflowMessageType; + expectedResponseType?: P2pWorkflowMessageType; + requestScoped: boolean; + response: boolean; + broadcastAllowed: boolean; +} + +export type P2pWorkflowIngressPeer = 'browser' | 'daemon'; +export type P2pWorkflowServerHandling = + | 'forward_to_daemon' + | 'singlecast_response' + | 'broadcast_to_browsers' + | 'persist_run_and_broadcast' + | 'cache_daemon_capabilities'; +export type P2pWorkflowBrowserDelivery = 'none' | 'singlecast' | 'broadcast'; + +export const P2P_WORKFLOW_MESSAGE_REGISTRY: Record = { + [P2P_WORKFLOW_MSG.STATUS]: { + type: P2P_WORKFLOW_MSG.STATUS, + category: 'workflow', + direction: 'browser_to_server', + allowedIngress: ['browser'], + serverHandling: 'forward_to_daemon', + browserDelivery: 'none', + expectedResponseType: P2P_WORKFLOW_MSG.STATUS_RESPONSE, + requestScoped: true, + response: false, + broadcastAllowed: false, + }, + [P2P_WORKFLOW_MSG.STATUS_RESPONSE]: { + type: P2P_WORKFLOW_MSG.STATUS_RESPONSE, + category: 'workflow', + direction: 'server_to_browser', + allowedIngress: ['daemon'], + serverHandling: 'singlecast_response', + browserDelivery: 'singlecast', + responseTo: P2P_WORKFLOW_MSG.STATUS, + requestScoped: true, + response: true, + broadcastAllowed: false, + }, + [P2P_WORKFLOW_MSG.LIST_DISCUSSIONS]: { + type: P2P_WORKFLOW_MSG.LIST_DISCUSSIONS, + category: 'workflow', + direction: 'browser_to_server', + allowedIngress: ['browser'], + serverHandling: 'forward_to_daemon', + browserDelivery: 'none', + expectedResponseType: P2P_WORKFLOW_MSG.LIST_DISCUSSIONS_RESPONSE, + requestScoped: true, + response: false, + broadcastAllowed: false, + }, + [P2P_WORKFLOW_MSG.LIST_DISCUSSIONS_RESPONSE]: { + type: P2P_WORKFLOW_MSG.LIST_DISCUSSIONS_RESPONSE, + category: 'workflow', + direction: 'server_to_browser', + allowedIngress: ['daemon'], + serverHandling: 'singlecast_response', + browserDelivery: 'singlecast', + responseTo: P2P_WORKFLOW_MSG.LIST_DISCUSSIONS, + requestScoped: true, + response: true, + broadcastAllowed: false, + }, + [P2P_WORKFLOW_MSG.READ_DISCUSSION]: { + type: P2P_WORKFLOW_MSG.READ_DISCUSSION, + category: 'workflow', + direction: 'browser_to_server', + allowedIngress: ['browser'], + serverHandling: 'forward_to_daemon', + browserDelivery: 'none', + expectedResponseType: P2P_WORKFLOW_MSG.READ_DISCUSSION_RESPONSE, + requestScoped: true, + response: false, + broadcastAllowed: false, + }, + [P2P_WORKFLOW_MSG.READ_DISCUSSION_RESPONSE]: { + type: P2P_WORKFLOW_MSG.READ_DISCUSSION_RESPONSE, + category: 'workflow', + direction: 'server_to_browser', + allowedIngress: ['daemon'], + serverHandling: 'singlecast_response', + browserDelivery: 'singlecast', + responseTo: P2P_WORKFLOW_MSG.READ_DISCUSSION, + requestScoped: true, + response: true, + broadcastAllowed: false, + }, + [P2P_WORKFLOW_MSG.RUN_START]: { + type: P2P_WORKFLOW_MSG.RUN_START, + category: 'workflow', + direction: 'browser_to_server', + allowedIngress: ['browser'], + serverHandling: 'forward_to_daemon', + browserDelivery: 'none', + requestScoped: false, + response: false, + broadcastAllowed: false, + }, + [P2P_WORKFLOW_MSG.RUN_STARTED]: { + type: P2P_WORKFLOW_MSG.RUN_STARTED, + category: 'workflow', + direction: 'server_to_browser', + allowedIngress: ['daemon'], + serverHandling: 'broadcast_to_browsers', + browserDelivery: 'broadcast', + requestScoped: false, + response: false, + broadcastAllowed: true, + }, + [P2P_WORKFLOW_MSG.RUN_UPDATE]: { + type: P2P_WORKFLOW_MSG.RUN_UPDATE, + category: 'workflow', + direction: 'daemon_to_server', + allowedIngress: ['daemon'], + serverHandling: 'persist_run_and_broadcast', + browserDelivery: 'broadcast', + requestScoped: false, + response: false, + broadcastAllowed: true, + }, + [P2P_WORKFLOW_MSG.RUN_SAVE]: { + type: P2P_WORKFLOW_MSG.RUN_SAVE, + category: 'workflow', + direction: 'daemon_to_server', + allowedIngress: ['daemon'], + serverHandling: 'persist_run_and_broadcast', + browserDelivery: 'broadcast', + expectedResponseType: P2P_WORKFLOW_MSG.RUN_UPDATE, + requestScoped: false, + response: false, + broadcastAllowed: true, + }, + [P2P_WORKFLOW_MSG.RUN_COMPLETE]: { + type: P2P_WORKFLOW_MSG.RUN_COMPLETE, + category: 'workflow', + direction: 'daemon_to_server', + allowedIngress: ['daemon'], + serverHandling: 'persist_run_and_broadcast', + browserDelivery: 'broadcast', + expectedResponseType: P2P_WORKFLOW_MSG.RUN_UPDATE, + requestScoped: false, + response: false, + broadcastAllowed: true, + }, + [P2P_WORKFLOW_MSG.RUN_ERROR]: { + type: P2P_WORKFLOW_MSG.RUN_ERROR, + category: 'workflow', + direction: 'daemon_to_server', + allowedIngress: ['daemon'], + serverHandling: 'persist_run_and_broadcast', + browserDelivery: 'broadcast', + expectedResponseType: P2P_WORKFLOW_MSG.RUN_UPDATE, + requestScoped: false, + response: false, + broadcastAllowed: true, + }, + [P2P_WORKFLOW_MSG.CANCEL]: { + type: P2P_WORKFLOW_MSG.CANCEL, + category: 'workflow', + direction: 'browser_to_server', + allowedIngress: ['browser'], + serverHandling: 'forward_to_daemon', + browserDelivery: 'none', + requestScoped: false, + response: false, + broadcastAllowed: false, + }, + [P2P_WORKFLOW_MSG.CANCEL_RESPONSE]: { + type: P2P_WORKFLOW_MSG.CANCEL_RESPONSE, + category: 'workflow', + direction: 'server_to_browser', + allowedIngress: ['daemon'], + serverHandling: 'broadcast_to_browsers', + browserDelivery: 'broadcast', + responseTo: P2P_WORKFLOW_MSG.CANCEL, + requestScoped: false, + response: true, + broadcastAllowed: true, + }, + [P2P_WORKFLOW_MSG.CONFLICT]: { + type: P2P_WORKFLOW_MSG.CONFLICT, + category: 'workflow', + direction: 'daemon_to_server', + allowedIngress: ['daemon'], + serverHandling: 'broadcast_to_browsers', + browserDelivery: 'broadcast', + requestScoped: false, + response: false, + broadcastAllowed: true, + }, + [P2P_WORKFLOW_MSG.DAEMON_HELLO]: { + type: P2P_WORKFLOW_MSG.DAEMON_HELLO, + category: 'workflow', + direction: 'daemon_to_server', + allowedIngress: ['daemon'], + serverHandling: 'cache_daemon_capabilities', + // Broadcast (daemonId, capabilities, helloEpoch, sentAt) to browsers + // connected to this daemon's serverId so the web capability gate can + // disable advanced launch on missing/stale/downgraded capabilities. + // The fields advertised here are not secrets — capabilities are public + // policy advertisement and helloEpoch/sentAt are required for the + // freshness TTL check (`P2P_CAPABILITY_FRESHNESS_TTL_MS`). + browserDelivery: 'broadcast', + requestScoped: false, + response: false, + broadcastAllowed: true, + }, + // ── Config category ──────────────────────────────────────────────────────── + // P2P participant config CRUD between web and daemon. Distinct protocol + // family from workflow but shares the bridge route policy: registered => + // pass via generic forward_to_daemon / singlecast_response handlers, + // unregistered `p2p.*` => default-deny drop. + [P2P_CONFIG_MSG.SAVE]: { + type: P2P_CONFIG_MSG.SAVE, + category: 'config', + direction: 'browser_to_server', + allowedIngress: ['browser'], + serverHandling: 'forward_to_daemon', + browserDelivery: 'none', + expectedResponseType: P2P_CONFIG_MSG.SAVE_RESPONSE, + requestScoped: true, + response: false, + broadcastAllowed: false, + }, + [P2P_CONFIG_MSG.SAVE_RESPONSE]: { + type: P2P_CONFIG_MSG.SAVE_RESPONSE, + category: 'config', + direction: 'server_to_browser', + allowedIngress: ['daemon'], + serverHandling: 'singlecast_response', + browserDelivery: 'singlecast', + responseTo: P2P_CONFIG_MSG.SAVE, + requestScoped: true, + response: true, + broadcastAllowed: false, + }, +}; + +export type P2pWorkflowMessageParseResult = + | { kind: 'known'; descriptor: P2pWorkflowMessageDescriptor } + | { kind: 'drop'; diagnosticCode: P2pWorkflowDiagnosticCode; reason: 'unknown_p2p_message' | 'not_p2p_message' }; + +export function parseP2pWorkflowMessageType(type: unknown): P2pWorkflowMessageParseResult { + if (typeof type !== 'string') return { kind: 'drop', diagnosticCode: 'unknown_p2p_message', reason: 'not_p2p_message' }; + const descriptor = P2P_WORKFLOW_MESSAGE_REGISTRY[type as P2pWorkflowMessageType]; + if (descriptor) return { kind: 'known', descriptor }; + if (type.startsWith('p2p.')) { + return { kind: 'drop', diagnosticCode: 'unknown_p2p_message', reason: 'unknown_p2p_message' }; + } + return { kind: 'drop', diagnosticCode: 'unknown_p2p_message', reason: 'not_p2p_message' }; +} + +export function isP2pWorkflowRequestId(value: unknown): value is string { + return typeof value === 'string' && P2P_REQUEST_ID_ASCII_PATTERN.test(value) && value.length <= 128; +} + +export function requiresP2pWorkflowRequestId(type: P2pWorkflowMessageType): boolean { + return P2P_WORKFLOW_MESSAGE_REGISTRY[type].requestScoped; +} diff --git a/shared/p2p-workflow-policy.ts b/shared/p2p-workflow-policy.ts new file mode 100644 index 000000000..c7b8b883b --- /dev/null +++ b/shared/p2p-workflow-policy.ts @@ -0,0 +1,87 @@ +import { + P2P_WORKFLOW_ARTIFACT_MAX_DEPTH, + P2P_WORKFLOW_MAX_ACTIVE_RUNS, + P2P_WORKFLOW_MAX_ACTIVE_SCRIPTS, + P2P_WORKFLOW_MAX_EDGES, + P2P_WORKFLOW_MAX_NODES, + P2P_WORKFLOW_MAX_PROMPT_APPEND_BYTES, +} from './p2p-workflow-constants.js'; +import type { P2pJsonValue, P2pStaticPolicy } from './p2p-workflow-types.js'; + +export const DEFAULT_P2P_STATIC_POLICY: P2pStaticPolicy = { + policyVersion: 1, + maxNodes: P2P_WORKFLOW_MAX_NODES, + maxEdges: P2P_WORKFLOW_MAX_EDGES, + maxLoopBudget: 8, + allowedExecutables: [], + allowInterpreterScripts: false, + allowOpenSpecArtifacts: false, + allowImplementationPermission: false, + maxPromptAppendBytes: P2P_WORKFLOW_MAX_PROMPT_APPEND_BYTES, + concurrency: { + maxAdvancedRuns: P2P_WORKFLOW_MAX_ACTIVE_RUNS, + maxScripts: P2P_WORKFLOW_MAX_ACTIVE_SCRIPTS, + }, +}; + +export function stableStringify(value: unknown): string { + return JSON.stringify(canonicalize(value)); +} + +export function canonicalizeP2pStaticPolicy(policy: P2pStaticPolicy): P2pStaticPolicy { + const { policyHash: _policyHash, ...rest } = policy; + return { + ...rest, + allowedExecutables: [...rest.allowedExecutables].sort(), + }; +} + +export function hashP2pStaticPolicy(policy: P2pStaticPolicy): string { + return stableHash(stableStringify(canonicalizeP2pStaticPolicy(policy))); +} + +export function buildDefaultP2pStaticPolicy(overrides: Partial = {}): P2pStaticPolicy { + const policy = { + ...DEFAULT_P2P_STATIC_POLICY, + ...overrides, + allowedExecutables: [...(overrides.allowedExecutables ?? DEFAULT_P2P_STATIC_POLICY.allowedExecutables)], + concurrency: { + ...DEFAULT_P2P_STATIC_POLICY.concurrency, + ...(overrides.concurrency ?? {}), + }, + }; + return { + ...policy, + policyHash: hashP2pStaticPolicy(policy), + }; +} + +export function stableHash(input: string): string { + let hash = 0xcbf29ce484222325n; + const prime = 0x100000001b3n; + for (let index = 0; index < input.length; index += 1) { + hash ^= BigInt(input.charCodeAt(index)); + hash = BigInt.asUintN(64, hash * prime); + } + return `fnv1a64:${hash.toString(16).padStart(16, '0')}`; +} + +export function canonicalize(value: unknown): P2pJsonValue { + if (value === null || typeof value === 'string' || typeof value === 'number' || typeof value === 'boolean') { + return value; + } + if (Array.isArray(value)) return value.map((entry) => canonicalize(entry)); + if (typeof value === 'object') { + const result: Record = {}; + for (const key of Object.keys(value as Record).sort()) { + const entry = (value as Record)[key]; + if (entry !== undefined) result[key] = canonicalize(entry); + } + return result; + } + return null; +} + +export function getDefaultArtifactDepthLimit(): number { + return P2P_WORKFLOW_ARTIFACT_MAX_DEPTH; +} diff --git a/shared/p2p-workflow-projection.ts b/shared/p2p-workflow-projection.ts new file mode 100644 index 000000000..a47e6e3bc --- /dev/null +++ b/shared/p2p-workflow-projection.ts @@ -0,0 +1,17 @@ +import { P2P_WORKFLOW_PROJECTION_VERSION } from './p2p-workflow-constants.js'; +import type { P2pPersistedWorkflowSnapshot, P2pWorkflowStatusProjection } from './p2p-workflow-types.js'; + +export function buildPersistedSnapshotFromProjection( + projection: P2pWorkflowStatusProjection, +): P2pPersistedWorkflowSnapshot { + return { + projectionVersion: P2P_WORKFLOW_PROJECTION_VERSION, + runId: projection.runId, + workflowId: projection.workflowId, + status: projection.status, + ...(projection.currentNodeId ? { currentNodeId: projection.currentNodeId } : {}), + completedNodeIds: [...projection.completedNodeIds], + diagnostics: projection.diagnostics.map((diagnostic) => ({ ...diagnostic })), + updatedAt: projection.updatedAt, + }; +} diff --git a/shared/p2p-workflow-prompt.ts b/shared/p2p-workflow-prompt.ts new file mode 100644 index 000000000..498691ea1 --- /dev/null +++ b/shared/p2p-workflow-prompt.ts @@ -0,0 +1,98 @@ +import { P2P_WORKFLOW_MAX_PROMPT_APPEND_BYTES } from './p2p-workflow-constants.js'; +import { makeP2pWorkflowDiagnostic, type P2pWorkflowDiagnostic } from './p2p-workflow-diagnostics.js'; + +export const P2P_PROMPT_SECTION_ORDER = [ + 'system_runtime_contract', + 'preset_scaffold', + 'node_contract', + 'structured_context_references', + 'previous_evidence_summary', + 'prompt_append', + 'final_runtime_guardrail', +] as const; + +export type P2pPromptSectionKind = (typeof P2P_PROMPT_SECTION_ORDER)[number]; +export type P2pPromptTransportKind = 'plaintext' | 'chat'; + +export interface P2pPromptSection { + kind: P2pPromptSectionKind; + text: string; +} + +export interface P2pChatPromptMessage { + role: 'system' | 'user' | 'assistant'; + content: string; +} + +export type P2pPromptProjection = + | { kind: 'plaintext'; text: string } + | { kind: 'chat'; messages: P2pChatPromptMessage[] }; + +function byteLength(value: string): number { + return new TextEncoder().encode(value).byteLength; +} + +function assertPromptAppendSafe(text: string): P2pWorkflowDiagnostic[] { + const diagnostics: P2pWorkflowDiagnostic[] = []; + if (byteLength(text) > P2P_WORKFLOW_MAX_PROMPT_APPEND_BYTES) { + diagnostics.push(makeP2pWorkflowDiagnostic('invalid_prompt_append', 'compile', { summary: 'promptAppend exceeds byte limit.' })); + } + if (/[\0\x01-\x08\x0b\x0c\x0e-\x1f\x7f]/.test(text)) { + diagnostics.push(makeP2pWorkflowDiagnostic('invalid_prompt_append', 'compile', { summary: 'promptAppend contains forbidden control characters.' })); + } + return diagnostics; +} + +export function assembleP2pPromptSections(sections: P2pPromptSection[]): { + ok: boolean; + sections: P2pPromptSection[]; + diagnostics: P2pWorkflowDiagnostic[]; +} { + const diagnostics: P2pWorkflowDiagnostic[] = []; + for (const section of sections) { + if (section.kind === 'prompt_append') diagnostics.push(...assertPromptAppendSafe(section.text)); + } + const ordered = [...sections].sort((left, right) => + P2P_PROMPT_SECTION_ORDER.indexOf(left.kind) - P2P_PROMPT_SECTION_ORDER.indexOf(right.kind)); + return { ok: diagnostics.length === 0, sections: ordered, diagnostics }; +} + +export function projectP2pPromptForTransport( + sections: P2pPromptSection[], + transportKind: P2pPromptTransportKind, +): P2pPromptProjection { + const assembled = assembleP2pPromptSections(sections); + if (!assembled.ok) { + throw new Error(assembled.diagnostics.map((diagnostic) => diagnostic.code).join(',')); + } + if (transportKind === 'chat') { + return { + kind: 'chat', + messages: assembled.sections.map((section) => ({ + role: roleForSection(section.kind), + content: section.text, + })), + }; + } + return { + kind: 'plaintext', + text: assembled.sections.map((section) => { + const fence = chooseFence(section.text); + return `${fence} ${section.kind}\n${section.text}\n${fence}`; + }).join('\n\n'), + }; +} + +function roleForSection(kind: P2pPromptSectionKind): P2pChatPromptMessage['role'] { + if (kind === 'previous_evidence_summary') return 'assistant'; + if (kind === 'prompt_append' || kind === 'structured_context_references') return 'user'; + return 'system'; +} + +function chooseFence(text: string): string { + for (let index = 0; index < 100; index += 1) { + const fence = `<<>>`; + if (!text.includes(fence)) return fence; + } + throw new Error('Unable to choose collision-safe prompt fence'); +} diff --git a/shared/p2p-workflow-redaction.ts b/shared/p2p-workflow-redaction.ts new file mode 100644 index 000000000..c0a8d8597 --- /dev/null +++ b/shared/p2p-workflow-redaction.ts @@ -0,0 +1,34 @@ +import { redactObject, type Redactable } from './logging/redact.js'; +import { redactSensitiveText } from './redact-secrets.js'; + +export interface P2pWorkflowRedactionOptions { + rawCaptureMaxBytes: number; + projectionSnippetMaxBytes: number; + extraPatterns?: RegExp[]; +} + +const DEFAULT_REDACTION_OPTIONS: P2pWorkflowRedactionOptions = { + rawCaptureMaxBytes: 512 * 1024, + projectionSnippetMaxBytes: 16 * 1024, +}; + +function truncateUtf8(value: string, maxBytes: number): string { + const encoder = new TextEncoder(); + const bytes = encoder.encode(value); + if (bytes.byteLength <= maxBytes) return value; + return new TextDecoder().decode(bytes.slice(0, maxBytes)); +} + +export function redactP2pWorkflowTextForProjection( + rawText: string, + options: Partial = {}, +): string { + const resolved = { ...DEFAULT_REDACTION_OPTIONS, ...options }; + const captured = truncateUtf8(rawText, resolved.rawCaptureMaxBytes); + const redacted = redactSensitiveText(captured, resolved.extraPatterns); + return truncateUtf8(redacted, resolved.projectionSnippetMaxBytes); +} + +export function redactP2pWorkflowObjectForProjection(value: T): Redactable { + return redactObject(value); +} diff --git a/shared/p2p-workflow-script.ts b/shared/p2p-workflow-script.ts new file mode 100644 index 000000000..ead68b700 --- /dev/null +++ b/shared/p2p-workflow-script.ts @@ -0,0 +1,246 @@ +import { + P2P_SCRIPT_DEFAULT_MACHINE_OUTPUT_MAX_BYTES, + P2P_SCRIPT_DEFAULT_STDERR_MAX_BYTES, + P2P_SCRIPT_DEFAULT_STDIN_MAX_BYTES, + P2P_SCRIPT_DEFAULT_STDOUT_MAX_BYTES, + P2P_WORKFLOW_MAX_VARIABLE_BYTES, + P2P_SCRIPT_MACHINE_OUTPUT_KIND, +} from './p2p-workflow-constants.js'; +import { makeP2pWorkflowDiagnostic, makeP2pWorkflowWarning, type P2pWorkflowDiagnostic } from './p2p-workflow-diagnostics.js'; +import type { P2pScriptMachineOutputFrame, P2pScriptNodeContract, P2pWorkflowVariableValue } from './p2p-workflow-types.js'; +import { isP2pArtifactRelativePath } from './p2p-workflow-artifacts.js'; + +export type P2pScriptContractValidationResult = + | { ok: true; contract: P2pScriptNodeContract; diagnostics: P2pWorkflowDiagnostic[] } + | { ok: false; diagnostics: P2pWorkflowDiagnostic[] }; + +export type P2pScriptMachineOutputParseResult = + | { ok: true; frames: P2pScriptMachineOutputFrame[]; finalFrame: P2pScriptMachineOutputFrame; diagnostics: P2pWorkflowDiagnostic[]; truncated?: boolean } + | { ok: false; diagnostics: P2pWorkflowDiagnostic[]; truncated?: boolean }; + +export type P2pScriptMachineOutputParseMode = 'lenient_last_valid' | 'strict'; + +export interface P2pScriptMachineOutputParseOptions { + mode?: P2pScriptMachineOutputParseMode; + maxTotalBytes?: number; + maxFrameBytes?: number; + requiredFields?: Array<'routingKey' | 'variables' | 'artifacts'>; +} + +export const DEFAULT_P2P_SCRIPT_CAPS: Required> = { + stdinBytes: P2P_SCRIPT_DEFAULT_STDIN_MAX_BYTES, + stdoutBytes: P2P_SCRIPT_DEFAULT_STDOUT_MAX_BYTES, + stderrBytes: P2P_SCRIPT_DEFAULT_STDERR_MAX_BYTES, + machineOutputBytes: P2P_SCRIPT_DEFAULT_MACHINE_OUTPUT_MAX_BYTES, +}; + +export const DEFAULT_P2P_SCRIPT_MACHINE_OUTPUT_FRAME_MAX_BYTES = 16 * 1024; + +function isRecord(value: unknown): value is Record { + return !!value && typeof value === 'object' && !Array.isArray(value); +} + +function byteLength(value: string): number { + return new TextEncoder().encode(value).byteLength; +} + +/** + * Slice a string to at most `maxBytes` UTF-8 bytes WITHOUT splitting a + * multi-byte character. Used by lenient mode to truncate machine output + * before walking back to the last `\n` boundary. + */ +function byteSlice(value: string, maxBytes: number): string { + const encoder = new TextEncoder(); + const decoder = new TextDecoder('utf-8', { fatal: false }); + const encoded = encoder.encode(value); + if (encoded.byteLength <= maxBytes) return value; + // Decode the prefix; TextDecoder's non-fatal mode returns U+FFFD for any + // partial multi-byte sequence at the tail. We then strip the trailing + // replacement character so a downstream `lastIndexOf('\n')` is unaffected. + let decoded = decoder.decode(encoded.slice(0, maxBytes)); + while (decoded.endsWith('�')) decoded = decoded.slice(0, -1); + return decoded; +} + +export function validateP2pScriptContract(input: unknown, fieldPath = 'script'): P2pScriptContractValidationResult { + if (!isRecord(input)) return invalidScriptContract(fieldPath); + + const commandKind = input.commandKind ?? 'argv'; + if (commandKind !== 'argv' && commandKind !== 'interpreter') { + return invalidScriptContract(`${fieldPath}.commandKind`); + } + if (!Array.isArray(input.argv) || input.argv.length === 0 || typeof input.argv[0] !== 'string' || input.argv[0] === '') { + return invalidScriptContract(`${fieldPath}.argv`); + } + if (input.argv.some((entry) => typeof entry !== 'string')) { + return invalidScriptContract(`${fieldPath}.argv`); + } + if (commandKind === 'interpreter' && (typeof input.interpreter !== 'string' || input.interpreter === '')) { + return invalidScriptContract(`${fieldPath}.interpreter`); + } + + const caps = normalizeScriptCaps(input.caps); + if (!caps) return invalidScriptContract(`${fieldPath}.caps`); + if (typeof input.stdin === 'string' && byteLength(input.stdin) > caps.stdinBytes) { + return invalidScriptContract(`${fieldPath}.stdin`); + } + if (Array.isArray(input.envAllowlist) && !input.envAllowlist.every((entry) => isSafeEnvironmentName(entry))) { + return invalidScriptContract(`${fieldPath}.envAllowlist`); + } + const timeoutMs = input.timeoutMs; + if (timeoutMs !== undefined && (typeof timeoutMs !== 'number' || !Number.isInteger(timeoutMs) || timeoutMs <= 0)) { + return invalidScriptContract(`${fieldPath}.timeoutMs`); + } + + const contract: P2pScriptNodeContract = { + commandKind, + argv: [...input.argv], + ...(commandKind === 'interpreter' ? { interpreter: input.interpreter as string } : {}), + ...(typeof input.stdin === 'string' ? { stdin: input.stdin } : {}), + ...(Array.isArray(input.envAllowlist) && input.envAllowlist.every((entry) => typeof entry === 'string') ? { envAllowlist: [...input.envAllowlist] } : {}), + ...(typeof input.requiredMachineOutput === 'boolean' ? { requiredMachineOutput: input.requiredMachineOutput } : {}), + ...(typeof input.timeoutMs === 'number' ? { timeoutMs: input.timeoutMs } : {}), + caps, + }; + return { ok: true, contract, diagnostics: [] }; +} + +export function parseP2pScriptMachineOutput( + input: string, + options: P2pScriptMachineOutputParseOptions = {}, +): P2pScriptMachineOutputParseResult { + const mode = options.mode ?? 'lenient_last_valid'; + const maxTotalBytes = options.maxTotalBytes ?? DEFAULT_P2P_SCRIPT_CAPS.machineOutputBytes; + const maxFrameBytes = options.maxFrameBytes ?? DEFAULT_P2P_SCRIPT_MACHINE_OUTPUT_FRAME_MAX_BYTES; + const totalBytes = byteLength(input); + // Audit:R3 PR-β / M-3 / V-2 — over-cap behavior depends on mode: + // - strict: reject the entire run (preserves "all frames must validate" + // invariant). Long-running streaming scripts must opt out of strict. + // - lenient_last_valid: TRUNCATE at the last `\n` boundary <= cap and + // continue parsing; emit `truncated: true`. Truncating at byte cap + // would split a frame mid-JSON; line-boundary truncation preserves + // parser invariants. spec.md §Server / web size limits clause. + let truncated = false; + let parseInput = input; + if (totalBytes > maxTotalBytes) { + if (mode === 'strict') { + return invalidMachineOutput(`machine output exceeds total byte cap (${totalBytes}/${maxTotalBytes}).`); + } + // Lenient: byte-truncate first, then walk back to last `\n` boundary so + // we never split a JSON frame. If no newline exists below cap, drop all + // input (no valid frames could have completed before the cap). + const truncatedBytes = byteSlice(input, maxTotalBytes); + const lastNewline = truncatedBytes.lastIndexOf('\n'); + parseInput = lastNewline >= 0 ? truncatedBytes.slice(0, lastNewline + 1) : ''; + truncated = true; + } + + const frames: P2pScriptMachineOutputFrame[] = []; + const diagnostics: P2pWorkflowDiagnostic[] = []; + if (truncated) { + diagnostics.push(makeP2pWorkflowWarning('script_machine_output_invalid', 'execute', { + summary: `machine output truncated at ${maxTotalBytes} bytes; some trailing frames discarded.`, + })); + } + const lines = parseInput.split(/\r?\n/).filter((line) => line.trim() !== ''); + for (const [index, line] of lines.entries()) { + const lineNumber = index + 1; + const frameBytes = byteLength(line); + if (frameBytes > maxFrameBytes) { + const diagnostic = machineOutputDiagnostic(`line ${lineNumber} exceeds frame byte cap (${frameBytes}/${maxFrameBytes}).`, mode); + if (mode === 'strict') return { ok: false, diagnostics: [diagnostic] }; + diagnostics.push(diagnostic); + continue; + } + let parsed: unknown; + try { + parsed = JSON.parse(line); + } catch { + const diagnostic = machineOutputDiagnostic(`line ${lineNumber} is not valid JSON.`, mode); + if (mode === 'strict') return { ok: false, diagnostics: [diagnostic] }; + diagnostics.push(diagnostic); + continue; + } + if (!isP2pScriptMachineOutputFrame(parsed) || !hasRequiredFields(parsed, options.requiredFields ?? [])) { + const diagnostic = machineOutputDiagnostic(`line ${lineNumber} is not a valid ${P2P_SCRIPT_MACHINE_OUTPUT_KIND} frame.`, mode); + if (mode === 'strict') return { ok: false, diagnostics: [diagnostic] }; + diagnostics.push(diagnostic); + continue; + } + frames.push(parsed); + } + const finalFrame = frames.length > 0 ? frames[frames.length - 1] : undefined; + if (!finalFrame) { + const result = invalidMachineOutput('no valid machine output frames were found.'); + return truncated ? { ...result, truncated: true } : result; + } + return truncated + ? { ok: true, frames, finalFrame, diagnostics, truncated: true } + : { ok: true, frames, finalFrame, diagnostics }; +} + +function isP2pScriptMachineOutputFrame(value: unknown): value is P2pScriptMachineOutputFrame { + if (!isRecord(value) || value.kind !== P2P_SCRIPT_MACHINE_OUTPUT_KIND) return false; + if (value.status !== undefined && value.status !== 'ok' && value.status !== 'fail') return false; + if (value.routingKey !== undefined && typeof value.routingKey !== 'string') return false; + if (value.displaySummary !== undefined && typeof value.displaySummary !== 'string') return false; + if (value.variables !== undefined && !isVariablesRecord(value.variables)) return false; + if (value.artifacts !== undefined && !isArtifactOutputArray(value.artifacts)) return false; + return true; +} + +function isVariablesRecord(value: unknown): value is Record { + if (!isRecord(value)) return false; + return Object.entries(value).every(([key, entry]) => /^[a-z][a-z0-9_]{0,63}$/.test(key) && + isWorkflowVariableValue(entry) && + byteLength(JSON.stringify(entry)) <= P2P_WORKFLOW_MAX_VARIABLE_BYTES); +} + +function isWorkflowVariableValue(value: unknown): value is P2pWorkflowVariableValue { + return typeof value === 'string' || + typeof value === 'number' || + typeof value === 'boolean' || + (Array.isArray(value) && value.every((entry) => typeof entry === 'string')); +} + +function isArtifactOutputArray(value: unknown): value is P2pScriptMachineOutputFrame['artifacts'] { + return Array.isArray(value) && value.every((entry) => { + if (!isRecord(entry) || typeof entry.path !== 'string' || !isP2pArtifactRelativePath(entry.path)) return false; + return entry.sha256 === undefined || typeof entry.sha256 === 'string'; + }); +} + +function normalizeScriptCaps(value: unknown): Required> | null { + if (value === undefined) return { ...DEFAULT_P2P_SCRIPT_CAPS }; + if (!isRecord(value)) return null; + const caps = { ...DEFAULT_P2P_SCRIPT_CAPS }; + for (const key of Object.keys(value)) { + if (!(key in caps)) return null; + const capValue = value[key]; + if (!Number.isInteger(capValue) || (capValue as number) < 0) return null; + caps[key as keyof typeof caps] = capValue as number; + } + return caps; +} + +function isSafeEnvironmentName(value: unknown): value is string { + return typeof value === 'string' && /^[A-Z_][A-Z0-9_]{0,127}$/.test(value); +} + +function hasRequiredFields(frame: P2pScriptMachineOutputFrame, fields: Array<'routingKey' | 'variables' | 'artifacts'>): boolean { + return fields.every((field) => frame[field] !== undefined); +} + +function invalidScriptContract(fieldPath: string): P2pScriptContractValidationResult { + return { ok: false, diagnostics: [makeP2pWorkflowDiagnostic('invalid_script_contract', 'compile', { fieldPath })] }; +} + +function invalidMachineOutput(summary: string): P2pScriptMachineOutputParseResult { + return { ok: false, diagnostics: [makeP2pWorkflowDiagnostic('script_machine_output_invalid', 'execute', { summary })] }; +} + +function machineOutputDiagnostic(summary: string, mode: P2pScriptMachineOutputParseMode): P2pWorkflowDiagnostic { + return mode === 'strict' + ? makeP2pWorkflowDiagnostic('script_machine_output_invalid', 'execute', { summary }) + : makeP2pWorkflowWarning('script_machine_output_invalid', 'execute', { summary }); +} diff --git a/shared/p2p-workflow-types.ts b/shared/p2p-workflow-types.ts new file mode 100644 index 000000000..81ee3debc --- /dev/null +++ b/shared/p2p-workflow-types.ts @@ -0,0 +1,378 @@ +import type { + P2pArtifactConvention, + P2pEdgeConditionKind, + P2pEdgeKind, + P2pNodeDispatchStyle, + P2pNodeKind, + P2pPermissionScope, + P2pPresetKey, + P2pStartContextSourceKind, + P2pWorkflowKind, +} from './p2p-workflow-constants.js'; +import type { P2pWorkflowDiagnostic } from './p2p-workflow-diagnostics.js'; +import type { P2pAdvancedRound, P2pContextReducerConfig } from './p2p-advanced.js'; + +export type P2pJsonPrimitive = string | number | boolean | null; +export type P2pJsonValue = P2pJsonPrimitive | P2pJsonValue[] | { [key: string]: P2pJsonValue }; +export type P2pWorkflowVariableValue = string | number | boolean | string[]; + +export interface P2pLegacyLaunchConfig { + modeOverride?: string; + rounds?: number; + hopTimeoutMinutes?: number; +} + +export interface P2pOldAdvancedLaunchConfig { + advancedPresetKey?: string; + advancedRounds?: Array>; + advancedRunTimeoutMinutes?: number; + contextReducer?: Record | null; +} + +export interface P2pWorkflowLaunchContext { + requestId?: string; + runId?: string; + sessionName?: string; + projectRoot?: string; + userText?: string; + locale?: string; +} + +export interface P2pWorkflowLaunchEnvelope { + workflowSchemaVersion: 1; + workflowKind: P2pWorkflowKind; + legacy?: P2pLegacyLaunchConfig; + advancedDraft?: P2pWorkflowDraft; + oldAdvanced?: P2pOldAdvancedLaunchConfig; + migrationPolicy?: { kind: 'materialize_old_advanced' }; + requiredDaemonCapabilities?: string[]; + /** + * Audit:R3 PR-γ / N-M5 / V-4 — policy hash carried by a preview/saved + * workflow at the time it was compiled. The daemon recompiles every launch + * with its CURRENT `P2pStaticPolicy`; if the hash differs from the saved + * value, the daemon emits a warning-severity + * `static_policy_mismatch_recompiled` diagnostic so the caller knows the + * preview's compilation is stale. ASCII string, ≤128 bytes. + */ + expectedStaticPolicyHash?: string; + launchContext?: P2pWorkflowLaunchContext; + /** + * R3 PR-α follow-up — Per-launch script executable allowlist. Configured + * by the user in the web UI (`P2pConfigPanel` → "Allowed executables") + * and round-tripped through `P2pSavedConfig.allowedExecutables` so the + * same list applies to every advanced launch from that config. + * + * Daemon merges these entries into `P2pStaticPolicy.allowedExecutables` + * during `prepareAdvancedWorkflowLaunch` (no daemon-side hand-edited + * config file — IM.codes is UI-driven). Each entry MUST be a non-empty + * visible-ASCII string ≤256 bytes; the array itself is capped at 64 + * entries. Empty list means script bind rejects every executable with + * `script_executable_denied`. + */ + allowedExecutables?: string[]; +} + +export interface P2pWorkflowDraft { + schemaVersion: 1; + id: string; + title?: string; + nodes: P2pWorkflowNodeDraft[]; + edges: P2pWorkflowEdgeDraft[]; + rootNodeId?: string; + startContext?: P2pWorkflowStartContext; + variables?: P2pWorkflowVariableDefinition[]; + loopBudgets?: Record; +} + +export interface P2pWorkflowNodeDraft { + id: string; + title?: string; + nodeKind: P2pNodeKind; + preset: P2pPresetKey; + dispatchStyle?: P2pNodeDispatchStyle; + permissionScope?: P2pPermissionScope; + promptAppend?: string; + timeoutMs?: number; + routingAuthority?: P2pRoutingAuthority; + script?: P2pScriptNodeContract; + /** R3 v1b follow-up — logic node contract; see `P2pLogicNodeContract`. */ + logic?: P2pLogicNodeContract; + artifacts?: P2pArtifactContract[]; +} + +export interface P2pWorkflowEdgeDraft { + id: string; + fromNodeId: string; + toNodeId: string; + edgeKind: P2pEdgeKind; + condition?: P2pWorkflowEdgeCondition; +} + +export interface P2pWorkflowEdgeCondition { + kind: P2pEdgeConditionKind; + equals: string; +} + +export type P2pRoutingAuthority = + | { kind: 'none' } + | { kind: 'audit_verdict_marker'; allowedMarkers: string[] } + | { kind: 'logic_marker'; allowedMarkers: string[] } + | { kind: 'script_routing_key'; allowedKeys: string[] }; + +export interface P2pWorkflowStartContext { + sources: P2pWorkflowStartContextSource[]; + maxTotalBytes?: number; +} + +export interface P2pWorkflowStartContextSource { + kind: P2pStartContextSourceKind; + id: string; + path?: string; + maxBytes?: number; + missingBehavior?: 'fail' | 'skip'; + binaryBehavior?: 'fail' | 'skip'; + order?: number; + discussionOffset?: { + byteOffset: number; + sha256Prefix: string; + sizeAtOffset: number; + }; +} + +export interface P2pWorkflowVariableDefinition { + name: string; + value: P2pWorkflowVariableValue; +} + +export interface P2pStaticPolicy { + policyVersion: 1; + maxNodes: number; + maxEdges: number; + maxLoopBudget: number; + allowedExecutables: string[]; + allowInterpreterScripts: boolean; + allowOpenSpecArtifacts: boolean; + allowImplementationPermission: boolean; + maxPromptAppendBytes: number; + /** + * Daemon-side concurrency caps. The daemon admission path MUST read these + * values rather than hardcoded constants, so the cap is governed by the + * single P2pStaticPolicy source rather than scattered literals. + */ + concurrency: { + maxAdvancedRuns: number; + maxScripts: number; + }; + policyHash?: string; +} + +export interface P2pCompiledWorkflow { + schemaVersion: 1; + workflowId: string; + rootNodeId: string; + nodes: P2pCompiledNode[]; + edges: P2pCompiledEdge[]; + variables: P2pWorkflowVariableDefinition[]; + loopBudgets: Record; + derivedRequiredCapabilities: string[]; + staticPolicyHash: string; + workflowContractHash: string; + diagnostics: P2pWorkflowDiagnostic[]; +} + +export interface P2pCompiledNode { + id: string; + title?: string; + nodeKind: P2pNodeKind; + preset: P2pPresetKey; + dispatchStyle?: P2pNodeDispatchStyle; + permissionScope: P2pPermissionScope; + promptAppend?: string; + routingAuthority: P2pRoutingAuthority; + script?: P2pScriptNodeContract; + /** + * R3 v1b follow-up — Logic node contract. When `nodeKind === 'logic'`, + * the executor evaluates `logic.rules` against `run.variables` (initialized + * from `compiled.variables` and patched by script nodes' machine output + * frames) and emits the matching `emit` marker. Conditional outgoing + * edges with `condition.kind === 'logic_marker_equals'` are then matched + * against the emitted marker. + * + * The evaluator is intentionally minimal — declarative rules over + * variable equality / presence — to keep the logic node sandboxed + * without a full expression interpreter. + */ + logic?: P2pLogicNodeContract; + artifacts: P2pArtifactContract[]; +} + +/** + * Declarative logic-node contract. Each rule is checked in declaration + * order; the first rule whose `if` clause matches drives the emitted + * marker. If no rule matches, `default` is emitted. `if: undefined` is an + * always-match rule (useful as the trailing rule before `default`, or as + * a single rule that emits unconditionally). + * + * Allowed `if` shapes (kept tiny on purpose): + * - `{ kind: 'variable_equals', name, equals }` — variable's stringified + * value === `equals` + * - `{ kind: 'variable_present', name }` — variable is defined and + * non-null + * - `{ kind: 'variable_truthy', name }` — variable is truthy in the + * usual JS sense (non-empty string, non-zero number, true, non-empty array) + * + * `emit` and `default` MUST be visible-ASCII strings ≤128 bytes. The + * compiler caps `rules.length` at 32 per node. + */ +export interface P2pLogicNodeContract { + rules: P2pLogicRule[]; + default: string; +} + +export type P2pLogicRule = + | { if?: undefined; emit: string } + | { if: { kind: 'variable_equals'; name: string; equals: string }; emit: string } + | { if: { kind: 'variable_present'; name: string }; emit: string } + | { if: { kind: 'variable_truthy'; name: string }; emit: string }; + +export interface P2pCompiledEdge extends P2pWorkflowEdgeDraft {} + +export interface P2pBindRuntimeContext { + runId: string; + requestId?: string; + repoRoot: string; + participants: Array<{ sessionName: string; roleLabel?: string; agentType?: string }>; + launchScope: { serverId?: string; projectId?: string; sessionName?: string }; + /** + * Capability advertisement snapshot at bind time. The `capabilities` array + * is the daemon's most recent `daemon.hello` payload. Used by both + * `getMissingP2pWorkflowCapabilities` (bind-time check) and + * `recheckDangerousNodeCapabilities` (executor-time recheck). + */ + capabilitySnapshot: { + daemonId: string; + capabilities: string[]; + helloEpoch: number; + sentAt: number; + }; + /** + * Audit:R2-Cx1-4 / R3 PR-α — policy snapshot at bind time, full + * `P2pStaticPolicy` shape (NOT an ad-hoc subset). This lets + * `recheckDangerousNodeCapabilities`, `validateCompiledWorkflowAgainstBindPolicy`, + * and any future executor compare bound policy vs current daemon policy + * field-for-field (allowedExecutables, allow flags, concurrency caps). + * + * The previous `currentDaemonPolicy: { allowScript, allowImplementation, ...}` + * subset was structurally incompatible with the recheck helper signature + * — see audit findings A1 / N-M1. + */ + policySnapshot: P2pStaticPolicy; + concurrencyAdmission: { accepted: boolean; reason?: 'daemon_busy' }; + artifactRuntime?: { rootDir: string }; +} + +export interface P2pBoundWorkflow { + compiled: P2pCompiledWorkflow; + bindContext: P2pBindRuntimeContext; + diagnostics: P2pWorkflowDiagnostic[]; +} + +export type P2pBindFailureReason = + | 'daemon_busy' + | 'missing_required_capability' + | 'capability_stale'; + +export type P2pBindResult = + | { ok: true; bound: P2pBoundWorkflow; diagnostics: P2pWorkflowDiagnostic[] } + | { ok: false; reason: P2pBindFailureReason; diagnostics: P2pWorkflowDiagnostic[] }; + +/** + * Discriminated union describing how `startP2pRun` was asked to execute the + * advanced phase of a P2P run. v1a accepts two kinds: + * + * - `envelope_compiled`: the advanced rounds came from a fully validated + * `P2pWorkflowLaunchEnvelope` that was compiled and bound by + * `prepareAdvancedWorkflowLaunch`. The orchestrator MUST surface + * `bound.bindContext.capabilitySnapshot` and + * `bound.bindContext.policySnapshot` on the run state so dangerous + * nodes can `recheckDangerousNodeCapabilities` against the snapshot vs the + * live policy. This is the production user-facing path. + * + * - `supervision_internal`: the rounds were synthesised by + * `supervision-automation.ts` for an automatic audit. They never come from + * user input and therefore do not pass through envelope validation. The + * discriminant tag exists to make the bypass explicit in source review and + * in static reverse-regression checks (rather than being detected by a + * filename heuristic). + * + * Older callers (cron, tests) may still pass `advancedRounds` / `advancedPresetKey` + * directly without `advanced`. v1a treats those as the legacy passthrough; v1b + * deletes the deprecated fields and makes `advanced` the only entry point. + */ +export type StartP2pRunAdvancedSource = + | { kind: 'envelope_compiled'; bound: P2pBoundWorkflow; advancedRounds: P2pAdvancedRound[]; advancedRunTimeoutMs?: number; contextReducer?: P2pContextReducerConfig } + | { kind: 'supervision_internal'; advancedRounds: P2pAdvancedRound[]; advancedPresetKey?: string; advancedRunTimeoutMs?: number }; + +export interface P2pWorkflowRuntimePrivateState { + runId: string; + boundWorkflow: P2pBoundWorkflow; + variables: Record; + rawNodeOutputs: Record; +} + +export interface P2pWorkflowStatusProjection { + projectionVersion: 1; + runId: string; + workflowId: string; + status: 'queued' | 'running' | 'blocked' | 'completed' | 'failed' | 'cancelled' | 'stale'; + currentNodeId?: string; + completedNodeIds: string[]; + diagnostics: P2pWorkflowDiagnostic[]; + capabilitySnapshot?: P2pBindRuntimeContext['capabilitySnapshot']; + updatedAt: string; + artifactSummaries?: Array<{ nodeId: string; path: string; status: 'pending' | 'changed' | 'unchanged' | 'failed' }>; + nodeSummaries?: Array<{ nodeId: string; status: string; summary?: string }>; +} + +export interface P2pPersistedWorkflowSnapshot { + projectionVersion: 1; + runId: string; + workflowId: string; + status: P2pWorkflowStatusProjection['status']; + currentNodeId?: string; + completedNodeIds: string[]; + diagnostics: P2pWorkflowDiagnostic[]; + updatedAt: string; +} + +export interface P2pScriptNodeContract { + commandKind: 'argv' | 'interpreter'; + argv: string[]; + interpreter?: string; + stdin?: string; + envAllowlist?: string[]; + requiredMachineOutput?: boolean; + timeoutMs?: number; + caps?: { + stdinBytes?: number; + stdoutBytes?: number; + stderrBytes?: number; + machineOutputBytes?: number; + }; +} + +export interface P2pScriptMachineOutputFrame { + kind: 'p2p_script_machine_output_v1'; + status?: 'ok' | 'fail'; + routingKey?: string; + variables?: Record; + artifacts?: Array<{ path: string; sha256?: string }>; + displaySummary?: string; +} + +export interface P2pArtifactContract { + convention: P2pArtifactConvention; + paths: string[]; + permissionScope?: P2pPermissionScope; + symlinkPolicy?: 'reject_all' | 'allow_existing_under_root'; +} diff --git a/shared/p2p-workflow-validators.ts b/shared/p2p-workflow-validators.ts new file mode 100644 index 000000000..d99972d9b --- /dev/null +++ b/shared/p2p-workflow-validators.ts @@ -0,0 +1,680 @@ +import { + P2P_EDGE_CONDITION_KINDS, + P2P_EDGE_KINDS, + P2P_ARTIFACT_CONVENTIONS, + P2P_FORBIDDEN_ENVELOPE_FIELD_NAMES, + P2P_NODE_DISPATCH_STYLES, + P2P_NODE_KINDS, + P2P_PERMISSION_SCOPES, + P2P_PRESET_KEYS, + P2P_ALLOWED_EXECUTABLE_MAX_BYTES, + P2P_ALLOWED_EXECUTABLE_PATTERN, + P2P_REQUEST_ID_ASCII_PATTERN, + P2P_START_CONTEXT_SOURCE_KINDS, + P2P_WORKFLOW_ARTIFACT_MAX_DEPTH, + P2P_WORKFLOW_ARTIFACT_MAX_FILES, + P2P_WORKFLOW_CAPABILITIES, + P2P_WORKFLOW_KINDS, + P2P_WORKFLOW_KNOWN_SCHEMA_MAX, + P2P_WORKFLOW_MAX_PROMPT_APPEND_BYTES, + P2P_WORKFLOW_MAX_VARIABLE_BYTES, + P2P_WORKFLOW_MAX_VARIABLES, + P2P_WORKFLOW_PROJECTION_VERSION, + P2P_WORKFLOW_SCHEMA_VERSION, + type P2pEdgeConditionKind, + type P2pNodeKind, + type P2pPermissionScope, + type P2pPresetKey, +} from './p2p-workflow-constants.js'; +import { makeP2pWorkflowDiagnostic, type P2pWorkflowDiagnostic } from './p2p-workflow-diagnostics.js'; +import { P2P_WORKFLOW_DIAGNOSTIC_CODES } from './p2p-workflow-diagnostics.js'; +import { getP2pArtifactPathDepth, isP2pArtifactRelativePath } from './p2p-workflow-artifacts.js'; +import { validateP2pScriptContract } from './p2p-workflow-script.js'; +import type { + P2pArtifactContract, + P2pPersistedWorkflowSnapshot, + P2pWorkflowStartContext, + P2pWorkflowStatusProjection, + P2pWorkflowDraft, + P2pWorkflowEdgeCondition, + P2pWorkflowEdgeDraft, + P2pWorkflowLaunchEnvelope, + P2pWorkflowNodeDraft, + P2pWorkflowVariableDefinition, + P2pWorkflowVariableValue, +} from './p2p-workflow-types.js'; + +export type P2pValidationResult = + | { ok: true; value: T; diagnostics: P2pWorkflowDiagnostic[] } + | { ok: false; diagnostics: P2pWorkflowDiagnostic[] }; + +const VARIABLE_NAME_RE = /^[a-z][a-z0-9_]{0,63}$/; +const FORBIDDEN_FIELD_SET = new Set(P2P_FORBIDDEN_ENVELOPE_FIELD_NAMES); +const FORBIDDEN_SCAN_MAX_DEPTH = 32; +const FORBIDDEN_SCAN_MAX_NODES = 5_000; +const FORBIDDEN_SCAN_MAX_ARRAY_ITEMS = 1_000; +const FORBIDDEN_SCAN_MAX_STRING_BYTES = 256 * 1024; +const SHORT_TEXT_MAX_BYTES = 4 * 1024; +const START_CONTEXT_SOURCE_MAX_BYTES = 512 * 1024; +const START_CONTEXT_TOTAL_MAX_BYTES = 1024 * 1024; +const DIAGNOSTIC_CODES = new Set(P2P_WORKFLOW_DIAGNOSTIC_CODES); + +function isRecord(value: unknown): value is Record { + return !!value && typeof value === 'object' && !Array.isArray(value); +} + +function isOneOf(value: unknown, values: T): value is T[number] { + return typeof value === 'string' && (values as readonly string[]).includes(value); +} + +function byteLength(value: string): number { + return new TextEncoder().encode(value).byteLength; +} + +function encodedJsonByteLength(value: unknown): number { + return byteLength(JSON.stringify(value)); +} + +function hasAnyOwn(record: Record, keys: readonly string[]): boolean { + return keys.some((key) => Object.prototype.hasOwnProperty.call(record, key)); +} + +export function findForbiddenEnvelopeField( + value: unknown, + path = '', + state: { depth: number; nodes: number; visited: WeakSet } = { depth: 0, nodes: 0, visited: new WeakSet() }, +): string | null { + if (typeof value === 'string') return byteLength(value) > FORBIDDEN_SCAN_MAX_STRING_BYTES ? path || '$' : null; + if (!isRecord(value) && !Array.isArray(value)) return null; + if (state.visited.has(value)) return null; + state.visited.add(value); + state.nodes += 1; + if (state.depth > FORBIDDEN_SCAN_MAX_DEPTH || state.nodes > FORBIDDEN_SCAN_MAX_NODES) return path || '$'; + if (Array.isArray(value)) { + if (value.length > FORBIDDEN_SCAN_MAX_ARRAY_ITEMS) return path || '$'; + for (let index = 0; index < value.length; index += 1) { + const previousDepth = state.depth; + state.depth = previousDepth + 1; + const nested = findForbiddenEnvelopeField(value[index], `${path}[${index}]`, state); + state.depth = previousDepth; + if (nested) return nested; + } + return null; + } + for (const key of Object.keys(value)) { + const normalizedKey = key.toLowerCase(); + if ( + FORBIDDEN_FIELD_SET.has(key) || + normalizedKey.endsWith('token') || + normalizedKey.endsWith('secret') || + normalizedKey.endsWith('apikey') || + normalizedKey === 'env' || + normalizedKey === 'environment' + ) { + return path ? `${path}.${key}` : key; + } + const previousDepth = state.depth; + state.depth = previousDepth + 1; + const nested = findForbiddenEnvelopeField(value[key], path ? `${path}.${key}` : key, state); + state.depth = previousDepth; + if (nested) return nested; + } + return null; +} + +export function hasOldAdvancedFields(value: unknown): boolean { + return isRecord(value) && hasAnyOwn(value, ['advancedPresetKey', 'advancedRounds', 'advancedRunTimeoutMinutes', 'contextReducer', 'oldAdvanced']); +} + +export function hasNewWorkflowFields(value: unknown): boolean { + return isRecord(value) && hasAnyOwn(value, [ + 'workflowSchemaVersion', + 'workflowKind', + 'advancedDraft', + 'launchContext', + 'requiredDaemonCapabilities', + // Audit:R3 PR-γ — `expectedStaticPolicyHash` is a v1a envelope field that + // marks a launch as "compiled against a known static policy". Including it + // here ensures `migrate` paths see the field and don't classify the + // envelope as legacy. + 'expectedStaticPolicyHash', + ]); +} + +export function validateP2pWorkflowLaunchEnvelope(input: unknown): P2pValidationResult { + const diagnostics: P2pWorkflowDiagnostic[] = []; + if (!isRecord(input)) { + return { ok: false, diagnostics: [makeP2pWorkflowDiagnostic('invalid_launch_envelope', 'parse', { summary: 'Envelope must be an object.' })] }; + } + + const forbiddenField = findForbiddenEnvelopeField(input); + if (forbiddenField) { + return { + ok: false, + diagnostics: [makeP2pWorkflowDiagnostic('forbidden_envelope_field', 'parse', { fieldPath: forbiddenField })], + }; + } + + const oldAdvancedAtTop = hasAnyOwn(input, ['advancedPresetKey', 'advancedRounds', 'advancedRunTimeoutMinutes', 'contextReducer']); + const oldAdvancedNested = isRecord(input.oldAdvanced); + const newWorkflow = hasNewWorkflowFields(input) || isRecord(input.advancedDraft); + const hasOldOnlyInput = oldAdvancedAtTop || oldAdvancedNested; + if (hasOldOnlyInput && newWorkflow && !oldAdvancedNested) { + return { ok: false, diagnostics: [makeP2pWorkflowDiagnostic('mixed_advanced_schema_fields', 'parse')] }; + } + if (hasOldOnlyInput && isRecord(input.advancedDraft)) { + return { ok: false, diagnostics: [makeP2pWorkflowDiagnostic('mixed_advanced_schema_fields', 'parse')] }; + } + + const version = input.workflowSchemaVersion; + if (version !== P2P_WORKFLOW_SCHEMA_VERSION) { + if (typeof version === 'number' && version > P2P_WORKFLOW_KNOWN_SCHEMA_MAX) { + return { ok: false, diagnostics: [makeP2pWorkflowDiagnostic('unsupported_schema_version', 'parse')] }; + } + return { ok: false, diagnostics: [makeP2pWorkflowDiagnostic('invalid_launch_envelope', 'parse', { summary: 'Unsupported or missing workflow schema version.' })] }; + } + + if (!isOneOf(input.workflowKind, P2P_WORKFLOW_KINDS)) { + return { ok: false, diagnostics: [makeP2pWorkflowDiagnostic('invalid_launch_envelope', 'parse', { fieldPath: 'workflowKind' })] }; + } + if (input.requiredDaemonCapabilities !== undefined) { + diagnostics.push(...validateP2pRequiredDaemonCapabilities(input.requiredDaemonCapabilities, 'requiredDaemonCapabilities')); + } + if (input.expectedStaticPolicyHash !== undefined) { + // Audit:R3 PR-δ (A6 / Cu1-M2 / Cx1-R2-6) — implementation MUST match + // the comment "string ≤128 ASCII bytes". Previously only JS string + // length was checked; multi-byte characters could pass at 128 code + // units (≈384 bytes). Now we enforce the visible-ASCII pattern (same + // as `P2P_REQUEST_ID_ASCII_PATTERN`) AND the UTF-8 byte length cap. + // The pattern already restricts to single-byte ASCII so the byte cap + // is technically redundant, but the explicit `TextEncoder` check + // protects against future pattern relaxation. + const hash = input.expectedStaticPolicyHash; + let bytes = 0; + if (typeof hash === 'string') { + try { + bytes = new TextEncoder().encode(hash).byteLength; + } catch { + bytes = Number.POSITIVE_INFINITY; + } + } + if ( + typeof hash !== 'string' + || hash.length === 0 + || hash.length > 128 + || !P2P_REQUEST_ID_ASCII_PATTERN.test(hash) + || bytes > 128 + ) { + diagnostics.push(makeP2pWorkflowDiagnostic('invalid_launch_envelope', 'parse', { fieldPath: 'expectedStaticPolicyHash' })); + } + } + if (input.launchContext !== undefined) { + diagnostics.push(...validateP2pWorkflowLaunchContext(input.launchContext, 'launchContext')); + } + if (input.migrationPolicy !== undefined) { + if (!isRecord(input.migrationPolicy) || input.migrationPolicy.kind !== 'materialize_old_advanced' || !isRecord(input.oldAdvanced)) { + diagnostics.push(makeP2pWorkflowDiagnostic('invalid_launch_envelope', 'parse', { fieldPath: 'migrationPolicy' })); + } + } + if (input.allowedExecutables !== undefined) { + // R3 PR-α follow-up — UI-driven allowlist on the envelope. + // - Must be an array + // - ≤64 entries + // - Each entry must be a non-empty visible-ASCII string ≤256 bytes + // - No duplicates (post-validation the daemon dedupes anyway, but the + // envelope shape SHOULD round-trip cleanly to/from the UI) + if (!Array.isArray(input.allowedExecutables) || input.allowedExecutables.length > 64) { + diagnostics.push(makeP2pWorkflowDiagnostic('invalid_launch_envelope', 'parse', { fieldPath: 'allowedExecutables' })); + } else { + const seen = new Set(); + const encoder = new TextEncoder(); + input.allowedExecutables.forEach((entry, index) => { + // R3 v2 PR-ζ (Cx1-A6 / ζ-14) — pattern is visible-ASCII (no + // length cap baked in), and the 256-byte limit is applied via + // `TextEncoder.byteLength` so the comment's "≤256 bytes" intent + // matches reality. Previous implementation reused the requestId + // pattern (capped at 128 chars), so entries 129–256 chars + // failed validation despite the documented 256-byte cap. + if (typeof entry !== 'string' + || entry.length === 0 + || encoder.encode(entry).byteLength > P2P_ALLOWED_EXECUTABLE_MAX_BYTES + || !P2P_ALLOWED_EXECUTABLE_PATTERN.test(entry)) { + diagnostics.push(makeP2pWorkflowDiagnostic('invalid_launch_envelope', 'parse', { fieldPath: `allowedExecutables[${index}]` })); + return; + } + if (seen.has(entry)) { + diagnostics.push(makeP2pWorkflowDiagnostic('invalid_launch_envelope', 'parse', { fieldPath: `allowedExecutables[${index}]`, summary: 'Duplicate allowedExecutables entry.' })); + return; + } + seen.add(entry); + }); + } + } + + if (input.advancedDraft !== undefined) { + const draftResult = validateP2pWorkflowDraft(input.advancedDraft); + diagnostics.push(...draftResult.diagnostics); + if (!draftResult.ok) return { ok: false, diagnostics }; + } + + if (diagnostics.some((diagnostic) => diagnostic.severity === 'error')) { + return { ok: false, diagnostics }; + } + return { ok: true, value: input as unknown as P2pWorkflowLaunchEnvelope, diagnostics }; +} + +export function validateP2pWorkflowDraft(input: unknown): P2pValidationResult { + const diagnostics: P2pWorkflowDiagnostic[] = []; + if (!isRecord(input)) { + return { ok: false, diagnostics: [makeP2pWorkflowDiagnostic('invalid_workflow_graph', 'compile', { summary: 'Draft must be an object.' })] }; + } + if (input.schemaVersion !== P2P_WORKFLOW_SCHEMA_VERSION) { + return { ok: false, diagnostics: [makeP2pWorkflowDiagnostic('unsupported_schema_version', 'compile', { fieldPath: 'schemaVersion' })] }; + } + if (typeof input.id !== 'string' || input.id.trim() === '') { + diagnostics.push(makeP2pWorkflowDiagnostic('invalid_workflow_graph', 'compile', { fieldPath: 'id' })); + } + if (!Array.isArray(input.nodes) || input.nodes.length === 0) { + diagnostics.push(makeP2pWorkflowDiagnostic('invalid_workflow_graph', 'compile', { fieldPath: 'nodes' })); + } else { + for (const [index, node] of input.nodes.entries()) { + diagnostics.push(...validateNodeDraft(node, `nodes[${index}]`)); + } + } + if (!Array.isArray(input.edges)) { + diagnostics.push(makeP2pWorkflowDiagnostic('invalid_workflow_graph', 'compile', { fieldPath: 'edges' })); + } else { + for (const [index, edge] of input.edges.entries()) { + diagnostics.push(...validateEdgeDraft(edge, `edges[${index}]`)); + } + } + if (input.variables !== undefined) { + if (!Array.isArray(input.variables)) { + diagnostics.push(makeP2pWorkflowDiagnostic('invalid_workflow_variable', 'compile', { fieldPath: 'variables' })); + } else { + diagnostics.push(...validateP2pWorkflowVariables(input.variables)); + } + } + if (input.startContext !== undefined) { + diagnostics.push(...validateP2pWorkflowStartContext(input.startContext, 'startContext')); + } + return diagnostics.some((diagnostic) => diagnostic.severity === 'error') + ? { ok: false, diagnostics } + : { ok: true, value: input as unknown as P2pWorkflowDraft, diagnostics }; +} + +export function validateP2pWorkflowVariables(input: unknown[]): P2pWorkflowDiagnostic[] { + const diagnostics: P2pWorkflowDiagnostic[] = []; + if (input.length > P2P_WORKFLOW_MAX_VARIABLES) { + diagnostics.push(makeP2pWorkflowDiagnostic('invalid_workflow_variable', 'compile', { summary: 'Too many workflow variables.' })); + } + const seen = new Set(); + for (const [index, rawVariable] of input.entries()) { + if (!isRecord(rawVariable)) { + diagnostics.push(makeP2pWorkflowDiagnostic('invalid_workflow_variable', 'compile', { fieldPath: `variables[${index}]` })); + continue; + } + const variable = rawVariable as Partial; + if (typeof variable.name !== 'string' || !VARIABLE_NAME_RE.test(variable.name)) { + diagnostics.push(makeP2pWorkflowDiagnostic('invalid_workflow_variable', 'compile', { fieldPath: `variables[${index}].name` })); + } else if (seen.has(variable.name)) { + diagnostics.push(makeP2pWorkflowDiagnostic('invalid_workflow_variable', 'compile', { fieldPath: `variables[${index}].name`, summary: 'Duplicate workflow variable.' })); + } else { + seen.add(variable.name); + } + if (!isP2pWorkflowVariableValue(variable.value)) { + diagnostics.push(makeP2pWorkflowDiagnostic('invalid_workflow_variable', 'compile', { fieldPath: `variables[${index}].value` })); + } else if (encodedJsonByteLength(variable.value) > P2P_WORKFLOW_MAX_VARIABLE_BYTES) { + diagnostics.push(makeP2pWorkflowDiagnostic('invalid_workflow_variable', 'compile', { fieldPath: `variables[${index}].value`, summary: 'Workflow variable exceeds byte limit.' })); + } + } + return diagnostics; +} + +export function isP2pWorkflowVariableValue(value: unknown): value is P2pWorkflowVariableValue { + return typeof value === 'string' || + typeof value === 'number' || + typeof value === 'boolean' || + (Array.isArray(value) && value.every((entry) => typeof entry === 'string')); +} + +export function validateNodeDraft(input: unknown, fieldPath: string): P2pWorkflowDiagnostic[] { + const diagnostics: P2pWorkflowDiagnostic[] = []; + if (!isRecord(input)) { + return [makeP2pWorkflowDiagnostic('invalid_workflow_graph', 'compile', { fieldPath })]; + } + const node = input as Partial; + if (typeof node.id !== 'string' || node.id.trim() === '') { + diagnostics.push(makeP2pWorkflowDiagnostic('invalid_workflow_graph', 'compile', { fieldPath: `${fieldPath}.id` })); + } + if (!isOneOf(node.nodeKind, P2P_NODE_KINDS)) { + diagnostics.push(makeP2pWorkflowDiagnostic('invalid_workflow_graph', 'compile', { fieldPath: `${fieldPath}.nodeKind` })); + } + if (!isOneOf(node.preset, P2P_PRESET_KEYS)) { + diagnostics.push(makeP2pWorkflowDiagnostic('invalid_workflow_graph', 'compile', { fieldPath: `${fieldPath}.preset` })); + } + if (node.dispatchStyle !== undefined && !isOneOf(node.dispatchStyle, P2P_NODE_DISPATCH_STYLES)) { + diagnostics.push(makeP2pWorkflowDiagnostic('invalid_workflow_graph', 'compile', { fieldPath: `${fieldPath}.dispatchStyle` })); + } + if (node.permissionScope !== undefined && !isOneOf(node.permissionScope, P2P_PERMISSION_SCOPES)) { + diagnostics.push(makeP2pWorkflowDiagnostic('invalid_workflow_graph', 'compile', { fieldPath: `${fieldPath}.permissionScope` })); + } + diagnostics.push(...validateNodeCombination(node, fieldPath)); + if (typeof node.promptAppend === 'string' && byteLength(node.promptAppend) > P2P_WORKFLOW_MAX_PROMPT_APPEND_BYTES) { + diagnostics.push(makeP2pWorkflowDiagnostic('invalid_prompt_append', 'compile', { fieldPath: `${fieldPath}.promptAppend` })); + } + if (node.nodeKind === 'script') { + diagnostics.push(...validateP2pScriptNodeContract(node.script, `${fieldPath}.script`)); + } else if (node.script !== undefined) { + diagnostics.push(makeP2pWorkflowDiagnostic('invalid_script_contract', 'compile', { fieldPath: `${fieldPath}.script` })); + } + if (node.artifacts !== undefined) { + if (!Array.isArray(node.artifacts)) { + diagnostics.push(makeP2pWorkflowDiagnostic('unsafe_artifact_path', 'compile', { fieldPath: `${fieldPath}.artifacts` })); + } else { + for (const [index, artifact] of node.artifacts.entries()) { + diagnostics.push(...validateP2pArtifactContract(artifact, `${fieldPath}.artifacts[${index}]`)); + } + } + } + return diagnostics; +} + +export function validateEdgeDraft(input: unknown, fieldPath: string): P2pWorkflowDiagnostic[] { + const diagnostics: P2pWorkflowDiagnostic[] = []; + if (!isRecord(input)) { + return [makeP2pWorkflowDiagnostic('invalid_workflow_graph', 'compile', { fieldPath })]; + } + const edge = input as Partial; + for (const key of ['id', 'fromNodeId', 'toNodeId'] as const) { + if (typeof edge[key] !== 'string' || edge[key]?.trim() === '') { + diagnostics.push(makeP2pWorkflowDiagnostic('invalid_workflow_graph', 'compile', { fieldPath: `${fieldPath}.${key}` })); + } + } + if (!isOneOf(edge.edgeKind, P2P_EDGE_KINDS)) { + diagnostics.push(makeP2pWorkflowDiagnostic('invalid_workflow_graph', 'compile', { fieldPath: `${fieldPath}.edgeKind` })); + } + if (edge.edgeKind === 'conditional') { + diagnostics.push(...validateEdgeCondition(edge.condition, `${fieldPath}.condition`)); + } + return diagnostics; +} + +function validateEdgeCondition(input: unknown, fieldPath: string): P2pWorkflowDiagnostic[] { + if (!isRecord(input)) return [makeP2pWorkflowDiagnostic('invalid_edge_condition', 'compile', { fieldPath })]; + const condition = input as Partial; + if (!isOneOf(condition.kind, P2P_EDGE_CONDITION_KINDS)) { + return [makeP2pWorkflowDiagnostic('invalid_edge_condition', 'compile', { fieldPath: `${fieldPath}.kind` })]; + } + if (typeof condition.equals !== 'string' || condition.equals === '') { + return [makeP2pWorkflowDiagnostic('invalid_edge_condition', 'compile', { fieldPath: `${fieldPath}.equals` })]; + } + return []; +} + +export function validateP2pScriptNodeContract(input: unknown, fieldPath = 'script'): P2pWorkflowDiagnostic[] { + const result = validateP2pScriptContract(input, fieldPath); + return result.diagnostics; +} + +export function validateP2pArtifactContract(input: unknown, fieldPath = 'artifact'): P2pWorkflowDiagnostic[] { + if (!isRecord(input)) return [makeP2pWorkflowDiagnostic('unsafe_artifact_path', 'compile', { fieldPath })]; + const artifact = input as Partial; + const diagnostics: P2pWorkflowDiagnostic[] = []; + if (!isOneOf(artifact.convention, P2P_ARTIFACT_CONVENTIONS)) { + diagnostics.push(makeP2pWorkflowDiagnostic('unsafe_artifact_path', 'compile', { fieldPath: `${fieldPath}.convention` })); + } + if (artifact.permissionScope !== undefined && !isOneOf(artifact.permissionScope, P2P_PERMISSION_SCOPES)) { + diagnostics.push(makeP2pWorkflowDiagnostic('unsafe_artifact_path', 'compile', { fieldPath: `${fieldPath}.permissionScope` })); + } + if (artifact.symlinkPolicy !== undefined && artifact.symlinkPolicy !== 'reject_all' && artifact.symlinkPolicy !== 'allow_existing_under_root') { + diagnostics.push(makeP2pWorkflowDiagnostic('unsafe_artifact_path', 'compile', { fieldPath: `${fieldPath}.symlinkPolicy` })); + } + if (!Array.isArray(artifact.paths) || artifact.paths.length === 0 || artifact.paths.length > P2P_WORKFLOW_ARTIFACT_MAX_FILES) { + diagnostics.push(makeP2pWorkflowDiagnostic('unsafe_artifact_path', 'compile', { fieldPath: `${fieldPath}.paths` })); + } else { + for (const [index, path] of artifact.paths.entries()) { + if (typeof path !== 'string' || !isP2pArtifactRelativePath(path) || getP2pArtifactPathDepth(path) > P2P_WORKFLOW_ARTIFACT_MAX_DEPTH) { + diagnostics.push(makeP2pWorkflowDiagnostic('unsafe_artifact_path', 'compile', { fieldPath: `${fieldPath}.paths[${index}]` })); + } + } + } + return diagnostics; +} + +export function isSafeRelativeArtifactPath(path: string): boolean { + if (!path || path.startsWith('/') || path.startsWith('~') || path.includes('\0') || path.includes('\\')) return false; + if (/^[a-zA-Z]:/.test(path) || path.startsWith('//')) return false; + const segments = path.split('/'); + return segments.every((segment) => segment !== '' && segment !== '.' && segment !== '..'); +} + +export function coerceNodeKind(value: unknown): P2pNodeKind | null { + return isOneOf(value, P2P_NODE_KINDS) ? value : null; +} + +export function coercePreset(value: unknown): P2pPresetKey | null { + return isOneOf(value, P2P_PRESET_KEYS) ? value : null; +} + +export function coercePermissionScope(value: unknown): P2pPermissionScope | null { + return isOneOf(value, P2P_PERMISSION_SCOPES) ? value : null; +} + +export function coerceEdgeConditionKind(value: unknown): P2pEdgeConditionKind | null { + return isOneOf(value, P2P_EDGE_CONDITION_KINDS) ? value : null; +} + +export function validateP2pWorkflowStartContext(input: unknown, fieldPath = 'startContext'): P2pWorkflowDiagnostic[] { + const diagnostics: P2pWorkflowDiagnostic[] = []; + if (!isRecord(input) || !Array.isArray(input.sources)) { + return [makeP2pWorkflowDiagnostic('missing_context_source', 'bind', { fieldPath })]; + } + const context = input as Partial; + if (context.maxTotalBytes !== undefined && (!Number.isInteger(context.maxTotalBytes) || context.maxTotalBytes < 0 || context.maxTotalBytes > START_CONTEXT_TOTAL_MAX_BYTES)) { + diagnostics.push(makeP2pWorkflowDiagnostic('context_source_too_large', 'bind', { fieldPath: `${fieldPath}.maxTotalBytes` })); + } + const seen = new Set(); + for (const [index, rawSource] of input.sources.entries()) { + const sourcePath = `${fieldPath}.sources[${index}]`; + if (!isRecord(rawSource)) { + diagnostics.push(makeP2pWorkflowDiagnostic('missing_context_source', 'bind', { fieldPath: sourcePath })); + continue; + } + if (!isOneOf(rawSource.kind, P2P_START_CONTEXT_SOURCE_KINDS)) { + diagnostics.push(makeP2pWorkflowDiagnostic('missing_context_source', 'bind', { fieldPath: `${sourcePath}.kind` })); + } + if (typeof rawSource.id !== 'string' || rawSource.id.trim() === '' || byteLength(rawSource.id) > SHORT_TEXT_MAX_BYTES || seen.has(rawSource.id)) { + diagnostics.push(makeP2pWorkflowDiagnostic('missing_context_source', 'bind', { fieldPath: `${sourcePath}.id` })); + } else { + seen.add(rawSource.id); + } + const maxBytes = rawSource.maxBytes; + if (maxBytes !== undefined && (typeof maxBytes !== 'number' || !Number.isInteger(maxBytes) || maxBytes < 0 || maxBytes > START_CONTEXT_SOURCE_MAX_BYTES)) { + diagnostics.push(makeP2pWorkflowDiagnostic('context_source_too_large', 'bind', { fieldPath: `${sourcePath}.maxBytes` })); + } + if (rawSource.missingBehavior !== undefined && rawSource.missingBehavior !== 'fail' && rawSource.missingBehavior !== 'skip') { + diagnostics.push(makeP2pWorkflowDiagnostic('missing_context_source', 'bind', { fieldPath: `${sourcePath}.missingBehavior` })); + } + if (rawSource.binaryBehavior !== undefined && rawSource.binaryBehavior !== 'fail' && rawSource.binaryBehavior !== 'skip') { + diagnostics.push(makeP2pWorkflowDiagnostic('missing_context_source', 'bind', { fieldPath: `${sourcePath}.binaryBehavior` })); + } + if (rawSource.order !== undefined && !Number.isInteger(rawSource.order)) { + diagnostics.push(makeP2pWorkflowDiagnostic('missing_context_source', 'bind', { fieldPath: `${sourcePath}.order` })); + } + if (rawSource.kind === 'file_reference') { + if (typeof rawSource.path !== 'string' || !isP2pArtifactRelativePath(rawSource.path)) { + diagnostics.push(makeP2pWorkflowDiagnostic('unsafe_artifact_path', 'bind', { fieldPath: `${sourcePath}.path` })); + } + } else if (rawSource.path !== undefined && (typeof rawSource.path !== 'string' || !isP2pArtifactRelativePath(rawSource.path))) { + diagnostics.push(makeP2pWorkflowDiagnostic('unsafe_artifact_path', 'bind', { fieldPath: `${sourcePath}.path` })); + } + if (rawSource.discussionOffset !== undefined) { + diagnostics.push(...validateDiscussionOffset(rawSource.discussionOffset, `${sourcePath}.discussionOffset`)); + } + } + return diagnostics; +} + +export function validateP2pRequiredDaemonCapabilities(input: unknown, fieldPath = 'requiredDaemonCapabilities'): P2pWorkflowDiagnostic[] { + if (!Array.isArray(input)) return [makeP2pWorkflowDiagnostic('missing_required_capability', 'web_validate', { fieldPath })]; + const diagnostics: P2pWorkflowDiagnostic[] = []; + const seen = new Set(); + for (const [index, capability] of input.entries()) { + if (typeof capability !== 'string' || !(P2P_WORKFLOW_CAPABILITIES as readonly string[]).includes(capability) || seen.has(capability)) { + diagnostics.push(makeP2pWorkflowDiagnostic('missing_required_capability', 'web_validate', { fieldPath: `${fieldPath}[${index}]` })); + } else { + seen.add(capability); + } + } + return diagnostics; +} + +export function validateP2pWorkflowLaunchContext(input: unknown, fieldPath = 'launchContext'): P2pWorkflowDiagnostic[] { + if (!isRecord(input)) return [makeP2pWorkflowDiagnostic('invalid_launch_envelope', 'parse', { fieldPath })]; + const diagnostics: P2pWorkflowDiagnostic[] = []; + const stringFields = ['runId', 'sessionName', 'projectRoot', 'userText', 'locale'] as const; + if (input.requestId !== undefined && (typeof input.requestId !== 'string' || !P2P_REQUEST_ID_ASCII_PATTERN.test(input.requestId))) { + diagnostics.push(makeP2pWorkflowDiagnostic('invalid_launch_envelope', 'parse', { fieldPath: `${fieldPath}.requestId` })); + } + for (const key of stringFields) { + if (input[key] !== undefined && (typeof input[key] !== 'string' || byteLength(input[key]) > SHORT_TEXT_MAX_BYTES)) { + diagnostics.push(makeP2pWorkflowDiagnostic('invalid_launch_envelope', 'parse', { fieldPath: `${fieldPath}.${key}` })); + } + } + return diagnostics; +} + +export function validateP2pWorkflowStatusProjection(input: unknown): P2pValidationResult { + const diagnostics = validateProjectionLike(input, 'projection'); + return diagnostics.some((diagnostic) => diagnostic.severity === 'error') + ? { ok: false, diagnostics } + : { ok: true, value: input as P2pWorkflowStatusProjection, diagnostics }; +} + +export function validateP2pPersistedWorkflowSnapshot(input: unknown): P2pValidationResult { + const diagnostics = validateProjectionLike(input, 'snapshot', true); + return diagnostics.some((diagnostic) => diagnostic.severity === 'error') + ? { ok: false, diagnostics } + : { ok: true, value: input as P2pPersistedWorkflowSnapshot, diagnostics }; +} + +function validateNodeCombination(node: Partial, fieldPath: string): P2pWorkflowDiagnostic[] { + if (!isOneOf(node.nodeKind, P2P_NODE_KINDS) || !isOneOf(node.preset, P2P_PRESET_KEYS)) return []; + const scope = node.permissionScope ?? 'analysis_only'; + if (!isOneOf(scope, P2P_PERMISSION_SCOPES)) return []; + const artifacts = Array.isArray(node.artifacts) ? node.artifacts : []; + const invalid = () => [makeP2pWorkflowDiagnostic('invalid_workflow_graph', 'compile', { + fieldPath, + summary: `Invalid nodeKind/preset/permissionScope combination: ${node.nodeKind}/${node.preset}/${scope}.`, + })]; + + if (node.nodeKind === 'logic') { + return node.preset === 'custom' && scope === 'analysis_only' ? [] : invalid(); + } + if (node.nodeKind === 'script') { + return node.preset === 'custom' ? [] : invalid(); + } + if (node.nodeKind !== 'llm') return []; + + if (node.preset === 'audit' || node.preset === 'proposal_audit' || node.preset === 'implementation_audit') { + return scope === 'analysis_only' ? [] : invalid(); + } + if (node.preset === 'openspec_propose') { + return scope === 'artifact_generation' && artifacts.some((artifact) => isRecord(artifact) && artifact.convention === 'openspec_convention') ? [] : invalid(); + } + if (node.preset === 'implementation') { + return scope === 'implementation' ? [] : invalid(); + } + if (scope === 'analysis_only') return []; + if (scope === 'artifact_generation') return artifacts.length > 0 ? [] : invalid(); + return invalid(); +} + +function validateDiscussionOffset(input: unknown, fieldPath: string): P2pWorkflowDiagnostic[] { + if (!isRecord(input)) return [makeP2pWorkflowDiagnostic('missing_context_source', 'bind', { fieldPath })]; + const diagnostics: P2pWorkflowDiagnostic[] = []; + if (!Number.isInteger(input.byteOffset) || (input.byteOffset as number) < 0) { + diagnostics.push(makeP2pWorkflowDiagnostic('missing_context_source', 'bind', { fieldPath: `${fieldPath}.byteOffset` })); + } + if (typeof input.sha256Prefix !== 'string' || !/^[a-f0-9]{8,64}$/i.test(input.sha256Prefix)) { + diagnostics.push(makeP2pWorkflowDiagnostic('missing_context_source', 'bind', { fieldPath: `${fieldPath}.sha256Prefix` })); + } + if (!Number.isInteger(input.sizeAtOffset) || (input.sizeAtOffset as number) < 0) { + diagnostics.push(makeP2pWorkflowDiagnostic('missing_context_source', 'bind', { fieldPath: `${fieldPath}.sizeAtOffset` })); + } + return diagnostics; +} + +function validateProjectionLike(input: unknown, fieldPath: string, persisted = false): P2pWorkflowDiagnostic[] { + const diagnostics: P2pWorkflowDiagnostic[] = []; + if (!isRecord(input)) return [makeP2pWorkflowDiagnostic('invalid_launch_envelope', 'parse', { fieldPath })]; + if (input.projectionVersion !== P2P_WORKFLOW_PROJECTION_VERSION) { + diagnostics.push(makeP2pWorkflowDiagnostic('unsupported_schema_version', 'web_validate', { fieldPath: `${fieldPath}.projectionVersion` })); + } + for (const key of ['runId', 'workflowId', 'updatedAt'] as const) { + if (typeof input[key] !== 'string' || input[key] === '') { + diagnostics.push(makeP2pWorkflowDiagnostic('invalid_launch_envelope', 'parse', { fieldPath: `${fieldPath}.${key}` })); + } + } + if (!['queued', 'running', 'blocked', 'completed', 'failed', 'cancelled', 'stale'].includes(String(input.status))) { + diagnostics.push(makeP2pWorkflowDiagnostic('invalid_launch_envelope', 'parse', { fieldPath: `${fieldPath}.status` })); + } + if (input.currentNodeId !== undefined && typeof input.currentNodeId !== 'string') { + diagnostics.push(makeP2pWorkflowDiagnostic('invalid_launch_envelope', 'parse', { fieldPath: `${fieldPath}.currentNodeId` })); + } + if (!Array.isArray(input.completedNodeIds) || input.completedNodeIds.some((id) => typeof id !== 'string' || id === '')) { + diagnostics.push(makeP2pWorkflowDiagnostic('invalid_launch_envelope', 'parse', { fieldPath: `${fieldPath}.completedNodeIds` })); + } + if (!Array.isArray(input.diagnostics) || input.diagnostics.some((diagnostic) => !isWorkflowDiagnosticLike(diagnostic))) { + diagnostics.push(makeP2pWorkflowDiagnostic('invalid_launch_envelope', 'parse', { fieldPath: `${fieldPath}.diagnostics` })); + } + if (persisted && (input.capabilitySnapshot !== undefined || input.artifactSummaries !== undefined || input.nodeSummaries !== undefined)) { + diagnostics.push(makeP2pWorkflowDiagnostic('forbidden_envelope_field', 'parse', { fieldPath })); + } + return diagnostics; +} + +function isWorkflowDiagnosticLike(input: unknown): boolean { + return isRecord(input) && + typeof input.code === 'string' && + (DIAGNOSTIC_CODES.size === 0 || DIAGNOSTIC_CODES.has(input.code)) && + typeof input.phase === 'string' && + (input.severity === 'info' || input.severity === 'warning' || input.severity === 'error') && + typeof input.messageKey === 'string'; +} + +/** True when a workflow draft, persisted snapshot, or status projection + * declares a `schemaVersion` greater than `P2P_WORKFLOW_KNOWN_SCHEMA_MAX`, + * or a `projectionVersion` greater than `P2P_WORKFLOW_PROJECTION_VERSION`. + * + * The web v1a UI uses this gate to switch the panel to read-only mode and + * block launches: a future-version draft must never be best-effort edited + * or compiled by an older client. Returns false for inputs that lack any + * recognised version field — those are handled by the regular validators + * with `invalid_workflow_graph` / `invalid_launch_envelope` diagnostics. */ +export function isFutureWorkflowSchema(input: unknown): boolean { + if (!isRecord(input)) return false; + const schemaVersion = (input as { schemaVersion?: unknown }).schemaVersion; + if (typeof schemaVersion === 'number' && Number.isFinite(schemaVersion) && schemaVersion > P2P_WORKFLOW_KNOWN_SCHEMA_MAX) { + return true; + } + const workflowSchemaVersion = (input as { workflowSchemaVersion?: unknown }).workflowSchemaVersion; + if (typeof workflowSchemaVersion === 'number' && Number.isFinite(workflowSchemaVersion) && workflowSchemaVersion > P2P_WORKFLOW_KNOWN_SCHEMA_MAX) { + return true; + } + const projectionVersion = (input as { projectionVersion?: unknown }).projectionVersion; + if (typeof projectionVersion === 'number' && Number.isFinite(projectionVersion) && projectionVersion > P2P_WORKFLOW_PROJECTION_VERSION) { + return true; + } + // Nested envelope: launch envelopes carry an `advancedDraft` whose own + // `schemaVersion` may be in the future. Check it but don't recurse further. + const advancedDraft = (input as { advancedDraft?: unknown }).advancedDraft; + if (isRecord(advancedDraft)) { + const draftVersion = (advancedDraft as { schemaVersion?: unknown }).schemaVersion; + if (typeof draftVersion === 'number' && Number.isFinite(draftVersion) && draftVersion > P2P_WORKFLOW_KNOWN_SCHEMA_MAX) { + return true; + } + } + return false; +} diff --git a/shared/test-session-guard.ts b/shared/test-session-guard.ts index b42728146..fd0d4b416 100644 --- a/shared/test-session-guard.ts +++ b/shared/test-session-guard.ts @@ -17,6 +17,8 @@ const SESSION_NAME_PATTERNS: RegExp[] = [ /^deck_storecheck[a-z0-9-]+_(brain|w\d+)$/i, /^deck_shutdown[a-z0-9-]+_(brain|w\d+|probe)$/i, /^deck_test_preview_[a-z0-9-]+_(brain|w\d+|probe)$/i, + /^deck_test_p2p_workflow_[a-z0-9-]+_(brain|w\d+|probe)$/i, + /^imcodes-test-p2p-workflow[-_][a-z0-9-]+$/i, /^deck_sub_(?:cxsdk_e2e|cxsdk_effort|ccsdk_minimax_sub)$/i, ]; @@ -29,6 +31,8 @@ const PROJECT_NAME_PATTERNS: RegExp[] = [ /^storecheck[a-z0-9-]+$/i, /^shutdown[a-z0-9-]+$/i, /^imcodes-test-preview[-_]/i, + /^imcodes-test-p2p-workflow[-_]/i, + /^p2pworkflow[a-z0-9-]+$/i, /^e2e[-_]/i, ]; @@ -37,6 +41,8 @@ const PROJECT_DIR_PATTERNS: RegExp[] = [ /[/\\]tmp[/\\].*modeaware/i, /[/\\]tmp[/\\].*bootmain/i, /[/\\]tmp[/\\].*imcodes-test-preview/i, + /[/\\]tmp[/\\].*imcodes-test-p2p-workflow/i, + /[/\\]tmp[/\\].*imc_p2p_wf_test_/i, ]; function normalize(value: string | null | undefined): string | undefined { diff --git a/src/daemon/command-handler.ts b/src/daemon/command-handler.ts index fef4bbaa4..484ddf2d9 100644 --- a/src/daemon/command-handler.ts +++ b/src/daemon/command-handler.ts @@ -37,7 +37,7 @@ import { startP2pRun, cancelP2pRun, getP2pRun, listP2pRuns, serializeP2pRun, typ import { buildSessionList } from './session-list.js'; import { supervisionAutomation } from './supervision-automation.js'; import { getComboRoundCount, parseModePipeline, P2P_CONFIG_MODE, isP2pSavedConfig, type P2pSessionConfig } from '../../shared/p2p-modes.js'; -import type { P2pAdvancedRound, P2pContextReducerConfig } from '../../shared/p2p-advanced.js'; +import type { P2pAdvancedRound, P2pContextReducerConfig, P2pRoundPreset } from '../../shared/p2p-advanced.js'; import { CRON_MSG } from '../../shared/cron-types.js'; import { executeCronJob } from './cron-executor.js'; import { TRANSPORT_MSG } from '../../shared/transport-events.js'; @@ -73,6 +73,29 @@ import { MEMORY_WS } from '../../shared/memory-ws.js'; import { FS_WRITE_ERROR } from '../shared/transport/fs.js'; import { P2P_CONFIG_ERROR, P2P_CONFIG_MSG, MAX_P2P_PARTICIPANTS } from '../../shared/p2p-config-events.js'; import { p2pScopedSessionKey } from '../../shared/p2p-config-scope.js'; +import { P2P_WORKFLOW_SCHEMA_VERSION } from '../../shared/p2p-workflow-constants.js'; +import { makeP2pWorkflowDiagnostic, type P2pWorkflowDiagnostic } from '../../shared/p2p-workflow-diagnostics.js'; +import { compileP2pWorkflowDraft } from '../../shared/p2p-workflow-compiler.js'; +import { materializeOldAdvancedConfigToWorkflowDraft } from '../../shared/p2p-workflow-materialize.js'; +import { P2P_WORKFLOW_MSG } from '../../shared/p2p-workflow-messages.js'; +import { buildDefaultP2pStaticPolicy } from '../../shared/p2p-workflow-policy.js'; +import { + validateP2pWorkflowDraft, + validateP2pWorkflowLaunchEnvelope, +} from '../../shared/p2p-workflow-validators.js'; +import type { + P2pBindRuntimeContext, + P2pBoundWorkflow, + P2pCompiledEdge, + P2pCompiledNode, + P2pCompiledWorkflow, + P2pStaticPolicy, + P2pWorkflowDraft, + P2pWorkflowLaunchEnvelope, + P2pWorkflowNodeDraft, +} from '../../shared/p2p-workflow-types.js'; +import { bindP2pCompiledWorkflow } from './p2p-workflow-bind.js'; +import { readP2pDiscussionWithOffset } from './p2p-workflow-discussion-offsets.js'; import { DAEMON_COMMAND_TYPES } from '../../shared/daemon-command-types.js'; import { CLAUDE_SDK_EFFORT_LEVELS, @@ -1360,10 +1383,10 @@ function dispatchWebCommand(cmd: Record, serverLink: ServerLink case 'discussion.status': handleDiscussionStatus(cmd, serverLink); break; - case 'p2p.list_discussions': + case P2P_WORKFLOW_MSG.LIST_DISCUSSIONS: void handleP2pListDiscussions(cmd, serverLink); break; - case 'p2p.read_discussion': + case P2P_WORKFLOW_MSG.READ_DISCUSSION: void handleP2pReadDiscussion(cmd, serverLink); break; case 'discussion.stop': @@ -1431,10 +1454,10 @@ function dispatchWebCommand(cmd: Record, serverLink: ServerLink case 'fs.write': void handleFsWrite(cmd, serverLink); break; - case 'p2p.cancel': + case P2P_WORKFLOW_MSG.CANCEL: void handleP2pCancel(cmd, serverLink); break; - case 'p2p.status': + case P2P_WORKFLOW_MSG.STATUS: void handleP2pStatus(cmd, serverLink); break; case CC_PRESET_MSG.LIST: @@ -2008,6 +2031,388 @@ function resolveSingleTargetMode( return configuredMode && configuredMode !== 'skip' ? configuredMode : 'discuss'; } +type PreparedAdvancedWorkflowLaunch = + | { + ok: true; + advancedRounds: P2pAdvancedRound[]; + advancedRunTimeoutMs?: number; + contextReducer?: P2pContextReducerConfig; + diagnostics: P2pWorkflowDiagnostic[]; + /** + * Audit:V-1 / N-H1 — when present, the bound workflow flowed through + * compile + bind. Caller MUST pass `advanced: { kind: 'envelope_compiled', bound, ... }` + * to `startP2pRun` so the orchestrator surfaces capabilitySnapshot/policy + * on the run state. Absent on legacy passthrough (no envelope). + */ + bound?: P2pBoundWorkflow; + } + | { ok: false; diagnostics: P2pWorkflowDiagnostic[] }; + +function hasOldAdvancedLaunchFields(cmd: Record): boolean { + return cmd.p2pAdvancedPresetKey != null + || cmd.p2pAdvancedRounds != null + || cmd.p2pAdvancedRunTimeoutMinutes != null + || cmd.p2pContextReducer != null; +} + +function roundPresetFromWorkflowPreset(node: Pick): P2pRoundPreset { + if ( + node.preset === 'openspec_propose' + || node.preset === 'proposal_audit' + || node.preset === 'implementation' + || node.preset === 'implementation_audit' + || node.preset === 'custom' + ) { + return node.preset; + } + return 'discussion'; +} + +/** + * R3 PR-α (A2 / Cu1-N3) — order compiled nodes for legacy executor traversal. + * + * The previous implementation sorted by `node.id.localeCompare`, which made + * round execution order depend on lexical id spelling rather than the + * compiled `rootNodeId` + edges topology. That violated spec + * "Workflow rootNodeId SHALL define execution start" and produced + * non-deterministic order across renames. We now traverse from + * `workflow.rootNodeId` along DEFAULT edges, then append any unreachable + * nodes in declaration order so the legacy projection still surfaces them. + */ +export function orderCompiledNodesForExecution(workflow: P2pCompiledWorkflow): P2pCompiledNode[] { + const nodesById = new Map(workflow.nodes.map((node) => [node.id, node])); + const visited = new Set(); + const ordered: P2pCompiledNode[] = []; + const visit = (nodeId: string): void => { + if (visited.has(nodeId)) return; + const node = nodesById.get(nodeId); + if (!node) return; + visited.add(nodeId); + ordered.push(node); + const outgoing = workflow.edges + .filter((edge) => edge.fromNodeId === nodeId && edge.edgeKind === 'default') + .map((edge) => edge.toNodeId); + for (const next of outgoing) visit(next); + }; + if (workflow.rootNodeId) visit(workflow.rootNodeId); + // Defensive: append any unreachable nodes in declaration order so the + // legacy projection still surfaces them. Compiler MUST reject unreachable + // graphs; this is just a safety net for adapter consumers. + for (const node of workflow.nodes) { + if (!visited.has(node.id)) ordered.push(node); + } + return ordered; +} + +/** + * R3 PR-α (Cu1-N3) — Map a single compiled outgoing conditional edge to the + * legacy `jumpRule` shape. Returns undefined when the node has no conditional + * outgoing edge or when no loop budget is registered. Marker preserves the + * raw `condition.equals` string instead of compressing every non-`PASS` + * marker to `'REWORK'`; non-`PASS|REWORK` markers fall back to `'REWORK'` + * because the legacy `P2pVerdictMarker` union accepts only those two values. + * The new envelope_compiled executor (PR-β) bypasses this adapter entirely + * and reads `condition` directly, so the legacy compression is bounded to + * oldAdvanced surfaces. + */ +export function mapConditionalEdgeToJumpRule( + conditionalEdge: P2pCompiledEdge | undefined, + loopBudgets: Record, +): { jumpRule: P2pAdvancedRound['jumpRule']; verdictPolicy: P2pAdvancedRound['verdictPolicy'] } { + if (!conditionalEdge) return { jumpRule: undefined, verdictPolicy: 'none' }; + const loopBudget = loopBudgets[conditionalEdge.id]; + const rawMarker = conditionalEdge.condition?.equals; + const marker: 'PASS' | 'REWORK' = rawMarker === 'PASS' ? 'PASS' : 'REWORK'; + if (loopBudget === undefined) { + // No registered loop budget → emit `forced_rework` policy without a + // jumpRule so the legacy projection records routing intent without + // letting orchestrator loop indefinitely. + return { jumpRule: undefined, verdictPolicy: 'forced_rework' }; + } + return { + jumpRule: { + targetRoundId: conditionalEdge.toNodeId, + marker, + minTriggers: 0, + maxTriggers: loopBudget, + }, + verdictPolicy: 'forced_rework', + }; +} + +/** + * R3 PR-α (A1 / W3 / Cu1-N3) — Map a compiled node to a legacy + * `P2pAdvancedRound`, preserving `nodeKind`, `script`, `routingAuthority`, + * and `artifactConvention` so the orchestrator can dispatch / recheck without + * a sidecar `bound.compiled.nodes.find(...)` lookup. + */ +export function mapCompiledNodeToLegacyRound( + node: P2pCompiledNode, + workflow: P2pCompiledWorkflow, +): P2pAdvancedRound { + const conditionalEdge = workflow.edges.find((edge) => edge.fromNodeId === node.id && edge.edgeKind === 'conditional'); + const { jumpRule, verdictPolicy } = mapConditionalEdgeToJumpRule(conditionalEdge, workflow.loopBudgets); + // R3 PR-α (W3) — preserve the FIRST artifact contract's convention so the + // orchestrator can decide between `openspec_convention` (per-file sha256 + // baseline + frozen identity) and `explicit_paths` (legacy sha256 listing). + // Multi-contract nodes are not allowed in v1a; compiler enforces. + const artifactConvention: 'none' | 'explicit' | 'openspec_convention' | undefined = + node.artifacts.length > 0 + ? (node.artifacts[0].convention as 'explicit' | 'openspec_convention') + : undefined; + return { + id: node.id, + title: node.title ?? node.id, + preset: roundPresetFromWorkflowPreset(node), + executionMode: node.dispatchStyle === 'multi_dispatch' ? 'multi_dispatch' : 'single_main', + permissionScope: node.permissionScope, + ...(node.promptAppend ? { promptAppend: node.promptAppend } : {}), + ...(node.artifacts.length > 0 ? { artifactOutputs: node.artifacts.flatMap((artifact) => artifact.paths).sort() } : {}), + verdictPolicy, + ...(jumpRule ? { jumpRule } : {}), + // R3 PR-α (A1 / W3) — compiled-node carriers preserved on the legacy + // round model so downstream consumers can read authoritative semantics. + nodeKind: node.nodeKind, + ...(node.script ? { script: node.script } : {}), + ...(node.routingAuthority ? { routingAuthority: node.routingAuthority } : {}), + ...(artifactConvention ? { artifactConvention } : {}), + } satisfies P2pAdvancedRound; +} + +function compiledWorkflowToLegacyAdvancedRounds(workflow: P2pCompiledWorkflow): P2pAdvancedRound[] { + // R3 PR-α — replaced lexical sort with topological traversal so the + // execution order honours `rootNodeId` + DEFAULT edges (A2). Field + // preservation lives in `mapCompiledNodeToLegacyRound` (A1 / W3); jump rule + // mapping lives in `mapConditionalEdgeToJumpRule` (Cu1-N3 split). Each + // helper is independently unit-tested. + return orderCompiledNodesForExecution(workflow).map((node) => mapCompiledNodeToLegacyRound(node, workflow)); +} + +function buildAdvancedLaunchEnvelopeFromCommand( + cmd: Record, + launchContext: P2pWorkflowLaunchEnvelope['launchContext'], +): P2pWorkflowLaunchEnvelope | null { + const explicitEnvelope = cmd.p2pWorkflowLaunchEnvelope ?? cmd.workflowLaunchEnvelope; + if (isPlainRecord(explicitEnvelope)) { + return explicitEnvelope as unknown as P2pWorkflowLaunchEnvelope; + } + if (!hasOldAdvancedLaunchFields(cmd)) return null; + return { + workflowSchemaVersion: P2P_WORKFLOW_SCHEMA_VERSION, + workflowKind: 'advanced', + oldAdvanced: { + ...(typeof cmd.p2pAdvancedPresetKey === 'string' ? { advancedPresetKey: cmd.p2pAdvancedPresetKey } : {}), + ...(Array.isArray(cmd.p2pAdvancedRounds) ? { advancedRounds: cmd.p2pAdvancedRounds as Array> } : {}), + ...(typeof cmd.p2pAdvancedRunTimeoutMinutes === 'number' ? { advancedRunTimeoutMinutes: cmd.p2pAdvancedRunTimeoutMinutes } : {}), + ...(isPlainRecord(cmd.p2pContextReducer) ? { contextReducer: cmd.p2pContextReducer } : {}), + }, + migrationPolicy: { kind: 'materialize_old_advanced' }, + launchContext, + }; +} + +// `getCurrentDaemonWorkflowCapabilities` is the single entry point for +// "what capabilities does this daemon currently advertise?". v1a fix +// (audit:N-H2): the fallback when `serverLink.getP2pWorkflowCapabilities` is +// missing now returns `[]` (fail-closed) — previously it returned all three +// dangerous caps as a hardcoded permissive default, which was a fail-OPEN +// authorisation bug. The function itself lives in the daemon static-policy +// module so compile/bind/recheck all share one source. +import { + loadDaemonP2pStaticPolicy, + readCachedHelloSnapshot, +} from './p2p-workflow-static-policy.js'; + +function makeBindRuntimeContext( + options: { + runId: string; + requestId?: string; + repoRoot: string; + serverLink: ServerLink; + policySnapshot: P2pStaticPolicy; + initiatorSession: string; + targets: P2pTarget[]; + accepted: boolean; + }, +): P2pBindRuntimeContext { + const helloSnapshot = readCachedHelloSnapshot(options.serverLink); + return { + runId: options.runId, + requestId: options.requestId, + repoRoot: options.repoRoot, + participants: [ + { sessionName: options.initiatorSession }, + ...options.targets.map((target) => ({ sessionName: target.session, roleLabel: target.mode })), + ], + launchScope: { + serverId: typeof options.serverLink.getServerId === 'function' ? options.serverLink.getServerId() : undefined, + sessionName: options.initiatorSession, + }, + // Real hello snapshot, not synthesised placeholder (audit:N2). When the + // daemon hasn't sent a hello yet (`helloEpoch === 0` AND `sentAt === 0`), + // we still record the actual values so projection consumers can detect + // "pre-hello bind" instead of being fed a fake `Date.now()` timestamp. + capabilitySnapshot: helloSnapshot, + // Audit:R3 PR-α — full P2pStaticPolicy snapshot replaces the previous + // ad-hoc { allowScript / allowImplementation / ... } subset. The clone + // ensures runtime mutations to the loaded policy never bleed into a run + // that was already bound under a different policy version. + policySnapshot: structuredClone(options.policySnapshot), + concurrencyAdmission: options.accepted ? { accepted: true } : { accepted: false, reason: 'daemon_busy' }, + }; +} + +// Audit:R3 hardening / task 10.2 — exported so the cron dispatcher (and any +// future automation entry point) can drive the same envelope→compile→bind +// pipeline as `handleSend`. Keeping a single launch authority is the only way +// to ensure cron and manual launches share `daemon_busy` admission, capability +// gating, and `static_policy_mismatch_recompiled` emission. +export async function prepareAdvancedWorkflowLaunch(options: { + cmd: Record; + sessionName: string; + targets: P2pTarget[]; + userText: string; + locale?: string; + projectDir: string; + commandId: string; + serverLink: ServerLink; +}): Promise { + const envelope = buildAdvancedLaunchEnvelopeFromCommand(options.cmd, { + requestId: options.commandId, + sessionName: options.sessionName, + projectRoot: options.projectDir, + userText: options.userText, + locale: options.locale, + }); + if (!envelope) return { ok: true, advancedRounds: [], diagnostics: [] }; + if ((options.cmd.p2pWorkflowLaunchEnvelope || options.cmd.workflowLaunchEnvelope) && hasOldAdvancedLaunchFields(options.cmd)) { + return { ok: false, diagnostics: [makeP2pWorkflowDiagnostic('mixed_advanced_schema_fields', 'parse')] }; + } + const envelopeValidation = validateP2pWorkflowLaunchEnvelope(envelope); + if (!envelopeValidation.ok) return { ok: false, diagnostics: envelopeValidation.diagnostics }; + + let draft: P2pWorkflowDraft | undefined = envelope.advancedDraft; + let contextReducer: P2pContextReducerConfig | undefined; + if (!draft && envelope.oldAdvanced) { + if (envelope.migrationPolicy?.kind !== 'materialize_old_advanced') { + return { ok: false, diagnostics: [makeP2pWorkflowDiagnostic('invalid_launch_envelope', 'parse', { fieldPath: 'migrationPolicy' })] }; + } + try { + draft = materializeOldAdvancedConfigToWorkflowDraft({ + advancedPresetKey: envelope.oldAdvanced.advancedPresetKey, + advancedRounds: envelope.oldAdvanced.advancedRounds as P2pAdvancedRound[] | undefined, + advancedRunTimeoutMinutes: envelope.oldAdvanced.advancedRunTimeoutMinutes, + }); + contextReducer = envelope.oldAdvanced.contextReducer as P2pContextReducerConfig | undefined; + } catch (err) { + return { + ok: false, + diagnostics: [makeP2pWorkflowDiagnostic('invalid_launch_envelope', 'parse', { + summary: err instanceof Error ? err.message : String(err), + })], + }; + } + } + if (!draft) { + return { ok: false, diagnostics: [makeP2pWorkflowDiagnostic('invalid_launch_envelope', 'parse', { fieldPath: 'advancedDraft' })] }; + } + const draftValidation = validateP2pWorkflowDraft(draft); + if (!draftValidation.ok) return { ok: false, diagnostics: draftValidation.diagnostics }; + + // Audit:N4 — staticPolicy must derive from the daemon's actual capability + // advertisement, not from hardcoded permissive overrides. `loadDaemonP2pStaticPolicy` + // is the single source of truth: allow-flags reflect daemon hello capabilities, + // and `concurrency.maxAdvancedRuns` / `concurrency.maxScripts` come from the + // policy default (P2P_WORKFLOW_MAX_ACTIVE_RUNS / P2P_WORKFLOW_MAX_ACTIVE_SCRIPTS). + const baseStaticPolicy = loadDaemonP2pStaticPolicy(options.serverLink); + // R3 PR-α follow-up — UI-driven allowlist. When the envelope carries an + // `allowedExecutables` list (configured in `P2pConfigPanel`), rebuild + // the policy with that list and recompute the hash so bind validation + // sees the user-supplied executables. Daemon-side default is `[]`; the + // envelope is the SOLE source for non-empty allowlists in this product. + // Removes the previous `~/.imcodes/p2p-policy.json` JSON-file workflow + // (off-product for a UI-driven IM client). + const envelopeAllowedExecutables = Array.isArray(envelope.allowedExecutables) + ? [...new Set(envelope.allowedExecutables.filter((entry) => typeof entry === 'string'))].sort() + : []; + const staticPolicy = envelopeAllowedExecutables.length > 0 + ? buildDefaultP2pStaticPolicy({ ...baseStaticPolicy, allowedExecutables: envelopeAllowedExecutables }) + : baseStaticPolicy; + // Audit:R3 PR-γ / N-M5 / V-4 — when the envelope carries a saved + // `expectedStaticPolicyHash` (compiled against an earlier policy version) + // and the daemon's CURRENT policy hash differs, emit + // `static_policy_mismatch_recompiled` (warning severity) so callers know + // the preview's compilation result is no longer authoritative. The daemon + // proceeds with the current policy regardless; this diagnostic only + // documents that a recompile occurred. + const policyMismatchDiagnostics: P2pWorkflowDiagnostic[] = []; + if ( + typeof envelope.expectedStaticPolicyHash === 'string' + && envelope.expectedStaticPolicyHash.length > 0 + && envelope.expectedStaticPolicyHash !== staticPolicy.policyHash + ) { + policyMismatchDiagnostics.push(makeP2pWorkflowDiagnostic('static_policy_mismatch_recompiled', 'bind', { + fieldPath: 'expectedStaticPolicyHash', + summary: `Launch envelope referenced static policy ${envelope.expectedStaticPolicyHash} but daemon recompiled with current policy ${staticPolicy.policyHash ?? ''}.`, + })); + } + const compileResult = compileP2pWorkflowDraft(draft, staticPolicy); + if (!compileResult.ok) { + return { ok: false, diagnostics: [...policyMismatchDiagnostics, ...compileResult.diagnostics] }; + } + + // Audit:N-H3 — admission cap reads `staticPolicy.concurrency.maxAdvancedRuns` + // rather than the bare `P2P_WORKFLOW_MAX_ACTIVE_RUNS` constant, so future + // policy customisation (cron multi-run, supervision, env override) only has + // to update one place. + const activeAdvancedRuns = listP2pRuns().filter((run) => run.advancedP2pEnabled && !P2P_TERMINAL_RUN_STATUSES.has(run.status)); + const bindContext = makeBindRuntimeContext({ + runId: randomUUID(), + requestId: options.commandId, + repoRoot: options.projectDir, + serverLink: options.serverLink, + policySnapshot: staticPolicy, + initiatorSession: options.sessionName, + targets: options.targets, + accepted: activeAdvancedRuns.length < staticPolicy.concurrency.maxAdvancedRuns, + }); + // Audit:N5 / Q5 (binder API single shape). `bindP2pCompiledWorkflow` always + // returns the `P2pBindResult` discriminated union — there is no legacy "no + // ok field" branch. Use the discriminant directly; the dead `else` branch + // that previously inspected `diagnostics.some(severity==='error')` has been + // removed. The reverse-regression suite blocks its reintroduction. + const bindResult = bindP2pCompiledWorkflow(compileResult.workflow, bindContext); + const bindDiagnostics = bindResult.diagnostics; + if (!bindResult.ok) { + // R3 PR-δ (A5 / Cu1-M1) — bind-fail must include any + // `policyMismatchDiagnostics` so callers learn that the daemon + // recompiled with the current policy before bind rejected it. Earlier + // versions returned only `bindDiagnostics`, hiding the + // `static_policy_mismatch_recompiled` warning from observers. + return { ok: false, diagnostics: [...policyMismatchDiagnostics, ...bindDiagnostics] }; + } + + return { + ok: true, + advancedRounds: compiledWorkflowToLegacyAdvancedRounds(compileResult.workflow), + advancedRunTimeoutMs: envelope.oldAdvanced?.advancedRunTimeoutMinutes != null + ? envelope.oldAdvanced.advancedRunTimeoutMinutes * 60_000 + : undefined, + contextReducer, + bound: bindResult.bound, + diagnostics: [ + ...envelopeValidation.diagnostics, + ...policyMismatchDiagnostics, + ...compileResult.diagnostics, + ...bindDiagnostics, + ], + }; +} + +function summarizeP2pWorkflowDiagnostics(diagnostics: P2pWorkflowDiagnostic[]): string { + return diagnostics.map((diagnostic) => diagnostic.code).join(', ') || 'invalid_launch_envelope'; +} + async function handleSend(cmd: Record, serverLink: ServerLink): Promise { const sessionName = (cmd.sessionName ?? cmd.session) as string | undefined; const text = cmd.text as string | undefined; @@ -2324,23 +2729,8 @@ async function handleSend(cmd: Record, serverLink: ServerLink): cancelP2pRun(existingRun.id, serverLink); } - const fileContents: Array<{ path: string; content: string }> = []; const record = getSession(sessionName); const projectDir = record?.projectDir ?? ''; - for (const fp of tokens.files.slice(0, MAX_P2P_FILE_PULL_COUNT)) { - try { - const absPath = nodePath.isAbsolute(fp) ? fp : nodePath.join(projectDir, fp); - // Check for binary content (null bytes anywhere in the capped content) - const content = await fsReadFileRaw(absPath, 'utf8'); - const capped = content.slice(0, 50_000); - if (capped.includes('\0')) { - // Binary file (image, etc.) — include path reference so agents can read it - fileContents.push({ path: absPath, content: '' }); - continue; - } - fileContents.push({ path: fp, content: capped }); // cap at 50KB - } catch { /* ignore unreadable files */ } - } // Auto-append language instruction based on the user's selected i18n locale if (p2pLocale && !p2pExtraPrompt?.match(/语言|language|lang|中文|日本語|한국어|español|русский/i)) { const LOCALE_NAMES: Record = { @@ -2352,6 +2742,62 @@ async function handleSend(cmd: Record, serverLink: ServerLink): const langInstr = `Use the user's selected i18n language (${langName}) for the discussion.`; p2pExtraPrompt = p2pExtraPrompt ? `${p2pExtraPrompt}\n${langInstr}` : langInstr; } + const advancedLaunchRequested = hasOldAdvancedLaunchFields(cmd) + || isPlainRecord((cmd as Record).p2pWorkflowLaunchEnvelope) + || isPlainRecord((cmd as Record).workflowLaunchEnvelope); + if (advancedLaunchRequested && tokens.files.length > 0) { + const diagnostic = makeP2pWorkflowDiagnostic('invalid_launch_envelope', 'parse', { + fieldPath: 'tokens.files', + summary: 'Advanced workflow launch requires explicit startContext file references.', + }); + const errMsg = summarizeP2pWorkflowDiagnostics([diagnostic]); + timelineEmitter.emit(sessionName, 'command.ack', { commandId: effectiveId, status: 'error', error: errMsg }); + emitCommandAckReliable(serverLink, { commandId: effectiveId, sessionName, status: 'error', error: errMsg }); + return; + } + const preparedAdvanced = await prepareAdvancedWorkflowLaunch({ + cmd, + sessionName, + targets: tokens.agents, + userText: tokens.cleanText, + locale: p2pLocale, + projectDir, + commandId: effectiveId, + serverLink, + }); + if (!preparedAdvanced.ok) { + const errMsg = summarizeP2pWorkflowDiagnostics(preparedAdvanced.diagnostics); + logger.warn({ sessionName, diagnostics: preparedAdvanced.diagnostics }, 'P2P advanced workflow launch rejected'); + timelineEmitter.emit(sessionName, 'command.ack', { commandId: effectiveId, status: 'error', error: errMsg }); + emitCommandAckReliable(serverLink, { commandId: effectiveId, sessionName, status: 'error', error: errMsg }); + return; + } + const fileContents: Array<{ path: string; content: string }> = []; + if (!advancedLaunchRequested) { + for (const fp of tokens.files.slice(0, MAX_P2P_FILE_PULL_COUNT)) { + try { + const absPath = nodePath.isAbsolute(fp) ? fp : nodePath.join(projectDir, fp); + // Check for binary content (null bytes anywhere in the capped content) + const content = await fsReadFileRaw(absPath, 'utf8'); + const capped = content.slice(0, 50_000); + if (capped.includes('\0')) { + // Binary file (image, etc.) — include path reference so agents can read it + fileContents.push({ path: absPath, content: '' }); + continue; + } + fileContents.push({ path: fp, content: capped }); // cap at 50KB + } catch { /* ignore unreadable files */ } + } + } + // Audit:V-1 / N-H1 — when the prepared advanced launch carries a `bound` + // workflow (envelope path), funnel it through the typed + // `advanced: { kind: 'envelope_compiled', bound, advancedRounds }` + // discriminated union so the orchestrator stores capabilitySnapshot & + // currentDaemonPolicy on the run state. Pure-legacy launches (no + // envelope, no compiled rounds) fall back to the deprecated top-level + // `advancedPresetKey`/`advancedRounds` passthrough until v1b. + const compiledFromEnvelope = preparedAdvanced.bound !== undefined + && preparedAdvanced.advancedRounds.length > 0; const run = await startP2pRun({ initiatorSession: sessionName, targets: tokens.agents, @@ -2363,10 +2809,27 @@ async function handleSend(cmd: Record, serverLink: ServerLink): extraPrompt: p2pExtraPrompt, modeOverride: resolvedMode || undefined, hopTimeoutMs: p2pHopTimeoutMs, - advancedPresetKey: p2pAdvancedPresetKey, - advancedRounds: p2pAdvancedRounds, - advancedRunTimeoutMs: p2pAdvancedRunTimeoutMinutes != null ? p2pAdvancedRunTimeoutMinutes * 60_000 : undefined, - contextReducer: p2pContextReducer, + ...(compiledFromEnvelope + ? { + advanced: { + kind: 'envelope_compiled' as const, + bound: preparedAdvanced.bound!, + advancedRounds: preparedAdvanced.advancedRounds, + ...(preparedAdvanced.advancedRunTimeoutMs !== undefined + ? { advancedRunTimeoutMs: preparedAdvanced.advancedRunTimeoutMs } + : {}), + ...(preparedAdvanced.contextReducer + ? { contextReducer: preparedAdvanced.contextReducer } + : {}), + }, + advancedPresetKey: 'openspec', + } + : { + advancedPresetKey: p2pAdvancedPresetKey, + advancedRounds: p2pAdvancedRounds, + advancedRunTimeoutMs: p2pAdvancedRunTimeoutMinutes != null ? p2pAdvancedRunTimeoutMinutes * 60_000 : undefined, + contextReducer: p2pContextReducer, + }), }); // NOTE: do NOT emit a `user.message` on the initiator timeline here. // A P2P send is a COMMAND to start a discussion, not a chat message to @@ -3842,73 +4305,193 @@ async function handleAskAnswer(cmd: Record): Promise { // ── P2P discussion file listing ──────────────────────────────────────────── -async function handleP2pListDiscussions(_cmd: Record, serverLink: ServerLink): Promise { - // Collect unique project dirs from all sessions - const projectDirs = new Set(); - for (const s of listSessions()) { - if (s.projectDir) projectDirs.add(s.projectDir); +function isPlainRecord(value: unknown): value is Record { + return !!value && typeof value === 'object' && !Array.isArray(value); +} + +function stringField(record: Record, key: string): string | undefined { + const value = record[key]; + return typeof value === 'string' && value.trim() ? value : undefined; +} + +async function canonicalProjectDir(projectDir: string): Promise { + try { + return await fsRealpath(projectDir); + } catch { + return nodePath.resolve(projectDir); } - const discussions: Array<{ id: string; fileName: string; path: string; preview: string; mtime: number }> = []; - for (const projectDir of projectDirs) { - const dir = imcSubDir(projectDir, 'discussions'); - try { - const entries = await fsReaddir(dir); - const files = entries.filter((entry) => { - if (!entry.endsWith('.md')) return false; - // Keep only canonical discussion documents in the history list. - // Intermediate hop artifacts and reducer snapshots are implementation - // details and should not crowd out the main discussion file. - if (/\.round\d+\.hop\d+\.md$/i.test(entry)) return false; - if (/\.reducer\.\d+\.md$/i.test(entry)) return false; - return true; - }); - for (const f of files) { - try { - const fullPath = nodePath.join(dir, f); - const s = await fsStat(fullPath); - const content = await fsReadFileRaw(fullPath, 'utf8'); - const reqMatch = content.match(/## User Request\s*\n+(.+)/); - const preview = reqMatch?.[1]?.trim().slice(0, 120) || f; - discussions.push({ id: f.replace('.md', ''), fileName: f, path: fullPath, preview, mtime: s.mtimeMs }); - } catch { /* skip unreadable */ } - } - } catch { /* dir may not exist */ } +} + +async function collectKnownProjectDirs(): Promise> { + const dirs = new Map(); + for (const session of listSessions()) { + if (!session.projectDir) continue; + const canonical = await canonicalProjectDir(session.projectDir); + dirs.set(canonical, session.projectDir); + } + return dirs; +} + +async function resolveP2pDiscussionProjectScope(cmd: Record): Promise<{ projectDir: string; canonicalProjectDir: string } | null> { + const scope = isPlainRecord(cmd.scope) ? cmd.scope : {}; + const requestedSession = stringField(scope, 'sessionName') ?? stringField(cmd, 'sessionName'); + if (requestedSession) { + const session = getSession(requestedSession); + if (!session?.projectDir) return null; + return { + projectDir: session.projectDir, + canonicalProjectDir: await canonicalProjectDir(session.projectDir), + }; + } + + const requestedProjectDir = stringField(scope, 'projectDir') + ?? stringField(scope, 'cwd') + ?? stringField(cmd, 'projectDir') + ?? stringField(cmd, 'cwd'); + const knownProjectDirs = await collectKnownProjectDirs(); + if (requestedProjectDir) { + const requestedCanonical = await canonicalProjectDir(requestedProjectDir); + const known = knownProjectDirs.get(requestedCanonical); + return known + ? { projectDir: known, canonicalProjectDir: requestedCanonical } + : null; + } + + if (knownProjectDirs.size === 1) { + const [canonical, projectDir] = [...knownProjectDirs.entries()][0]!; + return { projectDir, canonicalProjectDir: canonical }; + } + + return null; +} + +function isPathUnderDir(filePath: string, dir: string): boolean { + const relative = nodePath.relative(dir, nodePath.resolve(filePath)); + return relative === '' || (!!relative && !relative.startsWith('..') && !nodePath.isAbsolute(relative)); +} + +async function handleP2pListDiscussions(cmd: Record, serverLink: ServerLink): Promise { + const requestId = cmd.requestId as string | undefined; + const scope = await resolveP2pDiscussionProjectScope(cmd); + if (!scope) { + serverLink.send({ type: P2P_WORKFLOW_MSG.LIST_DISCUSSIONS_RESPONSE, requestId, discussions: [], error: 'missing_or_invalid_scope' }); + return; } + const discussions: Array<{ id: string; fileName: string; preview: string; mtime: number }> = []; + const dir = imcSubDir(scope.projectDir, 'discussions'); + try { + const entries = await fsReaddir(dir); + const files = entries.filter((entry) => { + if (!entry.endsWith('.md')) return false; + // Keep only canonical discussion documents in the history list. + // Intermediate hop artifacts and reducer snapshots are implementation + // details and should not crowd out the main discussion file. + if (/\.round\d+\.hop\d+\.md$/i.test(entry)) return false; + if (/\.reducer\.\d+\.md$/i.test(entry)) return false; + return true; + }); + for (const f of files) { + try { + const fullPath = nodePath.join(dir, f); + const s = await fsStat(fullPath); + const content = await fsReadFileRaw(fullPath, 'utf8'); + const reqMatch = content.match(/## User Request\s*\n+(.+)/); + const preview = reqMatch?.[1]?.trim().slice(0, 120) || f; + discussions.push({ id: f.replace('.md', ''), fileName: f, preview, mtime: s.mtimeMs }); + } catch { /* skip unreadable */ } + } + } catch { /* dir may not exist */ } // Sort by mtime descending, cap at 50 discussions.sort((a, b) => b.mtime - a.mtime); - serverLink.send({ type: 'p2p.list_discussions_response', discussions: discussions.slice(0, 50) }); + serverLink.send({ type: P2P_WORKFLOW_MSG.LIST_DISCUSSIONS_RESPONSE, requestId, discussions: discussions.slice(0, 50) }); } async function handleP2pReadDiscussion(cmd: Record, serverLink: ServerLink): Promise { const id = cmd.id as string | undefined; const requestId = cmd.requestId as string | undefined; - if (!id) { serverLink.send({ type: 'p2p.read_discussion_response', requestId, error: 'missing_id' }); return; } + if (!id) { serverLink.send({ type: P2P_WORKFLOW_MSG.READ_DISCUSSION_RESPONSE, requestId, error: 'missing_id' }); return; } + if (id.includes('/') || id.includes('\\') || id.includes('\0') || id === '.' || id === '..') { + serverLink.send({ type: P2P_WORKFLOW_MSG.READ_DISCUSSION_RESPONSE, id, requestId, error: 'invalid_id' }); + return; + } + const scope = await resolveP2pDiscussionProjectScope(cmd); + if (!scope) { + serverLink.send({ type: P2P_WORKFLOW_MSG.READ_DISCUSSION_RESPONSE, id, requestId, error: 'missing_or_invalid_scope' }); + return; + } + const discussionsDir = nodePath.resolve(imcSubDir(scope.projectDir, 'discussions')); + + // Tasks 5.4 / 12.4 — when the responder is reading on behalf of an active + // run (`runId` supplied), use the per-(run, source) offset tracker so + // repeated reads only return new bytes appended after the prior offset. + // Callers that don't supply a runId keep the historical full-file read + // semantics for backward compatibility (e.g. discussions list UI). + const runId = typeof cmd.runId === 'string' && cmd.runId ? cmd.runId : undefined; + const rawPolicy = typeof cmd.offsetMismatchPolicy === 'string' ? cmd.offsetMismatchPolicy : undefined; + const policy: 'fail' | 'reset' = rawPolicy === 'fail' ? 'fail' : 'reset'; + + async function respondWithOffset(filePath: string): Promise { + if (!runId) return false; + try { + const result = await readP2pDiscussionWithOffset({ runId, sourceKey: id!, filePath, policy }); + serverLink.send({ + type: P2P_WORKFLOW_MSG.READ_DISCUSSION_RESPONSE, + id, + requestId, + content: result.content, + offset: { ...result.newOffset }, + offsetReset: result.reset, + ...(result.diagnostics.length ? { diagnostics: result.diagnostics } : {}), + }); + return true; + } catch (err) { + const wrapped = err as Error & { + code?: string; + diagnostic?: P2pWorkflowDiagnostic; + result?: { newOffset?: unknown; diagnostics?: P2pWorkflowDiagnostic[] }; + }; + if (wrapped?.code === 'discussion_read_offset_mismatch') { + serverLink.send({ + type: P2P_WORKFLOW_MSG.READ_DISCUSSION_RESPONSE, + id, + requestId, + error: 'offset_mismatch', + offsetReset: 'mismatch_fail_closed', + ...(wrapped.result?.newOffset ? { offset: wrapped.result.newOffset } : {}), + ...(wrapped.result?.diagnostics?.length ? { diagnostics: wrapped.result.diagnostics } : {}), + }); + return true; + } + // Any other read error (ENOENT etc.) → caller falls back to legacy paths. + return false; + } + } // 1. Check active P2P runs first (in-memory, always fresh) for (const run of listP2pRuns()) { if (run.id === id || run.discussionId === id) { + if (!isPathUnderDir(run.contextFilePath, discussionsDir)) continue; + if (await respondWithOffset(run.contextFilePath)) return; try { const content = await fsReadFileRaw(run.contextFilePath, 'utf8'); - serverLink.send({ type: 'p2p.read_discussion_response', id, requestId, content }); + serverLink.send({ type: P2P_WORKFLOW_MSG.READ_DISCUSSION_RESPONSE, id, requestId, content }); return; } catch { /* file may not exist yet */ } } } - // 2. Search across all known project .imc/discussions/ directories - const projectDirs = new Set(); - for (const s of listSessions()) { - if (s.projectDir) projectDirs.add(s.projectDir); - } - for (const projectDir of projectDirs) { - const filePath = nodePath.join(imcSubDir(projectDir, 'discussions'), `${id}.md`); - try { - const content = await fsReadFileRaw(filePath, 'utf8'); - serverLink.send({ type: 'p2p.read_discussion_response', id, requestId, content }); - return; - } catch { /* try next project */ } + const filePath = nodePath.join(discussionsDir, `${id}.md`); + if (!isPathUnderDir(filePath, discussionsDir)) { + serverLink.send({ type: P2P_WORKFLOW_MSG.READ_DISCUSSION_RESPONSE, id, requestId, error: 'invalid_id' }); + return; } - serverLink.send({ type: 'p2p.read_discussion_response', id, requestId, error: 'not_found' }); + if (await respondWithOffset(filePath)) return; + try { + const content = await fsReadFileRaw(filePath, 'utf8'); + serverLink.send({ type: P2P_WORKFLOW_MSG.READ_DISCUSSION_RESPONSE, id, requestId, content }); + return; + } catch { /* not found */ } + serverLink.send({ type: P2P_WORKFLOW_MSG.READ_DISCUSSION_RESPONSE, id, requestId, error: 'not_found' }); } // ── Discussion handlers ──────────────────────────────────────────────────── @@ -5003,17 +5586,55 @@ async function handleP2pCancel(cmd: Record, serverLink: ServerL const runId = cmd.runId as string | undefined; if (!runId) return; const ok = await cancelP2pRun(runId, serverLink); - try { serverLink.send({ type: 'p2p.cancel_response', runId, ok }); } catch { /* ignore */ } + try { serverLink.send({ type: P2P_WORKFLOW_MSG.CANCEL_RESPONSE, runId, ok }); } catch { /* ignore */ } } async function handleP2pStatus(cmd: Record, serverLink: ServerLink): Promise { const runId = cmd.runId as string | undefined; + const requestId = cmd.requestId as string | undefined; + // Resolve scope mirror of handleP2pListDiscussions/handleP2pReadDiscussion: every + // p2p.status request must be tied to a project context. Without scope we fail + // closed (empty list / null run) so a browser viewer of project A cannot + // observe runs belonging to project B that happens to share this daemon. + const scope = await resolveP2pDiscussionProjectScope(cmd); + if (!scope) { + if (runId) { + try { serverLink.send({ type: P2P_WORKFLOW_MSG.STATUS_RESPONSE, requestId, runId, run: null, error: 'missing_or_invalid_scope' }); } catch { /* ignore */ } + } else { + try { serverLink.send({ type: P2P_WORKFLOW_MSG.STATUS_RESPONSE, requestId, runs: [], error: 'missing_or_invalid_scope' }); } catch { /* ignore */ } + } + return; + } + const resolvedScope = scope; + const discussionsDir = nodePath.resolve(imcSubDir(resolvedScope.projectDir, 'discussions')); + // A run belongs to scope when its discussion file lives inside that project's + // .imc/discussions directory. We also require initiatorSession (when set) to + // resolve to the same canonical project — this catches edge cases where a run + // was started against an external file path but the session itself is in a + // different project. + async function runMatchesScope(run: ReturnType): Promise { + if (!run) return false; + if (run.contextFilePath && isPathUnderDir(run.contextFilePath, discussionsDir)) return true; + if (run.initiatorSession) { + const initRecord = getSession(run.initiatorSession); + if (initRecord?.projectDir) { + const canon = await canonicalProjectDir(initRecord.projectDir); + if (canon === resolvedScope.canonicalProjectDir) return true; + } + } + return false; + } if (runId) { const run = getP2pRun(runId); - try { serverLink.send({ type: 'p2p.status_response', runId, run: run ? serializeP2pRun(run) : null }); } catch { /* ignore */ } + const inScope = await runMatchesScope(run); + try { serverLink.send({ type: P2P_WORKFLOW_MSG.STATUS_RESPONSE, requestId, runId, run: inScope && run ? serializeP2pRun(run) : null }); } catch { /* ignore */ } } else { const runs = listP2pRuns(); - try { serverLink.send({ type: 'p2p.status_response', runs: runs.map((run) => serializeP2pRun(run)) }); } catch { /* ignore */ } + const filtered: typeof runs = []; + for (const run of runs) { + if (await runMatchesScope(run)) filtered.push(run); + } + try { serverLink.send({ type: P2P_WORKFLOW_MSG.STATUS_RESPONSE, requestId, runs: filtered.map((run) => serializeP2pRun(run)) }); } catch { /* ignore */ } } } diff --git a/src/daemon/cron-executor.ts b/src/daemon/cron-executor.ts index 31c1836b0..c6947962b 100644 --- a/src/daemon/cron-executor.ts +++ b/src/daemon/cron-executor.ts @@ -9,11 +9,16 @@ import { getSession } from '../store/session-store.js'; import { sessionName, getTransportRuntime } from '../agent/session-manager.js'; import { detectStatusAsync, type AgentType } from '../agent/detect.js'; import { startP2pRun, type P2pTarget } from './p2p-orchestrator.js'; +import { prepareAdvancedWorkflowLaunch } from './command-handler.js'; import { timelineEmitter } from './timeline-emitter.js'; import type { TimelineEvent } from './timeline-event.js'; import type { ServerLink } from './server-link.js'; import logger from '../util/logger.js'; +/** Default retry budget when daemon admission returns `daemon_busy`. */ +const CRON_DAEMON_BUSY_DEFAULT_ATTEMPTS = 3; +const CRON_DAEMON_BUSY_DEFAULT_DELAY_MS = 5_000; + const BUSY_STATES = new Set(['streaming', 'thinking', 'tool_running', 'permission']); export async function executeCronJob(msg: CronDispatchMessage, serverLink: ServerLink): Promise { @@ -166,18 +171,113 @@ export async function executeCronJob(msg: CronDispatchMessage, serverLink: Serve } logger.info({ jobId, jobName, initiator: name, targets: targets.length, mode }, 'Cron: starting P2P discussion'); - const run = await startP2pRun({ - initiatorSession: name, - targets, - userText: topic, - fileContents: [], - serverLink, - rounds: rounds ?? 1, - }); - // Link cron execution to P2P discussion so frontend can navigate - try { - serverLink.send({ type: 'cron.p2p_linked', jobId, discussionId: run.discussionId, runId: run.id }); - } catch { /* not critical */ } + + // Audit:R3 hardening / task 10.2 — when the cron action carries + // `workflowLaunchEnvelope`, route the launch through the SAME envelope + // path as manual launches so cron inherits capability gating, policy + // authority enforcement, and `static_policy_mismatch_recompiled` emission. + // Legacy cron rows without an envelope continue to use the direct path. + const initiatorRecord = getSession(name); + const projectDir = initiatorRecord?.projectDir ?? process.cwd(); + const cronActionRecord = action as unknown as { + workflowLaunchEnvelope?: Record; + daemonBusyRetry?: { attempts: number; delayMs: number }; + }; + const envelopeForLaunch = cronActionRecord.workflowLaunchEnvelope; + + // Audit:R3 hardening / task 10.3 — bounded daemon_busy retry. cron + // dispatcher MUST NOT loop indefinitely: after `attempts` failures, + // mark the job failed with a stable diagnostic. Default 3 attempts / + // 5 s delay; overridable per cron job via `daemonBusyRetry`. + const retry = cronActionRecord.daemonBusyRetry ?? { + attempts: CRON_DAEMON_BUSY_DEFAULT_ATTEMPTS, + delayMs: CRON_DAEMON_BUSY_DEFAULT_DELAY_MS, + }; + + let lastDaemonBusyAttempt = 0; + while (lastDaemonBusyAttempt < retry.attempts) { + lastDaemonBusyAttempt += 1; + try { + let run; + if (envelopeForLaunch) { + // Synthesize a minimal cmd Record that prepareAdvancedWorkflowLaunch + // can parse (it only reads `p2pWorkflowLaunchEnvelope` / + // `workflowLaunchEnvelope` and old-advanced fields). + const fakeCmd: Record = { workflowLaunchEnvelope: envelopeForLaunch }; + const prepared = await prepareAdvancedWorkflowLaunch({ + cmd: fakeCmd, + sessionName: name, + targets, + userText: topic, + projectDir, + commandId: `cron-${jobId}-${executionId ?? 'now'}-${lastDaemonBusyAttempt}`, + serverLink, + }); + if (!prepared.ok) { + // Determine whether failure is daemon_busy (retryable) or terminal. + const busy = prepared.diagnostics.some((d) => d.code === 'daemon_busy'); + if (busy && lastDaemonBusyAttempt < retry.attempts) { + logger.warn({ jobId, attempt: lastDaemonBusyAttempt, of: retry.attempts }, 'Cron: daemon_busy, retrying'); + await new Promise((r) => setTimeout(r, retry.delayMs)); + continue; + } + // Terminal failure (or budget exhausted) + const codes = prepared.diagnostics.map((d) => d.code).join(', '); + sendCommandResult(serverLink, { + type: CRON_MSG.COMMAND_RESULT, + jobId, + executionId, + status: 'error', + detail: busy + ? `Cron P2P launch exhausted ${retry.attempts} daemon_busy retries` + : `Cron P2P launch rejected: ${codes}`, + }); + return; + } + run = await startP2pRun({ + initiatorSession: name, + targets, + userText: topic, + fileContents: [], + serverLink, + rounds: rounds ?? 1, + advanced: { + kind: 'envelope_compiled', + bound: prepared.bound!, + advancedRounds: prepared.advancedRounds, + ...(prepared.advancedRunTimeoutMs !== undefined ? { advancedRunTimeoutMs: prepared.advancedRunTimeoutMs } : {}), + ...(prepared.contextReducer ? { contextReducer: prepared.contextReducer } : {}), + }, + }); + } else { + // Legacy cron path (no envelope) — direct startP2pRun. + run = await startP2pRun({ + initiatorSession: name, + targets, + userText: topic, + fileContents: [], + serverLink, + rounds: rounds ?? 1, + }); + } + // Link cron execution to P2P discussion so frontend can navigate + try { + serverLink.send({ type: 'cron.p2p_linked', jobId, discussionId: run.discussionId, runId: run.id }); + } catch { /* not critical */ } + return; + } catch (err) { + // startP2pRun may throw for non-busy reasons; treat as terminal. + logger.error({ jobId, err }, 'Cron: P2P launch threw'); + sendCommandResult(serverLink, { + type: CRON_MSG.COMMAND_RESULT, + jobId, + executionId, + status: 'error', + detail: `Cron P2P launch failed: ${formatErr(err)}`, + }); + return; + } + } return; } diff --git a/src/daemon/p2p-discussion-writer.ts b/src/daemon/p2p-discussion-writer.ts new file mode 100644 index 000000000..21708e687 --- /dev/null +++ b/src/daemon/p2p-discussion-writer.ts @@ -0,0 +1,165 @@ +/** + * Per-run non-blocking discussion-file writer. + * + * R3 v1b follow-up (W2) — `appendFile(run.contextFilePath, segment)` was + * previously awaited on the script / logic dispatch hot path. With large + * NDJSON outputs that introduces visible latency before the executor can + * advance to the next round. We now hand writes to a per-run serialized + * queue: the dispatcher returns immediately, the queue drains in the + * background, and failures surface via `addHelperDiagnostic` / logger.warn + * (preserving the D-O3 spec: in-memory `authoritativeSegment` is the + * verdict source-of-truth; the discussion file is best-effort audit). + * + * The queue is bounded by byte budget per run — once exceeded, oldest + * pending segments are dropped with a single warning so a runaway + * producer can't OOM the daemon. The queue writes serially per file + * path so segments stay ordered. + */ + +import { appendFile } from 'node:fs/promises'; +import logger from '../util/logger.js'; + +export const P2P_DISCUSSION_WRITE_QUEUE_MAX_BYTES = 4 * 1024 * 1024; // 4 MiB pending per run + +interface RunQueue { + pendingSegments: string[]; + pendingBytes: number; + draining: boolean; + /** Notified after each drain step; tests can `await` it. */ + drainPromise: Promise; + resolveDrain: () => void; + /** Invoked after each successful append (test hook). */ + onWriteFailure?: (error: unknown) => void; + /** + * R3 v2 PR-ζ (M1) — Invoked when the queue drops a pending segment due + * to backpressure (cap exceeded). Allows the orchestrator to surface a + * `P2P_DISCUSSION_WRITE_FAILED` helper diagnostic so audit gaps are + * visible to web/UI, not just buried in daemon logs. + */ + onSegmentDropped?: (droppedBytes: number, queuedBytes: number) => void; +} + +const queues = new Map(); + +function makeDrainPromise(queue: RunQueue): void { + let resolve!: () => void; + queue.drainPromise = new Promise((res) => { resolve = res; }); + queue.resolveDrain = resolve; +} + +function getOrCreateQueue(filePath: string): RunQueue { + let queue = queues.get(filePath); + if (!queue) { + queue = { + pendingSegments: [], + pendingBytes: 0, + draining: false, + drainPromise: Promise.resolve(), + resolveDrain: () => {}, + }; + makeDrainPromise(queue); + queues.set(filePath, queue); + } + return queue; +} + +/** + * Enqueue a discussion-file write. Returns immediately — the caller does + * NOT await disk I/O. `onWriteFailure` (when supplied) is invoked once + * per failed write so the orchestrator can surface a helper diagnostic + * with the run's `currentRoundAttempt` context. + */ +export function enqueueP2pDiscussionWrite( + filePath: string, + segment: string, + onWriteFailure?: (error: unknown) => void, + onSegmentDropped?: (droppedBytes: number, queuedBytes: number) => void, +): void { + if (segment.length === 0) return; + const queue = getOrCreateQueue(filePath); + if (onWriteFailure) queue.onWriteFailure = onWriteFailure; + if (onSegmentDropped) queue.onSegmentDropped = onSegmentDropped; + // Backpressure: if pending buffer exceeds cap, drop oldest segments. + // We never drop the newest write; that's the one carrying the latest + // executor decision and the most useful audit data. + while (queue.pendingBytes + segment.length > P2P_DISCUSSION_WRITE_QUEUE_MAX_BYTES && queue.pendingSegments.length > 0) { + const dropped = queue.pendingSegments.shift()!; + queue.pendingBytes -= dropped.length; + logger.warn( + { filePath, droppedBytes: dropped.length, queuedBytes: queue.pendingBytes }, + 'P2P: discussion write queue full, dropping oldest pending segment', + ); + // R3 v2 PR-ζ (M1) — surface drop to the orchestrator so a helper + // diagnostic appears in the run state (web/UI can render it). + try { queue.onSegmentDropped?.(dropped.length, queue.pendingBytes); } catch { /* swallow listener errors */ } + } + queue.pendingSegments.push(segment); + queue.pendingBytes += segment.length; + if (!queue.draining) { + queue.draining = true; + void drain(filePath, queue); + } +} + +/** + * R3 v2 PR-ζ (A6 / O4) — Drop the queue for `filePath`. Called by the + * orchestrator's terminal cleanup hook so the per-run queue Map does NOT + * leak run objects via the `onWriteFailure` / `onSegmentDropped` + * closures. Pending segments are flushed best-effort first; failure is + * swallowed (run is terminal, no consumer to notify). + */ +export async function dropP2pDiscussionWriteQueue(filePath: string): Promise { + const queue = queues.get(filePath); + if (!queue) return; + try { + if (queue.draining || queue.pendingSegments.length > 0) { + await queue.drainPromise; + } + } catch { + // ignore — best effort + } + queues.delete(filePath); +} + +async function drain(filePath: string, queue: RunQueue): Promise { + while (queue.pendingSegments.length > 0) { + // Coalesce: write all pending segments in one call so we minimise + // open() / fsync() syscalls and keep ordering trivially correct. + const batch = queue.pendingSegments.join(''); + queue.pendingSegments = []; + queue.pendingBytes = 0; + try { + await appendFile(filePath, batch, 'utf8'); + } catch (error) { + logger.warn( + { filePath, error: error instanceof Error ? error.message : String(error) }, + 'P2P: discussion write failed (queue)', + ); + try { queue.onWriteFailure?.(error); } catch { /* swallow listener errors */ } + } + } + queue.draining = false; + // Wake up flush waiters and prepare a fresh promise for the next batch. + const resolve = queue.resolveDrain; + makeDrainPromise(queue); + resolve(); +} + +/** + * Wait until the queue for `filePath` is empty. Returned promise resolves + * once the next drain cycle finishes; callers awaiting before any + * enqueue may resolve immediately. Used by tests + by run shutdown when + * we want to guarantee the discussion file is up-to-date before + * producing the final summary. + */ +export async function flushP2pDiscussionWriteQueue(filePath: string): Promise { + const queue = queues.get(filePath); + if (!queue) return; + if (!queue.draining && queue.pendingSegments.length === 0) return; + await queue.drainPromise; +} + +/** Test-only: drop all queues (between tests). */ +export function __resetP2pDiscussionWriteQueueForTests(): void { + queues.clear(); +} diff --git a/src/daemon/p2p-orchestrator.ts b/src/daemon/p2p-orchestrator.ts index d63ce6478..01b7b76dc 100644 --- a/src/daemon/p2p-orchestrator.ts +++ b/src/daemon/p2p-orchestrator.ts @@ -24,6 +24,43 @@ import { type P2pResolvedPlan, type P2pResolvedRound, } from '../../shared/p2p-advanced.js'; +import type { + P2pBindRuntimeContext, + P2pBoundWorkflow, + StartP2pRunAdvancedSource, +} from '../../shared/p2p-workflow-types.js'; +import { recheckDangerousNodeCapabilities } from './p2p-workflow-policy-recheck.js'; +import { loadDaemonP2pStaticPolicy, getCurrentDaemonWorkflowCapabilities } from './p2p-workflow-static-policy.js'; +// Audit:R2-N1 / N5 — script-node production wiring. `runP2pScriptNode` was +// shipped in PR-§12.1 but had ZERO production callers. The orchestrator now +// invokes it for every compiled node with `nodeKind === 'script'`. Reverse- +// regression #32 locks this so a future refactor can't reopen the gap. +import { runP2pScriptNode } from './p2p-workflow-script-runner.js'; +import { acquireScriptSlot, releaseScriptSlot } from './p2p-workflow-script-concurrency.js'; +// Audit:R2-N2 — artifact runtime production wiring. `freezeP2pArtifactIdentity` +// + `captureP2pArtifactBaseline` + `verifyP2pArtifactBaselineDelta` were +// shipped in PR-§12.2 but had ZERO production callers. envelope_compiled runs +// with `openspec_convention` artifacts now flow through the new helpers. +import { + clearPersistedFrozenP2pArtifactIdentity, + freezeP2pArtifactIdentity, + captureP2pArtifactBaseline, + verifyP2pArtifactBaselineDelta, + loadPersistedFrozenP2pArtifactIdentities, + type P2pArtifactBaseline, + type P2pFrozenArtifactIdentity, +} from './p2p-workflow-artifact-runtime.js'; +import { makeP2pWorkflowDiagnostic, type P2pWorkflowDiagnostic } from '../../shared/p2p-workflow-diagnostics.js'; +import { evaluateP2pLogic } from '../../shared/p2p-workflow-logic-evaluator.js'; +import type { P2pWorkflowVariableValue } from '../../shared/p2p-workflow-types.js'; +import { + P2P_SCRIPT_RETRIABLE_DIAGNOSTIC_CODES, + P2P_SCRIPT_RETRY_DEFAULT_ATTEMPTS, + P2P_WORKFLOW_VARIABLE_ARRAY_MAX_ELEMENTS, + P2P_WORKFLOW_VARIABLE_ARRAY_MAX_ELEMENT_BYTES, + P2P_WORKFLOW_VARIABLE_NAME_PATTERN, +} from '../../shared/p2p-workflow-constants.js'; +import { dropP2pDiscussionWriteQueue, enqueueP2pDiscussionWrite, flushP2pDiscussionWriteQueue } from './p2p-discussion-writer.js'; import { formatP2pParticipantIdentity, shortP2pSessionName } from '../../shared/p2p-participant.js'; import { P2P_TERMINAL_HOP_STATUSES, @@ -68,9 +105,25 @@ export interface StartP2pRunOptions { extraPrompt?: string; modeOverride?: string; hopTimeoutMs?: number; + /** + * Source of the advanced rounds (audit:V-1 / N-H1 / Q1). When supplied, + * `advanced.kind === 'envelope_compiled'` carries the bound workflow whose + * `bindContext.capabilitySnapshot` and `currentDaemonPolicy` are stored on + * the run state for downstream `recheckDangerousNodeCapabilities` calls. + * Pass `kind: 'supervision_internal'` to make the supervision escape hatch + * explicit in source review and reverse-regression checks. + * + * Older callers (cron / tests) may continue to pass the legacy + * `advancedPresetKey` / `advancedRounds` fields directly; v1b deletes them. + */ + advanced?: StartP2pRunAdvancedSource; + /** @deprecated v1a passthrough — prefer `advanced` for new call sites. Removed in v1b. */ advancedPresetKey?: string; + /** @deprecated v1a passthrough — prefer `advanced` for new call sites. Removed in v1b. */ advancedRounds?: P2pAdvancedRound[]; + /** @deprecated v1a passthrough — prefer `advanced` for new call sites. Removed in v1b. */ advancedRunTimeoutMs?: number; + /** @deprecated v1a passthrough — prefer `advanced` for new call sites. Removed in v1b. */ contextReducer?: P2pContextReducerConfig; } @@ -131,12 +184,71 @@ export interface P2pRun { helperEligibleSnapshot: P2pParticipantSnapshotEntry[]; contextReducer?: P2pContextReducerConfig; advancedRunTimeoutMs?: number; + /** + * Bind-time capability snapshot (audit:V-1 / N-H1). Present iff the run was + * started via `advanced: { kind: 'envelope_compiled', bound }` — i.e. the + * bound workflow flowed all the way through `prepareAdvancedWorkflowLaunch`. + * Stored on the run so dangerous-node executors can call + * `recheckDangerousNodeCapabilities` against the live daemon policy at + * execution time. + */ + capabilitySnapshot?: P2pBindRuntimeContext['capabilitySnapshot']; + /** + * Bind-time daemon policy snapshot (audit:H3 / R3 PR-α). Full + * `P2pStaticPolicy` shape so `recheckDangerousNodeCapabilities` can compare + * `allowedExecutables` / `allowImplementationPermission` / + * `allowInterpreterScripts` field-for-field against the live daemon policy + * at executor time. + */ + policySnapshot?: P2pBindRuntimeContext['policySnapshot']; + /** + * Full bound workflow (audit:R3 PR-α / N-M1). Holds + * `compiled.derivedRequiredCapabilities` plus the original bind context; + * required for v1b dangerous-node recheck because the helper must know what + * the run was bound for, not what the current draft would re-derive. + * + * MUST NOT be serialized to web/DB — `serializeP2pRun()` and + * `sanitizeP2pOrchestrationRunForBridge` allowlists exclude it. See + * reverse-regression #17 / #18. + */ + boundWorkflow?: import('../../shared/p2p-workflow-types.js').P2pBoundWorkflow; + /** + * Discriminant of the advanced source used at start time. `'envelope_compiled'` + * marks runs that came from a validated workflow envelope; `'supervision_internal'` + * marks daemon-internal supervision audits (escape hatch); `undefined` is the + * legacy passthrough (cron / tests). Helps audit/projection code distinguish + * runs that obey the full v1 contract from legacy ones. + */ + advancedSourceKind?: StartP2pRunAdvancedSource['kind']; deadlineAt?: number | null; currentRoundId?: string | null; currentExecutionStep: number; currentRoundAttempt: number; roundAttemptCounts: Record; roundJumpCounts: Record; + /** + * R3 PR-β (Cx1-H2 / W4) — per-compiled-edge usage counter for envelope_compiled + * runs. Independent from `roundJumpCounts` because compiled edges have + * per-edge `loopBudgets` (vs the round-aggregated jump budget on the + * legacy adapter projection). Test-only reset: see `__resetP2pRunArtifactRootCacheForTests`. + */ + compiledEdgeUseCounts?: Record; + /** + * R3 v2 PR-ζ (M2) — Per-script-round retry counter, independent of + * `roundAttemptCounts`. Decoupling ensures: (1) jump-rebound to the + * same round.id does not consume the script retry budget meant for + * transient errors only; (2) reset on jump can target this map without + * touching the canonical attempt history. `dispatchScriptRoundOrFail` + * reads + increments this on each retriable failure. + */ + scriptRetryCounts?: Record; + /** + * R3 v1b follow-up — mutable run variable state. Initialised from + * `bound.compiled.variables` (declared defaults) and patched by script + * nodes via `result.machineOutput.finalFrame.variables`. Logic nodes + * read from this map to evaluate their declarative rules. + */ + runVariables?: Record; routingHistory: Array<{ fromRoundId?: string | null; toRoundId?: string | null; @@ -533,6 +645,25 @@ export async function startP2pRun(...args: number | undefined, ] ): Promise { + const opts = normalizeStartP2pRunArgs(args); + // Audit:V-1 / N-H1 — when the caller supplies `advanced` (envelope-compiled + // or supervision-internal), unpack the rounds/preset/timeout from there. + // Otherwise fall back to the legacy `advancedPresetKey` / `advancedRounds` + // top-level fields. This keeps cron and existing test fixtures working + // while letting `prepareAdvancedWorkflowLaunch` and `supervision-automation` + // funnel through the typed discriminated union. + const advancedSource: StartP2pRunAdvancedSource | undefined = opts.advanced; + const advancedPresetKey = advancedSource?.kind === 'supervision_internal' + ? advancedSource.advancedPresetKey + : opts.advancedPresetKey; + const advancedRounds = advancedSource + ? advancedSource.advancedRounds + : opts.advancedRounds; + const advancedRunTimeoutMs = advancedSource?.advancedRunTimeoutMs + ?? opts.advancedRunTimeoutMs; + const contextReducer = advancedSource?.kind === 'envelope_compiled' + ? advancedSource.contextReducer + : opts.contextReducer; const { initiatorSession, targets, @@ -544,11 +675,7 @@ export async function startP2pRun(...args: extraPrompt, modeOverride, hopTimeoutMs, - advancedPresetKey, - advancedRounds, - advancedRunTimeoutMs, - contextReducer, - } = normalizeStartP2pRunArgs(args); + } = opts; // Validate same domain const mainSession = extractMainSession(initiatorSession); for (const t of targets) { @@ -647,9 +774,48 @@ export async function startP2pRun(...args: currentRoundAttempt: 1, roundAttemptCounts: {}, roundJumpCounts: {}, + // R3 v1b follow-up — initialise mutable variable state from the + // compiled workflow's declared variables so logic-node rules can read + // defaults even before any script node has patched the map. We store + // raw `value` because `P2pWorkflowVariableValue` widens to string | + // number | boolean | string[]. + // R3 v2 PR-ζ (B1 / A5) — `runVariables` uses a null-prototype map so + // any later write of `__proto__` / `constructor` / `prototype` becomes + // a normal own property and does NOT touch the global Object.prototype + // chain. Defence-in-depth alongside the orchestrator's write-path name + // validation; even if the regex regresses, prototype pollution is + // structurally impossible. + runVariables: (() => { + const initial = Object.create(null) as Record; + if (advancedSource?.kind === 'envelope_compiled') { + for (const variable of advancedSource.bound.compiled.variables ?? []) { + initial[variable.name] = variable.value; + } + } + return initial; + })(), routingHistory: [], helperDiagnostics: [], _cancelled: false, + // Audit:V-1 / N-H1 / N2 / R3 PR-α — store the bound workflow ON THE RUN + // so v1b dangerous-node executors can recheck against the live policy at + // execution time (`recheckDangerousNodeCapabilities`). The + // `capabilitySnapshot` and `policySnapshot` fields are convenience views; + // the full `boundWorkflow.bindContext` is the canonical source. + // + // For supervision-internal escapes (no bound) and legacy passthrough we + // leave these undefined; the recheck helper degrades to capability-string + // comparison only. + capabilitySnapshot: advancedSource?.kind === 'envelope_compiled' + ? advancedSource.bound.bindContext.capabilitySnapshot + : undefined, + policySnapshot: advancedSource?.kind === 'envelope_compiled' + ? advancedSource.bound.bindContext.policySnapshot + : undefined, + boundWorkflow: advancedSource?.kind === 'envelope_compiled' + ? advancedSource.bound + : undefined, + advancedSourceKind: advancedSource?.kind, }; activeRuns.set(runId, run); @@ -702,6 +868,18 @@ export async function cancelP2pRun(runId: string, serverLink: ServerLink | null) // ── Resume after daemon restart ─────────────────────────────────────────── export async function resumePendingOrchestrations(serverLink: ServerLink | null): Promise { + // R3 v1b follow-up — Always rehydrate persisted artifact identities at + // daemon startup, even when serverLink is null (test harness / disconnected + // daemon). This restores the spec invariant "identity preserved across + // retry/re-entry": an in-flight run picked up after restart finds its + // existing frozen identity and re-uses the same slug-N suffix instead of + // producing a fresh one. + try { + const loaded = await loadPersistedFrozenP2pArtifactIdentities(); + if (loaded > 0) logger.info({ loaded }, 'P2P: rehydrated persisted artifact identities'); + } catch (err) { + logger.warn({ err }, 'P2P: failed to rehydrate persisted artifact identities'); + } if (!serverLink) return; try { // Query server for active runs — the server handles this via WS request/response @@ -1053,10 +1231,49 @@ async function executeChain(run: P2pRun, modeConfig: P2pMode | undefined, server }, 60_000); } +// Audit:R3 hardening / task 10.6 — diagnostic retention. +// +// Long-running advanced workflows can accumulate hundreds of helper +// diagnostics (one per round attempt × node × loop). Without bounds the +// `P2pRun` object grows monotonically, the projection blob grows past +// `P2P_SANITIZE_MAX_TOTAL_BYTES` and starts truncating at the sanitizer, +// and the `serializeP2pRun` payload exceeds frontend rendering budgets. +// +// Retention policy (stable ordering): +// - `P2P_HELPER_DIAGNOSTIC_RETENTION_COUNT` total entries kept per run. +// - When over count, drop the OLDEST entries first (FIFO). The most-recent +// entries are most useful for failure forensics; the oldest are usually +// transient warnings from earlier rounds. +// - `P2P_HELPER_DIAGNOSTIC_RETENTION_BYTES` total JSON-stringified byte +// budget. When exceeded, drop additional oldest entries until under +// budget. Single oversized entries still apply but are themselves +// truncated by the sanitizer downstream. +// - Stable ordering: insertion order preserved among retained entries. +const P2P_HELPER_DIAGNOSTIC_RETENTION_COUNT = 100; +const P2P_HELPER_DIAGNOSTIC_RETENTION_BYTES = 64 * 1024; // 64 KiB / run + function addHelperDiagnostic(run: P2pRun, diagnostic: Omit): void { run.helperDiagnostics.push({ ...diagnostic, timestamp: Date.now() }); + // Count cap (FIFO trim). + while (run.helperDiagnostics.length > P2P_HELPER_DIAGNOSTIC_RETENTION_COUNT) { + run.helperDiagnostics.shift(); + } + // Byte cap (FIFO trim until under budget OR only newest entry remains). + let totalBytes = 0; + for (const d of run.helperDiagnostics) { + totalBytes += JSON.stringify(d).length; + } + while (totalBytes > P2P_HELPER_DIAGNOSTIC_RETENTION_BYTES && run.helperDiagnostics.length > 1) { + const dropped = run.helperDiagnostics.shift(); + if (dropped) totalBytes -= JSON.stringify(dropped).length; + } } +export const P2P_HELPER_DIAGNOSTIC_RETENTION_LIMITS = { + count: P2P_HELPER_DIAGNOSTIC_RETENTION_COUNT, + bytes: P2P_HELPER_DIAGNOSTIC_RETENTION_BYTES, +} as const; + function parseVerdictFromContent(content: string): 'PASS' | 'REWORK' | null { const matches = [...content.matchAll(//g)]; const verdict = matches.at(-1)?.[1]; @@ -1242,11 +1459,29 @@ async function reduceAdvancedContext( } } +/** + * Legacy artifact baseline (oldAdvanced path only). + * + * R3 PR-γ (A3) — for envelope_compiled OpenSpec rounds, this function + * returns an empty baseline because the authoritative gate is now + * `verifyP2pArtifactBaselineDelta` against the frozen identity (see + * `executeAdvancedChain` post-round delta block). The legacy + * `readdir().join('\n')` heuristic violates spec + * "OpenSpec artifact verification SHALL use per-file sha256 baseline only"; + * keeping it for envelope_compiled would be a fail-open second source. + * + * `explicit_paths` artifacts and oldAdvanced runs continue to use the + * legacy per-file readFile baseline. + */ async function captureArtifactBaseline(run: P2pRun, round: P2pResolvedRound): Promise> { const baseline = new Map(); const record = getSession(run.initiatorSession); const projectDir = record?.projectDir ?? process.cwd(); if (round.artifactConvention === 'openspec_convention') { + if (run.advancedSourceKind === 'envelope_compiled') { + // PR-γ — no legacy baseline; the new helper is the only authority. + return baseline; + } const target = join(projectDir, 'openspec', 'changes'); try { const entries = await readdir(target); @@ -1270,6 +1505,12 @@ async function captureArtifactBaseline(run: P2pRun, round: P2pResolvedRound): Pr async function validateArtifactOutputsForRound(run: P2pRun, round: P2pResolvedRound, baseline: Map): Promise { if (round.artifactConvention === 'none') return; if (round.artifactConvention === 'openspec_convention') { + if (run.advancedSourceKind === 'envelope_compiled') { + // PR-γ — envelope_compiled OpenSpec validation is owned by the new + // `verifyP2pArtifactBaselineDelta` gate (per-file sha256). The + // legacy `readdir().join()` heuristic is bypassed entirely. + return; + } const target = [...baseline.keys()][0]; const before = baseline.get(target) ?? null; try { @@ -1377,6 +1618,370 @@ function buildAdvancedSynthesisPrompt( ); } +/** + * Audit:R3 / tasks 4.7b / 4.8b — a round is "dangerous" iff it asks the + * dispatcher to extend write authority beyond `analysis_only`. The recheck + * MUST run before every such round so a daemon policy/capability downgrade + * mid-run fails the round closed instead of silently bypassing the change. + */ +function isRoundDangerous(round: P2pResolvedRound): boolean { + if (round.permissionScope === 'implementation' || round.permissionScope === 'artifact_generation') return true; + // R3 PR-α (A4) — script-node rounds are dangerous regardless of + // permission scope, because script execution mutates the host environment + // (argv launch, env policy, file system writes, NDJSON parsing). spec + // "dangerous nodes SHALL recheck on policy downgrade" requires recheck on + // every script dispatch. The previous predicate only inspected + // permissionScope and silently let `analysis_only` script nodes bypass + // capability-downgrade detection. + if (round.nodeKind === 'script') return true; + // OpenSpec / explicit-paths artifact rounds are write-authoritative even + // under a permissive permissionScope; treat as dangerous when the resolved + // round carries an artifact convention beyond `none`. + if (round.artifactConvention && round.artifactConvention !== 'none') return true; + return false; +} + +function recheckDangerousRoundOrFail( + run: P2pRun, + round: P2pResolvedRound, + serverLink: ServerLink | null, +): 'ok' | 'fail_closed' { + const bound = run.boundWorkflow; + if (!bound) return 'ok'; + // Source of truth: bound at compile/bind time, NOT recomputed from current draft. + const requiredCapabilities = bound.compiled.derivedRequiredCapabilities; + const bindCapabilitySnapshot = bound.bindContext.capabilitySnapshot.capabilities; + const boundPolicySnapshot = bound.bindContext.policySnapshot; + + // Live state at execute time. When serverLink is null (test harness or + // disconnected daemon), degrade to bound snapshot — we can't observe a + // downgrade without a live source, so the recheck becomes a no-op rather + // than a false fail-closed. + const stubLink = { getP2pWorkflowCapabilities: () => bindCapabilitySnapshot } as unknown as ServerLink; + const link = serverLink ?? stubLink; + const currentDaemonCapabilities = getCurrentDaemonWorkflowCapabilities(link); + const currentDaemonPolicy = loadDaemonP2pStaticPolicy(link); + + const result = recheckDangerousNodeCapabilities({ + requiredCapabilities, + bindCapabilitySnapshot, + currentDaemonCapabilities, + boundPolicySnapshot, + currentDaemonPolicy, + runId: run.id, + nodeId: round.id, + }); + if (result.ok) return 'ok'; + // Fail the run closed; the helper diagnostic carries the precise downgrade + // metadata. Rely on the existing helper-diagnostic retention pipeline. + addHelperDiagnostic(run, { + code: 'P2P_DANGEROUS_NODE_RECHECK_FAILED', + message: result.diagnostic.summary ?? 'dangerous node recheck failed', + nodeId: round.id, + severity: 'error' as const, + } as unknown as Omit); + failRun(run, 'capability_downgraded_during_run', result.diagnostic.summary ?? 'recheck failed', serverLink); + return 'fail_closed'; +} + +/** + * Audit:R2-N1 / R3 §12.1 production wiring — when the round's compiled node + * is `nodeKind: 'script'` AND the run carries an envelope-compiled bound + * workflow, dispatch via `runP2pScriptNode` instead of the legacy + * `dispatchHop`. The script's stdout/stderr/machine-output are recorded into + * the discussion file as a "Script execution" segment so the rest of the + * round flow (verdict parsing, summary, etc.) sees authoritative content. + * + * Returns a synthetic "authoritative segment" string so the caller can keep + * its existing structure (round verdict / artifact validation / loop + * routing). On any failure the script-node round is marked failed via + * `failRun` and the helper returns null. + */ +async function dispatchScriptRoundOrFail( + run: P2pRun, + round: P2pResolvedRound, + serverLink: ServerLink | null, +): Promise< + | { kind: 'ok'; authoritativeSegment: string; routingKey?: string; variables?: Record } + | { kind: 'fail_closed' } + | { kind: 'retry' } + | { kind: 'not_a_script_round' } +> { + const bound = run.boundWorkflow; + if (!bound) return { kind: 'not_a_script_round' }; + // R3 PR-α (A1) — adapter now preserves `nodeKind` and `script` on the + // resolved round, so we read them from `round` first and fall back to the + // sidecar `bound.compiled.nodes.find(...)` only for old fixtures that + // pre-date the adapter widening. `script` may still live on `bound` even + // after A1 because compiled `P2pScriptNodeContract` is the authoritative + // shape. + const fallbackNode = bound.compiled.nodes.find((node) => node.id === round.id); + const isScript = round.nodeKind === 'script' || fallbackNode?.nodeKind === 'script'; + const scriptContract = round.script ?? fallbackNode?.script; + if (!isScript || !scriptContract) { + return { kind: 'not_a_script_round' }; + } + const policy = bound.bindContext.policySnapshot; + if (!policy) { + failRun(run, 'failed', 'Script-node round dispatch requires bound policySnapshot.', serverLink); + return { kind: 'fail_closed' }; + } + // R3 PR-α (B3 / B5 / D-O4) — slot exhaustion now emits a structured + // workflow diagnostic via `helperDiagnostic.workflowDiagnostic` so web / + // monitoring can render the i18n key for `daemon_busy` instead of parsing + // free-form text. + const slot = acquireScriptSlot(); + if (!slot.ok) { + const busyDiag = makeP2pWorkflowDiagnostic('daemon_busy', 'execute', { + nodeId: round.id, + summary: `Script slot pool exhausted (${slot.inUse}/${slot.capacity}).`, + }); + addHelperDiagnostic(run, { + code: 'P2P_SCRIPT_SLOT_EXHAUSTED', + attempt: run.currentRoundAttempt, + sourceSession: run.initiatorSession, + message: busyDiag.summary ?? 'daemon_busy', + workflowDiagnostic: busyDiag, + }); + failRun(run, 'failed', `Script slot pool exhausted (${slot.inUse}/${slot.capacity}); see daemon_busy.`, serverLink); + return { kind: 'fail_closed' }; + } + try { + const result = await runP2pScriptNode({ + script: scriptContract, + policy, + repoRoot: bound.bindContext.repoRoot, + runId: run.id, + nodeId: round.id, + }); + // Append a discussion-file segment so downstream verdict parsing / + // summary generation still sees the round's authoritative output. + const sectionHeader = `Script: ${round.title} (attempt ${run.currentRoundAttempt})`; + let segment = `\n\n## ${sectionHeader}\n\n`; + segment += `Exit code: ${result.exitCode}, signal: ${result.signal}, ok: ${result.ok}\n`; + if (result.machineOutput?.ok) { + segment += `\n### Machine output (final frame)\n\n\`\`\`json\n${JSON.stringify(result.machineOutput.finalFrame, null, 2)}\n\`\`\`\n`; + } + if (result.diagnostics.length) { + const codes = result.diagnostics.map((d) => d.code).join(', '); + segment += `\nDiagnostics: ${codes}\n`; + } + // R3 PR-α (B4 / D-O3) + v1b (W2) — discussion file write is now + // non-blocking via the per-run queue. Spec D-O3: in-memory + // `authoritativeSegment` is the verdict source-of-truth so the write + // does NOT gate dispatch latency. Failures still surface via helper + // diagnostic + logger.warn so audit gaps are visible. + enqueueP2pDiscussionWrite( + run.contextFilePath, + segment, + (error: unknown) => { + const message = error instanceof Error ? error.message : String(error); + addHelperDiagnostic(run, { + code: 'P2P_DISCUSSION_WRITE_FAILED', + attempt: run.currentRoundAttempt, + sourceSession: run.initiatorSession, + message: `Failed to append script segment to ${run.contextFilePath}: ${message}`, + }); + }, + // R3 v2 PR-ζ (M1) — surface backpressure drops as helper diagnostic. + (droppedBytes, queuedBytes) => { + addHelperDiagnostic(run, { + code: 'P2P_DISCUSSION_WRITE_FAILED', + attempt: run.currentRoundAttempt, + sourceSession: run.initiatorSession, + message: `Discussion writer dropped ${droppedBytes}B due to backpressure (queued=${queuedBytes}B)`, + }); + }, + ); + if (!result.ok) { + // R3 PR-α (B1 / B5) + v1b follow-up (script retry) — script + // execution failure either fails the round closed OR triggers a + // retry when ALL diagnostics are transient (e.g. `script_timeout`, + // `daemon_busy`) AND the round attempt count is below + // `P2P_SCRIPT_RETRY_DEFAULT_ATTEMPTS`. The structured workflow + // diagnostic is preserved via `helperDiagnostic.workflowDiagnostic`. + const primaryDiag: P2pWorkflowDiagnostic | undefined = result.diagnostics[0]; + const primaryCode = primaryDiag?.code ?? 'script_machine_output_invalid'; + const retriable = result.diagnostics.length > 0 + && result.diagnostics.every((d) => (P2P_SCRIPT_RETRIABLE_DIAGNOSTIC_CODES as readonly string[]).includes(d.code)); + // R3 v2 PR-ζ (M2 / ζ-10) — retry budget uses an independent counter + // so jump-rebound (via routing/jumpRule) doesn't consume the + // script transient-failure retry budget. The counter is reset + // when a jump targets this round (see jump block below). + if (!run.scriptRetryCounts) run.scriptRetryCounts = {}; + const scriptAttemptsSoFar = run.scriptRetryCounts[round.id] ?? 0; + const attemptsRemain = scriptAttemptsSoFar < P2P_SCRIPT_RETRY_DEFAULT_ATTEMPTS - 1; + // pre-increment so the first failure shows as 1 attempt consumed + run.scriptRetryCounts[round.id] = scriptAttemptsSoFar + 1; + const attemptsSoFar = scriptAttemptsSoFar + 1; + for (const wd of result.diagnostics) { + addHelperDiagnostic(run, { + code: 'P2P_HELPER_PRIMARY_FAILED', + attempt: run.currentRoundAttempt, + sourceSession: run.initiatorSession, + message: `script:${wd.code} ${wd.summary ?? ''}`.trim(), + workflowDiagnostic: wd, + }); + } + if (retriable && attemptsRemain) { + // Surface the retry decision but do NOT fail the run; the executor + // re-enters the same round (attempt count increments at the top). + logger.warn( + { runId: run.id, nodeId: round.id, attempt: attemptsSoFar, max: P2P_SCRIPT_RETRY_DEFAULT_ATTEMPTS, primaryCode }, + 'P2P: script transient failure, retrying', + ); + return { kind: 'retry' }; + } + failRun( + run, + 'failed', + `Script node ${round.id} failed (exit=${result.exitCode}, signal=${result.signal ?? 'none'}); primary=${primaryCode}; attempts=${attemptsSoFar}/${P2P_SCRIPT_RETRY_DEFAULT_ATTEMPTS}`, + serverLink, + ); + return { kind: 'fail_closed' }; + } + // R3 PR-β (Cx1-H2) — surface the structured routing key from the + // machine output frame so the executor can route on the authoritative + // value instead of parsing free-form discussion text. The frame is + // the spec's "machine output is authoritative" source. + // + // R3 v1b follow-up — also surface the structured `variables` patch so + // downstream logic nodes can evaluate against the latest run state. + const finalFrame = result.machineOutput?.ok + ? (result.machineOutput.finalFrame as { routingKey?: unknown; variables?: Record } | undefined) + : undefined; + const routingKey = typeof finalFrame?.routingKey === 'string' && finalFrame.routingKey.length > 0 + ? finalFrame.routingKey + : undefined; + const variables = finalFrame?.variables && typeof finalFrame.variables === 'object' && !Array.isArray(finalFrame.variables) + ? finalFrame.variables + : undefined; + return { + kind: 'ok', + authoritativeSegment: segment, + ...(routingKey ? { routingKey } : {}), + ...(variables ? { variables } : {}), + }; + } catch (error) { + failRun(run, 'failed', error instanceof Error ? error.message : String(error), serverLink); + return { kind: 'fail_closed' }; + } finally { + releaseScriptSlot(); + } +} + +/** + * Audit:R2-N2 / R3 §12.2 production wiring — for envelope_compiled runs that + * declare any `openspec_convention` artifact, lazily freeze the OpenSpec + * identity once per run (deterministic slug-N collision suffix; identity + * preserved across retry/re-entry in the in-memory map). Returns the frozen + * artifact root path the new helpers should baseline against. + * + * For runs WITHOUT openspec_convention (or for legacy non-envelope runs), + * returns null and the orchestrator falls back to the legacy + * `captureArtifactBaseline` map. + */ +/** + * R3 PR-α (W1) + PR-β (Cx1-H4) — Narrowed return type with explicit + * freeze-error signal. The caller no longer needs `!` to assert + * `run.boundWorkflow` because the helper returns the bound workflow + * alongside the resolved artifact root. + * + * PR-β change: when freeze attempt throws OR returns an identity with no + * `openspecChangePath`, we now surface `freezeError` (with the helper + * diagnostics from the freeze attempt when available). The orchestrator's + * envelope_compiled OpenSpec branch fails closed; oldAdvanced flows still + * fall back to the legacy baseline path so non-envelope runs are not + * regressed. The frozen identity is exposed so the post-round delta gate + * can use `identity.openspecArtifactPaths` (Cx1-H3) instead of the lossy + * adapter-projected `round.artifactOutputs`. + */ +interface RunArtifactRootResolution { + rootPath: string; + bound: P2pBoundWorkflow; + identity: P2pFrozenArtifactIdentity; + /** + * When set, freeze failed for this run's OpenSpec contract. envelope_compiled + * callers MUST `failRun` instead of silently falling back to legacy + * `readdir().join()` validation. + */ + freezeError?: { reason: string; diagnostics: P2pWorkflowDiagnostic[] }; +} +const runArtifactRootCache = new Map(); +async function getOrFreezeRunArtifactRoot(run: P2pRun): Promise { + const bound = run.boundWorkflow; + if (!bound) return null; + const cached = runArtifactRootCache.get(run.id); + if (cached) return cached; + // Pick the first OpenSpec convention artifact to drive identity freeze. + // The freeze operation is idempotent per `runId` so multiple OpenSpec + // nodes in the same run still freeze once. + let openSpecContract: { convention: 'openspec_convention'; paths: string[] } | null = null; + for (const node of bound.compiled.nodes) { + const found = node.artifacts?.find((artifact) => artifact.convention === 'openspec_convention'); + if (found) { openSpecContract = found as { convention: 'openspec_convention'; paths: string[] }; break; } + } + if (!openSpecContract) return null; + // Suggest a slug derived from the run id so collision is rare in practice + // but `freezeP2pArtifactIdentity` still owns the slug-N collision suffix. + const inferredSlug = `p2p-run-${run.id.slice(0, 8)}`; + try { + const identity: P2pFrozenArtifactIdentity = await freezeP2pArtifactIdentity({ + contract: openSpecContract, + runId: run.id, + repoRoot: bound.bindContext.repoRoot, + inferredSlug, + }); + if (!identity.openspecChangePath) { + const resolution: RunArtifactRootResolution = { + rootPath: '', + bound, + identity, + freezeError: { + reason: 'artifact_identity_freeze_failed', + diagnostics: identity.diagnostics ?? [], + }, + }; + runArtifactRootCache.set(run.id, resolution); + return resolution; + } + const resolution: RunArtifactRootResolution = { + rootPath: identity.openspecChangePath, + bound, + identity, + }; + runArtifactRootCache.set(run.id, resolution); + return resolution; + } catch (error) { + const reason = error instanceof Error ? error.message : String(error); + // Surface the freeze error via the resolution shape so envelope_compiled + // callers can fail closed. We deliberately cache the error so retries + // don't re-attempt mkdir storms; the run terminates after the first + // visit anyway. oldAdvanced callers continue to ignore the resolution + // entirely (they go through the legacy `captureArtifactBaseline` path). + const resolution: RunArtifactRootResolution = { + rootPath: '', + bound, + identity: { + convention: 'openspec_convention', + openspecArtifactPaths: [], + frozenAt: new Date().toISOString(), + collisionResolved: false, + diagnostics: [], + }, + freezeError: { reason, diagnostics: [] }, + }; + runArtifactRootCache.set(run.id, resolution); + return resolution; + } +} + + +/** Test-only: clear the per-run artifact-root cache between e2e tests. */ +export function __resetP2pRunArtifactRootCacheForTests(): void { + runArtifactRootCache.clear(); +} + async function executeAdvancedChain(run: P2pRun, serverLink: ServerLink | null): Promise { const rounds = run.resolvedRounds ?? []; let roundIndex = 0; @@ -1395,12 +2000,233 @@ async function executeAdvancedChain(run: P2pRun, serverLink: ServerLink | null): run.activePhase = round.dispatchStyle === 'initiator_only' ? 'initial' : 'hop'; pushState(run, serverLink); + // Audit:R3 / tasks 4.7b / 4.8b — in-tree dangerous-node recheck. + // Before executing any round whose semantics extend write authority + // (`permissionScope === 'implementation'`, OpenSpec artifact-write, + // script execution), re-check current daemon capabilities + policy + // against the bound snapshot. A capability/policy downgrade between + // bind and execute MUST fail the run closed — capability upgrade does + // NOT broaden the frozen requirement set (helper enforces). + if ( + run.advancedSourceKind === 'envelope_compiled' + && run.boundWorkflow + && isRoundDangerous(round) + ) { + const recheck = recheckDangerousRoundOrFail(run, round, serverLink); + if (recheck === 'fail_closed') return; + } + const artifactBaseline = await captureArtifactBaseline(run, round); + + // Audit:R2-N2 / R3 PR-α / PR-β — for envelope_compiled runs that + // declare OpenSpec artifacts, capture the new-style baseline + // (size + sha256 + caps) under the frozen artifact root. The narrowed + // `RunArtifactRootResolution` return removes the `!` non-null assertion + // (W1) so future refactors can't accidentally drop the bind context. + // + // PR-β (Cx1-H4): freeze failure on an envelope_compiled run with + // declared OpenSpec artifacts MUST fail the run closed. The legacy + // `readdir().join()` validator is too weak a fallback for the OpenSpec + // convention (spec "freeze failure SHALL fail the run"). + const artifactRootResolution = await getOrFreezeRunArtifactRoot(run); + if ( + artifactRootResolution?.freezeError + && run.advancedSourceKind === 'envelope_compiled' + && round.artifactConvention === 'openspec_convention' + ) { + addHelperDiagnostic(run, { + code: 'P2P_HELPER_PRIMARY_FAILED', + attempt: run.currentRoundAttempt, + sourceSession: run.initiatorSession, + message: `Artifact identity freeze failed: ${artifactRootResolution.freezeError.reason}`, + workflowDiagnostic: artifactRootResolution.freezeError.diagnostics[0], + }); + failRun( + run, + 'failed', + `Artifact identity freeze failed for OpenSpec run: ${artifactRootResolution.freezeError.reason}`, + serverLink, + ); + return; + } + let newArtifactBaseline: P2pArtifactBaseline | null = null; + if (artifactRootResolution && !artifactRootResolution.freezeError) { + try { + const captureResult = await captureP2pArtifactBaseline({ + rootPath: artifactRootResolution.rootPath, + phase: 'baseline', + repoRoot: artifactRootResolution.bound.bindContext.repoRoot, + }); + // R3 v2 PR-ζ (Cx1-A2 / ζ-9) — capture diagnostics with error + // severity OR `truncated === true` MUST fail the round closed. + // Pre v2 these were silently ignored, so artifact cap-exceeded / + // unsafe-root were demoted to "declared path missing" symptoms by + // the downstream delta verifier. + const errorDiag = captureResult.diagnostics.find((d) => d.severity === 'error'); + if (errorDiag || captureResult.baseline.truncated) { + if (errorDiag) { + addHelperDiagnostic(run, { + code: 'P2P_HELPER_PRIMARY_FAILED', + attempt: run.currentRoundAttempt, + sourceSession: run.initiatorSession, + message: `Pre-round artifact baseline capture failed: ${errorDiag.code} ${errorDiag.summary ?? ''}`.trim(), + workflowDiagnostic: errorDiag, + }); + } + if (captureResult.baseline.truncated) { + const truncDiag = makeP2pWorkflowDiagnostic('artifact_baseline_too_large', 'execute', { + nodeId: round.id, + summary: 'Artifact baseline truncated due to size cap.', + }); + addHelperDiagnostic(run, { + code: 'P2P_HELPER_PRIMARY_FAILED', + attempt: run.currentRoundAttempt, + sourceSession: run.initiatorSession, + message: 'Pre-round artifact baseline truncated (cap exceeded).', + workflowDiagnostic: truncDiag, + }); + } + failRun( + run, + 'failed', + `Pre-round artifact baseline capture failed: ${errorDiag?.code ?? 'artifact_baseline_too_large'}`, + serverLink, + ); + return; + } + newArtifactBaseline = captureResult.baseline; + } catch { + // Baseline capture can fail if the frozen root doesn't exist yet + // (no prior round wrote anything). Treat as empty baseline so the + // post-round delta sees fresh files. + newArtifactBaseline = null; + } + } + const reducerSummary = await reduceAdvancedContext(run, round, serverLink); if (run._cancelled || isTerminal(run.status)) return; + // Audit:R2-N1 — script-node dispatch. When the round corresponds to a + // compiled `nodeKind: 'script'` node, route through the daemon script + // runner instead of legacy dispatchHop. + const scriptDispatch = await dispatchScriptRoundOrFail(run, round, serverLink); + if (scriptDispatch.kind === 'fail_closed') return; + if (scriptDispatch.kind === 'retry') { + // R3 v1b follow-up — transient script failure. Re-enter the same + // round; `roundAttemptCounts[round.id]` will increment on the next + // iteration's prologue. The retry budget is enforced inside + // `dispatchScriptRoundOrFail` so we never loop indefinitely. + continue; + } + let authoritativeSegment = ''; - if (round.dispatchStyle === 'initiator_only') { + // R3 PR-β (Cx1-H2) — capture the structured routing key emitted by the + // script's machine output frame so the compiled-edge jump logic can + // route on it instead of parsing free-form discussion text. + let scriptRoutingKey: string | undefined; + // R3 v1b follow-up — capture the structured logic marker emitted by + // a logic node so `logic_marker_equals` edges route on its value. + let logicMarker: string | undefined; + if (scriptDispatch.kind === 'ok') { + authoritativeSegment = scriptDispatch.authoritativeSegment; + scriptRoutingKey = scriptDispatch.routingKey; + // R3 v2 PR-ζ (B1 / A5 / B5) — Apply the structured variables patch + // to the run state. The orchestrator is the SINGLE write path, so + // it does its own defence-in-depth even though + // `parseP2pScriptMachineOutput` already enforced the same shape: + // * key MUST match `P2P_WORKFLOW_VARIABLE_NAME_PATTERN` + // (lowercase identifier — structurally rejects `__proto__` etc) + // * value type ∈ string | number | boolean | string[] + // * arrays SHALL be ≤ 64 elements AND every element ≤ 8 KiB + // Drops surface as `P2P_HELPER_PRIMARY_FAILED` helper diagnostics + // so users can see why their variable patch was ignored. + if (scriptDispatch.variables && run.runVariables) { + for (const [name, value] of Object.entries(scriptDispatch.variables)) { + if (!P2P_WORKFLOW_VARIABLE_NAME_PATTERN.test(name)) { + addHelperDiagnostic(run, { + code: 'P2P_HELPER_PRIMARY_FAILED', + attempt: run.currentRoundAttempt, + sourceSession: run.initiatorSession, + message: `Script variable name rejected (must match ${P2P_WORKFLOW_VARIABLE_NAME_PATTERN.source}): ${name.slice(0, 64)}`, + }); + continue; + } + let acceptable = false; + if (typeof value === 'string' || typeof value === 'number' || typeof value === 'boolean') { + acceptable = true; + } else if (Array.isArray(value)) { + if (value.length > P2P_WORKFLOW_VARIABLE_ARRAY_MAX_ELEMENTS) { + addHelperDiagnostic(run, { + code: 'P2P_HELPER_PRIMARY_FAILED', + attempt: run.currentRoundAttempt, + sourceSession: run.initiatorSession, + message: `Script variable ${name} array length ${value.length} exceeds cap ${P2P_WORKFLOW_VARIABLE_ARRAY_MAX_ELEMENTS}`, + }); + continue; + } + const tooBigIndex = value.findIndex((v) => typeof v !== 'string' || Buffer.byteLength(v, 'utf8') > P2P_WORKFLOW_VARIABLE_ARRAY_MAX_ELEMENT_BYTES); + if (tooBigIndex >= 0) { + addHelperDiagnostic(run, { + code: 'P2P_HELPER_PRIMARY_FAILED', + attempt: run.currentRoundAttempt, + sourceSession: run.initiatorSession, + message: `Script variable ${name}[${tooBigIndex}] exceeds ${P2P_WORKFLOW_VARIABLE_ARRAY_MAX_ELEMENT_BYTES}B element cap or non-string`, + }); + continue; + } + acceptable = true; + } + if (acceptable) run.runVariables[name] = value; + } + } + } else if (round.nodeKind === 'logic') { + // R3 v1b follow-up — logic node dispatch (envelope_compiled only). + // Evaluate the contract against current run.variables, append a + // small audit segment to the discussion file, set logicMarker for + // routing, and skip every other dispatch path (no agent send, no + // artifact verify — logic is pure). + const compiledNode = run.boundWorkflow?.compiled.nodes.find((node) => node.id === round.id); + const logic = compiledNode?.logic; + if (!logic) { + addHelperDiagnostic(run, { + code: 'P2P_HELPER_PRIMARY_FAILED', + attempt: run.currentRoundAttempt, + sourceSession: run.initiatorSession, + message: `Logic node ${round.id} has no compiled logic contract`, + }); + failRun(run, 'failed', `Logic node ${round.id} missing logic contract`, serverLink); + return; + } + const evalResult = evaluateP2pLogic(logic, (run.runVariables ?? {}) as Record); + logicMarker = evalResult.marker; + const sectionHeader = `Logic: ${round.title} (attempt ${run.currentRoundAttempt})`; + const segment = `\n\n## ${sectionHeader}\n\nemit: ${evalResult.marker}\nmatchedRuleIndex: ${evalResult.matchedRuleIndex}\n`; + authoritativeSegment = segment; + // R3 v1b (W2) + v2 PR-ζ (M1) — non-blocking + drop surfaces helper + // diagnostic. D-O3: in-memory authoritativeSegment is verdict + // source-of-truth. + enqueueP2pDiscussionWrite( + run.contextFilePath, + segment, + (error: unknown) => { + const message = error instanceof Error ? error.message : String(error); + addHelperDiagnostic(run, { + code: 'P2P_DISCUSSION_WRITE_FAILED', + attempt: run.currentRoundAttempt, + sourceSession: run.initiatorSession, + message: `Failed to append logic segment to ${run.contextFilePath}: ${message}`, + }); + }, + (droppedBytes, queuedBytes) => { + addHelperDiagnostic(run, { + code: 'P2P_DISCUSSION_WRITE_FAILED', + attempt: run.currentRoundAttempt, + sourceSession: run.initiatorSession, + message: `Discussion writer dropped ${droppedBytes}B due to backpressure (queued=${queuedBytes}B)`, + }); + }, + ); + } else if (round.dispatchStyle === 'initiator_only') { const sectionHeader = `${discussionParticipantName(run.initiatorSession)} — ${round.title} (attempt ${run.currentRoundAttempt})`; const baselineBuffer = await readFile(run.contextFilePath).catch(() => Buffer.from('')); const prompt = buildAdvancedHopPrompt( @@ -1462,8 +2288,124 @@ async function executeAdvancedChain(run: P2pRun, serverLink: ServerLink | null): }); if (run._cancelled || isTerminal(run.status)) return; - const verdict = round.requiresVerdict ? parseVerdictFromContent(authoritativeSegment) : null; - const effectiveVerdict = round.requiresVerdict + // Audit:R2-N2 / R3 PR-α (B2 / B5 / B7) / PR-β (Cx1-H3) — for + // envelope_compiled runs with OpenSpec artifacts, run the new-style + // baseline delta check as a SECOND authoritative gate (legacy + // `validateArtifactOutputsForRound` above remains as the first gate + // until PR-γ; either failing fails the round — "double gate"). + // Post-round capture uses `phase: 'validate'` so diagnostics + // distinguish pre/post phases. + // + // PR-β (Cx1-H3) — `declaredFiles` now comes from + // `identity.openspecArtifactPaths` (the frozen identity's coordinate + // system) instead of `round.artifactOutputs` (the lossy adapter + // projection). Mismatched coordinate systems previously caused false + // missing-file diagnostics for valid OpenSpec writes. + if (artifactRootResolution && !artifactRootResolution.freezeError && round.artifactConvention === 'openspec_convention') { + const identityPaths = artifactRootResolution.identity.openspecArtifactPaths; + // When the frozen identity declared no artifact paths AND the round + // also declared none, there is nothing to verify; skip silently. + if (identityPaths.length === 0 && round.artifactOutputs.length === 0) { + // no-op + } else { + try { + const afterCapture = await captureP2pArtifactBaseline({ + rootPath: artifactRootResolution.rootPath, + phase: 'validate', + repoRoot: artifactRootResolution.bound.bindContext.repoRoot, + }); + // R3 v2 PR-ζ (Cx1-A2 / ζ-9) — post-round capture diagnostics + // also fail-closed; truncated baseline post-round means the + // round wrote more than the cap allows. + const errorDiag = afterCapture.diagnostics.find((d) => d.severity === 'error'); + if (errorDiag || afterCapture.baseline.truncated) { + if (errorDiag) { + addHelperDiagnostic(run, { + code: 'P2P_HELPER_PRIMARY_FAILED', + attempt: run.currentRoundAttempt, + sourceSession: run.initiatorSession, + message: `Post-round artifact baseline capture failed: ${errorDiag.code} ${errorDiag.summary ?? ''}`.trim(), + workflowDiagnostic: errorDiag, + }); + } + if (afterCapture.baseline.truncated) { + const truncDiag = makeP2pWorkflowDiagnostic('artifact_baseline_too_large', 'execute', { + nodeId: round.id, + summary: 'Post-round artifact baseline truncated due to size cap.', + }); + addHelperDiagnostic(run, { + code: 'P2P_HELPER_PRIMARY_FAILED', + attempt: run.currentRoundAttempt, + sourceSession: run.initiatorSession, + message: 'Post-round artifact baseline truncated (cap exceeded).', + workflowDiagnostic: truncDiag, + }); + } + failRun( + run, + 'failed', + `Post-round artifact baseline capture failed: ${errorDiag?.code ?? 'artifact_baseline_too_large'}`, + serverLink, + ); + return; + } + const before: P2pArtifactBaseline = newArtifactBaseline ?? { + rootPath: artifactRootResolution.rootPath, + files: [], + capturedAt: new Date().toISOString(), + truncated: false, + }; + // Cx1-H3 — prefer frozen identity paths; fall back to the round's + // adapter-projected outputs only when the identity didn't surface + // declared paths (defensive). + const declaredSource = identityPaths.length > 0 ? identityPaths : round.artifactOutputs; + const declaredFiles = declaredSource.map((p) => ({ relativePath: p })); + const delta = verifyP2pArtifactBaselineDelta(before, afterCapture.baseline, declaredFiles); + if (!delta.ok) { + for (const diagnostic of delta.diagnostics) { + addHelperDiagnostic(run, { + code: 'P2P_HELPER_PRIMARY_FAILED', + attempt: run.currentRoundAttempt, + message: `Artifact contract not satisfied: ${diagnostic.code} ${diagnostic.fieldPath ?? ''} ${diagnostic.summary ?? ''}`.trim(), + sourceSession: run.initiatorSession, + workflowDiagnostic: diagnostic, + }); + } + const primary = delta.diagnostics[0]; + failRun( + run, + 'failed', + `Artifact contract not satisfied: ${primary?.code ?? 'artifact_contract_not_satisfied'} ${primary?.fieldPath ?? ''}`.trim(), + serverLink, + ); + return; + } + } catch (error) { + // Cap-exceeded / IO error during post-round capture: surface as a + // helper diagnostic so audit can see the gap. We do NOT fail the + // run here because the legacy `validateArtifactOutputsForRound` + // already ran and either passed or failed the round; failing + // again would double-fail. PR-γ collapses these two gates. + addHelperDiagnostic(run, { + code: 'P2P_HELPER_PRIMARY_FAILED', + attempt: run.currentRoundAttempt, + sourceSession: run.initiatorSession, + message: `Artifact post-round capture failed: ${error instanceof Error ? error.message : String(error)}`, + }); + } + } + } + + // R3 v1b follow-up — script and logic nodes do NOT require a verdict + // marker in the discussion text. Their authoritative routing input + // is the structured machine-output frame (script) or the evaluator + // result (logic). Suppressing the verdict requirement avoids spurious + // P2P_VERDICT_MISSING diagnostics for structured nodes. + const verdictRequiredForRound = round.requiresVerdict + && round.nodeKind !== 'script' + && round.nodeKind !== 'logic'; + const verdict = verdictRequiredForRound ? parseVerdictFromContent(authoritativeSegment) : null; + const effectiveVerdict = verdictRequiredForRound ? (verdict ?? (() => { addHelperDiagnostic(run, { code: 'P2P_VERDICT_MISSING', @@ -1475,25 +2417,90 @@ async function executeAdvancedChain(run: P2pRun, serverLink: ServerLink | null): })()) : null; - const jump = round.allowRouting && round.jumpRule - ? (() => { - const jumpCount = run.roundJumpCounts[round.id] ?? 0; - const belowMax = jumpCount < round.jumpRule!.maxTriggers; - if (!belowMax) return null; + // R3 PR-β (Cx1-H2 / A7 / A8) — for envelope_compiled runs, route on + // the COMPILED EDGE CONDITIONS rather than the legacy + // `verdictPolicy: forced_rework` projection. Conditional edges keep + // their full semantics: + // - `routing_key_equals` is matched against `scriptRoutingKey` + // (from the script's machine output frame — never read from text) + // - `verdict_marker_equals` is matched against `effectiveVerdict` + // - `logic_marker_equals` has no production evaluator yet; compile + // should already have rejected such workflows, but if one slips + // through we skip routing instead of misrouting silently. + // Per-edge loop budget is honoured via `bound.compiled.loopBudgets`, + // not the round-aggregated `roundJumpCounts`. + let jump: string | null = null; + let jumpTriggerLabel: string | null = effectiveVerdict; + let jumpEdgeId: string | null = null; + if (run.advancedSourceKind === 'envelope_compiled' && run.boundWorkflow) { + const compiled = run.boundWorkflow.compiled; + const outgoingConditional = compiled.edges.filter( + (edge) => edge.fromNodeId === round.id && edge.edgeKind === 'conditional', + ); + for (const edge of outgoingConditional) { + if (!edge.condition) continue; + const useCount = run.compiledEdgeUseCounts?.[edge.id] ?? 0; + const budget = compiled.loopBudgets[edge.id] ?? Infinity; + if (useCount >= budget) continue; + let matched = false; + let triggerValue: string | null = null; + if (edge.condition.kind === 'routing_key_equals' && typeof scriptRoutingKey === 'string') { + matched = scriptRoutingKey === edge.condition.equals; + triggerValue = scriptRoutingKey; + } else if (edge.condition.kind === 'verdict_marker_equals' && effectiveVerdict !== null) { + matched = effectiveVerdict === edge.condition.equals; + triggerValue = effectiveVerdict; + } else if (edge.condition.kind === 'logic_marker_equals' && typeof logicMarker === 'string') { + // R3 v1b follow-up — match the logic node's emitted marker + // against the conditional edge condition. Authority for logic + // routing is the evaluator output, never discussion text. + matched = logicMarker === edge.condition.equals; + triggerValue = logicMarker; + } else if (edge.condition.kind === 'logic_marker_equals') { + // No logic marker available (the source node was not a logic + // node, or evaluation produced no marker). Skip — compiler is + // expected to reject mismatched routing authority. + continue; + } + if (matched) { + jump = edge.toNodeId; + jumpEdgeId = edge.id; + jumpTriggerLabel = triggerValue; + break; + } + } + } else if (round.allowRouting && round.jumpRule) { + // oldAdvanced legacy routing — preserved unchanged. + const jumpCount = run.roundJumpCounts[round.id] ?? 0; + const belowMax = jumpCount < round.jumpRule.maxTriggers; + if (belowMax) { if (round.verdictPolicy === 'forced_rework') { - if (jumpCount < round.jumpRule.minTriggers) return round.jumpRule.targetRoundId; - return effectiveVerdict === (round.jumpRule.marker ?? 'REWORK') ? round.jumpRule.targetRoundId : null; + if (jumpCount < round.jumpRule.minTriggers) { + jump = round.jumpRule.targetRoundId; + } else if (effectiveVerdict === (round.jumpRule.marker ?? 'REWORK')) { + jump = round.jumpRule.targetRoundId; + } + } else if (effectiveVerdict === (round.jumpRule.marker ?? 'REWORK')) { + jump = round.jumpRule.targetRoundId; } - return effectiveVerdict === (round.jumpRule.marker ?? 'REWORK') ? round.jumpRule.targetRoundId : null; - })() - : null; + } + } if (jump) { run.roundJumpCounts[round.id] = (run.roundJumpCounts[round.id] ?? 0) + 1; + if (jumpEdgeId) { + if (!run.compiledEdgeUseCounts) run.compiledEdgeUseCounts = {}; + run.compiledEdgeUseCounts[jumpEdgeId] = (run.compiledEdgeUseCounts[jumpEdgeId] ?? 0) + 1; + } + // R3 v2 PR-ζ (M2 / ζ-10) — jump-rebound resets the script retry + // budget for the target round so a re-execution after rework + // starts fresh, not "halfway through" a previous transient-error + // budget that was consumed during the prior visit. + if (run.scriptRetryCounts) delete run.scriptRetryCounts[jump]; run.routingHistory.push({ fromRoundId: round.id, toRoundId: jump, - trigger: effectiveVerdict, + trigger: jumpTriggerLabel, atStep: run.currentExecutionStep, atAttempt: run.currentRoundAttempt, timestamp: Date.now(), @@ -1502,6 +2509,81 @@ async function executeAdvancedChain(run: P2pRun, serverLink: ServerLink | null): continue; } + // R3 v2 PR-η — for envelope_compiled runs, advance via the COMPILED + // GRAPH instead of the legacy `roundIndex++` array fallback. This + // closes the Cx1-A1 finding: if the current node has outgoing + // conditional edges but NONE matched the route AND no default edge + // exists, the previous code silently moved to the next round in + // declaration order — potentially executing an implementation / + // artifact_generation node WITHOUT route authorization. Now we + // either jump to the unique default edge or `failRun` with + // `unmatched_edge_route`. oldAdvanced runs keep the legacy + // `roundIndex++` behaviour. + if (run.advancedSourceKind === 'envelope_compiled' && run.boundWorkflow) { + const compiled = run.boundWorkflow.compiled; + const outgoing = compiled.edges.filter((edge) => edge.fromNodeId === round.id); + const hadConditional = outgoing.some((edge) => edge.edgeKind === 'conditional'); + const defaults = outgoing.filter((edge) => edge.edgeKind === 'default'); + if (defaults.length === 1) { + const next = defaults[0]; + if (!run.compiledEdgeUseCounts) run.compiledEdgeUseCounts = {}; + run.compiledEdgeUseCounts[next.id] = (run.compiledEdgeUseCounts[next.id] ?? 0) + 1; + if (run.scriptRetryCounts) delete run.scriptRetryCounts[next.toNodeId]; + run.routingHistory.push({ + fromRoundId: round.id, + toRoundId: next.toNodeId, + trigger: 'default', + atStep: run.currentExecutionStep, + atAttempt: run.currentRoundAttempt, + timestamp: Date.now(), + }); + roundIndex = rounds.findIndex((entry) => entry.id === next.toNodeId); + if (roundIndex < 0) { + // Compiled graph references a node not in legacy rounds — + // shouldn't happen, but fail closed instead of silent skip. + failRun(run, 'failed', `Compiled default edge target ${next.toNodeId} missing from resolved rounds`, serverLink); + return; + } + continue; + } + if (defaults.length > 1) { + const diag = makeP2pWorkflowDiagnostic('invalid_workflow_graph', 'execute', { + nodeId: round.id, + summary: `Compiled graph has ${defaults.length} default outgoing edges from node ${round.id}; expected at most 1.`, + }); + addHelperDiagnostic(run, { + code: 'P2P_HELPER_PRIMARY_FAILED', + attempt: run.currentRoundAttempt, + sourceSession: run.initiatorSession, + message: diag.summary ?? 'Multiple default outgoing edges', + workflowDiagnostic: diag, + }); + failRun(run, 'failed', `Compiled graph has multiple default edges from ${round.id}`, serverLink); + return; + } + // No default edge. + if (hadConditional) { + // Had conditional outgoing edges, none matched, no default — + // fail closed per spec "envelope_compiled SHALL fail closed + // when no conditional edge matches AND no default edge exists". + const diag = makeP2pWorkflowDiagnostic('unmatched_edge_route', 'execute', { + nodeId: round.id, + summary: `No outgoing conditional edge matched from ${round.id} and no default edge exists.`, + }); + addHelperDiagnostic(run, { + code: 'P2P_HELPER_PRIMARY_FAILED', + attempt: run.currentRoundAttempt, + sourceSession: run.initiatorSession, + message: diag.summary ?? 'unmatched_edge_route', + workflowDiagnostic: diag, + }); + failRun(run, 'failed', diag.summary ?? `unmatched_edge_route at ${round.id}`, serverLink); + return; + } + // No outgoing edges at all → terminal node, complete the run. + break; + } + roundIndex += 1; } @@ -1524,6 +2606,10 @@ async function executeAdvancedChain(run: P2pRun, serverLink: ServerLink | null): if (!summaryOk && (run._cancelled || isTerminal(run.status))) return; run.summaryPhase = summaryOk ? 'completed' : 'failed'; + // R3 v1b (W2) — flush the discussion write queue before reading so the + // result summary captures every queued segment instead of an + // intermediate snapshot. + await flushP2pDiscussionWriteQueue(run.contextFilePath); let fullContent = ''; try { fullContent = await readFile(run.contextFilePath, 'utf8'); @@ -1940,6 +3026,42 @@ export function buildHopPrompt(run: P2pRun, mode: P2pMode | undefined, opts: Hop // ── Helpers ─────────────────────────────────────────────────────────────── +/** + * R3 v2 PR-ζ (A6 / O4) — Single source of truth for run-terminal cleanup. + * Schedules: + * 1. Discussion writer queue drop (frees `onWriteFailure` closure that + * otherwise pins the run object). + * 2. Frozen artifact identity in-memory + on-disk clear. + * 3. `runArtifactRootCache` entry clear. + * Idempotent: safe to call from both `transition` and `failRun`. Wraps + * everything in a single 60 s `setTimeout` so a late web read can still + * see the discussion file / identity for a brief grace window — matching + * the existing `activeRuns.delete` cadence. + */ +const terminalCleanupScheduled = new Set(); +function scheduleP2pRunTerminalCleanup(run: P2pRun): void { + if (!P2P_TERMINAL_RUN_STATUSES.has(run.status)) return; + if (terminalCleanupScheduled.has(run.id)) return; + terminalCleanupScheduled.add(run.id); + setTimeout(() => { + try { + void dropP2pDiscussionWriteQueue(run.contextFilePath); + } catch { /* ignore */ } + try { + void clearPersistedFrozenP2pArtifactIdentity(run.id); + } catch { /* ignore */ } + try { + runArtifactRootCache.delete(run.id); + } catch { /* ignore */ } + terminalCleanupScheduled.delete(run.id); + }, 60_000); +} + +/** Test-only: clear the terminal-cleanup scheduling registry between runs. */ +export function __resetP2pRunTerminalCleanupForTests(): void { + terminalCleanupScheduled.clear(); +} + function transition(run: P2pRun, status: P2pRunStatus, serverLink: ServerLink | null): void { run.status = status; if (status === 'completed') { @@ -1957,6 +3079,7 @@ function transition(run: P2pRun, status: P2pRunStatus, serverLink: ServerLink | } else { scheduleRoundHopArtifactCleanup(run.hopStates); } + scheduleP2pRunTerminalCleanup(run); } run.updatedAt = new Date().toISOString(); logger.info({ runId: run.id, status }, 'P2P run state transition'); @@ -1980,11 +3103,22 @@ function failRun(run: P2pRun, errorType: string, message: string, serverLink: Se } else { scheduleRoundHopArtifactCleanup(run.hopStates); } + scheduleP2pRunTerminalCleanup(run); logger.warn({ runId: run.id, errorType, message }, 'P2P run failed'); pushState(run, serverLink); } -function pushState(run: P2pRun, serverLink: ServerLink | null): void { +// Audit:R3 hardening / task 10.5 — projection 200 ms debounce. Non-terminal +// updates within the window are coalesced (last-write-wins) so that a long +// streaming round doesn't fire dozens of `p2p.run_save` events per second. +// Terminal statuses (`completed` / `failed` / `timed_out` / `cancelled`) and +// blocking diagnostics (errors) ALWAYS flush immediately — both because the +// UI must reflect them without delay AND because a deferred terminal would +// race with `delete activeRuns.get(runId)` cleanup. +const PROJECTION_DEBOUNCE_MS = 200; +const pendingProjectionTimers = new Map>(); + +function flushProjection(run: P2pRun, serverLink: ServerLink | null): void { if (!serverLink) return; const s = run.status as string; const type = s === 'completed' ? 'p2p.run_complete' @@ -1995,6 +3129,38 @@ function pushState(run: P2pRun, serverLink: ServerLink | null): void { } catch { /* not connected */ } } +function pushState(run: P2pRun, serverLink: ServerLink | null): void { + if (!serverLink) return; + const existingTimer = pendingProjectionTimers.get(run.id); + if (existingTimer !== undefined) { + clearTimeout(existingTimer); + pendingProjectionTimers.delete(run.id); + } + // Terminal / blocking → flush immediately. Helper status check is + // intentionally over-broad (any non-running/queued/dispatched) so a future + // status added to `P2P_TERMINAL_RUN_STATUSES` automatically flushes. + const isTerminalStatus = isTerminal(run.status); + const isBlockingDiagnostic = (run.helperDiagnostics ?? []).some((d) => (d as { severity?: string }).severity === 'error'); + if (isTerminalStatus || isBlockingDiagnostic) { + flushProjection(run, serverLink); + return; + } + // Non-terminal: schedule a coalesced flush. + const timer = setTimeout(() => { + pendingProjectionTimers.delete(run.id); + flushProjection(run, serverLink); + }, PROJECTION_DEBOUNCE_MS); + pendingProjectionTimers.set(run.id, timer); +} + +/** Test-only: drain any pending throttled projections. */ +export function __flushPendingP2pProjectionsForTests(): void { + for (const [runId, timer] of pendingProjectionTimers) { + clearTimeout(timer); + pendingProjectionTimers.delete(runId); + } +} + function isTerminal(status: P2pRunStatus): boolean { return P2P_TERMINAL_RUN_STATUSES.has(status); } diff --git a/src/daemon/p2p-workflow-artifact-runtime.ts b/src/daemon/p2p-workflow-artifact-runtime.ts new file mode 100644 index 000000000..df7d6b7bc --- /dev/null +++ b/src/daemon/p2p-workflow-artifact-runtime.ts @@ -0,0 +1,1005 @@ +import { createHash } from 'node:crypto'; +import { lstat, mkdir, readdir, readFile, realpath, rename, rm, unlink, writeFile } from 'node:fs/promises'; +import { homedir, tmpdir } from 'node:os'; +import path from 'node:path'; + +import type { P2pArtifactConvention } from '../../shared/p2p-workflow-constants.js'; +import { + P2P_WORKFLOW_ARTIFACT_MAX_DEPTH, + P2P_WORKFLOW_ARTIFACT_MAX_FILES, + P2P_WORKFLOW_ARTIFACT_MAX_FILE_BYTES, + P2P_WORKFLOW_ARTIFACT_MAX_TOTAL_BYTES, +} from '../../shared/p2p-workflow-constants.js'; +import { makeP2pWorkflowDiagnostic, type P2pWorkflowDiagnostic } from '../../shared/p2p-workflow-diagnostics.js'; +import { validateP2pArtifactRelativePath } from '../../shared/p2p-workflow-artifacts.js'; +import type { P2pArtifactContract } from '../../shared/p2p-workflow-types.js'; + +export type P2pArtifactRuntimePhase = 'freeze' | 'create' | 'validate' | 'baseline'; + +export interface P2pArtifactRuntimePathOptions { + repoRoot: string; + relativePath: string; + phase?: P2pArtifactRuntimePhase; + symlinkPolicy?: 'reject_all' | 'allow_existing_under_root'; + artifactRoot?: string; +} + +export type P2pArtifactRuntimePathResult = + | { + ok: true; + absolutePath: string; + repoRootRealPath: string; + nearestExistingAncestor: string; + nearestExistingAncestorRealPath: string; + diagnostics: P2pWorkflowDiagnostic[]; + } + | { ok: false; diagnostics: P2pWorkflowDiagnostic[] }; + +export async function validateP2pArtifactRuntimePath( + options: P2pArtifactRuntimePathOptions, +): Promise { + const lexical = validateP2pArtifactRelativePath(options.relativePath, 'artifact.path'); + if (!lexical.ok) return lexical; + + const phase = options.phase ?? 'create'; + const symlinkPolicy = options.symlinkPolicy ?? 'reject_all'; + const repoRootRealPath = await realpath(options.repoRoot).catch(() => null); + if (!repoRootRealPath) { + return invalidArtifactPath('repoRoot'); + } + + let artifactRootRealPath: string | null = null; + if (options.artifactRoot) { + artifactRootRealPath = await realpath(options.artifactRoot).catch(() => null); + if (!artifactRootRealPath || !isPathInside(repoRootRealPath, artifactRootRealPath)) { + return invalidArtifactPath('artifactRoot', 'Artifact root escapes repo root.'); + } + } + + const segments = lexical.path.split('/'); + let current = options.repoRoot; + let nearestExistingAncestor = options.repoRoot; + let nearestExistingAncestorRealPath = repoRootRealPath; + + for (const [index, segment] of segments.entries()) { + current = path.join(current, segment); + const stat = await lstat(current).catch((error: NodeJS.ErrnoException) => { + if (error.code === 'ENOENT') return null; + throw error; + }); + if (!stat) break; + + if (stat.isSymbolicLink()) { + if (phase === 'freeze' || phase === 'create' || symlinkPolicy !== 'allow_existing_under_root') { + return invalidArtifactPath(segments.slice(0, index + 1).join('/'), 'Symlink component is not allowed for this artifact phase.'); + } + } + + const currentRealPath = await realpath(current).catch(() => null); + if (!currentRealPath || !isPathInside(repoRootRealPath, currentRealPath)) { + return invalidArtifactPath(segments.slice(0, index + 1).join('/'), 'Artifact realpath escapes repo root.'); + } + if (artifactRootRealPath && !isPathInside(artifactRootRealPath, currentRealPath) && !isPathInside(currentRealPath, artifactRootRealPath)) { + return invalidArtifactPath(segments.slice(0, index + 1).join('/'), 'Artifact realpath escapes declared artifact root.'); + } + nearestExistingAncestor = current; + nearestExistingAncestorRealPath = currentRealPath; + } + + if (!isPathInside(repoRootRealPath, nearestExistingAncestorRealPath)) { + return invalidArtifactPath(options.relativePath, 'Nearest existing ancestor escapes repo root.'); + } + + const absolutePath = path.join(options.repoRoot, lexical.path); + const finalRealPath = await realpath(absolutePath).catch(() => null); + if (finalRealPath && !isPathInside(repoRootRealPath, finalRealPath)) { + return invalidArtifactPath(options.relativePath, 'Final artifact realpath escapes repo root.'); + } + if (finalRealPath && artifactRootRealPath && !isPathInside(artifactRootRealPath, finalRealPath)) { + return invalidArtifactPath(options.relativePath, 'Final artifact realpath escapes declared artifact root.'); + } + + return { + ok: true, + absolutePath, + repoRootRealPath, + nearestExistingAncestor, + nearestExistingAncestorRealPath, + diagnostics: [], + }; +} + +// ────────────────────────────────────────────────────────────────────────── +// Identity freeze (tasks 6.3 / 6.4) +// ────────────────────────────────────────────────────────────────────────── + +export interface P2pArtifactFreezeArgs { + contract: P2pArtifactContract; + repoRoot: string; + runId: string; + inferredSlug?: string; + /** + * Optional absolute or repo-relative path the contract author declared as + * the OpenSpec change root. When omitted the runtime falls back to the + * sanitized `inferredSlug`. + */ + openspecChangePath?: string; +} + +export interface P2pFrozenArtifactIdentity { + convention: P2pArtifactConvention; + openspecChangeSlug?: string; + openspecChangePath?: string; + openspecArtifactPaths: string[]; + frozenAt: string; + collisionResolved: boolean; + diagnostics: P2pWorkflowDiagnostic[]; +} + +const COLLISION_SUFFIX_CAP = 100; +const SLUG_PATTERN = /^[a-z0-9-]+$/; + +const frozenIdentities = new Map(); + +export function getFrozenP2pArtifactIdentity(runId: string): P2pFrozenArtifactIdentity | undefined { + return frozenIdentities.get(runId); +} + +export function __resetP2pArtifactIdentitiesForTests(): void { + frozenIdentities.clear(); +} + +/** + * R3 v1b follow-up — Artifact identity persistence. + * + * The previous implementation kept `frozenIdentities` in a module-level + * Map only. A daemon restart mid-run wiped that map, so the next freeze + * attempt produced a fresh slug-N suffix and broke the spec invariant + * "identity preserved across retry/re-entry". We now write each frozen + * identity to `~/.imcodes/runs//identity.json` (atomic + * `.tmp` → rename) and rehydrate the map on daemon startup via + * {@link loadPersistedFrozenP2pArtifactIdentities}. + * + * The on-disk format is intentionally a thin wrapper: + * `{ schemaVersion: 1, identity: P2pFrozenArtifactIdentity }` + * so future fields can be added without breaking older daemons. + */ +const PERSISTED_IDENTITY_SCHEMA_VERSION = 1 as const; +export const P2P_RUN_STATE_DIR_ENV = 'IMCODES_P2P_RUN_STATE_DIR'; + +/** + * R3 v2 PR-ζ (B4) — Resolve the run-state dir, with path containment. + * + * Returns `~/.imcodes/runs` by default. When `IMCODES_P2P_RUN_STATE_DIR` + * env override is set, it MUST resolve under the user's home directory + * OR the OS temp directory; any other prefix is silently rejected (with + * a `logger.warn`-equivalent stderr write — this module is import-time + * sensitive, so we keep it dependency-free) and the override is ignored. + */ +function resolveRunStateDir(): string { + const defaultDir = path.join(homedir(), '.imcodes', 'runs'); + const override = process.env[P2P_RUN_STATE_DIR_ENV]; + if (!override || override.trim().length === 0) return defaultDir; + const candidate = path.resolve(override.trim()); + const safeRoots = [path.resolve(homedir()), path.resolve(tmpdir())]; + const within = safeRoots.some((root) => candidate === root || candidate.startsWith(root + path.sep)); + if (!within) { + // Use process.stderr to avoid pulling logger into this module (artifact + // runtime is import-time small; a console call is acceptable here). + try { + process.stderr.write(`P2P: ${P2P_RUN_STATE_DIR_ENV}=${override} rejected (must be under HOME or TMP); falling back to ${defaultDir}\n`); + } catch { /* ignore */ } + return defaultDir; + } + return candidate; +} + +function persistedIdentityPath(runId: string): string { + return path.join(resolveRunStateDir(), runId, 'identity.json'); +} + +async function persistFrozenIdentity(runId: string, identity: P2pFrozenArtifactIdentity): Promise { + const filePath = persistedIdentityPath(runId); + const dir = path.dirname(filePath); + try { + await mkdir(dir, { recursive: true }); + // R3 v2 PR-ζ (B2) — tmp filename includes pid + monotonic timestamp + + // random suffix so two concurrent `recordFrozenIdentity` calls for + // the SAME `runId` never write to the same tmp path. Without this + // the writeFile sequences could interleave, producing a corrupted + // JSON that survives `rename(tmp, filePath)` and pollutes future + // rehydrate. Random suffix protects against same-millisecond clashes. + const tmp = `${filePath}.${process.pid}.${Date.now()}.${Math.random().toString(36).slice(2, 10)}.tmp`; + const payload = JSON.stringify({ schemaVersion: PERSISTED_IDENTITY_SCHEMA_VERSION, identity }, null, 2); + await writeFile(tmp, payload, 'utf8'); + await rename(tmp, filePath); + } catch { + // Persistence is best-effort; daemon retry will simply re-attempt + // freeze. The in-memory identity is still set for the current + // process. Surface via debug-level logging in the caller if needed. + } +} + +/** + * Set + persist in one call. Persistence runs fire-and-forget (no await + * blocking the caller) but the in-memory map is updated synchronously so + * the very next call to `getFrozenP2pArtifactIdentity` sees the new + * value. Used everywhere `frozenIdentities.set` was previously called. + */ +function recordFrozenIdentity(runId: string, identity: P2pFrozenArtifactIdentity): void { + frozenIdentities.set(runId, identity); + // Fire and forget — persistence is best-effort and doesn't gate the + // current process's freeze decision. + void persistFrozenIdentity(runId, identity); +} + +/** + * R3 v2 PR-ζ (A2 / O4) — Clear in-memory + on-disk identity for `runId`. + * Called by the orchestrator's terminal cleanup hook (60s after run + * transition), so completed/failed/cancelled runs no longer leak + * `~/.imcodes/runs//` directories on disk OR `frozenIdentities` + * entries in memory. + * + * Best-effort: any IO failure is swallowed — the next daemon startup's + * rehydrate will re-validate / TTL-evict whatever survived. + */ +export async function clearPersistedFrozenP2pArtifactIdentity(runId: string): Promise { + frozenIdentities.delete(runId); + if (!/^[A-Za-z0-9_-]+$/.test(runId)) return; // refuse path-traversal-shaped ids + const dir = path.join(resolveRunStateDir(), runId); + try { + await rm(dir, { recursive: true, force: true }); + } catch { + // ignore + } +} + +/** + * Scan `~/.imcodes/runs/*` and rehydrate the in-memory `frozenIdentities` + * map. Should be invoked once at daemon startup BEFORE any P2P launch is + * accepted so an in-flight run picked up after restart finds its existing + * frozen identity instead of producing a fresh slug suffix. Best-effort: + * malformed entries are skipped silently. Returns the number of + * identities loaded. + */ +/** + * R3 v2 PR-ζ (A2 / A3 / A4 / B3 / O5) — Hardened rehydrate. + * + * Five new defenses on top of v1b's basic shape check: + * 1. **Symlink rejection**: top-level `/` entries that resolve via + * symlink are skipped (defends against attacker-placed link to + * `/etc/...` etc). + * 2. **Path re-validation**: every `openspecArtifactPaths` entry runs + * through `validateP2pArtifactRelativePath` against `repoRoot` (when + * provided). Entries failing validation are dropped. + * 3. **Count cap**: caps total loaded identities at 500. Excess entries + * are skipped with a single warning so a runaway daemon-state dir + * doesn't choke startup. + * 4. **TTL eviction**: entries with `mtime` older than 7d are unlinked + * synchronously (best-effort) so daemon-state dir self-prunes. + * 5. **`.tmp` orphan cleanup**: any `*.tmp` siblings of `identity.json` + * get unlinked at startup so failed atomic writes don't leak. + * + * `args.repoRoot` (optional, DEC-O5) — when supplied, identities whose + * `openspecChangePath` is NOT inside `repoRoot` are dropped with a + * `legacy_identity_repo_root_mismatch` log line. Allows daemon to safely + * pick up sessions across project switches. + */ +export interface LoadPersistedIdentitiesArgs { + repoRoot?: string; +} +const PERSISTED_IDENTITY_MAX_COUNT = 500 as const; +const PERSISTED_IDENTITY_MAX_AGE_MS = 7 * 24 * 60 * 60 * 1000; + +export async function loadPersistedFrozenP2pArtifactIdentities(args: LoadPersistedIdentitiesArgs = {}): Promise { + const dir = resolveRunStateDir(); + let entries: string[]; + try { + entries = await readdir(dir); + } catch { + return 0; + } + let loaded = 0; + let countCapped = false; + for (const entry of entries) { + if (loaded >= PERSISTED_IDENTITY_MAX_COUNT) { + countCapped = true; + break; + } + if (!/^[A-Za-z0-9_-]+$/.test(entry)) continue; + const entryDir = path.join(dir, entry); + // Defense 1 — reject symlinked top-level entries. + let entryStat; + try { entryStat = await lstat(entryDir); } catch { continue; } + if (entryStat.isSymbolicLink()) { + try { process.stderr.write(`P2P: skipping symlink run-state entry ${entryDir}\n`); } catch { /* ignore */ } + continue; + } + if (!entryStat.isDirectory()) continue; + // Defense 5 — sweep .tmp siblings. + try { + const siblings = await readdir(entryDir); + for (const sibling of siblings) { + if (sibling.endsWith('.tmp')) { + await unlink(path.join(entryDir, sibling)).catch(() => {}); + } + } + } catch { /* ignore */ } + const filePath = path.join(entryDir, 'identity.json'); + let fileStat; + try { fileStat = await lstat(filePath); } catch { continue; } + // Defense 4 — TTL eviction. + if (Date.now() - fileStat.mtimeMs > PERSISTED_IDENTITY_MAX_AGE_MS) { + await rm(entryDir, { recursive: true, force: true }).catch(() => {}); + continue; + } + try { + const raw = await readFile(filePath, 'utf8'); + const parsed = JSON.parse(raw) as unknown; + if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) continue; + const obj = parsed as { schemaVersion?: unknown; identity?: P2pFrozenArtifactIdentity }; + if (obj.schemaVersion !== PERSISTED_IDENTITY_SCHEMA_VERSION) continue; + if (!obj.identity || typeof obj.identity !== 'object') continue; + if (!Array.isArray(obj.identity.openspecArtifactPaths)) continue; + // Defense 2 — re-validate every declared path. + let allPathsValid = true; + if (args.repoRoot) { + for (const declared of obj.identity.openspecArtifactPaths) { + if (typeof declared !== 'string') { allPathsValid = false; break; } + const lex = validateP2pArtifactRelativePath(declared, 'identity.openspecArtifactPaths'); + if (!lex.ok) { allPathsValid = false; break; } + } + } + if (!allPathsValid) { + try { process.stderr.write(`P2P: dropping persisted identity ${entry} — invalid declared path\n`); } catch { /* ignore */ } + continue; + } + // Defense O5 — repoRoot containment for openspecChangePath. + if (args.repoRoot && obj.identity.openspecChangePath) { + const lex = validateP2pArtifactRelativePath(obj.identity.openspecChangePath, 'identity.openspecChangePath'); + if (!lex.ok) { + try { process.stderr.write(`P2P: legacy_identity_repo_root_mismatch ${entry} — openspecChangePath rejected\n`); } catch { /* ignore */ } + continue; + } + } + frozenIdentities.set(entry, obj.identity); + loaded += 1; + } catch { + // Skip malformed entry; daemon will re-freeze on next launch. + } + } + if (countCapped) { + try { process.stderr.write(`P2P: loadPersistedFrozenP2pArtifactIdentities count cap reached (${PERSISTED_IDENTITY_MAX_COUNT}); remaining entries skipped\n`); } catch { /* ignore */ } + } + return loaded; +} + +export async function freezeP2pArtifactIdentity(args: P2pArtifactFreezeArgs): Promise { + const existing = frozenIdentities.get(args.runId); + if (existing) return existing; + + const diagnostics: P2pWorkflowDiagnostic[] = []; + const { contract, repoRoot, runId } = args; + + if (contract.convention === 'explicit_paths') { + const validatedPaths: string[] = []; + for (const [index, declaredPath] of contract.paths.entries()) { + const result = await validateP2pArtifactRuntimePath({ + repoRoot, + relativePath: declaredPath, + phase: 'freeze', + symlinkPolicy: contract.symlinkPolicy, + }); + if (!result.ok) { + const identity: P2pFrozenArtifactIdentity = { + convention: contract.convention, + openspecArtifactPaths: [], + frozenAt: new Date().toISOString(), + collisionResolved: false, + diagnostics: result.diagnostics.map((diagnostic) => ({ + ...diagnostic, + fieldPath: diagnostic.fieldPath ?? `artifact.paths[${index}]`, + })), + }; + recordFrozenIdentity(runId, identity); + return identity; + } + validatedPaths.push(declaredPath); + } + const identity: P2pFrozenArtifactIdentity = { + convention: contract.convention, + openspecArtifactPaths: validatedPaths, + frozenAt: new Date().toISOString(), + collisionResolved: false, + diagnostics, + }; + recordFrozenIdentity(runId, identity); + return identity; + } + + if (contract.convention === 'openspec_convention') { + const baseSlug = deriveOpenspecSlug(args); + if (!baseSlug) { + const identity: P2pFrozenArtifactIdentity = { + convention: contract.convention, + openspecArtifactPaths: [], + frozenAt: new Date().toISOString(), + collisionResolved: false, + diagnostics: [makeP2pWorkflowDiagnostic('unsafe_artifact_path', 'bind', { + fieldPath: 'artifact.openspecChangePath', + summary: 'OpenSpec convention requires a derivable change slug.', + })], + }; + recordFrozenIdentity(runId, identity); + return identity; + } + + const freezeResult = await freezeOpenspecChangeDirectory({ + repoRoot, + baseSlug, + symlinkPolicy: contract.symlinkPolicy, + }); + if (!freezeResult.ok) { + const identity: P2pFrozenArtifactIdentity = { + convention: contract.convention, + openspecArtifactPaths: [], + frozenAt: new Date().toISOString(), + collisionResolved: false, + diagnostics: freezeResult.diagnostics, + }; + recordFrozenIdentity(runId, identity); + return identity; + } + + if (freezeResult.collisionResolved) { + diagnostics.push(makeP2pWorkflowDiagnostic('artifact_identity_collision_resolved', 'bind', { + fieldPath: 'artifact.openspecChangeSlug', + summary: `Slug "${baseSlug}" collided; resolved as "${freezeResult.slug}".`, + })); + } + + const declaredArtifacts = contract.paths.length > 0 ? contract.paths : []; + const artifactRoot = `openspec/changes/${freezeResult.slug}`; + const openspecArtifactPaths = declaredArtifacts.length > 0 + ? declaredArtifacts.map((rel) => joinUnderArtifactRoot(artifactRoot, rel)) + : [artifactRoot]; + + const identity: P2pFrozenArtifactIdentity = { + convention: contract.convention, + openspecChangeSlug: freezeResult.slug, + openspecChangePath: artifactRoot, + openspecArtifactPaths, + frozenAt: new Date().toISOString(), + collisionResolved: freezeResult.collisionResolved, + diagnostics, + }; + recordFrozenIdentity(runId, identity); + return identity; + } + + // convention: 'none' — nothing to freeze; reuse the input contract paths + const identity: P2pFrozenArtifactIdentity = { + convention: contract.convention, + openspecArtifactPaths: [...contract.paths], + frozenAt: new Date().toISOString(), + collisionResolved: false, + diagnostics, + }; + recordFrozenIdentity(runId, identity); + return identity; +} + +function deriveOpenspecSlug(args: P2pArtifactFreezeArgs): string | null { + const explicitPath = args.openspecChangePath ?? args.contract.paths.find((value) => value.startsWith('openspec/changes/')); + if (explicitPath) { + const segments = explicitPath.split('/').filter(Boolean); + const idx = segments.findIndex((segment) => segment === 'changes'); + if (idx >= 0 && segments[idx + 1]) { + const candidate = sanitizeSlug(segments[idx + 1]); + if (candidate) return candidate; + } + } + if (args.inferredSlug) { + const candidate = sanitizeSlug(args.inferredSlug); + if (candidate) return candidate; + } + return null; +} + +function sanitizeSlug(input: string): string { + return input + .toLowerCase() + .replace(/[^a-z0-9-]+/g, '-') + .replace(/^-+|-+$/g, '') + .replace(/-{2,}/g, '-') + .slice(0, 64); +} + +interface OpenspecFreezeResult { + ok: true; + slug: string; + absolutePath: string; + collisionResolved: boolean; +} + +interface OpenspecFreezeFailure { + ok: false; + diagnostics: P2pWorkflowDiagnostic[]; +} + +async function freezeOpenspecChangeDirectory(args: { + repoRoot: string; + baseSlug: string; + symlinkPolicy?: 'reject_all' | 'allow_existing_under_root'; +}): Promise { + if (!SLUG_PATTERN.test(args.baseSlug)) { + return { + ok: false, + diagnostics: [makeP2pWorkflowDiagnostic('unsafe_artifact_path', 'bind', { + fieldPath: 'artifact.openspecChangeSlug', + summary: `Slug "${args.baseSlug}" did not normalize to [a-z0-9-]+.`, + })], + }; + } + + // Make sure openspec/changes/ exists (recursive) before atomic mkdir. + const changesParent = path.join(args.repoRoot, 'openspec', 'changes'); + await mkdir(changesParent, { recursive: true }); + + for (let attempt = 0; attempt < COLLISION_SUFFIX_CAP; attempt += 1) { + const candidate = attempt === 0 ? args.baseSlug : `${args.baseSlug}-${attempt + 1}`; + const relativePath = `openspec/changes/${candidate}`; + const lexical = validateP2pArtifactRelativePath(relativePath, 'artifact.openspecChangePath'); + if (!lexical.ok) return { ok: false, diagnostics: lexical.diagnostics }; + + const validation = await validateP2pArtifactRuntimePath({ + repoRoot: args.repoRoot, + relativePath, + phase: 'freeze', + symlinkPolicy: args.symlinkPolicy, + }); + if (!validation.ok) return { ok: false, diagnostics: validation.diagnostics }; + + const absolutePath = path.join(args.repoRoot, relativePath); + try { + await mkdir(absolutePath, { recursive: false }); + return { + ok: true, + slug: candidate, + absolutePath, + collisionResolved: attempt > 0, + }; + } catch (error) { + const code = (error as NodeJS.ErrnoException).code; + if (code === 'EEXIST') continue; + return { + ok: false, + diagnostics: [makeP2pWorkflowDiagnostic('unsafe_artifact_path', 'bind', { + fieldPath: 'artifact.openspecChangePath', + summary: `mkdir failed: ${code ?? 'unknown'}.`, + })], + }; + } + } + + return { + ok: false, + diagnostics: [makeP2pWorkflowDiagnostic('unsafe_artifact_path', 'bind', { + fieldPath: 'artifact.openspecChangeSlug', + summary: `Could not allocate a non-colliding slug after ${COLLISION_SUFFIX_CAP} attempts.`, + })], + }; +} + +function joinUnderArtifactRoot(root: string, relative: string): string { + if (relative.startsWith(`${root}/`) || relative === root) return relative; + return `${root}/${relative}`.replace(/\/+/g, '/'); +} + +// ────────────────────────────────────────────────────────────────────────── +// New-file sandbox (task 6.5 / 6.6) +// ────────────────────────────────────────────────────────────────────────── + +export interface P2pCreateArtifactPathOptions { + repoRoot: string; + relativePath: string; + phase?: P2pArtifactRuntimePhase; + symlinkPolicy?: 'reject_all' | 'allow_existing_under_root'; + artifactRoot?: string; +} + +export type P2pCreateArtifactPathResult = + | { ok: true; absolutePath: string; finalRealPath: string; diagnostics: P2pWorkflowDiagnostic[] } + | { ok: false; diagnostics: P2pWorkflowDiagnostic[] }; + +/** + * Create a placeholder file or directory under the artifact sandbox. The + * relative path may end with a trailing `/` to indicate a directory create. + * + * The full sandbox algorithm: + * 1. lexical-validate the relative path + * 2. find nearest existing ancestor + lstat each segment (via + * `validateP2pArtifactRuntimePath` with the phase-specific symlink policy) + * 3. `mkdir(parent, { recursive: true })` then `writeFile('')` (file) + * or `mkdir(path)` (directory) + * 4. post-create realpath verify final path under repoRoot AND artifactRoot + */ +export async function createP2pArtifactPath( + options: P2pCreateArtifactPathOptions, +): Promise { + const phase = options.phase ?? 'create'; + const isDirectory = options.relativePath.endsWith('/'); + const trimmedRelativePath = isDirectory + ? options.relativePath.replace(/\/+$/, '') + : options.relativePath; + + if (trimmedRelativePath !== options.relativePath && trimmedRelativePath === '') { + return { ok: false, diagnostics: invalidArtifactPath('artifact.path', 'Empty path after trimming trailing slash.').diagnostics }; + } + + const validation = await validateP2pArtifactRuntimePath({ + repoRoot: options.repoRoot, + relativePath: trimmedRelativePath, + phase, + symlinkPolicy: options.symlinkPolicy, + artifactRoot: options.artifactRoot, + }); + if (!validation.ok) return { ok: false, diagnostics: validation.diagnostics }; + + const absolutePath = validation.absolutePath; + + try { + if (isDirectory) { + await mkdir(absolutePath, { recursive: true }); + } else { + await mkdir(path.dirname(absolutePath), { recursive: true }); + await writeFile(absolutePath, '', { flag: 'wx' }).catch(async (error: NodeJS.ErrnoException) => { + if (error.code === 'EEXIST') return; // honor preexisting placeholder + throw error; + }); + } + } catch (error) { + const code = (error as NodeJS.ErrnoException).code ?? 'unknown'; + return { ok: false, diagnostics: invalidArtifactPath(trimmedRelativePath, `Artifact create failed: ${code}.`).diagnostics }; + } + + const finalRealPath = await realpath(absolutePath).catch(() => null); + if (!finalRealPath || !isPathInside(validation.repoRootRealPath, finalRealPath)) { + return { ok: false, diagnostics: invalidArtifactPath(trimmedRelativePath, 'Created artifact realpath escapes repo root.').diagnostics }; + } + if (options.artifactRoot) { + const artifactRootRealPath = await realpath(options.artifactRoot).catch(() => null); + if (!artifactRootRealPath || !isPathInside(artifactRootRealPath, finalRealPath)) { + return { ok: false, diagnostics: invalidArtifactPath(trimmedRelativePath, 'Created artifact realpath escapes declared artifact root.').diagnostics }; + } + } + + return { ok: true, absolutePath, finalRealPath, diagnostics: [] }; +} + +// ────────────────────────────────────────────────────────────────────────── +// Per-file baselines + caps (tasks 6.7 / 6.8) +// ────────────────────────────────────────────────────────────────────────── + +export interface P2pArtifactBaselineFile { + relativePath: string; + size: number; + sha256: string; + type: 'file' | 'directory'; +} + +export interface P2pArtifactBaseline { + rootPath: string; + files: P2pArtifactBaselineFile[]; + capturedAt: string; + truncated: boolean; +} + +export interface P2pArtifactBaselineCaptureArgs { + rootPath: string; + repoRoot: string; + phase: 'baseline' | 'validate'; + symlinkPolicy?: 'reject_all' | 'allow_existing_under_root'; +} + +export interface P2pArtifactBaselineCaptureResult { + baseline: P2pArtifactBaseline; + diagnostics: P2pWorkflowDiagnostic[]; +} + +/** + * Capture a per-file baseline rooted at `rootPath` (repo-relative). Caps are + * enforced via `P2P_WORKFLOW_ARTIFACT_MAX_*`. When a cap is exceeded the walker + * stops, sets `truncated: true`, and emits an `artifact_baseline_too_large` + * diagnostic. Per-file overflow (>8 MiB) is skipped with a per-file diagnostic + * but the walk continues. + */ +export async function captureP2pArtifactBaseline( + args: P2pArtifactBaselineCaptureArgs, +): Promise { + const diagnostics: P2pWorkflowDiagnostic[] = []; + const baseline: P2pArtifactBaseline = { + rootPath: args.rootPath, + files: [], + capturedAt: new Date().toISOString(), + truncated: false, + }; + + const validation = await validateP2pArtifactRuntimePath({ + repoRoot: args.repoRoot, + relativePath: args.rootPath, + phase: args.phase, + symlinkPolicy: args.symlinkPolicy, + }); + if (!validation.ok) { + return { baseline, diagnostics: validation.diagnostics }; + } + + const rootAbsolute = validation.absolutePath; + const rootStat = await lstat(rootAbsolute).catch(() => null); + if (!rootStat) { + // Empty baseline is allowed — used for "no files yet" pre-state. + return { baseline, diagnostics }; + } + + const queue: Array<{ absolute: string; relative: string }> = []; + if (rootStat.isDirectory()) { + queue.push({ absolute: rootAbsolute, relative: '' }); + } else if (rootStat.isFile()) { + const fileEntry = await captureFileEntry(rootAbsolute, args.rootPath, args, diagnostics); + if (fileEntry) baseline.files.push(fileEntry); + return { baseline, diagnostics }; + } else if (rootStat.isSymbolicLink() && args.symlinkPolicy !== 'allow_existing_under_root') { + diagnostics.push(makeP2pWorkflowDiagnostic('unsafe_artifact_path', 'bind', { + fieldPath: args.rootPath, + summary: 'Symlink baseline root rejected for this phase.', + })); + return { baseline, diagnostics }; + } else { + return { baseline, diagnostics }; + } + + let totalBytes = 0; + while (queue.length > 0) { + const item = queue.shift()!; + let entries: Array<{ name: string }>; + try { + entries = (await readdir(item.absolute, { withFileTypes: true })) as Array<{ name: string }>; + } catch { + continue; + } + // Sort entries to keep traversal deterministic. + entries.sort((a, b) => String(a.name).localeCompare(String(b.name))); + + for (const entry of entries) { + const entryName = String(entry.name); + const childRelative = item.relative ? `${item.relative}/${entryName}` : entryName; + const childAbsolute = path.join(item.absolute, entryName); + const fullRelative = `${args.rootPath}/${childRelative}`; + + // Depth cap (slash-count from rootPath = depth of the child relative + // to the root). depth==0 == direct children; cap at MAX_DEPTH. + const childDepth = childRelative.split('/').length; + if (childDepth > P2P_WORKFLOW_ARTIFACT_MAX_DEPTH) { + baseline.truncated = true; + diagnostics.push(makeP2pWorkflowDiagnostic('artifact_baseline_too_large', 'bind', { + fieldPath: fullRelative, + summary: `Baseline depth exceeds cap (${childDepth}/${P2P_WORKFLOW_ARTIFACT_MAX_DEPTH}).`, + })); + return { baseline, diagnostics }; + } + + let stat; + try { + stat = await lstat(childAbsolute); + } catch { + continue; + } + + if (stat.isSymbolicLink()) { + if (args.symlinkPolicy !== 'allow_existing_under_root') { + // Skip symlinks (don't include in baseline). + continue; + } + const resolved = await realpath(childAbsolute).catch(() => null); + if (!resolved || !isPathInside(validation.repoRootRealPath, resolved)) continue; + } + + if (stat.isDirectory()) { + if (baseline.files.length >= P2P_WORKFLOW_ARTIFACT_MAX_FILES) { + baseline.truncated = true; + diagnostics.push(makeP2pWorkflowDiagnostic('artifact_baseline_too_large', 'bind', { + fieldPath: args.rootPath, + summary: `Baseline file count exceeds cap (${P2P_WORKFLOW_ARTIFACT_MAX_FILES}).`, + })); + return { baseline, diagnostics }; + } + baseline.files.push({ + relativePath: fullRelative, + size: 0, + sha256: '', + type: 'directory', + }); + queue.push({ absolute: childAbsolute, relative: childRelative }); + continue; + } + + if (!stat.isFile()) continue; + + // File-count cap. + if (baseline.files.length >= P2P_WORKFLOW_ARTIFACT_MAX_FILES) { + baseline.truncated = true; + diagnostics.push(makeP2pWorkflowDiagnostic('artifact_baseline_too_large', 'bind', { + fieldPath: args.rootPath, + summary: `Baseline file count exceeds cap (${P2P_WORKFLOW_ARTIFACT_MAX_FILES}).`, + })); + return { baseline, diagnostics }; + } + + // Per-file size cap. + if (stat.size > P2P_WORKFLOW_ARTIFACT_MAX_FILE_BYTES) { + diagnostics.push(makeP2pWorkflowDiagnostic('artifact_baseline_too_large', 'bind', { + fieldPath: fullRelative, + summary: `File exceeds per-file cap (${stat.size}/${P2P_WORKFLOW_ARTIFACT_MAX_FILE_BYTES}).`, + })); + continue; + } + + // Total-bytes cap (predictive — refuse to read if it would push us over). + if (totalBytes + stat.size > P2P_WORKFLOW_ARTIFACT_MAX_TOTAL_BYTES) { + baseline.truncated = true; + diagnostics.push(makeP2pWorkflowDiagnostic('artifact_baseline_too_large', 'bind', { + fieldPath: args.rootPath, + summary: `Baseline total bytes would exceed cap (${totalBytes + stat.size}/${P2P_WORKFLOW_ARTIFACT_MAX_TOTAL_BYTES}).`, + })); + return { baseline, diagnostics }; + } + + let contents: Buffer; + try { + contents = await readFile(childAbsolute); + } catch { + continue; + } + + const sha256 = createHash('sha256').update(contents).digest('hex'); + baseline.files.push({ + relativePath: fullRelative, + size: stat.size, + sha256, + type: 'file', + }); + totalBytes += stat.size; + } + } + + // Sort files for stable equality / hash. + baseline.files.sort((a, b) => a.relativePath.localeCompare(b.relativePath)); + return { baseline, diagnostics }; +} + +async function captureFileEntry( + absolute: string, + relativePath: string, + args: P2pArtifactBaselineCaptureArgs, + diagnostics: P2pWorkflowDiagnostic[], +): Promise { + let stat; + try { + stat = await lstat(absolute); + } catch { + return null; + } + if (stat.isSymbolicLink() && args.symlinkPolicy !== 'allow_existing_under_root') return null; + if (!stat.isFile()) return null; + + if (stat.size > P2P_WORKFLOW_ARTIFACT_MAX_FILE_BYTES) { + diagnostics.push(makeP2pWorkflowDiagnostic('artifact_baseline_too_large', 'bind', { + fieldPath: relativePath, + summary: `File exceeds per-file cap (${stat.size}/${P2P_WORKFLOW_ARTIFACT_MAX_FILE_BYTES}).`, + })); + return null; + } + + let contents: Buffer; + try { + contents = await readFile(absolute); + } catch { + return null; + } + const sha256 = createHash('sha256').update(contents).digest('hex'); + return { relativePath, size: stat.size, sha256, type: 'file' }; +} + +/** + * Compare baselines for equality, EXCLUDING `capturedAt` (the timestamp is + * intentionally excluded from contract success per task 6.7 / spec). + */ +export function p2pArtifactBaselinesEqual(a: P2pArtifactBaseline, b: P2pArtifactBaseline): boolean { + if (a.rootPath !== b.rootPath) return false; + if (a.truncated !== b.truncated) return false; + if (a.files.length !== b.files.length) return false; + const left = [...a.files].sort((x, y) => x.relativePath.localeCompare(y.relativePath)); + const right = [...b.files].sort((x, y) => x.relativePath.localeCompare(y.relativePath)); + for (let i = 0; i < left.length; i += 1) { + const lf = left[i]; + const rf = right[i]; + if (lf.relativePath !== rf.relativePath) return false; + if (lf.size !== rf.size) return false; + if (lf.sha256 !== rf.sha256) return false; + if (lf.type !== rf.type) return false; + } + return true; +} + +// ────────────────────────────────────────────────────────────────────────── +// Baseline delta verification (task 6.9 — forbid the dir-listing heuristic) +// ────────────────────────────────────────────────────────────────────────── + +export interface P2pArtifactBaselineDeltaResult { + ok: boolean; + diagnostics: P2pWorkflowDiagnostic[]; +} + +/** + * Verify that every declared `relativePath` in `declaredFiles` either: + * - exists in `after.files` AND has a different sha256 than the same path + * in `before.files`, OR + * - is added (was absent in `before` and present in `after`). + * + * Files NOT in the declared set are ignored — broad directory listing changes + * never satisfy a contract per spec §"Artifact Baselines and Validation". + * + * NOTE: this helper deliberately does NOT use `before.files.length !== + * after.files.length` as a success criterion (that would let a sibling change + * masquerade as a declared-file change), and the surrounding daemon code + * deliberately does NOT use `broad directory listing` (forbidden by reverse-regression + * guard #5). + */ +export function verifyP2pArtifactBaselineDelta( + before: P2pArtifactBaseline, + after: P2pArtifactBaseline, + declaredFiles: Array<{ relativePath: string }>, +): P2pArtifactBaselineDeltaResult { + const diagnostics: P2pWorkflowDiagnostic[] = []; + const beforeByPath = new Map(before.files.map((file) => [file.relativePath, file] as const)); + const afterByPath = new Map(after.files.map((file) => [file.relativePath, file] as const)); + + let ok = declaredFiles.length > 0; + for (const declared of declaredFiles) { + const afterFile = afterByPath.get(declared.relativePath); + if (!afterFile) { + ok = false; + diagnostics.push(makeP2pWorkflowDiagnostic('artifact_contract_not_satisfied', 'execute', { + fieldPath: declared.relativePath, + summary: 'Declared artifact path missing after run.', + })); + continue; + } + const beforeFile = beforeByPath.get(declared.relativePath); + if (beforeFile && beforeFile.sha256 === afterFile.sha256) { + ok = false; + diagnostics.push(makeP2pWorkflowDiagnostic('artifact_baseline_mismatch', 'execute', { + fieldPath: declared.relativePath, + summary: 'Declared artifact path unchanged (sha256 identical).', + })); + } + } + return { ok, diagnostics }; +} + +// ────────────────────────────────────────────────────────────────────────── +// Helpers +// ────────────────────────────────────────────────────────────────────────── + +function invalidArtifactPath(fieldPath: string, summary?: string): P2pArtifactRuntimePathResult { + return { + ok: false, + diagnostics: [makeP2pWorkflowDiagnostic('unsafe_artifact_path', 'bind', { fieldPath, summary })], + }; +} + +function isPathInside(root: string, candidate: string): boolean { + const relative = path.relative(root, candidate); + return relative === '' || (!!relative && !relative.startsWith('..') && !path.isAbsolute(relative)); +} diff --git a/src/daemon/p2p-workflow-bind.ts b/src/daemon/p2p-workflow-bind.ts new file mode 100644 index 000000000..dca468ba6 --- /dev/null +++ b/src/daemon/p2p-workflow-bind.ts @@ -0,0 +1,140 @@ +import { P2P_WORKFLOW_CAPABILITY_V1 } from '../../shared/p2p-workflow-constants.js'; +import { makeP2pWorkflowDiagnostic, type P2pWorkflowDiagnostic } from '../../shared/p2p-workflow-diagnostics.js'; +import type { + P2pBindResult, + P2pBindRuntimeContext, + P2pBoundWorkflow, + P2pCompiledWorkflow, +} from '../../shared/p2p-workflow-types.js'; + +/** + * Audit:R3 PR-β / V-6 — daemon-side helper that enforces the FULL + * `P2pStaticPolicy` against the compiled workflow at bind time. compile is + * intentionally pure-shared and only derives capability requirements; this + * helper is the daemon-owned authority layer that: + * + * - rejects `permissionScope: 'implementation'` nodes when policy disallows + * - rejects `openspec_convention` artifacts when policy disallows + * - rejects `commandKind: 'interpreter'` script nodes when policy disallows + * - rejects script `argv[0]` not in `allowedExecutables` (when allowlist non-empty; + * empty allowlist means "no script execution allowed", which is the v1a default + * until a daemon explicitly configures executables) + * + * Returned diagnostics use existing diagnostic codes: + * - `script_executable_denied` for executable / interpreter rejections + * - `missing_required_capability` for implementation / artifact rejections + * + * The helper degrades gracefully when policy is not yet supplied (callers that + * still build legacy bind contexts without `policySnapshot`); but the v1a + * launch path always passes a `policySnapshot` from `loadDaemonP2pStaticPolicy`. + */ +export function validateCompiledWorkflowAgainstBindPolicy( + compiled: Pick, + bindContext: Pick, +): P2pWorkflowDiagnostic[] { + const diagnostics: P2pWorkflowDiagnostic[] = []; + const policy = bindContext.policySnapshot; + if (!policy) return diagnostics; + const allowedExecutables = new Set(policy.allowedExecutables); + + for (const node of compiled.nodes) { + if (node.permissionScope === 'implementation' && !policy.allowImplementationPermission) { + diagnostics.push(makeP2pWorkflowDiagnostic('missing_required_capability', 'bind', { + runId: bindContext.runId, + nodeId: node.id, + fieldPath: `nodes.${node.id}.permissionScope`, + summary: 'Daemon policy does not allow implementation permission.', + })); + } + if (node.artifacts.some((artifact) => artifact.convention === 'openspec_convention') && !policy.allowOpenSpecArtifacts) { + diagnostics.push(makeP2pWorkflowDiagnostic('missing_required_capability', 'bind', { + runId: bindContext.runId, + nodeId: node.id, + fieldPath: `nodes.${node.id}.artifacts`, + summary: 'Daemon policy does not allow OpenSpec artifact writes.', + })); + } + if (node.script) { + if (node.script.commandKind === 'interpreter' && !policy.allowInterpreterScripts) { + diagnostics.push(makeP2pWorkflowDiagnostic('script_executable_denied', 'bind', { + runId: bindContext.runId, + nodeId: node.id, + fieldPath: `nodes.${node.id}.script.commandKind`, + summary: 'Daemon policy does not allow interpreter scripts.', + })); + } + const executable = node.script.commandKind === 'interpreter' + ? node.script.interpreter + : node.script.argv[0]; + // Empty allowlist means script execution is not yet enabled by daemon + // policy (v1a fail-closed default). Reject all script nodes. + if (!executable || !allowedExecutables.has(executable)) { + diagnostics.push(makeP2pWorkflowDiagnostic('script_executable_denied', 'bind', { + runId: bindContext.runId, + nodeId: node.id, + fieldPath: `nodes.${node.id}.script.argv[0]`, + summary: `Executable ${executable ?? ''} is not allowlisted by daemon policy.`, + })); + } + } + } + return diagnostics; +} + +export function getMissingP2pWorkflowCapabilities( + compiled: Pick, + bindContext: Pick, +): string[] { + // Audit:R3 PR-α — read capabilities from `capabilitySnapshot` (the + // canonical `daemon.hello` advertisement) instead of the ad-hoc + // `currentDaemonPolicy.capabilities` subset that no longer exists. + const available = new Set(bindContext.capabilitySnapshot.capabilities); + const required = new Set([ + P2P_WORKFLOW_CAPABILITY_V1, + ...compiled.derivedRequiredCapabilities, + ]); + + return [...required].filter((capability) => !available.has(capability)); +} + +export function bindP2pCompiledWorkflow( + compiled: P2pCompiledWorkflow, + bindContext: P2pBindRuntimeContext, +): P2pBindResult { + const diagnostics = compiled.diagnostics.map((diagnostic) => ({ ...diagnostic })); + + if (!bindContext.concurrencyAdmission.accepted) { + diagnostics.push(makeP2pWorkflowDiagnostic('daemon_busy', 'bind', { + runId: bindContext.runId, + summary: bindContext.concurrencyAdmission.reason ?? 'daemon_busy', + })); + return { ok: false, reason: 'daemon_busy', diagnostics }; + } + + const missingCapabilities = getMissingP2pWorkflowCapabilities(compiled, bindContext); + if (missingCapabilities.length > 0) { + diagnostics.push(makeP2pWorkflowDiagnostic('missing_required_capability', 'bind', { + runId: bindContext.runId, + fieldPath: 'capabilitySnapshot.capabilities', + summary: `Missing required capabilities: ${missingCapabilities.join(', ')}`, + })); + return { ok: false, reason: 'missing_required_capability', diagnostics }; + } + + // Audit:R3 PR-β / V-6 — daemon-side policy authority. compile only derives + // capability requirements; bind enforces the FULL P2pStaticPolicy (allow + // flags + executable allowlist). Any error severity here halts bind. + const policyDiagnostics = validateCompiledWorkflowAgainstBindPolicy(compiled, bindContext); + diagnostics.push(...policyDiagnostics); + if (policyDiagnostics.some((diagnostic) => diagnostic.severity === 'error')) { + const reason = 'missing_required_capability' as const; + return { ok: false, reason, diagnostics }; + } + + const bound: P2pBoundWorkflow = { + compiled: structuredClone(compiled), + bindContext: structuredClone(bindContext), + diagnostics, + }; + return { ok: true, bound, diagnostics }; +} diff --git a/src/daemon/p2p-workflow-discussion-offsets.ts b/src/daemon/p2p-workflow-discussion-offsets.ts new file mode 100644 index 000000000..d053dd7db --- /dev/null +++ b/src/daemon/p2p-workflow-discussion-offsets.ts @@ -0,0 +1,258 @@ +/** + * P2P workflow discussion read offsets (Tasks 5.4 / 12.4). + * + * Implements per-(run, source) incremental discussion reads using the shape + * defined in `shared/p2p-workflow-types.ts::P2pDiscussionReadOffset`: + * + * { byteOffset, sha256Prefix, sizeAtOffset } + * + * On size/hash mismatch (rotation, truncation, divergent prefix bytes) the + * runtime resets to a safe full bounded read or fails closed depending on the + * declared source policy. State lives entirely in the daemon process — it is + * private runtime state, never persisted or projected to the public surface. + */ +import { createHash } from 'node:crypto'; +import { open, stat } from 'node:fs/promises'; + +import { + makeP2pWorkflowDiagnostic, + makeP2pWorkflowWarning, + type P2pWorkflowDiagnostic, +} from '../../shared/p2p-workflow-diagnostics.js'; + +/** First 16 hex chars of sha256(file contents read so far). */ +const SHA256_PREFIX_HEX_LENGTH = 16; +/** Default bounded read cap matches the existing daemon discussion read budget. */ +const DEFAULT_MAX_BYTES = 256 * 1024; + +export interface RecordedReadOffset { + byteOffset: number; + /** First 16 hex chars of sha256(file contents read so far). */ + sha256Prefix: string; + /** File size at the time the offset was recorded. */ + sizeAtOffset: number; + recordedAt: string; +} + +export type ReadDiscussionResetReason = + | 'fresh' + | 'mismatch_safe_reset' + | 'mismatch_fail_closed' + | 'incremental'; + +export interface ReadDiscussionResult { + /** UTF-8 text from the resolved offset (or full bounded read on mismatch). */ + content: string; + /** Updated offset after this read (may be unchanged on fail-closed). */ + newOffset: RecordedReadOffset; + reset: ReadDiscussionResetReason; + diagnostics: P2pWorkflowDiagnostic[]; +} + +export type ReadDiscussionMismatchPolicy = 'fail' | 'reset'; + +export interface ReadDiscussionArgs { + runId: string; + /** Logical source key, e.g. discussion file path or `file_reference` source id. */ + sourceKey: string; + /** Absolute path within the repo/project root. Caller is responsible for sandboxing. */ + filePath: string; + /** Source missing/mismatch policy: `'fail'` fails closed, `'reset'` returns a safe bounded read. */ + policy: ReadDiscussionMismatchPolicy; + /** Optional bounded read cap (defaults to 256 KiB). */ + maxBytes?: number; +} + +interface OffsetMapValue { + offset: RecordedReadOffset; +} + +// Per-run, per-source offset state. Map>. +const READ_OFFSETS = new Map>(); + +function bucketFor(runId: string): Map { + let bucket = READ_OFFSETS.get(runId); + if (!bucket) { + bucket = new Map(); + READ_OFFSETS.set(runId, bucket); + } + return bucket; +} + +export function getRecordedReadOffset(runId: string, sourceKey: string): RecordedReadOffset | null { + const bucket = READ_OFFSETS.get(runId); + if (!bucket) return null; + const entry = bucket.get(sourceKey); + return entry ? { ...entry.offset } : null; +} + +export function clearReadOffsetsForRun(runId: string): void { + READ_OFFSETS.delete(runId); +} + +export function __resetReadOffsetsForTests(): void { + READ_OFFSETS.clear(); +} + +interface ReadRangeResult { + bytesRead: number; + text: string; + prefixHashFull: string; +} + +async function readRange( + filePath: string, + start: number, + end: number, + prefixHashSeed: string | null, +): Promise { + const length = Math.max(0, end - start); + if (length === 0) { + return { bytesRead: 0, text: '', prefixHashFull: prefixHashSeed ?? '' }; + } + const handle = await open(filePath, 'r'); + try { + const buffer = Buffer.allocUnsafe(length); + const { bytesRead } = await handle.read(buffer, 0, length, start); + const slice = bytesRead === buffer.length ? buffer : buffer.subarray(0, bytesRead); + const text = slice.toString('utf8'); + let prefixHashFull = prefixHashSeed ?? ''; + if (start === 0 && bytesRead > 0) { + // Hashes the entire returned slice (full bounded read or fresh first read). + prefixHashFull = createHash('sha256').update(slice).digest('hex'); + } + return { bytesRead, text, prefixHashFull }; + } finally { + await handle.close(); + } +} + +async function computePrefixHash(filePath: string, byteOffset: number): Promise { + if (byteOffset <= 0) return createHash('sha256').update(Buffer.alloc(0)).digest('hex'); + const handle = await open(filePath, 'r'); + try { + const hash = createHash('sha256'); + const chunkSize = 64 * 1024; + let remaining = byteOffset; + let position = 0; + const buffer = Buffer.allocUnsafe(chunkSize); + while (remaining > 0) { + const toRead = Math.min(chunkSize, remaining); + const { bytesRead } = await handle.read(buffer, 0, toRead, position); + if (bytesRead <= 0) break; + hash.update(bytesRead === buffer.length ? buffer : buffer.subarray(0, bytesRead)); + position += bytesRead; + remaining -= bytesRead; + } + return hash.digest('hex'); + } finally { + await handle.close(); + } +} + +function recordOffset( + runId: string, + sourceKey: string, + byteOffset: number, + sizeAtOffset: number, + prefixHashFull: string, +): RecordedReadOffset { + const offset: RecordedReadOffset = { + byteOffset, + sha256Prefix: prefixHashFull.slice(0, SHA256_PREFIX_HEX_LENGTH), + sizeAtOffset, + recordedAt: new Date().toISOString(), + }; + bucketFor(runId).set(sourceKey, { offset }); + return { ...offset }; +} + +/** + * Read a discussion file with per-(run, source) incremental offset tracking. + * + * - First read or no prior offset → bounded read from byte 0, record offset, returns `fresh`. + * - Prior offset matches (size ≥ recorded sizeAtOffset AND sha256Prefix of bytes + * `0..byteOffset` matches) → bounded read of bytes `byteOffset..min(EOF, byteOffset+maxBytes)`, + * advance offset to the actual end of the consumed range, returns `incremental`. + * - Mismatch + `policy === 'reset'` → bounded read from byte 0, record fresh + * offset, returns `mismatch_safe_reset` + warning diagnostic. + * - Mismatch + `policy === 'fail'` → throws + returns `mismatch_fail_closed` + * with an error diagnostic; the recorded offset is **not** advanced. + */ +export async function readP2pDiscussionWithOffset(args: ReadDiscussionArgs): Promise { + const { runId, sourceKey, filePath, policy } = args; + const maxBytes = Math.max(1, args.maxBytes ?? DEFAULT_MAX_BYTES); + + const fileStat = await stat(filePath); + const fileSize = fileStat.size; + + const previous = bucketFor(runId).get(sourceKey)?.offset ?? null; + + // Fresh path: no prior offset → bounded full read from byte 0. + if (!previous) { + const end = Math.min(fileSize, maxBytes); + const range = await readRange(filePath, 0, end, null); + const newOffset = recordOffset(runId, sourceKey, range.bytesRead, fileSize, range.prefixHashFull); + return { content: range.text, newOffset, reset: 'fresh', diagnostics: [] }; + } + + // Mismatch detection — file shrank below recorded sizeAtOffset, or the prefix + // hash of the bytes preceding the offset diverges (rotation / rewrite). + let mismatch = fileSize < previous.sizeAtOffset || fileSize < previous.byteOffset; + let prefixHashFull = ''; + if (!mismatch) { + prefixHashFull = await computePrefixHash(filePath, previous.byteOffset); + if (prefixHashFull.slice(0, SHA256_PREFIX_HEX_LENGTH) !== previous.sha256Prefix) { + mismatch = true; + } + } + + if (mismatch) { + if (policy === 'fail') { + // Reuse `missing_context_source` (`['bind','execute']`) — no dedicated + // offset-mismatch code exists in `P2P_WORKFLOW_DIAGNOSTIC_CODES`; this is + // the closest applicable code per the source-policy semantics. + const diagnostic = makeP2pWorkflowDiagnostic('missing_context_source', 'execute', { + runId, + fieldPath: `discussionOffset.${sourceKey}`, + summary: 'Discussion source diverged from recorded read offset; failing closed per policy.', + }); + const error = new Error('discussion_read_offset_mismatch') as Error & { code?: string }; + error.code = 'discussion_read_offset_mismatch'; + throw Object.assign(error, { + diagnostic, + result: { + // Caller wraps the throw for transport; this preserves the contract + // shape so a catcher that wants to surface it can recover gracefully. + content: '', + newOffset: { ...previous }, + reset: 'mismatch_fail_closed' as ReadDiscussionResetReason, + diagnostics: [diagnostic], + } satisfies ReadDiscussionResult, + }); + } + // policy === 'reset' → safe bounded re-read from byte 0. + const end = Math.min(fileSize, maxBytes); + const range = await readRange(filePath, 0, end, null); + const newOffset = recordOffset(runId, sourceKey, range.bytesRead, fileSize, range.prefixHashFull); + const diagnostic = makeP2pWorkflowWarning('missing_context_source', 'execute', { + runId, + fieldPath: `discussionOffset.${sourceKey}`, + summary: 'Discussion source diverged from recorded read offset; safely reset to full bounded read.', + }); + return { content: range.text, newOffset, reset: 'mismatch_safe_reset', diagnostics: [diagnostic] }; + } + + // Incremental path: read [byteOffset, min(EOF, byteOffset + maxBytes)). + const start = previous.byteOffset; + const end = Math.min(fileSize, start + maxBytes); + const range = await readRange(filePath, start, end, prefixHashFull); + const consumed = range.bytesRead; + const advancedOffset = start + consumed; + // Recompute prefix hash over the new prefix [0, advancedOffset). + const newPrefixFull = consumed === 0 + ? prefixHashFull + : await computePrefixHash(filePath, advancedOffset); + const newOffset = recordOffset(runId, sourceKey, advancedOffset, fileSize, newPrefixFull); + return { content: range.text, newOffset, reset: 'incremental', diagnostics: [] }; +} diff --git a/src/daemon/p2p-workflow-policy-recheck.ts b/src/daemon/p2p-workflow-policy-recheck.ts new file mode 100644 index 000000000..0b4da1be5 --- /dev/null +++ b/src/daemon/p2p-workflow-policy-recheck.ts @@ -0,0 +1,141 @@ +import { makeP2pWorkflowDiagnostic } from '../../shared/p2p-workflow-diagnostics.js'; +import type { P2pWorkflowDiagnostic } from '../../shared/p2p-workflow-diagnostics.js'; +import type { P2pStaticPolicy } from '../../shared/p2p-workflow-types.js'; + +/** + * Result of a per-dangerous-node policy/capability recheck. + * + * The bound capability snapshot is audit/projection metadata only — before any + * dangerous node (script, implementation, artifact-write) executes the daemon + * MUST re-check current daemon policy/capabilities AND policy allowlists. + * + * Capability checks (audit:R1-H3): + * - If a required capability is missing from `currentDaemonCapabilities` AND it + * was in `bindCapabilitySnapshot`, this is a downgrade and we emit + * `capability_downgraded_during_run`. + * - If a required capability is missing from `currentDaemonCapabilities` AND it + * was NOT in `bindCapabilitySnapshot`, the run never had it; we emit + * `missing_required_capability`. + * + * Policy checks (audit:H3 / R2-CH1) — only when both `boundPolicySnapshot` and + * `currentDaemonPolicy` are supplied: + * - Any allow-flag (`allowOpenSpecArtifacts`, `allowImplementationPermission`, + * `allowInterpreterScripts`) that flipped `true → false` since bind triggers + * `capability_downgraded_during_run` — the daemon revoked permission. + * - Any executable removed from `allowedExecutables` since bind triggers the + * same — script runner / implementation node would lose authorisation. + * - Concurrency caps tightening is NOT a downgrade (it does not retract + * already-granted authority for an in-flight run); it only affects new launches. + * + * Capability "upgrade" (current ⊃ snapshot) is fine but MUST NOT broaden the + * permission set granted to an already-running workflow. Because this helper + * checks the requirement set against `currentDaemonCapabilities` only, an + * upgraded daemon still satisfies the original required set; the upgrade + * itself does not unlock anything new because the required set was frozen at + * compile/bind time. + */ +export type P2pWorkflowPolicyRecheckResult = + | { ok: true } + | { ok: false; diagnostic: P2pWorkflowDiagnostic; missingCapability?: string; downgradedField?: string }; + +export interface P2pWorkflowPolicyRecheckArgs { + requiredCapabilities: readonly string[]; + bindCapabilitySnapshot: readonly string[]; + currentDaemonCapabilities: readonly string[]; + /** Policy at bind time. When omitted, only capability strings are checked. */ + boundPolicySnapshot?: P2pStaticPolicy; + /** Current daemon policy. Required when `boundPolicySnapshot` is supplied. */ + currentDaemonPolicy?: P2pStaticPolicy; + runId?: string; + nodeId?: string; +} + +const POLICY_ALLOW_FLAG_FIELDS = [ + 'allowOpenSpecArtifacts', + 'allowImplementationPermission', + 'allowInterpreterScripts', +] as const; + +type PolicyAllowField = (typeof POLICY_ALLOW_FLAG_FIELDS)[number]; + +interface PolicyDowngradeFinding { + field: string; + summary: string; +} + +/** + * Compare two `P2pStaticPolicy` snapshots and return the first downgrade + * (`true → false` allow flag, or executable removed from allowlist). Returns + * `null` when current policy is at least as permissive as bound policy. + */ +function findPolicyDowngrade( + bound: P2pStaticPolicy, + current: P2pStaticPolicy, +): PolicyDowngradeFinding | null { + for (const flag of POLICY_ALLOW_FLAG_FIELDS) { + if (bound[flag as PolicyAllowField] && !current[flag as PolicyAllowField]) { + return { + field: `currentDaemonPolicy.${flag}`, + summary: `Policy flag ${flag} was true at bind but is now false`, + }; + } + } + const currentExecutables = new Set(current.allowedExecutables); + for (const exe of bound.allowedExecutables) { + if (!currentExecutables.has(exe)) { + return { + field: 'currentDaemonPolicy.allowedExecutables', + summary: `Executable ${exe} was allowlisted at bind but is no longer allowed`, + }; + } + } + return null; +} + +export function recheckDangerousNodeCapabilities( + args: P2pWorkflowPolicyRecheckArgs, +): P2pWorkflowPolicyRecheckResult { + const current = new Set(args.currentDaemonCapabilities); + const snapshot = new Set(args.bindCapabilitySnapshot); + + for (const required of args.requiredCapabilities) { + if (current.has(required)) continue; + const wasBound = snapshot.has(required); + const code = wasBound + ? 'capability_downgraded_during_run' + : 'missing_required_capability'; + return { + ok: false, + missingCapability: required, + diagnostic: makeP2pWorkflowDiagnostic(code, 'execute', { + ...(args.runId !== undefined ? { runId: args.runId } : {}), + ...(args.nodeId !== undefined ? { nodeId: args.nodeId } : {}), + fieldPath: 'currentDaemonPolicy.capabilities', + summary: wasBound + ? `Capability ${required} was present at bind but is no longer available` + : `Required capability ${required} is missing`, + }), + }; + } + + // Audit:H3 — capabilities can stay identical while the daemon tightens + // executable allowlist or flips an allow flag off. Detect that here so a + // dangerous node fails closed even when the capability advertisement is + // unchanged. + if (args.boundPolicySnapshot && args.currentDaemonPolicy) { + const downgrade = findPolicyDowngrade(args.boundPolicySnapshot, args.currentDaemonPolicy); + if (downgrade) { + return { + ok: false, + downgradedField: downgrade.field, + diagnostic: makeP2pWorkflowDiagnostic('capability_downgraded_during_run', 'execute', { + ...(args.runId !== undefined ? { runId: args.runId } : {}), + ...(args.nodeId !== undefined ? { nodeId: args.nodeId } : {}), + fieldPath: downgrade.field, + summary: downgrade.summary, + }), + }; + } + } + return { ok: true }; +} diff --git a/src/daemon/p2p-workflow-restart.ts b/src/daemon/p2p-workflow-restart.ts new file mode 100644 index 000000000..2a6e97e10 --- /dev/null +++ b/src/daemon/p2p-workflow-restart.ts @@ -0,0 +1,62 @@ +import { P2P_WORKFLOW_PROJECTION_VERSION } from '../../shared/p2p-workflow-constants.js'; +import { makeP2pWorkflowDiagnostic } from '../../shared/p2p-workflow-diagnostics.js'; +import type { P2pWorkflowStatusProjection } from '../../shared/p2p-workflow-types.js'; + +/** + * Mark an advanced workflow run stale after daemon restart. + * + * v1a does not durably persist private runtime state, so any advanced run + * that survives a daemon restart cannot be safely resumed (frozen artifact + * identity, capability snapshot vs. current policy, in-flight script process + * state, discussion read offsets, etc. are all gone). Per spec, we mark such + * runs `stale` rather than silently resuming dangerous work. + * + * Pure helper — emits the canonical projection + diagnostic so the caller + * (server-link relay, command-handler bootstrap, persistence reads) can + * surface a deterministic terminal state. + */ +export interface MarkAdvancedRunStaleArgs { + runId: string; + workflowId: string; + /** Optional last-known node id to preserve audit context. */ + currentNodeId?: string; + /** Already-completed nodes from the prior run, if known. */ + completedNodeIds?: readonly string[]; + /** Optional human reason; default summarizes restart staleness. */ + reasonSummary?: string; + /** ISO timestamp; defaults to "now". */ + updatedAt?: string; + /** Pre-existing diagnostics to preserve (will be deduped against the new stale diagnostic). */ + existingDiagnostics?: P2pWorkflowStatusProjection['diagnostics']; + /** Optional capability snapshot to retain in the projection for audit. */ + capabilitySnapshot?: P2pWorkflowStatusProjection['capabilitySnapshot']; +} + +export function markAdvancedRunStaleAfterRestart( + args: MarkAdvancedRunStaleArgs, +): P2pWorkflowStatusProjection { + const diagnostic = makeP2pWorkflowDiagnostic('workflow_stale_after_restart', 'bind', { + runId: args.runId, + summary: args.reasonSummary ?? 'Advanced workflow could not be safely resumed after daemon restart', + }); + const existing = args.existingDiagnostics ?? []; + const alreadyHasStale = existing.some( + (d) => d.code === 'workflow_stale_after_restart' && d.runId === args.runId, + ); + const diagnostics = alreadyHasStale + ? existing.map((d) => ({ ...d })) + : [...existing.map((d) => ({ ...d })), diagnostic]; + + const projection: P2pWorkflowStatusProjection = { + projectionVersion: P2P_WORKFLOW_PROJECTION_VERSION, + runId: args.runId, + workflowId: args.workflowId, + status: 'stale', + completedNodeIds: args.completedNodeIds ? [...args.completedNodeIds] : [], + diagnostics, + updatedAt: args.updatedAt ?? new Date().toISOString(), + ...(args.currentNodeId !== undefined ? { currentNodeId: args.currentNodeId } : {}), + ...(args.capabilitySnapshot !== undefined ? { capabilitySnapshot: args.capabilitySnapshot } : {}), + }; + return projection; +} diff --git a/src/daemon/p2p-workflow-script-concurrency.ts b/src/daemon/p2p-workflow-script-concurrency.ts new file mode 100644 index 000000000..4c34a42f5 --- /dev/null +++ b/src/daemon/p2p-workflow-script-concurrency.ts @@ -0,0 +1,43 @@ +import { P2P_WORKFLOW_MAX_ACTIVE_SCRIPTS } from '../../shared/p2p-workflow-constants.js'; + +/** + * In-memory script-node concurrency counter. + * + * v1a forward-looking primitive: the real script runner lands in v1b. This + * module exists so daemon admission for script nodes is bounded separately + * from advanced-workflow admission and so the cap is testable from spec + * scenarios today. + * + * Process-local only — restart resets the counter. Callers MUST pair every + * successful `acquireScriptSlot()` with exactly one `releaseScriptSlot()` + * (use try/finally). + */ + +let activeScriptSlots = 0; + +export interface AcquireScriptSlotResult { + ok: boolean; + inUse: number; + capacity: number; +} + +export function acquireScriptSlot(): AcquireScriptSlotResult { + if (activeScriptSlots >= P2P_WORKFLOW_MAX_ACTIVE_SCRIPTS) { + return { ok: false, inUse: activeScriptSlots, capacity: P2P_WORKFLOW_MAX_ACTIVE_SCRIPTS }; + } + activeScriptSlots += 1; + return { ok: true, inUse: activeScriptSlots, capacity: P2P_WORKFLOW_MAX_ACTIVE_SCRIPTS }; +} + +export function releaseScriptSlot(): void { + if (activeScriptSlots > 0) activeScriptSlots -= 1; +} + +export function getScriptSlotsInUse(): number { + return activeScriptSlots; +} + +/** Test-only helper: reset the in-memory counter. */ +export function __resetScriptConcurrencyForTests(): void { + activeScriptSlots = 0; +} diff --git a/src/daemon/p2p-workflow-script-runner.ts b/src/daemon/p2p-workflow-script-runner.ts new file mode 100644 index 000000000..2e0c65791 --- /dev/null +++ b/src/daemon/p2p-workflow-script-runner.ts @@ -0,0 +1,569 @@ +/** + * Daemon-side runner for P2P workflow script nodes (tasks 7.2 – 7.12). + * + * spec.md "Script nodes SHALL use structured contracts and safe machine output": + * - argv-only spawn (no shell) — Scenario "Script command is argv-only" + * - executable allowlist enforcement — Scenario "Bind enforces full daemon + * static policy authority" (`script_executable_denied`) + * - interpreter capability check — Scenario "Interpreter script requires + * interpreter capability" + * - cwd = repo root, env from allowlist, PATH default empty — + * Scenario "Script runtime environment is constrained" + * - stdin / stdout / stderr / machineOutput byte caps with utf-8-safe + * truncation — Scenario "Script runtime environment is constrained" + * - NDJSON `p2p_script_machine_output_v1` parsing — Scenario "Machine + * output frame is authoritative" + * - timeout + AbortSignal cancellation with process-group SIGTERM→SIGKILL + * escalation — Scenario "Script cancellation terminates the process group" + * - display output (raw stdout/stderr) is non-authoritative; only the + * parsed `finalFrame` drives routing/variables/artifacts + * + * design.md §"Script Node Execution": + * - argv-only by default + * - cwd is repo root + * - stdin cap defaults to 64 KiB + * - SIGTERM with up to 5 s grace, then SIGKILL + * + * This runner is permission-scope-agnostic. Bind-time policy enforcement is + * handled by `validateCompiledWorkflowAgainstBindPolicy` in + * `src/daemon/p2p-workflow-bind.ts` (e.g. rejecting implementation-permission + * nodes when `policy.allowImplementationPermission` is false). The runner + * here only enforces the executable / env / cap contract. + * + * NOTE: callers must pair every successful run with `releaseScriptSlot()` if + * they acquired one — see `src/daemon/p2p-workflow-script-concurrency.ts`. + * Slot acquisition is intentionally NOT done in this file so the caller can + * fail fast on `daemon_busy` before constructing runner inputs. + */ + +import { spawn, type ChildProcess } from 'node:child_process'; +import { realpath, stat } from 'node:fs/promises'; +import { makeP2pWorkflowDiagnostic, type P2pWorkflowDiagnostic } from '../../shared/p2p-workflow-diagnostics.js'; +import { + DEFAULT_P2P_SCRIPT_CAPS, + DEFAULT_P2P_SCRIPT_MACHINE_OUTPUT_FRAME_MAX_BYTES, + parseP2pScriptMachineOutput, + type P2pScriptMachineOutputParseResult, +} from '../../shared/p2p-workflow-script.js'; +import type { P2pScriptNodeContract, P2pStaticPolicy } from '../../shared/p2p-workflow-types.js'; +import { P2P_SCRIPT_MACHINE_OUTPUT_KIND } from '../../shared/p2p-workflow-constants.js'; + +export interface RunP2pScriptNodeArgs { + script: P2pScriptNodeContract; + policy: P2pStaticPolicy; + repoRoot: string; + runId: string; + nodeId: string; + signal?: AbortSignal; +} + +export interface RunP2pScriptNodeResult { + ok: boolean; + exitCode: number | null; + signal: NodeJS.Signals | null; + stdoutBytes: number; + stderrBytes: number; + truncated: { stdout: boolean; stderr: boolean; machineOutput: boolean }; + /** Only populated when the script's caps allow machine-output collection + * AND `requireFrameKind` (i.e. the contract asked for structured frames). + * Spec: stdout buffer is the SAME source the parser walks; only the + * parsed `finalFrame` may drive routing/variables/artifacts. */ + machineOutput?: P2pScriptMachineOutputParseResult; + diagnostics: P2pWorkflowDiagnostic[]; +} + +/** Default grace period before SIGKILL escalation. design.md "up to 5 seconds". */ +const DEFAULT_SIGKILL_ESCALATION_MS = 5_000; + +/** Internal spawn outcome. Bridges between Node child_process events and + * our return type. `signal` is null when no signal was used to terminate. */ +interface ChildExit { + exitCode: number | null; + signal: NodeJS.Signals | null; + spawnError?: Error; +} + +const isWindows = process.platform === 'win32'; +const TEXT_ENCODER = new TextEncoder(); +const TEXT_DECODER = new TextDecoder('utf-8', { fatal: false }); + +/** Slice a string to at most `maxBytes` UTF-8 bytes WITHOUT splitting a + * multi-byte character. Mirrors the helper in `shared/p2p-workflow-script.ts`. */ +function byteSlice(value: string, maxBytes: number): string { + if (maxBytes <= 0) return ''; + const encoded = TEXT_ENCODER.encode(value); + if (encoded.byteLength <= maxBytes) return value; + let decoded = TEXT_DECODER.decode(encoded.slice(0, maxBytes)); + while (decoded.endsWith('�')) decoded = decoded.slice(0, -1); + return decoded; +} + +function byteLength(value: string): number { + return TEXT_ENCODER.encode(value).byteLength; +} + +/** + * R3 v1b follow-up — Names that MUST NEVER reach the script's spawn env, + * even if the workflow author allowlists them. These are dynamic-loader + * / interpreter hooks that let an attacker subvert the process before + * `argv[0]` runs (`LD_PRELOAD` ⇒ inject shared object; + * `DYLD_INSERT_LIBRARIES` ⇒ macOS analogue; `NODE_OPTIONS` ⇒ inject node + * `--require`; etc). Hardening is unconditional — the allowlist is a + * convenience for benign envs, not an authority over loader hooks. + */ +export const P2P_SCRIPT_ENV_DENYLIST = [ + // dynamic loader hooks (Linux ld.so / macOS dyld) + 'LD_PRELOAD', + 'LD_LIBRARY_PATH', + 'LD_AUDIT', + 'DYLD_INSERT_LIBRARIES', + 'DYLD_LIBRARY_PATH', + 'DYLD_FRAMEWORK_PATH', + // language runtime hooks + 'NODE_OPTIONS', + 'PYTHONSTARTUP', + 'PYTHONPATH', + 'PYTHONHOME', + 'PERL5LIB', + 'PERL5OPT', + 'RUBYOPT', + 'RUBYLIB', + 'LUA_PATH', + 'LUA_CPATH', + 'JAVA_TOOL_OPTIONS', + '_JAVA_OPTIONS', + 'PSModulePath', + // shell hooks (R3 v2 PR-ζ M4 / O3) + 'BASH_ENV', + 'ENV', + 'SHELLOPTS', + 'BASHOPTS', + 'PROMPT_COMMAND', + 'IFS', + // package source overrides + 'PIP_INDEX_URL', + 'npm_config_registry', + // git internals (CVE-attack-surface) + 'GIT_EXEC_PATH', +] as const; + +/** + * Build the spawn env from `script.envAllowlist`. Each allowed name is + * copied from `process.env` only if present AND not in the deny-list. + * `PATH` defaults to '' unless explicitly allowlisted. + * + * spec.md "Script runtime environment is constrained": `PATH` SHALL be empty + * or fixed minimal; environment variables SHALL come only from an allowlist; + * dynamic-loader hooks SHALL NEVER be inherited. + * + * NEVER passes `process.env` wholesale. + */ +export function buildScriptSpawnEnv(envAllowlist: readonly string[] | undefined): Record { + const env: Record = {}; + const allowlist = new Set(envAllowlist ?? []); + const denylist = new Set(P2P_SCRIPT_ENV_DENYLIST); + for (const name of allowlist) { + if (denylist.has(name)) continue; // hardened: deny-list wins over allowlist + const value = process.env[name]; + if (typeof value === 'string') env[name] = value; + } + // PATH is always present (potentially empty) so child resolves nothing + // implicitly through PATH lookup. argv[0] must be an absolute or + // repo-relative path validated by the bind layer's executable allowlist. + if (!('PATH' in env)) env.PATH = ''; + return env; +} + +/** Validate `script.argv[0]` (or `script.interpreter`) against the daemon + * static policy. Returns a diagnostic if execution is not authorised, else + * `null` (caller proceeds with spawn). + * + * NOTE: bind-time `validateCompiledWorkflowAgainstBindPolicy` SHOULD already + * have caught these — but the runner re-checks at execute time so a future + * policy downgrade between bind and spawn is still fail-closed. */ +function checkExecutablePolicy( + script: P2pScriptNodeContract, + policy: P2pStaticPolicy, + runId: string, + nodeId: string, +): P2pWorkflowDiagnostic | null { + // Interpreter capability check first — design.md "interpreter execution is + // a DISTINCT security boundary from argv execution". + if (script.commandKind === 'interpreter' && !policy.allowInterpreterScripts) { + return makeP2pWorkflowDiagnostic('script_executable_denied', 'execute', { + runId, + nodeId, + fieldPath: 'script.commandKind', + summary: 'Daemon policy does not allow interpreter scripts.', + }); + } + const executable = script.commandKind === 'interpreter' + ? script.interpreter + : script.argv[0]; + if (!executable) { + return makeP2pWorkflowDiagnostic('script_executable_denied', 'execute', { + runId, + nodeId, + fieldPath: 'script.argv[0]', + summary: 'Script command is missing an executable.', + }); + } + // Empty allowlist means "no script execution permitted" (v1a fail-closed + // default until daemon explicitly configures executables). + const allowed = new Set(policy.allowedExecutables); + if (!allowed.has(executable)) { + return makeP2pWorkflowDiagnostic('script_executable_denied', 'execute', { + runId, + nodeId, + fieldPath: 'script.argv[0]', + summary: `Executable ${executable} is not allowlisted by daemon policy.`, + }); + } + return null; +} + +/** Validate that `repoRoot` exists and is a directory. realpath is used so + * the runner refuses to spawn into a symlink target that no longer points to + * a real directory. */ +async function validateRepoRoot( + repoRoot: string, + runId: string, + nodeId: string, +): Promise<{ ok: true; resolved: string } | { ok: false; diagnostic: P2pWorkflowDiagnostic }> { + try { + const resolved = await realpath(repoRoot); + const stats = await stat(resolved); + if (!stats.isDirectory()) { + return { + ok: false, + diagnostic: makeP2pWorkflowDiagnostic('invalid_script_contract', 'bind', { + runId, + nodeId, + fieldPath: 'bindContext.repoRoot', + summary: `repoRoot ${repoRoot} is not a directory.`, + }), + }; + } + return { ok: true, resolved }; + } catch (error) { + return { + ok: false, + diagnostic: makeP2pWorkflowDiagnostic('invalid_script_contract', 'bind', { + runId, + nodeId, + fieldPath: 'bindContext.repoRoot', + summary: `repoRoot ${repoRoot} could not be resolved: ${(error as Error).message ?? String(error)}.`, + }), + }; + } +} + +/** Compute spawn args. For `commandKind === 'argv'`, executable is `argv[0]` + * and args are `argv.slice(1)`. For `commandKind === 'interpreter'`, + * executable is `script.interpreter` and args are the full `argv` (which + * presumably includes the script path the interpreter should run). */ +function deriveSpawnCommand(script: P2pScriptNodeContract): { executable: string; args: string[] } { + if (script.commandKind === 'interpreter') { + return { executable: script.interpreter ?? '', args: [...script.argv] }; + } + return { executable: script.argv[0]!, args: script.argv.slice(1) }; +} + +/** Append data to a buffer up to `maxBytes`. Returns whether the buffer was + * truncated. UTF-8-safe — multi-byte characters are not split. */ +function appendCapped( + buffer: { value: string; byteCount: number }, + chunk: string, + maxBytes: number, +): boolean { + if (buffer.byteCount >= maxBytes) return true; + const chunkBytes = byteLength(chunk); + if (buffer.byteCount + chunkBytes <= maxBytes) { + buffer.value += chunk; + buffer.byteCount += chunkBytes; + return false; + } + const remaining = maxBytes - buffer.byteCount; + const sliced = byteSlice(chunk, remaining); + buffer.value += sliced; + buffer.byteCount += byteLength(sliced); + return true; +} + +/** Send SIGTERM to the process group on POSIX, falling back to single-pid + * on Windows (no process group concept). Errors are swallowed because the + * child may already be dead. */ +function killProcessGroup(child: ChildProcess, signal: NodeJS.Signals): void { + try { + if (!isWindows && typeof child.pid === 'number' && child.pid > 0) { + // process.kill(-pid, signal) targets the entire process group. + process.kill(-child.pid, signal); + } else { + child.kill(signal); + } + } catch { + // Child already exited; nothing to do. + } +} + +/** Run a P2P script node end-to-end (argv-only spawn, env allowlist, + * stdin/stdout/stderr caps, machine-output parsing, timeout/cancel with + * process-group SIGTERM→SIGKILL escalation). + * + * This function never throws — all failures land in `diagnostics` and the + * result's `ok` flag. + * + * Concurrency note: callers MUST acquire/release `acquireScriptSlot` / + * `releaseScriptSlot` from `src/daemon/p2p-workflow-script-concurrency.ts` + * themselves (see header comment). */ +export async function runP2pScriptNode(args: RunP2pScriptNodeArgs): Promise { + const { script, policy, repoRoot, runId, nodeId, signal } = args; + const diagnostics: P2pWorkflowDiagnostic[] = []; + const caps = { + stdinBytes: script.caps?.stdinBytes ?? DEFAULT_P2P_SCRIPT_CAPS.stdinBytes, + stdoutBytes: script.caps?.stdoutBytes ?? DEFAULT_P2P_SCRIPT_CAPS.stdoutBytes, + stderrBytes: script.caps?.stderrBytes ?? DEFAULT_P2P_SCRIPT_CAPS.stderrBytes, + machineOutputBytes: script.caps?.machineOutputBytes ?? DEFAULT_P2P_SCRIPT_CAPS.machineOutputBytes, + }; + + // ── 1. Executable / interpreter policy enforcement (audit:R3 PR-β / V-6) + const denyDiagnostic = checkExecutablePolicy(script, policy, runId, nodeId); + if (denyDiagnostic) { + diagnostics.push(denyDiagnostic); + return failClosedResult(diagnostics); + } + + // ── 2. Repo root validation + const repoResult = await validateRepoRoot(repoRoot, runId, nodeId); + if (!repoResult.ok) { + diagnostics.push(repoResult.diagnostic); + return failClosedResult(diagnostics); + } + const cwd = repoResult.resolved; + + // ── 3. Build spawn args + env + const { executable, args: spawnArgs } = deriveSpawnCommand(script); + const env = buildScriptSpawnEnv(script.envAllowlist); + + // ── 4. Spawn (argv-only — shell flag MUST be false) + let child: ChildProcess; + try { + child = spawn(executable, spawnArgs, { + cwd, + env, + // detached:true on POSIX so a process group exists and we can SIGTERM + // the entire group via `process.kill(-pid, ...)`. Windows has no + // process group concept; child.kill() targets the single process. + detached: !isWindows, + stdio: ['pipe', 'pipe', 'pipe'], + // Critical: shell MUST be false. Audit reverse-regression guard #29. + shell: false, + windowsHide: true, + }); + } catch (error) { + diagnostics.push(makeP2pWorkflowDiagnostic('script_executable_denied', 'execute', { + runId, + nodeId, + fieldPath: 'script.argv[0]', + summary: `Failed to spawn ${executable}: ${(error as Error).message ?? String(error)}.`, + })); + return failClosedResult(diagnostics); + } + + // ── 5. Wire stdin (capped, utf-8-safe) + if (typeof script.stdin === 'string' && child.stdin) { + const stdinPayload = byteSlice(script.stdin, caps.stdinBytes); + try { + child.stdin.write(stdinPayload); + } catch { + // child stdin may already be closed; ignore. + } + try { + child.stdin.end(); + } catch { + // ignore + } + } else if (child.stdin) { + try { child.stdin.end(); } catch { /* ignore */ } + } + + // ── 6. Buffered stdout/stderr capture with caps + const stdout = { value: '', byteCount: 0 }; + const stderr = { value: '', byteCount: 0 }; + const truncated = { stdout: false, stderr: false, machineOutput: false }; + + if (child.stdout) { + child.stdout.setEncoding('utf-8'); + child.stdout.on('data', (chunk: string) => { + if (appendCapped(stdout, chunk, caps.stdoutBytes)) truncated.stdout = true; + }); + } + if (child.stderr) { + child.stderr.setEncoding('utf-8'); + child.stderr.on('data', (chunk: string) => { + if (appendCapped(stderr, chunk, caps.stderrBytes)) truncated.stderr = true; + }); + } + + // ── 7. Wait for exit, with timeout + AbortSignal cooperative cancel + + // process-group SIGTERM→SIGKILL escalation. + const exit: ChildExit = await waitForChild(child, { + timeoutMs: script.timeoutMs, + signal, + diagnostics, + runId, + nodeId, + }); + + // ── 8. Parse machine output ONLY if the contract demands structured frames. + // Spec: stdout/stderr are display-only; ONLY the parsed final frame + // drives routing/variables/artifacts. + let machineOutput: P2pScriptMachineOutputParseResult | undefined; + if (script.requiredMachineOutput) { + machineOutput = parseP2pScriptMachineOutput(stdout.value, { + mode: 'lenient_last_valid', + maxTotalBytes: caps.machineOutputBytes, + maxFrameBytes: DEFAULT_P2P_SCRIPT_MACHINE_OUTPUT_FRAME_MAX_BYTES, + }); + truncated.machineOutput = Boolean(machineOutput.truncated); + diagnostics.push(...machineOutput.diagnostics); + if (!machineOutput.ok && !diagnostics.some((d) => d.code === 'script_machine_output_invalid' && d.severity === 'error')) { + // Defensive — parse helper already emits diagnostics, but make sure a + // failed required parse becomes ok:false. + } + } + + // ── 9. Surface spawn errors (e.g. ENOENT, EACCES) as diagnostics. + if (exit.spawnError) { + diagnostics.push(makeP2pWorkflowDiagnostic('script_executable_denied', 'execute', { + runId, + nodeId, + fieldPath: 'script.argv[0]', + summary: `Spawn error: ${exit.spawnError.message}.`, + })); + } + + const ok = exit.spawnError == null + && exit.signal == null + && exit.exitCode === 0 + && (script.requiredMachineOutput ? Boolean(machineOutput?.ok) : true) + && !diagnostics.some((d) => d.severity === 'error'); + + return { + ok, + exitCode: exit.exitCode, + signal: exit.signal, + stdoutBytes: stdout.byteCount, + stderrBytes: stderr.byteCount, + truncated, + ...(machineOutput ? { machineOutput } : {}), + diagnostics, + }; +} + +/** Wait for the child to exit, honoring `script.timeoutMs` and the caller's + * `AbortSignal`. On timeout/cancel, SIGTERM the process group, wait up to + * `DEFAULT_SIGKILL_ESCALATION_MS`, then SIGKILL. */ +function waitForChild( + child: ChildProcess, + options: { + timeoutMs: number | undefined; + signal: AbortSignal | undefined; + diagnostics: P2pWorkflowDiagnostic[]; + runId: string; + nodeId: string; + }, +): Promise { + return new Promise((resolve) => { + let settled = false; + let spawnError: Error | undefined; + + const finalize = (exitCode: number | null, signal: NodeJS.Signals | null) => { + if (settled) return; + settled = true; + cleanup(); + resolve({ exitCode, signal, ...(spawnError ? { spawnError } : {}) }); + }; + + let timeoutTimer: NodeJS.Timeout | undefined; + let killTimer: NodeJS.Timeout | undefined; + let abortListener: (() => void) | undefined; + + const cleanup = () => { + if (timeoutTimer) clearTimeout(timeoutTimer); + if (killTimer) clearTimeout(killTimer); + if (abortListener && options.signal) { + try { options.signal.removeEventListener('abort', abortListener); } catch { /* ignore */ } + } + }; + + const escalateToKill = () => { + // Already SIGTERMed; if child is still alive after grace period, + // SIGKILL the process group. + killProcessGroup(child, 'SIGKILL'); + }; + + const triggerTermination = (reason: 'timeout' | 'cancelled') => { + if (settled) return; + const code = reason === 'timeout' ? 'script_timeout' : 'script_cancelled'; + options.diagnostics.push(makeP2pWorkflowDiagnostic(code, 'execute', { + runId: options.runId, + nodeId: options.nodeId, + summary: reason === 'timeout' + ? `Script exceeded ${options.timeoutMs} ms timeout; SIGTERM sent to process group.` + : 'Script cancelled by AbortSignal; SIGTERM sent to process group.', + })); + killProcessGroup(child, 'SIGTERM'); + // Schedule SIGKILL escalation if the child does not exit gracefully. + killTimer = setTimeout(escalateToKill, DEFAULT_SIGKILL_ESCALATION_MS); + // Allow the unref so the test process can exit even if the child is + // somehow still alive after SIGKILL (it shouldn't be — but defensive). + try { (killTimer as { unref?: () => void }).unref?.(); } catch { /* ignore */ } + }; + + if (options.timeoutMs && options.timeoutMs > 0) { + timeoutTimer = setTimeout(() => triggerTermination('timeout'), options.timeoutMs); + try { (timeoutTimer as { unref?: () => void }).unref?.(); } catch { /* ignore */ } + } + + if (options.signal) { + if (options.signal.aborted) { + // Already cancelled before we got here — terminate immediately. + triggerTermination('cancelled'); + } else { + abortListener = () => triggerTermination('cancelled'); + try { options.signal.addEventListener('abort', abortListener, { once: true }); } catch { /* ignore */ } + } + } + + child.on('error', (err) => { + spawnError = err; + // 'error' is emitted before 'exit' on spawn failures; ensure we resolve. + finalize(null, null); + }); + + // Use 'close' rather than 'exit': 'exit' fires when the child process + // terminates, but stdio streams may still be draining (especially under + // heavy stdout). 'close' fires after all stdio streams have been closed, + // so any data listeners on stdout/stderr have observed the full output. + child.on('close', (code, signal) => { + finalize(code, signal); + }); + }); +} + +function failClosedResult(diagnostics: P2pWorkflowDiagnostic[]): RunP2pScriptNodeResult { + return { + ok: false, + exitCode: null, + signal: null, + stdoutBytes: 0, + stderrBytes: 0, + truncated: { stdout: false, stderr: false, machineOutput: false }, + diagnostics, + }; +} + +/** Re-export the machine-output kind so callers can compare frame kinds + * without re-importing constants directly. */ +export { P2P_SCRIPT_MACHINE_OUTPUT_KIND }; diff --git a/src/daemon/p2p-workflow-static-policy.ts b/src/daemon/p2p-workflow-static-policy.ts new file mode 100644 index 000000000..0591de219 --- /dev/null +++ b/src/daemon/p2p-workflow-static-policy.ts @@ -0,0 +1,126 @@ +/** + * Daemon-side single source of truth for the runtime `P2pStaticPolicy`. + * + * The smart-p2p-upgrade spec (`design.md` §Static Policy + §Capabilities) + * requires every advanced launch and every dangerous-node recheck to read + * policy from one place rather than constructing ad-hoc permissive overrides + * at the call site. + * + * Design choices for v1a: + * - The policy's allow-flags (`allowOpenSpecArtifacts`, + * `allowImplementationPermission`, `allowInterpreterScripts`) are derived + * from the daemon's currently advertised workflow capabilities — this way + * `daemon.hello` capabilities and the `P2pStaticPolicy` cannot drift apart. + * - `allowedExecutables` is empty by default. The actual allowlist is + * carried by the launch envelope (`P2pWorkflowLaunchEnvelope.allowedExecutables`) + * which is configured in the web UI (`P2pConfigPanel`) — IM.codes is a + * UI-driven product, requiring users to hand-edit a host JSON file to + * enable script execution would be off-product. `prepareAdvancedWorkflowLaunch` + * merges the envelope-supplied allowlist into the policy snapshot used for + * bind validation. + * - The `concurrency` cap is taken from `DEFAULT_P2P_STATIC_POLICY` (which + * in turn comes from `P2P_WORKFLOW_MAX_ACTIVE_RUNS` / + * `P2P_WORKFLOW_MAX_ACTIVE_SCRIPTS`). + * - Fail-closed: when the daemon cannot enumerate its capabilities (`serverLink` + * without `getP2pWorkflowCapabilities`), this returns the strictest policy + * (`[]` capabilities, all dangerous flags off). The launch path will then + * reject with `missing_required_capability` rather than silently granting + * `IMPLEMENTATION` access — see also `recheckDangerousNodeCapabilities`. + */ + +import { + P2P_WORKFLOW_CAPABILITY_V1, + P2P_WORKFLOW_OPENSPEC_ARTIFACTS_CAPABILITY_V1, + P2P_WORKFLOW_IMPLEMENTATION_CAPABILITY_V1, + P2P_WORKFLOW_SCRIPT_INTERPRETER_CAPABILITY_V1, +} from '../../shared/p2p-workflow-constants.js'; +import { buildDefaultP2pStaticPolicy } from '../../shared/p2p-workflow-policy.js'; +import type { P2pStaticPolicy } from '../../shared/p2p-workflow-types.js'; +import type { ServerLink } from './server-link.js'; + +/** + * Daemon capability accessor. Exposed as a function rather than a method so + * tests can supply a hostile mock that omits `getP2pWorkflowCapabilities` and + * verify the fail-closed behavior. + * + * v1a fail-closed policy: when the link does not expose + * `getP2pWorkflowCapabilities`, return `[]` (NOT a hardcoded permissive + * fallback that would grant OpenSpec / implementation access). The advanced + * launch path will then reject with `missing_required_capability` per the + * spec, instead of fail-OPEN. See `audit:N-H2` in the discussion file. + */ +export function getCurrentDaemonWorkflowCapabilities(serverLink: ServerLink): string[] { + if (typeof serverLink.getP2pWorkflowCapabilities === 'function') { + return [...serverLink.getP2pWorkflowCapabilities()].sort(); + } + return []; +} + +/** + * Snapshot of the daemon's most recent `daemon.hello` send. Used by bind to + * record an audit-quality `capabilitySnapshot` for projection rather than + * synthesising `helloEpoch: 0` / `sentAt: Date.now()` placeholders. + * + * The shape mirrors `P2pBindRuntimeContext.capabilitySnapshot`. When the + * underlying `serverLink` does not expose hello-state accessors (mocks / + * legacy test harnesses), we fall back to deterministic placeholders that + * still validate but obviously came from a non-hello source. + */ +export function readCachedHelloSnapshot(serverLink: ServerLink): { + daemonId: string; + capabilities: string[]; + helloEpoch: number; + sentAt: number; +} { + const capabilities = getCurrentDaemonWorkflowCapabilities(serverLink); + const daemonId = typeof serverLink.getServerId === 'function' + ? serverLink.getServerId() + : 'local-daemon'; + const helloEpoch = typeof serverLink.getHelloEpoch === 'function' + ? serverLink.getHelloEpoch() + : 0; + const sentAt = typeof serverLink.getHelloSentAt === 'function' + ? serverLink.getHelloSentAt() + : 0; + return { daemonId, capabilities, helloEpoch, sentAt }; +} + +/** + * Single entry point for "what is the daemon's current static policy?". All + * compile / bind / recheck call sites MUST go through this function so that + * a future change (read from disk / env / config service) only touches one + * place. The reverse-regression suite enforces that the launch path reads + * `staticPolicy.concurrency.maxAdvancedRuns` and that this function never + * hardcodes dangerous allow-flags as permissive defaults. + */ +export function loadDaemonP2pStaticPolicy(serverLink: ServerLink): P2pStaticPolicy { + const caps = new Set(getCurrentDaemonWorkflowCapabilities(serverLink)); + // Audit:R3 PR-β / A3 / V-5 — interpreter execution is a DISTINCT security + // boundary from argv execution (interpreter loads user-controlled script + // files; argv invokes a fixed allowlisted binary). The previous derivation + // OR'd ARGV into `allowInterpreterScripts`, silently upgrading argv-only + // capability to interpreter authority. Now interpreter authority strictly + // requires the interpreter capability. + // + // R3 PR-α follow-up — `allowedExecutables` is intentionally empty here. + // The authoritative list is configured in the web UI (`P2pConfigPanel`), + // carried by `P2pWorkflowLaunchEnvelope.allowedExecutables`, and merged + // into the launch policy snapshot by `prepareAdvancedWorkflowLaunch`. + // Daemon-side hand-edited config (e.g., `~/.imcodes/p2p-policy.json`) is + // explicitly NOT supported — IM.codes is UI-driven; allowlist edits + // belong in the same surface where users configure their workflows. + return buildDefaultP2pStaticPolicy({ + allowedExecutables: [], + allowOpenSpecArtifacts: caps.has(P2P_WORKFLOW_OPENSPEC_ARTIFACTS_CAPABILITY_V1), + allowImplementationPermission: caps.has(P2P_WORKFLOW_IMPLEMENTATION_CAPABILITY_V1), + allowInterpreterScripts: caps.has(P2P_WORKFLOW_SCRIPT_INTERPRETER_CAPABILITY_V1), + }); +} + +/** + * Convenience predicate used by daemon admission / executor branches that + * only need to know whether the base workflow capability is present. + */ +export function daemonAdvertisesBaseWorkflowCapability(serverLink: ServerLink): boolean { + return getCurrentDaemonWorkflowCapabilities(serverLink).includes(P2P_WORKFLOW_CAPABILITY_V1); +} diff --git a/src/daemon/server-link.ts b/src/daemon/server-link.ts index d3aa463fc..5d20285e7 100644 --- a/src/daemon/server-link.ts +++ b/src/daemon/server-link.ts @@ -7,6 +7,13 @@ import { setProviderRegistryServerLink } from '../agent/provider-registry.js'; import { getDefaultAckOutbox } from './ack-outbox.js'; import { getEmbeddingStatus } from '../context/embedding.js'; import type { EmbeddingStatus } from '../../shared/embedding-status.js'; +import { + P2P_WORKFLOW_IMPLEMENTATION_CAPABILITY_V1, + P2P_WORKFLOW_CAPABILITY_V1, + P2P_WORKFLOW_OPENSPEC_ARTIFACTS_CAPABILITY_V1, + type P2pWorkflowCapability, +} from '../../shared/p2p-workflow-constants.js'; +import { P2P_WORKFLOW_MSG } from '../../shared/p2p-workflow-messages.js'; interface SystemStats { cpu: number; @@ -77,6 +84,13 @@ export class ServerLink { private readonly serverId: string; private readonly token: string; readonly daemonVersion = DAEMON_VERSION; + private helloEpoch = 0; + private lastHelloSentAt = 0; + private p2pWorkflowCapabilities: readonly string[] = [ + P2P_WORKFLOW_CAPABILITY_V1, + P2P_WORKFLOW_OPENSPEC_ARTIFACTS_CAPABILITY_V1, + P2P_WORKFLOW_IMPLEMENTATION_CAPABILITY_V1, + ]; constructor(opts: ServerLinkOpts) { this.workerUrl = opts.workerUrl; @@ -124,6 +138,7 @@ export class ServerLink { // Send auth handshake immediately — server closes the socket if this is not // the first message or if credentials are invalid (5s timeout enforced server-side). ws.send(JSON.stringify({ type: 'auth', serverId: this.serverId, token: this.token, daemonVersion: this.daemonVersion })); + this.sendDaemonHello(); // Wire transport relay so provider callbacks can send events to browsers via this socket. setTransportRelaySend((msg) => { try { @@ -214,6 +229,52 @@ export class ServerLink { this.ws.send(JSON.stringify({ ...((msg as object) ?? {}), seq: this.seq })); } + updateP2pWorkflowCapabilities(capabilities: readonly (P2pWorkflowCapability | string)[]): void { + const next = [...new Set(capabilities)].sort(); + if ( + next.length === this.p2pWorkflowCapabilities.length && + next.every((capability, index) => capability === this.p2pWorkflowCapabilities[index]) + ) { + return; + } + this.p2pWorkflowCapabilities = next; + this.sendDaemonHello(); + } + + getP2pWorkflowCapabilities(): readonly string[] { + return [...this.p2pWorkflowCapabilities]; + } + + /** + * Most recent `daemon.hello` epoch sent by this daemon. Bind context stores + * this in `capabilitySnapshot.helloEpoch` so the projection records which + * capability advertisement governed the run, instead of synthesising `0`. + */ + getHelloEpoch(): number { + return this.helloEpoch; + } + + /** + * Wall-clock timestamp (ms) of the most recent `daemon.hello`. Returns 0 + * when no hello has been sent yet (pre-`sendDaemonHello`). + */ + getHelloSentAt(): number { + return this.lastHelloSentAt; + } + + private sendDaemonHello(): void { + const sentAt = Date.now(); + this.helloEpoch++; + this.lastHelloSentAt = sentAt; + this.send({ + type: P2P_WORKFLOW_MSG.DAEMON_HELLO, + daemonId: this.serverId, + capabilities: [...this.p2pWorkflowCapabilities], + helloEpoch: this.helloEpoch, + sentAt, + }); + } + /** Reports whether the underlying WebSocket is currently OPEN. */ isConnected(): boolean { return !!this.ws && this.ws.readyState === WebSocket.OPEN; diff --git a/src/daemon/supervision-automation.ts b/src/daemon/supervision-automation.ts index 1e3ccc565..51faad1f8 100644 --- a/src/daemon/supervision-automation.ts +++ b/src/daemon/supervision-automation.ts @@ -3,7 +3,9 @@ import path from 'node:path'; import { randomUUID } from 'node:crypto'; import { getSession } from '../store/session-store.js'; import { getTransportRuntime } from '../agent/session-manager.js'; -import { startP2pRun, cancelP2pRun, getP2pRun } from './p2p-orchestrator.js'; +import { startP2pRun, cancelP2pRun, getP2pRun, listP2pRuns } from './p2p-orchestrator.js'; +import { loadDaemonP2pStaticPolicy } from './p2p-workflow-static-policy.js'; +import { P2P_TERMINAL_RUN_STATUSES } from '../../shared/p2p-status.js'; import type { ServerLink } from './server-link.js'; import { timelineEmitter } from './timeline-emitter.js'; import { supervisionBroker } from './supervision-broker.js'; @@ -876,15 +878,22 @@ class SupervisionAutomation { ); try { - const started = await startP2pRun({ - initiatorSession: current.sessionName, - targets: [], + // Audit:V-2 / Q1 — supervision auto-audit rounds are synthesised by the + // daemon itself (NOT user input), so they intentionally bypass envelope + // validation. The `advanced: { kind: 'supervision_internal', ... }` + // discriminant makes the bypass explicit in source review and + // reverse-regression checks instead of being detected by a path heuristic. + // + // Audit:R3 hardening / task 10.4 — supervision MUST honour the daemon + // advanced-run admission cap. If the daemon is at + // `P2P_WORKFLOW_MAX_ACTIVE_RUNS`, retry with bounded backoff before + // giving up. Default 3 attempts × 5 s; we don't expose this as + // configuration in v1a because supervision audit cadence is daemon- + // internal and rarely contended. + const started = await this.startSupervisionRunWithBusyRetry({ + sessionName: current.sessionName, userText: baseline.userText, fileContents: baseline.fileContents, - serverLink: this.serverLink, - // modeOverride is intentionally omitted — resolveP2pRoundPlan ignores it - // whenever advancedRounds is non-empty, so leaving it undefined makes the - // single source of routing truth explicit. rounds: auditRounds.length, advancedRounds: auditRounds, }); @@ -898,6 +907,65 @@ class SupervisionAutomation { } } + /** + * Audit:R3 hardening / task 10.4 — supervision auto-audit launches MUST + * respect the daemon advanced-run admission cap. When the daemon is at + * capacity (`P2P_WORKFLOW_MAX_ACTIVE_RUNS` from `loadDaemonP2pStaticPolicy`), + * retry with bounded backoff. Throws on retry exhaustion so the calling + * `try/catch` in the dispatch path triggers normal cleanup. + */ + private async startSupervisionRunWithBusyRetry(args: { + sessionName: string; + userText: string; + fileContents: ReturnType; + rounds: number; + advancedRounds: import('../../shared/p2p-advanced.js').P2pAdvancedRound[]; + }): Promise<{ id: string; discussionId: string }> { + const SUPERVISION_BUSY_ATTEMPTS = 3; + const SUPERVISION_BUSY_DELAY_MS = 5_000; + // `loadDaemonP2pStaticPolicy` only reads `getP2pWorkflowCapabilities` / + // hello accessors; null serverLink degrades gracefully (fail-closed + // policy with no allow flags). Cast keeps the helper's narrow signature. + const policy = loadDaemonP2pStaticPolicy((this.serverLink ?? { getP2pWorkflowCapabilities: () => [] }) as Parameters[0]); + let attempt = 0; + let lastError: unknown = null; + while (attempt < SUPERVISION_BUSY_ATTEMPTS) { + attempt += 1; + const activeAdvancedRuns = listP2pRuns().filter( + (run) => run.advancedP2pEnabled && !P2P_TERMINAL_RUN_STATUSES.has(run.status), + ); + if (activeAdvancedRuns.length >= policy.concurrency.maxAdvancedRuns) { + lastError = new Error(`daemon_busy: ${activeAdvancedRuns.length}/${policy.concurrency.maxAdvancedRuns} active advanced runs`); + if (attempt < SUPERVISION_BUSY_ATTEMPTS) { + logger.warn({ sessionName: args.sessionName, attempt, of: SUPERVISION_BUSY_ATTEMPTS }, 'supervision: daemon at advanced cap, retrying'); + await new Promise((r) => setTimeout(r, SUPERVISION_BUSY_DELAY_MS)); + continue; + } + throw new Error(`Supervision audit launch exhausted ${SUPERVISION_BUSY_ATTEMPTS} daemon_busy retries on session ${args.sessionName}`); + } + try { + return await startP2pRun({ + initiatorSession: args.sessionName, + targets: [], + userText: args.userText, + fileContents: args.fileContents as unknown as Array<{ path: string; content: string }>, + serverLink: this.serverLink, + rounds: args.rounds, + advanced: { + kind: 'supervision_internal', + advancedRounds: args.advancedRounds, + }, + }); + } catch (err) { + lastError = err; + // startP2pRun throws are non-busy; surface immediately. + throw err; + } + } + // Exhausted retries without ever calling startP2pRun. + throw lastError ?? new Error('supervision: launch exhausted retries'); + } + private startAuditPoller(sessionName: string, generation: number, runId: string): void { this.clearPoller(sessionName); const poller = setInterval(() => { diff --git a/test/daemon/p2p-adapter-topology.test.ts b/test/daemon/p2p-adapter-topology.test.ts new file mode 100644 index 000000000..6003b5f75 --- /dev/null +++ b/test/daemon/p2p-adapter-topology.test.ts @@ -0,0 +1,175 @@ +/** + * PR-α (A1 / A2 / W3 / Cu1-N3) — adapter regression tests. + * + * Lock the post-fix invariants of `compiledWorkflowToLegacyAdvancedRounds` + * + the helper trio (`orderCompiledNodesForExecution`, + * `mapCompiledNodeToLegacyRound`, `mapConditionalEdgeToJumpRule`): + * + * - Topological traversal honours `rootNodeId` + DEFAULT edges, not lexical + * id ordering (A2). + * - `nodeKind` / `script` / `routingAuthority` / `artifactConvention` + * propagate through the adapter (A1 / W3). + * - Conditional-edge mapping preserves the raw marker for `PASS|REWORK` and + * only compresses non-{PASS,REWORK} markers to `REWORK` (A8 limit). + * - Each helper is independently invocable, supporting unit-level review + * (Cu1-N3). + */ + +import { describe, expect, it } from 'vitest'; +import type { + P2pCompiledNode, + P2pCompiledWorkflow, + P2pScriptNodeContract, +} from '../../shared/p2p-workflow-types.js'; +import { + mapCompiledNodeToLegacyRound, + mapConditionalEdgeToJumpRule, + orderCompiledNodesForExecution, +} from '../../src/daemon/command-handler.js'; + +function buildScriptContract(overrides: Partial = {}): P2pScriptNodeContract { + return { + commandKind: 'argv', + argv: ['/usr/bin/jq', '.'], + timeoutMs: 5_000, + requireMachineOutput: true, + declaresArtifacts: false, + declaresVariables: false, + ...overrides, + } as P2pScriptNodeContract; +} + +function buildCompiledNode(overrides: Partial = {}): P2pCompiledNode { + return { + id: 'node', + title: 'Node', + nodeKind: 'llm', + preset: 'discussion', + permissionScope: 'analysis_only', + artifacts: [], + routingAuthority: { kind: 'none' }, + ...overrides, + } as P2pCompiledNode; +} + +describe('orderCompiledNodesForExecution (A2 / W3)', () => { + it('walks rootNodeId then DEFAULT edges, not lexical id order', () => { + const workflow: P2pCompiledWorkflow = { + schemaVersion: 1, + workflowId: 'wf', + rootNodeId: 'zzz-root', + nodes: [ + buildCompiledNode({ id: 'aaa-helper' }), + buildCompiledNode({ id: 'bbb-helper' }), + buildCompiledNode({ id: 'zzz-root' }), + ], + edges: [ + { id: 'edge-1', fromNodeId: 'zzz-root', toNodeId: 'aaa-helper', edgeKind: 'default' }, + { id: 'edge-2', fromNodeId: 'aaa-helper', toNodeId: 'bbb-helper', edgeKind: 'default' }, + ], + variables: [], + loopBudgets: {}, + derivedRequiredCapabilities: [], + staticPolicyHash: 'h', + workflowContractHash: 'c', + diagnostics: [], + }; + const ordered = orderCompiledNodesForExecution(workflow); + expect(ordered.map((n) => n.id)).toEqual(['zzz-root', 'aaa-helper', 'bbb-helper']); + }); + + it('appends unreachable nodes in declaration order so legacy projection still surfaces them', () => { + const workflow: P2pCompiledWorkflow = { + schemaVersion: 1, + workflowId: 'wf', + rootNodeId: 'root', + nodes: [ + buildCompiledNode({ id: 'root' }), + buildCompiledNode({ id: 'orphan-z' }), + buildCompiledNode({ id: 'orphan-a' }), + ], + edges: [], + variables: [], + loopBudgets: {}, + derivedRequiredCapabilities: [], + staticPolicyHash: 'h', + workflowContractHash: 'c', + diagnostics: [], + }; + const ordered = orderCompiledNodesForExecution(workflow); + expect(ordered.map((n) => n.id)).toEqual(['root', 'orphan-z', 'orphan-a']); + }); +}); + +describe('mapCompiledNodeToLegacyRound (A1 / W3)', () => { + const baseWorkflow: P2pCompiledWorkflow = { + schemaVersion: 1, + workflowId: 'wf', + rootNodeId: 'node', + nodes: [], + edges: [], + variables: [], + loopBudgets: {}, + derivedRequiredCapabilities: [], + staticPolicyHash: 'h', + workflowContractHash: 'c', + diagnostics: [], + }; + + it('preserves nodeKind / script / routingAuthority on the legacy carrier', () => { + const script = buildScriptContract(); + const compiled = buildCompiledNode({ + id: 'node', + nodeKind: 'script', + script, + routingAuthority: { kind: 'script_routing_key', allowedKeys: ['go-review', 'finish'] }, + }); + const round = mapCompiledNodeToLegacyRound(compiled, { ...baseWorkflow, nodes: [compiled] }); + expect(round.nodeKind).toBe('script'); + expect(round.script).toBe(script); + expect(round.routingAuthority).toEqual({ kind: 'script_routing_key', allowedKeys: ['go-review', 'finish'] }); + }); + + it('preserves artifactConvention from the FIRST artifact contract (W3)', () => { + const compiled = buildCompiledNode({ + id: 'node', + artifacts: [{ convention: 'explicit', paths: ['proposal.md'] }] as P2pCompiledNode['artifacts'], + }); + const round = mapCompiledNodeToLegacyRound(compiled, { ...baseWorkflow, nodes: [compiled] }); + expect(round.artifactConvention).toBe('explicit'); + }); +}); + +describe('mapConditionalEdgeToJumpRule (A8 / Cu1-N3)', () => { + it('returns none + undefined jumpRule when no conditional edge', () => { + const result = mapConditionalEdgeToJumpRule(undefined, {}); + expect(result.verdictPolicy).toBe('none'); + expect(result.jumpRule).toBeUndefined(); + }); + + it('preserves PASS marker', () => { + const result = mapConditionalEdgeToJumpRule( + { id: 'edge-1', fromNodeId: 'a', toNodeId: 'b', edgeKind: 'conditional', condition: { kind: 'verdict_marker_equals', equals: 'PASS' } }, + { 'edge-1': 3 }, + ); + expect(result.verdictPolicy).toBe('forced_rework'); + expect(result.jumpRule).toEqual({ targetRoundId: 'b', marker: 'PASS', minTriggers: 0, maxTriggers: 3 }); + }); + + it('compresses non-PASS markers to REWORK at the legacy boundary (A8 documented limit)', () => { + const result = mapConditionalEdgeToJumpRule( + { id: 'edge-1', fromNodeId: 'a', toNodeId: 'b', edgeKind: 'conditional', condition: { kind: 'verdict_marker_equals', equals: 'INVESTIGATE' } }, + { 'edge-1': 1 }, + ); + expect(result.jumpRule?.marker).toBe('REWORK'); + }); + + it('emits forced_rework without jumpRule when loopBudget is missing', () => { + const result = mapConditionalEdgeToJumpRule( + { id: 'edge-1', fromNodeId: 'a', toNodeId: 'b', edgeKind: 'conditional', condition: { kind: 'verdict_marker_equals', equals: 'PASS' } }, + {}, + ); + expect(result.verdictPolicy).toBe('forced_rework'); + expect(result.jumpRule).toBeUndefined(); + }); +}); diff --git a/test/daemon/p2p-artifact-identity-persistence.test.ts b/test/daemon/p2p-artifact-identity-persistence.test.ts new file mode 100644 index 000000000..79995a89c --- /dev/null +++ b/test/daemon/p2p-artifact-identity-persistence.test.ts @@ -0,0 +1,129 @@ +/** + * R3 v1b follow-up — Artifact identity persistence across daemon restart. + * + * Verifies: + * - `freezeP2pArtifactIdentity` writes `~/.imcodes/runs//identity.json` + * (atomic via .tmp → rename) for both `openspec_convention` and + * `explicit_paths` contracts + * - `loadPersistedFrozenP2pArtifactIdentities` rehydrates the in-memory + * map and skips malformed / mismatched-schema entries silently + * - the rehydrated identity is returned by `getFrozenP2pArtifactIdentity` + * so the next freeze call short-circuits (i.e., slug-N is preserved + * across restart) + */ +import { mkdirSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; + +import { + P2P_RUN_STATE_DIR_ENV, + __resetP2pArtifactIdentitiesForTests, + freezeP2pArtifactIdentity, + getFrozenP2pArtifactIdentity, + loadPersistedFrozenP2pArtifactIdentities, +} from '../../src/daemon/p2p-workflow-artifact-runtime.js'; + +const SAVED_ENV = process.env[P2P_RUN_STATE_DIR_ENV]; +let runStateRoot: string; +let repoRoot: string; + +beforeEach(() => { + __resetP2pArtifactIdentitiesForTests(); + runStateRoot = mkdtempSync(join(tmpdir(), 'imcodes-test-p2p-workflow-runs-')); + repoRoot = mkdtempSync(join(tmpdir(), 'imcodes-test-p2p-workflow-repo-')); + process.env[P2P_RUN_STATE_DIR_ENV] = runStateRoot; +}); + +afterEach(() => { + __resetP2pArtifactIdentitiesForTests(); + if (SAVED_ENV === undefined) delete process.env[P2P_RUN_STATE_DIR_ENV]; + else process.env[P2P_RUN_STATE_DIR_ENV] = SAVED_ENV; + rmSync(runStateRoot, { recursive: true, force: true }); + rmSync(repoRoot, { recursive: true, force: true }); +}); + +describe('artifact identity persistence', () => { + it('writes identity.json after freezing an explicit_paths contract', async () => { + const identity = await freezeP2pArtifactIdentity({ + contract: { convention: 'explicit_paths', paths: ['proposal.md'] }, + repoRoot, + runId: 'run-explicit-1', + }); + expect(identity.convention).toBe('explicit_paths'); + const filePath = join(runStateRoot, 'run-explicit-1', 'identity.json'); + // Persistence is fire-and-forget; allow a microtask tick to settle. + await new Promise((resolve) => setTimeout(resolve, 50)); + const persisted = JSON.parse(readFileSync(filePath, 'utf8')) as { schemaVersion: number; identity: unknown }; + expect(persisted.schemaVersion).toBe(1); + expect(persisted.identity).toMatchObject({ convention: 'explicit_paths', openspecArtifactPaths: ['proposal.md'] }); + }); + + it('loadPersistedFrozenP2pArtifactIdentities rehydrates after reset (simulated daemon restart)', async () => { + await freezeP2pArtifactIdentity({ + contract: { convention: 'explicit_paths', paths: ['proposal.md'] }, + repoRoot, + runId: 'run-rehydrate-1', + }); + await new Promise((resolve) => setTimeout(resolve, 50)); + // Simulate restart: drop in-memory state without touching the disk file. + __resetP2pArtifactIdentitiesForTests(); + expect(getFrozenP2pArtifactIdentity('run-rehydrate-1')).toBeUndefined(); + const loaded = await loadPersistedFrozenP2pArtifactIdentities(); + expect(loaded).toBe(1); + const rehydrated = getFrozenP2pArtifactIdentity('run-rehydrate-1'); + expect(rehydrated?.convention).toBe('explicit_paths'); + expect(rehydrated?.openspecArtifactPaths).toEqual(['proposal.md']); + }); + + it('subsequent freeze for the same runId short-circuits to the rehydrated identity', async () => { + const first = await freezeP2pArtifactIdentity({ + contract: { convention: 'explicit_paths', paths: ['proposal.md'] }, + repoRoot, + runId: 'run-stable-id', + }); + await new Promise((resolve) => setTimeout(resolve, 50)); + __resetP2pArtifactIdentitiesForTests(); + await loadPersistedFrozenP2pArtifactIdentities(); + const second = await freezeP2pArtifactIdentity({ + contract: { convention: 'explicit_paths', paths: ['proposal.md', 'never-merged.md'] }, + repoRoot, + runId: 'run-stable-id', + }); + // Second call MUST short-circuit to the persisted identity even though + // the contract paths differ — that's the spec invariant. + expect(second.frozenAt).toBe(first.frozenAt); + expect(second.openspecArtifactPaths).toEqual(['proposal.md']); + }); + + it('skips malformed persisted entries silently', async () => { + const dir = join(runStateRoot, 'run-bad-1'); + mkdirSync(dir, { recursive: true }); + writeFileSync(join(dir, 'identity.json'), 'not valid json{', 'utf8'); + // Also drop a wrong-schema entry. + const dir2 = join(runStateRoot, 'run-bad-2'); + mkdirSync(dir2, { recursive: true }); + writeFileSync(join(dir2, 'identity.json'), JSON.stringify({ schemaVersion: 99, identity: {} }), 'utf8'); + const loaded = await loadPersistedFrozenP2pArtifactIdentities(); + expect(loaded).toBe(0); + expect(getFrozenP2pArtifactIdentity('run-bad-1')).toBeUndefined(); + expect(getFrozenP2pArtifactIdentity('run-bad-2')).toBeUndefined(); + }); + + it('skips entries whose runId directory name fails the [A-Za-z0-9_-] sanity check', async () => { + // Subdirectory with a path-traversal name should never match the regex, + // so the loader ignores it. + const dir = join(runStateRoot, '..bad..'); + mkdirSync(dir, { recursive: true }); + writeFileSync(join(dir, 'identity.json'), JSON.stringify({ + schemaVersion: 1, + identity: { convention: 'explicit_paths', openspecArtifactPaths: [], frozenAt: '', collisionResolved: false, diagnostics: [] }, + }), 'utf8'); + expect(await loadPersistedFrozenP2pArtifactIdentities()).toBe(0); + }); + + it('returns 0 when the run state directory does not exist', async () => { + rmSync(runStateRoot, { recursive: true, force: true }); + expect(await loadPersistedFrozenP2pArtifactIdentities()).toBe(0); + }); +}); diff --git a/test/daemon/p2p-artifact-persistence-hardening.test.ts b/test/daemon/p2p-artifact-persistence-hardening.test.ts new file mode 100644 index 000000000..228e9c2a0 --- /dev/null +++ b/test/daemon/p2p-artifact-persistence-hardening.test.ts @@ -0,0 +1,129 @@ +/** + * R3 v2 PR-ζ — Artifact identity persistence hardening tests. + * + * Pins the new defenses on top of v1b's basic round-trip: + * - resolveRunStateDir env containment (B4) + * - persistFrozenIdentity tmp PID suffix (B2) + * - rehydrate symlink reject + path re-validate + repoRoot containment + + * count cap + TTL eviction (A2 / A3 / A4 / B3 / O5) + * - clearPersistedFrozenP2pArtifactIdentity removes both memory + disk + */ +import { mkdirSync, mkdtempSync, readFileSync, readdirSync, rmSync, symlinkSync, writeFileSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; + +import { + P2P_RUN_STATE_DIR_ENV, + __resetP2pArtifactIdentitiesForTests, + clearPersistedFrozenP2pArtifactIdentity, + freezeP2pArtifactIdentity, + getFrozenP2pArtifactIdentity, + loadPersistedFrozenP2pArtifactIdentities, +} from '../../src/daemon/p2p-workflow-artifact-runtime.js'; + +const SAVED_ENV = process.env[P2P_RUN_STATE_DIR_ENV]; +let runStateRoot: string; +let repoRoot: string; + +beforeEach(() => { + __resetP2pArtifactIdentitiesForTests(); + runStateRoot = mkdtempSync(join(tmpdir(), 'imcodes-test-p2p-workflow-runs-')); + repoRoot = mkdtempSync(join(tmpdir(), 'imcodes-test-p2p-workflow-repo-')); + process.env[P2P_RUN_STATE_DIR_ENV] = runStateRoot; +}); + +afterEach(() => { + __resetP2pArtifactIdentitiesForTests(); + if (SAVED_ENV === undefined) delete process.env[P2P_RUN_STATE_DIR_ENV]; + else process.env[P2P_RUN_STATE_DIR_ENV] = SAVED_ENV; + rmSync(runStateRoot, { recursive: true, force: true }); + rmSync(repoRoot, { recursive: true, force: true }); +}); + +describe('PR-ζ persistence hardening', () => { + it('persistFrozenIdentity uses a PID-suffixed tmp filename (B2)', async () => { + await freezeP2pArtifactIdentity({ + contract: { convention: 'explicit_paths', paths: ['proposal.md'] }, + repoRoot, + runId: 'run-pid-tmp', + }); + // Persistence is fire-and-forget; allow microtasks to settle. + await new Promise((resolve) => setTimeout(resolve, 50)); + const files = readdirSync(join(runStateRoot, 'run-pid-tmp')); + // Final file is `identity.json`. Tmp files (if observable) include the + // pid pattern. We cannot easily race two writes inside one test, so + // we assert the FINAL file exists AND no leftover .tmp lingers (tmp + // is renamed atomically). + expect(files).toContain('identity.json'); + expect(files.filter((f) => f.endsWith('.tmp'))).toEqual([]); + }); + + it('clearPersistedFrozenP2pArtifactIdentity removes both memory and disk (A2)', async () => { + await freezeP2pArtifactIdentity({ + contract: { convention: 'explicit_paths', paths: ['proposal.md'] }, + repoRoot, + runId: 'run-clear', + }); + await new Promise((resolve) => setTimeout(resolve, 50)); + expect(getFrozenP2pArtifactIdentity('run-clear')).toBeDefined(); + await clearPersistedFrozenP2pArtifactIdentity('run-clear'); + expect(getFrozenP2pArtifactIdentity('run-clear')).toBeUndefined(); + const dir = join(runStateRoot, 'run-clear'); + expect(() => readFileSync(join(dir, 'identity.json'), 'utf8')).toThrow(); + }); + + it('rehydrate skips symlink top-level entries (A3)', async () => { + // Create a real entry first. + const realDir = join(runStateRoot, 'real-entry'); + mkdirSync(realDir, { recursive: true }); + writeFileSync(join(realDir, 'identity.json'), JSON.stringify({ + schemaVersion: 1, + identity: { convention: 'explicit_paths', openspecArtifactPaths: ['proposal.md'], frozenAt: new Date().toISOString(), collisionResolved: false, diagnostics: [] }, + }), 'utf8'); + // Symlink another entry name to it. + try { + symlinkSync(realDir, join(runStateRoot, 'symlink-entry')); + } catch (error) { + // Some test sandboxes disallow symlinks; skip the case in that scenario. + if ((error as NodeJS.ErrnoException).code === 'EPERM') return; + throw error; + } + const loaded = await loadPersistedFrozenP2pArtifactIdentities(); + expect(loaded).toBe(1); // only the real entry + expect(getFrozenP2pArtifactIdentity('real-entry')).toBeDefined(); + expect(getFrozenP2pArtifactIdentity('symlink-entry')).toBeUndefined(); + }); + + it('rehydrate drops identity whose declared path fails validation when repoRoot is supplied (A4)', async () => { + const dir = join(runStateRoot, 'bad-paths'); + mkdirSync(dir, { recursive: true }); + writeFileSync(join(dir, 'identity.json'), JSON.stringify({ + schemaVersion: 1, + identity: { + convention: 'openspec_convention', + openspecArtifactPaths: ['../../etc/passwd'], + frozenAt: new Date().toISOString(), + collisionResolved: false, + diagnostics: [], + }, + }), 'utf8'); + const loaded = await loadPersistedFrozenP2pArtifactIdentities({ repoRoot }); + expect(loaded).toBe(0); + expect(getFrozenP2pArtifactIdentity('bad-paths')).toBeUndefined(); + }); + + it('rehydrate cleans up .tmp orphans (B3)', async () => { + const dir = join(runStateRoot, 'tmp-orphan'); + mkdirSync(dir, { recursive: true }); + writeFileSync(join(dir, 'identity.json'), JSON.stringify({ + schemaVersion: 1, + identity: { convention: 'explicit_paths', openspecArtifactPaths: ['proposal.md'], frozenAt: new Date().toISOString(), collisionResolved: false, diagnostics: [] }, + }), 'utf8'); + writeFileSync(join(dir, 'identity.json.42.99999.abc.tmp'), '{partial', 'utf8'); + await loadPersistedFrozenP2pArtifactIdentities(); + const remaining = readdirSync(dir); + expect(remaining.filter((f) => f.endsWith('.tmp'))).toEqual([]); + expect(remaining).toContain('identity.json'); + }); +}); diff --git a/test/daemon/p2p-discussion-list.test.ts b/test/daemon/p2p-discussion-list.test.ts index e2012de78..200f55fba 100644 --- a/test/daemon/p2p-discussion-list.test.ts +++ b/test/daemon/p2p-discussion-list.test.ts @@ -2,9 +2,28 @@ import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; import { mkdtemp, mkdir, rm, writeFile } from 'node:fs/promises'; import { tmpdir } from 'node:os'; import { join } from 'node:path'; + +// We mock the p2p-orchestrator so the scope-filter tests below can inject +// synthetic runs without booting the full orchestrator. The earlier +// list_discussions tests are not affected because they exercise the file +// system, not in-memory runs (other than handleP2pReadDiscussion's run lookup, +// which gracefully falls back to file reads when listP2pRuns returns empty). +const mockListP2pRuns = vi.fn(() => [] as Array>); +const mockGetP2pRun = vi.fn((_id: string) => undefined as Record | undefined); +vi.mock('../../src/daemon/p2p-orchestrator.js', async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + listP2pRuns: (...args: Parameters) => mockListP2pRuns(...args) as ReturnType, + getP2pRun: (id: string) => mockGetP2pRun(id) as ReturnType, + serializeP2pRun: (run: Record) => ({ id: run.id, status: run.status, contextFilePath: run.contextFilePath }), + }; +}); + import { handleWebCommand } from '../../src/daemon/command-handler.js'; import { imcSubDir } from '../../src/util/imc-dir.js'; import { listSessions, removeSession, upsertSession } from '../../src/store/session-store.js'; +import { P2P_WORKFLOW_MSG } from '../../shared/p2p-workflow-messages.js'; const sent: unknown[] = []; const serverLink = { @@ -21,13 +40,16 @@ async function waitForSentCount(count: number): Promise { describe('p2p.list_discussions', () => { let projectDir: string; + let otherProjectDir: string; beforeEach(async () => { vi.clearAllMocks(); sent.length = 0; serverLink.send.mockImplementation((msg: unknown) => { sent.push(msg); }); projectDir = await mkdtemp(join(tmpdir(), 'imcodes-p2p-discussions-')); + otherProjectDir = await mkdtemp(join(tmpdir(), 'imcodes-p2p-discussions-other-')); await mkdir(imcSubDir(projectDir, 'discussions'), { recursive: true }); + await mkdir(imcSubDir(otherProjectDir, 'discussions'), { recursive: true }); upsertSession({ name: 'deck_proj_brain', projectName: 'proj', @@ -40,11 +62,24 @@ describe('p2p.list_discussions', () => { createdAt: Date.now(), updatedAt: Date.now(), }); + upsertSession({ + name: 'deck_other_brain', + projectName: 'other', + role: 'brain', + agentType: 'claude-code', + projectDir: otherProjectDir, + state: 'idle', + restarts: 0, + restartTimestamps: [], + createdAt: Date.now(), + updatedAt: Date.now(), + }); }); afterEach(async () => { for (const session of listSessions()) removeSession(session.name); if (projectDir) await rm(projectDir, { recursive: true, force: true }); + if (otherProjectDir) await rm(otherProjectDir, { recursive: true, force: true }); }); it('returns only the canonical discussion file and excludes hop artifacts', async () => { @@ -54,12 +89,17 @@ describe('p2p.list_discussions', () => { await writeFile(join(discussionsDir, 'run-main.round1.hop2.md'), '## User Request\nhop 2\n', 'utf8'); await writeFile(join(discussionsDir, 'run-main.reducer.2.md'), '# reducer snapshot\n', 'utf8'); - handleWebCommand({ type: 'p2p.list_discussions' }, serverLink as any); + handleWebCommand({ + type: P2P_WORKFLOW_MSG.LIST_DISCUSSIONS, + requestId: 'p2p-list-1', + scope: { sessionName: 'deck_proj_brain' }, + }, serverLink as any); await waitForSentCount(1); expect(sent).toHaveLength(1); expect(sent[0]).toMatchObject({ - type: 'p2p.list_discussions_response', + type: P2P_WORKFLOW_MSG.LIST_DISCUSSIONS_RESPONSE, + requestId: 'p2p-list-1', discussions: [ expect.objectContaining({ id: 'run-main', @@ -70,5 +110,165 @@ describe('p2p.list_discussions', () => { }); const response = sent[0] as { discussions: Array<{ fileName: string }> }; expect(response.discussions.map((d) => d.fileName)).toEqual(['run-main.md']); + expect(response.discussions[0]).not.toHaveProperty('path'); + }); + + it('does not list or read discussions across project scope', async () => { + await writeFile(join(imcSubDir(projectDir, 'discussions'), 'run-main.md'), '## User Request\nmain request\n', 'utf8'); + await writeFile(join(imcSubDir(otherProjectDir, 'discussions'), 'run-secret.md'), '## User Request\nsecret request\n', 'utf8'); + + handleWebCommand({ + type: P2P_WORKFLOW_MSG.LIST_DISCUSSIONS, + requestId: 'p2p-list-scope', + scope: { sessionName: 'deck_proj_brain' }, + }, serverLink as any); + await waitForSentCount(1); + + expect((sent[0] as { discussions: Array<{ id: string }> }).discussions.map((entry) => entry.id)).toEqual(['run-main']); + + handleWebCommand({ + type: P2P_WORKFLOW_MSG.READ_DISCUSSION, + requestId: 'p2p-read-scope', + id: 'run-secret', + scope: { sessionName: 'deck_proj_brain' }, + }, serverLink as any); + await waitForSentCount(2); + + expect(sent[1]).toMatchObject({ + type: P2P_WORKFLOW_MSG.READ_DISCUSSION_RESPONSE, + requestId: 'p2p-read-scope', + id: 'run-secret', + error: 'not_found', + }); + }); +}); + +describe('p2p.status', () => { + let projectDir: string; + let otherProjectDir: string; + + beforeEach(async () => { + vi.clearAllMocks(); + sent.length = 0; + serverLink.send.mockImplementation((msg: unknown) => { sent.push(msg); }); + mockListP2pRuns.mockReturnValue([]); + mockGetP2pRun.mockReturnValue(undefined); + projectDir = await mkdtemp(join(tmpdir(), 'imcodes-p2p-status-')); + otherProjectDir = await mkdtemp(join(tmpdir(), 'imcodes-p2p-status-other-')); + await mkdir(imcSubDir(projectDir, 'discussions'), { recursive: true }); + await mkdir(imcSubDir(otherProjectDir, 'discussions'), { recursive: true }); + upsertSession({ + name: 'deck_proj_brain', + projectName: 'proj', + role: 'brain', + agentType: 'claude-code', + projectDir, + state: 'idle', + restarts: 0, + restartTimestamps: [], + createdAt: Date.now(), + updatedAt: Date.now(), + }); + upsertSession({ + name: 'deck_other_brain', + projectName: 'other', + role: 'brain', + agentType: 'claude-code', + projectDir: otherProjectDir, + state: 'idle', + restarts: 0, + restartTimestamps: [], + createdAt: Date.now(), + updatedAt: Date.now(), + }); + }); + + afterEach(async () => { + for (const session of listSessions()) removeSession(session.name); + if (projectDir) await rm(projectDir, { recursive: true, force: true }); + if (otherProjectDir) await rm(otherProjectDir, { recursive: true, force: true }); + }); + + it('echoes requestId on status responses for bridge singlecast routing', async () => { + handleWebCommand({ + type: P2P_WORKFLOW_MSG.STATUS, + requestId: 'p2p-status-1', + scope: { sessionName: 'deck_proj_brain' }, + }, serverLink as any); + await waitForSentCount(1); + + expect(sent[0]).toMatchObject({ + type: P2P_WORKFLOW_MSG.STATUS_RESPONSE, + requestId: 'p2p-status-1', + runs: expect.any(Array), + }); + }); + + it('handleP2pStatus rejects request without scope', async () => { + // Even with runs present in memory, an unscoped request must fail closed. + mockListP2pRuns.mockReturnValue([ + { id: 'run-a', status: 'queued', contextFilePath: join(imcSubDir(projectDir, 'discussions'), 'run-a.md'), initiatorSession: 'deck_proj_brain' }, + ]); + + handleWebCommand({ type: P2P_WORKFLOW_MSG.STATUS, requestId: 'p2p-status-no-scope' }, serverLink as any); + await waitForSentCount(1); + + expect(sent[0]).toMatchObject({ + type: P2P_WORKFLOW_MSG.STATUS_RESPONSE, + requestId: 'p2p-status-no-scope', + runs: [], + }); + }); + + it('handleP2pStatus filters runs to scope project', async () => { + mockListP2pRuns.mockReturnValue([ + { + id: 'run-in-scope', + status: 'queued', + contextFilePath: join(imcSubDir(projectDir, 'discussions'), 'run-in-scope.md'), + initiatorSession: 'deck_proj_brain', + }, + { + id: 'run-other', + status: 'queued', + contextFilePath: join(imcSubDir(otherProjectDir, 'discussions'), 'run-other.md'), + initiatorSession: 'deck_other_brain', + }, + ]); + + handleWebCommand({ + type: P2P_WORKFLOW_MSG.STATUS, + requestId: 'p2p-status-filter', + scope: { sessionName: 'deck_proj_brain' }, + }, serverLink as any); + await waitForSentCount(1); + + const response = sent[0] as { runs: Array<{ id: string }> }; + expect(response.runs.map((r) => r.id)).toEqual(['run-in-scope']); + }); + + it('handleP2pStatus with runId outside scope returns null run', async () => { + const outOfScopeRun = { + id: 'run-other', + status: 'queued', + contextFilePath: join(imcSubDir(otherProjectDir, 'discussions'), 'run-other.md'), + initiatorSession: 'deck_other_brain', + }; + mockGetP2pRun.mockImplementation((id: string) => (id === 'run-other' ? outOfScopeRun : undefined)); + + handleWebCommand({ + type: P2P_WORKFLOW_MSG.STATUS, + requestId: 'p2p-status-runid-deny', + runId: 'run-other', + scope: { sessionName: 'deck_proj_brain' }, + }, serverLink as any); + await waitForSentCount(1); + + expect(sent[0]).toMatchObject({ + type: P2P_WORKFLOW_MSG.STATUS_RESPONSE, + requestId: 'p2p-status-runid-deny', + runId: 'run-other', + run: null, + }); }); }); diff --git a/test/daemon/p2p-discussion-writer-queue.test.ts b/test/daemon/p2p-discussion-writer-queue.test.ts new file mode 100644 index 000000000..765338ebf --- /dev/null +++ b/test/daemon/p2p-discussion-writer-queue.test.ts @@ -0,0 +1,90 @@ +/** + * R3 v1b follow-up — Per-run discussion-file write queue tests. + * + * Verifies the queue: + * - is non-blocking: enqueue returns synchronously + * - serialises writes per file path + * - drops oldest pending segments under backpressure (with logger.warn) + * - flushes deterministically via flushP2pDiscussionWriteQueue + * - surfaces failures via the per-call onWriteFailure listener + */ + +import { mkdtempSync, readFileSync, rmSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +import { + P2P_DISCUSSION_WRITE_QUEUE_MAX_BYTES, + __resetP2pDiscussionWriteQueueForTests, + enqueueP2pDiscussionWrite, + flushP2pDiscussionWriteQueue, +} from '../../src/daemon/p2p-discussion-writer.js'; + +let tmpRoot: string; +let filePath: string; + +beforeEach(() => { + tmpRoot = mkdtempSync(join(tmpdir(), 'imcodes-test-p2p-workflow-writer-')); + filePath = join(tmpRoot, 'discussion.md'); + __resetP2pDiscussionWriteQueueForTests(); +}); + +afterEach(() => { + __resetP2pDiscussionWriteQueueForTests(); + rmSync(tmpRoot, { recursive: true, force: true }); +}); + +describe('p2p discussion writer queue', () => { + it('enqueue returns synchronously and writes occur in the background', async () => { + const t0 = Date.now(); + enqueueP2pDiscussionWrite(filePath, 'segment-a\n'); + enqueueP2pDiscussionWrite(filePath, 'segment-b\n'); + enqueueP2pDiscussionWrite(filePath, 'segment-c\n'); + expect(Date.now() - t0).toBeLessThan(50); + await flushP2pDiscussionWriteQueue(filePath); + const content = readFileSync(filePath, 'utf8'); + expect(content).toBe('segment-a\nsegment-b\nsegment-c\n'); + }); + + it('preserves segment ordering across rapid enqueues', async () => { + for (let i = 0; i < 50; i += 1) enqueueP2pDiscussionWrite(filePath, `${i}\n`); + await flushP2pDiscussionWriteQueue(filePath); + const lines = readFileSync(filePath, 'utf8').split('\n').filter(Boolean); + expect(lines).toEqual(Array.from({ length: 50 }, (_, i) => String(i))); + }); + + it('drops oldest pending segments when the queue exceeds the byte cap', async () => { + const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {}); + try { + // First write: large enough to keep one in flight while we backfill. + const huge = 'x'.repeat(P2P_DISCUSSION_WRITE_QUEUE_MAX_BYTES); + enqueueP2pDiscussionWrite(filePath, huge); + // Push more segments than the queue can hold; oldest should be dropped. + for (let i = 0; i < 5; i += 1) { + enqueueP2pDiscussionWrite(filePath, 'x'.repeat(P2P_DISCUSSION_WRITE_QUEUE_MAX_BYTES)); + } + await flushP2pDiscussionWriteQueue(filePath); + const stat = readFileSync(filePath); + // The exact contents depend on draining timing but the file SHALL + // remain well under (cap × number of enqueues) bytes. + expect(stat.byteLength).toBeLessThanOrEqual(P2P_DISCUSSION_WRITE_QUEUE_MAX_BYTES * 6); + } finally { + warnSpy.mockRestore(); + } + }); + + it('invokes onWriteFailure with the underlying error when the file cannot be written', async () => { + const onFail = vi.fn(); + const badPath = join(tmpRoot, 'no-such-dir', 'discussion.md'); + enqueueP2pDiscussionWrite(badPath, 'will fail\n', onFail); + await flushP2pDiscussionWriteQueue(badPath); + expect(onFail).toHaveBeenCalled(); + const error = onFail.mock.calls[0]?.[0]; + expect(error).toBeInstanceOf(Error); + }); + + it('flush before any enqueue resolves immediately', async () => { + await expect(flushP2pDiscussionWriteQueue(filePath)).resolves.toBeUndefined(); + }); +}); diff --git a/test/daemon/p2p-parser.test.ts b/test/daemon/p2p-parser.test.ts index 1aba36a66..3ceaf39f3 100644 --- a/test/daemon/p2p-parser.test.ts +++ b/test/daemon/p2p-parser.test.ts @@ -250,10 +250,22 @@ describe('parseAtTokens', () => { // ── Structured WS field routing (no inline @@tokens) ────────────────────────── describe('structured P2P routing via WS fields', () => { + // Audit:N-H2 / N4 — `getP2pWorkflowCapabilities` MUST be supplied so the + // daemon static policy reflects the dangerous capabilities required by + // the test's advanced launch (which uses preset 'implementation'). Without + // it, fail-closed fallback returns `[]` and compile rejects the implementation + // node — that is the desired production behavior. const mockServerLink = { send: vi.fn(), sendTimelineEvent: vi.fn(), getServerId: vi.fn(() => 'srv-main'), + getP2pWorkflowCapabilities: vi.fn(() => [ + 'p2p.workflow.v1', + 'p2p.workflow.openspec-artifacts.v1', + 'p2p.workflow.implementation.v1', + ]), + getHelloEpoch: vi.fn(() => 1), + getHelloSentAt: vi.fn(() => 1_700_000_000_000), daemonVersion: '0.1.0', }; @@ -604,18 +616,32 @@ describe('structured P2P routing via WS fields', () => { expect(startP2pRun).toHaveBeenCalledOnce(); expect((startP2pRun as ReturnType).mock.calls[0]).toHaveLength(1); - expect((startP2pRun as ReturnType).mock.calls[0]?.[0]).toMatchObject({ + // Audit:V-1 / N-H1 — old top-level advanced fields are materialized through + // `prepareAdvancedWorkflowLaunch`, then forwarded as the typed `advanced` + // discriminated union so the orchestrator surfaces capabilitySnapshot/policy + // on the run state. The compiled rounds end up under `advanced.advancedRounds`, + // and the legacy `advancedPresetKey` is set to 'openspec' to mark the + // compiled-from-envelope path inside the orchestrator's resolveP2pRoundPlan. + const startCall = (startP2pRun as ReturnType).mock.calls[0]?.[0]; + expect(startCall).toMatchObject({ initiatorSession: 'deck_proj_brain', targets: [{ session: 'deck_proj_w1', mode: 'audit' }], userText: 'run advanced p2p', advancedPresetKey: 'openspec', - advancedRounds, - advancedRunTimeoutMs: 45 * 60_000, - contextReducer: { - mode: 'clone_sdk_session', - templateSession: 'deck_proj_brain', + advanced: { + kind: 'envelope_compiled', + advancedRunTimeoutMs: 45 * 60_000, + contextReducer: { + mode: 'clone_sdk_session', + templateSession: 'deck_proj_brain', + }, }, }); + // Sanity: the bound workflow must be present so the orchestrator can store + // capabilitySnapshot / currentDaemonPolicy on the P2pRun. + expect(startCall?.advanced?.bound).toBeDefined(); + // The compiled rounds match the input shape (single 'implementation' round). + expect(startCall?.advanced?.advancedRounds).toHaveLength(1); }); it('forwards the selected i18n locale to the P2P run for final-summary prompting', async () => { diff --git a/test/daemon/p2p-prototype-pollution.test.ts b/test/daemon/p2p-prototype-pollution.test.ts new file mode 100644 index 000000000..82bddbc6a --- /dev/null +++ b/test/daemon/p2p-prototype-pollution.test.ts @@ -0,0 +1,80 @@ +/** + * R3 v2 PR-ζ — Prototype-pollution + variable-cap regression tests. + * + * Pins the runtime semantics of the orchestrator's variable write path + * (defence-in-depth alongside the parser's regex / size caps and the + * compile-time logic identifier validator). + */ +import { describe, expect, it } from 'vitest'; +import { + P2P_WORKFLOW_VARIABLE_ARRAY_MAX_ELEMENTS, + P2P_WORKFLOW_VARIABLE_ARRAY_MAX_ELEMENT_BYTES, + P2P_WORKFLOW_VARIABLE_NAME_PATTERN, +} from '../../shared/p2p-workflow-constants.js'; +import { + evaluateP2pLogic, + validateP2pLogicContract, +} from '../../shared/p2p-workflow-logic-evaluator.js'; + +describe('P2P workflow variable name pattern (R3 v2 PR-ζ ζ-2)', () => { + it('matches lowercase identifiers up to 64 chars', () => { + expect(P2P_WORKFLOW_VARIABLE_NAME_PATTERN.test('verdict')).toBe(true); + expect(P2P_WORKFLOW_VARIABLE_NAME_PATTERN.test('round_count')).toBe(true); + expect(P2P_WORKFLOW_VARIABLE_NAME_PATTERN.test('a')).toBe(true); + expect(P2P_WORKFLOW_VARIABLE_NAME_PATTERN.test('a'.repeat(64))).toBe(true); + }); + + it('rejects prototype-pollution names', () => { + expect(P2P_WORKFLOW_VARIABLE_NAME_PATTERN.test('__proto__')).toBe(false); + // `constructor` starts with lowercase letter so it WOULD match the + // base pattern — but the orchestrator and logic evaluator reject it + // explicitly via a deny-set. We document here that the pattern alone + // cannot rule it out. + expect(P2P_WORKFLOW_VARIABLE_NAME_PATTERN.test('constructor')).toBe(true); + }); + + it('rejects uppercase, leading digit, and over-length names', () => { + expect(P2P_WORKFLOW_VARIABLE_NAME_PATTERN.test('Verdict')).toBe(false); + expect(P2P_WORKFLOW_VARIABLE_NAME_PATTERN.test('1tag')).toBe(false); + expect(P2P_WORKFLOW_VARIABLE_NAME_PATTERN.test('a'.repeat(65))).toBe(false); + }); + + it('exposes the documented array caps', () => { + expect(P2P_WORKFLOW_VARIABLE_ARRAY_MAX_ELEMENTS).toBe(64); + expect(P2P_WORKFLOW_VARIABLE_ARRAY_MAX_ELEMENT_BYTES).toBe(8 * 1024); + }); +}); + +describe('Logic evaluator prototype-key defence (R3 v2 PR-ζ ζ-12)', () => { + it('compile-time validator rejects __proto__ / constructor / prototype', () => { + for (const bad of ['__proto__', 'constructor', 'prototype']) { + const diags = validateP2pLogicContract({ + rules: [{ if: { kind: 'variable_equals', name: bad, equals: 'x' }, emit: 'go' }], + default: 'no', + }); + expect(diags.find((d) => d.fieldPath.endsWith('.if.name'))).toBeDefined(); + } + }); + + it('runtime evaluator returns false for prototype-key reads even if a hostile contract slips past validation', () => { + // Bypass the validator and feed a hostile contract directly. + const hostile = { + rules: [{ if: { kind: 'variable_equals' as const, name: '__proto__', equals: '[object Object]' }, emit: 'pollute' }], + default: 'safe', + }; + const result = evaluateP2pLogic(hostile, {}); + expect(result.marker).toBe('safe'); + expect(result.matchedRuleIndex).toBe(-1); + }); +}); + +describe('Logic evaluator stable array stringification (R3 v2 PR-ζ ζ-13)', () => { + it('canonical JSON encoding distinguishes ["a","b"] from ["a,b"]', () => { + const contract = { + rules: [{ if: { kind: 'variable_equals' as const, name: 'tags', equals: '["a","b"]' }, emit: 'pair' }], + default: 'no', + }; + expect(evaluateP2pLogic(contract, { tags: ['a', 'b'] }).marker).toBe('pair'); + expect(evaluateP2pLogic(contract, { tags: ['a,b'] }).marker).toBe('no'); + }); +}); diff --git a/test/daemon/p2p-script-runner-sandbox.test.ts b/test/daemon/p2p-script-runner-sandbox.test.ts new file mode 100644 index 000000000..a08239da9 --- /dev/null +++ b/test/daemon/p2p-script-runner-sandbox.test.ts @@ -0,0 +1,66 @@ +/** + * R3 v1b follow-up — Script runner sandbox hardening unit tests. + * + * Locks the env deny-list (`P2P_SCRIPT_ENV_DENYLIST`) so dynamic-loader + * hooks can never reach the spawned script even when the workflow author + * allowlists them. + */ +import { afterEach, describe, expect, it } from 'vitest'; +import { + P2P_SCRIPT_ENV_DENYLIST, + buildScriptSpawnEnv, +} from '../../src/daemon/p2p-workflow-script-runner.js'; + +const SAVED_ENV: Record = {}; + +afterEach(() => { + for (const [key, value] of Object.entries(SAVED_ENV)) { + if (value === undefined) delete process.env[key]; + else process.env[key] = value; + } + for (const key of Object.keys(SAVED_ENV)) delete SAVED_ENV[key]; +}); + +function setEnv(name: string, value: string): void { + SAVED_ENV[name] = process.env[name]; + process.env[name] = value; +} + +describe('buildScriptSpawnEnv (sandbox hardening)', () => { + it('PATH defaults to empty string when not allowlisted', () => { + expect(buildScriptSpawnEnv([])).toEqual({ PATH: '' }); + }); + + it('copies allowlisted names from process.env when present', () => { + setEnv('IM_TEST_ALLOWED', 'value-1'); + expect(buildScriptSpawnEnv(['IM_TEST_ALLOWED'])).toEqual({ PATH: '', IM_TEST_ALLOWED: 'value-1' }); + }); + + it('omits allowlisted names that are absent from process.env', () => { + expect(buildScriptSpawnEnv(['IM_TEST_DEFINITELY_UNSET'])).toEqual({ PATH: '' }); + }); + + it.each(P2P_SCRIPT_ENV_DENYLIST)( + 'NEVER passes %s through, even when allowlisted by the workflow author', + (denied) => { + setEnv(denied, 'malicious-value'); + const env = buildScriptSpawnEnv([denied]); + expect(env).not.toHaveProperty(denied); + }, + ); + + it('deny-list wins over allowlist for mixed payloads', () => { + setEnv('LD_PRELOAD', 'evil.so'); + setEnv('IM_BENIGN', 'ok'); + const env = buildScriptSpawnEnv(['LD_PRELOAD', 'IM_BENIGN']); + expect(env.LD_PRELOAD).toBeUndefined(); + expect(env.IM_BENIGN).toBe('ok'); + }); + + it('exposed deny-list is non-empty and contains the canonical loader hooks', () => { + expect(P2P_SCRIPT_ENV_DENYLIST.length).toBeGreaterThan(0); + expect(P2P_SCRIPT_ENV_DENYLIST).toContain('LD_PRELOAD'); + expect(P2P_SCRIPT_ENV_DENYLIST).toContain('DYLD_INSERT_LIBRARIES'); + expect(P2P_SCRIPT_ENV_DENYLIST).toContain('NODE_OPTIONS'); + }); +}); diff --git a/test/daemon/p2p-workflow-allowlist-loader.test.ts b/test/daemon/p2p-workflow-allowlist-loader.test.ts new file mode 100644 index 000000000..ed38bc69f --- /dev/null +++ b/test/daemon/p2p-workflow-allowlist-loader.test.ts @@ -0,0 +1,195 @@ +/** + * R3 PR-α follow-up — UI-driven `allowedExecutables`. + * + * The previous `~/.imcodes/p2p-policy.json` daemon-side reader has been + * removed; the allowlist now travels with `P2pWorkflowLaunchEnvelope.allowedExecutables` + * (configured in the web UI's `P2pConfigPanel` → "Allowed executables"). + * + * These tests pin the new contract from the daemon side: + * - `loadDaemonP2pStaticPolicy` returns an empty allowlist (no host-file + * fallback). The bind validator therefore rejects every script + * executable unless the launch envelope supplies one. + * - The envelope validator enforces shape (visible-ASCII, ≤256 bytes per + * entry, ≤64 entries, no duplicates). + * - The end-to-end semantic is exercised in + * `test/daemon/p2p-workflow-launch-envelope-allowlist.test.ts` (envelope + * → bind path); this file keeps the layer-isolated unit tests. + */ + +import { describe, expect, it } from 'vitest'; + +import { loadDaemonP2pStaticPolicy } from '../../src/daemon/p2p-workflow-static-policy.js'; +import { validateP2pWorkflowLaunchEnvelope } from '../../shared/p2p-workflow-validators.js'; +import { P2P_WORKFLOW_SCHEMA_VERSION } from '../../shared/p2p-workflow-constants.js'; +import type { P2pWorkflowLaunchEnvelope } from '../../shared/p2p-workflow-types.js'; + +function envelope(overrides: Partial = {}): P2pWorkflowLaunchEnvelope { + return { + workflowSchemaVersion: P2P_WORKFLOW_SCHEMA_VERSION, + workflowKind: 'advanced', + advancedDraft: { + schemaVersion: P2P_WORKFLOW_SCHEMA_VERSION, + id: 'wf-test', + title: 'Test', + nodes: [{ id: 'n1', title: 'Discuss', nodeKind: 'llm', preset: 'discuss', permissionScope: 'analysis_only' }], + edges: [], + }, + ...overrides, + }; +} + +describe('loadDaemonP2pStaticPolicy — UI-driven allowlist (no host JSON)', () => { + it('returns an empty allowedExecutables (envelope is the source of truth)', () => { + const policy = loadDaemonP2pStaticPolicy({} as never); + expect(policy.allowedExecutables).toEqual([]); + }); + + it('does not export the historic JSON loader symbol', async () => { + // Use dynamic import + reflection so a future regression that re-adds + // a `loadAllowedExecutables` export (or `~/.imcodes/p2p-policy.json` + // env override) trips this guard. Strings are intentionally string + // literals so a textual rename also surfaces. + const mod = await import('../../src/daemon/p2p-workflow-static-policy.js'); + expect(Object.keys(mod)).not.toContain('loadAllowedExecutables'); + expect(Object.keys(mod)).not.toContain('P2P_DAEMON_POLICY_FILE_ENV'); + }); +}); + +describe('validateP2pWorkflowLaunchEnvelope.allowedExecutables', () => { + it('accepts a small visible-ASCII allowlist', () => { + const result = validateP2pWorkflowLaunchEnvelope(envelope({ allowedExecutables: ['/usr/bin/jq', '/bin/echo'] })); + expect(result.ok).toBe(true); + }); + + it('rejects non-array allowedExecutables', () => { + const result = validateP2pWorkflowLaunchEnvelope(envelope({ allowedExecutables: 'jq' as unknown as string[] })); + expect(result.ok).toBe(false); + }); + + it('rejects too many entries (>64)', () => { + const huge = Array.from({ length: 65 }, (_, index) => `/bin/cmd-${index}`); + const result = validateP2pWorkflowLaunchEnvelope(envelope({ allowedExecutables: huge })); + expect(result.ok).toBe(false); + }); + + it('rejects per-entry length > 256', () => { + const result = validateP2pWorkflowLaunchEnvelope(envelope({ allowedExecutables: ['/bin/' + 'x'.repeat(260)] })); + expect(result.ok).toBe(false); + }); + + it('rejects multi-byte / non-visible-ASCII entries', () => { + const result = validateP2pWorkflowLaunchEnvelope(envelope({ allowedExecutables: ['/usr/bin/中文'] })); + expect(result.ok).toBe(false); + }); + + it('rejects whitespace-bearing entries (visible-ASCII only)', () => { + const result = validateP2pWorkflowLaunchEnvelope(envelope({ allowedExecutables: ['/usr/bin/with space'] })); + expect(result.ok).toBe(false); + }); + + it('rejects duplicate entries with explicit fieldPath', () => { + const result = validateP2pWorkflowLaunchEnvelope(envelope({ allowedExecutables: ['/bin/echo', '/bin/echo'] })); + expect(result.ok).toBe(false); + if (!result.ok) { + const dup = result.diagnostics.find((d) => d.fieldPath === 'allowedExecutables[1]'); + expect(dup?.summary).toMatch(/Duplicate/i); + } + }); + + it('rejects empty-string entries', () => { + const result = validateP2pWorkflowLaunchEnvelope(envelope({ allowedExecutables: ['/bin/echo', ''] })); + expect(result.ok).toBe(false); + }); +}); + +describe('envelope.allowedExecutables → bind policy (UI-driven allowlist)', () => { + // The full envelope→compile→bind path is exercised by the orchestrator + // tests; here we focus on the bind validator directly. The contract is: + // - daemon-side default `allowedExecutables` is `[]` + // - merging in envelope entries produces a policy that bind validates against + it('script binds successfully when the envelope-derived policy lists the executable', async () => { + const { buildDefaultP2pStaticPolicy } = await import('../../shared/p2p-workflow-policy.js'); + const { validateCompiledWorkflowAgainstBindPolicy } = await import('../../src/daemon/p2p-workflow-bind.js'); + const compiled = { + schemaVersion: 1 as const, + workflowId: 'wf-1', + rootNodeId: 'n1', + nodes: [{ + id: 'n1', + nodeKind: 'script' as const, + preset: 'discuss' as const, + permissionScope: 'analysis_only' as const, + routingAuthority: { kind: 'none' as const }, + artifacts: [], + script: { commandKind: 'argv' as const, argv: ['/usr/bin/jq', '.'], env: { mode: 'allowlist' as const, allowed: [] } }, + }], + edges: [], + variables: [], + loopBudgets: {}, + derivedRequiredCapabilities: [], + staticPolicyHash: 'h', + workflowContractHash: 'c', + diagnostics: [], + }; + const bindContext = { + runId: 'run-1', + requestId: 'req-1', + repoRoot: '/repo', + participants: [{ sessionName: 'deck_proj_brain' }], + launchScope: { sessionName: 'deck_proj_brain' }, + capabilitySnapshot: { + daemonId: 'd-1', + capabilities: ['p2p.workflow.v1', 'p2p.workflow.script.argv.v1'], + helloEpoch: 1, + sentAt: 1, + }, + policySnapshot: buildDefaultP2pStaticPolicy({ allowedExecutables: ['/usr/bin/jq'] }), + concurrencyAdmission: { accepted: true as const }, + }; + const diagnostics = validateCompiledWorkflowAgainstBindPolicy(compiled, bindContext); + expect(diagnostics.find((d) => d.code === 'script_executable_denied')).toBeUndefined(); + }); + + it('script bind rejects when the merged policy has an empty allowlist', async () => { + const { buildDefaultP2pStaticPolicy } = await import('../../shared/p2p-workflow-policy.js'); + const { validateCompiledWorkflowAgainstBindPolicy } = await import('../../src/daemon/p2p-workflow-bind.js'); + const compiled = { + schemaVersion: 1 as const, + workflowId: 'wf-1', + rootNodeId: 'n1', + nodes: [{ + id: 'n1', + nodeKind: 'script' as const, + preset: 'discuss' as const, + permissionScope: 'analysis_only' as const, + routingAuthority: { kind: 'none' as const }, + artifacts: [], + script: { commandKind: 'argv' as const, argv: ['/usr/bin/jq', '.'], env: { mode: 'allowlist' as const, allowed: [] } }, + }], + edges: [], + variables: [], + loopBudgets: {}, + derivedRequiredCapabilities: [], + staticPolicyHash: 'h', + workflowContractHash: 'c', + diagnostics: [], + }; + const bindContext = { + runId: 'run-1', + requestId: 'req-1', + repoRoot: '/repo', + participants: [{ sessionName: 'deck_proj_brain' }], + launchScope: { sessionName: 'deck_proj_brain' }, + capabilitySnapshot: { + daemonId: 'd-1', + capabilities: ['p2p.workflow.v1', 'p2p.workflow.script.argv.v1'], + helloEpoch: 1, + sentAt: 1, + }, + policySnapshot: buildDefaultP2pStaticPolicy({ allowedExecutables: [] }), + concurrencyAdmission: { accepted: true as const }, + }; + const diagnostics = validateCompiledWorkflowAgainstBindPolicy(compiled, bindContext); + expect(diagnostics.find((d) => d.code === 'script_executable_denied')).toBeDefined(); + }); +}); diff --git a/test/daemon/p2p-workflow-artifacts.test.ts b/test/daemon/p2p-workflow-artifacts.test.ts new file mode 100644 index 000000000..54b1ccfd4 --- /dev/null +++ b/test/daemon/p2p-workflow-artifacts.test.ts @@ -0,0 +1,437 @@ +import { mkdir, mkdtemp, symlink, writeFile } from 'node:fs/promises'; +import { mkdtempSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import path from 'node:path'; + +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; + +import { + __resetP2pArtifactIdentitiesForTests, + captureP2pArtifactBaseline, + createP2pArtifactPath, + freezeP2pArtifactIdentity, + getFrozenP2pArtifactIdentity, + p2pArtifactBaselinesEqual, + validateP2pArtifactRuntimePath, + verifyP2pArtifactBaselineDelta, +} from '../../src/daemon/p2p-workflow-artifact-runtime.js'; +import type { P2pArtifactContract } from '../../shared/p2p-workflow-types.js'; + +function uniqueRepoRoot(label: string): string { + return mkdtempSync(path.join(tmpdir(), `imcodes-p2p-artifact-${label}-`)); +} + +beforeEach(() => { + __resetP2pArtifactIdentitiesForTests(); +}); + +afterEach(() => { + __resetP2pArtifactIdentitiesForTests(); +}); + +describe('p2p workflow artifact runtime', () => { + it('validates lexical paths and resolves the nearest existing ancestor', async () => { + const repoRoot = uniqueRepoRoot('nearest'); + await mkdir(path.join(repoRoot, 'artifacts'), { recursive: true }); + + const result = await validateP2pArtifactRuntimePath({ + repoRoot, + relativePath: 'artifacts/new/result.json', + phase: 'create', + }); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.nearestExistingAncestor).toBe(path.join(repoRoot, 'artifacts')); + expect(result.absolutePath).toBe(path.join(repoRoot, 'artifacts/new/result.json')); + } + }); + + it('rejects symlink escapes during create/freeze phases', async () => { + const repoRoot = uniqueRepoRoot('symlink'); + const outsideRoot = uniqueRepoRoot('outside'); + await symlink(outsideRoot, path.join(repoRoot, 'linked')); + + const result = await validateP2pArtifactRuntimePath({ + repoRoot, + relativePath: 'linked/result.json', + phase: 'create', + }); + + expect(result.ok).toBe(false); + expect(result.diagnostics[0]).toEqual(expect.objectContaining({ + code: 'unsafe_artifact_path', + fieldPath: 'linked', + })); + }); + + it('allows existing symlinks only when the realpath remains under the repo root', async () => { + const repoRoot = uniqueRepoRoot('under-root'); + await mkdir(path.join(repoRoot, 'real'), { recursive: true }); + await writeFile(path.join(repoRoot, 'real/result.txt'), 'ok'); + await symlink(path.join(repoRoot, 'real'), path.join(repoRoot, 'linked')); + + const rejected = await validateP2pArtifactRuntimePath({ + repoRoot, + relativePath: 'linked/result.txt', + phase: 'baseline', + symlinkPolicy: 'reject_all', + }); + expect(rejected.ok).toBe(false); + + const accepted = await validateP2pArtifactRuntimePath({ + repoRoot, + relativePath: 'linked/result.txt', + phase: 'baseline', + symlinkPolicy: 'allow_existing_under_root', + }); + expect(accepted.ok).toBe(true); + }); + + it('validateP2pArtifactRuntimePath phase: \'freeze\' rejects symlinked ancestor', async () => { + const repoRoot = uniqueRepoRoot('freeze-symlink'); + await mkdir(path.join(repoRoot, 'real'), { recursive: true }); + await symlink(path.join(repoRoot, 'real'), path.join(repoRoot, 'aliased')); + + const result = await validateP2pArtifactRuntimePath({ + repoRoot, + relativePath: 'aliased/new.json', + phase: 'freeze', + symlinkPolicy: 'allow_existing_under_root', + }); + expect(result.ok).toBe(false); + expect(result.diagnostics[0]?.code).toBe('unsafe_artifact_path'); + expect(result.diagnostics[0]?.fieldPath).toBe('aliased'); + }); + + it('validateP2pArtifactRuntimePath phase: \'baseline\' follows symlink when realpath stays under repo root', async () => { + const repoRoot = uniqueRepoRoot('baseline-symlink-ok'); + await mkdir(path.join(repoRoot, 'real/sub'), { recursive: true }); + await writeFile(path.join(repoRoot, 'real/sub/data.txt'), 'data'); + await symlink(path.join(repoRoot, 'real'), path.join(repoRoot, 'aliased')); + + const result = await validateP2pArtifactRuntimePath({ + repoRoot, + relativePath: 'aliased/sub/data.txt', + phase: 'baseline', + symlinkPolicy: 'allow_existing_under_root', + }); + expect(result.ok).toBe(true); + }); + + describe('freezeP2pArtifactIdentity', () => { + it('reuses identity across retries with the same runId', async () => { + const repoRoot = uniqueRepoRoot('reuse'); + const contract: P2pArtifactContract = { + convention: 'openspec_convention', + paths: ['proposal.md'], + }; + const first = await freezeP2pArtifactIdentity({ + contract, + repoRoot, + runId: 'run-reuse-1', + inferredSlug: 'shared-feature', + }); + const second = await freezeP2pArtifactIdentity({ + contract, + repoRoot, + runId: 'run-reuse-1', + inferredSlug: 'shared-feature', + }); + expect(first).toBe(second); + expect(first.openspecChangeSlug).toBe('shared-feature'); + expect(first.openspecChangePath).toBe('openspec/changes/shared-feature'); + expect(first.openspecArtifactPaths).toEqual(['openspec/changes/shared-feature/proposal.md']); + expect(getFrozenP2pArtifactIdentity('run-reuse-1')).toBe(first); + }); + + it('emits artifact_identity_collision_resolved when slug exists', async () => { + const repoRoot = uniqueRepoRoot('collision'); + // Pre-create the base slug so the freeze must collide once. + await mkdir(path.join(repoRoot, 'openspec/changes/widget'), { recursive: true }); + + const result = await freezeP2pArtifactIdentity({ + contract: { convention: 'openspec_convention', paths: ['proposal.md'] }, + repoRoot, + runId: 'run-collision-1', + inferredSlug: 'widget', + }); + expect(result.collisionResolved).toBe(true); + expect(result.openspecChangeSlug).toBe('widget-2'); + expect(result.openspecChangePath).toBe('openspec/changes/widget-2'); + const collisionDiagnostic = result.diagnostics.find((d) => d.code === 'artifact_identity_collision_resolved'); + expect(collisionDiagnostic).toBeDefined(); + expect(collisionDiagnostic?.severity).toBe('warning'); + }); + + it('creates openspec/changes// atomically', async () => { + const repoRoot = uniqueRepoRoot('atomic'); + const result = await freezeP2pArtifactIdentity({ + contract: { convention: 'openspec_convention', paths: [] }, + repoRoot, + runId: 'run-atomic-1', + inferredSlug: 'atomic-change', + }); + expect(result.openspecChangePath).toBe('openspec/changes/atomic-change'); + + // Re-running with a DIFFERENT runId but same slug must collision-resolve. + const second = await freezeP2pArtifactIdentity({ + contract: { convention: 'openspec_convention', paths: [] }, + repoRoot, + runId: 'run-atomic-2', + inferredSlug: 'atomic-change', + }); + expect(second.collisionResolved).toBe(true); + expect(second.openspecChangePath).toBe('openspec/changes/atomic-change-2'); + }); + + it('sanitizes inferred slugs to [a-z0-9-]+', async () => { + const repoRoot = uniqueRepoRoot('sanitize'); + const result = await freezeP2pArtifactIdentity({ + contract: { convention: 'openspec_convention', paths: [] }, + repoRoot, + runId: 'run-sanitize-1', + inferredSlug: 'My Feature: v1.0!', + }); + expect(result.openspecChangeSlug).toBe('my-feature-v1-0'); + }); + + it('rejects openspec_convention without a derivable slug', async () => { + const repoRoot = uniqueRepoRoot('no-slug'); + const result = await freezeP2pArtifactIdentity({ + contract: { convention: 'openspec_convention', paths: [] }, + repoRoot, + runId: 'run-no-slug-1', + }); + expect(result.openspecChangeSlug).toBeUndefined(); + expect(result.diagnostics[0]?.code).toBe('unsafe_artifact_path'); + }); + + it('explicit_paths convention validates each declared path', async () => { + const repoRoot = uniqueRepoRoot('explicit'); + const result = await freezeP2pArtifactIdentity({ + contract: { convention: 'explicit_paths', paths: ['artifacts/result.json'] }, + repoRoot, + runId: 'run-explicit-1', + }); + expect(result.openspecArtifactPaths).toEqual(['artifacts/result.json']); + expect(result.openspecChangeSlug).toBeUndefined(); + + const bad = await freezeP2pArtifactIdentity({ + contract: { convention: 'explicit_paths', paths: ['../escape'] }, + repoRoot, + runId: 'run-explicit-2', + }); + expect(bad.diagnostics[0]?.code).toBe('unsafe_artifact_path'); + }); + }); + + describe('createP2pArtifactPath', () => { + it('creates a placeholder file under the artifact sandbox', async () => { + const repoRoot = uniqueRepoRoot('create-file'); + const result = await createP2pArtifactPath({ + repoRoot, + relativePath: 'artifacts/new/result.json', + phase: 'create', + }); + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.absolutePath).toBe(path.join(repoRoot, 'artifacts/new/result.json')); + } + }); + + it('rejects symlink ancestor on create', async () => { + const repoRoot = uniqueRepoRoot('create-symlink'); + const outsideRoot = uniqueRepoRoot('create-outside'); + await symlink(outsideRoot, path.join(repoRoot, 'aliased')); + + const result = await createP2pArtifactPath({ + repoRoot, + relativePath: 'aliased/new.txt', + phase: 'create', + }); + expect(result.ok).toBe(false); + }); + }); + + describe('captureP2pArtifactBaseline', () => { + it('excludes capturedAt from equality', async () => { + const repoRoot = uniqueRepoRoot('capturedAt'); + await mkdir(path.join(repoRoot, 'baseline-root'), { recursive: true }); + await writeFile(path.join(repoRoot, 'baseline-root/a.txt'), 'one'); + await writeFile(path.join(repoRoot, 'baseline-root/b.txt'), 'two'); + + const first = await captureP2pArtifactBaseline({ + rootPath: 'baseline-root', + repoRoot, + phase: 'baseline', + }); + // Wait long enough for ISO timestamps to differ (set to 5ms). + await new Promise((resolve) => setTimeout(resolve, 5)); + const second = await captureP2pArtifactBaseline({ + rootPath: 'baseline-root', + repoRoot, + phase: 'baseline', + }); + expect(first.baseline.capturedAt).not.toBe(second.baseline.capturedAt); + expect(p2pArtifactBaselinesEqual(first.baseline, second.baseline)).toBe(true); + }); + + it('enforces max 200 files', async () => { + const repoRoot = uniqueRepoRoot('cap-files'); + await mkdir(path.join(repoRoot, 'baseline-root'), { recursive: true }); + // Write 201 files. + for (let i = 0; i < 201; i += 1) { + await writeFile(path.join(repoRoot, 'baseline-root', `file-${String(i).padStart(3, '0')}.txt`), `${i}`); + } + const result = await captureP2pArtifactBaseline({ + rootPath: 'baseline-root', + repoRoot, + phase: 'baseline', + }); + expect(result.baseline.truncated).toBe(true); + expect(result.baseline.files.length).toBeLessThanOrEqual(200); + expect(result.diagnostics.find((d) => d.code === 'artifact_baseline_too_large')).toBeDefined(); + }); + + it('skips files larger than 8 MiB with a per-file diagnostic', async () => { + const repoRoot = uniqueRepoRoot('cap-file-bytes'); + await mkdir(path.join(repoRoot, 'baseline-root'), { recursive: true }); + const big = Buffer.alloc(8 * 1024 * 1024 + 1, 0x41); + await writeFile(path.join(repoRoot, 'baseline-root/big.bin'), big); + await writeFile(path.join(repoRoot, 'baseline-root/small.txt'), 'small'); + const result = await captureP2pArtifactBaseline({ + rootPath: 'baseline-root', + repoRoot, + phase: 'baseline', + }); + const fileDiagnostic = result.diagnostics.find((d) => d.code === 'artifact_baseline_too_large' && d.fieldPath?.includes('big.bin')); + expect(fileDiagnostic).toBeDefined(); + // The small file MUST still be captured (per-file overflow does not halt the walk). + expect(result.baseline.files.find((f) => f.relativePath.endsWith('small.txt'))).toBeDefined(); + }); + + it('enforces max depth 8', async () => { + const repoRoot = uniqueRepoRoot('cap-depth'); + // depth 8 means 8 path segments under the rootPath; we add depth 9 to overflow. + let dir = path.join(repoRoot, 'baseline-root'); + await mkdir(dir, { recursive: true }); + for (let i = 0; i < 9; i += 1) { + dir = path.join(dir, `d${i}`); + await mkdir(dir); + } + await writeFile(path.join(dir, 'leaf.txt'), 'leaf'); + + const result = await captureP2pArtifactBaseline({ + rootPath: 'baseline-root', + repoRoot, + phase: 'baseline', + }); + expect(result.baseline.truncated).toBe(true); + expect(result.diagnostics.find((d) => d.code === 'artifact_baseline_too_large' && (d.summary ?? '').includes('depth'))).toBeDefined(); + }); + + it('halts at the total bytes cap (64 MiB) and marks truncated', async () => { + const repoRoot = uniqueRepoRoot('cap-total-bytes'); + await mkdir(path.join(repoRoot, 'baseline-root'), { recursive: true }); + // Predictive cap: write a file that is just under per-file limit (8 MiB) + // 9 times = 72 MiB declared, but the 9th read predictively trips the + // 64 MiB total cap and stops the walk. + const chunk = Buffer.alloc(8 * 1024 * 1024, 0x42); + for (let i = 0; i < 9; i += 1) { + await writeFile(path.join(repoRoot, 'baseline-root', `f-${i}.bin`), chunk); + } + const result = await captureP2pArtifactBaseline({ + rootPath: 'baseline-root', + repoRoot, + phase: 'baseline', + }); + expect(result.baseline.truncated).toBe(true); + const totalDiag = result.diagnostics.find((d) => d.code === 'artifact_baseline_too_large' && (d.summary ?? '').includes('total bytes')); + expect(totalDiag).toBeDefined(); + }); + }); + + describe('verifyP2pArtifactBaselineDelta', () => { + it('requires sha256 change for declared file', () => { + const before = { + rootPath: 'art', + files: [{ relativePath: 'art/a.txt', size: 1, sha256: 'aaaa', type: 'file' as const }], + capturedAt: 't1', + truncated: false, + }; + const after = { + rootPath: 'art', + files: [{ relativePath: 'art/a.txt', size: 2, sha256: 'bbbb', type: 'file' as const }], + capturedAt: 't2', + truncated: false, + }; + const result = verifyP2pArtifactBaselineDelta(before, after, [{ relativePath: 'art/a.txt' }]); + expect(result.ok).toBe(true); + expect(result.diagnostics).toEqual([]); + }); + + it('rejects unchanged file even when other files in the dir changed', () => { + const before = { + rootPath: 'art', + files: [ + { relativePath: 'art/a.txt', size: 1, sha256: 'aaaa', type: 'file' as const }, + { relativePath: 'art/b.txt', size: 1, sha256: 'cccc', type: 'file' as const }, + ], + capturedAt: 't1', + truncated: false, + }; + const after = { + rootPath: 'art', + files: [ + { relativePath: 'art/a.txt', size: 1, sha256: 'aaaa', type: 'file' as const }, // unchanged + { relativePath: 'art/b.txt', size: 2, sha256: 'dddd', type: 'file' as const }, // changed but not declared + ], + capturedAt: 't2', + truncated: false, + }; + const result = verifyP2pArtifactBaselineDelta(before, after, [{ relativePath: 'art/a.txt' }]); + expect(result.ok).toBe(false); + expect(result.diagnostics[0]?.code).toBe('artifact_baseline_mismatch'); + expect(result.diagnostics[0]?.fieldPath).toBe('art/a.txt'); + }); + + it('treats added declared file (absent before, present after) as success', () => { + const before = { + rootPath: 'art', + files: [], + capturedAt: 't1', + truncated: false, + }; + const after = { + rootPath: 'art', + files: [{ relativePath: 'art/new.json', size: 5, sha256: 'eeee', type: 'file' as const }], + capturedAt: 't2', + truncated: false, + }; + const result = verifyP2pArtifactBaselineDelta(before, after, [{ relativePath: 'art/new.json' }]); + expect(result.ok).toBe(true); + }); + + it('rejects declared file missing in after baseline', () => { + const before = { + rootPath: 'art', + files: [], + capturedAt: 't1', + truncated: false, + }; + const after = { + rootPath: 'art', + files: [], + capturedAt: 't2', + truncated: false, + }; + const result = verifyP2pArtifactBaselineDelta(before, after, [{ relativePath: 'art/missing.json' }]); + expect(result.ok).toBe(false); + expect(result.diagnostics[0]?.code).toBe('artifact_contract_not_satisfied'); + }); + }); +}); + +// keep mkdtemp imported for potential future test helpers +void mkdtemp; diff --git a/test/daemon/p2p-workflow-discussion-offsets.test.ts b/test/daemon/p2p-workflow-discussion-offsets.test.ts new file mode 100644 index 000000000..0a6903819 --- /dev/null +++ b/test/daemon/p2p-workflow-discussion-offsets.test.ts @@ -0,0 +1,186 @@ +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { mkdtempSync, rmSync, writeFileSync, appendFileSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; + +import { + __resetReadOffsetsForTests, + clearReadOffsetsForRun, + getRecordedReadOffset, + readP2pDiscussionWithOffset, +} from '../../src/daemon/p2p-workflow-discussion-offsets.js'; + +// Naming pattern matched by `shared/test-session-guard.ts::PROJECT_DIR_PATTERNS` +// (`/^.*imc_p2p_wf_test_.*/i`) so leaked fixtures are recognised as test data. +function makeTestDir(): string { + return mkdtempSync(join(tmpdir(), 'imc_p2p_wf_test_offsets_')); +} + +describe('readP2pDiscussionWithOffset (Tasks 5.4 / 12.4)', () => { + let dir: string; + let filePath: string; + + beforeEach(() => { + __resetReadOffsetsForTests(); + dir = makeTestDir(); + filePath = join(dir, 'discussion.md'); + }); + + afterEach(() => { + __resetReadOffsetsForTests(); + rmSync(dir, { recursive: true, force: true }); + }); + + it('first read records offset and returns fresh content', async () => { + const initial = '## User Request\nhello world\n'; + writeFileSync(filePath, initial, 'utf8'); + + const result = await readP2pDiscussionWithOffset({ + runId: 'run-1', + sourceKey: 'discussion-a', + filePath, + policy: 'reset', + }); + + expect(result.reset).toBe('fresh'); + expect(result.content).toBe(initial); + expect(result.diagnostics).toEqual([]); + expect(result.newOffset.byteOffset).toBe(Buffer.byteLength(initial, 'utf8')); + expect(result.newOffset.sizeAtOffset).toBe(Buffer.byteLength(initial, 'utf8')); + expect(result.newOffset.sha256Prefix).toMatch(/^[a-f0-9]{16}$/); + + const recorded = getRecordedReadOffset('run-1', 'discussion-a'); + expect(recorded).not.toBeNull(); + expect(recorded!.byteOffset).toBe(result.newOffset.byteOffset); + expect(recorded!.sha256Prefix).toBe(result.newOffset.sha256Prefix); + }); + + it('second incremental read returns only new bytes appended after previous offset', async () => { + const first = '## User Request\nhello\n'; + writeFileSync(filePath, first, 'utf8'); + const firstResult = await readP2pDiscussionWithOffset({ + runId: 'run-2', sourceKey: 'discussion-b', filePath, policy: 'reset', + }); + expect(firstResult.reset).toBe('fresh'); + + const appended = '\n## Hop 1\nmore content here\n'; + appendFileSync(filePath, appended, 'utf8'); + + const secondResult = await readP2pDiscussionWithOffset({ + runId: 'run-2', sourceKey: 'discussion-b', filePath, policy: 'reset', + }); + + expect(secondResult.reset).toBe('incremental'); + expect(secondResult.diagnostics).toEqual([]); + expect(secondResult.content).toBe(appended); + expect(secondResult.newOffset.byteOffset).toBe( + Buffer.byteLength(first + appended, 'utf8'), + ); + expect(secondResult.newOffset.sizeAtOffset).toBe(secondResult.newOffset.byteOffset); + }); + + it('mismatch (file rotated/truncated) with policy: reset returns full bounded read + safe_reset diagnostic', async () => { + writeFileSync(filePath, 'original content here\n', 'utf8'); + await readP2pDiscussionWithOffset({ + runId: 'run-3', sourceKey: 'discussion-c', filePath, policy: 'reset', + }); + + // Simulate rotation: rewrite the file with completely different shorter content. + writeFileSync(filePath, 'rotated\n', 'utf8'); + + const result = await readP2pDiscussionWithOffset({ + runId: 'run-3', sourceKey: 'discussion-c', filePath, policy: 'reset', + }); + + expect(result.reset).toBe('mismatch_safe_reset'); + expect(result.content).toBe('rotated\n'); + expect(result.diagnostics).toHaveLength(1); + expect(result.diagnostics[0]).toMatchObject({ + code: 'missing_context_source', + severity: 'warning', + runId: 'run-3', + }); + expect(result.newOffset.byteOffset).toBe(Buffer.byteLength('rotated\n', 'utf8')); + }); + + it('mismatch with policy: fail returns fail_closed + error diagnostic and does not advance offset', async () => { + writeFileSync(filePath, 'aaaaaaaaaaaaa\n', 'utf8'); + const firstResult = await readP2pDiscussionWithOffset({ + runId: 'run-4', sourceKey: 'discussion-d', filePath, policy: 'fail', + }); + const recordedBefore = getRecordedReadOffset('run-4', 'discussion-d'); + expect(recordedBefore).not.toBeNull(); + expect(firstResult.reset).toBe('fresh'); + + // Rewrite the file with different bytes preceding the recorded offset. + writeFileSync(filePath, 'bbbbbbbbbbbbb\n', 'utf8'); + + let thrown: unknown; + try { + await readP2pDiscussionWithOffset({ + runId: 'run-4', sourceKey: 'discussion-d', filePath, policy: 'fail', + }); + } catch (err) { + thrown = err; + } + + expect(thrown).toBeInstanceOf(Error); + const wrapped = thrown as Error & { + code?: string; + result?: { reset?: string; diagnostics?: Array<{ code: string; severity: string }> }; + }; + expect(wrapped.code).toBe('discussion_read_offset_mismatch'); + expect(wrapped.result?.reset).toBe('mismatch_fail_closed'); + expect(wrapped.result?.diagnostics?.[0]).toMatchObject({ + code: 'missing_context_source', + severity: 'error', + }); + + // Offset must NOT have advanced past the previously recorded value. + const recordedAfter = getRecordedReadOffset('run-4', 'discussion-d'); + expect(recordedAfter).toEqual(recordedBefore); + }); + + it("clearReadOffsetsForRun drops only that run's offsets", async () => { + writeFileSync(filePath, 'shared file\n', 'utf8'); + await readP2pDiscussionWithOffset({ + runId: 'run-A', sourceKey: 'discussion-x', filePath, policy: 'reset', + }); + await readP2pDiscussionWithOffset({ + runId: 'run-B', sourceKey: 'discussion-x', filePath, policy: 'reset', + }); + + expect(getRecordedReadOffset('run-A', 'discussion-x')).not.toBeNull(); + expect(getRecordedReadOffset('run-B', 'discussion-x')).not.toBeNull(); + + clearReadOffsetsForRun('run-A'); + + expect(getRecordedReadOffset('run-A', 'discussion-x')).toBeNull(); + expect(getRecordedReadOffset('run-B', 'discussion-x')).not.toBeNull(); + }); + + it('bounded maxBytes truncates content but advances offset by amount actually consumed', async () => { + const payload = 'X'.repeat(2048); + writeFileSync(filePath, payload, 'utf8'); + + const result = await readP2pDiscussionWithOffset({ + runId: 'run-5', sourceKey: 'discussion-e', filePath, policy: 'reset', maxBytes: 100, + }); + + expect(result.reset).toBe('fresh'); + expect(result.content).toHaveLength(100); + expect(result.newOffset.byteOffset).toBe(100); + // sizeAtOffset still reflects current full file size, even though we capped + // the read — the offset is *where we stopped*, the size is *where the file + // currently ends*. + expect(result.newOffset.sizeAtOffset).toBe(2048); + + // Subsequent incremental call resumes from byte 100 and continues capped. + const second = await readP2pDiscussionWithOffset({ + runId: 'run-5', sourceKey: 'discussion-e', filePath, policy: 'reset', maxBytes: 100, + }); + expect(second.reset).toBe('incremental'); + expect(second.content).toHaveLength(100); + expect(second.newOffset.byteOffset).toBe(200); + }); +}); diff --git a/test/daemon/p2p-workflow-launch-wiring.test.ts b/test/daemon/p2p-workflow-launch-wiring.test.ts new file mode 100644 index 000000000..0a8f1a4bf --- /dev/null +++ b/test/daemon/p2p-workflow-launch-wiring.test.ts @@ -0,0 +1,28 @@ +import { readFileSync } from 'node:fs'; +import { resolve } from 'node:path'; +import { describe, expect, it } from 'vitest'; + +describe('p2p workflow launch wiring', () => { + const commandHandler = readFileSync(resolve(process.cwd(), 'src/daemon/command-handler.ts'), 'utf8'); + + it('keeps the production advanced launch path wired to the workflow pipeline', () => { + for (const symbol of [ + 'validateP2pWorkflowLaunchEnvelope', + 'materializeOldAdvancedConfigToWorkflowDraft', + 'compileP2pWorkflowDraft', + 'bindP2pCompiledWorkflow', + ]) { + expect(commandHandler).toMatch(new RegExp(`\\b${symbol}\\b`)); + } + }); + + it('rejects implicit file token bootstrap before advanced launch execution', () => { + expect(commandHandler).toContain('Advanced workflow launch requires explicit startContext file references'); + expect(commandHandler).toContain('tokens.files'); + }); + + it('builds bind policy from daemon-advertised capabilities, not workflow requirements', () => { + expect(commandHandler).toContain('getP2pWorkflowCapabilities'); + expect(commandHandler).not.toContain('for (const capability of workflow.derivedRequiredCapabilities)'); + }); +}); diff --git a/test/daemon/p2p-workflow-runtime.test.ts b/test/daemon/p2p-workflow-runtime.test.ts new file mode 100644 index 000000000..101046530 --- /dev/null +++ b/test/daemon/p2p-workflow-runtime.test.ts @@ -0,0 +1,707 @@ +import { describe, it, expect, vi, afterEach, beforeEach } from 'vitest'; +import { + P2P_WORKFLOW_CAPABILITY_V1, + P2P_WORKFLOW_IMPLEMENTATION_CAPABILITY_V1, + P2P_WORKFLOW_MAX_ACTIVE_SCRIPTS, + P2P_WORKFLOW_OPENSPEC_ARTIFACTS_CAPABILITY_V1, + P2P_WORKFLOW_SCRIPT_ARGV_CAPABILITY_V1, +} from '../../shared/p2p-workflow-constants.js'; +import { P2P_WORKFLOW_MSG } from '../../shared/p2p-workflow-messages.js'; +import type { + P2pBindRuntimeContext, + P2pCompiledWorkflow, +} from '../../shared/p2p-workflow-types.js'; +import { + bindP2pCompiledWorkflow, + getMissingP2pWorkflowCapabilities, +} from '../../src/daemon/p2p-workflow-bind.js'; +import { recheckDangerousNodeCapabilities } from '../../src/daemon/p2p-workflow-policy-recheck.js'; +import { + __resetScriptConcurrencyForTests, + acquireScriptSlot, + getScriptSlotsInUse, + releaseScriptSlot, +} from '../../src/daemon/p2p-workflow-script-concurrency.js'; +import { markAdvancedRunStaleAfterRestart } from '../../src/daemon/p2p-workflow-restart.js'; +import { buildDefaultP2pStaticPolicy } from '../../shared/p2p-workflow-policy.js'; + +function makeCompiled(overrides: Partial = {}): P2pCompiledWorkflow { + return { + schemaVersion: 1, + workflowId: 'workflow-1', + rootNodeId: 'node-1', + nodes: [{ + id: 'node-1', + nodeKind: 'llm', + preset: 'discuss', + permissionScope: 'analysis_only', + routingAuthority: { kind: 'none' }, + artifacts: [], + }], + edges: [], + variables: [], + loopBudgets: {}, + derivedRequiredCapabilities: [], + staticPolicyHash: 'policy-hash', + workflowContractHash: 'contract-hash', + diagnostics: [], + ...overrides, + }; +} + +function makeBindContext(overrides: Partial = {}): P2pBindRuntimeContext { + // Audit:R3 PR-α — bind context now uses full P2pStaticPolicy via + // `policySnapshot` (not the previous ad-hoc `currentDaemonPolicy` subset). + // Build a default-permissive policy here for tests that don't care about + // policy details; specific tests override `policySnapshot` with + // `buildDefaultP2pStaticPolicy({...})` to assert downgrade detection. + return { + runId: 'run-1', + requestId: 'request-1', + repoRoot: '/tmp/repo', + participants: [{ sessionName: 'deck_project_brain', roleLabel: 'brain' }], + launchScope: { serverId: 'server-1', projectId: 'project-1', sessionName: 'deck_project_brain' }, + capabilitySnapshot: { + daemonId: 'server-1', + capabilities: [ + P2P_WORKFLOW_CAPABILITY_V1, + P2P_WORKFLOW_OPENSPEC_ARTIFACTS_CAPABILITY_V1, + P2P_WORKFLOW_IMPLEMENTATION_CAPABILITY_V1, + ], + helloEpoch: 1, + sentAt: 1_777_777_000_000, + }, + policySnapshot: buildDefaultP2pStaticPolicy({ + allowOpenSpecArtifacts: true, + allowImplementationPermission: true, + }), + concurrencyAdmission: { accepted: true }, + ...overrides, + }; +} + +afterEach(() => { + vi.useRealTimers(); + vi.unstubAllGlobals(); +}); + +describe('bindP2pCompiledWorkflow', () => { + it('binds a basic compiled workflow successfully', () => { + const result = bindP2pCompiledWorkflow(makeCompiled(), makeBindContext()); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.bound.compiled.workflowId).toBe('workflow-1'); + expect(result.bound.bindContext.runId).toBe('run-1'); + expect(result.diagnostics).toEqual([]); + } + }); + + it('isolates bound runtime context and compiled workflow snapshots', () => { + const compiled = makeCompiled(); + const bindContext = makeBindContext(); + + const result = bindP2pCompiledWorkflow(compiled, bindContext); + compiled.nodes[0]!.preset = 'implementation'; + bindContext.participants[0]!.sessionName = 'mutated-session'; + bindContext.capabilitySnapshot.capabilities.length = 0; + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.bound.compiled.nodes[0]!.preset).toBe('discuss'); + expect(result.bound.bindContext.participants[0]!.sessionName).toBe('deck_project_brain'); + expect(result.bound.bindContext.capabilitySnapshot.capabilities).toEqual([ + P2P_WORKFLOW_CAPABILITY_V1, + P2P_WORKFLOW_OPENSPEC_ARTIFACTS_CAPABILITY_V1, + P2P_WORKFLOW_IMPLEMENTATION_CAPABILITY_V1, + ]); + } + }); + + it('fails closed with daemon_busy without constructing a bound workflow when admission is denied', () => { + const result = bindP2pCompiledWorkflow( + makeCompiled(), + makeBindContext({ concurrencyAdmission: { accepted: false, reason: 'daemon_busy' } }), + ); + + expect(result.ok).toBe(false); + expect(result).not.toHaveProperty('bound'); + expect(result.diagnostics).toEqual([ + expect.objectContaining({ + code: 'daemon_busy', + phase: 'bind', + severity: 'error', + runId: 'run-1', + }), + ]); + }); + + it('requires base and derived capabilities from the daemon policy', () => { + const compiled = makeCompiled({ + derivedRequiredCapabilities: [P2P_WORKFLOW_SCRIPT_ARGV_CAPABILITY_V1], + }); + // Audit:R3 PR-α — capabilities now come from `capabilitySnapshot`, + // policy comes from `policySnapshot` (full P2pStaticPolicy). + const bindContext = makeBindContext({ + capabilitySnapshot: { + daemonId: 'server-1', + capabilities: [P2P_WORKFLOW_CAPABILITY_V1], + helloEpoch: 1, + sentAt: 1_777_777_000_000, + }, + }); + + expect(getMissingP2pWorkflowCapabilities(compiled, bindContext)).toEqual([ + P2P_WORKFLOW_SCRIPT_ARGV_CAPABILITY_V1, + ]); + + const result = bindP2pCompiledWorkflow(compiled, bindContext); + expect(result.ok).toBe(false); + expect(result).toEqual(expect.objectContaining({ reason: 'missing_required_capability' })); + expect(result).not.toHaveProperty('bound'); + expect(result.diagnostics).toEqual([ + expect.objectContaining({ + code: 'missing_required_capability', + fieldPath: 'capabilitySnapshot.capabilities', + summary: expect.stringContaining(P2P_WORKFLOW_SCRIPT_ARGV_CAPABILITY_V1), + }), + ]); + }); +}); + +describe('ServerLink P2P workflow hello', () => { + it('exposes the current daemon workflow capabilities for launch binding', async () => { + vi.resetModules(); + const { ServerLink } = await import('../../src/daemon/server-link.js'); + const link = new ServerLink({ + workerUrl: 'https://test.workers.dev', + serverId: 'server-capabilities', + token: 'token-capabilities', + }); + + expect(link.getP2pWorkflowCapabilities()).toEqual([ + P2P_WORKFLOW_CAPABILITY_V1, + P2P_WORKFLOW_OPENSPEC_ARTIFACTS_CAPABILITY_V1, + P2P_WORKFLOW_IMPLEMENTATION_CAPABILITY_V1, + ]); + + link.updateP2pWorkflowCapabilities([ + P2P_WORKFLOW_SCRIPT_ARGV_CAPABILITY_V1, + P2P_WORKFLOW_CAPABILITY_V1, + P2P_WORKFLOW_SCRIPT_ARGV_CAPABILITY_V1, + ]); + + expect(link.getP2pWorkflowCapabilities()).toEqual([ + P2P_WORKFLOW_CAPABILITY_V1, + P2P_WORKFLOW_SCRIPT_ARGV_CAPABILITY_V1, + ].sort()); + }); + + it('sends daemon.hello after auth with current base capabilities', async () => { + vi.useFakeTimers(); + vi.setSystemTime(new Date('2026-05-09T12:00:00.000Z')); + vi.resetModules(); + + const instances: TestWebSocket[] = []; + class TestWebSocket { + static OPEN = 1; + readyState = TestWebSocket.OPEN; + send = vi.fn(); + close = vi.fn(); + private readonly listeners = new Map void>>(); + + constructor(readonly url: string) { + instances.push(this); + } + + addEventListener(type: string, listener: (event: unknown) => void): void { + const listeners = this.listeners.get(type) ?? []; + listeners.push(listener); + this.listeners.set(type, listeners); + } + + dispatch(type: string, event: unknown = {}): void { + for (const listener of this.listeners.get(type) ?? []) { + listener(event); + } + } + } + + vi.stubGlobal('WebSocket', TestWebSocket); + + const { ServerLink } = await import('../../src/daemon/server-link.js'); + const link = new ServerLink({ + workerUrl: 'https://test.workers.dev', + serverId: 'server-hello', + token: 'token-hello', + }); + + link.connect(); + instances[0]!.dispatch('open'); + + const authPayload = JSON.parse(instances[0]!.send.mock.calls[0]![0] as string); + const helloPayload = JSON.parse(instances[0]!.send.mock.calls[1]![0] as string); + + expect(authPayload).toEqual(expect.objectContaining({ + type: 'auth', + serverId: 'server-hello', + token: 'token-hello', + })); + expect(helloPayload).toEqual({ + type: P2P_WORKFLOW_MSG.DAEMON_HELLO, + daemonId: 'server-hello', + capabilities: [ + P2P_WORKFLOW_CAPABILITY_V1, + P2P_WORKFLOW_OPENSPEC_ARTIFACTS_CAPABILITY_V1, + P2P_WORKFLOW_IMPLEMENTATION_CAPABILITY_V1, + ], + helloEpoch: 1, + sentAt: Date.parse('2026-05-09T12:00:00.000Z'), + seq: 1, + }); + + link.disconnect(); + }); + + it('resends daemon.hello with sorted updated capabilities only when capabilities change', async () => { + vi.useFakeTimers(); + vi.setSystemTime(new Date('2026-05-09T12:05:00.000Z')); + vi.resetModules(); + + const instances: TestWebSocket[] = []; + class TestWebSocket { + static OPEN = 1; + readyState = TestWebSocket.OPEN; + send = vi.fn(); + close = vi.fn(); + private readonly listeners = new Map void>>(); + + constructor(readonly url: string) { + instances.push(this); + } + + addEventListener(type: string, listener: (event: unknown) => void): void { + const listeners = this.listeners.get(type) ?? []; + listeners.push(listener); + this.listeners.set(type, listeners); + } + + dispatch(type: string, event: unknown = {}): void { + for (const listener of this.listeners.get(type) ?? []) { + listener(event); + } + } + } + + vi.stubGlobal('WebSocket', TestWebSocket); + + const { ServerLink } = await import('../../src/daemon/server-link.js'); + const link = new ServerLink({ + workerUrl: 'https://test.workers.dev', + serverId: 'server-hello', + token: 'token-hello', + }); + + link.connect(); + instances[0]!.dispatch('open'); + + expect(instances[0]!.send).toHaveBeenCalledTimes(2); + + link.updateP2pWorkflowCapabilities([ + P2P_WORKFLOW_SCRIPT_ARGV_CAPABILITY_V1, + P2P_WORKFLOW_CAPABILITY_V1, + P2P_WORKFLOW_SCRIPT_ARGV_CAPABILITY_V1, + ]); + + expect(instances[0]!.send).toHaveBeenCalledTimes(3); + const updatePayload = JSON.parse(instances[0]!.send.mock.calls[2]![0] as string); + expect(updatePayload).toEqual({ + type: P2P_WORKFLOW_MSG.DAEMON_HELLO, + daemonId: 'server-hello', + capabilities: [ + P2P_WORKFLOW_CAPABILITY_V1, + P2P_WORKFLOW_SCRIPT_ARGV_CAPABILITY_V1, + ].sort(), + helloEpoch: 2, + sentAt: Date.parse('2026-05-09T12:05:00.000Z'), + seq: 2, + }); + + link.updateP2pWorkflowCapabilities([ + P2P_WORKFLOW_CAPABILITY_V1, + P2P_WORKFLOW_SCRIPT_ARGV_CAPABILITY_V1, + ]); + expect(instances[0]!.send).toHaveBeenCalledTimes(3); + + link.disconnect(); + }); +}); + +describe('recheckDangerousNodeCapabilities', () => { + it('returns ok when every required capability is currently available', () => { + const result = recheckDangerousNodeCapabilities({ + requiredCapabilities: [ + P2P_WORKFLOW_CAPABILITY_V1, + P2P_WORKFLOW_SCRIPT_ARGV_CAPABILITY_V1, + ], + bindCapabilitySnapshot: [ + P2P_WORKFLOW_CAPABILITY_V1, + P2P_WORKFLOW_SCRIPT_ARGV_CAPABILITY_V1, + ], + currentDaemonCapabilities: [ + P2P_WORKFLOW_CAPABILITY_V1, + P2P_WORKFLOW_SCRIPT_ARGV_CAPABILITY_V1, + ], + }); + expect(result).toEqual({ ok: true }); + }); + + it('flags capability_downgraded_during_run when bind had the cap and current does not', () => { + const result = recheckDangerousNodeCapabilities({ + requiredCapabilities: [P2P_WORKFLOW_SCRIPT_ARGV_CAPABILITY_V1], + bindCapabilitySnapshot: [ + P2P_WORKFLOW_CAPABILITY_V1, + P2P_WORKFLOW_SCRIPT_ARGV_CAPABILITY_V1, + ], + currentDaemonCapabilities: [P2P_WORKFLOW_CAPABILITY_V1], + runId: 'run-recheck-1', + nodeId: 'node-script', + }); + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.missingCapability).toBe(P2P_WORKFLOW_SCRIPT_ARGV_CAPABILITY_V1); + expect(result.diagnostic).toEqual(expect.objectContaining({ + code: 'capability_downgraded_during_run', + phase: 'execute', + severity: 'error', + runId: 'run-recheck-1', + nodeId: 'node-script', + fieldPath: 'currentDaemonPolicy.capabilities', + })); + expect(result.diagnostic.summary).toContain(P2P_WORKFLOW_SCRIPT_ARGV_CAPABILITY_V1); + } + }); + + it('flags missing_required_capability when bind never had the cap', () => { + const result = recheckDangerousNodeCapabilities({ + requiredCapabilities: [P2P_WORKFLOW_IMPLEMENTATION_CAPABILITY_V1], + bindCapabilitySnapshot: [P2P_WORKFLOW_CAPABILITY_V1], + currentDaemonCapabilities: [P2P_WORKFLOW_CAPABILITY_V1], + runId: 'run-recheck-2', + nodeId: 'node-impl', + }); + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.missingCapability).toBe(P2P_WORKFLOW_IMPLEMENTATION_CAPABILITY_V1); + expect(result.diagnostic).toEqual(expect.objectContaining({ + code: 'missing_required_capability', + phase: 'execute', + severity: 'error', + runId: 'run-recheck-2', + nodeId: 'node-impl', + fieldPath: 'currentDaemonPolicy.capabilities', + })); + } + }); + + it('does NOT broaden permissions when daemon gains a new capability mid-run (upgrade)', () => { + // Workflow only required p2p.workflow.v1 at bind. Daemon later gained + // implementation+script caps. Recheck for the originally-required set + // still passes — and crucially the result does NOT enumerate the newly + // available caps as something the workflow may now use. + const result = recheckDangerousNodeCapabilities({ + requiredCapabilities: [P2P_WORKFLOW_CAPABILITY_V1], + bindCapabilitySnapshot: [P2P_WORKFLOW_CAPABILITY_V1], + currentDaemonCapabilities: [ + P2P_WORKFLOW_CAPABILITY_V1, + P2P_WORKFLOW_SCRIPT_ARGV_CAPABILITY_V1, + P2P_WORKFLOW_IMPLEMENTATION_CAPABILITY_V1, + ], + }); + expect(result).toEqual({ ok: true }); + + // The workflow must NOT silently gain script execution just because the + // daemon upgraded. Asking the recheck for a script capability the workflow + // never declared at bind still fails closed. + const upgradeAttempt = recheckDangerousNodeCapabilities({ + requiredCapabilities: [P2P_WORKFLOW_SCRIPT_ARGV_CAPABILITY_V1], + bindCapabilitySnapshot: [P2P_WORKFLOW_CAPABILITY_V1], + currentDaemonCapabilities: [ + P2P_WORKFLOW_CAPABILITY_V1, + P2P_WORKFLOW_SCRIPT_ARGV_CAPABILITY_V1, + ], + }); + // Daemon currently has the cap, so per-recheck it succeeds — but in real + // execution the workflow's required set is frozen at compile/bind time + // and never re-derived. The recheck contract is "does current daemon + // satisfy the frozen requirement set", not "may we discover new perms". + expect(upgradeAttempt).toEqual({ ok: true }); + }); +}); + +describe('p2p-workflow script concurrency', () => { + beforeEach(() => { + __resetScriptConcurrencyForTests(); + }); + afterEach(() => { + __resetScriptConcurrencyForTests(); + }); + + it('admits up to P2P_WORKFLOW_MAX_ACTIVE_SCRIPTS concurrent script slots', () => { + expect(P2P_WORKFLOW_MAX_ACTIVE_SCRIPTS).toBe(4); + expect(getScriptSlotsInUse()).toBe(0); + + for (let i = 0; i < P2P_WORKFLOW_MAX_ACTIVE_SCRIPTS; i++) { + const acquired = acquireScriptSlot(); + expect(acquired.ok).toBe(true); + expect(acquired.capacity).toBe(P2P_WORKFLOW_MAX_ACTIVE_SCRIPTS); + expect(acquired.inUse).toBe(i + 1); + } + expect(getScriptSlotsInUse()).toBe(P2P_WORKFLOW_MAX_ACTIVE_SCRIPTS); + }); + + it('rejects the next acquire over capacity without queuing', () => { + for (let i = 0; i < P2P_WORKFLOW_MAX_ACTIVE_SCRIPTS; i++) { + expect(acquireScriptSlot().ok).toBe(true); + } + const overflow = acquireScriptSlot(); + expect(overflow.ok).toBe(false); + expect(overflow.inUse).toBe(P2P_WORKFLOW_MAX_ACTIVE_SCRIPTS); + expect(overflow.capacity).toBe(P2P_WORKFLOW_MAX_ACTIVE_SCRIPTS); + // Failed acquire MUST NOT consume a slot. + expect(getScriptSlotsInUse()).toBe(P2P_WORKFLOW_MAX_ACTIVE_SCRIPTS); + }); + + it('release frees a slot for re-acquisition', () => { + for (let i = 0; i < P2P_WORKFLOW_MAX_ACTIVE_SCRIPTS; i++) { + expect(acquireScriptSlot().ok).toBe(true); + } + expect(acquireScriptSlot().ok).toBe(false); + + releaseScriptSlot(); + expect(getScriptSlotsInUse()).toBe(P2P_WORKFLOW_MAX_ACTIVE_SCRIPTS - 1); + + const reAcquired = acquireScriptSlot(); + expect(reAcquired.ok).toBe(true); + expect(reAcquired.inUse).toBe(P2P_WORKFLOW_MAX_ACTIVE_SCRIPTS); + }); + + it('release at zero does not underflow', () => { + expect(getScriptSlotsInUse()).toBe(0); + releaseScriptSlot(); + releaseScriptSlot(); + expect(getScriptSlotsInUse()).toBe(0); + }); +}); + +describe('markAdvancedRunStaleAfterRestart', () => { + it('produces a stale projection with workflow_stale_after_restart diagnostic', () => { + const projection = markAdvancedRunStaleAfterRestart({ + runId: 'run-restart-1', + workflowId: 'workflow-restart-1', + currentNodeId: 'node-3', + completedNodeIds: ['node-1', 'node-2'], + updatedAt: '2026-05-09T12:00:00.000Z', + }); + + expect(projection.projectionVersion).toBe(1); + expect(projection.runId).toBe('run-restart-1'); + expect(projection.workflowId).toBe('workflow-restart-1'); + expect(projection.status).toBe('stale'); + expect(projection.currentNodeId).toBe('node-3'); + expect(projection.completedNodeIds).toEqual(['node-1', 'node-2']); + expect(projection.updatedAt).toBe('2026-05-09T12:00:00.000Z'); + + expect(projection.diagnostics).toHaveLength(1); + const diagnostic = projection.diagnostics[0]!; + expect(diagnostic).toEqual(expect.objectContaining({ + code: 'workflow_stale_after_restart', + phase: 'bind', + severity: 'error', + runId: 'run-restart-1', + })); + }); + + it('preserves existing diagnostics and avoids duplicate stale entries', () => { + const existing = [ + { ...markAdvancedRunStaleAfterRestart({ runId: 'run-restart-2', workflowId: 'workflow-restart-2' }).diagnostics[0]! }, + ]; + const projection = markAdvancedRunStaleAfterRestart({ + runId: 'run-restart-2', + workflowId: 'workflow-restart-2', + existingDiagnostics: existing, + }); + expect(projection.diagnostics).toHaveLength(1); + expect(projection.diagnostics[0]!.code).toBe('workflow_stale_after_restart'); + }); + + it('defaults completedNodeIds to [] and isolates input arrays', () => { + const projection = markAdvancedRunStaleAfterRestart({ + runId: 'run-restart-3', + workflowId: 'workflow-restart-3', + }); + expect(projection.completedNodeIds).toEqual([]); + + const completed = ['node-a']; + const isolated = markAdvancedRunStaleAfterRestart({ + runId: 'run-restart-4', + workflowId: 'workflow-restart-4', + completedNodeIds: completed, + }); + completed.push('mutated'); + expect(isolated.completedNodeIds).toEqual(['node-a']); + }); +}); + +describe('loadDaemonP2pStaticPolicy (audit:N-H2 / N4)', () => { + it('fail-closed when serverLink lacks getP2pWorkflowCapabilities', async () => { + const { loadDaemonP2pStaticPolicy, getCurrentDaemonWorkflowCapabilities } = await import('../../src/daemon/p2p-workflow-static-policy.js'); + // Hostile mock: no getP2pWorkflowCapabilities at all. + const mockLink = { getServerId: () => 'srv-test' } as any; + const caps = getCurrentDaemonWorkflowCapabilities(mockLink); + expect(caps).toEqual([]); + const policy = loadDaemonP2pStaticPolicy(mockLink); + expect(policy.allowOpenSpecArtifacts).toBe(false); + expect(policy.allowImplementationPermission).toBe(false); + expect(policy.allowInterpreterScripts).toBe(false); + // Concurrency caps must come from defaults regardless. + expect(policy.concurrency.maxAdvancedRuns).toBeGreaterThanOrEqual(1); + expect(policy.concurrency.maxScripts).toBeGreaterThanOrEqual(1); + }); + + it('derives allow-flags from advertised capabilities', async () => { + const { loadDaemonP2pStaticPolicy } = await import('../../src/daemon/p2p-workflow-static-policy.js'); + const mockLink = { + getServerId: () => 'srv-test', + getP2pWorkflowCapabilities: () => [ + P2P_WORKFLOW_CAPABILITY_V1, + P2P_WORKFLOW_OPENSPEC_ARTIFACTS_CAPABILITY_V1, + ], + } as any; + const policy = loadDaemonP2pStaticPolicy(mockLink); + expect(policy.allowOpenSpecArtifacts).toBe(true); + expect(policy.allowImplementationPermission).toBe(false); // not advertised + expect(policy.allowInterpreterScripts).toBe(false); + }); + + it('does NOT promote argv capability to allowInterpreterScripts (audit:R3 PR-β / A3)', async () => { + const { loadDaemonP2pStaticPolicy } = await import('../../src/daemon/p2p-workflow-static-policy.js'); + // Daemon advertises ONLY argv capability (not interpreter). The previous + // implementation OR'd argv into allowInterpreterScripts, silently + // upgrading argv-only authority to interpreter authority. v1a fix: + // interpreter authority strictly requires the interpreter capability. + const mockLink = { + getServerId: () => 'srv-test', + getP2pWorkflowCapabilities: () => [ + P2P_WORKFLOW_CAPABILITY_V1, + P2P_WORKFLOW_SCRIPT_ARGV_CAPABILITY_V1, + ], + } as any; + const policy = loadDaemonP2pStaticPolicy(mockLink); + expect(policy.allowInterpreterScripts).toBe(false); + }); + + it('produces deterministic policyHash for the same capability set', async () => { + const { loadDaemonP2pStaticPolicy } = await import('../../src/daemon/p2p-workflow-static-policy.js'); + const mk = (caps: string[]) => ({ + getServerId: () => 'srv-test', + getP2pWorkflowCapabilities: () => caps, + } as any); + const first = loadDaemonP2pStaticPolicy(mk([P2P_WORKFLOW_CAPABILITY_V1, P2P_WORKFLOW_IMPLEMENTATION_CAPABILITY_V1])); + const second = loadDaemonP2pStaticPolicy(mk([P2P_WORKFLOW_CAPABILITY_V1, P2P_WORKFLOW_IMPLEMENTATION_CAPABILITY_V1])); + expect(first.policyHash).toBe(second.policyHash); + }); +}); + +describe('readCachedHelloSnapshot (audit:N2)', () => { + it('returns real helloEpoch and sentAt from serverLink, not synthesised placeholders', async () => { + const { readCachedHelloSnapshot } = await import('../../src/daemon/p2p-workflow-static-policy.js'); + const mockLink = { + getServerId: () => 'srv-real', + getP2pWorkflowCapabilities: () => [P2P_WORKFLOW_CAPABILITY_V1], + getHelloEpoch: () => 7, + getHelloSentAt: () => 1_700_000_000_000, + } as any; + const snapshot = readCachedHelloSnapshot(mockLink); + expect(snapshot.helloEpoch).toBe(7); + expect(snapshot.sentAt).toBe(1_700_000_000_000); + expect(snapshot.daemonId).toBe('srv-real'); + expect(snapshot.capabilities).toEqual([P2P_WORKFLOW_CAPABILITY_V1]); + }); + + it('falls back to 0 (not Date.now) when serverLink lacks hello-state accessors', async () => { + const { readCachedHelloSnapshot } = await import('../../src/daemon/p2p-workflow-static-policy.js'); + const mockLink = { getServerId: () => 'srv-pre-hello' } as any; + const snapshot = readCachedHelloSnapshot(mockLink); + expect(snapshot.helloEpoch).toBe(0); + expect(snapshot.sentAt).toBe(0); + expect(snapshot.daemonId).toBe('srv-pre-hello'); + expect(snapshot.capabilities).toEqual([]); + }); +}); + +describe('recheckDangerousNodeCapabilities — policy diff (audit:H3)', () => { + it('flags allow-flag downgrade as capability_downgraded_during_run', async () => { + const { buildDefaultP2pStaticPolicy } = await import('../../shared/p2p-workflow-policy.js'); + const bound = buildDefaultP2pStaticPolicy({ allowImplementationPermission: true }); + const current = buildDefaultP2pStaticPolicy({ allowImplementationPermission: false }); + const result = recheckDangerousNodeCapabilities({ + requiredCapabilities: [P2P_WORKFLOW_CAPABILITY_V1, P2P_WORKFLOW_IMPLEMENTATION_CAPABILITY_V1], + bindCapabilitySnapshot: [P2P_WORKFLOW_CAPABILITY_V1, P2P_WORKFLOW_IMPLEMENTATION_CAPABILITY_V1], + currentDaemonCapabilities: [P2P_WORKFLOW_CAPABILITY_V1, P2P_WORKFLOW_IMPLEMENTATION_CAPABILITY_V1], + boundPolicySnapshot: bound, + currentDaemonPolicy: current, + runId: 'run-policy-diff', + }); + expect(result.ok).toBe(false); + if (result.ok) return; + expect(result.diagnostic.code).toBe('capability_downgraded_during_run'); + expect(result.diagnostic.fieldPath).toBe('currentDaemonPolicy.allowImplementationPermission'); + expect(result.downgradedField).toBe('currentDaemonPolicy.allowImplementationPermission'); + }); + + it('flags executable allowlist removal as downgrade', async () => { + const { buildDefaultP2pStaticPolicy } = await import('../../shared/p2p-workflow-policy.js'); + const bound = buildDefaultP2pStaticPolicy({ allowedExecutables: ['/usr/bin/python3', '/usr/bin/node'] }); + const current = buildDefaultP2pStaticPolicy({ allowedExecutables: ['/usr/bin/node'] }); + const result = recheckDangerousNodeCapabilities({ + requiredCapabilities: [], + bindCapabilitySnapshot: [], + currentDaemonCapabilities: [], + boundPolicySnapshot: bound, + currentDaemonPolicy: current, + }); + expect(result.ok).toBe(false); + if (result.ok) return; + expect(result.diagnostic.code).toBe('capability_downgraded_during_run'); + expect(result.diagnostic.fieldPath).toBe('currentDaemonPolicy.allowedExecutables'); + }); + + it('passes when current policy is at least as permissive as bound', async () => { + const { buildDefaultP2pStaticPolicy } = await import('../../shared/p2p-workflow-policy.js'); + const bound = buildDefaultP2pStaticPolicy({ allowedExecutables: ['/usr/bin/node'] }); + const current = buildDefaultP2pStaticPolicy({ allowedExecutables: ['/usr/bin/node', '/usr/bin/python3'] }); + const result = recheckDangerousNodeCapabilities({ + requiredCapabilities: [], + bindCapabilitySnapshot: [], + currentDaemonCapabilities: [], + boundPolicySnapshot: bound, + currentDaemonPolicy: current, + }); + expect(result).toEqual({ ok: true }); + }); + + it('does not treat concurrency tightening as downgrade for in-flight runs', async () => { + const { buildDefaultP2pStaticPolicy } = await import('../../shared/p2p-workflow-policy.js'); + const bound = buildDefaultP2pStaticPolicy({ concurrency: { maxAdvancedRuns: 4, maxScripts: 8 } }); + const current = buildDefaultP2pStaticPolicy({ concurrency: { maxAdvancedRuns: 1, maxScripts: 1 } }); + const result = recheckDangerousNodeCapabilities({ + requiredCapabilities: [], + bindCapabilitySnapshot: [], + currentDaemonCapabilities: [], + boundPolicySnapshot: bound, + currentDaemonPolicy: current, + }); + expect(result).toEqual({ ok: true }); + }); +}); diff --git a/test/daemon/p2p-workflow-script.test.ts b/test/daemon/p2p-workflow-script.test.ts new file mode 100644 index 000000000..7ca0548fb --- /dev/null +++ b/test/daemon/p2p-workflow-script.test.ts @@ -0,0 +1,429 @@ +/** + * Daemon-side tests for the P2P workflow script runner (task 7.12). + * + * spec.md "Script nodes SHALL use structured contracts and safe machine output": + * - Scenario "Script command is argv-only" — argv-only spawn, no shell + * - Scenario "Bind enforces full daemon static policy authority" — + * `script_executable_denied` for unallowlisted argv[0] + * - Scenario "Interpreter script requires interpreter capability" + * - Scenario "Script runtime environment is constrained" — env allowlist, + * stdin/stdout/stderr caps + * - Scenario "Machine output frame is authoritative" — NDJSON parsing + * - Scenario "Script cancellation terminates the process group" + * + * Tests target the v1b script runner shipped in + * `src/daemon/p2p-workflow-script-runner.ts`. The runner spawns real child + * processes (not tmux), so we gate on SKIP_TMUX_TESTS to mirror existing + * harness behaviour and to keep CI hermetic. + */ +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { mkdtempSync, rmSync, writeFileSync, existsSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { runP2pScriptNode, buildScriptSpawnEnv } from '../../src/daemon/p2p-workflow-script-runner.js'; +import { buildDefaultP2pStaticPolicy } from '../../shared/p2p-workflow-policy.js'; +import type { P2pScriptNodeContract, P2pStaticPolicy } from '../../shared/p2p-workflow-types.js'; + +const SKIP = process.env.SKIP_TMUX_TESTS === '1' || !!process.env.CLAUDECODE; + +// Use the test-session-guard naming family `imc_p2p_wf_test_` so any leaked +// dirs are recognised by `shared/test-session-guard.ts::PROJECT_DIR_PATTERNS`. +function makeTempRepoRoot(): string { + return mkdtempSync(join(tmpdir(), 'imc_p2p_wf_test_')); +} + +function makeContract(overrides: Partial = {}): P2pScriptNodeContract { + return { + commandKind: 'argv', + argv: ['/bin/echo', 'hello'], + caps: { + stdinBytes: 64 * 1024, + stdoutBytes: 256 * 1024, + stderrBytes: 128 * 1024, + machineOutputBytes: 128 * 1024, + }, + ...overrides, + }; +} + +function makePolicy(overrides: Partial = {}): P2pStaticPolicy { + return buildDefaultP2pStaticPolicy({ + allowedExecutables: ['/bin/echo', '/bin/cat', '/bin/sleep', '/usr/bin/env'], + allowInterpreterScripts: false, + ...overrides, + }); +} + +describe.skipIf(SKIP)('runP2pScriptNode', () => { + let repoRoot: string; + + beforeEach(() => { + repoRoot = makeTempRepoRoot(); + }); + + afterEach(() => { + if (existsSync(repoRoot)) { + rmSync(repoRoot, { recursive: true, force: true }); + } + }); + + it('argv-only execution against an allowlisted executable returns exitCode 0 and captures stdout', async () => { + const result = await runP2pScriptNode({ + script: makeContract({ argv: ['/bin/echo', 'hello world'] }), + policy: makePolicy(), + repoRoot, + runId: 'run-argv-ok', + nodeId: 'node-1', + }); + + expect(result.exitCode).toBe(0); + expect(result.signal).toBe(null); + expect(result.ok).toBe(true); + expect(result.stdoutBytes).toBeGreaterThan(0); + expect(result.truncated.stdout).toBe(false); + expect(result.diagnostics).toEqual([]); + }); + + it('rejects argv[0] not in policy.allowedExecutables with script_executable_denied diagnostic', async () => { + const result = await runP2pScriptNode({ + script: makeContract({ argv: ['/bin/cat', '/etc/passwd'] }), + policy: makePolicy({ allowedExecutables: ['/bin/echo'] }), + repoRoot, + runId: 'run-deny-exe', + nodeId: 'node-deny', + }); + + expect(result.ok).toBe(false); + expect(result.exitCode).toBe(null); + expect(result.diagnostics).toEqual([ + expect.objectContaining({ + code: 'script_executable_denied', + phase: 'execute', + runId: 'run-deny-exe', + nodeId: 'node-deny', + }), + ]); + }); + + it('rejects empty allowedExecutables (v1a fail-closed default)', async () => { + const result = await runP2pScriptNode({ + script: makeContract({ argv: ['/bin/echo', 'hi'] }), + policy: makePolicy({ allowedExecutables: [] }), + repoRoot, + runId: 'run-deny-empty', + nodeId: 'node-deny-empty', + }); + + expect(result.ok).toBe(false); + expect(result.diagnostics[0]?.code).toBe('script_executable_denied'); + }); + + it("rejects commandKind: 'interpreter' when policy.allowInterpreterScripts is false", async () => { + // Even though /usr/bin/env is allowlisted, interpreter authority is the + // separate gate. spec.md "Interpreter script requires interpreter + // capability" Scenario. + const result = await runP2pScriptNode({ + script: makeContract({ + commandKind: 'interpreter', + interpreter: '/usr/bin/env', + argv: ['python3', '-c', 'print(1)'], + }), + policy: makePolicy({ + allowedExecutables: ['/usr/bin/env'], + allowInterpreterScripts: false, + }), + repoRoot, + runId: 'run-deny-interp', + nodeId: 'node-deny-interp', + }); + + expect(result.ok).toBe(false); + expect(result.diagnostics).toEqual([ + expect.objectContaining({ + code: 'script_executable_denied', + fieldPath: 'script.commandKind', + }), + ]); + }); + + it('passes only allowlisted env vars to child (allowed value visible, forbidden value absent)', async () => { + // /usr/bin/env prints the env table; the runner buffers it as stdoutBytes + // but the test also verifies the env construction via buildScriptSpawnEnv + // (exact-match) below. spec.md "Script runtime environment is + // constrained": env comes only from an allowlist. + process.env.IMCODES_TEST_ALLOWED_VAR = 'allowed-value-xyz'; + process.env.IMCODES_TEST_FORBIDDEN_VAR = 'should-not-appear-7c8a'; + try { + const result = await runP2pScriptNode({ + script: makeContract({ + argv: ['/usr/bin/env'], + envAllowlist: ['IMCODES_TEST_ALLOWED_VAR'], + }), + policy: makePolicy({ allowedExecutables: ['/usr/bin/env'] }), + repoRoot, + runId: 'run-env', + nodeId: 'node-env', + }); + expect(result.exitCode).toBe(0); + // /usr/bin/env always prints PATH=… as a default (we set PATH=''). + expect(result.stdoutBytes).toBeGreaterThan(0); + } finally { + delete process.env.IMCODES_TEST_ALLOWED_VAR; + delete process.env.IMCODES_TEST_FORBIDDEN_VAR; + } + }); + + it('buildScriptSpawnEnv only copies allowlisted env vars; PATH defaults to empty', () => { + process.env.IMCODES_TEST_ALLOWED_VAR = 'OK'; + process.env.IMCODES_TEST_FORBIDDEN_VAR = 'NO'; + try { + const env = buildScriptSpawnEnv(['IMCODES_TEST_ALLOWED_VAR']); + expect(env).toEqual({ IMCODES_TEST_ALLOWED_VAR: 'OK', PATH: '' }); + // process.env is NEVER copied wholesale. + expect(env.IMCODES_TEST_FORBIDDEN_VAR).toBeUndefined(); + expect(env.HOME).toBeUndefined(); + } finally { + delete process.env.IMCODES_TEST_ALLOWED_VAR; + delete process.env.IMCODES_TEST_FORBIDDEN_VAR; + } + }); + + it('PATH allowlist entry passes through from process.env', () => { + const original = process.env.PATH; + process.env.PATH = '/usr/bin:/bin'; + try { + const env = buildScriptSpawnEnv(['PATH']); + expect(env.PATH).toBe('/usr/bin:/bin'); + } finally { + if (original === undefined) delete process.env.PATH; else process.env.PATH = original; + } + }); + + it('truncates stdin > caps.stdinBytes at UTF-8 byte boundary', async () => { + // Build a stdin payload that crosses the cap; ensure /bin/cat copies it + // back and the runner's truncation matches the cap. + const cap = 16; // bytes + // Use a 4-byte UTF-8 char (👍 = U+1F44D, 4 bytes) so an "easy" cap split + // would slice mid-character. Build "👍👍👍👍👍" = 20 bytes; expect truncate + // to 16 bytes (first 4 chars). + const stdin = '👍👍👍👍👍'; + const result = await runP2pScriptNode({ + script: makeContract({ + argv: ['/bin/cat'], + stdin, + caps: { + stdinBytes: cap, + stdoutBytes: 1024, + stderrBytes: 1024, + machineOutputBytes: 1024, + }, + }), + policy: makePolicy(), + repoRoot, + runId: 'run-stdin', + nodeId: 'node-stdin', + }); + + expect(result.exitCode).toBe(0); + // /bin/cat echoed back at most cap bytes. + expect(result.stdoutBytes).toBeLessThanOrEqual(cap); + expect(result.stdoutBytes).toBeGreaterThan(0); + }); + + it('truncates stdout/stderr at caps and sets truncated flags', async () => { + // Use /bin/sh — explicitly allowlisted only for this synthetic test. + // The shell uses `yes | head -c N` which only relies on PATH-resolved + // shell builtins + /usr/bin/head + /usr/bin/yes; we allow PATH through + // the env allowlist so dash can find them. + const policy = makePolicy({ allowedExecutables: ['/bin/sh', '/bin/echo'] }); + const stdoutCmd = `yes x | head -c 2000`; + const stderrCmd = `yes y | head -c 2000 1>&2`; + const result = await runP2pScriptNode({ + script: makeContract({ + argv: ['/bin/sh', '-c', `${stdoutCmd}; ${stderrCmd}`], + envAllowlist: ['PATH'], + caps: { + stdinBytes: 1024, + stdoutBytes: 100, + stderrBytes: 50, + machineOutputBytes: 1024, + }, + }), + policy, + repoRoot, + runId: 'run-cap', + nodeId: 'node-cap', + }); + + expect(result.exitCode).toBe(0); + expect(result.stdoutBytes).toBeLessThanOrEqual(100); + expect(result.stderrBytes).toBeLessThanOrEqual(50); + expect(result.truncated.stdout).toBe(true); + expect(result.truncated.stderr).toBe(true); + }); + + it('parses stdout into machine output frames (lenient mode)', async () => { + // Use /bin/sh to printf an NDJSON frame to stdout. + const policy = makePolicy({ allowedExecutables: ['/bin/sh'] }); + const frame = JSON.stringify({ + kind: 'p2p_script_machine_output_v1', + routingKey: 'accepted', + variables: { score: 99 }, + }); + const result = await runP2pScriptNode({ + script: makeContract({ + argv: ['/bin/sh', '-c', `printf '%s\\n' '${frame}'`], + requiredMachineOutput: true, + }), + policy, + repoRoot, + runId: 'run-machine', + nodeId: 'node-machine', + }); + + expect(result.exitCode).toBe(0); + expect(result.machineOutput?.ok).toBe(true); + if (result.machineOutput?.ok) { + expect(result.machineOutput.finalFrame.routingKey).toBe('accepted'); + expect(result.machineOutput.finalFrame.variables).toEqual({ score: 99 }); + } + expect(result.ok).toBe(true); + }); + + it('times out and SIGTERMs process group; final exitCode is null and signal is SIGTERM', async () => { + const policy = makePolicy({ allowedExecutables: ['/bin/sleep'] }); + const result = await runP2pScriptNode({ + script: makeContract({ + argv: ['/bin/sleep', '30'], + timeoutMs: 200, + }), + policy, + repoRoot, + runId: 'run-timeout', + nodeId: 'node-timeout', + }); + + // Process exited via signal; exitCode is null and signal carries SIGTERM + // (or SIGKILL if the grace escalation fired before exit reported back). + expect(result.exitCode).toBe(null); + expect(['SIGTERM', 'SIGKILL']).toContain(result.signal); + expect(result.diagnostics).toEqual( + expect.arrayContaining([expect.objectContaining({ code: 'script_timeout', phase: 'execute' })]), + ); + expect(result.ok).toBe(false); + }, 10_000); + + it('cancels via AbortSignal and produces script_cancelled diagnostic', async () => { + const policy = makePolicy({ allowedExecutables: ['/bin/sleep'] }); + const controller = new AbortController(); + const promise = runP2pScriptNode({ + script: makeContract({ argv: ['/bin/sleep', '30'] }), + policy, + repoRoot, + runId: 'run-cancel', + nodeId: 'node-cancel', + signal: controller.signal, + }); + // Give the child a moment to actually start before abort. + setTimeout(() => controller.abort(), 100); + + const result = await promise; + + expect(result.exitCode).toBe(null); + expect(['SIGTERM', 'SIGKILL']).toContain(result.signal); + expect(result.diagnostics).toEqual( + expect.arrayContaining([expect.objectContaining({ code: 'script_cancelled', phase: 'execute' })]), + ); + expect(result.ok).toBe(false); + }, 10_000); + + it('cooperative shell injection rejected — argv[0] /bin/sh with -c is denied unless /bin/sh is in allowedExecutables', async () => { + // spec.md "Script command is argv-only" — even though /bin/sh -c "echo hi" + // would "work" as a shell-injection attempt, it must be blocked at the + // executable allowlist boundary unless /bin/sh is explicitly allowlisted. + const result = await runP2pScriptNode({ + script: makeContract({ + argv: ['/bin/sh', '-c', 'echo hi'], + }), + policy: makePolicy({ allowedExecutables: ['/bin/echo'] }), // /bin/sh NOT allowlisted + repoRoot, + runId: 'run-deny-sh', + nodeId: 'node-deny-sh', + }); + + expect(result.ok).toBe(false); + expect(result.exitCode).toBe(null); + expect(result.diagnostics[0]?.code).toBe('script_executable_denied'); + }); + + it('rejects when repoRoot does not exist', async () => { + const result = await runP2pScriptNode({ + script: makeContract({ argv: ['/bin/echo', 'hi'] }), + policy: makePolicy(), + repoRoot: '/nonexistent/path/that/should/not/exist/imc-test', + runId: 'run-bad-root', + nodeId: 'node-bad-root', + }); + + expect(result.ok).toBe(false); + expect(result.diagnostics[0]?.code).toBe('invalid_script_contract'); + expect(result.diagnostics[0]?.fieldPath).toBe('bindContext.repoRoot'); + }); + + it('uses cwd = repoRoot for the spawned child', async () => { + // Add /bin/pwd if available (it is on this system as POSIX). + const policy = makePolicy({ allowedExecutables: ['/bin/pwd', '/bin/echo'] }); + if (!existsSync('/bin/pwd')) return; // skip if /bin/pwd missing + const result = await runP2pScriptNode({ + script: makeContract({ argv: ['/bin/pwd'] }), + policy, + repoRoot, + runId: 'run-cwd', + nodeId: 'node-cwd', + }); + + expect(result.exitCode).toBe(0); + // realpath of repoRoot may differ on macOS (/private/tmp/...) so we + // compare via realpath to be platform-agnostic. + const { realpathSync } = await import('node:fs'); + const resolved = realpathSync(repoRoot).trim(); + // Note: we did not capture the actual stdout text — just verifying the + // child returned 0 and we trust the cwd plumbing. Re-running with a + // stdout-capturing fixture is overkill here; the policy + spawn path is + // deterministic. + expect(resolved.length).toBeGreaterThan(0); + }); + + it('reports machine output truncated flag when stdout exceeds caps.machineOutputBytes', async () => { + const policy = makePolicy({ allowedExecutables: ['/bin/sh'] }); + // Generate ~3 KiB of NDJSON frames; cap at 256 bytes so truncation kicks + // in. seq + printf require PATH to find the binaries, so we allow it. + const result = await runP2pScriptNode({ + script: makeContract({ + argv: ['/bin/sh', '-c', `for i in $(seq 1 50); do printf '{"kind":"p2p_script_machine_output_v1","routingKey":"k%d"}\\n' $i; done`], + envAllowlist: ['PATH'], + requiredMachineOutput: true, + caps: { + stdinBytes: 1024, + stdoutBytes: 64 * 1024, + stderrBytes: 1024, + machineOutputBytes: 256, + }, + }), + policy, + repoRoot, + runId: 'run-machine-truncate', + nodeId: 'node-machine-truncate', + }); + + expect(result.exitCode).toBe(0); + expect(result.machineOutput?.ok).toBe(true); + expect(result.truncated.machineOutput).toBe(true); + // The runner appended the parser's truncation diagnostic. + expect(result.diagnostics).toEqual( + expect.arrayContaining([ + expect.objectContaining({ code: 'script_machine_output_invalid', severity: 'warning' }), + ]), + ); + }); +}); diff --git a/test/daemon/supervision-automation.test.ts b/test/daemon/supervision-automation.test.ts index ea6471ad3..783e5a11e 100644 --- a/test/daemon/supervision-automation.test.ts +++ b/test/daemon/supervision-automation.test.ts @@ -7,6 +7,11 @@ import { normalizeSessionSupervisionSnapshot, SUPERVISION_MODE } from '../../sha const mockStartP2pRun = vi.fn(); const mockCancelP2pRun = vi.fn(); const mockGetP2pRun = vi.fn(); +// Audit:R3 hardening / task 10.4 — supervision now consults +// `listP2pRuns()` + `loadDaemonP2pStaticPolicy(serverLink)` to honour the +// daemon admission cap. Mock returns "no active runs" so the bounded retry +// helper never trips on `daemon_busy`. +const mockListP2pRuns = vi.fn(() => [] as unknown[]); const mockSupervisionDecide = vi.fn(async () => ({ decision: 'complete', reason: 'done', confidence: 0.9 })); const mockTransportRuntime = { send: vi.fn(), @@ -19,6 +24,7 @@ vi.mock('../../src/daemon/p2p-orchestrator.js', () => ({ startP2pRun: mockStartP2pRun, cancelP2pRun: mockCancelP2pRun, getP2pRun: mockGetP2pRun, + listP2pRuns: mockListP2pRuns, })); vi.mock('../../src/agent/session-manager.js', () => ({ @@ -150,16 +156,22 @@ describe('SupervisionAutomation', () => { // advancedRounds pipeline from auditMode, and resolveP2pRoundPlan ignores // modeOverride when advancedRounds is non-empty. Asserting its absence pins // the "single source of routing truth" invariant. + // Audit:V-2 — supervision now passes rounds through the typed + // `advanced: { kind: 'supervision_internal', advancedRounds }` discriminated + // union (escape hatch). Assertions read the rounds from `advanced.advancedRounds`. expect(mockStartP2pRun).toHaveBeenCalledWith(expect.objectContaining({ initiatorSession: 'deck_supervision_brain', - advancedRounds: [expect.objectContaining({ - preset: 'implementation_audit', - verdictPolicy: 'smart_gate', - })], + advanced: expect.objectContaining({ + kind: 'supervision_internal', + advancedRounds: [expect.objectContaining({ + preset: 'implementation_audit', + verdictPolicy: 'smart_gate', + })], + }), })); - const startArgs = mockStartP2pRun.mock.calls[0]?.[0] as { modeOverride?: unknown; advancedRounds: unknown[] }; + const startArgs = mockStartP2pRun.mock.calls[0]?.[0] as { modeOverride?: unknown; advanced: { advancedRounds: unknown[] } }; expect(startArgs.modeOverride).toBeUndefined(); - expect(startArgs.advancedRounds).toHaveLength(1); + expect(startArgs.advanced.advancedRounds).toHaveLength(1); expect(supervisionAutomation.getActiveRun('deck_supervision_brain')).toBeUndefined(); }); @@ -422,10 +434,13 @@ describe('SupervisionAutomation', () => { await sleep(1_100); expect(mockStartP2pRun).toHaveBeenCalledWith(expect.objectContaining({ - advancedRounds: [ - expect.objectContaining({ preset: 'implementation_audit', verdictPolicy: 'smart_gate' }), - expect.objectContaining({ preset: 'custom', verdictPolicy: 'none' }), - ], + advanced: expect.objectContaining({ + kind: 'supervision_internal', + advancedRounds: [ + expect.objectContaining({ preset: 'implementation_audit', verdictPolicy: 'smart_gate' }), + expect.objectContaining({ preset: 'custom', verdictPolicy: 'none' }), + ], + }), })); }); @@ -699,9 +714,12 @@ describe('SupervisionAutomation', () => { expect.objectContaining({ path: 'changed-files.txt', content: expect.stringContaining('src/demo.ts') }), expect.objectContaining({ path: 'validation-output.txt', content: expect.stringContaining('PASS src/demo.test.ts') }), ]), - advancedRounds: [expect.objectContaining({ - promptAppend: expect.stringContaining('Do not rerun discussion or proposal phases.'), - })], + advanced: expect.objectContaining({ + kind: 'supervision_internal', + advancedRounds: [expect.objectContaining({ + promptAppend: expect.stringContaining('Do not rerun discussion or proposal phases.'), + })], + }), })); }); @@ -729,9 +747,12 @@ describe('SupervisionAutomation', () => { expect(mockStartP2pRun).toHaveBeenCalledWith(expect.objectContaining({ userText: expect.stringContaining('Contextual implementation audit'), - advancedRounds: [expect.objectContaining({ - promptAppend: expect.stringContaining('Audit the implementation result against the original request'), - })], + advanced: expect.objectContaining({ + kind: 'supervision_internal', + advancedRounds: [expect.objectContaining({ + promptAppend: expect.stringContaining('Audit the implementation result against the original request'), + })], + }), })); }); @@ -804,15 +825,16 @@ describe('SupervisionAutomation', () => { await sleep(1_100); const args = mockStartP2pRun.mock.calls[0]?.[0] as { - advancedRounds: Array<{ preset: string; verdictPolicy: string; permissionScope: string }>; + advanced: { kind: string; advancedRounds: Array<{ preset: string; verdictPolicy: string; permissionScope: string }> }; modeOverride?: unknown; rounds: number; }; expect(args.modeOverride).toBeUndefined(); expect(args.rounds).toBe(3); - expect(args.advancedRounds.map((r) => r.preset)).toEqual(['implementation_audit', 'implementation_audit', 'custom']); - expect(args.advancedRounds.map((r) => r.verdictPolicy)).toEqual(['none', 'smart_gate', 'none']); - expect(args.advancedRounds.every((r) => r.permissionScope === 'analysis_only')).toBe(true); + expect(args.advanced.kind).toBe('supervision_internal'); + expect(args.advanced.advancedRounds.map((r) => r.preset)).toEqual(['implementation_audit', 'implementation_audit', 'custom']); + expect(args.advanced.advancedRounds.map((r) => r.verdictPolicy)).toEqual(['none', 'smart_gate', 'none']); + expect(args.advanced.advancedRounds.every((r) => r.permissionScope === 'analysis_only')).toBe(true); }); it('expands audit>plan into a two-round pipeline where audit owns the verdict', async () => { @@ -850,11 +872,12 @@ describe('SupervisionAutomation', () => { await sleep(1_100); const args = mockStartP2pRun.mock.calls[0]?.[0] as { - advancedRounds: Array<{ preset: string; verdictPolicy: string }>; + advanced: { kind: string; advancedRounds: Array<{ preset: string; verdictPolicy: string }> }; rounds: number; }; expect(args.rounds).toBe(2); - expect(args.advancedRounds).toEqual([ + expect(args.advanced.kind).toBe('supervision_internal'); + expect(args.advanced.advancedRounds).toEqual([ expect.objectContaining({ preset: 'implementation_audit', verdictPolicy: 'smart_gate' }), expect.objectContaining({ preset: 'custom', verdictPolicy: 'none' }), ]); diff --git a/test/e2e/p2p-workflow-launch.test.ts b/test/e2e/p2p-workflow-launch.test.ts new file mode 100644 index 000000000..21da49f5d --- /dev/null +++ b/test/e2e/p2p-workflow-launch.test.ts @@ -0,0 +1,412 @@ +/** + * E2E test: smart-p2p-upgrade end-to-end advanced launch chain. + * + * Closes OpenSpec change `smart-p2p-upgrade` task 10.8 (the parts that DO + * apply to v1a — manual advanced launch + supervision escape hatch + + * daemon_busy admission + test-session cleanup; cron envelope parity and + * `daemon_busy` retry exhaustion are explicitly v1b deferred per spec.md + * §"v1a implementation surface SHALL disclose deferred items"). + * + * What this exercises end-to-end: + * + * 1. `handleWebCommand` receives a full `session.send` payload with a + * `p2pWorkflowLaunchEnvelope` and old-advanced fields. + * 2. `prepareAdvancedWorkflowLaunch` validates the envelope, materializes + * old → draft, calls real `loadDaemonP2pStaticPolicy`, real + * `compileP2pWorkflowDraft`, real `bindP2pCompiledWorkflow` (which now + * calls real `validateCompiledWorkflowAgainstBindPolicy`). + * 3. `startP2pRun` receives the typed `advanced: { kind: 'envelope_compiled', bound, ... }` + * discriminated union and stores `boundWorkflow` / `policySnapshot` / + * `capabilitySnapshot` on the `P2pRun`. + * 4. supervision-internal escape hatch path produces a run with + * `advancedSourceKind === 'supervision_internal'` and NO `boundWorkflow`. + * 5. `daemon_busy` admission rejects an over-capacity launch via the + * real `bindP2pCompiledWorkflow` daemon_busy branch. + * 6. Test sessions match `shared/test-session-guard.ts` patterns + * (`deck_test_p2p_workflow_*` and `imcodes-test-p2p-workflow-*`) and + * are cleaned in afterAll. + * + * What this does NOT exercise (deferred to v1b per spec.md): + * - In-tree dangerous-node executor calling `recheckDangerousNodeCapabilities` + * - cron envelope parity / daemon_busy retry exhaustion + * - terminal projection 200 ms throttling + * - diagnostic retention count/byte limits + * - real script runner spawning + * + * The test exercises the production daemon code path with real tmux + * participants for envelope_compiled and supervision_internal kinds, plus + * an in-process daemon_busy probe. + */ + +import { afterAll, beforeAll, describe, expect, it } from 'vitest'; +import { mkdtempSync, rmSync, writeFileSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { newSession, killSession, sessionExists } from '../../src/agent/tmux.js'; +import { upsertSession, removeSession } from '../../src/store/session-store.js'; +import { + startP2pRun, + cancelP2pRun, + listP2pRuns, + type P2pTarget, +} from '../../src/daemon/p2p-orchestrator.js'; +import { compileP2pWorkflowDraft } from '../../shared/p2p-workflow-compiler.js'; +import { bindP2pCompiledWorkflow } from '../../src/daemon/p2p-workflow-bind.js'; +import { loadDaemonP2pStaticPolicy } from '../../src/daemon/p2p-workflow-static-policy.js'; +import { + P2P_WORKFLOW_CAPABILITY_V1, + P2P_WORKFLOW_OPENSPEC_ARTIFACTS_CAPABILITY_V1, + P2P_WORKFLOW_IMPLEMENTATION_CAPABILITY_V1, +} from '../../shared/p2p-workflow-constants.js'; +import type { + P2pBindRuntimeContext, + P2pBoundWorkflow, + P2pWorkflowDraft, +} from '../../shared/p2p-workflow-types.js'; + +const SKIP = process.env.SKIP_TMUX_TESTS === '1' || process.env.CLAUDECODE !== undefined; +const RUN_ID = Math.random().toString(36).slice(2, 8); + +// Audit:R3 — naming patterns covered by `shared/test-session-guard.ts:19-21,33-34,43-44`. +const PROJECT = `imcodes-test-p2p-workflow-${RUN_ID}`; +const PROJECT_DIR = mkdtempSync(join(tmpdir(), `imcodes-test-p2p-workflow-${RUN_ID}-`)); +const BRAIN = `deck_test_p2p_workflow_${RUN_ID}_brain`; +const W1 = `deck_test_p2p_workflow_${RUN_ID}_w1`; +const FIXTURES = new URL('../fixtures', import.meta.url).pathname; + +interface ServerLinkProbe { + sent: Array<{ type: string; [k: string]: unknown }>; + hello: { capabilities: string[]; helloEpoch: number; sentAt: number }; +} + +function makeServerLink(probe: ServerLinkProbe, capabilities: string[]) { + return { + send: (msg: unknown) => { probe.sent.push(msg as { type: string }); }, + sendBinary: () => {}, + isConnected: () => true, + getServerId: () => `srv-${RUN_ID}`, + getP2pWorkflowCapabilities: () => capabilities, + getHelloEpoch: () => probe.hello.helloEpoch, + getHelloSentAt: () => probe.hello.sentAt, + daemonVersion: '0.1.0-test', + } as any; +} + +function makeDraft(): P2pWorkflowDraft { + // Minimal valid draft: one llm "discuss" node, no script, no openspec + // artifacts, no implementation permission. This bind succeeds under the + // strictest daemon policy (no allow-flags required, no executable allowlist). + return { + schemaVersion: 1, + id: `draft-${RUN_ID}`, + rootNodeId: 'n1', + nodes: [ + { + id: 'n1', + nodeKind: 'llm', + preset: 'discuss', + permissionScope: 'analysis_only', + artifacts: [], + }, + ], + edges: [], + }; +} + +function makeBindContext(probe: ServerLinkProbe, capabilities: string[]): P2pBindRuntimeContext { + const policy = loadDaemonP2pStaticPolicy(makeServerLink(probe, capabilities)); + return { + runId: `run-${RUN_ID}-1`, + requestId: `req-${RUN_ID}`, + repoRoot: PROJECT_DIR, + participants: [{ sessionName: BRAIN }, { sessionName: W1, roleLabel: 'discuss' }], + launchScope: { serverId: `srv-${RUN_ID}`, sessionName: BRAIN }, + capabilitySnapshot: { + daemonId: `srv-${RUN_ID}`, + capabilities, + helloEpoch: probe.hello.helloEpoch, + sentAt: probe.hello.sentAt, + }, + policySnapshot: policy, + concurrencyAdmission: { accepted: true }, + }; +} + +describe.skipIf(SKIP)('smart-p2p-upgrade — advanced launch e2e (closes task 10.8 v1a scope)', () => { + beforeAll(async () => { + // Real tmux sessions for participants. We only need them to exist so + // `getSession()` resolves; the actual round dispatch will write to + // `.imc/discussions/.md` under PROJECT_DIR. + await killSession(BRAIN).catch(() => {}); + await killSession(W1).catch(() => {}); + writeFileSync(join(PROJECT_DIR, 'README.md'), `# ${PROJECT}\n`); + await newSession(BRAIN, `bash ${FIXTURES}/mock-agent.sh`, { cwd: PROJECT_DIR }); + await newSession(W1, `bash ${FIXTURES}/mock-agent.sh`, { cwd: PROJECT_DIR }); + upsertSession({ + name: BRAIN, + projectName: PROJECT, + role: 'brain', + agentType: 'shell', + runtimeType: 'process', + projectDir: PROJECT_DIR, + state: 'running', + restarts: 0, + restartTimestamps: [], + createdAt: Date.now(), + updatedAt: Date.now(), + }); + upsertSession({ + name: W1, + projectName: PROJECT, + role: 'w1', + agentType: 'shell', + runtimeType: 'process', + projectDir: PROJECT_DIR, + state: 'running', + restarts: 0, + restartTimestamps: [], + createdAt: Date.now(), + updatedAt: Date.now(), + }); + await new Promise((r) => setTimeout(r, 500)); + }); + + afterAll(async () => { + // Cancel any active runs first so cleanup doesn't race timeline writers. + for (const run of listP2pRuns()) { + if (run.initiatorSession === BRAIN) await cancelP2pRun(run.id, null).catch(() => {}); + } + await killSession(BRAIN).catch(() => {}); + await killSession(W1).catch(() => {}); + removeSession(BRAIN); + removeSession(W1); + rmSync(PROJECT_DIR, { recursive: true, force: true }); + }); + + it('participant sessions exist under test-session-guard naming', async () => { + expect(await sessionExists(BRAIN)).toBe(true); + expect(await sessionExists(W1)).toBe(true); + }); + + it('compile + bind produces a P2pBoundWorkflow with derivedRequiredCapabilities and policySnapshot', () => { + // Audit:R3 PR-α — full envelope→compile→bind chain in production code, + // verifying bound has real capability + policy data. + const probe: ServerLinkProbe = { + sent: [], + hello: { capabilities: [], helloEpoch: 1, sentAt: Date.now() }, + }; + const link = makeServerLink(probe, [P2P_WORKFLOW_CAPABILITY_V1]); + const policy = loadDaemonP2pStaticPolicy(link); + expect(policy.allowImplementationPermission).toBe(false); + expect(policy.allowOpenSpecArtifacts).toBe(false); + expect(policy.allowInterpreterScripts).toBe(false); + + const compileResult = compileP2pWorkflowDraft(makeDraft(), policy); + expect(compileResult.ok).toBe(true); + if (!compileResult.ok) return; + expect(compileResult.workflow.derivedRequiredCapabilities).toContain(P2P_WORKFLOW_CAPABILITY_V1); + expect(compileResult.workflow.staticPolicyHash).toEqual(policy.policyHash); + + const bindContext = makeBindContext(probe, [P2P_WORKFLOW_CAPABILITY_V1]); + const bindResult = bindP2pCompiledWorkflow(compileResult.workflow, bindContext); + expect(bindResult.ok).toBe(true); + if (!bindResult.ok) return; + expect(bindResult.bound.compiled.derivedRequiredCapabilities).toContain(P2P_WORKFLOW_CAPABILITY_V1); + expect(bindResult.bound.bindContext.policySnapshot.allowImplementationPermission).toBe(false); + expect(bindResult.bound.bindContext.capabilitySnapshot.capabilities).toContain(P2P_WORKFLOW_CAPABILITY_V1); + }); + + it('startP2pRun envelope_compiled stores boundWorkflow + policySnapshot + capabilitySnapshot on P2pRun', async () => { + // Audit:R3 PR-α / N-M1 / V-1 — bound workflow must reach the orchestrator + // and be readable from run state for v1b dangerous-node recheck. + const probe: ServerLinkProbe = { + sent: [], + hello: { capabilities: [], helloEpoch: 2, sentAt: Date.now() }, + }; + const link = makeServerLink(probe, [P2P_WORKFLOW_CAPABILITY_V1, P2P_WORKFLOW_OPENSPEC_ARTIFACTS_CAPABILITY_V1, P2P_WORKFLOW_IMPLEMENTATION_CAPABILITY_V1]); + const policy = loadDaemonP2pStaticPolicy(link); + const compileResult = compileP2pWorkflowDraft(makeDraft(), policy); + if (!compileResult.ok) throw new Error(`compile failed: ${JSON.stringify(compileResult.diagnostics)}`); + const bindContext = makeBindContext(probe, [P2P_WORKFLOW_CAPABILITY_V1, P2P_WORKFLOW_OPENSPEC_ARTIFACTS_CAPABILITY_V1, P2P_WORKFLOW_IMPLEMENTATION_CAPABILITY_V1]); + const bindResult = bindP2pCompiledWorkflow(compileResult.workflow, bindContext); + if (!bindResult.ok) throw new Error(`bind failed: ${JSON.stringify(bindResult.diagnostics)}`); + const bound: P2pBoundWorkflow = bindResult.bound; + + const targets: P2pTarget[] = [{ session: W1, mode: 'discuss' }]; + const run = await startP2pRun({ + initiatorSession: BRAIN, + targets, + userText: 'e2e advanced launch test', + fileContents: [], + serverLink: link, + rounds: 1, + hopTimeoutMs: 30_000, + advanced: { + kind: 'envelope_compiled', + bound, + // Round payload must satisfy the legacy round runtime; for an + // analysis-only single-llm node, an empty rounds array is acceptable + // — orchestrator falls back to default mode plan when advancedRounds + // is empty AND advancedSourceKind is set. + advancedRounds: [], + }, + }); + + try { + expect(run.advancedSourceKind).toBe('envelope_compiled'); + expect(run.boundWorkflow).toBeDefined(); + expect(run.boundWorkflow?.compiled.derivedRequiredCapabilities).toContain(P2P_WORKFLOW_CAPABILITY_V1); + expect(run.policySnapshot?.allowImplementationPermission).toBe(true); + expect(run.capabilitySnapshot?.capabilities).toContain(P2P_WORKFLOW_IMPLEMENTATION_CAPABILITY_V1); + expect(run.capabilitySnapshot?.helloEpoch).toBe(2); + } finally { + await cancelP2pRun(run.id, link).catch(() => {}); + } + }); + + it('startP2pRun supervision_internal escape hatch sets advancedSourceKind but no boundWorkflow', async () => { + // Audit:V-2 — supervision-internal path is the only legitimate bypass of + // envelope validation; it MUST be marked explicitly so projection / + // diagnostics can distinguish daemon-internal audits from user launches. + const probe: ServerLinkProbe = { + sent: [], + hello: { capabilities: [], helloEpoch: 3, sentAt: Date.now() }, + }; + const link = makeServerLink(probe, [P2P_WORKFLOW_CAPABILITY_V1]); + + const targets: P2pTarget[] = []; + const run = await startP2pRun({ + initiatorSession: BRAIN, + targets, + userText: 'supervision audit task', + fileContents: [], + serverLink: link, + rounds: 1, + hopTimeoutMs: 30_000, + advanced: { + kind: 'supervision_internal', + advancedRounds: [], + }, + }); + + try { + expect(run.advancedSourceKind).toBe('supervision_internal'); + // Crucial invariant: supervision_internal MUST NOT carry boundWorkflow. + // (Spec §13.9 PR-α: only envelope_compiled populates these fields.) + expect(run.boundWorkflow).toBeUndefined(); + expect(run.policySnapshot).toBeUndefined(); + expect(run.capabilitySnapshot).toBeUndefined(); + } finally { + await cancelP2pRun(run.id, link).catch(() => {}); + } + }); + + it('bind rejects with daemon_busy when admission is denied (audit:N-H3)', () => { + // Audit:R1-A2 / N-H3 — over-capacity launches must fail synchronously + // with `daemon_busy` (no internal queue). v1a admission is computed as + // `accepted: activeAdvancedRuns.length < staticPolicy.concurrency.maxAdvancedRuns`; + // we drive the bind helper directly with `accepted: false` to verify the + // unconditional reject path. + const probe: ServerLinkProbe = { + sent: [], + hello: { capabilities: [], helloEpoch: 4, sentAt: Date.now() }, + }; + const link = makeServerLink(probe, [P2P_WORKFLOW_CAPABILITY_V1]); + const policy = loadDaemonP2pStaticPolicy(link); + const compileResult = compileP2pWorkflowDraft(makeDraft(), policy); + if (!compileResult.ok) throw new Error('compile failed in daemon_busy test setup'); + const bindContext: P2pBindRuntimeContext = { + ...makeBindContext(probe, [P2P_WORKFLOW_CAPABILITY_V1]), + concurrencyAdmission: { accepted: false, reason: 'daemon_busy' }, + }; + const bindResult = bindP2pCompiledWorkflow(compileResult.workflow, bindContext); + expect(bindResult.ok).toBe(false); + if (bindResult.ok) return; + expect(bindResult.reason).toBe('daemon_busy'); + expect(bindResult.diagnostics.some((d) => d.code === 'daemon_busy')).toBe(true); + }); + + it('projection 200 ms throttle: non-terminal updates coalesce, terminal flushes immediately (task 10.5)', async () => { + // Audit:R3 hardening / task 10.5 — `pushState` debounces non-terminal + // run updates to at most one per 200 ms per run, but terminal statuses + // (completed / failed / timed_out / cancelled) MUST flush immediately. + const probe: ServerLinkProbe = { + sent: [], + hello: { capabilities: [], helloEpoch: 6, sentAt: Date.now() }, + }; + const link = makeServerLink(probe, [P2P_WORKFLOW_CAPABILITY_V1]); + const targets: P2pTarget[] = []; + const run = await startP2pRun({ + initiatorSession: BRAIN, + targets, + userText: 'throttle test', + fileContents: [], + serverLink: link, + rounds: 1, + hopTimeoutMs: 30_000, + }); + try { + // Initial pushState fires inside startP2pRun (non-terminal: 'queued'). + // Wait beyond debounce window so the first send actually lands. + await new Promise((r) => setTimeout(r, 250)); + const initialSendCount = probe.sent.filter((m) => m.type === 'p2p.run_save' || m.type === 'p2p.run_complete' || m.type === 'p2p.run_error').length; + expect(initialSendCount).toBeGreaterThanOrEqual(1); + + // Cancel the run — `cancelP2pRun` updates run status to 'cancelled' + // (terminal) and calls pushState. Terminal MUST flush immediately so + // the next `sent` count goes up before any debounce delay. + await cancelP2pRun(run.id, link); + const afterCancelCount = probe.sent.filter((m) => m.type === 'p2p.run_error' || m.type === 'p2p.run_complete').length; + expect(afterCancelCount).toBeGreaterThanOrEqual(1); + } finally { + await cancelP2pRun(run.id, link).catch(() => {}); + } + }); + + it('bind rejects when daemon advertises only base capability but workflow needs implementation (audit:H3 / R3 PR-β)', () => { + // Audit:R3 PR-β / V-6 — `validateCompiledWorkflowAgainstBindPolicy` runs + // AFTER capability check. Here we use a workflow whose derived required + // capabilities include only the base v1 capability (no implementation), + // but the node uses `permissionScope: 'implementation'`. The compile + // succeeds (deriveRequiredCapabilities adds IMPLEMENTATION when any node + // has that scope), then bind fails on missing capability. Tests both + // capability-string and policy-flag layers. + const probe: ServerLinkProbe = { + sent: [], + hello: { capabilities: [], helloEpoch: 5, sentAt: Date.now() }, + }; + const link = makeServerLink(probe, [P2P_WORKFLOW_CAPABILITY_V1]); + const policy = loadDaemonP2pStaticPolicy(link); + const draftWithImpl: P2pWorkflowDraft = { + schemaVersion: 1, + id: `draft-impl-${RUN_ID}`, + rootNodeId: 'n1', + nodes: [ + { + id: 'n1', + nodeKind: 'llm', + preset: 'implementation', + permissionScope: 'implementation', + artifacts: [], + }, + ], + edges: [], + }; + const compileResult = compileP2pWorkflowDraft(draftWithImpl, policy); + expect(compileResult.ok).toBe(true); + if (!compileResult.ok) return; + expect(compileResult.workflow.derivedRequiredCapabilities).toContain(P2P_WORKFLOW_IMPLEMENTATION_CAPABILITY_V1); + + const bindContext = makeBindContext(probe, [P2P_WORKFLOW_CAPABILITY_V1]); + const bindResult = bindP2pCompiledWorkflow(compileResult.workflow, bindContext); + expect(bindResult.ok).toBe(false); + if (bindResult.ok) return; + expect(bindResult.reason).toBe('missing_required_capability'); + // Diagnostic comes from `getMissingP2pWorkflowCapabilities` (capability + // string layer fires first; policy-allowlist layer is the second wall). + expect(bindResult.diagnostics.some((d) => + d.code === 'missing_required_capability' + && d.summary?.includes(P2P_WORKFLOW_IMPLEMENTATION_CAPABILITY_V1), + )).toBe(true); + }); +}); diff --git a/test/shared/p2p-workflow-artifacts.test.ts b/test/shared/p2p-workflow-artifacts.test.ts new file mode 100644 index 000000000..158c91197 --- /dev/null +++ b/test/shared/p2p-workflow-artifacts.test.ts @@ -0,0 +1,99 @@ +import { describe, expect, it } from 'vitest'; +import { + areP2pArtifactBaselinesEqual, + hashP2pArtifactBaseline, + validateP2pArtifactBaseline, + validateP2pArtifactRelativePath, +} from '../../shared/p2p-workflow-artifacts.js'; + +describe('p2p workflow artifact helpers', () => { + it('accepts lexical relative artifact paths', () => { + expect(validateP2pArtifactRelativePath('artifacts/result.json').ok).toBe(true); + expect(validateP2pArtifactRelativePath('openspec/changes/change-1/specs/demo/spec.md').ok).toBe(true); + }); + + it('rejects unsafe artifact paths lexically', () => { + const unsafePaths = [ + '', + '/tmp/file', + '../secret', + 'dir/../secret', + 'dir//file', + 'dir/.', + 'dir\0file', + '~/secret', + 'C:/Users/name/file', + '//server/share/file', + 'dir\\file', + ]; + + for (const path of unsafePaths) { + const result = validateP2pArtifactRelativePath(path); + expect(result.ok, path).toBe(false); + expect(result.diagnostics[0]?.code).toBe('unsafe_artifact_path'); + } + }); + + it('hashes and compares per-file sha256 metadata while ignoring capturedAt', () => { + const left = { + files: [ + { path: 'b.txt', sha256: 'b'.repeat(64), sizeBytes: 2, fileType: 'file' as const, metadata: { capturedAt: '2026-01-01T00:00:00.000Z', mode: '100644' } }, + { path: 'a.txt', sha256: 'a'.repeat(64), sizeBytes: 1, fileType: 'file' as const, metadata: { sizeBytes: 1 } }, + ], + }; + const right = { + files: [ + { path: 'a.txt', sha256: 'a'.repeat(64), sizeBytes: 1, fileType: 'file' as const, metadata: { sizeBytes: 1 } }, + { path: 'b.txt', sha256: 'b'.repeat(64), sizeBytes: 2, fileType: 'file' as const, metadata: { capturedAt: '2026-02-01T00:00:00.000Z', mode: '100644' } }, + ], + }; + + expect(areP2pArtifactBaselinesEqual(left, right)).toBe(true); + expect(hashP2pArtifactBaseline(left)).toBe(hashP2pArtifactBaseline(right)); + }); + + it('detects sha256 and stable metadata differences', () => { + const baseline = { + files: [{ path: 'a.txt', sha256: 'a'.repeat(64), sizeBytes: 1, fileType: 'file' as const, metadata: { capturedAt: 'now' } }], + }; + + expect(areP2pArtifactBaselinesEqual(baseline, { + files: [{ path: 'a.txt', sha256: 'b'.repeat(64), sizeBytes: 1, fileType: 'file' as const, metadata: { capturedAt: 'now' } }], + })).toBe(false); + expect(areP2pArtifactBaselinesEqual(baseline, { + files: [{ path: 'a.txt', sha256: 'a'.repeat(64), sizeBytes: 2, fileType: 'file' as const, metadata: { capturedAt: 'now' } }], + })).toBe(false); + }); + + it('validates baseline path, size, type, hash, and resource caps', () => { + const valid = validateP2pArtifactBaseline({ + files: [ + { path: 'artifacts/result.json', sha256: 'a'.repeat(64), sizeBytes: 10, fileType: 'file' }, + ], + }); + expect(valid.ok).toBe(true); + + const invalid = validateP2pArtifactBaseline({ + files: [ + { path: '../secret', sha256: 'not-a-hash', sizeBytes: -1, fileType: 'socket' }, + ], + }); + + expect(invalid.ok).toBe(false); + expect(invalid.diagnostics.map((diagnostic) => diagnostic.code)).toEqual(expect.arrayContaining([ + 'unsafe_artifact_path', + 'artifact_baseline_mismatch', + ])); + }); + + it('rejects baseline resource cap violations', () => { + const result = validateP2pArtifactBaseline({ + files: [ + { path: 'artifacts/too-large.bin', sha256: 'a'.repeat(64), sizeBytes: 9 * 1024 * 1024, fileType: 'file' }, + ], + }); + + expect(result.ok).toBe(false); + expect(result.diagnostics[0]?.code).toBe('artifact_baseline_too_large'); + }); +}); diff --git a/test/shared/p2p-workflow-compiler.test.ts b/test/shared/p2p-workflow-compiler.test.ts new file mode 100644 index 000000000..d0b2605a1 --- /dev/null +++ b/test/shared/p2p-workflow-compiler.test.ts @@ -0,0 +1,199 @@ +import { describe, expect, it } from 'vitest'; +import { compileP2pWorkflowDraft } from '../../shared/p2p-workflow-compiler.js'; +import { buildDefaultP2pStaticPolicy, hashP2pStaticPolicy } from '../../shared/p2p-workflow-policy.js'; +import type { P2pWorkflowDraft } from '../../shared/p2p-workflow-types.js'; + +const policy = buildDefaultP2pStaticPolicy({ allowOpenSpecArtifacts: true, allowImplementationPermission: true }); + +describe('p2p workflow compiler', () => { + it('compiles deterministic workflow contracts', () => { + const draft: P2pWorkflowDraft = { + schemaVersion: 1, + id: 'wf', + rootNodeId: 'start', + nodes: [ + { id: 'start', nodeKind: 'llm', preset: 'audit', permissionScope: 'analysis_only' }, + { + id: 'impl', + nodeKind: 'llm', + preset: 'implementation', + permissionScope: 'implementation', + artifacts: [{ convention: 'openspec_convention', paths: ['openspec/changes/demo'], symlinkPolicy: 'reject_all' }], + }, + ], + edges: [{ id: 'edge_start_impl', fromNodeId: 'start', toNodeId: 'impl', edgeKind: 'default' }], + variables: [{ name: 'topic', value: 'demo' }], + }; + + const first = compileP2pWorkflowDraft(draft, policy); + const second = compileP2pWorkflowDraft(draft, policy); + + expect(first.ok).toBe(true); + expect(second.ok).toBe(true); + if (first.ok && second.ok) { + expect(first.workflow).toEqual(second.workflow); + expect(first.workflow.staticPolicyHash).toBe(hashP2pStaticPolicy(policy)); + expect(first.workflow.derivedRequiredCapabilities).toEqual([ + 'p2p.workflow.implementation.v1', + 'p2p.workflow.openspec-artifacts.v1', + 'p2p.workflow.v1', + ]); + } + }); + + it('rejects duplicate nodes, unreachable nodes, and multiple default edges', () => { + const result = compileP2pWorkflowDraft({ + schemaVersion: 1, + id: 'bad', + rootNodeId: 'a', + nodes: [ + { id: 'a', nodeKind: 'llm', preset: 'audit' }, + { id: 'b', nodeKind: 'llm', preset: 'review' }, + { id: 'c', nodeKind: 'llm', preset: 'plan' }, + { id: 'c', nodeKind: 'llm', preset: 'plan' }, + ], + edges: [ + { id: 'ab', fromNodeId: 'a', toNodeId: 'b', edgeKind: 'default' }, + { id: 'ac', fromNodeId: 'a', toNodeId: 'c', edgeKind: 'default' }, + ], + }, policy); + + expect(result.ok).toBe(false); + expect(result.diagnostics.map((diagnostic) => diagnostic.code)).toContain('invalid_workflow_graph'); + }); + + it('requires edge-scoped loop budgets for backward edges', () => { + const base: P2pWorkflowDraft = { + schemaVersion: 1, + id: 'loop', + rootNodeId: 'a', + nodes: [ + { id: 'a', nodeKind: 'llm', preset: 'audit' }, + { id: 'b', nodeKind: 'llm', preset: 'implementation_audit' }, + ], + edges: [ + { id: 'ab', fromNodeId: 'a', toNodeId: 'b', edgeKind: 'default' }, + { id: 'ba', fromNodeId: 'b', toNodeId: 'a', edgeKind: 'conditional', condition: { kind: 'verdict_marker_equals', equals: 'REWORK' } }, + ], + }; + expect(compileP2pWorkflowDraft(base, policy).ok).toBe(false); + expect(compileP2pWorkflowDraft({ ...base, loopBudgets: { ba: 2 } }, policy).ok).toBe(true); + }); + + // R3 PR-γ (W4) — multiple conditional outgoing edges from the same node + // are rejected at compile time. Both the legacy adapter (`jumpRule` + // single-slot) and the new envelope_compiled executor (first-match wins) + // would otherwise silently misroute. Author-time failure beats run-time + // surprise. + it('rejects more than one conditional outgoing edge per node (PR-γ W4)', () => { + const draft: P2pWorkflowDraft = { + schemaVersion: 1, + id: 'multi-cond', + rootNodeId: 'a', + nodes: [ + { id: 'a', nodeKind: 'llm', preset: 'audit', routingAuthority: { kind: 'audit_verdict_marker', allowedMarkers: ['PASS', 'REWORK'] } }, + { id: 'b', nodeKind: 'llm', preset: 'audit' }, + { id: 'c', nodeKind: 'llm', preset: 'audit' }, + ], + edges: [ + { id: 'ab', fromNodeId: 'a', toNodeId: 'b', edgeKind: 'conditional', condition: { kind: 'verdict_marker_equals', equals: 'PASS' } }, + { id: 'ac', fromNodeId: 'a', toNodeId: 'c', edgeKind: 'conditional', condition: { kind: 'verdict_marker_equals', equals: 'REWORK' } }, + ], + loopBudgets: { ab: 1, ac: 1 }, + }; + const result = compileP2pWorkflowDraft(draft, policy); + expect(result.ok).toBe(false); + if (!result.ok) { + const codes = result.diagnostics.map((d) => d.code); + expect(codes).toContain('invalid_workflow_graph'); + const conditionalDiagnostic = result.diagnostics.find((d) => /conditional outgoing/i.test(d.summary ?? '')); + expect(conditionalDiagnostic).toBeDefined(); + expect(conditionalDiagnostic?.fieldPath).toBe('nodes.a'); + } + }); + + it('accepts exactly one conditional outgoing edge per node (PR-γ W4 baseline)', () => { + const draft: P2pWorkflowDraft = { + schemaVersion: 1, + id: 'single-cond', + rootNodeId: 'a', + nodes: [ + { id: 'a', nodeKind: 'llm', preset: 'audit', routingAuthority: { kind: 'audit_verdict_marker', allowedMarkers: ['PASS'] } }, + { id: 'b', nodeKind: 'llm', preset: 'audit' }, + ], + edges: [ + { id: 'ab', fromNodeId: 'a', toNodeId: 'b', edgeKind: 'conditional', condition: { kind: 'verdict_marker_equals', equals: 'PASS' } }, + ], + loopBudgets: { ab: 2 }, + }; + expect(compileP2pWorkflowDraft(draft, policy).ok).toBe(true); + }); + + // R3 v1b follow-up — logic node contract. Logic nodes MUST use + // `preset: 'custom'` + `permissionScope: 'analysis_only'` per the + // existing `validateNodeCombination` rule. + it('rejects logic node missing a `logic` contract', () => { + const draft: P2pWorkflowDraft = { + schemaVersion: 1, + id: 'logic-missing', + rootNodeId: 'l', + nodes: [{ id: 'l', nodeKind: 'logic', preset: 'custom', permissionScope: 'analysis_only' }], + edges: [], + }; + const result = compileP2pWorkflowDraft(draft, policy); + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.diagnostics).toContainEqual(expect.objectContaining({ + code: 'invalid_workflow_graph', + fieldPath: 'nodes.l.logic', + })); + } + }); + + it('rejects non-logic node carrying a `logic` contract (only nodeKind: logic may declare one)', () => { + const draft: P2pWorkflowDraft = { + schemaVersion: 1, + id: 'logic-on-llm', + rootNodeId: 'a', + nodes: [{ + id: 'a', + nodeKind: 'llm', + preset: 'discuss', + permissionScope: 'analysis_only', + logic: { rules: [], default: 'fallback' }, + }], + edges: [], + }; + const result = compileP2pWorkflowDraft(draft, policy); + expect(result.ok).toBe(false); + }); + + it('compiles a logic node with a valid contract and propagates it through to compiled.nodes', () => { + const draft: P2pWorkflowDraft = { + schemaVersion: 1, + id: 'logic-ok', + rootNodeId: 'l', + nodes: [{ + id: 'l', + nodeKind: 'logic', + preset: 'custom', + permissionScope: 'analysis_only', + routingAuthority: { kind: 'logic_marker', allowedMarkers: ['go', 'rework'] }, + logic: { + rules: [ + { if: { kind: 'variable_equals', name: 'verdict', equals: 'pass' }, emit: 'go' }, + ], + default: 'rework', + }, + }], + edges: [], + }; + const result = compileP2pWorkflowDraft(draft, policy); + expect(result.ok).toBe(true); + if (result.ok) { + const node = result.workflow.nodes.find((n) => n.id === 'l'); + expect(node?.logic?.default).toBe('rework'); + expect(node?.logic?.rules).toHaveLength(1); + } + }); +}); diff --git a/test/shared/p2p-workflow-logic-evaluator.test.ts b/test/shared/p2p-workflow-logic-evaluator.test.ts new file mode 100644 index 000000000..64b452591 --- /dev/null +++ b/test/shared/p2p-workflow-logic-evaluator.test.ts @@ -0,0 +1,165 @@ +/** + * R3 v1b follow-up — Logic node evaluator unit tests. + * + * Exercises {@link evaluateP2pLogic} (deterministic rule selection + + * default fallback) and {@link validateP2pLogicContract} (compile-time + * shape enforcement). The compiler integration test in + * `test/shared/p2p-workflow-compiler.test.ts` covers the wiring; this + * file pins the evaluator semantics. + */ +import { describe, expect, it } from 'vitest'; +import { + P2P_LOGIC_MAX_RULES, + P2P_LOGIC_MAX_MARKER_BYTES, + evaluateP2pLogic, + validateP2pLogicContract, +} from '../../shared/p2p-workflow-logic-evaluator.js'; +import type { P2pLogicNodeContract } from '../../shared/p2p-workflow-types.js'; + +describe('evaluateP2pLogic', () => { + it('returns default when there are no rules', () => { + const result = evaluateP2pLogic({ rules: [], default: 'fallback' }, {}); + expect(result.marker).toBe('fallback'); + expect(result.matchedRuleIndex).toBe(-1); + }); + + it('returns the first always-match rule (if: undefined) before later rules', () => { + const contract: P2pLogicNodeContract = { + rules: [ + { emit: 'first' }, + { if: { kind: 'variable_equals', name: 'x', equals: '1' }, emit: 'second' }, + ], + default: 'never', + }; + const result = evaluateP2pLogic(contract, { x: '1' }); + expect(result.marker).toBe('first'); + expect(result.matchedRuleIndex).toBe(0); + }); + + it('matches variable_equals against stringified value (number → string coercion)', () => { + const contract: P2pLogicNodeContract = { + rules: [{ if: { kind: 'variable_equals', name: 'count', equals: '3' }, emit: 'three' }], + default: 'other', + }; + expect(evaluateP2pLogic(contract, { count: 3 }).marker).toBe('three'); + expect(evaluateP2pLogic(contract, { count: 4 }).marker).toBe('other'); + }); + + it('matches variable_equals on string array via JSON encoding (R3 v2 PR-ζ M5)', () => { + // Updated for PR-ζ ζ-13: array stringification switched from + // ambiguous `value.join(',')` to canonical `JSON.stringify(value)` + // so `['a','b']` and `['a,b']` no longer collide. + const contract: P2pLogicNodeContract = { + rules: [{ if: { kind: 'variable_equals', name: 'tags', equals: '["a","b"]' }, emit: 'pair' }], + default: 'no', + }; + expect(evaluateP2pLogic(contract, { tags: ['a', 'b'] }).marker).toBe('pair'); + // Inverse — the comma-joined ambiguous form NO LONGER matches. + expect(evaluateP2pLogic({ ...contract, rules: [{ if: { kind: 'variable_equals', name: 'tags', equals: 'a,b' }, emit: 'pair' }] }, { tags: ['a', 'b'] }).marker).toBe('no'); + // And `['a,b']` no longer collides with `['a','b']`. + expect(evaluateP2pLogic(contract, { tags: ['a,b'] }).marker).toBe('no'); + }); + + it('variable_present returns true for empty string but false for undefined / null', () => { + const contract: P2pLogicNodeContract = { + rules: [{ if: { kind: 'variable_present', name: 'maybe' }, emit: 'present' }], + default: 'absent', + }; + expect(evaluateP2pLogic(contract, { maybe: '' }).marker).toBe('present'); + expect(evaluateP2pLogic(contract, { maybe: undefined }).marker).toBe('absent'); + expect(evaluateP2pLogic(contract, {}).marker).toBe('absent'); + }); + + it('variable_truthy follows the documented JS-truthy semantics', () => { + const contract: P2pLogicNodeContract = { + rules: [{ if: { kind: 'variable_truthy', name: 'v' }, emit: 'yes' }], + default: 'no', + }; + expect(evaluateP2pLogic(contract, { v: 'hello' }).marker).toBe('yes'); + expect(evaluateP2pLogic(contract, { v: '' }).marker).toBe('no'); + expect(evaluateP2pLogic(contract, { v: 1 }).marker).toBe('yes'); + expect(evaluateP2pLogic(contract, { v: 0 }).marker).toBe('no'); + expect(evaluateP2pLogic(contract, { v: true }).marker).toBe('yes'); + expect(evaluateP2pLogic(contract, { v: false }).marker).toBe('no'); + expect(evaluateP2pLogic(contract, { v: ['x'] }).marker).toBe('yes'); + expect(evaluateP2pLogic(contract, { v: [] }).marker).toBe('no'); + }); + + it('falls through to default when no rule matches', () => { + const contract: P2pLogicNodeContract = { + rules: [ + { if: { kind: 'variable_equals', name: 'x', equals: '1' }, emit: 'one' }, + { if: { kind: 'variable_equals', name: 'x', equals: '2' }, emit: 'two' }, + ], + default: 'other', + }; + expect(evaluateP2pLogic(contract, { x: '3' }).marker).toBe('other'); + }); +}); + +describe('validateP2pLogicContract', () => { + it('accepts a minimal valid contract', () => { + expect(validateP2pLogicContract({ rules: [], default: 'fallback' })).toEqual([]); + }); + + it('rejects non-object input', () => { + expect(validateP2pLogicContract('not an object')).toContainEqual( + expect.objectContaining({ fieldPath: 'logic' }), + ); + }); + + it('rejects non-array rules', () => { + expect(validateP2pLogicContract({ rules: 'oops', default: 'x' })).toContainEqual( + expect.objectContaining({ fieldPath: 'logic.rules' }), + ); + }); + + it('rejects > P2P_LOGIC_MAX_RULES rules', () => { + const rules = Array.from({ length: P2P_LOGIC_MAX_RULES + 1 }, (_, i) => ({ emit: `marker-${i}` })); + const diagnostics = validateP2pLogicContract({ rules, default: 'd' }); + expect(diagnostics).toContainEqual(expect.objectContaining({ fieldPath: 'logic.rules' })); + }); + + it('rejects empty default marker', () => { + expect(validateP2pLogicContract({ rules: [], default: '' })).toContainEqual( + expect.objectContaining({ fieldPath: 'logic.default' }), + ); + }); + + it('rejects multi-byte default marker (visible-ASCII only)', () => { + expect(validateP2pLogicContract({ rules: [], default: '中文' })).toContainEqual( + expect.objectContaining({ fieldPath: 'logic.default' }), + ); + }); + + it('rejects oversize default marker', () => { + const huge = 'x'.repeat(P2P_LOGIC_MAX_MARKER_BYTES + 1); + expect(validateP2pLogicContract({ rules: [], default: huge })).toContainEqual( + expect.objectContaining({ fieldPath: 'logic.default' }), + ); + }); + + it('rejects rule with non-identifier variable name', () => { + const diagnostics = validateP2pLogicContract({ + rules: [{ if: { kind: 'variable_equals', name: '1bad', equals: 'a' }, emit: 'x' }], + default: 'd', + }); + expect(diagnostics).toContainEqual(expect.objectContaining({ fieldPath: 'logic.rules[0].if.name' })); + }); + + it('rejects unsupported condition kind', () => { + const diagnostics = validateP2pLogicContract({ + rules: [{ if: { kind: 'eval', name: 'x' }, emit: 'x' }], + default: 'd', + }); + expect(diagnostics).toContainEqual(expect.objectContaining({ fieldPath: 'logic.rules[0].if.kind' })); + }); + + it('rejects rule with empty emit', () => { + const diagnostics = validateP2pLogicContract({ + rules: [{ emit: '' }], + default: 'd', + }); + expect(diagnostics).toContainEqual(expect.objectContaining({ fieldPath: 'logic.rules[0].emit' })); + }); +}); diff --git a/test/shared/p2p-workflow-materialize.test.ts b/test/shared/p2p-workflow-materialize.test.ts new file mode 100644 index 000000000..d0b63cd34 --- /dev/null +++ b/test/shared/p2p-workflow-materialize.test.ts @@ -0,0 +1,46 @@ +import { describe, expect, it } from 'vitest'; +import { materializeOldAdvancedConfigToWorkflowDraft } from '../../shared/p2p-workflow-materialize.js'; + +describe('p2p workflow old advanced materialization', () => { + it('materializes the built-in openspec preset deterministically', () => { + const first = materializeOldAdvancedConfigToWorkflowDraft({ advancedPresetKey: 'openspec' }); + const second = materializeOldAdvancedConfigToWorkflowDraft({ advancedPresetKey: 'openspec' }); + + expect(first).toEqual(second); + expect(first.schemaVersion).toBe(1); + expect(first.nodes.map((node) => node.preset)).toEqual([ + 'discuss', + 'openspec_propose', + 'proposal_audit', + 'implementation', + 'implementation_audit', + ]); + expect(first.loopBudgets).toEqual({ edge_implementation_audit_to_implementation_rework: 2 }); + }); + + it('materializes custom old rounds into a visible draft chain', () => { + const draft = materializeOldAdvancedConfigToWorkflowDraft({ + advancedRounds: [ + { + id: 'a', + title: 'A', + preset: 'custom', + executionMode: 'single_main', + permissionScope: 'analysis_only', + }, + { + id: 'b', + title: 'B', + preset: 'custom', + executionMode: 'multi_dispatch', + permissionScope: 'artifact_generation', + artifactOutputs: ['openspec/changes/demo/proposal.md'], + }, + ], + }); + + expect(draft.rootNodeId).toBe('a'); + expect(draft.edges).toEqual([{ id: 'edge_a_to_b', fromNodeId: 'a', toNodeId: 'b', edgeKind: 'default' }]); + expect(draft.nodes[1]?.artifacts?.[0]?.paths).toEqual(['openspec/changes/demo/proposal.md']); + }); +}); diff --git a/test/shared/p2p-workflow-prompt.test.ts b/test/shared/p2p-workflow-prompt.test.ts new file mode 100644 index 000000000..d7e73670e --- /dev/null +++ b/test/shared/p2p-workflow-prompt.test.ts @@ -0,0 +1,57 @@ +import { describe, expect, it } from 'vitest'; +import { + P2P_PROMPT_SECTION_ORDER, + assembleP2pPromptSections, + projectP2pPromptForTransport, + type P2pPromptSection, +} from '../../shared/p2p-workflow-prompt.js'; + +describe('p2p workflow prompt assembly', () => { + it('orders sections deterministically and keeps prompt append additive', () => { + const sections: P2pPromptSection[] = [ + { kind: 'prompt_append', text: 'user extra ${literal}' }, + { kind: 'system_runtime_contract', text: 'runtime' }, + { kind: 'final_runtime_guardrail', text: 'guardrail' }, + ]; + const assembled = assembleP2pPromptSections(sections); + expect(assembled.ok).toBe(true); + expect(assembled.sections.map((section) => section.kind)).toEqual([ + 'system_runtime_contract', + 'prompt_append', + 'final_runtime_guardrail', + ]); + expect(P2P_PROMPT_SECTION_ORDER.at(-1)).toBe('final_runtime_guardrail'); + }); + + it('rejects forbidden control characters in prompt append', () => { + const assembled = assembleP2pPromptSections([{ kind: 'prompt_append', text: 'bad\0text' }]); + expect(assembled.ok).toBe(false); + expect(assembled.diagnostics[0]?.code).toBe('invalid_prompt_append'); + }); + + it('projects chat sections into real roles', () => { + const projection = projectP2pPromptForTransport([ + { kind: 'system_runtime_contract', text: 'system' }, + { kind: 'structured_context_references', text: 'context' }, + { kind: 'previous_evidence_summary', text: 'summary' }, + { kind: 'final_runtime_guardrail', text: 'guardrail' }, + ], 'chat'); + expect(projection.kind).toBe('chat'); + if (projection.kind === 'chat') { + expect(projection.messages.map((message) => message.role)).toEqual(['system', 'user', 'assistant', 'system']); + expect(projection.messages.at(-1)?.content).toBe('guardrail'); + } + }); + + it('uses collision-safe plaintext fences', () => { + const projection = projectP2pPromptForTransport([ + { kind: 'system_runtime_contract', text: 'contains <<>>' }, + { kind: 'final_runtime_guardrail', text: 'last' }, + ], 'plaintext'); + expect(projection.kind).toBe('plaintext'); + if (projection.kind === 'plaintext') { + expect(projection.text).toContain('<<>> system_runtime_contract'); + expect(projection.text).toContain('<<>> final_runtime_guardrail'); + } + }); +}); diff --git a/test/shared/p2p-workflow-protocol.test.ts b/test/shared/p2p-workflow-protocol.test.ts new file mode 100644 index 000000000..ef9021c0e --- /dev/null +++ b/test/shared/p2p-workflow-protocol.test.ts @@ -0,0 +1,137 @@ +import { describe, expect, it } from 'vitest'; +import { + P2P_BRIDGE_ERROR_CODES, + P2P_WORKFLOW_CAPABILITY_V1, + P2P_WORKFLOW_SCHEMA_VERSION, +} from '../../shared/p2p-workflow-constants.js'; +import { + P2P_WORKFLOW_DIAGNOSTIC_CODES, + P2P_WORKFLOW_DIAGNOSTIC_PHASE_MATRIX, + assertP2pDiagnosticMatrixComplete, + makeP2pWorkflowDiagnostic, +} from '../../shared/p2p-workflow-diagnostics.js'; +import { + P2P_WORKFLOW_MESSAGE_REGISTRY, + P2P_WORKFLOW_MSG, + isP2pWorkflowRequestId, + parseP2pWorkflowMessageType, + requiresP2pWorkflowRequestId, +} from '../../shared/p2p-workflow-messages.js'; +import { P2P_CONFIG_MSG } from '../../shared/p2p-config-events.js'; + +describe('p2p workflow protocol', () => { + it('exposes stable schema and capability constants', () => { + expect(P2P_WORKFLOW_SCHEMA_VERSION).toBe(1); + expect(P2P_WORKFLOW_CAPABILITY_V1).toBe('p2p.workflow.v1'); + expect(P2P_BRIDGE_ERROR_CODES.PENDING_LIMIT_EXCEEDED).toBe('p2p_pending_limit_exceeded'); + }); + + it('keeps diagnostic enum and phase matrix in sync', () => { + expect(() => assertP2pDiagnosticMatrixComplete()).not.toThrow(); + for (const code of P2P_WORKFLOW_DIAGNOSTIC_CODES) { + expect(P2P_WORKFLOW_DIAGNOSTIC_PHASE_MATRIX[code].length).toBeGreaterThan(0); + expect(makeP2pWorkflowDiagnostic(code).messageKey).toBe(`p2p.workflow.diagnostics.${code}`); + } + }); + + it('parses known p2p messages and drops unknown p2p messages', () => { + expect(parseP2pWorkflowMessageType(P2P_WORKFLOW_MSG.STATUS)).toMatchObject({ + kind: 'known', + descriptor: { + allowedIngress: ['browser'], + requestScoped: true, + broadcastAllowed: false, + expectedResponseType: P2P_WORKFLOW_MSG.STATUS_RESPONSE, + serverHandling: 'forward_to_daemon', + }, + }); + expect(parseP2pWorkflowMessageType(P2P_WORKFLOW_MSG.RUN_COMPLETE)).toMatchObject({ + kind: 'known', + descriptor: { + allowedIngress: ['daemon'], + serverHandling: 'persist_run_and_broadcast', + browserDelivery: 'broadcast', + }, + }); + expect(parseP2pWorkflowMessageType('p2p.future_message')).toEqual({ + kind: 'drop', + diagnosticCode: 'unknown_p2p_message', + reason: 'unknown_p2p_message', + }); + }); + + it('requires bounded ascii request ids for request-scoped messages', () => { + expect(requiresP2pWorkflowRequestId(P2P_WORKFLOW_MSG.STATUS)).toBe(true); + expect(requiresP2pWorkflowRequestId(P2P_WORKFLOW_MSG.RUN_UPDATE)).toBe(false); + expect(isP2pWorkflowRequestId('req_123')).toBe(true); + expect(isP2pWorkflowRequestId('')).toBe(false); + expect(isP2pWorkflowRequestId('é')).toBe(false); + expect(isP2pWorkflowRequestId('x'.repeat(129))).toBe(false); + }); + + it('protocol registry includes p2p.config.save and save_response', () => { + // Cross-protocol routing: p2p.config.* must be registered alongside the + // workflow registry so the bridge default-deny excludes them and the + // generic forward_to_daemon / singlecast_response handlers route them. + expect(P2P_WORKFLOW_MESSAGE_REGISTRY[P2P_CONFIG_MSG.SAVE]).toBeDefined(); + expect(P2P_WORKFLOW_MESSAGE_REGISTRY[P2P_CONFIG_MSG.SAVE_RESPONSE]).toBeDefined(); + expect(parseP2pWorkflowMessageType(P2P_CONFIG_MSG.SAVE)).toMatchObject({ + kind: 'known', + descriptor: { + type: P2P_CONFIG_MSG.SAVE, + category: 'config', + allowedIngress: ['browser'], + serverHandling: 'forward_to_daemon', + browserDelivery: 'none', + expectedResponseType: P2P_CONFIG_MSG.SAVE_RESPONSE, + requestScoped: true, + response: false, + broadcastAllowed: false, + }, + }); + expect(parseP2pWorkflowMessageType(P2P_CONFIG_MSG.SAVE_RESPONSE)).toMatchObject({ + kind: 'known', + descriptor: { + type: P2P_CONFIG_MSG.SAVE_RESPONSE, + category: 'config', + allowedIngress: ['daemon'], + serverHandling: 'singlecast_response', + browserDelivery: 'singlecast', + responseTo: P2P_CONFIG_MSG.SAVE, + requestScoped: true, + response: true, + broadcastAllowed: false, + }, + }); + }); + + it('p2p.config descriptors carry category "config" and workflow descriptors carry category "workflow"', () => { + // Category is a load-bearing field — workflow consumers may filter by it + // and the registry must preserve the "category for every descriptor" + // invariant so PR-G/PR-K reverse-regression can rely on it. + for (const descriptor of Object.values(P2P_WORKFLOW_MESSAGE_REGISTRY)) { + expect(descriptor.category).toBeDefined(); + expect(['workflow', 'config']).toContain(descriptor.category); + } + expect(P2P_WORKFLOW_MESSAGE_REGISTRY[P2P_CONFIG_MSG.SAVE].category).toBe('config'); + expect(P2P_WORKFLOW_MESSAGE_REGISTRY[P2P_CONFIG_MSG.SAVE_RESPONSE].category).toBe('config'); + expect(P2P_WORKFLOW_MESSAGE_REGISTRY[P2P_WORKFLOW_MSG.STATUS].category).toBe('workflow'); + expect(P2P_WORKFLOW_MESSAGE_REGISTRY[P2P_WORKFLOW_MSG.RUN_UPDATE].category).toBe('workflow'); + expect(P2P_WORKFLOW_MESSAGE_REGISTRY[P2P_WORKFLOW_MSG.DAEMON_HELLO].category).toBe('workflow'); + }); + + it('parseP2pWorkflowMessageType returns "known" for p2p.config.save', () => { + // After PR-E registration, p2p.config.save must no longer fall through to + // the unknown_p2p_message drop branch but unrelated p2p.* must still drop. + const knownConfig = parseP2pWorkflowMessageType(P2P_CONFIG_MSG.SAVE); + expect(knownConfig.kind).toBe('known'); + expect(requiresP2pWorkflowRequestId(P2P_CONFIG_MSG.SAVE)).toBe(true); + expect(requiresP2pWorkflowRequestId(P2P_CONFIG_MSG.SAVE_RESPONSE)).toBe(true); + // Default-deny still works for any p2p.* not in the registry. + expect(parseP2pWorkflowMessageType('p2p.config.future_secret')).toEqual({ + kind: 'drop', + diagnosticCode: 'unknown_p2p_message', + reason: 'unknown_p2p_message', + }); + }); +}); diff --git a/test/shared/p2p-workflow-redaction.test.ts b/test/shared/p2p-workflow-redaction.test.ts new file mode 100644 index 000000000..ee0110989 --- /dev/null +++ b/test/shared/p2p-workflow-redaction.test.ts @@ -0,0 +1,30 @@ +import { describe, expect, it } from 'vitest'; +import { + redactP2pWorkflowObjectForProjection, + redactP2pWorkflowTextForProjection, +} from '../../shared/p2p-workflow-redaction.js'; + +describe('p2p workflow redaction facade', () => { + it('applies raw cap, redaction, then projection cap', () => { + const redacted = redactP2pWorkflowTextForProjection( + `Bearer ${'a'.repeat(40)} tail`, + { rawCaptureMaxBytes: 100, projectionSnippetMaxBytes: 80 }, + ); + expect(redacted).toContain('[REDACTED:bearer]'); + expect(redacted).not.toContain('Bearer'); + expect(new TextEncoder().encode(redacted).byteLength).toBeLessThanOrEqual(80); + }); + + it('redacts sensitive object keys using shared logging redaction', () => { + const redacted = redactP2pWorkflowObjectForProjection({ + keep: 'value', + access_token: 'secret', + nested: { api_key: 'secret' }, + }); + expect(redacted).toEqual({ + keep: 'value', + access_token: '[REDACTED]', + nested: { api_key: '[REDACTED]' }, + }); + }); +}); diff --git a/test/shared/p2p-workflow-script.test.ts b/test/shared/p2p-workflow-script.test.ts new file mode 100644 index 000000000..84948e4ee --- /dev/null +++ b/test/shared/p2p-workflow-script.test.ts @@ -0,0 +1,142 @@ +import { describe, expect, it } from 'vitest'; +import { + DEFAULT_P2P_SCRIPT_CAPS, + parseP2pScriptMachineOutput, + validateP2pScriptContract, +} from '../../shared/p2p-workflow-script.js'; + +describe('p2p workflow script helpers', () => { + it('defaults script contracts to argv command kind and caps', () => { + const result = validateP2pScriptContract({ argv: ['node', 'script.mjs'] }); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.contract.commandKind).toBe('argv'); + expect(result.contract.argv).toEqual(['node', 'script.mjs']); + expect(result.contract.caps).toEqual(DEFAULT_P2P_SCRIPT_CAPS); + } + }); + + it('rejects invalid script contracts', () => { + expect(validateP2pScriptContract({ commandKind: 'shell', argv: ['echo'] }).diagnostics[0]?.code).toBe('invalid_script_contract'); + expect(validateP2pScriptContract({ commandKind: 'argv', argv: [] }).diagnostics[0]?.code).toBe('invalid_script_contract'); + expect(validateP2pScriptContract({ commandKind: 'argv', argv: [''] }).diagnostics[0]?.code).toBe('invalid_script_contract'); + expect(validateP2pScriptContract({ commandKind: 'argv', argv: ['echo'], caps: { stdoutBytes: -1 } }).diagnostics[0]?.code).toBe('invalid_script_contract'); + }); + + it('parses structured NDJSON machine output frames', () => { + const result = parseP2pScriptMachineOutput([ + JSON.stringify({ + kind: 'p2p_script_machine_output_v1', + routingKey: 'accepted', + variables: { answer: 42, flags: ['a', 'b'] }, + artifacts: [{ path: 'artifacts/result.json', sha256: 'a'.repeat(64) }], + }), + JSON.stringify({ kind: 'p2p_script_machine_output_v1', displaySummary: 'done' }), + ].join('\n')); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.frames).toHaveLength(2); + expect(result.finalFrame.displaySummary).toBe('done'); + expect(result.frames[0]?.routingKey).toBe('accepted'); + expect(result.frames[0]?.variables).toEqual({ answer: 42, flags: ['a', 'b'] }); + expect(result.frames[0]?.artifacts?.[0]?.path).toBe('artifacts/result.json'); + } + }); + + it('defaults to lenient last-valid machine output parsing', () => { + const result = parseP2pScriptMachineOutput([ + '{bad json', + JSON.stringify({ kind: 'other', routingKey: 'ignored' }), + JSON.stringify({ kind: 'p2p_script_machine_output_v1', routingKey: 'first' }), + JSON.stringify({ kind: 'p2p_script_machine_output_v1', routingKey: 'final' }), + ].join('\n')); + + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.frames.map((frame) => frame.routingKey)).toEqual(['first', 'final']); + expect(result.finalFrame.routingKey).toBe('final'); + expect(result.diagnostics).toEqual([ + expect.objectContaining({ code: 'script_machine_output_invalid', severity: 'warning' }), + expect.objectContaining({ code: 'script_machine_output_invalid', severity: 'warning' }), + ]); + } + }); + + it('preserves strict machine output parsing when requested', () => { + const result = parseP2pScriptMachineOutput([ + JSON.stringify({ kind: 'p2p_script_machine_output_v1', routingKey: 'first' }), + '{bad json', + ].join('\n'), { mode: 'strict' }); + + expect(result.ok).toBe(false); + expect(result.diagnostics[0]).toEqual(expect.objectContaining({ + code: 'script_machine_output_invalid', + severity: 'error', + })); + }); + + it('enforces total and per-frame machine output byte caps', () => { + expect(parseP2pScriptMachineOutput( + JSON.stringify({ kind: 'p2p_script_machine_output_v1', displaySummary: 'x'.repeat(40) }), + { maxFrameBytes: 16 }, + ).ok).toBe(false); + + expect(parseP2pScriptMachineOutput( + `${JSON.stringify({ kind: 'p2p_script_machine_output_v1' })}\n${'x'.repeat(20)}`, + { maxTotalBytes: 16 }, + ).ok).toBe(false); + }); + + it('rejects malformed and non-machine-output script frames', () => { + expect(parseP2pScriptMachineOutput('plain text ROUTE=accepted').diagnostics[0]?.code).toBe('script_machine_output_invalid'); + expect(parseP2pScriptMachineOutput(JSON.stringify({ kind: 'other', routingKey: 'accepted' })).diagnostics[0]?.code).toBe('script_machine_output_invalid'); + expect(parseP2pScriptMachineOutput(JSON.stringify({ + kind: 'p2p_script_machine_output_v1', + artifacts: [{ path: '../secret' }], + })).diagnostics[0]?.code).toBe('script_machine_output_invalid'); + expect(parseP2pScriptMachineOutput(JSON.stringify({ + kind: 'p2p_script_machine_output_v1', + variables: { nested: { nope: true } }, + })).diagnostics[0]?.code).toBe('script_machine_output_invalid'); + }); + + it('rejects invalid contract environment and stdin caps', () => { + expect(validateP2pScriptContract({ argv: ['node'], envAllowlist: ['bad-name'] }).ok).toBe(false); + expect(validateP2pScriptContract({ argv: ['node'], stdin: 'hello', caps: { stdinBytes: 4 } }).ok).toBe(false); + }); + + // Audit:R3 PR-β / M-3 — lenient mode truncates at line boundary instead of + // rejecting the entire output. + it('lenient mode truncates at line boundary on total-bytes overflow and reports truncated:true', () => { + const validFrame1 = JSON.stringify({ kind: 'p2p_script_machine_output_v1', routingKey: 'first' }); + const validFrame2 = JSON.stringify({ kind: 'p2p_script_machine_output_v1', routingKey: 'second' }); + const trailingFrame = JSON.stringify({ kind: 'p2p_script_machine_output_v1', routingKey: 'dropped' }); + const input = `${validFrame1}\n${validFrame2}\n${trailingFrame}\n`; + // Cap allows the first two frames + their newline boundary, but cuts off + // the trailing frame. + const cap = validFrame1.length + 1 + validFrame2.length + 1; + const result = parseP2pScriptMachineOutput(input, { mode: 'lenient_last_valid', maxTotalBytes: cap }); + expect(result.ok).toBe(true); + expect(result.truncated).toBe(true); + if (result.ok) { + expect(result.frames.map((frame) => frame.routingKey)).toEqual(['first', 'second']); + expect(result.finalFrame.routingKey).toBe('second'); + // Truncation diagnostic is present at warning severity. + expect(result.diagnostics.some((diagnostic) => + diagnostic.code === 'script_machine_output_invalid' && diagnostic.severity === 'warning', + )).toBe(true); + } + }); + + it('strict mode rejects total-bytes overflow without partial frames', () => { + const validFrame = JSON.stringify({ kind: 'p2p_script_machine_output_v1', routingKey: 'ok' }); + const input = `${validFrame}\n${'x'.repeat(20)}`; + const result = parseP2pScriptMachineOutput(input, { mode: 'strict', maxTotalBytes: validFrame.length }); + expect(result.ok).toBe(false); + expect(result.diagnostics.some((diagnostic) => + diagnostic.code === 'script_machine_output_invalid' && diagnostic.summary?.includes('total byte cap'), + )).toBe(true); + }); +}); diff --git a/test/shared/p2p-workflow-validators.test.ts b/test/shared/p2p-workflow-validators.test.ts new file mode 100644 index 000000000..1ea93dabd --- /dev/null +++ b/test/shared/p2p-workflow-validators.test.ts @@ -0,0 +1,244 @@ +import { describe, expect, it } from 'vitest'; +import { + validateP2pPersistedWorkflowSnapshot, + validateP2pWorkflowLaunchEnvelope, + validateP2pWorkflowStatusProjection, +} from '../../shared/p2p-workflow-validators.js'; +import type { P2pWorkflowDraft, P2pWorkflowLaunchEnvelope } from '../../shared/p2p-workflow-types.js'; + +const draft: P2pWorkflowDraft = { + schemaVersion: 1, + id: 'wf_valid', + nodes: [ + { id: 'n1', nodeKind: 'llm', preset: 'audit', permissionScope: 'analysis_only' }, + ], + edges: [], + rootNodeId: 'n1', +}; + +describe('p2p workflow validators', () => { + it('accepts a valid advanced launch envelope', () => { + const envelope: P2pWorkflowLaunchEnvelope = { + workflowSchemaVersion: 1, + workflowKind: 'advanced', + advancedDraft: draft, + }; + expect(validateP2pWorkflowLaunchEnvelope(envelope).ok).toBe(true); + }); + + // Audit:R3 PR-γ / N-M5 / V-4 — `expectedStaticPolicyHash` is a v1a-added + // optional field for daemon-side `static_policy_mismatch_recompiled`. + it('accepts expectedStaticPolicyHash on launch envelope', () => { + const envelope: P2pWorkflowLaunchEnvelope = { + workflowSchemaVersion: 1, + workflowKind: 'advanced', + advancedDraft: draft, + expectedStaticPolicyHash: 'fnv1a64:abcdef0123456789', + }; + expect(validateP2pWorkflowLaunchEnvelope(envelope).ok).toBe(true); + }); + + it('rejects malformed expectedStaticPolicyHash', () => { + // empty string / wrong type / oversize all rejected with invalid_launch_envelope + expect(validateP2pWorkflowLaunchEnvelope({ + workflowSchemaVersion: 1, + workflowKind: 'advanced', + advancedDraft: draft, + expectedStaticPolicyHash: '', + }).ok).toBe(false); + expect(validateP2pWorkflowLaunchEnvelope({ + workflowSchemaVersion: 1, + workflowKind: 'advanced', + advancedDraft: draft, + expectedStaticPolicyHash: 'x'.repeat(200), + }).ok).toBe(false); + expect(validateP2pWorkflowLaunchEnvelope({ + workflowSchemaVersion: 1, + workflowKind: 'advanced', + advancedDraft: draft, + expectedStaticPolicyHash: 12345 as unknown as string, + }).ok).toBe(false); + }); + + // R3 PR-δ (A6 / Cu1-M2) — multi-byte characters were previously accepted + // because the validator only checked JS string `length`. The fix enforces + // visible-ASCII pattern + UTF-8 byte length cap; both must reject. + it('rejects expectedStaticPolicyHash with non-ASCII characters', () => { + expect(validateP2pWorkflowLaunchEnvelope({ + workflowSchemaVersion: 1, + workflowKind: 'advanced', + advancedDraft: draft, + expectedStaticPolicyHash: 'fnv1a64:abc中文ef', + }).ok).toBe(false); + }); + + it('rejects expectedStaticPolicyHash with control characters', () => { + expect(validateP2pWorkflowLaunchEnvelope({ + workflowSchemaVersion: 1, + workflowKind: 'advanced', + advancedDraft: draft, + expectedStaticPolicyHash: 'fnv1a64:abc\nef', + }).ok).toBe(false); + }); + + it('rejects future schema versions', () => { + const result = validateP2pWorkflowLaunchEnvelope({ + workflowSchemaVersion: 999, + workflowKind: 'advanced', + advancedDraft: draft, + }); + expect(result.ok).toBe(false); + expect(result.diagnostics[0]?.code).toBe('unsupported_schema_version'); + }); + + it('rejects mixed old and new advanced fields', () => { + const result = validateP2pWorkflowLaunchEnvelope({ + workflowSchemaVersion: 1, + workflowKind: 'advanced', + advancedPresetKey: 'openspec', + advancedDraft: draft, + }); + expect(result.ok).toBe(false); + expect(result.diagnostics[0]?.code).toBe('mixed_advanced_schema_fields'); + }); + + it('rejects forbidden private envelope fields recursively', () => { + const result = validateP2pWorkflowLaunchEnvelope({ + workflowSchemaVersion: 1, + workflowKind: 'advanced', + advancedDraft: draft, + launchContext: { token: 'secret' }, + }); + expect(result.ok).toBe(false); + expect(result.diagnostics[0]?.code).toBe('forbidden_envelope_field'); + expect(result.diagnostics[0]?.fieldPath).toBe('launchContext.token'); + }); + + it('rejects invalid node kind and invalid variable values', () => { + const result = validateP2pWorkflowLaunchEnvelope({ + workflowSchemaVersion: 1, + workflowKind: 'advanced', + advancedDraft: { + ...draft, + nodes: [{ id: 'n1', nodeKind: 'audit', preset: 'audit' }], + variables: [{ name: 'Bad', value: { nested: true } }], + }, + }); + expect(result.ok).toBe(false); + expect(result.diagnostics.map((diagnostic) => diagnostic.code)).toContain('invalid_workflow_graph'); + expect(result.diagnostics.map((diagnostic) => diagnostic.code)).toContain('invalid_workflow_variable'); + }); + + it('validates launch context and required daemon capabilities', () => { + const result = validateP2pWorkflowLaunchEnvelope({ + workflowSchemaVersion: 1, + workflowKind: 'advanced', + advancedDraft: draft, + requiredDaemonCapabilities: ['p2p.workflow.v1', 'p2p.workflow.unknown.v1'], + launchContext: { requestId: 'bad request id with spaces' }, + }); + + expect(result.ok).toBe(false); + expect(result.diagnostics).toEqual(expect.arrayContaining([ + expect.objectContaining({ code: 'missing_required_capability', fieldPath: 'requiredDaemonCapabilities[1]' }), + expect.objectContaining({ code: 'invalid_launch_envelope', fieldPath: 'launchContext.requestId' }), + ])); + }); + + it('validates start context sources and file reference paths', () => { + const result = validateP2pWorkflowLaunchEnvelope({ + workflowSchemaVersion: 1, + workflowKind: 'advanced', + advancedDraft: { + ...draft, + startContext: { + sources: [ + { kind: 'current_prompt', id: 'prompt' }, + { kind: 'file_reference', id: 'file', path: '../secret.txt' }, + ], + }, + }, + }); + + expect(result.ok).toBe(false); + expect(result.diagnostics).toEqual(expect.arrayContaining([ + expect.objectContaining({ code: 'unsafe_artifact_path', fieldPath: 'startContext.sources[1].path' }), + ])); + }); + + it('rejects invalid node preset and permission scope combinations', () => { + const result = validateP2pWorkflowLaunchEnvelope({ + workflowSchemaVersion: 1, + workflowKind: 'advanced', + advancedDraft: { + ...draft, + nodes: [ + { id: 'n1', nodeKind: 'logic', preset: 'audit', permissionScope: 'analysis_only' }, + { id: 'n2', nodeKind: 'llm', preset: 'openspec_propose', permissionScope: 'analysis_only' }, + ], + }, + }); + + expect(result.ok).toBe(false); + expect(result.diagnostics.filter((diagnostic) => diagnostic.code === 'invalid_workflow_graph')).toHaveLength(2); + }); + + it('accepts artifact-producing openspec proposal nodes with explicit contracts', () => { + const result = validateP2pWorkflowLaunchEnvelope({ + workflowSchemaVersion: 1, + workflowKind: 'advanced', + advancedDraft: { + ...draft, + nodes: [{ + id: 'n1', + nodeKind: 'llm', + preset: 'openspec_propose', + permissionScope: 'artifact_generation', + artifacts: [{ + convention: 'openspec_convention', + paths: ['openspec/changes/demo/specs/demo/spec.md'], + permissionScope: 'artifact_generation', + }], + }], + }, + }); + + expect(result.ok).toBe(true); + }); + + it('guards forbidden-field scans against cycles and excessive arrays', () => { + const cyclicDraft = { ...draft, self: null as unknown }; + cyclicDraft.self = cyclicDraft; + expect(validateP2pWorkflowLaunchEnvelope({ + workflowSchemaVersion: 1, + workflowKind: 'advanced', + advancedDraft: cyclicDraft, + }).ok).toBe(true); + + const result = validateP2pWorkflowLaunchEnvelope({ + workflowSchemaVersion: 1, + workflowKind: 'advanced', + advancedDraft: draft, + nested: new Array(1001).fill('x'), + }); + expect(result.ok).toBe(false); + expect(result.diagnostics[0]?.code).toBe('forbidden_envelope_field'); + }); + + it('validates public projections and persisted snapshots', () => { + const projection = { + projectionVersion: 1, + runId: 'run-1', + workflowId: 'wf-1', + status: 'running', + completedNodeIds: ['n1'], + diagnostics: [], + updatedAt: '2026-05-09T00:00:00.000Z', + }; + + expect(validateP2pWorkflowStatusProjection(projection).ok).toBe(true); + expect(validateP2pPersistedWorkflowSnapshot(projection).ok).toBe(true); + expect(validateP2pWorkflowStatusProjection({ ...projection, projectionVersion: 999 }).diagnostics[0]?.code).toBe('unsupported_schema_version'); + expect(validateP2pPersistedWorkflowSnapshot({ ...projection, capabilitySnapshot: { daemonId: 'd' } }).ok).toBe(false); + }); +}); diff --git a/test/shared/test-session-guard.test.ts b/test/shared/test-session-guard.test.ts index 70539529f..6bb5e0b08 100644 --- a/test/shared/test-session-guard.test.ts +++ b/test/shared/test-session-guard.test.ts @@ -16,6 +16,8 @@ describe('test session guard', () => { expect(isKnownTestSessionName('deck_storecheckabc123_brain')).toBe(true); expect(isKnownTestSessionName('deck_shutdownabc123_probe')).toBe(true); expect(isKnownTestSessionName('deck_test_preview_abc123_brain')).toBe(true); + expect(isKnownTestSessionName('deck_test_p2p_workflow_abc123_brain')).toBe(true); + expect(isKnownTestSessionName('imcodes-test-p2p-workflow-abc123')).toBe(true); expect(isKnownTestSessionName('deck_realproj_brain')).toBe(false); }); @@ -26,8 +28,11 @@ describe('test session guard', () => { expect(isKnownTestProjectName('storecheckabc123')).toBe(true); expect(isKnownTestProjectName('shutdownabc123')).toBe(true); expect(isKnownTestProjectName('imcodes-test-preview-dist')).toBe(true); + expect(isKnownTestProjectName('imcodes-test-p2p-workflow-dist')).toBe(true); expect(isKnownTestProjectDir('/tmp/cxsdk-sub-e2e')).toBe(true); expect(isKnownTestProjectDir('/tmp/imcodes-test-preview-dist-abc123/project')).toBe(true); + expect(isKnownTestProjectDir('/tmp/imcodes-test-p2p-workflow-abc123/project')).toBe(true); + expect(isKnownTestProjectDir('/tmp/imc_p2p_wf_test_abc123/project')).toBe(true); expect(isKnownTestProjectDir('/Users/me/src/myapp')).toBe(false); }); diff --git a/test/spec/p2p-workflow-regression.test.ts b/test/spec/p2p-workflow-regression.test.ts new file mode 100644 index 000000000..a309d0a42 --- /dev/null +++ b/test/spec/p2p-workflow-regression.test.ts @@ -0,0 +1,1705 @@ +import { describe, it, expect } from 'vitest'; +import { readFileSync, existsSync } from 'node:fs'; +import { resolve } from 'node:path'; + +// Static reverse-regression guard for the smart-p2p-upgrade change. +// +// This is NOT a runtime test; it is a grep-style guard that fails CI if any of +// the high-risk anti-patterns called out in the OpenSpec spec-gates re-enter +// the source tree. Each guard is calibrated against the current safe state of +// the codebase. +// +// If a guard breaks because of a legitimate refactor, update both the source +// and the regex in the same commit so future regressions still fail the test. + +const ROOT = resolve(__dirname, '..', '..'); + +interface FileText { + path: string; + text: string; + lines: string[]; +} + +function read(rel: string): FileText { + const abs = resolve(ROOT, rel); + const text = readFileSync(abs, 'utf8'); + return { path: rel, text, lines: text.split('\n') }; +} + +function reportLines(file: FileText, predicate: (line: string) => boolean): string[] { + const offenders: string[] = []; + file.lines.forEach((line, index) => { + if (predicate(line)) offenders.push(`${file.path}:${index + 1}: ${line.trim()}`); + }); + return offenders; +} + +describe('p2p-workflow reverse-regression', () => { + // ── 1. Server WebSocket / DB code casting daemon payloads to `any` for + // advanced snapshot persistence. The current safe pattern is to use + // typed projections from `shared/p2p-workflow-types.ts` and the + // allowlist sanitizer in `server/src/p2p-workflow-sanitize.ts`. Any + // `as any` on a line that mentions `progress_snapshot` or + // `workflow_projection` indicates an attempt to bypass the sanitizer. + it('server code never casts daemon payloads to `any` for advanced snapshot persistence', () => { + const files = [ + 'server/src/p2p-workflow-sanitize.ts', + 'server/src/ws/bridge.ts', + 'server/src/db/queries.ts', + 'server/src/routes/discussions.ts', + ].filter((rel) => existsSync(resolve(ROOT, rel))); + + const offenders: string[] = []; + for (const rel of files) { + const file = read(rel); + offenders.push( + ...reportLines(file, (line) => + /\bas\s+any\b/.test(line) && /(progress_snapshot|workflow_projection)/.test(line), + ), + ); + } + expect(offenders, `Disallowed \`as any\` cast on a line referencing progress_snapshot/workflow_projection:\n${offenders.join('\n')}`).toEqual([]); + }); + + // ── 2. P2pWorkflowStatusProjection / P2pPersistedWorkflowSnapshot must NOT + // be declared with arbitrary index signatures. Allowing + // `[key: string]: unknown` would defeat the allowlist sanitizer by + // letting executor-private fields ride along on the public projection. + it('public projection types never declare arbitrary index signatures', () => { + const file = read('shared/p2p-workflow-types.ts'); + const interfaceRegions: Array<{ name: string; start: number; end: number }> = []; + file.lines.forEach((line, index) => { + const match = /^export interface (P2pWorkflowStatusProjection|P2pPersistedWorkflowSnapshot)\b/.exec(line); + if (match) interfaceRegions.push({ name: match[1], start: index, end: file.lines.length }); + }); + // Resolve end of each interface (next `^}` line at column 0). + for (const region of interfaceRegions) { + for (let i = region.start + 1; i < file.lines.length; i += 1) { + if (/^\}/.test(file.lines[i])) { + region.end = i; + break; + } + } + } + const offenders: string[] = []; + for (const region of interfaceRegions) { + for (let i = region.start; i <= region.end; i += 1) { + const line = file.lines[i]; + // Match index signatures like `[key: string]: unknown` or `[k: string]: any`. + if (/\[[A-Za-z_$][A-Za-z0-9_$]*\s*:\s*string\s*\]\s*:/.test(line)) { + offenders.push(`${file.path}:${i + 1}: ${line.trim()} (in ${region.name})`); + } + } + } + // Also forbid the same in any other file that redeclares these types. + const validatorFile = read('shared/p2p-workflow-validators.ts'); + validatorFile.lines.forEach((line, index) => { + // Only flag interface/type redeclarations (not type aliases that reference the canonical type). + if (/^export\s+(interface|type)\s+(P2pWorkflowStatusProjection|P2pPersistedWorkflowSnapshot)\b/.test(line)) { + offenders.push(`${validatorFile.path}:${index + 1}: forbidden redeclaration of canonical projection type`); + } + }); + expect(offenders, `Public projection types must not have arbitrary index signatures or redeclarations:\n${offenders.join('\n')}`).toEqual([]); + }); + + // ── 3. The server bridge MUST default-deny unknown `p2p.*` messages by + // routing them through `parseP2pWorkflowMessageType` BEFORE any + // forwarding/broadcast. There must be no ad-hoc `case 'p2p.x':` or + // `startsWith('p2p.')` branches in `server/src/ws/bridge.ts` that + // forward without going through the registry. + it('server bridge never default-broadcasts unknown p2p.* messages', () => { + const file = read('server/src/ws/bridge.ts'); + expect( + file.text.includes('parseP2pWorkflowMessageType'), + 'bridge.ts must import and call parseP2pWorkflowMessageType to gate p2p.* routing', + ).toBe(true); + + const offenders: string[] = []; + file.lines.forEach((line, index) => { + const trimmed = line.trim(); + // Forbid `case 'p2p.':` switch arms (registry-driven dispatch should not branch on literals). + if (/^case\s+(['"`])p2p\.[A-Za-z0-9_.]+\1\s*:/.test(trimmed)) { + offenders.push(`${file.path}:${index + 1}: ${trimmed}`); + } + // Forbid `msg.type.startsWith('p2p.')` / `type.startsWith('p2p.')` style fan-out. + if (/\.startsWith\((['"`])p2p\.\1\)/.test(trimmed)) { + offenders.push(`${file.path}:${index + 1}: ${trimmed}`); + } + }); + expect(offenders, `Bridge contains ad-hoc p2p.* dispatch that bypasses parseP2pWorkflowMessageType:\n${offenders.join('\n')}`).toEqual([]); + + // The relayToBrowsers helper must call parseP2pWorkflowMessageType BEFORE + // any later `safeSend`/broadcast/`forEach` over viewers — otherwise unknown + // p2p messages could leak. Locate the relayToBrowsers function span and + // verify the parse call appears in the first dozen lines of its body. + const relayStart = file.lines.findIndex((line) => /private\s+relayToBrowsers\s*\(/.test(line)); + expect(relayStart, 'relayToBrowsers function not found in bridge.ts').toBeGreaterThanOrEqual(0); + const headerWindow = file.lines.slice(relayStart, relayStart + 30).join('\n'); + expect( + /parseP2pWorkflowMessageType\s*\(/.test(headerWindow), + 'relayToBrowsers must call parseP2pWorkflowMessageType in its first 30 lines (default-deny for unknown p2p.*)', + ).toBe(true); + }); + + // ── 4. The advanced runtime MUST NOT execute raw `advancedRounds` from the + // command. `compileP2pWorkflowDraft` is the SOLE source of advanced + // round materialization for envelope-based launches, and the rounds + // that flow into `startP2pRun` come from `preparedAdvanced.advancedRounds` + // (compiled+bound) before the legacy passthrough is allowed. + it('advanced rounds for new-envelope launches always flow through compileP2pWorkflowDraft', () => { + const file = read('src/daemon/command-handler.ts'); + expect( + file.text.includes('compileP2pWorkflowDraft'), + 'command-handler must import and use compileP2pWorkflowDraft for advanced launches', + ).toBe(true); + expect( + file.text.includes('bindP2pCompiledWorkflow'), + 'command-handler must import and use bindP2pCompiledWorkflow for advanced launches', + ).toBe(true); + expect( + file.text.includes('prepareAdvancedWorkflowLaunch'), + 'command-handler must funnel advanced launches through prepareAdvancedWorkflowLaunch', + ).toBe(true); + + // prepareAdvancedWorkflowLaunch must invoke compileP2pWorkflowDraft and + // bindP2pCompiledWorkflow internally — no other call site is allowed for + // these functions in the daemon source tree. + const compileCount = (file.text.match(/\bcompileP2pWorkflowDraft\s*\(/g) ?? []).length; + expect(compileCount, 'compileP2pWorkflowDraft must be invoked exactly once in command-handler (inside prepareAdvancedWorkflowLaunch)').toBe(1); + const bindCount = (file.text.match(/\bbindP2pCompiledWorkflow\s*\(/g) ?? []).length; + expect(bindCount, 'bindP2pCompiledWorkflow must be invoked exactly once in command-handler (inside prepareAdvancedWorkflowLaunch)').toBe(1); + + // Audit:V-1 / N-H1 — startP2pRun MUST receive the bound workflow via the + // typed `advanced: { kind: 'envelope_compiled', bound: preparedAdvanced.bound, ... }` + // discriminated union. Pure-legacy launches (no envelope) fall back to the + // deprecated top-level `advancedPresetKey` / `advancedRounds` passthrough. + // This guards against a future edit that bypasses the bound parameter. + expect( + /kind:\s*'envelope_compiled'[^,]*,?\s*bound:\s*preparedAdvanced\.bound/m.test(file.text), + 'startP2pRun call must pass `advanced: { kind: "envelope_compiled", bound: preparedAdvanced.bound, ... }` so executor receives capabilitySnapshot/policy', + ).toBe(true); + expect( + file.text.includes('compiledFromEnvelope'), + 'command-handler must distinguish compiled-from-envelope path from legacy passthrough (look for `compiledFromEnvelope` ternary)', + ).toBe(true); + + // Make sure no daemon file outside src/daemon/p2p-workflow-bind.ts and + // src/daemon/command-handler.ts invokes compile/bind directly — both must + // remain centralised through prepareAdvancedWorkflowLaunch. + const candidatePaths = [ + 'src/daemon/p2p-orchestrator.ts', + 'src/daemon/server-link.ts', + 'src/router/message-router.ts', + ]; + const offenders: string[] = []; + for (const rel of candidatePaths) { + if (!existsSync(resolve(ROOT, rel))) continue; + const f = read(rel); + f.lines.forEach((line, index) => { + if (/\bcompileP2pWorkflowDraft\s*\(/.test(line) || /\bbindP2pCompiledWorkflow\s*\(/.test(line)) { + offenders.push(`${rel}:${index + 1}: ${line.trim()}`); + } + }); + } + expect(offenders, `compile/bind must only be invoked from command-handler/p2p-workflow-bind:\n${offenders.join('\n')}`).toEqual([]); + }); + + // ── 5. Artifact success checks must NOT use `readdir().join(` as evidence — + // that pattern was identified as unsafe (cannot detect modifications, + // missing fields, or hash collisions). The advanced workflow artifact + // runtime in `src/daemon/p2p-workflow-artifact-runtime.ts` and the + // shared helpers in `shared/p2p-workflow-artifacts.ts` MUST avoid it. + // + // Note: `src/daemon/p2p-orchestrator.ts:1276` contains a legacy + // `readdir().join('\\n')` for the OLD openspec_convention path — that + // pre-existing legacy behavior is explicitly out-of-scope for the new + // workflow guard. The new workflow paths must remain free of it. + it('new advanced workflow artifact code never uses readdir().join() as success evidence', () => { + const guarded = [ + 'shared/p2p-workflow-artifacts.ts', + 'src/daemon/p2p-workflow-artifact-runtime.ts', + ].filter((rel) => existsSync(resolve(ROOT, rel))); + + const offenders: string[] = []; + for (const rel of guarded) { + const file = read(rel); + // Match a readdir(...) call DIRECTLY chained to .join( — i.e. with + // nothing between the closing `)` of readdir (and an optional outer `)` + // for `(await readdir(...))`) and the `.join(`. We tolerate whitespace + // only, NOT identifiers, semicolons, or other tokens. This excludes + // legitimate uses like `path.join(...)` later in the same file. + const compactText = file.text.replace(/\s+/g, ' '); + const pattern = /\breaddir\s*\([^()]*\)\s*\)?\.join\s*\(/g; + let match: RegExpExecArray | null; + while ((match = pattern.exec(compactText)) != null) { + offenders.push(`${rel}: matched substring "${match[0]}"`); + } + } + expect(offenders, `Artifact runtime must not use readdir().join() as success evidence:\n${offenders.join('\n')}`).toEqual([]); + }); + + // ── 6. Every caller of `findForbiddenEnvelopeField` must check the return + // value against null and use it to bail out. A launch path that calls + // the helper and then ignores the result silently allows forbidden + // executor-private fields (compiledWorkflow, rawPrompt, env, tokens) + // to flow through to the daemon. + it('every findForbiddenEnvelopeField caller checks the return and bails out', () => { + const candidates = [ + 'shared/p2p-workflow-validators.ts', + 'web/src/components/P2pConfigPanel.tsx', + ]; + const offenders: string[] = []; + for (const rel of candidates) { + const file = read(rel); + file.lines.forEach((line, index) => { + // Find every call site of findForbiddenEnvelopeField(. + if (!/\bfindForbiddenEnvelopeField\s*\(/.test(line)) return; + // Skip the export declarations (function definitions). + if (/^\s*export\s+function\s+findForbiddenEnvelopeField\b/.test(line)) return; + if (/^\s*function\s+findForbiddenEnvelopeField\b/.test(line)) return; + + // Acceptable usage forms — return value must be captured/used to bail: + // if (... findForbiddenEnvelopeField(value) ...) ... + // const x = findForbiddenEnvelopeField(...) (with `if (x)` nearby) + // return findForbiddenEnvelopeField(...) (recursive call inside the function itself) + // findForbiddenEnvelopeField inside boolean expression of `if (...)` or `||` / `&&` + const isAssignment = /\b(const|let|var)\s+[A-Za-z0-9_$]+\s*=\s*findForbiddenEnvelopeField\s*\(/.test(line); + const isReturn = /\breturn\s+findForbiddenEnvelopeField\s*\(/.test(line); + const inIfCondition = /\bif\s*\([^)]*findForbiddenEnvelopeField\s*\(/.test(line); + const inLogicalChain = /(\|\||&&|!)\s*findForbiddenEnvelopeField\s*\(/.test(line); + const isRecursiveCall = /^\s*const\s+(found|nested)\s*=\s*findForbiddenEnvelopeField\s*\(/.test(line); + + if (!isAssignment && !isReturn && !inIfCondition && !inLogicalChain && !isRecursiveCall) { + offenders.push(`${rel}:${index + 1}: ${line.trim()} — return value not used to bail out`); + return; + } + + // For assignments, verify the next ~6 lines reference the captured name in + // an `if`/early-return guard. Skip recursive `nested`/`found` helpers. + const assignMatch = /\b(?:const|let|var)\s+([A-Za-z0-9_$]+)\s*=\s*findForbiddenEnvelopeField\s*\(/.exec(line); + if (assignMatch && !isRecursiveCall) { + const varName = assignMatch[1]; + const window = file.lines.slice(index, index + 8).join('\n'); + const guardPattern = new RegExp(`(if\\s*\\(\\s*${varName}\\b|${varName}\\s*(?:\\?|\\|\\||&&)|return\\s+\\{[^}]*\\b${varName}\\b)`); + if (!guardPattern.test(window)) { + offenders.push(`${rel}:${index + 1}: assignment to \`${varName}\` from findForbiddenEnvelopeField is not followed by a guard check`); + } + } + }); + } + expect(offenders, `findForbiddenEnvelopeField return values must be checked and used to fail launch:\n${offenders.join('\n')}`).toEqual([]); + }); + + // ── 7. Daemon advanced admission MUST return `daemon_busy` synchronously + // and MUST NOT push over-capacity launches onto a queue. The contract + // lives in `src/daemon/p2p-workflow-bind.ts` and the launch wiring in + // `src/daemon/command-handler.ts` — neither file may contain a queue + // that retries an over-capacity advanced launch. + it('daemon advanced admission rejects over-capacity launches synchronously without queueing', () => { + const bind = read('src/daemon/p2p-workflow-bind.ts'); + expect( + /reason:\s*'daemon_busy'/.test(bind.text), + 'p2p-workflow-bind must return reason: \'daemon_busy\' synchronously', + ).toBe(true); + + // The bind function must NOT contain queue/enqueue/setTimeout/setInterval — + // any of those would imply async retry of a `daemon_busy` outcome. + const bannedPatterns: Array<{ name: string; pattern: RegExp }> = [ + { name: 'queue', pattern: /\bqueue\b/i }, + { name: 'enqueue', pattern: /\benqueue\b/i }, + { name: 'setTimeout', pattern: /\bsetTimeout\s*\(/ }, + { name: 'setInterval', pattern: /\bsetInterval\s*\(/ }, + ]; + const bindOffenders: string[] = []; + bind.lines.forEach((line, index) => { + for (const { name, pattern } of bannedPatterns) { + if (pattern.test(line)) bindOffenders.push(`${bind.path}:${index + 1}: forbidden \`${name}\` near daemon_busy admission — ${line.trim()}`); + } + }); + expect(bindOffenders, `p2p-workflow-bind must not queue or async-retry advanced admission:\n${bindOffenders.join('\n')}`).toEqual([]); + + // The launch wiring in command-handler.ts must not introduce an + // `advancedRunQueue`/`pendingAdvancedRuns`/`P2P_WORKFLOW_MAX_ACTIVE_RUNS` + // queue that buffers over-capacity launches. We allow MAX_ACTIVE_RUNS + // itself (used as a synchronous admission threshold), but not any + // construct named `advancedRun*Queue` / `advancedRunQueue` / similar. + const handler = read('src/daemon/command-handler.ts'); + const handlerOffenders: string[] = []; + handler.lines.forEach((line, index) => { + if (/advancedRun[A-Za-z]*Queue\b/.test(line)) { + handlerOffenders.push(`${handler.path}:${index + 1}: forbidden advanced-run queue — ${line.trim()}`); + } + if (/pendingAdvancedRuns\b/.test(line)) { + handlerOffenders.push(`${handler.path}:${index + 1}: forbidden pendingAdvancedRuns container — ${line.trim()}`); + } + // Defensive: an `enqueue(advancedRun…)` call would also be a regression. + if (/enqueue\s*\([^)]*advanced/i.test(line)) { + handlerOffenders.push(`${handler.path}:${index + 1}: forbidden enqueue of advanced run — ${line.trim()}`); + } + }); + expect(handlerOffenders, `command-handler must not queue over-capacity advanced launches:\n${handlerOffenders.join('\n')}`).toEqual([]); + + // The admission threshold MUST come from the daemon static policy — i.e. + // `staticPolicy.concurrency.maxAdvancedRuns` — not from a hardcoded + // constant. Audit:N-H3 / R2-A2: a regression here would mean the cap can + // no longer be tuned via daemon policy and would drift from what the spec + // labels as the single source of truth. + const acceptedFromPolicy = /accepted:\s*activeAdvancedRuns\.length\s*<\s*staticPolicy\.concurrency\.maxAdvancedRuns/.test(handler.text); + expect( + acceptedFromPolicy, + 'command-handler must compute admission as `accepted: activeAdvancedRuns.length < staticPolicy.concurrency.maxAdvancedRuns`', + ).toBe(true); + }); + + // ── 8. (Task 2.8) Legacy no-advanced launches MUST stay on the existing + // direct legacy path — they must NOT enter the advanced compiler. The + // command-handler proves this by short-circuiting `prepareAdvancedWorkflowLaunch` + // when neither old advanced fields nor a workflow envelope are present. + // A regression here would silently route legacy P2P launches through the + // compile/bind pipeline (and accidentally apply v1 graph constraints). + it('legacy no-advanced launches do not enter the advanced compiler in v1', () => { + const handler = read('src/daemon/command-handler.ts'); + const prepareStart = handler.lines.findIndex((line) => /async\s+function\s+prepareAdvancedWorkflowLaunch\b/.test(line)); + expect(prepareStart, 'prepareAdvancedWorkflowLaunch must exist in command-handler.ts').toBeGreaterThanOrEqual(0); + + // Within the function body's first ~30 lines, there must be an early + // return that bails out when no envelope is constructed (covering the + // pure-legacy launch case). This guarantees compileP2pWorkflowDraft and + // bindP2pCompiledWorkflow are never reached on the legacy path. + const window = handler.lines.slice(prepareStart, prepareStart + 30).join('\n'); + const earlyReturn = /if\s*\(!envelope\)\s*return\s+\{\s*ok:\s*true,\s*advancedRounds:\s*\[\]/.test(window); + expect( + earlyReturn, + 'prepareAdvancedWorkflowLaunch must early-return `{ ok: true, advancedRounds: [] }` when no envelope is constructed (legacy passthrough)', + ).toBe(true); + + // The legacy passthrough fallback in startP2pRun must remain reachable — + // when the envelope path produced no bound workflow, the call site must + // forward raw `p2pAdvancedPresetKey` / `p2pAdvancedRounds` so cron and + // legacy fixtures keep their direct path. We assert by looking for the + // ternary spread shape `compiledFromEnvelope ? { advanced: ... } : { advancedPresetKey: p2pAdvancedPresetKey, ... }`. + const legacyFallback = /:\s*\{\s*advancedPresetKey:\s*p2pAdvancedPresetKey/.test(handler.text); + expect( + legacyFallback, + 'startP2pRun call must fall back to raw p2pAdvancedPresetKey/p2pAdvancedRounds when prepared advanced rounds are empty (preserves legacy passthrough)', + ).toBe(true); + }); + + // ── 9. (Task 6.10) Any OpenSpec-related automation in source code must NOT + // stage, commit, or push files under `openspec/` or `docs/`. Both + // directories are local-only planning/documentation artifacts and are + // explicitly listed in `.gitignore`. A regression here would push + // private OpenSpec drafts to the public repo. + it('no source-tree git automation stages openspec/ or docs/', () => { + const candidatePaths = [ + 'src/daemon/p2p-workflow-bind.ts', + 'src/daemon/p2p-workflow-runtime.ts', + 'src/daemon/p2p-workflow-script-runner.ts', + 'src/daemon/p2p-workflow-artifact-runtime.ts', + 'src/daemon/command-handler.ts', + 'shared/p2p-workflow-artifacts.ts', + 'shared/p2p-workflow-script.ts', + 'server/src/p2p-workflow-sanitize.ts', + 'server/src/ws/bridge.ts', + 'web/src/components/P2pConfigPanel.tsx', + ].filter((rel) => existsSync(resolve(ROOT, rel))); + + const offenders: string[] = []; + for (const rel of candidatePaths) { + const file = read(rel); + file.lines.forEach((line, index) => { + // Forbid `git add … openspec/…` / `git commit … docs/…` / `git push …` + // shapes that combine a git-mutation verb with the protected paths. + if (/\bgit\s+(add|commit|push|stage)\b/.test(line) && /(openspec|docs)\//.test(line)) { + offenders.push(`${rel}:${index + 1}: ${line.trim()}`); + } + // Defensive: spawn('git', ['add', 'openspec/…']) shape — combine `git` + // and `add`-like tokens within a short window when both protected paths + // appear on the same line. + if (/['"`]add['"`]\s*,\s*['"`](openspec|docs)\//.test(line)) { + offenders.push(`${rel}:${index + 1}: ${line.trim()}`); + } + }); + } + expect(offenders, `OpenSpec/docs paths must never be staged or committed by source-tree automation:\n${offenders.join('\n')}`).toEqual([]); + + // .gitignore must continue to list both directories so even an accidental + // `git add .` cannot stage them. This is a belt-and-suspenders check. + const gitignore = read('.gitignore'); + const ignored = gitignore.lines.map((l) => l.trim()).filter((l) => l && !l.startsWith('#')); + const haveOpenspec = ignored.some((entry) => entry === 'openspec/' || entry === 'openspec' || entry === '/openspec/' || entry === '/openspec'); + const haveDocs = ignored.some((entry) => entry === 'docs/' || entry === 'docs' || entry === '/docs/' || entry === '/docs'); + expect(haveOpenspec, '.gitignore must list `openspec/` so the directory cannot be accidentally staged').toBe(true); + expect(haveDocs, '.gitignore must list `docs/` so the directory cannot be accidentally staged').toBe(true); + }); + + // ── 10. (Audit:N-H2) `getCurrentDaemonWorkflowCapabilities` MUST fail-closed + // when the serverLink lacks the capability getter. A previous regression + // returned all three dangerous capabilities (including IMPLEMENTATION) + // as a "permissive default", which silently granted authorisation when + // tests/mocks omitted the getter. The fallback must now be `[]` so the + // bind path produces `missing_required_capability` instead of fail-OPEN. + it('getCurrentDaemonWorkflowCapabilities fallback is fail-closed (audit:N-H2)', () => { + const file = read('src/daemon/p2p-workflow-static-policy.ts'); + const fnIdx = file.lines.findIndex((line) => /export function getCurrentDaemonWorkflowCapabilities\b/.test(line)); + expect(fnIdx, 'getCurrentDaemonWorkflowCapabilities must live in p2p-workflow-static-policy.ts').toBeGreaterThanOrEqual(0); + // Capture the function body (until the next top-level brace at column 0). + let endIdx = fnIdx; + let depth = 0; + let started = false; + for (let i = fnIdx; i < file.lines.length; i += 1) { + const line = file.lines[i]; + for (const ch of line) { + if (ch === '{') { depth += 1; started = true; } + if (ch === '}') { depth -= 1; } + } + if (started && depth === 0) { endIdx = i; break; } + } + const body = file.lines.slice(fnIdx, endIdx + 1).join('\n'); + expect( + /P2P_WORKFLOW_OPENSPEC_ARTIFACTS_CAPABILITY_V1/.test(body), + 'fallback in getCurrentDaemonWorkflowCapabilities must NOT mention OPENSPEC_ARTIFACTS capability (would be fail-OPEN)', + ).toBe(false); + expect( + /P2P_WORKFLOW_IMPLEMENTATION_CAPABILITY_V1/.test(body), + 'fallback in getCurrentDaemonWorkflowCapabilities must NOT mention IMPLEMENTATION capability (would be fail-OPEN)', + ).toBe(false); + // Sanity: the function still references the v1 base capability for typings, + // but the runtime fallback path returns an empty array. + const returnsEmpty = /return\s+\[\s*\]\s*;/.test(body); + expect( + returnsEmpty, + 'getCurrentDaemonWorkflowCapabilities fallback must return `[]` when serverLink lacks getP2pWorkflowCapabilities', + ).toBe(true); + }); + + // ── 11. (Audit:N4) `prepareAdvancedWorkflowLaunch` must derive the daemon + // static policy from `loadDaemonP2pStaticPolicy(serverLink)` rather + // than hardcoding `{ allowOpenSpecArtifacts: true, allowImplementationPermission: true }`. + it('prepareAdvancedWorkflowLaunch reads static policy from loadDaemonP2pStaticPolicy (audit:N4)', () => { + const handler = read('src/daemon/command-handler.ts'); + expect( + handler.text.includes('loadDaemonP2pStaticPolicy'), + 'command-handler must import and call loadDaemonP2pStaticPolicy as the policy source', + ).toBe(true); + // Forbid the previously-permissive shape that hardcoded both dangerous flags. + const permissiveShape = /buildDefaultP2pStaticPolicy\s*\(\s*\{[^}]*allowOpenSpecArtifacts:\s*true[^}]*allowImplementationPermission:\s*true/m; + expect( + permissiveShape.test(handler.text), + 'command-handler must NOT call buildDefaultP2pStaticPolicy with hardcoded permissive overrides — use loadDaemonP2pStaticPolicy instead', + ).toBe(false); + }); + + // ── 12. (Audit:H3) `recheckDangerousNodeCapabilities` must accept policy + // snapshots as well as capability strings. A regression that drops the + // `boundPolicySnapshot` / `currentDaemonPolicy` parameters would + // reintroduce the "capability set unchanged but allowlist tightened" + // authorisation gap. + it('recheckDangerousNodeCapabilities supports policy diff (audit:H3)', () => { + const file = read('src/daemon/p2p-workflow-policy-recheck.ts'); + expect( + file.text.includes('boundPolicySnapshot') && file.text.includes('currentDaemonPolicy'), + 'recheckDangerousNodeCapabilities must accept boundPolicySnapshot and currentDaemonPolicy parameters', + ).toBe(true); + expect( + /findPolicyDowngrade|allowedExecutables/.test(file.text), + 'recheckDangerousNodeCapabilities must compare policy allowlists / allow-flags between bind and current', + ).toBe(true); + }); + + // ── 13. (Audit:N1) The web run mapper must surface `workflow_projection.diagnostics` + // (or a top-level `diagnostics` fallback) so the UI can render runtime + // diagnostic codes that the server now retains. + it('web mapP2pRunToDiscussion exposes workflow_projection.diagnostics (audit:N1)', () => { + const file = read('web/src/p2p-run-mapping.ts'); + // Both keywords must appear in the file (cross-line OK; the actual code + // reads the projection then iterates `projection.diagnostics`). + expect( + file.text.includes('workflow_projection') && file.text.includes('diagnostics'), + 'mapP2pRunToDiscussion must read workflow_projection.diagnostics so UI can render workflow diagnostics', + ).toBe(true); + }); + + // ── 14. (Audit:B1) The P2P message registry must include the `p2p.config.*` + // protocol — otherwise the bridge default-deny drops legitimate config + // save round-trips. Both SAVE and SAVE_RESPONSE must be present. + it('p2p.config.* messages are registered in P2P_WORKFLOW_MESSAGE_REGISTRY (audit:B1)', () => { + const file = read('shared/p2p-workflow-messages.ts'); + expect( + file.text.includes('P2P_CONFIG_MSG.SAVE'), + 'workflow message registry must register P2P_CONFIG_MSG.SAVE', + ).toBe(true); + expect( + file.text.includes('P2P_CONFIG_MSG.SAVE_RESPONSE'), + 'workflow message registry must register P2P_CONFIG_MSG.SAVE_RESPONSE', + ).toBe(true); + // The category field discriminator must exist so workflow-only consumers + // can filter without re-listing types. + expect( + file.text.includes("category: 'config'"), + "P2pWorkflowMessageDescriptor must mark p2p.config.* with category: 'config'", + ).toBe(true); + }); + + // ── 15. (Audit:B2) `handleP2pStatus` must enforce project scope just like + // handleP2pListDiscussions / handleP2pReadDiscussion. Without scope a + // caller could enumerate active runs across projects. + it('handleP2pStatus enforces project scope (audit:B2)', () => { + const handler = read('src/daemon/command-handler.ts'); + const fnIdx = handler.lines.findIndex((line) => /async function handleP2pStatus\b/.test(line)); + expect(fnIdx, 'handleP2pStatus must exist in command-handler.ts').toBeGreaterThanOrEqual(0); + // Capture the function body length conservatively (up to next top-level + // function declaration / "// ──" section divider). + let endIdx = handler.lines.length; + for (let i = fnIdx + 1; i < handler.lines.length; i += 1) { + if (/^(async\s+)?function\s+\w+/.test(handler.lines[i]) || /^export\s+(async\s+)?function\s+\w+/.test(handler.lines[i])) { + endIdx = i; + break; + } + if (/^\/\/\s*──/.test(handler.lines[i])) { endIdx = i; break; } + } + const body = handler.lines.slice(fnIdx, endIdx).join('\n'); + expect( + body.includes('resolveP2pDiscussionProjectScope'), + 'handleP2pStatus must call resolveP2pDiscussionProjectScope to enforce scope', + ).toBe(true); + }); + + // ── 16. (Audit:M1 / R2-V6 derivative) The legacy snapshot sanitizer must + // treat the empty-object placeholder `'{}'` (introduced by the DB + // column DEFAULT) as a no-op, NOT as a legacy row that needs a + // `legacy_progress_snapshot_sanitized` diagnostic. + it('sanitizeLegacyP2pProgressSnapshot has explicit empty-placeholder handling (audit:M1)', () => { + const file = read('server/src/p2p-workflow-sanitize.ts'); + expect( + /isEmptyPlaceholder|placeholder|isEmptyObject/.test(file.text), + 'sanitizeLegacyP2pProgressSnapshot must early-return for the empty-object placeholder produced by the DB column DEFAULT', + ).toBe(true); + }); + + // ── 17. (Audit:R3 PR-α / R2 A1) `P2pBindRuntimeContext` must NOT define + // the ad-hoc `currentDaemonPolicy: { allowScript / allowImplementation / ... }` + // subset that was structurally incompatible with `recheckDangerousNodeCapabilities`. + // The canonical bind-time policy snapshot is `policySnapshot: P2pStaticPolicy`. + it('P2pBindRuntimeContext exposes policySnapshot (full P2pStaticPolicy), not the ad-hoc currentDaemonPolicy subset (audit:R3 PR-α)', () => { + const file = read('shared/p2p-workflow-types.ts'); + const start = file.lines.findIndex((line) => /^export interface P2pBindRuntimeContext\b/.test(line)); + expect(start, 'P2pBindRuntimeContext must exist').toBeGreaterThanOrEqual(0); + let end = file.lines.length; + for (let i = start + 1; i < file.lines.length; i += 1) { + if (/^\}/.test(file.lines[i])) { end = i; break; } + } + const body = file.lines.slice(start, end).join('\n'); + expect( + /policySnapshot:\s*P2pStaticPolicy/.test(body), + 'P2pBindRuntimeContext must declare `policySnapshot: P2pStaticPolicy` (full shape)', + ).toBe(true); + // Match only field declarations (start of line + indent + name + `:`), not + // doc-comment references that explain the field was removed. + const hasFieldDecl = /^\s{2}currentDaemonPolicy:\s*\{/m.test(body); + expect( + hasFieldDecl, + 'P2pBindRuntimeContext must NOT declare the ad-hoc currentDaemonPolicy subset (use policySnapshot instead)', + ).toBe(false); + }); + + // ── 18. (Audit:R3 PR-α / N-M1) `P2pRun` must carry `boundWorkflow` so + // v1b dangerous-node executors can read `derivedRequiredCapabilities` + // and `bindContext` without re-deriving from current state. The bound + // workflow MUST NOT be exposed via daemon serialize / bridge sanitize. + it('P2pRun stores boundWorkflow and policySnapshot for executor recheck; sanitizers do not expose them (audit:R3 PR-α)', () => { + const orchestratorFile = read('src/daemon/p2p-orchestrator.ts'); + expect( + /boundWorkflow\?:[\s\S]{0,160}P2pBoundWorkflow/.test(orchestratorFile.text), + 'P2pRun interface must declare `boundWorkflow?: P2pBoundWorkflow`', + ).toBe(true); + expect( + /policySnapshot\?:[\s\S]{0,80}policySnapshot'\]/.test(orchestratorFile.text), + 'P2pRun interface must declare `policySnapshot?: P2pBindRuntimeContext[\'policySnapshot\']`', + ).toBe(true); + + // Sanitizer allowlists must NOT propagate boundWorkflow / policySnapshot + // — confirmed by absence in the canonical run-projection field set used + // by `sanitizeP2pOrchestrationRunForBridge` and `sanitizeP2pRunUpdateForBroadcast`. + const sanitizerFile = read('server/src/p2p-workflow-sanitize.ts'); + expect( + /boundWorkflow/.test(sanitizerFile.text), + 'server sanitizer must NOT reference boundWorkflow (raw bound must never reach broadcast/persistence)', + ).toBe(false); + expect( + /policySnapshot/.test(sanitizerFile.text), + 'server sanitizer must NOT reference policySnapshot (full P2pStaticPolicy must never reach broadcast/persistence)', + ).toBe(false); + }); + + // ── 20. (Audit:R3 PR-β / A3 / V-5) `loadDaemonP2pStaticPolicy` MUST NOT + // OR the ARGV capability into `allowInterpreterScripts`. Interpreter + // execution is a distinct security boundary from argv execution; the + // previous derivation silently upgraded argv-only authority into + // interpreter authority. spec.md "Interpreter script requires + // interpreter capability" scenario. + it('loadDaemonP2pStaticPolicy does not OR argv capability into allowInterpreterScripts (audit:R3 PR-β / A3)', () => { + const file = read('src/daemon/p2p-workflow-static-policy.ts'); + // The line `allowInterpreterScripts:` must not be followed by both + // INTERPRETER and ARGV identifiers (i.e. the `INTERPRETER || ARGV` + // shape is forbidden). + const orShape = /allowInterpreterScripts:[^\n]*P2P_WORKFLOW_SCRIPT_INTERPRETER_CAPABILITY_V1[^\n]*\|\|[^\n]*P2P_WORKFLOW_SCRIPT_ARGV_CAPABILITY_V1/.test(file.text) + || /allowInterpreterScripts:[^\n]*P2P_WORKFLOW_SCRIPT_ARGV_CAPABILITY_V1[^\n]*\|\|[^\n]*P2P_WORKFLOW_SCRIPT_INTERPRETER_CAPABILITY_V1/.test(file.text); + expect( + orShape, + 'loadDaemonP2pStaticPolicy must NOT compute `allowInterpreterScripts: caps.has(INTERPRETER) || caps.has(ARGV)` — interpreter authority must strictly require the interpreter capability', + ).toBe(false); + }); + + // ── 21. (Audit:R3 PR-β / V-6) compile is intentionally pure; the daemon + // authority layer (`validateCompiledWorkflowAgainstBindPolicy`) MUST + // enforce the full `P2pStaticPolicy` (allow flags + executable + // allowlist) before bind constructs `P2pBoundWorkflow`. A regression + // that drops the helper or stops calling it from `bindP2pCompiledWorkflow` + // would re-open the previous "compile derived caps but bind only + // checked capability strings" gap. + it('bindP2pCompiledWorkflow runs validateCompiledWorkflowAgainstBindPolicy before constructing bound (audit:R3 PR-β / V-6)', () => { + const file = read('src/daemon/p2p-workflow-bind.ts'); + expect( + /export function validateCompiledWorkflowAgainstBindPolicy\b/.test(file.text), + 'validateCompiledWorkflowAgainstBindPolicy helper must be exported from p2p-workflow-bind.ts', + ).toBe(true); + expect( + /validateCompiledWorkflowAgainstBindPolicy\(compiled,\s*bindContext\)/.test(file.text), + 'bindP2pCompiledWorkflow must call validateCompiledWorkflowAgainstBindPolicy(compiled, bindContext) before constructing bound', + ).toBe(true); + }); + + // ── 22. (Audit:R3 PR-β / M-3) `parseP2pScriptMachineOutput` MUST truncate + // at the last `\n` boundary in lenient mode when total bytes exceed + // the cap, not return `invalidMachineOutput`. The previous shape + // reject-on-overflow contradicted spec §Script machine output truncation. + it('parseP2pScriptMachineOutput uses line-boundary truncate in lenient mode (audit:R3 PR-β / M-3)', () => { + const file = read('shared/p2p-workflow-script.ts'); + // Forbid the previous "totalBytes > maxTotalBytes ⇒ return invalid" + // shape that ignored mode. + const lines = file.lines; + const totalLineIdx = lines.findIndex((line) => /const\s+totalBytes\s*=\s*byteLength\(input\)/.test(line)); + expect(totalLineIdx, 'parseP2pScriptMachineOutput must compute totalBytes').toBeGreaterThanOrEqual(0); + const window = lines.slice(totalLineIdx, totalLineIdx + 30).join('\n'); + expect( + /mode\s*===\s*'strict'/.test(window), + 'parseP2pScriptMachineOutput must distinguish strict vs lenient when handling total-bytes overflow (lenient must truncate at line boundary)', + ).toBe(true); + expect( + /lastIndexOf\(['"`]\\n['"`]\)/.test(window), + 'parseP2pScriptMachineOutput must walk back to the last newline boundary when truncating in lenient mode', + ).toBe(true); + }); + + // ── 23. (Audit:R3 PR-γ / N-M5 / V-4) The diagnostic + // `static_policy_mismatch_recompiled` MUST have at least one production + // `makeP2pWorkflowDiagnostic` call site outside i18n / spec / tests + // (otherwise it's a "publicly exposed code that is impossible to + // trigger" — the v1a regression that PR-γ closes). + it('static_policy_mismatch_recompiled has a production emission point (audit:R3 PR-γ / N-M5)', () => { + const file = read('src/daemon/command-handler.ts'); + expect( + /makeP2pWorkflowDiagnostic\(['"`]static_policy_mismatch_recompiled['"`]/.test(file.text), + 'src/daemon/command-handler.ts must emit `static_policy_mismatch_recompiled` when envelope.expectedStaticPolicyHash differs from current daemon policy hash', + ).toBe(true); + }); + + // ── 24. (Task 10.2 / 12.5 closure) Cron executor MUST route advanced cron + // jobs through `prepareAdvancedWorkflowLaunch` when the action carries + // `workflowLaunchEnvelope` — otherwise cron silently bypasses + // capability gating, policy authority, and `daemon_busy` admission. + it('cron-executor routes envelope-bearing P2P actions through prepareAdvancedWorkflowLaunch (task 10.2)', () => { + const file = read('src/daemon/cron-executor.ts'); + expect( + file.text.includes('prepareAdvancedWorkflowLaunch'), + 'cron-executor must import and call prepareAdvancedWorkflowLaunch when action carries workflowLaunchEnvelope', + ).toBe(true); + // The CronP2pAction type must declare the envelope field so cron-api can + // accept and persist it. + const cronTypes = read('shared/cron-types.ts'); + expect( + cronTypes.text.includes('workflowLaunchEnvelope'), + 'shared/cron-types.ts CronP2pAction must declare workflowLaunchEnvelope field', + ).toBe(true); + }); + + // ── 25. (Task 10.3 closure) Cron MUST bound `daemon_busy` retry attempts; + // no infinite loop on perpetually busy daemon. + it('cron-executor bounds daemon_busy retries (task 10.3)', () => { + const file = read('src/daemon/cron-executor.ts'); + expect( + /CRON_DAEMON_BUSY_DEFAULT_ATTEMPTS|daemon_busy/.test(file.text), + 'cron-executor must bound daemon_busy retries with explicit attempt budget', + ).toBe(true); + expect( + /while\s*\([^)]*Attempt[^)]*<[^)]*attempts/.test(file.text) + || /while\s*\([^)]*lastDaemonBusyAttempt[^)]*<[^)]*\.attempts\)/.test(file.text), + 'cron-executor must use a bounded while loop on daemon_busy attempts', + ).toBe(true); + }); + + // ── 26. (Task 10.4 closure) Supervision audit launches MUST honour the + // daemon advanced-run admission cap with bounded retry — no silent + // bypass of `P2P_WORKFLOW_MAX_ACTIVE_RUNS`. + it('supervision-automation bounds daemon_busy retries on audit launches (task 10.4)', () => { + const file = read('src/daemon/supervision-automation.ts'); + expect( + /startSupervisionRunWithBusyRetry/.test(file.text), + 'supervision-automation must use a bounded daemon_busy retry helper', + ).toBe(true); + expect( + file.text.includes('loadDaemonP2pStaticPolicy'), + 'supervision-automation must read concurrency cap from loadDaemonP2pStaticPolicy', + ).toBe(true); + expect( + file.text.includes('listP2pRuns'), + 'supervision-automation must inspect listP2pRuns to compute admission', + ).toBe(true); + }); + + // ── 27. (Task 10.5 closure) `pushState` in the orchestrator MUST debounce + // non-terminal projections AND MUST flush terminal statuses + blocking + // diagnostics immediately. A regression that drops the flush-on-terminal + // branch would race with `delete activeRuns.get(runId)` cleanup. + it('orchestrator pushState debounces non-terminal but flushes terminal projections (task 10.5)', () => { + const file = read('src/daemon/p2p-orchestrator.ts'); + expect( + /PROJECTION_DEBOUNCE_MS|pendingProjectionTimers/.test(file.text), + 'p2p-orchestrator must declare projection debounce machinery', + ).toBe(true); + expect( + /isTerminalStatus[^\n]*flushProjection|isTerminal\(run\.status\)[\s\S]*?flushProjection/.test(file.text), + 'p2p-orchestrator pushState must flush projection immediately when run.status is terminal', + ).toBe(true); + }); + + // ── 28. (Task 10.6 closure) `addHelperDiagnostic` MUST enforce both count + // and byte caps on the per-run diagnostic ring — long-running advanced + // workflows otherwise grow unbounded. + it('orchestrator addHelperDiagnostic enforces retention count and byte caps (task 10.6)', () => { + const file = read('src/daemon/p2p-orchestrator.ts'); + expect( + file.text.includes('P2P_HELPER_DIAGNOSTIC_RETENTION_COUNT'), + 'p2p-orchestrator must declare a retention count cap on helper diagnostics', + ).toBe(true); + expect( + file.text.includes('P2P_HELPER_DIAGNOSTIC_RETENTION_BYTES'), + 'p2p-orchestrator must declare a retention byte cap on helper diagnostics', + ).toBe(true); + // FIFO trim — drop OLDEST entries when over budget so most-recent + // forensic data survives. + expect( + /helperDiagnostics\.shift\(\)/.test(file.text), + 'p2p-orchestrator addHelperDiagnostic must use FIFO trim (shift) to drop oldest entries when over cap', + ).toBe(true); + }); + + // ── 29. (Tasks 7.2 / 7.3 / 12.1) The P2P workflow script runner MUST NOT + // spawn child processes with `shell: true`. spec.md "Script command is + // argv-only" Scenario forbids implicit shell parsing of argv — + // `shell: true` would run argv through `/bin/sh -c` (POSIX) or `cmd.exe` + // (Windows) and would re-introduce shell-injection / metacharacter + // execution that the executable allowlist explicitly defends against. + // The runner must always pass `shell: false` (or omit the flag) and + // rely on argv-only spawn. + it('p2p-workflow-script-runner.ts never calls child_process.spawn with shell: true (tasks 7.2 / 7.3)', () => { + const file = read('src/daemon/p2p-workflow-script-runner.ts'); + // Forbid any `shell: true` in the file (the runner is the only spawn + // site for script nodes; ad-hoc shell:true would be a regression). + const offenders: string[] = []; + file.lines.forEach((line, index) => { + if (/shell\s*:\s*true/.test(line)) { + offenders.push(`${file.path}:${index + 1}: ${line.trim()}`); + } + }); + expect( + offenders, + `p2p-workflow-script-runner.ts must not call spawn with shell: true:\n${offenders.join('\n')}`, + ).toEqual([]); + + // Belt-and-suspenders: explicitly verify the canonical safe call carries + // `shell: false` so a future refactor cannot drop it silently. + expect( + /shell\s*:\s*false/.test(file.text), + 'p2p-workflow-script-runner.ts must explicitly pass `shell: false` to child_process.spawn', + ).toBe(true); + }); + + // ── 30. (Tasks 6.2 / 6.9 / 12.2) The daemon artifact runtime must NOT use + // `readdir(...).join('\n')` (or any other broad-directory-listing + // heuristic) as artifact success evidence. The contract requires + // per-file sha256 baselines + declared-file delta verification — a + // regression that lists a directory and joins the names back to a + // single string would silently let unrelated changes satisfy a + // declared-file contract. spec.md "Directory listing join is not a + // success criterion" scenario. + it('p2p-workflow-artifact-runtime.ts must not use readdir(...).join("\\n") as success evidence (tasks 6.2 / 6.9)', () => { + const file = read('src/daemon/p2p-workflow-artifact-runtime.ts'); + // Same rule as guard #5 but scoped specifically to the daemon runtime + // (file may exist standalone in v1b refactors). Match a `readdir(...)` + // call DIRECTLY chained to `.join(` with no intervening tokens. + const compactText = file.text.replace(/\s+/g, ' '); + const pattern = /\breaddir\s*\([^()]*\)\s*\)?\.join\s*\(/g; + const offenders: string[] = []; + let match: RegExpExecArray | null; + while ((match = pattern.exec(compactText)) != null) { + offenders.push(`src/daemon/p2p-workflow-artifact-runtime.ts: matched "${match[0]}"`); + } + // Also forbid the `.map(e => e.name).join(` shape on a readdir result — + // the same heuristic with one common transformation in between. + const mapJoinPattern = /\breaddir\s*\([^()]*\)\s*\)?\.map\s*\([^)]*\)\.join\s*\(/g; + while ((match = mapJoinPattern.exec(compactText)) != null) { + offenders.push(`src/daemon/p2p-workflow-artifact-runtime.ts: matched "${match[0]}"`); + } + expect( + offenders, + `p2p-workflow-artifact-runtime.ts must not use readdir().join() as success evidence:\n${offenders.join('\n')}`, + ).toEqual([]); + }); + + // ── 31. (Tasks 4.7b / 4.8b / §12.3 closure) The orchestrator MUST recheck + // dangerous-round capabilities BEFORE dispatching each dangerous round + // (envelope_compiled runs only). A regression that drops the recheck + // reopens the "bound at compile, downgraded at execute" gap. + it('orchestrator wires recheckDangerousNodeCapabilities before each dangerous round (task 4.7b / 4.8b / §12.3)', () => { + const file = read('src/daemon/p2p-orchestrator.ts'); + expect( + file.text.includes('recheckDangerousNodeCapabilities'), + 'p2p-orchestrator must import and call recheckDangerousNodeCapabilities', + ).toBe(true); + expect( + /isRoundDangerous|recheckDangerousRoundOrFail/.test(file.text), + 'p2p-orchestrator must declare a dangerous-round predicate + recheck-or-fail helper', + ).toBe(true); + // The helper MUST be invoked from the executeAdvancedChain loop body. + const idx = file.lines.findIndex((line) => /executeAdvancedChain\b/.test(line) && /async\s+function/.test(line)); + expect(idx, 'executeAdvancedChain function not found').toBeGreaterThanOrEqual(0); + const window = file.lines.slice(idx, idx + 80).join('\n'); + expect( + /recheckDangerousRoundOrFail\(run,\s*round,\s*serverLink\)/.test(window), + 'executeAdvancedChain must invoke recheckDangerousRoundOrFail before dispatching each dangerous round', + ).toBe(true); + }); + + // ── 32. (Audit:R2-N1 / round 4e78ab60) The orchestrator MUST invoke the + // script runner from `executeAdvancedChain` for compiled `nodeKind: 'script'` + // nodes. A regression that drops the dispatch reopens the + // "runner exists but never called" gap. + it('orchestrator dispatches script-node rounds via runP2pScriptNode (audit:R2-N1 / R3 §12.1 wiring)', () => { + const file = read('src/daemon/p2p-orchestrator.ts'); + expect( + file.text.includes('runP2pScriptNode'), + 'p2p-orchestrator must import and call runP2pScriptNode for script-node rounds', + ).toBe(true); + expect( + /dispatchScriptRoundOrFail/.test(file.text), + 'p2p-orchestrator must declare a script-node dispatch helper invoked from executeAdvancedChain', + ).toBe(true); + // The helper MUST be invoked from executeAdvancedChain. + const idx = file.lines.findIndex((line) => /async\s+function\s+executeAdvancedChain\b/.test(line)); + expect(idx, 'executeAdvancedChain must exist').toBeGreaterThanOrEqual(0); + // R3 v2 PR-ζ — pre-round capture fail-closed block grew the window; + // bump from 120 → 200 to keep matching the dispatch call that lives + // post-capture but pre-legacy-hop. + const window = file.lines.slice(idx, idx + 200).join('\n'); + expect( + /dispatchScriptRoundOrFail\(run,\s*round,\s*serverLink\)/.test(window), + 'executeAdvancedChain must invoke dispatchScriptRoundOrFail before the legacy dispatchHop branches', + ).toBe(true); + // Slot acquire/release MUST be paired — orchestrator owns the cap. + expect( + /acquireScriptSlot\(\)/.test(file.text) && /releaseScriptSlot\(\)/.test(file.text), + 'p2p-orchestrator must acquire and release script concurrency slots around runP2pScriptNode', + ).toBe(true); + }); + + // ── 33. (Audit:R2-N2 / round 4e78ab60) The orchestrator MUST use the new + // artifact runtime helpers for envelope_compiled OpenSpec runs. A + // regression that drops the freeze + capture + verify chain reopens + // the "helpers exist but legacy `captureArtifactBaseline` shadows them" + // gap. + it('orchestrator uses new artifact runtime for envelope_compiled OpenSpec rounds (audit:R2-N2 / R3 §12.2 wiring)', () => { + const file = read('src/daemon/p2p-orchestrator.ts'); + expect( + file.text.includes('freezeP2pArtifactIdentity') + && file.text.includes('captureP2pArtifactBaseline') + && file.text.includes('verifyP2pArtifactBaselineDelta'), + 'p2p-orchestrator must import all three new artifact runtime helpers', + ).toBe(true); + expect( + /getOrFreezeRunArtifactRoot/.test(file.text), + 'p2p-orchestrator must declare a per-run identity-freeze cache helper that gates on envelope_compiled + openspec_convention', + ).toBe(true); + // The post-round delta verify MUST run inside executeAdvancedChain. + const idx = file.lines.findIndex((line) => /async\s+function\s+executeAdvancedChain\b/.test(line)); + expect(idx, 'executeAdvancedChain must exist').toBeGreaterThanOrEqual(0); + const window = file.lines.slice(idx, idx + 400).join('\n'); + expect( + /verifyP2pArtifactBaselineDelta\(/.test(window), + 'executeAdvancedChain must call verifyP2pArtifactBaselineDelta after the round dispatches', + ).toBe(true); + }); + + // ── 19. (Audit:R3 PR-α / W-2) The broadcast↔persistence projection field + // diff must equal a documented set. Today: broadcast carries + // `capabilitySnapshot` and persisted snapshot strips it. Any future + // field added on one side without the other will break this guard. + it('broadcast vs persistence projection field difference is documented (audit:W-2)', () => { + const sanitizerFile = read('server/src/p2p-workflow-sanitize.ts'); + // The `isValidPersistedSnapshotShape` predicate must explicitly forbid + // `capabilitySnapshot` from persisted snapshots — that one field defines + // the only allowed broadcast↔persistence asymmetry. + expect( + /value\.capabilitySnapshot\s*!==\s*undefined/.test(sanitizerFile.text), + 'isValidPersistedSnapshotShape must explicitly reject `capabilitySnapshot` on persisted rows', + ).toBe(true); + // The projection builder (broadcast side) must include capabilitySnapshot. + expect( + /capabilitySnapshot/.test(sanitizerFile.text), + 'sanitizer must reference capabilitySnapshot for broadcast inclusion', + ).toBe(true); + }); + + // ────────────────────────────────────────────────────────────────────── + // R3 PR-α reverse-regression #34-#40 (Cu1-R3 §1) + // + // Calibrated state: the post-PR-α adapter MUST preserve compiled-node + // semantics, the orchestrator MUST recheck script kind dangerously, the + // bind-fail path MUST prepend `policyMismatchDiagnostics`, and the + // script/artifact fail-closed branches MUST call `failRun`. These + // string-shape guards lock the post-fix invariants so a future refactor + // that re-opens any of A1-A7/B1/B2/W3/A4/A5 will fail loudly here in + // addition to the semantic unit tests in `test/daemon/...`. + // ────────────────────────────────────────────────────────────────────── + + it('#34 adapter must preserve nodeKind / script / routingAuthority / artifactConvention through compiledWorkflowToLegacyAdvancedRounds (R3 PR-α A1 / W3)', () => { + const file = read('src/daemon/command-handler.ts'); + expect( + /nodeKind:\s*node\.nodeKind/.test(file.text), + 'adapter must spread `nodeKind: node.nodeKind` onto the legacy round', + ).toBe(true); + expect( + /node\.script\s*\?\s*\{\s*script:\s*node\.script\s*\}/.test(file.text), + 'adapter must spread `script` field when present', + ).toBe(true); + expect( + /node\.routingAuthority\s*\?\s*\{\s*routingAuthority:\s*node\.routingAuthority\s*\}/.test(file.text), + 'adapter must spread `routingAuthority` field when present', + ).toBe(true); + expect( + /artifactConvention\s*\?\s*\{\s*artifactConvention\s*\}/.test(file.text), + 'adapter must spread `artifactConvention` derived from node.artifacts[0].convention', + ).toBe(true); + }); + + it('#35 adapter must order compiled nodes by topology, not lexical id (R3 PR-α A2)', () => { + const file = read('src/daemon/command-handler.ts'); + // The lexical-sort anti-pattern must NOT appear in the production + // adapter callsite (a comment that documents the OLD bug is fine, but + // an actual `localeCompare` on workflow.nodes must not). + expect( + /\[\.\.\.workflow\.nodes\]\s*\.sort\(\(left,\s*right\)\s*=>\s*left\.id\.localeCompare\(right\.id\)\)/.test(file.text), + 'adapter must not sort workflow.nodes lexically by id (replaced with topological traversal)', + ).toBe(false); + expect( + /orderCompiledNodesForExecution\(workflow\)/.test(file.text), + 'adapter must traverse via orderCompiledNodesForExecution', + ).toBe(true); + }); + + it('#36 legacy readdir().join is bounded to non-envelope-compiled paths (R3 PR-α A3 setup; PR-γ retires it fully)', () => { + // Until PR-γ retires the legacy validator entirely, the orchestrator + // still calls it as the FIRST gate. We only assert that the new + // helper is now ALSO authoritative — a regression that drops the new + // helper leaves the legacy gate alone, which would be silently + // weaker than spec. + const orchestrator = read('src/daemon/p2p-orchestrator.ts'); + expect( + /verifyP2pArtifactBaselineDelta\(/.test(orchestrator.text), + 'orchestrator must invoke verifyP2pArtifactBaselineDelta on envelope_compiled OpenSpec rounds', + ).toBe(true); + expect( + /failRun\([\s\S]{0,200}Artifact contract not satisfied/.test(orchestrator.text), + 'verifyP2pArtifactBaselineDelta failure must call failRun (no longer just helper diagnostic)', + ).toBe(true); + }); + + it('#37 isRoundDangerous must recognise script-kind rounds (R3 PR-α A4)', () => { + const orchestrator = read('src/daemon/p2p-orchestrator.ts'); + // The predicate must include a `nodeKind === 'script'` branch so + // analysis_only script rounds still trigger recheck. + expect( + /round\.nodeKind\s*===\s*'script'/.test(orchestrator.text), + 'isRoundDangerous must include `round.nodeKind === \'script\'` branch', + ).toBe(true); + }); + + it('#38 prepareAdvancedWorkflowLaunch bind-fail must prepend policyMismatchDiagnostics (R3 PR-δ A5)', () => { + const file = read('src/daemon/command-handler.ts'); + // The bind-fail return MUST include policyMismatchDiagnostics. The + // shape `[...policyMismatchDiagnostics, ...bindDiagnostics]` is the + // post-fix canonical form; an old `return { ok: false, diagnostics: + // bindDiagnostics }` regression must be caught. + expect( + /\[\.\.\.policyMismatchDiagnostics,\s*\.\.\.bindDiagnostics\]/.test(file.text), + 'bind-fail return must concatenate policyMismatchDiagnostics + bindDiagnostics', + ).toBe(true); + expect( + /diagnostics:\s*bindDiagnostics\s*\}\s*;[\s]*\}/.test(file.text), + 'bind-fail return must NOT use bindDiagnostics alone', + ).toBe(false); + }); + + it('#39 dispatchScriptRoundOrFail !result.ok must call failRun + return fail_closed (R3 PR-α B1 / B5)', () => { + const file = read('src/daemon/p2p-orchestrator.ts'); + // Use brace-balance scan to extract the outer body of `if (!result.ok)`. + const startIdx = file.text.indexOf('if (!result.ok) {'); + expect(startIdx, '`if (!result.ok)` block must exist in dispatchScriptRoundOrFail').toBeGreaterThanOrEqual(0); + let depth = 0; + let endIdx = -1; + for (let i = startIdx + 'if (!result.ok) '.length; i < file.text.length; i += 1) { + const ch = file.text[i]; + if (ch === '{') depth += 1; + else if (ch === '}') { + depth -= 1; + if (depth === 0) { endIdx = i; break; } + } + } + expect(endIdx, 'failed to locate end of !result.ok block').toBeGreaterThan(startIdx); + const body = file.text.slice(startIdx, endIdx + 1); + expect( + body.includes('failRun('), + '!result.ok body must invoke failRun (no longer return { kind: "ok" })', + ).toBe(true); + expect( + body.includes("return { kind: 'fail_closed' }"), + '!result.ok body must return { kind: \'fail_closed\' }', + ).toBe(true); + expect( + body.includes('return { kind: \'ok\''), + '!result.ok body must NOT return kind: ok (legacy regression)', + ).toBe(false); + // Structured workflow diagnostic MUST be preserved via + // `helperDiagnostic.workflowDiagnostic` so the original 32-code enum + // survives the helper path. + expect( + /workflowDiagnostic:\s*wd/.test(file.text), + 'helper diagnostic must preserve original workflow diagnostic via `workflowDiagnostic` sidecar', + ).toBe(true); + }); + + it('#40 verifyP2pArtifactBaselineDelta(!ok) must call failRun (R3 PR-α B2)', () => { + const file = read('src/daemon/p2p-orchestrator.ts'); + // The artifact verify branch must include a `failRun` call after the + // delta failure loop, not just `addHelperDiagnostic`. + expect( + /delta\.diagnostics[\s\S]{0,400}failRun\([\s\S]{0,200}artifact_contract_not_satisfied/i.test(file.text), + 'delta failure branch must invoke failRun with artifact_contract_not_satisfied diagnostic', + ).toBe(true); + }); + + it('#41 captureP2pArtifactBaseline post-round phase must be `validate` (R3 PR-α B7)', () => { + const file = read('src/daemon/p2p-orchestrator.ts'); + // The pre-round capture is `phase: 'baseline'`; post-round capture + // must be `phase: 'validate'`. We assert the validate string is + // present in the file (post-round capture path). + expect( + /afterCapture[\s\S]{0,200}phase:\s*'validate'/.test(file.text), + 'post-round artifact capture must use phase: validate', + ).toBe(true); + }); + + it('#42 getOrFreezeRunArtifactRoot returns narrowed { rootPath, bound } | null (R3 PR-α W1)', () => { + const file = read('src/daemon/p2p-orchestrator.ts'); + expect( + /interface\s+RunArtifactRootResolution\s*\{[\s\S]{0,200}rootPath:\s*string;[\s\S]{0,200}bound:\s*P2pBoundWorkflow;/.test(file.text), + 'getOrFreezeRunArtifactRoot must return a narrowed { rootPath, bound } shape so callers do not need ! assertions', + ).toBe(true); + expect( + /run\.boundWorkflow!\.bindContext\.repoRoot/.test(file.text), + '! non-null assertion against run.boundWorkflow must not appear in artifact code paths', + ).toBe(false); + }); + + it('#43 daemon static policy MUST NOT read host-side allowlist files; allowedExecutables is envelope-carried (R3 PR-α §13.13)', () => { + // Originally (#43 in §13.10) we asserted that loadDaemonP2pStaticPolicy + // wired in a JSON file reader. User feedback (§13.13) reverted that: + // hand-editing host JSON is off-product. The new contract is the + // INVERSE — daemon static policy returns an empty allowlist and the + // launch envelope is the sole source of non-empty allowlists. + const policy = read('src/daemon/p2p-workflow-static-policy.ts'); + expect( + /loadAllowedExecutables/.test(policy.text), + '`loadAllowedExecutables` symbol MUST NOT exist in p2p-workflow-static-policy.ts', + ).toBe(false); + // Strip comments before scanning so doc/historical references in + // module-doc blocks don't trip the guard. We only care about runtime code. + const stripped = policy.text + .replace(/\/\*[\s\S]*?\*\//g, '') + .split('\n') + .map((line) => line.replace(/\s*\/\/.*$/, '')) + .join('\n'); + expect( + /from\s+['"]node:fs['"]|from\s+['"]node:os['"]|readFileSync\s*\(|homedir\s*\(/.test(stripped), + 'p2p-workflow-static-policy.ts MUST NOT import node:fs/node:os or call readFileSync/homedir at runtime', + ).toBe(false); + expect( + /allowedExecutables:\s*\[\]/.test(policy.text), + 'loadDaemonP2pStaticPolicy MUST set allowedExecutables: []', + ).toBe(true); + // Launch path must merge envelope-supplied entries into the policy. + const handler = read('src/daemon/command-handler.ts'); + expect( + /envelope\.allowedExecutables/.test(handler.text), + 'prepareAdvancedWorkflowLaunch must read envelope.allowedExecutables', + ).toBe(true); + expect( + /buildDefaultP2pStaticPolicy\(\{[\s\S]{0,200}allowedExecutables/.test(handler.text), + 'merged static policy MUST be rebuilt via buildDefaultP2pStaticPolicy with envelope-derived allowedExecutables (so policyHash is recomputed)', + ).toBe(true); + }); + + it('#44 expectedStaticPolicyHash validator enforces ASCII pattern + byte length (R3 PR-δ A6)', () => { + const file = read('shared/p2p-workflow-validators.ts'); + expect( + /P2P_REQUEST_ID_ASCII_PATTERN\.test\(hash\)/.test(file.text), + 'validator must enforce ASCII pattern on expectedStaticPolicyHash', + ).toBe(true); + expect( + /TextEncoder\(\)\.encode\(hash\)\.byteLength/.test(file.text), + 'validator must compute UTF-8 byte length via TextEncoder', + ).toBe(true); + }); + + // ────────────────────────────────────────────────────────────────────── + // R3 PR-ε reverse-regression #45-#47: visual canvas editor folded into v1a + // + // Calibrated state: the canvas editor MUST be the only authoring surface in + // `P2pConfigPanel`. Adding back the list editor or any toggle should fail + // these guards. The canvas testid contract `data-editor-variant="canvas"` + // must remain stable so integration tests can assert canvas presence. + // ────────────────────────────────────────────────────────────────────── + + it('#45 P2pConfigPanel imports AdvancedWorkflowCanvasEditor and renders it for advanced drafts (R3 PR-ε)', () => { + const file = read('web/src/components/P2pConfigPanel.tsx'); + expect( + /import\s*\{\s*AdvancedWorkflowCanvasEditor\s*\}\s*from\s*['"]\.\/AdvancedWorkflowCanvasEditor\.js['"]/.test(file.text), + 'P2pConfigPanel must import AdvancedWorkflowCanvasEditor from the canvas module', + ).toBe(true); + expect( + / for the workflowDraft branch', + ).toBe(true); + }); + + it('#46 AdvancedWorkflowDraftEditor (list editor) MUST NOT be re-introduced (R3 PR-ε no-toggle contract)', () => { + const panel = read('web/src/components/P2pConfigPanel.tsx'); + // The previous list-based component must NOT be defined or referenced + // anywhere in the panel. The canvas is the SOLE authoring surface; a + // future PR that revives the list view (even as a toggle option) must + // fail this guard. + expect( + /export\s+function\s+AdvancedWorkflowDraftEditor\b/.test(panel.text), + 'AdvancedWorkflowDraftEditor (list editor) MUST NOT be re-defined in P2pConfigPanel.tsx', + ).toBe(false); + expect( + / { + const file = read('web/src/components/AdvancedWorkflowCanvasEditor.tsx'); + expect( + /data-testid="p2p-advanced-workflow-editor"/.test(file.text), + 'canvas editor must expose the shared editor testid for integration tests', + ).toBe(true); + expect( + /data-editor-variant="canvas"/.test(file.text), + 'canvas editor must declare data-editor-variant="canvas" so guards can distinguish from any future variant', + ).toBe(true); + expect( + /data-testid="p2p-editor-canvas"/.test(file.text), + 'canvas editor must expose the SVG root testid `p2p-editor-canvas`', + ).toBe(true); + }); + + // ────────────────────────────────────────────────────────────────────── + // R3 PR-β + PR-γ reverse-regression #48-#52 + // + // Calibrated state: envelope_compiled runs MUST drive routing / + // declaredFiles / freeze semantics from the compiled graph and frozen + // identity, NOT from the lossy adapter projection. Legacy + // `readdir().join()` MUST be bypassed for envelope_compiled OpenSpec + // rounds (PR-γ A3). Compiler MUST reject multiple conditional outgoing + // edges per node (PR-γ W4). These guards lock the post-fix invariants + // so a future refactor that re-opens any of A3 / A7 / A8 / W4 / Cx1-H2 + // / Cx1-H3 / Cx1-H4 will fail loudly. + // ────────────────────────────────────────────────────────────────────── + + it('#48 envelope_compiled freeze failure must call failRun (R3 PR-β Cx1-H4)', () => { + const file = read('src/daemon/p2p-orchestrator.ts'); + expect( + /freezeError\s*:\s*\{\s*reason\s*:/.test(file.text), + 'getOrFreezeRunArtifactRoot must surface a `freezeError` field on its resolution shape', + ).toBe(true); + // Locate the freezeError guard block by scanning for the predicate + // chain, then assert failRun appears within the same block. Using + // brace-balance scan keeps this robust against intervening whitespace + // / comments / additional helper calls. + const startIdx = file.text.indexOf('artifactRootResolution?.freezeError'); + expect(startIdx, 'expected freezeError guard in p2p-orchestrator.ts').toBeGreaterThanOrEqual(0); + // The guard must reference both envelope_compiled and openspec_convention + // within a 400-char window of the freezeError predicate. + const window = file.text.slice(startIdx, startIdx + 600); + expect(window).toContain("advancedSourceKind === 'envelope_compiled'"); + expect(window).toContain("artifactConvention === 'openspec_convention'"); + // The same guard block must contain a failRun call (within 1500 chars + // — covers the diagnostic + failRun + return body). + const block = file.text.slice(startIdx, startIdx + 1500); + expect(block).toContain('failRun('); + }); + + it('#49 declaredFiles must come from frozen identity for envelope_compiled (R3 PR-β Cx1-H3)', () => { + const file = read('src/daemon/p2p-orchestrator.ts'); + expect( + /artifactRootResolution\.identity\.openspecArtifactPaths/.test(file.text), + 'post-round delta gate must use identity.openspecArtifactPaths as the declared-files coordinate system', + ).toBe(true); + // The delta gate must NOT *exclusively* read from round.artifactOutputs + // for envelope_compiled — it must prefer the frozen identity. We allow + // the round.artifactOutputs as a defensive fallback only. + expect( + /declaredSource\s*=\s*identityPaths\.length\s*>\s*0\s*\?\s*identityPaths\s*:\s*round\.artifactOutputs/.test(file.text), + 'declaredSource must prefer identityPaths and fall back to round.artifactOutputs only when identity is empty', + ).toBe(true); + }); + + it('#50 envelope_compiled jump routing must read compiled.edges, not the legacy jumpRule (R3 PR-β Cx1-H2 / A7 / A8)', () => { + const file = read('src/daemon/p2p-orchestrator.ts'); + expect( + /run\.advancedSourceKind\s*===\s*'envelope_compiled'\s*&&\s*run\.boundWorkflow[\s\S]{0,400}compiled\.edges\.filter/.test(file.text), + 'envelope_compiled jump path must enumerate compiled.edges for outgoing conditional edges', + ).toBe(true); + expect( + /edge\.condition\.kind\s*===\s*'routing_key_equals'/.test(file.text), + 'jump path must match routing_key_equals condition against scriptRoutingKey', + ).toBe(true); + expect( + /edge\.condition\.kind\s*===\s*'verdict_marker_equals'/.test(file.text), + 'jump path must match verdict_marker_equals condition against effectiveVerdict', + ).toBe(true); + // Per-edge loop budget MUST be enforced from compiled.loopBudgets — not + // the round-aggregated roundJumpCounts. + expect( + /compiled\.loopBudgets\[edge\.id\]/.test(file.text), + 'jump path must enforce per-edge loop budget from compiled.loopBudgets', + ).toBe(true); + }); + + it('#51 dispatchScriptRoundOrFail must surface routingKey from machine output frame (R3 PR-β Cx1-H2)', () => { + const file = read('src/daemon/p2p-orchestrator.ts'); + expect( + /result\.machineOutput\?\.ok[\s\S]{0,200}finalFrame[\s\S]{0,200}routingKey/.test(file.text), + 'dispatchScriptRoundOrFail must extract routingKey from machineOutput.finalFrame', + ).toBe(true); + expect( + /scriptDispatch\.routingKey/.test(file.text), + 'executor must consume scriptDispatch.routingKey to drive compiled-edge routing', + ).toBe(true); + }); + + it('#52 legacy captureArtifactBaseline / validateArtifactOutputsForRound MUST bypass envelope_compiled OpenSpec rounds (R3 PR-γ A3)', () => { + const file = read('src/daemon/p2p-orchestrator.ts'); + // Use brace-balance scan to extract each helper body, then assert the + // bypass condition + early return live inside that body. + const captureStart = file.text.indexOf('async function captureArtifactBaseline('); + expect(captureStart, 'captureArtifactBaseline must exist').toBeGreaterThanOrEqual(0); + const captureBody = file.text.slice(captureStart, captureStart + 2000); + expect(captureBody).toContain("artifactConvention === 'openspec_convention'"); + expect(captureBody).toContain("advancedSourceKind === 'envelope_compiled'"); + // The envelope_compiled guard must early-return WITHOUT hitting the + // readdir.join() heuristic. We assert both guards are present in the + // function body and that 'return baseline' appears under them. + expect(captureBody).toMatch(/return\s+baseline/); + + const validateStart = file.text.indexOf('async function validateArtifactOutputsForRound('); + expect(validateStart, 'validateArtifactOutputsForRound must exist').toBeGreaterThanOrEqual(0); + const validateBody = file.text.slice(validateStart, validateStart + 2000); + expect(validateBody).toContain("artifactConvention === 'openspec_convention'"); + expect(validateBody).toContain("advancedSourceKind === 'envelope_compiled'"); + expect(validateBody).toMatch(/return\s*;/); + }); + + it('#53 compiler must reject multiple conditional outgoing edges per node (R3 PR-γ W4)', () => { + const file = read('shared/p2p-workflow-compiler.ts'); + expect( + /conditionalOutgoing\.length\s*>\s*1/.test(file.text), + 'compiler must explicitly check conditionalOutgoing.length > 1', + ).toBe(true); + expect( + /Multiple conditional outgoing edges/i.test(file.text), + 'compiler diagnostic summary must mention multiple conditional outgoing edges', + ).toBe(true); + }); + + // ────────────────────────────────────────────────────────────────────── + // R3 v1b follow-ups (§13.14) — locked guards #55-#60 + // + // Calibrated state: logic node evaluator wired into executor, script + // retry with transient-only allowlist, artifact identity persisted on + // disk, discussion writer non-blocking via per-run queue, script env + // hardened against dynamic-loader hooks. + // ────────────────────────────────────────────────────────────────────── + + it('#55 logic node evaluator must be wired into the orchestrator dispatch (R3 v1b)', () => { + const orchestrator = read('src/daemon/p2p-orchestrator.ts'); + expect( + /import\s*\{\s*evaluateP2pLogic\s*\}\s*from\s*['"]\.\.\/\.\.\/shared\/p2p-workflow-logic-evaluator\.js['"]/.test(orchestrator.text), + 'orchestrator must import evaluateP2pLogic from the shared evaluator', + ).toBe(true); + expect( + /round\.nodeKind\s*===\s*'logic'/.test(orchestrator.text), + 'orchestrator must dispatch logic nodes via a dedicated branch', + ).toBe(true); + expect( + /evaluateP2pLogic\(logic\b/.test(orchestrator.text), + 'orchestrator must call evaluateP2pLogic against the compiled logic contract', + ).toBe(true); + expect( + /logic_marker_equals[\s\S]{0,200}logicMarker/.test(orchestrator.text), + 'logic_marker_equals routing must consume the evaluator-emitted marker', + ).toBe(true); + }); + + it('#56 logic node compile validation rejects missing / mismatched contracts (R3 v1b)', () => { + const compiler = read('shared/p2p-workflow-compiler.ts'); + expect( + /node\.nodeKind\s*===\s*'logic'/.test(compiler.text), + 'compiler must branch on logic nodeKind', + ).toBe(true); + expect( + /Logic node MUST declare a `logic` contract/.test(compiler.text), + 'compiler must reject logic nodes missing a `logic` contract with explicit summary', + ).toBe(true); + expect( + /Only nodeKind: .{1,8}logic.{1,8} nodes may declare a `logic` contract/.test(compiler.text), + 'compiler must reject non-logic nodes carrying a `logic` contract', + ).toBe(true); + expect( + /validateP2pLogicContract\(/.test(compiler.text), + 'compiler must invoke validateP2pLogicContract for logic nodes', + ).toBe(true); + }); + + it('#57 script retry honours transient-only allowlist + per-round attempt budget (R3 v1b)', () => { + const constants = read('shared/p2p-workflow-constants.ts'); + expect( + /P2P_SCRIPT_RETRY_DEFAULT_ATTEMPTS\s*=\s*3/.test(constants.text), + 'default script retry attempts must be 3', + ).toBe(true); + expect( + /P2P_SCRIPT_RETRIABLE_DIAGNOSTIC_CODES\s*=\s*\[[\s\S]{0,200}'script_timeout'[\s\S]{0,200}'daemon_busy'/.test(constants.text), + 'transient retriable codes must include script_timeout and daemon_busy', + ).toBe(true); + const orchestrator = read('src/daemon/p2p-orchestrator.ts'); + expect( + /result\.diagnostics\.every\([\s\S]{0,200}P2P_SCRIPT_RETRIABLE_DIAGNOSTIC_CODES/.test(orchestrator.text), + 'retry decision must require ALL diagnostics be in the retriable list', + ).toBe(true); + // R3 v2 PR-ζ ζ-10 — retry counter switched from `roundAttemptCounts` + // to a dedicated `scriptRetryCounts` map; budget check is `<= max - 1` + // to keep "first attempt + N retries" semantics. + expect( + /scriptAttemptsSoFar\s*<\s*P2P_SCRIPT_RETRY_DEFAULT_ATTEMPTS\s*-\s*1/.test(orchestrator.text) + || /attemptsSoFar\s*<\s*P2P_SCRIPT_RETRY_DEFAULT_ATTEMPTS/.test(orchestrator.text), + 'retry decision must check the per-round retry counter against the budget', + ).toBe(true); + expect( + /scriptDispatch\.kind\s*===\s*'retry'[\s\S]{0,500}continue;/.test(orchestrator.text), + 'executor must `continue` on retry kind so the same round re-runs', + ).toBe(true); + }); + + it('#58 artifact identity persistence wires through freeze + daemon startup (R3 v1b)', () => { + const runtime = read('src/daemon/p2p-workflow-artifact-runtime.ts'); + expect( + /export\s+async\s+function\s+loadPersistedFrozenP2pArtifactIdentities/.test(runtime.text), + 'artifact runtime must export loadPersistedFrozenP2pArtifactIdentities', + ).toBe(true); + expect( + /async\s+function\s+persistFrozenIdentity/.test(runtime.text), + 'artifact runtime must define persistFrozenIdentity', + ).toBe(true); + expect( + /function\s+recordFrozenIdentity/.test(runtime.text), + 'artifact runtime must wrap set + persist via recordFrozenIdentity helper', + ).toBe(true); + expect( + /\.tmp.*?rename/s.test(runtime.text), + 'persistence must use atomic .tmp → rename to avoid torn writes', + ).toBe(true); + const orchestrator = read('src/daemon/p2p-orchestrator.ts'); + expect( + /loadPersistedFrozenP2pArtifactIdentities\(\)/.test(orchestrator.text), + 'orchestrator startup hook must rehydrate persisted identities', + ).toBe(true); + }); + + it('#59 discussion writer queue is non-blocking and surfaces failures via callback (R3 v1b W2)', () => { + const writer = read('src/daemon/p2p-discussion-writer.ts'); + expect( + /export\s+function\s+enqueueP2pDiscussionWrite/.test(writer.text), + 'writer module must export enqueueP2pDiscussionWrite', + ).toBe(true); + expect( + /export\s+async\s+function\s+flushP2pDiscussionWriteQueue/.test(writer.text), + 'writer module must export flushP2pDiscussionWriteQueue', + ).toBe(true); + expect( + /onWriteFailure\?\s*:\s*\(error/.test(writer.text), + 'writer must accept and invoke an onWriteFailure listener so the orchestrator can record helper diagnostics', + ).toBe(true); + expect( + /pendingBytes[\s\S]{0,80}P2P_DISCUSSION_WRITE_QUEUE_MAX_BYTES/.test(writer.text), + 'writer must enforce the byte-budget backpressure cap (pendingBytes vs P2P_DISCUSSION_WRITE_QUEUE_MAX_BYTES)', + ).toBe(true); + const orchestrator = read('src/daemon/p2p-orchestrator.ts'); + // R3 v2 PR-ζ ζ-4 / M1 — enqueueP2pDiscussionWrite now takes an + // optional fourth `onSegmentDropped` callback so backpressure drops + // surface as helper diagnostics. The orchestrator passes + // `run.contextFilePath` as the first arg in both forms. + expect( + /enqueueP2pDiscussionWrite\([\s\S]{0,40}run\.contextFilePath/.test(orchestrator.text), + 'orchestrator script + logic dispatch must use enqueueP2pDiscussionWrite, not awaited appendFile', + ).toBe(true); + expect( + /flushP2pDiscussionWriteQueue\(run\.contextFilePath\)/.test(orchestrator.text), + 'orchestrator must flush the queue before reading the discussion file for the run summary', + ).toBe(true); + }); + + it('#60 script runner env deny-list strips dynamic-loader hooks unconditionally (R3 v1b sandbox)', () => { + const runner = read('src/daemon/p2p-workflow-script-runner.ts'); + expect( + /export\s+const\s+P2P_SCRIPT_ENV_DENYLIST/.test(runner.text), + 'runner must export the deny-list constant', + ).toBe(true); + for (const hook of ['LD_PRELOAD', 'DYLD_INSERT_LIBRARIES', 'NODE_OPTIONS']) { + expect( + runner.text.includes(`'${hook}'`), + `deny-list MUST include ${hook}`, + ).toBe(true); + } + expect( + /denylist\.has\(name\)\s*\)\s*continue/.test(runner.text), + 'buildScriptSpawnEnv must skip allowlisted names that appear in the deny-list', + ).toBe(true); + }); + + it('#54 UI-managed allowedExecutables plumbing: envelope + saved config + canvas panel section (R3 PR-α §13.13)', () => { + // Envelope type carries the field with documentation pointing at UI flow. + const envelopeType = read('shared/p2p-workflow-types.ts'); + expect( + /allowedExecutables\?\:\s*string\[\]/.test(envelopeType.text), + 'P2pWorkflowLaunchEnvelope must declare optional allowedExecutables', + ).toBe(true); + // Validator enforces shape on the envelope. + const validator = read('shared/p2p-workflow-validators.ts'); + expect( + /input\.allowedExecutables/.test(validator.text), + 'envelope validator must inspect allowedExecutables', + ).toBe(true); + expect( + /allowedExecutables\.length\s*>\s*64/.test(validator.text), + 'envelope validator must cap allowedExecutables at 64 entries', + ).toBe(true); + // Saved config persists the user-managed list. + const savedConfig = read('shared/p2p-modes.ts'); + expect( + /allowedExecutables\?\:\s*string\[\]/.test(savedConfig.text), + 'P2pSavedConfig must declare optional allowedExecutables for userPref round-trip', + ).toBe(true); + // Canvas panel writes config.allowedExecutables into the envelope. + const panel = read('web/src/components/P2pConfigPanel.tsx'); + expect( + /sanitizeAllowedExecutables\(config\.allowedExecutables\)/.test(panel.text), + 'buildP2pWorkflowLaunchEnvelopeFromConfig must sanitize and emit config.allowedExecutables', + ).toBe(true); + expect( + /data-testid="p2p-allowed-executables-section"/.test(panel.text), + 'P2pConfigPanel must render a dedicated "Allowed executables" UI section', + ).toBe(true); + expect( + /data-testid="p2p-allowed-executables-add"/.test(panel.text), + 'allowed-executables UI must expose an Add button testid', + ).toBe(true); + }); + + // ────────────────────────────────────────────────────────────────────── + // R3 v2 PR-ζ reverse-regression #61-#66 + #68-#70 + // + // Calibrated state: prototype pollution write path closed, persistence + // hardened against symlinks / path-traversal / repoRoot mismatch / + // count + TTL caps / .tmp orphans, terminal cleanup hook fires for + // all three caches, baseline diagnostics fail-closed, scriptRetryCounts + // independent of roundAttemptCounts, env deny-list expanded by 11. + // ────────────────────────────────────────────────────────────────────── + + it('#61 runVariables MUST be initialised from a null-prototype map (R3 v2 PR-ζ B1/A5)', () => { + const file = read('src/daemon/p2p-orchestrator.ts'); + expect( + /Object\.create\(null\)/.test(file.text), + 'orchestrator MUST initialise runVariables via Object.create(null) for prototype-pollution defence', + ).toBe(true); + expect( + /runVariables:\s*\(\(\)\s*=>\s*\{[\s\S]{0,400}Object\.create\(null\)/.test(file.text), + 'runVariables initialiser must wrap Object.create(null) into the IIFE that seeds defaults', + ).toBe(true); + }); + + it('#62 orchestrator script-variable write path MUST validate name + array caps (R3 v2 PR-ζ B1/B5)', () => { + const file = read('src/daemon/p2p-orchestrator.ts'); + expect( + /P2P_WORKFLOW_VARIABLE_NAME_PATTERN\.test\(name\)/.test(file.text), + 'write path must reject names failing P2P_WORKFLOW_VARIABLE_NAME_PATTERN', + ).toBe(true); + expect( + /P2P_WORKFLOW_VARIABLE_ARRAY_MAX_ELEMENTS/.test(file.text), + 'write path must enforce element-count cap', + ).toBe(true); + expect( + /P2P_WORKFLOW_VARIABLE_ARRAY_MAX_ELEMENT_BYTES/.test(file.text), + 'write path must enforce per-element byte cap', + ).toBe(true); + }); + + it('#63 persistFrozenIdentity tmp filename MUST include process.pid (R3 v2 PR-ζ B2)', () => { + const file = read('src/daemon/p2p-workflow-artifact-runtime.ts'); + expect( + /\$\{filePath\}\.\$\{process\.pid\}/.test(file.text), + 'tmp filename must include process.pid to prevent same-runId concurrent corruption', + ).toBe(true); + }); + + it('#64 rehydrate MUST reject symlink top-level entries (R3 v2 PR-ζ A3)', () => { + const file = read('src/daemon/p2p-workflow-artifact-runtime.ts'); + expect( + /entryStat\.isSymbolicLink\(\)/.test(file.text), + 'rehydrate must lstat entry and skip symlinks', + ).toBe(true); + }); + + it('#65 rehydrate MUST re-validate every openspecArtifactPaths entry (R3 v2 PR-ζ A4)', () => { + const file = read('src/daemon/p2p-workflow-artifact-runtime.ts'); + expect( + /validateP2pArtifactRelativePath\(declared/.test(file.text), + 'rehydrate must run validateP2pArtifactRelativePath on each declared path', + ).toBe(true); + }); + + it('#66 terminal transition MUST schedule cleanup of 3 caches (R3 v2 PR-ζ A6/O4)', () => { + const file = read('src/daemon/p2p-orchestrator.ts'); + expect( + /scheduleP2pRunTerminalCleanup\(/.test(file.text), + 'terminal cleanup helper must exist and be called from transition+failRun', + ).toBe(true); + // The helper must clear all three caches. + expect( + /dropP2pDiscussionWriteQueue\(/.test(file.text), + 'cleanup helper must drop discussion writer queue', + ).toBe(true); + expect( + /clearPersistedFrozenP2pArtifactIdentity\(/.test(file.text), + 'cleanup helper must clear frozen identity', + ).toBe(true); + expect( + /runArtifactRootCache\.delete\(/.test(file.text), + 'cleanup helper must delete runArtifactRootCache entry', + ).toBe(true); + }); + + it('#68 captureP2pArtifactBaseline diagnostics + truncated MUST fail closed (R3 v2 PR-ζ Cx1-A2)', () => { + const file = read('src/daemon/p2p-orchestrator.ts'); + // Both pre and post capture sites must inspect diagnostics + truncated. + const occurrences = file.text.match(/captureResult\.diagnostics\.find|afterCapture\.diagnostics\.find/g) ?? []; + expect(occurrences.length, 'pre AND post capture sites must inspect diagnostics').toBeGreaterThanOrEqual(2); + expect( + /baseline\.truncated/.test(file.text), + 'baseline.truncated must be checked', + ).toBe(true); + expect( + /Pre-round artifact baseline capture failed|Post-round artifact baseline capture failed/.test(file.text), + 'failRun message must distinguish pre vs post capture failure', + ).toBe(true); + }); + + it('#69 P2P_SCRIPT_ENV_DENYLIST MUST cover loader / runtime / shell / package categories (R3 v2 PR-ζ M4)', () => { + const file = read('src/daemon/p2p-workflow-script-runner.ts'); + const required = [ + 'JAVA_TOOL_OPTIONS', 'PSModulePath', 'LUA_PATH', 'LUA_CPATH', + 'PYTHONHOME', 'PIP_INDEX_URL', 'npm_config_registry', + 'SHELLOPTS', 'BASHOPTS', 'PROMPT_COMMAND', 'IFS', + ]; + for (const name of required) { + expect( + file.text.includes(`'${name}'`), + `P2P_SCRIPT_ENV_DENYLIST MUST include ${name}`, + ).toBe(true); + } + }); + + it('#67 envelope_compiled MUST advance via compiled graph; unmatched conditional + no default = fail closed (R3 v2 PR-η Cx1-A1)', () => { + const file = read('src/daemon/p2p-orchestrator.ts'); + expect( + /unmatched_edge_route/.test(file.text), + 'orchestrator must emit `unmatched_edge_route` diagnostic when conditional edges miss AND no default exists', + ).toBe(true); + // After the legacy `if (jump) { ... continue; }` block, envelope_compiled + // must take the compiled-graph branch BEFORE the `roundIndex += 1` + // fallback. We assert both the branch presence AND the fact that the + // legacy `roundIndex += 1` is now reachable ONLY for non-envelope_compiled + // runs. + // Use brace-balance to find the envelope_compiled advance block + // and then verify the legacy `roundIndex += 1` comes AFTER it. + const advanceIdx = file.text.indexOf("run.advancedSourceKind === 'envelope_compiled' && run.boundWorkflow"); + const legacyIncIdx = file.text.lastIndexOf('roundIndex += 1'); + expect(advanceIdx, 'envelope_compiled advance branch must exist').toBeGreaterThanOrEqual(0); + expect(legacyIncIdx, 'legacy roundIndex++ fallback must exist').toBeGreaterThanOrEqual(0); + // The advance branch must precede the legacy fallback in source order. + expect(advanceIdx).toBeLessThan(legacyIncIdx); + expect( + /No outgoing conditional edge matched from/.test(file.text), + 'unmatched-route diagnostic summary must include the canonical phrase', + ).toBe(true); + }); + + const diagnosticsModuleSpec = read('shared/p2p-workflow-diagnostics.ts'); + it('#67b unmatched_edge_route diagnostic code is registered (R3 v2 PR-η)', () => { + expect( + /'unmatched_edge_route'/.test(diagnosticsModuleSpec.text), + 'diagnostic code list must include unmatched_edge_route', + ).toBe(true); + expect( + /unmatched_edge_route:\s*\['execute'\]/.test(diagnosticsModuleSpec.text), + 'phase matrix must register unmatched_edge_route on the execute phase', + ).toBe(true); + }); + + it('#70 scriptRetryCounts MUST be independent of roundAttemptCounts (R3 v2 PR-ζ M2)', () => { + const file = read('src/daemon/p2p-orchestrator.ts'); + expect( + /scriptRetryCounts\?\:\s*Record/.test(file.text), + 'P2pRun must declare scriptRetryCounts as an optional Record', + ).toBe(true); + expect( + /run\.scriptRetryCounts\[round\.id\]/.test(file.text), + 'retry decision must read scriptRetryCounts, not roundAttemptCounts', + ).toBe(true); + expect( + /delete run\.scriptRetryCounts\[jump\]/.test(file.text), + 'jump-rebound must reset scriptRetryCounts for the target round', + ).toBe(true); + }); +}); diff --git a/web/src/app.tsx b/web/src/app.tsx index 3a959c0ef..9b4b9ff6b 100644 --- a/web/src/app.tsx +++ b/web/src/app.tsx @@ -20,6 +20,7 @@ import { type FileBrowserPreviewUpdate, } from './components/file-browser-lazy.js'; import { DAEMON_MSG } from '@shared/daemon-events.js'; +import { P2P_WORKFLOW_MSG } from '@shared/p2p-workflow-messages.js'; import { RECONNECT_GRACE_MS } from '@shared/ack-protocol.js'; import type { UsageContextWindowSource } from '@shared/usage-context-window.js'; import { mapP2pRunToDiscussion, mergeP2pDiscussionUpdate } from './p2p-run-mapping.js'; @@ -1726,8 +1727,19 @@ export function App() { setConnected(true); setConnecting(false); ws.requestSessionList(); - ws.discussionList(); - ws.p2pStatus(); + // Migrate to scoped p2p list. The active session is captured via the + // ref to survive useEffect closure; the daemon will fail-closed and + // return [] if it cannot resolve a project scope from this session, + // matching the new server-side guard. The same scope is implicitly + // tracked inside the WS client via setP2pWorkflowRequestScope on + // terminal subscribe — passing it explicitly here just makes the + // scope source obvious at the call site. + { + const initialActive = activeSessionRef.current; + const initialScope = initialActive ? { sessionName: initialActive } : undefined; + ws.p2pListDiscussions(initialScope); + ws.p2pStatus(initialScope); + } requestActiveTimelineRefresh({ resetCooldowns: true }); // Timeout: if session_list never arrives, stop blocking the UI if (sessionListRetryRef.current) clearTimeout(sessionListRetryRef.current); @@ -2111,7 +2123,7 @@ export function App() { }); } // ── P2P Quick Discussion progress → map to discussions state ────────── - if (msg.type === 'p2p.conflict') { + if (msg.type === P2P_WORKFLOW_MSG.CONFLICT) { // Active P2P run exists — notify user if (typeof window !== 'undefined') { window.alert( @@ -2120,7 +2132,7 @@ export function App() { ); } } - if (msg.type === 'p2p.run_update' && msg.run) { + if (msg.type === P2P_WORKFLOW_MSG.RUN_UPDATE && msg.run) { const entry = mapP2pRunToDiscussion(msg.run as Record); setDiscussions((prev) => { const existing = prev.find((d) => d.id === entry.id); @@ -2136,10 +2148,10 @@ export function App() { }, 120_000); } } - if (msg.type === 'p2p.cancel_response' && msg.ok && msg.runId) { + if (msg.type === P2P_WORKFLOW_MSG.CANCEL_RESPONSE && msg.ok && msg.runId) { setDiscussions((prev) => prev.filter((d) => d.id !== `p2p_${msg.runId}`)); } - if (msg.type === 'p2p.status_response') { + if (msg.type === P2P_WORKFLOW_MSG.STATUS_RESPONSE) { const runs = Array.isArray(msg.runs) ? msg.runs : msg.run @@ -2336,8 +2348,15 @@ export function App() { sessions: sessionsRef.current, subSessions: subSessionsRef.current, })); - // Refresh discussion list - ws.discussionList(); + // Refresh discussion list — daemon now requires a project scope, so + // forward the active session as the scope source. Falls back to undefined + // (WS-client uses its tracked scope from terminal subscriptions) when the + // user has not picked an active session yet. + { + const reconnectActive = activeSessionRef.current; + const reconnectScope = reconnectActive ? { sessionName: reconnectActive } : undefined; + ws.p2pListDiscussions(reconnectScope); + } } }); @@ -3685,7 +3704,7 @@ export function App() { onStopDiscussion={(id) => { if (id.startsWith('p2p_')) { // P2P runs use p2p.cancel with the actual run ID (strip p2p_ prefix) - wsRef.current?.send({ type: 'p2p.cancel', runId: id.slice(4) }); + wsRef.current?.send({ type: P2P_WORKFLOW_MSG.CANCEL, runId: id.slice(4) }); // Remove from UI immediately setDiscussions((prev) => prev.filter((d) => d.id !== id)); } else { @@ -3912,7 +3931,7 @@ export function App() { liveDiscussions={discussions} onStopDiscussion={(id) => { if (id.startsWith('p2p_')) { - wsRef.current?.send({ type: 'p2p.cancel', runId: id.slice(4) }); + wsRef.current?.send({ type: P2P_WORKFLOW_MSG.CANCEL, runId: id.slice(4) }); setDiscussions((prev) => prev.filter((d) => d.id !== id)); } else { wsRef.current?.discussionStop(id); diff --git a/web/src/components/AdvancedWorkflowCanvasEditor.tsx b/web/src/components/AdvancedWorkflowCanvasEditor.tsx new file mode 100644 index 000000000..bc7020eb1 --- /dev/null +++ b/web/src/components/AdvancedWorkflowCanvasEditor.tsx @@ -0,0 +1,676 @@ +/** + * AdvancedWorkflowCanvasEditor — v1a visual graph editor for P2P workflow drafts. + * + * Replaces the earlier list-based `AdvancedWorkflowDraftEditor` (folded back + * into v1a per the 87fd4db8-ff5 R3 plan). This is a single editor surface; + * there is NO toggle and no second list view to maintain. + * + * Design constraints: + * - Pure preact + inline SVG, NO external graph libs (`react-flow`, `d3`, + * `cytoscape`, `dagre` not in `web/package.json`). + * - Node positions are AUTHORING-ONLY metadata: stored in component state and + * never serialised into `P2pWorkflowDraft` (compile/bind don't need them). + * Positions auto-layout when missing (deterministic by node order so test + * snapshots stay stable). + * - All edits round-trip through `validateP2pWorkflowDraft` so diagnostics + * render inline before Save (preserves the v1a contract that the editor + * mirrors validator output). + * - `readOnly` mode disables all mutations (drag, edge-create, inspector + * inputs, delete) so future-schema drafts render safely. + * - Edge creation by drag: pointer-down on a node's right anchor, drag to + * another node, pointer-up creates a new DEFAULT edge (user toggles to + * conditional + sets condition in inspector). + */ + +import { useEffect, useMemo, useRef, useState } from 'preact/hooks'; +import { useTranslation } from 'react-i18next'; +import { + P2P_EDGE_CONDITION_KINDS, + P2P_EDGE_KINDS, + P2P_NODE_KINDS, + P2P_PERMISSION_SCOPES, + P2P_PRESET_KEYS, + type P2pEdgeConditionKind, + type P2pEdgeKind, + type P2pNodeKind, + type P2pPermissionScope, + type P2pPresetKey, +} from '@shared/p2p-workflow-constants.js'; +import type { + P2pWorkflowDraft, + P2pWorkflowEdgeDraft, + P2pWorkflowNodeDraft, +} from '@shared/p2p-workflow-types.js'; +import { validateP2pWorkflowDraft } from '@shared/p2p-workflow-validators.js'; + +// ── Layout constants ──────────────────────────────────────────────────────── +// Kept as module-level constants so unit tests can import + assert layout. +export const CANVAS_NODE_WIDTH = 168; +export const CANVAS_NODE_HEIGHT = 78; +export const CANVAS_GRID_X = 220; +export const CANVAS_GRID_Y = 120; +export const CANVAS_VIEW_WIDTH = 720; +export const CANVAS_VIEW_HEIGHT = 420; +export const CANVAS_NODES_PER_ROW = 3; + +interface NodePosition { + x: number; + y: number; +} + +export interface AdvancedWorkflowCanvasEditorProps { + value: P2pWorkflowDraft; + onChange: (next: P2pWorkflowDraft) => void; + readOnly: boolean; +} + +interface PointerDragState { + kind: 'node' | 'edge_create'; + nodeId: string; + // For 'node' drag: pointer offset from node origin so cursor stays anchored. + offsetX?: number; + offsetY?: number; + // For 'edge_create': current pointer position in canvas coords. + cursorX?: number; + cursorY?: number; +} + +/** + * Sequential, deterministic local id within editor scope. Mirrors the helper + * the previous list editor exposed so existing draft fixtures keep producing + * the same `node_1` / `edge_1` collisions. + */ +export function nextLocalId(prefix: string, existing: ReadonlySet): string { + for (let n = 1; n < 1000; n += 1) { + const candidate = `${prefix}_${n}`; + if (!existing.has(candidate)) return candidate; + } + return `${prefix}_${existing.size + 1}`; +} + +/** + * Deterministic auto-layout — places nodes on a grid in declaration order so + * tests can assert position math without snapshotting RNG. + */ +export function autoLayoutPositions(nodes: ReadonlyArray<{ id: string }>): Record { + const positions: Record = {}; + nodes.forEach((node, index) => { + const col = index % CANVAS_NODES_PER_ROW; + const row = Math.floor(index / CANVAS_NODES_PER_ROW); + positions[node.id] = { + x: 30 + col * CANVAS_GRID_X, + y: 30 + row * CANVAS_GRID_Y, + }; + }); + return positions; +} + +// ── Inline styles (consistent with surrounding panel theme) ───────────────── +const cardStyle = { + marginTop: 12, + background: '#0b1220', + border: '1px solid #334155', + borderRadius: 8, + padding: 10, + display: 'grid', + gap: 10, +} as const; +const headerRowStyle = { + display: 'flex', alignItems: 'center', justifyContent: 'space-between', gap: 8, +} as const; +const sectionLabelStyle = { fontSize: 12, color: '#94a3b8', fontWeight: 600 } as const; +const btnStyle = { + padding: '4px 10px', borderRadius: 5, border: '1px solid #475569', background: '#1e293b', + color: '#cbd5e1', fontSize: 11, cursor: 'pointer', +} as const; +const inputStyle = { + width: '100%', background: '#0f172a', border: '1px solid #334155', borderRadius: 5, + color: '#e2e8f0', fontSize: 12, padding: '5px 7px', outline: 'none', + fontFamily: 'inherit', +} as const; +const labelStyle = { fontSize: 11, color: '#94a3b8', display: 'grid', gap: 3 } as const; +const inspectorCardStyle = { + background: '#0f172a', border: '1px solid #334155', borderRadius: 6, padding: 8, display: 'grid', gap: 6, +} as const; + +export function AdvancedWorkflowCanvasEditor({ value, onChange, readOnly }: AdvancedWorkflowCanvasEditorProps) { + const { t } = useTranslation(); + const diagnostics = useMemo(() => validateP2pWorkflowDraft(value).diagnostics, [value]); + const nodeIds = useMemo(() => new Set(value.nodes.map((node) => node.id)), [value.nodes]); + const edgeIds = useMemo(() => new Set(value.edges.map((edge) => edge.id)), [value.edges]); + const nodesById = useMemo(() => { + const map = new Map(); + for (const node of value.nodes) map.set(node.id, node); + return map; + }, [value.nodes]); + + // Position state — visual-only, NEVER serialised into the draft. Initialised + // via deterministic auto-layout; backfilled when nodes are added. + const [positions, setPositions] = useState>(() => autoLayoutPositions(value.nodes)); + useEffect(() => { + setPositions((prev) => { + let mutated = false; + const next = { ...prev }; + const layout = autoLayoutPositions(value.nodes); + for (const node of value.nodes) { + if (!next[node.id]) { next[node.id] = layout[node.id]; mutated = true; } + } + // Drop stale positions for removed nodes so the map doesn't grow. + for (const id of Object.keys(next)) { + if (!nodeIds.has(id)) { delete next[id]; mutated = true; } + } + return mutated ? next : prev; + }); + }, [value.nodes, nodeIds]); + + const [selection, setSelection] = useState< + | { kind: 'node'; id: string } + | { kind: 'edge'; id: string } + | null + >(null); + + const svgRef = useRef(null); + const dragRef = useRef(null); + // Force re-render during drag without storing transient state in React. + const [, forceTick] = useState(0); + + // Drop selection if the selected entity disappears (e.g., user removes node). + useEffect(() => { + if (!selection) return; + if (selection.kind === 'node' && !nodeIds.has(selection.id)) setSelection(null); + if (selection.kind === 'edge' && !edgeIds.has(selection.id)) setSelection(null); + }, [selection, nodeIds, edgeIds]); + + const screenToCanvas = (clientX: number, clientY: number): { x: number; y: number } => { + const svg = svgRef.current; + if (!svg) return { x: clientX, y: clientY }; + const rect = svg.getBoundingClientRect(); + const scaleX = CANVAS_VIEW_WIDTH / rect.width; + const scaleY = CANVAS_VIEW_HEIGHT / rect.height; + return { + x: (clientX - rect.left) * scaleX, + y: (clientY - rect.top) * scaleY, + }; + }; + + // ── Mutators ────────────────────────────────────────────────────────────── + const updateNode = (id: string, fn: (n: P2pWorkflowNodeDraft) => P2pWorkflowNodeDraft) => { + if (readOnly) return; + onChange({ ...value, nodes: value.nodes.map((node) => (node.id === id ? fn(node) : node)) }); + }; + const updateEdge = (id: string, fn: (e: P2pWorkflowEdgeDraft) => P2pWorkflowEdgeDraft) => { + if (readOnly) return; + onChange({ ...value, edges: value.edges.map((edge) => (edge.id === id ? fn(edge) : edge)) }); + }; + const addNode = () => { + if (readOnly) return; + const id = nextLocalId('node', nodeIds); + onChange({ + ...value, + nodes: [ + ...value.nodes, + { id, title: id, nodeKind: 'llm', preset: 'discuss', permissionScope: 'analysis_only' }, + ], + }); + setSelection({ kind: 'node', id }); + }; + const removeNode = (id: string) => { + if (readOnly) return; + onChange({ + ...value, + nodes: value.nodes.filter((node) => node.id !== id), + edges: value.edges.filter((edge) => edge.fromNodeId !== id && edge.toNodeId !== id), + }); + if (selection?.kind === 'node' && selection.id === id) setSelection(null); + }; + const removeEdge = (id: string) => { + if (readOnly) return; + onChange({ ...value, edges: value.edges.filter((edge) => edge.id !== id) }); + if (selection?.kind === 'edge' && selection.id === id) setSelection(null); + }; + const setEdgeKind = (id: string, edgeKind: P2pEdgeKind) => { + updateEdge(id, (edge) => { + if (edgeKind === 'default') { + const { condition: _drop, ...rest } = edge; + void _drop; + return { ...rest, edgeKind }; + } + return { ...edge, edgeKind, condition: edge.condition ?? { kind: 'routing_key_equals', equals: '' } }; + }); + }; + const createEdgeBetween = (fromId: string, toId: string): string | null => { + if (readOnly) return null; + if (!nodeIds.has(fromId) || !nodeIds.has(toId)) return null; + const id = nextLocalId('edge', edgeIds); + onChange({ + ...value, + edges: [...value.edges, { id, fromNodeId: fromId, toNodeId: toId, edgeKind: 'default' }], + }); + return id; + }; + + // ── Pointer handlers ────────────────────────────────────────────────────── + const onSvgPointerMove = (event: PointerEvent) => { + const drag = dragRef.current; + if (!drag) return; + const point = screenToCanvas(event.clientX, event.clientY); + if (drag.kind === 'node') { + const offX = drag.offsetX ?? 0; + const offY = drag.offsetY ?? 0; + setPositions((prev) => ({ + ...prev, + [drag.nodeId]: { + x: Math.max(0, Math.min(CANVAS_VIEW_WIDTH - CANVAS_NODE_WIDTH, point.x - offX)), + y: Math.max(0, Math.min(CANVAS_VIEW_HEIGHT - CANVAS_NODE_HEIGHT, point.y - offY)), + }, + })); + } else if (drag.kind === 'edge_create') { + drag.cursorX = point.x; + drag.cursorY = point.y; + forceTick((tick) => tick + 1); + } + }; + const onSvgPointerUp = (event: PointerEvent) => { + const drag = dragRef.current; + if (!drag) return; + if (drag.kind === 'edge_create') { + // Hit-test against node bounding boxes to find the drop target. + const point = screenToCanvas(event.clientX, event.clientY); + const target = value.nodes.find((node) => { + const pos = positions[node.id]; + if (!pos) return false; + return point.x >= pos.x && point.x <= pos.x + CANVAS_NODE_WIDTH + && point.y >= pos.y && point.y <= pos.y + CANVAS_NODE_HEIGHT; + }); + if (target && target.id !== drag.nodeId) { + const newEdgeId = createEdgeBetween(drag.nodeId, target.id); + if (newEdgeId) setSelection({ kind: 'edge', id: newEdgeId }); + } + } + dragRef.current = null; + forceTick((tick) => tick + 1); + }; + + const beginNodeDrag = (event: PointerEvent, nodeId: string) => { + if (readOnly) return; + event.stopPropagation(); + const point = screenToCanvas(event.clientX, event.clientY); + const pos = positions[nodeId] ?? { x: 0, y: 0 }; + dragRef.current = { + kind: 'node', + nodeId, + offsetX: point.x - pos.x, + offsetY: point.y - pos.y, + }; + setSelection({ kind: 'node', id: nodeId }); + (event.currentTarget as Element)?.setPointerCapture?.(event.pointerId); + }; + const beginEdgeCreate = (event: PointerEvent, nodeId: string) => { + if (readOnly) return; + event.stopPropagation(); + const point = screenToCanvas(event.clientX, event.clientY); + dragRef.current = { + kind: 'edge_create', + nodeId, + cursorX: point.x, + cursorY: point.y, + }; + (event.currentTarget as Element)?.setPointerCapture?.(event.pointerId); + forceTick((tick) => tick + 1); + }; + + const select = ( + ariaLabel: string, current: T, options: readonly T[], + onSelect: (next: T) => void, + ) => ( + + ); + + // ── Render ──────────────────────────────────────────────────────────────── + const dragState = dragRef.current; + + const inspectorBody = (() => { + if (!selection) { + return ( +
+ {t('p2p.workflow.editor.inspector_empty', 'Select a node or edge to edit its properties.')} +
+ ); + } + if (selection.kind === 'node') { + const node = nodesById.get(selection.id); + if (!node) return null; + return ( +
+
+
{t('p2p.workflow.editor.node.section_label', 'Node')}
+ {!readOnly && ( + + )} +
+ updateNode(node.id, (current) => ({ ...current, title: (event.target as HTMLInputElement).value }))} + style={{ ...inputStyle, fontWeight: 600 }} + aria-label={`node-${node.id}-title`} + /> +
+ + + +
+