From 044324fa708e5d5bc11d2da6349a91b7e812b74e Mon Sep 17 00:00:00 2001 From: MK Date: Tue, 10 Mar 2026 13:08:18 -0400 Subject: [PATCH 1/3] feat: add code-agent skill, builtin tools, and fix telegram context cancellation - Add code-agent skill with file read/write/edit, bash execute, glob/grep search, directory tree, and file patch tools - Add github skill shell scripts for clone, checkout, commit, PR, and issues - Fix telegram handler context cancellation by detaching from polling lifecycle - Add interim "Working on it" message for long-running telegram tasks (>15s) - Extract shared telegram handler logic into handleEvent method - Update runtime config, memory compactor, guardrails, and loop - Update skill contract types and requirements derivation - Update CLI init, TUI wizard, and runner --- forge-cli/cmd/init.go | 2 +- forge-cli/cmd/init_test.go | 2 +- forge-cli/internal/tui/steps/provider_step.go | 15 +- forge-cli/internal/tui/wizard.go | 2 +- forge-cli/runtime/runner.go | 114 ++- forge-core/forgecore_test.go | 31 +- forge-core/llm/providers/responses.go | 18 +- forge-core/runtime/config.go | 2 +- forge-core/runtime/config_test.go | 6 +- forge-core/runtime/guardrails.go | 69 +- forge-core/runtime/loop.go | 494 +++++++++- forge-core/runtime/loop_test.go | 840 +++++++++++++++++- forge-core/runtime/memory.go | 29 +- forge-core/runtime/memory_compactor.go | 71 +- forge-core/runtime/memory_compactor_test.go | 107 +++ forge-core/tools/builtins/bash_execute.go | 219 +++++ .../tools/builtins/code_agent_tools_test.go | 776 ++++++++++++++++ forge-core/tools/builtins/directory_tree.go | 127 +++ forge-core/tools/builtins/file_edit.go | 111 +++ forge-core/tools/builtins/file_patch.go | 169 ++++ forge-core/tools/builtins/file_read.go | 136 +++ forge-core/tools/builtins/file_write.go | 81 ++ forge-core/tools/builtins/glob_search.go | 161 ++++ forge-core/tools/builtins/grep_search.go | 295 ++++++ forge-core/tools/builtins/pathutil.go | 59 ++ 
forge-core/tools/builtins/pathutil_test.go | 105 +++ forge-core/tools/builtins/register.go | 88 ++ forge-core/tools/builtins/truncate.go | 39 + forge-plugins/channels/telegram/telegram.go | 74 +- forge-skills/contract/types.go | 3 + .../local/embedded/code-agent/SKILL.md | 282 ++++++ .../code-agent/scripts/code-agent-edit.sh | 111 +++ .../code-agent/scripts/code-agent-read.sh | 128 +++ .../code-agent/scripts/code-agent-run.sh | 325 +++++++ .../code-agent/scripts/code-agent-scaffold.sh | 674 ++++++++++++++ .../code-agent/scripts/code-agent-write.sh | 89 ++ forge-skills/local/embedded/github/SKILL.md | 105 ++- .../github/scripts/github-checkout.sh | 71 ++ .../embedded/github/scripts/github-clone.sh | 90 ++ .../embedded/github/scripts/github-commit.sh | 111 +++ .../github/scripts/github-create-pr.sh | 73 ++ .../embedded/github/scripts/github-push.sh | 77 ++ .../embedded/github/scripts/github-status.sh | 74 ++ forge-skills/local/registry_embedded_test.go | 20 +- forge-skills/requirements/derive.go | 27 +- forge-skills/requirements/derive_test.go | 34 + forge-skills/requirements/requirements.go | 15 +- .../requirements/requirements_test.go | 48 + forge-ui/handlers_create.go | 17 +- skills/code-agent/SKILL.md | 279 ++++++ skills/code-agent/scripts/code-agent-edit.sh | 112 +++ skills/code-agent/scripts/code-agent-read.sh | 101 +++ skills/code-agent/scripts/code-agent-run.sh | 323 +++++++ .../code-agent/scripts/code-agent-scaffold.sh | 674 ++++++++++++++ skills/code-agent/scripts/code-agent-write.sh | 87 ++ 55 files changed, 8048 insertions(+), 144 deletions(-) create mode 100644 forge-core/tools/builtins/bash_execute.go create mode 100644 forge-core/tools/builtins/code_agent_tools_test.go create mode 100644 forge-core/tools/builtins/directory_tree.go create mode 100644 forge-core/tools/builtins/file_edit.go create mode 100644 forge-core/tools/builtins/file_patch.go create mode 100644 forge-core/tools/builtins/file_read.go create mode 100644 
forge-core/tools/builtins/file_write.go create mode 100644 forge-core/tools/builtins/glob_search.go create mode 100644 forge-core/tools/builtins/grep_search.go create mode 100644 forge-core/tools/builtins/pathutil.go create mode 100644 forge-core/tools/builtins/pathutil_test.go create mode 100644 forge-core/tools/builtins/truncate.go create mode 100644 forge-skills/local/embedded/code-agent/SKILL.md create mode 100755 forge-skills/local/embedded/code-agent/scripts/code-agent-edit.sh create mode 100755 forge-skills/local/embedded/code-agent/scripts/code-agent-read.sh create mode 100755 forge-skills/local/embedded/code-agent/scripts/code-agent-run.sh create mode 100755 forge-skills/local/embedded/code-agent/scripts/code-agent-scaffold.sh create mode 100755 forge-skills/local/embedded/code-agent/scripts/code-agent-write.sh create mode 100755 forge-skills/local/embedded/github/scripts/github-checkout.sh create mode 100755 forge-skills/local/embedded/github/scripts/github-clone.sh create mode 100755 forge-skills/local/embedded/github/scripts/github-commit.sh create mode 100755 forge-skills/local/embedded/github/scripts/github-create-pr.sh create mode 100755 forge-skills/local/embedded/github/scripts/github-push.sh create mode 100755 forge-skills/local/embedded/github/scripts/github-status.sh create mode 100644 skills/code-agent/SKILL.md create mode 100755 skills/code-agent/scripts/code-agent-edit.sh create mode 100755 skills/code-agent/scripts/code-agent-read.sh create mode 100755 skills/code-agent/scripts/code-agent-run.sh create mode 100755 skills/code-agent/scripts/code-agent-scaffold.sh create mode 100755 skills/code-agent/scripts/code-agent-write.sh diff --git a/forge-cli/cmd/init.go b/forge-cli/cmd/init.go index 1160994..4bf6919 100644 --- a/forge-cli/cmd/init.go +++ b/forge-cli/cmd/init.go @@ -1016,7 +1016,7 @@ func buildTemplateData(opts *initOptions) templateData { func defaultModelNameForProvider(provider string) string { switch provider { case "openai": - 
return "gpt-5.2-2025-12-11" + return "gpt-5.4" case "anthropic": return "claude-sonnet-4-20250514" case "gemini": diff --git a/forge-cli/cmd/init_test.go b/forge-cli/cmd/init_test.go index d59edad..471a5cb 100644 --- a/forge-cli/cmd/init_test.go +++ b/forge-cli/cmd/init_test.go @@ -580,7 +580,7 @@ func TestBuildTemplateData_DefaultModels(t *testing.T) { provider string expectedModel string }{ - {"openai", "gpt-5.2-2025-12-11"}, + {"openai", "gpt-5.4"}, {"anthropic", "claude-sonnet-4-20250514"}, {"gemini", "gemini-2.5-flash"}, {"ollama", "llama3"}, diff --git a/forge-cli/internal/tui/steps/provider_step.go b/forge-cli/internal/tui/steps/provider_step.go index 73e5900..b45a3f5 100644 --- a/forge-cli/internal/tui/steps/provider_step.go +++ b/forge-cli/internal/tui/steps/provider_step.go @@ -40,17 +40,18 @@ type modelOption struct { // openAIOAuthModels are available when using browser-based OAuth login. var openAIOAuthModels = []modelOption{ - {DisplayName: "GPT 5.3 Codex", ModelID: "gpt-5.3-codex"}, - {DisplayName: "GPT 5.2", ModelID: "gpt-5.2-2025-12-11"}, - {DisplayName: "GPT 5.2 Codex", ModelID: "gpt-5.2-codex"}, + {DisplayName: "GPT 5.4", ModelID: "gpt-5.4"}, + {DisplayName: "GPT 5 Mini", ModelID: "gpt-5-mini"}, + {DisplayName: "GPT 5 Nano", ModelID: "gpt-5-nano"}, + {DisplayName: "GPT 4.1", ModelID: "gpt-4.1"}, } // openAIAPIKeyModels are available when using an API key. var openAIAPIKeyModels = []modelOption{ - {DisplayName: "GPT 5.2", ModelID: "gpt-5.2-2025-12-11"}, - {DisplayName: "GPT 5 Mini", ModelID: "gpt-5-mini-2025-08-07"}, - {DisplayName: "GPT 5 Nano", ModelID: "gpt-5-nano-2025-08-07"}, - {DisplayName: "GPT 4.1 Mini", ModelID: "gpt-4.1-mini-2025-04-14"}, + {DisplayName: "GPT 5.4", ModelID: "gpt-5.4"}, + {DisplayName: "GPT 5 Mini", ModelID: "gpt-5-mini"}, + {DisplayName: "GPT 5 Nano", ModelID: "gpt-5-nano"}, + {DisplayName: "GPT 4.1", ModelID: "gpt-4.1"}, } // ProviderStep handles model provider selection and API key entry. 
diff --git a/forge-cli/internal/tui/wizard.go b/forge-cli/internal/tui/wizard.go index 58b6cad..73dd0c1 100644 --- a/forge-cli/internal/tui/wizard.go +++ b/forge-cli/internal/tui/wizard.go @@ -18,7 +18,7 @@ type WizardContext struct { Provider string APIKey string AuthMethod string // "apikey" or "oauth" — how the user authenticated - ModelName string // selected model ID (e.g. "gpt-5.3-codex") + ModelName string // selected model ID (e.g. "gpt-5.4") OrganizationID string // OpenAI enterprise organization ID Fallbacks []FallbackProvider Channel string diff --git a/forge-cli/runtime/runner.go b/forge-cli/runtime/runner.go index 2fd1438..8f52a82 100644 --- a/forge-cli/runtime/runner.go +++ b/forge-cli/runtime/runner.go @@ -54,6 +54,34 @@ type RunnerConfig struct { // result to the appropriate channel (e.g. Slack, Telegram). type ScheduleNotifier func(ctx context.Context, channel, target string, response *a2a.Message) error +// codeAgentDirective is appended to the system prompt when code-agent skill +// is active. Forces the LLM to always call tools — never respond with text only. +const codeAgentDirective = `## Code Agent — MANDATORY RULES + +You are a coding agent. Every response MUST include tool calls. NEVER respond with only text. + +FORBIDDEN: +- Respond with "I'll do X now" or "Let me X" without calling tools in the same response +- Output code in markdown blocks for the user to copy-paste +- Ask the user for permission or confirmation before acting +- Describe what you plan to do without simultaneously doing it +- Read files unrelated to the error path or code you plan to change +- Edit test files before fixing the source code — always fix source first, then update tests + +REQUIRED: +- New project → code_agent_scaffold → code_agent_write (all files) → code_agent_run +- Modify existing code → search + trace error origin + read functions to change → code_agent_edit or code_agent_write +- Any request → ACT IMMEDIATELY with tools. 
Write ALL files and run in ONE turn. + +EXPLORATION RULES: +Bug fixes: search for the error message → trace to its origin (not just where it surfaces) → read functions you plan to call or replace → edit. +Features: search for similar patterns (2-3 searches) → read files you plan to modify → edit. +Both: complete the workflow (commit/push/PR if applicable). +Do NOT read files unrelated to the error path or code you plan to change. Do NOT replace function calls without reading both the old and new function. + +VERIFY BUG FIXES: +After editing, trace the failing input through your new code. Read the functions your fix calls — confirm they handle the type that was failing. If the codebase has a working path for similar logic (e.g., another provider), your fix must use the same approach. Type annotations alone do not fix runtime bugs.` + // Runner orchestrates the local A2A development server. type Runner struct { cfg RunnerConfig @@ -257,6 +285,24 @@ func (r *Runner) Run(ctx context.Context) error { r.logger.Warn("failed to register builtin tools", map[string]any{"error": err.Error()}) } + // Register search/exploration tools (grep, glob, tree). + // When code-agent skill is active, scope them to workspace/ so searches + // default to cloned repos. Otherwise scope to the main working directory. + searchRoot := r.cfg.WorkDir + if r.hasSkill("code-agent") { + codeDir := filepath.Join(r.cfg.WorkDir, "workspace") + if mkErr := os.MkdirAll(codeDir, 0o755); mkErr != nil { + r.logger.Warn("failed to create code workspace directory", map[string]any{"error": mkErr.Error()}) + } + searchRoot = codeDir + r.logger.Info("code-agent skill detected: workspace ready", map[string]any{"workspace": codeDir}) + // Script tools (code_agent_read, code_agent_write, code_agent_run) + // are registered by registerSkillTools() from SKILL.md ## Tool: entries. 
+ } + if err := builtins.RegisterCodeAgentSearchTools(reg, searchRoot); err != nil { + r.logger.Warn("failed to register search tools", map[string]any{"error": err.Error()}) + } + // Register read_skill tool for lazy-loading skill instructions readSkill := builtins.NewReadSkillTool(r.cfg.WorkDir) if regErr := reg.Register(readSkill); regErr != nil { @@ -385,15 +431,25 @@ func (r *Runner) Run(ctx context.Context) error { charBudget = coreruntime.ContextBudgetForModel(mc.Client.Model) } + // Build system prompt; append code-agent tool directives if those tools are registered. + sysPrompt := r.buildSystemPrompt() + if r.hasSkill("code-agent") { + sysPrompt += "\n\n" + codeAgentDirective + } + execCfg := coreruntime.LLMExecutorConfig{ - Client: llmClient, - Tools: reg, - Hooks: hooks, - SystemPrompt: r.buildSystemPrompt(), - Logger: r.logger, - ModelName: mc.Client.Model, - CharBudget: charBudget, - FilesDir: filepath.Join(r.cfg.WorkDir, ".forge", "files"), + Client: llmClient, + Tools: reg, + Hooks: hooks, + SystemPrompt: sysPrompt, + Logger: r.logger, + ModelName: mc.Client.Model, + MaxIterations: 100, + CharBudget: charBudget, + FilesDir: filepath.Join(r.cfg.WorkDir, ".forge", "files"), + } + if r.derivedCLIConfig != nil { + execCfg.WorkflowPhases = r.derivedCLIConfig.WorkflowPhases } // Initialize memory persistence (enabled by default). @@ -1595,6 +1651,28 @@ func ensureGitignore(workDir string) { os.WriteFile(gitignorePath, []byte(content+entry), 0644) //nolint:errcheck } +// hasSkill checks whether a skill with the given name is present in the project's +// discovered skill files. Checks both ## Tool: entry names and frontmatter name. 
+func (r *Runner) hasSkill(name string) bool { + for _, sf := range r.discoverSkillFiles() { + entries, meta, err := cliskills.ParseFileWithMetadata(sf) + if err != nil { + continue + } + // Check frontmatter name (for skills without ## Tool: entries) + if meta != nil && meta.Name == name { + return true + } + // Check individual tool entry names + for _, e := range entries { + if e.Name == name { + return true + } + } + } + return false +} + // discoverSkillFiles returns all skill file paths from both flat and subdirectory formats, // plus the main SKILL.md (or custom path from forge.yaml). func (r *Runner) discoverSkillFiles() []string { @@ -1736,7 +1814,7 @@ func (r *Runner) buildSkillCatalog() string { var catalogEntries []string for _, match := range matches { - entries, _, err := cliskills.ParseFileWithMetadata(match) + entries, meta, err := cliskills.ParseFileWithMetadata(match) if err != nil { continue } @@ -1747,6 +1825,16 @@ func (r *Runner) buildSkillCatalog() string { catalogSkillDir = filepath.Base(filepath.Dir(match)) } + // If no ## Tool: entries were parsed but frontmatter has name+description, + // create a synthetic entry so the skill appears in the catalog summary. 
+ if len(entries) == 0 && meta != nil && meta.Name != "" && meta.Description != "" { + entries = []contract.SkillEntry{{ + Name: meta.Name, + Description: meta.Description, + Metadata: meta, + }} + } + for _, entry := range entries { // Skip skills that have scripts (already registered as tools) scriptName := strings.ReplaceAll(entry.Name, "_", "-") @@ -1776,13 +1864,6 @@ func (r *Runner) buildSkillCatalog() string { line += fmt.Sprintf(" (use cli_execute with: %s)", strings.Join(entry.ForgeReqs.Bins, ", ")) } catalogEntries = append(catalogEntries, line) - - // Include full skill instructions when available - if entry.Body != "" { - catalogEntries = append(catalogEntries, "") - catalogEntries = append(catalogEntries, entry.Body) - catalogEntries = append(catalogEntries, "") - } } } } @@ -1793,6 +1874,7 @@ func (r *Runner) buildSkillCatalog() string { var b strings.Builder b.WriteString("## Available Skills\n\n") + b.WriteString("Use `read_skill` to load full instructions for a skill before using it.\n\n") for _, entry := range catalogEntries { b.WriteString(entry) b.WriteString("\n") diff --git a/forge-core/forgecore_test.go b/forge-core/forgecore_test.go index 0abde62..1b798af 100644 --- a/forge-core/forgecore_test.go +++ b/forge-core/forgecore_test.go @@ -794,6 +794,15 @@ func TestNewRuntime_WithToolCalling(t *testing.T) { }, FinishReason: "stop", }, + // The agent loop sends a continuation nudge after the first stop. + // Without workflow phases configured, only 1 nudge fires. 
+ { + Message: llm.ChatMessage{ + Role: llm.RoleAssistant, + Content: "I fetched the URL and got: ok", + }, + FinishReason: "stop", + }, }, } @@ -825,9 +834,9 @@ func TestNewRuntime_WithToolCalling(t *testing.T) { t.Errorf("response text = %q, want 'I fetched the URL and got: ok'", resp.Parts[0].Text) } - // Should have made 2 LLM calls - if toolCallClient.callIdx != 2 { - t.Errorf("LLM was called %d times, want 2", toolCallClient.callIdx) + // Should have made 3 LLM calls (tool call + stop + 1 continuation nudge) + if toolCallClient.callIdx != 3 { + t.Errorf("LLM was called %d times, want 3", toolCallClient.callIdx) } } @@ -1064,7 +1073,7 @@ func TestNewRuntime_LLMError(t *testing.T) { } func TestNewRuntime_DefaultMaxIterations(t *testing.T) { - // If MaxIterations is 0, should default to 10 + // If MaxIterations is 0, should default to 50 client := &mockLLMClient{ response: &llm.ChatResponse{ Message: llm.ChatMessage{ @@ -1077,7 +1086,7 @@ func TestNewRuntime_DefaultMaxIterations(t *testing.T) { executor := NewRuntime(RuntimeConfig{ LLMClient: client, - // MaxIterations: 0 -> defaults to 10 + // MaxIterations: 0 -> defaults to 50 }) if executor == nil { @@ -1325,6 +1334,14 @@ func TestIntegration_CompileWithToolCallLoop(t *testing.T) { }, FinishReason: "stop", }, + // Continuation nudge: without workflow phases, only 1 nudge fires. 
+ { + Message: llm.ChatMessage{ + Role: llm.RoleAssistant, + Content: "Found and fetched the result", + }, + FinishReason: "stop", + }, }, } @@ -1354,8 +1371,8 @@ func TestIntegration_CompileWithToolCallLoop(t *testing.T) { if resp.Parts[0].Text != "Found and fetched the result" { t.Errorf("response text = %q", resp.Parts[0].Text) } - if toolCallClient.callIdx != 3 { - t.Errorf("LLM was called %d times, want 3", toolCallClient.callIdx) + if toolCallClient.callIdx != 4 { + t.Errorf("LLM was called %d times, want 4", toolCallClient.callIdx) } } diff --git a/forge-core/llm/providers/responses.go b/forge-core/llm/providers/responses.go index e957a91..410561a 100644 --- a/forge-core/llm/providers/responses.go +++ b/forge-core/llm/providers/responses.go @@ -222,7 +222,7 @@ func (c *ResponsesClient) buildRequest(req *llm.ChatRequest, stream bool) respon inputs = append(inputs, responsesInput{ Type: "function_call", CallID: tc.ID, - ID: tc.ID, + ID: responsesItemID(tc.ID), Name: tc.Function.Name, Arguments: tc.Function.Arguments, }) @@ -249,6 +249,13 @@ func (c *ResponsesClient) buildRequest(req *llm.ChatRequest, stream bool) respon }) } + // The Responses API requires the instructions field. If no system + // message was provided (e.g. summarization calls), use a minimal default + // so the request doesn't fail with "Instructions are required". + if instructions == "" { + instructions = "You are a helpful assistant." + } + r := responsesRequest{ Model: model, Instructions: instructions, @@ -321,6 +328,15 @@ type streamCompleted struct { Response responsesResponse `json:"response"` } +// responsesItemID ensures a tool call ID has the "fc_" prefix required by +// the Responses API for function_call item IDs. 
+func responsesItemID(id string) string { + if strings.HasPrefix(id, "fc_") { + return id + } + return "fc_" + strings.TrimPrefix(id, "call_") +} + func (c *ResponsesClient) readStream(r io.Reader, ch chan<- llm.StreamDelta) { // Track function calls being built so we can emit them with correct IDs type pendingFC struct { diff --git a/forge-core/runtime/config.go b/forge-core/runtime/config.go index 9b326aa..75b113a 100644 --- a/forge-core/runtime/config.go +++ b/forge-core/runtime/config.go @@ -107,7 +107,7 @@ func ResolveModelConfig(cfg *types.ForgeConfig, envVars map[string]string, provi func defaultModelForProvider(provider string) string { switch provider { case "openai": - return "gpt-5.2-2025-12-11" + return "gpt-5.4" case "anthropic": return "claude-sonnet-4-20250514" case "gemini": diff --git a/forge-core/runtime/config_test.go b/forge-core/runtime/config_test.go index 6b97e80..db84a8a 100644 --- a/forge-core/runtime/config_test.go +++ b/forge-core/runtime/config_test.go @@ -180,7 +180,7 @@ func TestResolveModelConfig_OrgIDFromYAML(t *testing.T) { cfg := &types.ForgeConfig{ Model: types.ModelRef{ Provider: "openai", - Name: "gpt-5.2-2025-12-11", + Name: "gpt-5.4", OrganizationID: "org-yaml-123", }, } @@ -201,7 +201,7 @@ func TestResolveModelConfig_OrgIDEnvOverridesYAML(t *testing.T) { cfg := &types.ForgeConfig{ Model: types.ModelRef{ Provider: "openai", - Name: "gpt-5.2-2025-12-11", + Name: "gpt-5.4", OrganizationID: "org-yaml-123", }, } @@ -303,7 +303,7 @@ func TestDefaultModelForProvider(t *testing.T) { provider string expected string }{ - {"openai", "gpt-5.2-2025-12-11"}, + {"openai", "gpt-5.4"}, {"anthropic", "claude-sonnet-4-20250514"}, {"gemini", "gemini-2.5-flash"}, {"ollama", "llama3"}, diff --git a/forge-core/runtime/guardrails.go b/forge-core/runtime/guardrails.go index 4ac4ae5..0f93938 100644 --- a/forge-core/runtime/guardrails.go +++ b/forge-core/runtime/guardrails.go @@ -32,8 +32,60 @@ func (g *GuardrailEngine) CheckInbound(msg *a2a.Message) 
error { } // CheckOutbound validates an outbound (agent) message against guardrails. +// Unlike CheckInbound, outbound violations are always handled by redacting +// the offending content rather than blocking the entire response. Blocking +// throws away a potentially useful agent response (e.g., code analysis) over +// a false positive from broad PII/secret patterns matching source code. func (g *GuardrailEngine) CheckOutbound(msg *a2a.Message) error { - return g.check(msg, "outbound") + for i, p := range msg.Parts { + if p.Kind != a2a.PartKindText || p.Text == "" { + continue + } + text := p.Text + redacted := false + + for _, gr := range g.scaffold.Guardrails { + switch gr.Type { + case "no_secrets": + for _, re := range secretPatterns { + if re.MatchString(text) { + text = re.ReplaceAllString(text, "[REDACTED]") + redacted = true + } + } + case "no_pii": + for _, re := range piiPatterns { + if re.MatchString(text) { + text = re.ReplaceAllString(text, "[REDACTED]") + redacted = true + } + } + case "content_filter": + // Content filter: redact blocked words inline. + if gr.Config != nil { + if words, ok := gr.Config["blocked_words"]; ok { + if list, ok := words.([]any); ok { + lower := strings.ToLower(text) + for _, w := range list { + if s, ok := w.(string); ok && strings.Contains(lower, strings.ToLower(s)) { + text = strings.ReplaceAll(text, s, "[BLOCKED]") + redacted = true + } + } + } + } + } + } + } + + if redacted { + msg.Parts[i].Text = text + g.logger.Warn("outbound guardrail redaction applied", map[string]any{ + "direction": "outbound", + }) + } + } + return nil } func (g *GuardrailEngine) check(msg *a2a.Message, direction string) error { @@ -168,9 +220,10 @@ func (g *GuardrailEngine) checkNoSecrets(text string) error { } // CheckToolOutput scans tool output text against configured guardrails -// (no_secrets and no_pii). In enforce mode, returns an error on first match -// without echoing the match. 
In warn mode, replaces matches with [REDACTED], -// logs a warning, and returns the redacted text. +// (no_secrets and no_pii). Matches are always redacted rather than blocked, +// because tool outputs are internal (sent to the LLM, not the user) and +// blocking would kill the entire agent session. Search tools routinely find +// code containing API key patterns in test files, config examples, etc. func (g *GuardrailEngine) CheckToolOutput(text string) (string, error) { if text == "" { return text, nil @@ -191,10 +244,10 @@ func (g *GuardrailEngine) CheckToolOutput(text string) (string, error) { if !re.MatchString(text) { continue } - if g.enforce { - return "", fmt.Errorf("tool output blocked by content policy") - } - // Warn mode: redact matches + // Always redact tool output instead of blocking. Blocking + // returns a fatal error that kills the agent session, which + // is too aggressive for tool output (especially search tools + // that scan source code containing dummy keys in tests). text = re.ReplaceAllString(text, "[REDACTED]") g.logger.Warn("guardrail redaction", map[string]any{ "guardrail": gr.Type, diff --git a/forge-core/runtime/loop.go b/forge-core/runtime/loop.go index 4598bcc..b976cdd 100644 --- a/forge-core/runtime/loop.go +++ b/forge-core/runtime/loop.go @@ -34,29 +34,31 @@ type LLMExecutor struct { maxToolResultChars int // computed from char budget filesDir string // directory for file_create output sessionMaxAge time.Duration // max age for session recovery (0 = no limit) + workflowPhases []string // workflow phases from skills (edit, finalize, query) } // LLMExecutorConfig configures the LLM executor. 
type LLMExecutorConfig struct { - Client llm.Client - Tools ToolExecutor - Hooks *HookRegistry - SystemPrompt string - MaxIterations int - Compactor *Compactor - Store *MemoryStore - Logger Logger - ModelName string // model name for context-aware budgeting - CharBudget int // explicit char budget override (0 = auto from model) - FilesDir string // directory for file_create output (default: $TMPDIR/forge-files) - SessionMaxAge time.Duration // max idle time before session recovery is skipped (0 = 30m default) + Client llm.Client + Tools ToolExecutor + Hooks *HookRegistry + SystemPrompt string + MaxIterations int + Compactor *Compactor + Store *MemoryStore + Logger Logger + ModelName string // model name for context-aware budgeting + CharBudget int // explicit char budget override (0 = auto from model) + FilesDir string // directory for file_create output (default: $TMPDIR/forge-files) + SessionMaxAge time.Duration // max idle time before session recovery is skipped (0 = 30m default) + WorkflowPhases []string // workflow phases from skills (edit, finalize, query) } // NewLLMExecutor creates a new LLMExecutor with the given configuration. func NewLLMExecutor(cfg LLMExecutorConfig) *LLMExecutor { maxIter := cfg.MaxIterations if maxIter == 0 { - maxIter = 10 + maxIter = 50 } hooks := cfg.Hooks if hooks == nil { @@ -105,6 +107,7 @@ func NewLLMExecutor(cfg LLMExecutorConfig) *LLMExecutor { maxToolResultChars: toolLimit, filesDir: cfg.FilesDir, sessionMaxAge: sessionMaxAge, + workflowPhases: cfg.WorkflowPhases, } } @@ -168,6 +171,31 @@ func (e *LLMExecutor) Execute(ctx context.Context, task *a2a.Task, msg *a2a.Mess const largeToolOutputThreshold = 8000 var largeToolOutputs []a2a.Part + // stopNudgesSent tracks how many consecutive stop-nudges have been sent + // since the LLM last made tool calls. Reset to 0 whenever the LLM calls + // tools. 
This prevents infinite nudging while still allowing a second, + // more forceful nudge when the workflow is clearly incomplete (e.g., + // commit failed but agent stopped anyway). + stopNudgesSent := 0 + + // toolsUsed tracks which tools were called during this execution. + // Included in the continuation prompt so the LLM cannot hallucinate + // actions it never performed. + var toolsUsed []string + + // Workflow tracker detects behavioral patterns (exploration loops, + // missing git ops) and injects proactive nudges. The agent never + // sees iteration counts — nudges fire on consecutive read-only iterations. + tracker := newWorkflowTracker(e.workflowPhases) + + // Pre-compute available write tools for nudge messages. + var availWriteTools []string + for _, td := range toolDefs { + if isWriteActionTool(td.Function.Name) { + availWriteTools = append(availWriteTools, td.Function.Name) + } + } + // Agent loop for i := 0; i < e.maxIter; i++ { // Run compaction before LLM call (best-effort). @@ -222,17 +250,147 @@ func (e *LLMExecutor) Execute(ctx context.Context, task *a2a.Task, msg *a2a.Mess // Check if we're done (no tool calls) if resp.FinishReason == "stop" || len(resp.Message.ToolCalls) == 0 { + // If the LLM stopped after executing tools, send a continuation + // nudge. This catches cases where the LLM reports findings instead + // of completing the full workflow (e.g., stops after exploration + // without editing/committing/pushing). The maxIter limit prevents + // infinite loops. + if i > 0 { + // Determine if the workflow is incomplete based on required phases. 
+ workflowIncomplete := false + if tracker.requireEdit && !tracker.phaseOK(phaseEdit) { + workflowIncomplete = true + } + if tracker.requireFinalize && !tracker.phaseOK(phaseGitOps) { + workflowIncomplete = true + } + + // Determine nudge budget: + // - No workflow phases configured → 1 nudge (can't tell if done) + // - Workflow phases configured and ALL complete → 0 (agent is done) + // - Workflow incomplete, no errors → 1 nudge + // - Workflow incomplete with git errors → 2 nudges + hasWorkflowRequirements := tracker.requireEdit || tracker.requireFinalize + maxNudges := 1 // default for agents without workflow phases + if hasWorkflowRequirements && !workflowIncomplete { + maxNudges = 0 // workflow is complete — don't nudge + } else if workflowIncomplete && tracker.phaseHasError[phaseGitOps] { + maxNudges = 2 + } + + if stopNudgesSent < maxNudges { + stopNudgesSent++ + + // Workflow-aware stop-point nudge: check what phases + // the agent completed successfully before stopping. + var nudge string + if stopNudgesSent == 2 { + // Second nudge: agent stopped again without calling + // tools despite knowing the task isn't done. Be very + // forceful. + nudge = "You stopped AGAIN without calling any tools. " + + "Do NOT describe what needs to be done — DO it. " + + "Call the required tools NOW: " + var steps []string + if tracker.requireEdit && !tracker.phaseOK(phaseEdit) { + steps = append(steps, strings.Join(availWriteTools, "/")+ + " to fix the code") + } + if tracker.requireFinalize && !tracker.phaseOK(phaseGitOps) { + if tracker.phaseHasError[phaseGitOps] { + steps = append(steps, "github_commit (previous attempt FAILED — check the files parameter is a JSON array)") + } + steps = append(steps, "github_push -> github_create_pr") + } + if len(steps) > 0 { + nudge += strings.Join(steps, ", then ") + "." + } else { + nudge += "complete the remaining steps." 
+ } + } else if tracker.requireEdit && !tracker.phaseSeen[phaseEdit] { + // Never wrote anything — stuck in exploration + nudge = "You stopped without making any code changes. " + + "You called: " + strings.Join(dedup(toolsUsed), ", ") + ". " + + "You MUST continue: " + if tracker.requireEdit { + nudge += "edit the code" + } + if tracker.requireFinalize { + nudge += ", then commit, push, and create PR" + } + nudge += ". Available write tools: " + strings.Join(availWriteTools, ", ") + "." + } else if tracker.requireEdit && tracker.phaseSeen[phaseEdit] && tracker.requireFinalize && !tracker.phaseOK(phaseGitOps) { + // Edited but git ops either missing or had errors + nudge = "You edited files but " + if tracker.phaseHasError[phaseGitOps] { + nudge += "some git operations FAILED. Fix the errors and retry: " + } else { + nudge += "stopped before git operations. Complete NOW: " + } + nudge += "github_status -> github_commit -> github_push -> github_create_pr." + } else { + // Standard: completed or no requirements + nudge = "You stopped. If the task is complete, summarize what was done. " + + "If not, continue with the remaining steps." + } + e.logger.Info("sending continuation nudge", map[string]any{ + "task_id": TaskIDFromContext(ctx), + "iteration": i, + "tools_used": strings.Join(toolsUsed, ", "), + "has_edits": tracker.phaseSeen[phaseEdit], + "has_git": tracker.phaseSeen[phaseGitOps], + "git_errors": tracker.phaseHasError[phaseGitOps], + "nudge_count": stopNudgesSent, + "max_nudges": maxNudges, + }) + mem.Append(llm.ChatMessage{ + Role: llm.RoleUser, + Content: nudge, + }) + continue + } + } + + // If the LLM returned empty text after executing tools, re-prompt + // it once to produce a meaningful summary instead of sending nothing. + if strings.TrimSpace(resp.Message.Content) == "" && i > 0 { + mem.Append(llm.ChatMessage{ + Role: llm.RoleUser, + Content: "Your response was empty. 
Please provide a brief summary of what you found, what you were unable to do, and suggest next steps.", + }) + retryReq := &llm.ChatRequest{ + Messages: mem.Messages(), + } + if retryResp, retryErr := e.client.Chat(ctx, retryReq); retryErr == nil && strings.TrimSpace(retryResp.Message.Content) != "" { + resp = retryResp + mem.Append(resp.Message) + } + } + if strings.TrimSpace(resp.Message.Content) == "" { + resp.Message.Content = "I processed your request but wasn't able to produce a response. Please try again." + } e.persistSession(task.ID, mem) return llmMessageToA2A(resp.Message, largeToolOutputs...), nil } // Execute tool calls if e.tools == nil { + if strings.TrimSpace(resp.Message.Content) == "" { + resp.Message.Content = "I processed your request but wasn't able to produce a response. Please try again." + } e.persistSession(task.ID, mem) return llmMessageToA2A(resp.Message, largeToolOutputs...), nil } + // The LLM made tool calls -- it's making progress. Allow + // another nudge if it stops again after this round. + stopNudgesSent = 0 + + iterResults := make([]toolIterResult, 0, len(resp.Message.ToolCalls)) + for _, tc := range resp.Message.ToolCalls { + toolsUsed = append(toolsUsed, tc.Function.Name) + // Fire BeforeToolExec hook if err := e.hooks.Fire(ctx, BeforeToolExec, &HookContext{ ToolName: tc.Function.Name, @@ -248,14 +406,19 @@ func (e *LLMExecutor) Execute(ctx context.Context, task *a2a.Task, msg *a2a.Mess if execErr != nil { result = fmt.Sprintf("Error executing tool %s: %s", tc.Function.Name, execErr.Error()) } + iterResults = append(iterResults, toolIterResult{ + Name: tc.Function.Name, + Failed: execErr != nil, + FilePath: extractReadFilePath(tc.Function.Name, tc.Function.Arguments), + }) // Truncate oversized tool results to avoid LLM API errors. // Limit is proportional to model context budget (25%, floor 2K, cap 400K). 
if len(result) > e.maxToolResultChars { - result = result[:e.maxToolResultChars] + "\n\n[OUTPUT TRUNCATED — original length: " + strconv.Itoa(len(result)) + " chars]" + result = result[:e.maxToolResultChars] + "\n\n[OUTPUT TRUNCATED -- original length: " + strconv.Itoa(len(result)) + " chars]" } - // Fire AfterToolExec hook — hooks may redact ToolOutput. + // Fire AfterToolExec hook -- hooks may redact ToolOutput. afterHctx := &HookContext{ ToolName: tc.Function.Name, ToolInput: tc.Function.Arguments, @@ -307,6 +470,19 @@ func (e *LLMExecutor) Execute(ctx context.Context, task *a2a.Task, msg *a2a.Mess Name: tc.Function.Name, }) } + + // Record this iteration's tools for workflow tracking. + tracker.recordIteration(iterResults) + + // Proactive mid-loop nudge (fires while agent is still calling tools). + if nudgeMsg, shouldNudge := tracker.generateProactiveNudge(availWriteTools); shouldNudge { + e.logger.Info("sending proactive workflow nudge", map[string]any{ + "task_id": TaskIDFromContext(ctx), + "iteration": i, + "consecutive_reads": tracker.consecutiveReads, + }) + mem.Append(llm.ChatMessage{Role: llm.RoleUser, Content: nudgeMsg}) + } } e.persistSession(task.ID, mem) @@ -408,3 +584,291 @@ func llmMessageToA2A(msg llm.ChatMessage, extraParts ...a2a.Part) *a2a.Message { Parts: parts, } } + +// isWriteActionTool returns true for tools that modify state (edit, write, +// commit, push, create PR) as opposed to read-only tools (read, grep, glob, +// directory_tree, clone, status). +func isWriteActionTool(name string) bool { + switch name { + case "code_agent_edit", "code_agent_write", "code_agent_patch", + "github_commit", "github_push", "github_create_pr", + "github_checkout", "github_create_issue", + "file_create", "bash_execute", "code_agent_run": + return true + } + // Catch any tool with "edit", "write", "commit", "push" in the name. 
+ lower := strings.ToLower(name) + for _, kw := range []string{"edit", "write", "commit", "push", "patch", "create"} { + if strings.Contains(lower, kw) { + return true + } + } + return false +} + +// ─── Workflow Tracker ──────────────────────────────────────────────── + +// workflowPhase classifies tools by their role in the coding workflow. +type workflowPhase int + +const ( + phaseSetup workflowPhase = iota // clone, scaffold + phaseExplore // read, grep, glob, tree, read_skill + phaseEdit // edit, write, patch + phaseGitOps // status, commit, push, create_pr +) + +// toolIterResult captures a tool call's name, whether it failed, and +// (for read tools) the file path that was read. +type toolIterResult struct { + Name string + Failed bool + FilePath string // non-empty for file_read / code_agent_read +} + +// workflowTracker monitors agent behavior to detect exploration loops +// and missing workflow phases. The agent never sees iteration counts. +type workflowTracker struct { + phaseSeen map[workflowPhase]bool + phaseHasError map[workflowPhase]bool // at least one tool in this phase errored + consecutiveReads int // resets when a non-explore tool is called + totalReadIters int + itersSinceLastEdit int // iterations since last edit-phase tool + planCheckpointDone bool + transitionDone bool + urgentDone bool + gitNudgeDone bool + verifyNudgeDone bool // post-edit verification nudge (fires once) + requireEdit bool // skill(s) declared workflow_phase: edit + requireFinalize bool // skill(s) declared workflow_phase: finalize + fileReadCounts map[string]int // path → read count (for re-read detection) + rereadNudgeDone bool // fires once per re-read batch +} + +func newWorkflowTracker(phases []string) *workflowTracker { + wt := &workflowTracker{ + phaseSeen: make(map[workflowPhase]bool), + phaseHasError: make(map[workflowPhase]bool), + fileReadCounts: make(map[string]int), + } + for _, p := range phases { + switch p { + case "edit": + wt.requireEdit = true + case 
"finalize": + wt.requireFinalize = true + } + } + return wt +} + +// phaseOK returns true if the phase was seen AND had no errors. +func (wt *workflowTracker) phaseOK(p workflowPhase) bool { + return wt.phaseSeen[p] && !wt.phaseHasError[p] +} + +// toolPhase classifies a tool name into a workflow phase. +func toolPhase(name string) workflowPhase { + switch name { + case "github_clone", "code_agent_scaffold", "github_checkout": + return phaseSetup + case "code_agent_read", "grep_search", "glob_search", "directory_tree", "read_skill", "github_status": + return phaseExplore + case "code_agent_edit", "code_agent_write", "code_agent_patch", "bash_execute", "file_create", "code_agent_run": + return phaseEdit + case "github_commit", "github_push", "github_create_pr": + return phaseGitOps + } + // Keyword fallback + lower := strings.ToLower(name) + for _, kw := range []string{"read", "grep", "glob", "search", "tree", "status"} { + if strings.Contains(lower, kw) { + return phaseExplore + } + } + for _, kw := range []string{"edit", "write", "patch", "create"} { + if strings.Contains(lower, kw) { + return phaseEdit + } + } + for _, kw := range []string{"commit", "push"} { + if strings.Contains(lower, kw) { + return phaseGitOps + } + } + return phaseSetup // default: setup / unknown +} + +// recordIteration updates the tracker based on which tools were called and +// whether they succeeded or failed. Failed tools mark phaseHasError but still +// mark phaseSeen (the tool was attempted). The phaseOK() method checks both. +func (wt *workflowTracker) recordIteration(results []toolIterResult) { + allExplore := true + for _, r := range results { + phase := toolPhase(r.Name) + wt.phaseSeen[phase] = true + if r.Failed { + wt.phaseHasError[phase] = true + } + if phase != phaseExplore { + allExplore = false + } + // Track file reads for re-read detection. 
+ if r.FilePath != "" && !r.Failed { + wt.fileReadCounts[r.FilePath]++ + } + } + + if allExplore && len(results) > 0 { + wt.consecutiveReads++ + wt.totalReadIters++ + } else { + wt.consecutiveReads = 0 + } + + // Track iterations since last edit + hasEdit := false + for _, r := range results { + if toolPhase(r.Name) == phaseEdit && !r.Failed { + hasEdit = true + break + } + } + if hasEdit { + wt.itersSinceLastEdit = 0 + } else { + wt.itersSinceLastEdit++ + } +} + +// generateProactiveNudge returns a behavioral nudge if the agent is stuck in +// an exploration loop. Nudges escalate monotonically — each tier fires once. +func (wt *workflowTracker) generateProactiveNudge(availWriteTools []string) (string, bool) { + // Re-read detection nudge: highest priority — fires once when any file + // has been read 2+ times, which wastes context and triggers compaction. + if !wt.rereadNudgeDone { + var rereadFiles []string + for path, count := range wt.fileReadCounts { + if count >= 2 { + rereadFiles = append(rereadFiles, path) + } + } + if len(rereadFiles) > 0 { + wt.rereadNudgeDone = true + return "STOP RE-READING FILES: You have already read " + + strings.Join(rereadFiles, ", ") + " earlier in this session. " + + "The content was lost to compaction. Do NOT read the entire file again — " + + "that will trigger more compaction and lose context again. Instead: " + + "1) State your hypothesis based on what you learned. " + + "2) If you need specific lines, use offset/limit parameters. " + + "3) Proceed to edit based on your current knowledge.", true + } + } + + // Git workflow nudge: only if finalize is required + if wt.requireFinalize && wt.phaseOK(phaseEdit) && !wt.phaseOK(phaseGitOps) && wt.itersSinceLastEdit >= 4 && !wt.gitNudgeDone { + wt.gitNudgeDone = true + nudge := "You edited files but haven't committed. " + if wt.requireEdit && wt.verifyNudgeDone { + nudge += "BEFORE committing: does your edit change RUNTIME behavior, not just types or tests? 
" + + "Does the failing input now reach a code path that handles it correctly? " + + "If your edit only modifies test files, it does NOT fix the bug — edit source code first. " + } + nudge += "Complete the git workflow: " + + "github_status -> github_commit -> github_push -> github_create_pr." + return nudge, true + } + + // Post-edit verification nudge: fires once immediately after first edit in bug-fix workflows. + if wt.requireEdit && wt.phaseOK(phaseEdit) && (!wt.requireFinalize || !wt.phaseOK(phaseGitOps)) && !wt.verifyNudgeDone && wt.itersSinceLastEdit == 1 { + wt.verifyNudgeDone = true + return "VERIFY YOUR FIX: You just edited code. Before committing, trace the failing input through your new code: " + + "1) What value was causing the bug (e.g., an object, null, wrong type)? " + + "2) Does that value now reach a code path that handles it correctly? " + + "3) Read the functions your new code calls — do they accept that input type? " + + "If the fix only adds types or annotations without changing runtime behavior, it is wrong. " + + "If correct, proceed to commit.", true + } + + // Exploration loop nudges: only if edit is required + if wt.requireEdit { + if wt.phaseOK(phaseEdit) { + return "", false + } + + if wt.consecutiveReads >= 8 && !wt.urgentDone { + wt.urgentDone = true + return "STOP READING. You have explored " + fmt.Sprintf("%d", wt.consecutiveReads) + + " consecutive iterations without a single edit. Act on what you know NOW. " + + "Call " + strings.Join(availWriteTools, "/") + " immediately. An imperfect fix is better than endless exploration.", true + } + + if wt.consecutiveReads >= 6 && !wt.transitionDone { + wt.transitionDone = true + return "You have been exploring for " + fmt.Sprintf("%d", wt.consecutiveReads) + + " consecutive iterations without making changes. " + + "If fixing a bug, have you traced it to its origin? Have you read the functions you plan to change? " + + "If not, do those reads now. 
Otherwise, Start editing with " + strings.Join(availWriteTools, ", ") + ". " + + "An imperfect edit you can iterate on is better than more reading.", true + } + + if wt.consecutiveReads >= 4 && !wt.planCheckpointDone { + wt.planCheckpointDone = true + return "PLANNING CHECKPOINT: You've read " + fmt.Sprintf("%d", wt.totalReadIters) + + " files without editing. Before reading more: " + + "1) If fixing a bug: have you traced the error to its origin, not just where it surfaces? " + + "2) Have you read the implementation of every function you plan to call or replace? " + + "3) If yes, state your fix and call " + strings.Join(availWriteTools, "/") + ". " + + "If no, do those reads next — then edit.", true + } + + return "", false + } + + // Query-only: gentle nudge at 8 consecutive reads + if !wt.requireEdit && !wt.requireFinalize { + if wt.consecutiveReads >= 8 && !wt.urgentDone { + wt.urgentDone = true + return "You have been reading for " + fmt.Sprintf("%d", wt.consecutiveReads) + + " consecutive iterations. If you have enough information, provide your analysis. " + + "If not, focus your remaining searches.", true + } + } + + return "", false +} + +// extractReadFilePath extracts the file path from tool arguments for +// file_read and code_agent_read tools. Returns "" for other tools or +// if the path cannot be extracted. +func extractReadFilePath(toolName, argsJSON string) string { + switch toolName { + case "file_read", "code_agent_read": + default: + return "" + } + var args struct { + Path string `json:"path"` + FilePath string `json:"file_path"` + } + if err := json.Unmarshal([]byte(argsJSON), &args); err != nil { + return "" + } + if args.FilePath != "" { + return args.FilePath + } + return args.Path +} + +// dedup returns unique tool names in first-seen order. 
+func dedup(names []string) []string { + seen := make(map[string]bool) + var out []string + for _, n := range names { + if !seen[n] { + seen[n] = true + out = append(out, n) + } + } + return out +} diff --git a/forge-core/runtime/loop_test.go b/forge-core/runtime/loop_test.go index 173b7e1..9765eaf 100644 --- a/forge-core/runtime/loop_test.go +++ b/forge-core/runtime/loop_test.go @@ -284,7 +284,7 @@ func TestToolTruncationUsesProportionalLimit(t *testing.T) { executor := NewLLMExecutor(LLMExecutorConfig{ Client: client, Tools: tools, - CharBudget: 4000, // floor enforced → 2K limit + CharBudget: 10000, // tool limit = 10000/4 = 2500 (floor 2K), still truncates 5K output }) task := &a2a.Task{ID: "test-proportional"} @@ -311,8 +311,8 @@ func TestToolTruncationUsesProportionalLimit(t *testing.T) { if !strings.Contains(toolMsg.Content, "[OUTPUT TRUNCATED") { t.Error("tool result should be truncated with proportional limit") } - // Content should be roughly 2K + truncation suffix - if len(toolMsg.Content) > 2500 { + // Content should be roughly tool limit (2500) + truncation suffix + if len(toolMsg.Content) > 2700 { t.Errorf("tool result too large after truncation: %d chars", len(toolMsg.Content)) } } @@ -351,3 +351,837 @@ func TestLLMErrorReturnsFriendlyMessage(t *testing.T) { t.Errorf("error should contain friendly message, got: %s", errStr) } } + +// ─── Workflow Tracker Tests ────────────────────────────────────────── + +func TestToolPhaseClassification(t *testing.T) { + tests := []struct { + tool string + want workflowPhase + }{ + {"github_clone", phaseSetup}, + {"code_agent_scaffold", phaseSetup}, + {"github_checkout", phaseSetup}, + {"code_agent_read", phaseExplore}, + {"grep_search", phaseExplore}, + {"glob_search", phaseExplore}, + {"directory_tree", phaseExplore}, + {"read_skill", phaseExplore}, + {"github_status", phaseExplore}, + {"code_agent_edit", phaseEdit}, + {"code_agent_write", phaseEdit}, + {"code_agent_patch", phaseEdit}, + {"bash_execute", 
phaseEdit}, + {"file_create", phaseEdit}, + {"code_agent_run", phaseEdit}, + {"github_commit", phaseGitOps}, + {"github_push", phaseGitOps}, + {"github_create_pr", phaseGitOps}, + } + + for _, tt := range tests { + t.Run(tt.tool, func(t *testing.T) { + got := toolPhase(tt.tool) + if got != tt.want { + t.Errorf("toolPhase(%q) = %d, want %d", tt.tool, got, tt.want) + } + }) + } +} + +func TestPlanningCheckpointAfter4Reads(t *testing.T) { + wt := newWorkflowTracker([]string{"edit"}) + writeTools := []string{"code_agent_edit", "code_agent_write"} + + var nudgeCount int + for i := 0; i < 5; i++ { + wt.recordIteration([]toolIterResult{{Name: "grep_search"}}) + if msg, ok := wt.generateProactiveNudge(writeTools); ok { + nudgeCount++ + if !strings.Contains(msg, "PLANNING CHECKPOINT") { + t.Errorf("iteration %d: expected PLANNING CHECKPOINT nudge, got: %s", i, msg) + } + } + } + + if nudgeCount != 1 { + t.Errorf("planning checkpoint fired %d times, want exactly 1", nudgeCount) + } +} + +func TestProactiveNudgeEscalation(t *testing.T) { + wt := newWorkflowTracker([]string{"edit"}) + writeTools := []string{"code_agent_edit"} + + var nudges []string + for i := 0; i < 9; i++ { + wt.recordIteration([]toolIterResult{{Name: "grep_search"}}) + if msg, ok := wt.generateProactiveNudge(writeTools); ok { + nudges = append(nudges, msg) + } + } + + if len(nudges) != 3 { + t.Fatalf("expected 3 nudges, got %d", len(nudges)) + } + + // Nudge 1: planning checkpoint (at consecutive read 4) + if !strings.Contains(nudges[0], "PLANNING CHECKPOINT") { + t.Errorf("nudge 0: expected PLANNING CHECKPOINT, got: %s", nudges[0]) + } + // Nudge 2: transition (at consecutive read 6) + if !strings.Contains(nudges[1], "Start editing") { + t.Errorf("nudge 1: expected 'Start editing', got: %s", nudges[1]) + } + // Nudge 3: urgent (at consecutive read 8) + if !strings.Contains(nudges[2], "STOP READING") { + t.Errorf("nudge 2: expected 'STOP READING', got: %s", nudges[2]) + } +} + +func 
TestPlanningCheckpointMentionsOriginTracing(t *testing.T) { + wt := newWorkflowTracker([]string{"edit"}) + for i := 0; i < 4; i++ { + wt.recordIteration([]toolIterResult{{Name: "grep_search"}}) + } + msg, ok := wt.generateProactiveNudge([]string{"code_agent_edit"}) + if !ok { + t.Fatal("expected planning checkpoint at 4 consecutive reads") + } + if !strings.Contains(msg, "traced the error to its origin") { + t.Errorf("should mention origin tracing, got: %s", msg) + } + if !strings.Contains(msg, "read the implementation of every function") { + t.Errorf("should mention reading implementations, got: %s", msg) + } +} + +func TestNoProactiveNudgeWhenWriting(t *testing.T) { + wt := newWorkflowTracker([]string{"edit"}) + writeTools := []string{"code_agent_edit"} + + verifyCount := 0 + for i := 0; i < 10; i++ { + // Alternate read/write — consecutive reads never exceed 1 + if i%2 == 0 { + wt.recordIteration([]toolIterResult{{Name: "grep_search"}}) + } else { + wt.recordIteration([]toolIterResult{{Name: "code_agent_edit"}}) + } + if msg, ok := wt.generateProactiveNudge(writeTools); ok { + // The one-time verify nudge is expected after the first edit; + // exploration nudges (PLANNING CHECKPOINT, STOP READING) must not fire. 
+ if strings.Contains(msg, "VERIFY YOUR FIX") { + verifyCount++ + continue + } + t.Fatalf("unexpected proactive nudge at iteration %d: %s", i, msg) + } + } + if verifyCount != 1 { + t.Errorf("expected exactly 1 verify nudge, got %d", verifyCount) + } +} + +func TestStopNudgeWorkflowAware(t *testing.T) { + makeToolDefs := func(names ...string) []llm.ToolDefinition { + var defs []llm.ToolDefinition + for _, n := range names { + defs = append(defs, llm.ToolDefinition{ + Type: "function", + Function: llm.FunctionSchema{Name: n}, + }) + } + return defs + } + + allTools := []string{ + "grep_search", "code_agent_read", "code_agent_edit", + "github_status", "github_commit", "github_push", "github_create_pr", + } + + tests := []struct { + name string + tools []string // tools the LLM calls across iterations + wantMsg string // substring expected in the stop nudge (empty = no nudge) + wantNudge bool // whether a nudge is expected + }{ + { + name: "only reads → no code changes nudge", + tools: []string{"grep_search", "code_agent_read"}, + wantMsg: "without making any code changes", + wantNudge: true, + }, + { + name: "edits but no git → complete git nudge", + tools: []string{"grep_search", "code_agent_edit"}, + wantMsg: "stopped before git operations", + wantNudge: true, + }, + { + name: "edits + git → no nudge (workflow complete)", + tools: []string{"grep_search", "code_agent_edit", "github_commit", "github_push"}, + wantMsg: "", + wantNudge: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + callIdx := 0 + + client := &mockLLMClient{ + chatFunc: func(ctx context.Context, req *llm.ChatRequest) (*llm.ChatResponse, error) { + callIdx++ + if callIdx <= len(tt.tools) { + return &llm.ChatResponse{ + Message: llm.ChatMessage{ + Role: llm.RoleAssistant, + ToolCalls: []llm.ToolCall{ + { + ID: fmt.Sprintf("call_%d", callIdx), + Type: "function", + Function: llm.FunctionCall{ + Name: tt.tools[callIdx-1], + Arguments: `{}`, + }, + }, + }, + }, + 
FinishReason: "tool_calls", + }, nil + } + // After all tool calls, stop + return &llm.ChatResponse{ + Message: llm.ChatMessage{Role: llm.RoleAssistant, Content: "Done"}, + FinishReason: "stop", + }, nil + }, + } + + tools := &mockToolExecutor{ + executeFunc: func(ctx context.Context, name string, arguments json.RawMessage) (string, error) { + return "ok", nil + }, + toolDefs: makeToolDefs(allTools...), + } + + executor := NewLLMExecutor(LLMExecutorConfig{ + Client: client, + Tools: tools, + MaxIterations: 20, + WorkflowPhases: []string{"edit", "finalize"}, + }) + + task := &a2a.Task{ID: "stop-nudge-" + tt.name} + msg := &a2a.Message{ + Role: a2a.MessageRoleUser, + Parts: []a2a.Part{a2a.NewTextPart("do the task")}, + } + + // Execute — the stop nudge triggers a continuation, which gets + // a second "Done" response that actually returns. + resp, err := executor.Execute(context.Background(), task, msg) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if resp == nil { + t.Fatal("expected response") + } + + // The nudge was injected as a user message. Find it. + // We can't directly inspect memory, but we know the LLM + // received the nudge as the last user message before the + // final "Done" response. So we check via a capturing client. + // For simplicity, re-run with capturing. + callIdx = 0 + var capturedMessages []llm.ChatMessage + client.chatFunc = func(ctx context.Context, req *llm.ChatRequest) (*llm.ChatResponse, error) { + callIdx++ + capturedMessages = append([]llm.ChatMessage{}, req.Messages...) 
+ if callIdx <= len(tt.tools) { + return &llm.ChatResponse{ + Message: llm.ChatMessage{ + Role: llm.RoleAssistant, + ToolCalls: []llm.ToolCall{ + { + ID: fmt.Sprintf("call_%d", callIdx), + Type: "function", + Function: llm.FunctionCall{ + Name: tt.tools[callIdx-1], + Arguments: `{}`, + }, + }, + }, + }, + FinishReason: "tool_calls", + }, nil + } + return &llm.ChatResponse{ + Message: llm.ChatMessage{Role: llm.RoleAssistant, Content: "Done"}, + FinishReason: "stop", + }, nil + } + + executor2 := NewLLMExecutor(LLMExecutorConfig{ + Client: client, + Tools: tools, + MaxIterations: 20, + WorkflowPhases: []string{"edit", "finalize"}, + }) + task2 := &a2a.Task{ID: "stop-nudge-capture-" + tt.name} + _, _ = executor2.Execute(context.Background(), task2, msg) + + // Find the nudge in captured messages + found := false + for _, m := range capturedMessages { + if m.Role == "user" && tt.wantMsg != "" && strings.Contains(m.Content, tt.wantMsg) { + found = true + break + } + } + if tt.wantNudge && !found { + t.Errorf("expected nudge containing %q in messages", tt.wantMsg) + } + if !tt.wantNudge { + // Ensure no continuation nudge was injected + for _, m := range capturedMessages { + if m.Role == "user" && strings.Contains(m.Content, "You stopped") { + t.Errorf("expected no nudge for complete workflow, but got: %s", m.Content) + } + } + } + }) + } +} + +func TestProactiveNudgeInjectedMidLoop(t *testing.T) { + // Integration test: LLM does 6 grep_search calls, then stops. + // Verify a planning checkpoint user message appears in the conversation. + callIdx := 0 + var capturedMessages []llm.ChatMessage + + client := &mockLLMClient{ + chatFunc: func(ctx context.Context, req *llm.ChatRequest) (*llm.ChatResponse, error) { + callIdx++ + capturedMessages = append([]llm.ChatMessage{}, req.Messages...) 
+ + if callIdx <= 6 { + return &llm.ChatResponse{ + Message: llm.ChatMessage{ + Role: llm.RoleAssistant, + ToolCalls: []llm.ToolCall{ + { + ID: fmt.Sprintf("call_%d", callIdx), + Type: "function", + Function: llm.FunctionCall{ + Name: "grep_search", + Arguments: `{}`, + }, + }, + }, + }, + FinishReason: "tool_calls", + }, nil + } + // After 6 tool calls, stop + return &llm.ChatResponse{ + Message: llm.ChatMessage{Role: llm.RoleAssistant, Content: "Here's what I found"}, + FinishReason: "stop", + }, nil + }, + } + + tools := &mockToolExecutor{ + executeFunc: func(ctx context.Context, name string, arguments json.RawMessage) (string, error) { + return "some search result", nil + }, + toolDefs: []llm.ToolDefinition{ + {Type: "function", Function: llm.FunctionSchema{Name: "grep_search"}}, + {Type: "function", Function: llm.FunctionSchema{Name: "code_agent_edit"}}, + }, + } + + executor := NewLLMExecutor(LLMExecutorConfig{ + Client: client, + Tools: tools, + MaxIterations: 20, + WorkflowPhases: []string{"edit"}, + }) + + task := &a2a.Task{ID: "proactive-nudge-midloop"} + msg := &a2a.Message{ + Role: a2a.MessageRoleUser, + Parts: []a2a.Part{a2a.NewTextPart("find and fix the bug")}, + } + + _, _ = executor.Execute(context.Background(), task, msg) + + // Verify planning checkpoint appeared in messages (injected after 4th read) + planCheckpointFound := false + transitionFound := false + for _, m := range capturedMessages { + if m.Role == "user" { + if strings.Contains(m.Content, "PLANNING CHECKPOINT") { + planCheckpointFound = true + } + if strings.Contains(m.Content, "Start editing") { + transitionFound = true + } + } + } + + if !planCheckpointFound { + t.Error("expected PLANNING CHECKPOINT nudge in messages after 4 consecutive reads") + } + if !transitionFound { + t.Error("expected transition nudge in messages after 6 consecutive reads") + } +} + +func TestFailedToolDoesNotMarkPhaseOK(t *testing.T) { + wt := newWorkflowTracker([]string{"edit", "finalize"}) + + // 
Successful edit + wt.recordIteration([]toolIterResult{{Name: "code_agent_edit", Failed: false}}) + if !wt.phaseOK(phaseEdit) { + t.Error("expected phaseEdit to be OK after successful edit") + } + + // Failed commit — phaseGitOps is "seen" but NOT "OK" + wt.recordIteration([]toolIterResult{ + {Name: "github_commit", Failed: true}, + {Name: "github_push", Failed: false}, + }) + if !wt.phaseSeen[phaseGitOps] { + t.Error("expected phaseGitOps to be seen after attempted commit") + } + if wt.phaseOK(phaseGitOps) { + t.Error("expected phaseGitOps NOT OK because commit failed") + } + if !wt.phaseHasError[phaseGitOps] { + t.Error("expected phaseHasError[phaseGitOps] to be true") + } +} + +func TestReNudgeOnIncompleteWorkflow(t *testing.T) { + // Simulates the production bug: agent edits, commit fails, agent stops + // twice. The second stop should still get a nudge because the workflow + // is incomplete (git ops had errors). + callIdx := 0 + var lastCapturedMessages []llm.ChatMessage + + toolSequence := []struct { + name string + fail bool + }{ + {"grep_search", false}, + {"code_agent_edit", false}, + {"github_status", false}, + {"github_commit", true}, // fails + {"github_push", false}, // succeeds but commit didn't + } + + client := &mockLLMClient{ + chatFunc: func(ctx context.Context, req *llm.ChatRequest) (*llm.ChatResponse, error) { + callIdx++ + lastCapturedMessages = append([]llm.ChatMessage{}, req.Messages...) + + if callIdx <= len(toolSequence) { + return &llm.ChatResponse{ + Message: llm.ChatMessage{ + Role: llm.RoleAssistant, + ToolCalls: []llm.ToolCall{ + { + ID: fmt.Sprintf("call_%d", callIdx), + Type: "function", + Function: llm.FunctionCall{ + Name: toolSequence[callIdx-1].name, + Arguments: `{}`, + }, + }, + }, + }, + FinishReason: "tool_calls", + }, nil + } + + // After tools: stop with text. Do this 3 times to test + // that we get 2 nudges (re-nudge on incomplete workflow). 
+ return &llm.ChatResponse{ + Message: llm.ChatMessage{Role: llm.RoleAssistant, Content: "Not complete yet"}, + FinishReason: "stop", + }, nil + }, + } + + tools := &mockToolExecutor{ + executeFunc: func(ctx context.Context, name string, arguments json.RawMessage) (string, error) { + for _, ts := range toolSequence { + if ts.name == name && ts.fail { + return "", fmt.Errorf("no changes staged to commit") + } + } + return "ok", nil + }, + toolDefs: []llm.ToolDefinition{ + {Type: "function", Function: llm.FunctionSchema{Name: "grep_search"}}, + {Type: "function", Function: llm.FunctionSchema{Name: "code_agent_edit"}}, + {Type: "function", Function: llm.FunctionSchema{Name: "github_status"}}, + {Type: "function", Function: llm.FunctionSchema{Name: "github_commit"}}, + {Type: "function", Function: llm.FunctionSchema{Name: "github_push"}}, + {Type: "function", Function: llm.FunctionSchema{Name: "github_create_pr"}}, + }, + } + + executor := NewLLMExecutor(LLMExecutorConfig{ + Client: client, + Tools: tools, + MaxIterations: 20, + WorkflowPhases: []string{"edit", "finalize"}, + }) + + task := &a2a.Task{ID: "re-nudge-test"} + msg := &a2a.Message{ + Role: a2a.MessageRoleUser, + Parts: []a2a.Part{a2a.NewTextPart("fix the bug and create PR")}, + } + + _, _ = executor.Execute(context.Background(), task, msg) + + // Count nudge messages in the final captured messages. + // With the fix, we should see 2 nudges: first "git ops FAILED", + // second "stopped AGAIN without calling tools". 
+ nudgeCount := 0 + hasFailedNudge := false + hasReNudge := false + for _, m := range lastCapturedMessages { + if m.Role == "user" { + if strings.Contains(m.Content, "FAILED") { + hasFailedNudge = true + nudgeCount++ + } + if strings.Contains(m.Content, "stopped AGAIN") { + hasReNudge = true + nudgeCount++ + } + } + } + + if !hasFailedNudge { + t.Error("expected a nudge mentioning git ops FAILED") + } + if !hasReNudge { + t.Error("expected a re-nudge ('stopped AGAIN') on second stop with incomplete workflow") + } + if nudgeCount < 2 { + t.Errorf("expected at least 2 nudges for incomplete workflow, got %d", nudgeCount) + } +} + +func TestNoEditNudgeForQueryOnlySkill(t *testing.T) { + // Tracker with no edit/finalize phases (query-only or no phases). + // 6 consecutive reads should NOT fire "start editing" nudge. + // Only fire gentle nudge at 8. + for _, phases := range [][]string{{}, {"query"}} { + t.Run(fmt.Sprintf("phases=%v", phases), func(t *testing.T) { + wt := newWorkflowTracker(phases) + writeTools := []string{"code_agent_edit"} + + var nudges []string + for i := 0; i < 9; i++ { + wt.recordIteration([]toolIterResult{{Name: "grep_search"}}) + if msg, ok := wt.generateProactiveNudge(writeTools); ok { + nudges = append(nudges, msg) + } + } + + // Should get exactly 1 nudge (gentle at 8), not 3 (plan+transition+urgent) + if len(nudges) != 1 { + t.Fatalf("expected 1 gentle nudge, got %d: %v", len(nudges), nudges) + } + if !strings.Contains(nudges[0], "provide your analysis") { + t.Errorf("expected gentle query nudge, got: %s", nudges[0]) + } + // Must NOT contain edit-specific nudge language + if strings.Contains(nudges[0], "STOP READING") || strings.Contains(nudges[0], "PLANNING CHECKPOINT") { + t.Errorf("query-only skill should not get edit nudges, got: %s", nudges[0]) + } + }) + } +} + +func TestVerifyNudgeAfterFirstEdit(t *testing.T) { + wt := newWorkflowTracker([]string{"edit", "finalize"}) + // Simulate: 3 reads then 1 edit + for i := 0; i < 3; i++ { + 
wt.recordIteration([]toolIterResult{{Name: "grep_search"}}) + } + wt.recordIteration([]toolIterResult{{Name: "code_agent_edit"}}) + // Next iteration (1 iter since edit) — should fire verify nudge + wt.recordIteration([]toolIterResult{{Name: "code_agent_read"}}) + msg, ok := wt.generateProactiveNudge([]string{"code_agent_edit"}) + if !ok { + t.Fatal("expected verification nudge after first edit") + } + if !strings.Contains(msg, "VERIFY YOUR FIX") { + t.Errorf("expected VERIFY YOUR FIX nudge, got: %s", msg) + } + // Should not fire again + wt.recordIteration([]toolIterResult{{Name: "code_agent_read"}}) + _, ok = wt.generateProactiveNudge([]string{"code_agent_edit"}) + if ok { + t.Error("verification nudge should fire only once") + } +} + +func TestVerifyNudgeNotFiredForFeatures(t *testing.T) { + // Tracker without edit phase — no verification nudge + wt := newWorkflowTracker([]string{}) + wt.recordIteration([]toolIterResult{{Name: "code_agent_edit"}}) + wt.recordIteration([]toolIterResult{{Name: "code_agent_read"}}) + _, ok := wt.generateProactiveNudge([]string{"code_agent_edit"}) + if ok { + t.Error("verification nudge should not fire without edit workflow phase") + } +} + +func TestNoGitNudgeWithoutFinalize(t *testing.T) { + // Tracker with only edit phase (no finalize). + // After edits, 4+ iterations should NOT fire git workflow nudge. 
+ wt := newWorkflowTracker([]string{"edit"}) + writeTools := []string{"code_agent_edit"} + + // Do an edit + wt.recordIteration([]toolIterResult{{Name: "code_agent_edit"}}) + + // 5 more iterations of reads — should NOT trigger git nudge + var nudges []string + for i := 0; i < 5; i++ { + wt.recordIteration([]toolIterResult{{Name: "grep_search"}}) + if msg, ok := wt.generateProactiveNudge(writeTools); ok { + nudges = append(nudges, msg) + } + } + + for _, nudge := range nudges { + if strings.Contains(nudge, "committed") || strings.Contains(nudge, "git workflow") { + t.Errorf("should not get git nudge without finalize phase, got: %s", nudge) + } + } + + // Also verify stop nudge says "summarize" not "commit/push/PR" + wt2 := newWorkflowTracker([]string{"edit"}) + wt2.recordIteration([]toolIterResult{{Name: "code_agent_edit"}}) + // workflowIncomplete should be false (edit is OK, finalize not required) + if wt2.requireFinalize { + t.Error("requireFinalize should be false for edit-only phases") + } + if !wt2.phaseOK(phaseEdit) { + t.Error("phaseEdit should be OK after successful edit") + } +} + +func TestWorkflowIncompleteWithPhases(t *testing.T) { + tests := []struct { + name string + phases []string + tools []toolIterResult + wantIncomplete bool + }{ + { + name: "edit+finalize, nothing done", + phases: []string{"edit", "finalize"}, + tools: []toolIterResult{{Name: "grep_search"}}, + wantIncomplete: true, + }, + { + name: "edit+finalize, edit done", + phases: []string{"edit", "finalize"}, + tools: []toolIterResult{{Name: "code_agent_edit"}}, + wantIncomplete: true, // finalize still missing + }, + { + name: "edit+finalize, both done", + phases: []string{"edit", "finalize"}, + tools: []toolIterResult{ + {Name: "code_agent_edit"}, + {Name: "github_commit"}, + {Name: "github_push"}, + }, + wantIncomplete: false, + }, + { + name: "edit only, edit done", + phases: []string{"edit"}, + tools: []toolIterResult{{Name: "code_agent_edit"}}, + wantIncomplete: false, + }, + { + 
name: "edit only, nothing done", + phases: []string{"edit"}, + tools: []toolIterResult{{Name: "grep_search"}}, + wantIncomplete: true, + }, + { + name: "query only, nothing done", + phases: []string{"query"}, + tools: []toolIterResult{{Name: "grep_search"}}, + wantIncomplete: false, // query doesn't require edit or finalize + }, + { + name: "no phases, nothing done", + phases: []string{}, + tools: []toolIterResult{{Name: "grep_search"}}, + wantIncomplete: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + wt := newWorkflowTracker(tt.phases) + wt.recordIteration(tt.tools) + + incomplete := (wt.requireEdit && !wt.phaseOK(phaseEdit)) || + (wt.requireFinalize && !wt.phaseOK(phaseGitOps)) + + if incomplete != tt.wantIncomplete { + t.Errorf("workflowIncomplete = %v, want %v", incomplete, tt.wantIncomplete) + } + }) + } +} + +// ─── File Re-read Detection Tests ──────────────────────────────────── + +func TestExtractReadFilePath(t *testing.T) { + tests := []struct { + name string + tool string + args string + wantPath string + }{ + {"file_read with path", "file_read", `{"path":"/src/main.ts"}`, "/src/main.ts"}, + {"file_read with file_path", "file_read", `{"file_path":"/src/app.go"}`, "/src/app.go"}, + {"code_agent_read with path", "code_agent_read", `{"path":"/lib/utils.js"}`, "/lib/utils.js"}, + {"code_agent_read file_path takes precedence", "code_agent_read", `{"path":"a","file_path":"b"}`, "b"}, + {"grep_search ignored", "grep_search", `{"path":"/src/main.ts"}`, ""}, + {"code_agent_edit ignored", "code_agent_edit", `{"path":"/src/main.ts"}`, ""}, + {"invalid JSON", "file_read", `{bad json`, ""}, + {"missing path fields", "file_read", `{"query":"test"}`, ""}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := extractReadFilePath(tt.tool, tt.args) + if got != tt.wantPath { + t.Errorf("extractReadFilePath(%q, %q) = %q, want %q", tt.tool, tt.args, got, tt.wantPath) + } + }) + } +} + +func 
TestReReadNudgeFires(t *testing.T) { + wt := newWorkflowTracker([]string{"edit"}) + writeTools := []string{"code_agent_edit"} + + // Read the same file twice + wt.recordIteration([]toolIterResult{{Name: "code_agent_read", FilePath: "/src/web-search.ts"}}) + wt.recordIteration([]toolIterResult{{Name: "code_agent_read", FilePath: "/src/web-search.ts"}}) + + msg, ok := wt.generateProactiveNudge(writeTools) + if !ok { + t.Fatal("expected re-read nudge to fire") + } + if !strings.Contains(msg, "STOP RE-READING FILES") { + t.Errorf("expected STOP RE-READING FILES nudge, got: %s", msg) + } + if !strings.Contains(msg, "/src/web-search.ts") { + t.Errorf("nudge should mention the re-read file, got: %s", msg) + } + + // Should not fire again + wt.recordIteration([]toolIterResult{{Name: "code_agent_read", FilePath: "/src/web-search.ts"}}) + _, ok = wt.generateProactiveNudge(writeTools) + if ok { + t.Error("re-read nudge should fire only once") + } +} + +func TestReReadNudgeNotForDifferentFiles(t *testing.T) { + wt := newWorkflowTracker([]string{"edit"}) + writeTools := []string{"code_agent_edit"} + + // Read 3 different files — no re-reads + wt.recordIteration([]toolIterResult{{Name: "code_agent_read", FilePath: "/src/a.ts"}}) + wt.recordIteration([]toolIterResult{{Name: "code_agent_read", FilePath: "/src/b.ts"}}) + wt.recordIteration([]toolIterResult{{Name: "code_agent_read", FilePath: "/src/c.ts"}}) + + msg, ok := wt.generateProactiveNudge(writeTools) + // At 3 consecutive reads we're below the planning checkpoint threshold (4), + // so no nudge should fire at all. 
+ if ok && strings.Contains(msg, "STOP RE-READING") { + t.Errorf("should not fire re-read nudge for different files, got: %s", msg) + } +} + +func TestCodeAgentRunIsWriteAction(t *testing.T) { + if !isWriteActionTool("code_agent_run") { + t.Error("isWriteActionTool(\"code_agent_run\") should return true") + } +} + +func TestGitNudgeIncludesVerifyReminder(t *testing.T) { + wt := newWorkflowTracker([]string{"edit", "finalize"}) + + // Simulate: edit, then verify nudge fires, then 4+ iterations of reads + wt.recordIteration([]toolIterResult{{Name: "code_agent_edit"}}) + wt.recordIteration([]toolIterResult{{Name: "code_agent_read"}}) + // Trigger verify nudge + wt.generateProactiveNudge([]string{"code_agent_edit"}) + if !wt.verifyNudgeDone { + t.Fatal("expected verifyNudgeDone to be true after verify nudge") + } + + // Now simulate 4 more read iterations to trigger git nudge + for range 4 { + wt.recordIteration([]toolIterResult{{Name: "grep_search"}}) + } + + msg, ok := wt.generateProactiveNudge([]string{"code_agent_edit"}) + if !ok { + t.Fatal("expected git nudge to fire") + } + if !strings.Contains(msg, "BEFORE committing") { + t.Errorf("git nudge should include verification reminder, got: %s", msg) + } + if !strings.Contains(msg, "RUNTIME behavior") { + t.Errorf("git nudge should mention RUNTIME behavior, got: %s", msg) + } + if !strings.Contains(msg, "github_status") { + t.Errorf("git nudge should still include git workflow steps, got: %s", msg) + } +} + +func TestGitNudgeNoVerifyReminderWithoutEditPhase(t *testing.T) { + // Tracker with finalize but NOT edit — git nudge should NOT include verify reminder + wt := newWorkflowTracker([]string{"finalize"}) + + // Mark edit as done (even though not required) + wt.recordIteration([]toolIterResult{{Name: "code_agent_edit"}}) + // 5 iterations of reads + for range 5 { + wt.recordIteration([]toolIterResult{{Name: "grep_search"}}) + } + + msg, ok := wt.generateProactiveNudge([]string{"code_agent_edit"}) + if !ok { + 
t.Fatal("expected git nudge to fire") + } + if strings.Contains(msg, "BEFORE committing") { + t.Errorf("git nudge without edit requirement should not include verify reminder, got: %s", msg) + } + if !strings.Contains(msg, "github_status") { + t.Errorf("git nudge should still include git workflow steps, got: %s", msg) + } +} diff --git a/forge-core/runtime/memory.go b/forge-core/runtime/memory.go index 61d9ac2..6746a38 100644 --- a/forge-core/runtime/memory.go +++ b/forge-core/runtime/memory.go @@ -137,6 +137,9 @@ func (m *Memory) Reset() { // It first prunes old tool results into compact placeholders (preserving signal), // then drops oldest message groups if still over budget. // +// The first user message (the task request) is always preserved so the LLM +// retains its objective even after aggressive trimming. +// // Messages are removed in structural groups to maintain valid sequences: // - An assistant message with tool_calls is always removed together with its // subsequent tool-result messages (they form one atomic group). @@ -151,26 +154,36 @@ func (m *Memory) trim() { m.pruneToolResults() } - // Phase 2: Drop oldest message groups (existing logic). - for m.totalChars() > m.maxChars && len(m.messages) > 1 { - // Determine the size of the first message group. - end := 1 - if m.messages[0].Role == llm.RoleTool { + // Find the first user message to pin (the task request). + pinEnd := 0 + for i, msg := range m.messages { + if msg.Role == llm.RoleUser { + pinEnd = i + 1 + break + } + } + + // Phase 2: Drop oldest message groups after the pinned prefix. + for m.totalChars() > m.maxChars && len(m.messages) > pinEnd+1 { + // Start trimming from the first non-pinned message. + idx := pinEnd + end := idx + 1 + if m.messages[idx].Role == llm.RoleTool { // Orphaned tool results — remove all contiguous tool messages. 
for end < len(m.messages) && m.messages[end].Role == llm.RoleTool { end++ } - } else if len(m.messages[0].ToolCalls) > 0 { + } else if len(m.messages[idx].ToolCalls) > 0 { // Assistant with tool_calls — include all following tool results. for end < len(m.messages) && m.messages[end].Role == llm.RoleTool { end++ } } - // Don't remove everything — keep at least one complete group. + // Don't remove everything — keep at least one complete group after pin. if end >= len(m.messages) { break } - m.messages = m.messages[end:] + m.messages = append(m.messages[:idx], m.messages[end:]...) } } diff --git a/forge-core/runtime/memory_compactor.go b/forge-core/runtime/memory_compactor.go index b8e43ea..7975a14 100644 --- a/forge-core/runtime/memory_compactor.go +++ b/forge-core/runtime/memory_compactor.go @@ -13,7 +13,6 @@ import ( const ( defaultCharBudget = 200_000 defaultTriggerRatio = 0.6 - summaryMaxTokens = 1024 summaryTimeout = 30 * time.Second maxExtractiveChars = 2000 ) @@ -95,6 +94,9 @@ func (c *Compactor) SetMemoryFlusher(f MemoryFlusher) { // if so, compacts the oldest 50% of messages into a summary. Returns true // if compaction occurred. // +// The first user message (the original task request) is always preserved +// so the LLM retains the objective across compaction cycles. +// // The method holds mem.mu for its entire duration including any LLM call. // This is safe because each Memory is used by a single sequential agent loop. func (c *Compactor) MaybeCompact(taskID string, mem *Memory) (bool, error) { @@ -115,14 +117,34 @@ func (c *Compactor) MaybeCompact(taskID string, mem *Memory) (bool, error) { "messages": len(mem.messages), }) - // Take oldest 50% of messages, respecting group boundaries. 
- target := len(mem.messages) / 2 - splitIdx := c.findGroupBoundary(mem.messages, target) - if splitIdx <= 0 || splitIdx >= len(mem.messages) { + // Find the first user message — this is the task request that must + // survive all compaction cycles so the LLM knows its objective. + pinIdx := -1 + for i, msg := range mem.messages { + if msg.Role == llm.RoleUser { + pinIdx = i + break + } + } + + // Compactable range starts after the pinned message. + compactStart := 0 + if pinIdx >= 0 { + compactStart = pinIdx + 1 + } + compactable := mem.messages[compactStart:] + if len(compactable) < 2 { return false, nil } - oldMessages := mem.messages[:splitIdx] + // Take oldest 50% of the compactable range, respecting group boundaries. + target := len(compactable) / 2 + splitIdx := c.findGroupBoundary(compactable, target) + if splitIdx <= 0 || splitIdx >= len(compactable) { + return false, nil + } + + oldMessages := compactable[:splitIdx] // Flush key observations to long-term memory before discarding. c.flushToLongTermMemory(oldMessages) @@ -133,14 +155,20 @@ func (c *Compactor) MaybeCompact(taskID string, mem *Memory) (bool, error) { return false, fmt.Errorf("summarization failed: %w", err) } - // Replace old messages with the summary. - mem.messages = mem.messages[splitIdx:] + // Rebuild messages: pinned prefix + remaining compactable messages. + pinned := mem.messages[:compactStart] + remaining := compactable[splitIdx:] + rebuilt := make([]llm.ChatMessage, 0, len(pinned)+len(remaining)) + rebuilt = append(rebuilt, pinned...) + rebuilt = append(rebuilt, remaining...) 
+ mem.messages = rebuilt mem.existingSummary = summary c.logger.Info("compaction complete", map[string]any{ "task_id": taskID, "removed": splitIdx, "remaining": len(mem.messages), + "preserved": compactStart, "summary_chars": len(summary), }) @@ -170,8 +198,16 @@ func (c *Compactor) summarize(messages []llm.ChatMessage, existingSummary string func (c *Compactor) llmSummarize(messages []llm.ChatMessage, existingSummary string) (string, error) { // Build the prompt for summarization. var sb strings.Builder - sb.WriteString("Summarize the following conversation concisely. ") - sb.WriteString("Preserve key facts, decisions, tool results, and action items. ") + sb.WriteString("Summarize the following conversation for an AI agent that will continue this task. ") + sb.WriteString("The agent will NOT have access to the original messages after this summary.\n\n") + sb.WriteString("You MUST preserve:\n") + sb.WriteString("- Identifiers: file paths, resource names, URLs, branches, environment details\n") + sb.WriteString("- Technical findings: what was examined, what was discovered, specific names and values\n") + sb.WriteString("- The agent's current hypothesis or analysis about the problem\n") + sb.WriteString("- Actions taken: what was created, modified, executed, and their outcomes\n") + sb.WriteString("- Errors encountered and whether they were resolved\n") + sb.WriteString("- What remains to be done\n\n") + sb.WriteString("Format: ## State, ## Findings, ## Progress, ## Remaining\n") sb.WriteString("Output only the summary, no preamble.\n\n") if existingSummary != "" { @@ -182,22 +218,29 @@ func (c *Compactor) llmSummarize(messages []llm.ChatMessage, existingSummary str sb.WriteString("## Conversation to summarize\n") for _, msg := range messages { - fmt.Fprintf(&sb, "[%s]: %s\n", msg.Role, truncateForPrompt(msg.Content, 500)) + if msg.Role == llm.RoleTool { + fmt.Fprintf(&sb, "[%s:%s]: %s\n", msg.Role, msg.Name, truncateForPrompt(msg.Content, 2000)) + } else { + 
fmt.Fprintf(&sb, "[%s]: %s\n", msg.Role, truncateForPrompt(msg.Content, 500)) + } for _, tc := range msg.ToolCalls { fmt.Fprintf(&sb, " -> tool_call: %s(%s)\n", tc.Function.Name, truncateForPrompt(tc.Function.Arguments, 200)) } } - temp := 0.3 ctx, cancel := context.WithTimeout(context.Background(), summaryTimeout) defer cancel() resp, err := c.client.Chat(ctx, &llm.ChatRequest{ Messages: []llm.ChatMessage{ + {Role: llm.RoleSystem, Content: "You are a summarizer for an AI agent that is working on a task. " + + "The agent will lose access to the original conversation after this summary. " + + "Preserve specific identifiers (file paths, resource names, URLs, function names, " + + "config keys) and technical findings so the agent can continue without re-reading. " + + "Use sections: ## State, ## Findings, ## Progress, ## Remaining. " + + "Keep under 1200 words."}, {Role: llm.RoleUser, Content: sb.String()}, }, - Temperature: &temp, - MaxTokens: summaryMaxTokens, }) if err != nil { return "", err diff --git a/forge-core/runtime/memory_compactor_test.go b/forge-core/runtime/memory_compactor_test.go index fc84790..181f440 100644 --- a/forge-core/runtime/memory_compactor_test.go +++ b/forge-core/runtime/memory_compactor_test.go @@ -384,6 +384,113 @@ func TestFindGroupBoundary(t *testing.T) { } } +func TestCompactorStructuredSummaryPrompt(t *testing.T) { + var capturedPrompt string + client := &mockLLMClient{ + chatFunc: func(ctx context.Context, req *llm.ChatRequest) (*llm.ChatResponse, error) { + // Capture the user message (which contains the summarization prompt) + for _, msg := range req.Messages { + if msg.Role == llm.RoleUser { + capturedPrompt = msg.Content + } + } + // Also verify the system prompt + for _, msg := range req.Messages { + if msg.Role == llm.RoleSystem { + if !strings.Contains(msg.Content, "## State") { + t.Errorf("system prompt should mention structured sections, got: %s", msg.Content) + } + if !strings.Contains(msg.Content, "1200 words") { + 
t.Errorf("system prompt should mention 1200 word limit, got: %s", msg.Content) + } + } + } + return &llm.ChatResponse{ + Message: llm.ChatMessage{ + Role: llm.RoleAssistant, + Content: "## State\nWorking on bug fix\n## Findings\nFound issue in main.go:42", + }, + FinishReason: "stop", + }, nil + }, + } + + c := NewCompactor(CompactorConfig{Client: client}) + + messages := []llm.ChatMessage{ + {Role: llm.RoleUser, Content: "Fix the bug in /src/main.go"}, + {Role: llm.RoleAssistant, Content: "I'll investigate the issue."}, + } + + _, err := c.llmSummarize(messages, "") + if err != nil { + t.Fatalf("llmSummarize: %v", err) + } + + // Verify the structured prompt contains required elements + for _, want := range []string{"## State", "## Findings", "Identifiers", "Technical findings"} { + if !strings.Contains(capturedPrompt, want) { + t.Errorf("summarization prompt should contain %q, got: %s", want, capturedPrompt) + } + } +} + +func TestCompactorToolResultTruncation2000(t *testing.T) { + var capturedPrompt string + client := &mockLLMClient{ + chatFunc: func(ctx context.Context, req *llm.ChatRequest) (*llm.ChatResponse, error) { + for _, msg := range req.Messages { + if msg.Role == llm.RoleUser { + capturedPrompt = msg.Content + } + } + return &llm.ChatResponse{ + Message: llm.ChatMessage{Role: llm.RoleAssistant, Content: "Summary"}, + FinishReason: "stop", + }, nil + }, + } + + c := NewCompactor(CompactorConfig{Client: client}) + + // Create a tool result that is >500 but <2000 chars — should NOT be truncated + toolContent := strings.Repeat("x", 1500) + messages := []llm.ChatMessage{ + {Role: llm.RoleUser, Content: "Do something"}, + {Role: llm.RoleAssistant, Content: "", ToolCalls: []llm.ToolCall{ + {ID: "c1", Type: "function", Function: llm.FunctionCall{Name: "code_agent_read", Arguments: `{"path":"/src/main.go"}`}}, + }}, + {Role: llm.RoleTool, Content: toolContent, ToolCallID: "c1", Name: "code_agent_read"}, + } + + _, err := c.llmSummarize(messages, "") + if err 
!= nil { + t.Fatalf("llmSummarize: %v", err) + } + + // Tool result (1500 chars) should NOT be truncated since limit is 2000 + if !strings.Contains(capturedPrompt, toolContent) { + t.Error("tool result under 2000 chars should not be truncated in summarization prompt") + } + + // Now test with a result >2000 chars — should be truncated + longToolContent := strings.Repeat("y", 2500) + messages[2].Content = longToolContent + + _, err = c.llmSummarize(messages, "") + if err != nil { + t.Fatalf("llmSummarize: %v", err) + } + + // Should be truncated to 2000 chars + "..." + if strings.Contains(capturedPrompt, longToolContent) { + t.Error("tool result over 2000 chars should be truncated in summarization prompt") + } + if !strings.Contains(capturedPrompt, "...") { + t.Error("truncated tool result should end with '...'") + } +} + func TestTruncateForPrompt(t *testing.T) { short := "hello" if got := truncateForPrompt(short, 10); got != short { diff --git a/forge-core/tools/builtins/bash_execute.go b/forge-core/tools/builtins/bash_execute.go new file mode 100644 index 0000000..f2d8fdf --- /dev/null +++ b/forge-core/tools/builtins/bash_execute.go @@ -0,0 +1,219 @@ +package builtins + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "os" + "os/exec" + "runtime" + "strings" + "time" + + "github.com/initializ/forge/forge-core/tools" +) + +const ( + bashDefaultTimeout = 120 * time.Second + bashMaxOutputBytes = 1 * 1024 * 1024 // 1 MB +) + +// dangerousCommands is a deny-list of commands/patterns that are blocked. +var dangerousCommands = []string{ + "rm -rf /", + "rm -rf /*", + "mkfs.", + "dd if=", + ":(){ :|:& };:", // fork bomb + "> /dev/sda", // disk overwrite + "chmod -R 777 /", // recursive world-writable root + "shutdown", // system shutdown + "reboot", // system reboot + "init 0", // system halt + "halt", // system halt + "poweroff", // system poweroff +} + +// blockedPrefixes are command prefixes that are always blocked. 
+var blockedPrefixes = []string{ + "sudo ", + "su ", + "su\n", +} + +type bashExecuteTool struct { + workDir string + proxyURL string +} + +func (t *bashExecuteTool) Name() string { return "bash_execute" } +func (t *bashExecuteTool) Description() string { + return "Execute a bash command in the project directory. Supports pipes, redirection, and shell features. Commands run with a timeout (default 120s) and output is capped at 1MB. Dangerous commands (sudo, rm -rf /, etc.) are blocked." +} +func (t *bashExecuteTool) Category() tools.Category { return tools.CategoryBuiltin } + +func (t *bashExecuteTool) InputSchema() json.RawMessage { + return json.RawMessage(`{ + "type": "object", + "properties": { + "command": { + "type": "string", + "description": "The bash command to execute" + }, + "timeout": { + "type": "integer", + "description": "Timeout in seconds. Default: 120, Max: 600" + } + }, + "required": ["command"] + }`) +} + +func (t *bashExecuteTool) Execute(ctx context.Context, args json.RawMessage) (string, error) { + var input struct { + Command string `json:"command"` + Timeout int `json:"timeout"` + } + if err := json.Unmarshal(args, &input); err != nil { + return "", fmt.Errorf("invalid arguments: %w", err) + } + + if strings.TrimSpace(input.Command) == "" { + return "", fmt.Errorf("command is required") + } + + // Check deny-list. + if err := t.validateCommand(input.Command); err != nil { + return "", err + } + + // Determine timeout. 
+ timeout := bashDefaultTimeout + if input.Timeout > 0 { + timeout = time.Duration(min(input.Timeout, 600)) * time.Second + } + + cmdCtx, cancel := context.WithTimeout(ctx, timeout) + defer cancel() + + cmd := exec.CommandContext(cmdCtx, "bash", "-c", input.Command) + cmd.Dir = t.workDir + cmd.Env = t.buildEnv() + + stdoutWriter := newBashLimitedWriter(bashMaxOutputBytes) + stderrWriter := newBashLimitedWriter(bashMaxOutputBytes) + cmd.Stdout = stdoutWriter + cmd.Stderr = stderrWriter + + err := cmd.Run() + + exitCode := 0 + if err != nil { + if exitErr, ok := err.(*exec.ExitError); ok { + exitCode = exitErr.ExitCode() + } else if cmdCtx.Err() == context.DeadlineExceeded { + exitCode = 124 // timeout exit code + } else { + exitCode = 1 + } + } + + result := map[string]any{ + "stdout": strings.TrimRight(stdoutWriter.String(), "\n"), + "stderr": strings.TrimRight(stderrWriter.String(), "\n"), + "exit_code": exitCode, + "truncated": stdoutWriter.overflow || stderrWriter.overflow, + } + + if cmdCtx.Err() == context.DeadlineExceeded { + result["error"] = "command timed out" + } + + out, _ := json.Marshal(result) + return string(out), nil +} + +func (t *bashExecuteTool) validateCommand(cmd string) error { + lower := strings.ToLower(strings.TrimSpace(cmd)) + + for _, prefix := range blockedPrefixes { + if strings.HasPrefix(lower, prefix) { + return fmt.Errorf("command blocked: %q is not allowed", strings.TrimSpace(prefix)) + } + } + + for _, pattern := range dangerousCommands { + if strings.Contains(lower, strings.ToLower(pattern)) { + return fmt.Errorf("command blocked: contains dangerous pattern %q", pattern) + } + } + + return nil +} + +func (t *bashExecuteTool) buildEnv() []string { + home := os.Getenv("HOME") + if home == "" { + home = t.workDir + } + + env := []string{ + "PATH=" + os.Getenv("PATH"), + "HOME=" + home, + "LANG=" + os.Getenv("LANG"), + "TERM=xterm-256color", + "USER=" + os.Getenv("USER"), + } + + // Pass through DISPLAY for Linux GUI apps (browser 
opening). + if runtime.GOOS == "linux" { + if display := os.Getenv("DISPLAY"); display != "" { + env = append(env, "DISPLAY="+display) + } + if xauth := os.Getenv("XAUTHORITY"); xauth != "" { + env = append(env, "XAUTHORITY="+xauth) + } + } + + if t.proxyURL != "" { + env = append(env, + "HTTP_PROXY="+t.proxyURL, + "HTTPS_PROXY="+t.proxyURL, + "http_proxy="+t.proxyURL, + "https_proxy="+t.proxyURL, + ) + } + + return env +} + +// bashLimitedWriter caps output at a byte limit. +type bashLimitedWriter struct { + buf bytes.Buffer + limit int + overflow bool +} + +func newBashLimitedWriter(limit int) *bashLimitedWriter { + return &bashLimitedWriter{limit: limit} +} + +func (w *bashLimitedWriter) Write(p []byte) (int, error) { + remaining := w.limit - w.buf.Len() + if remaining <= 0 { + w.overflow = true + return len(p), nil + } + if len(p) > remaining { + w.buf.Write(p[:remaining]) + w.overflow = true + return len(p), nil + } + w.buf.Write(p) + return len(p), nil +} + +func (w *bashLimitedWriter) String() string { + return w.buf.String() +} diff --git a/forge-core/tools/builtins/code_agent_tools_test.go b/forge-core/tools/builtins/code_agent_tools_test.go new file mode 100644 index 0000000..3d70d49 --- /dev/null +++ b/forge-core/tools/builtins/code_agent_tools_test.go @@ -0,0 +1,776 @@ +package builtins + +import ( + "context" + "encoding/json" + "fmt" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/initializ/forge/forge-core/tools" +) + +func TestRegisterCodeAgentTools(t *testing.T) { + reg := tools.NewRegistry() + workDir := t.TempDir() + + err := RegisterCodeAgentTools(reg, workDir) + if err != nil { + t.Fatalf("RegisterCodeAgentTools failed: %v", err) + } + + expected := []string{ + "file_read", + "file_write", + "file_edit", + "file_patch", + "bash_execute", + "grep_search", + "glob_search", + "directory_tree", + } + + for _, name := range expected { + if reg.Get(name) == nil { + t.Errorf("expected tool %q to be registered", name) + } + } +} + 
+func TestCodeAgentToolsCount(t *testing.T) { + toolList := CodeAgentTools(t.TempDir()) + if len(toolList) != 8 { + t.Errorf("expected 8 tools, got %d", len(toolList)) + } +} + +// --- file_read tests --- + +func TestFileRead_HappyPath(t *testing.T) { + workDir := t.TempDir() + content := "line1\nline2\nline3\nline4\nline5\n" + writeTestFile(t, workDir, "test.txt", content) + + tool := &fileReadTool{pathValidator: NewPathValidator(workDir)} + result, err := tool.Execute(context.Background(), toJSON(t, map[string]any{"path": "test.txt"})) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if result == "" { + t.Fatal("expected non-empty result") + } +} + +func TestFileRead_WithOffset(t *testing.T) { + workDir := t.TempDir() + content := "line1\nline2\nline3\nline4\nline5" + writeTestFile(t, workDir, "test.txt", content) + + tool := &fileReadTool{pathValidator: NewPathValidator(workDir)} + result, err := tool.Execute(context.Background(), toJSON(t, map[string]any{ + "path": "test.txt", + "offset": 3, + "limit": 2, + })) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + // Should contain lines 3 and 4 + if result == "" { + t.Fatal("expected non-empty result") + } +} + +func TestFileRead_Directory(t *testing.T) { + workDir := t.TempDir() + writeTestFile(t, workDir, "a.txt", "content") + writeTestFile(t, workDir, "b.txt", "content") + + tool := &fileReadTool{pathValidator: NewPathValidator(workDir)} + result, err := tool.Execute(context.Background(), toJSON(t, map[string]any{"path": "."})) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if result == "" { + t.Fatal("expected directory listing") + } +} + +func TestFileRead_PathTraversal(t *testing.T) { + workDir := t.TempDir() + tool := &fileReadTool{pathValidator: NewPathValidator(workDir)} + + _, err := tool.Execute(context.Background(), toJSON(t, map[string]any{"path": "../../../etc/passwd"})) + if err == nil { + t.Fatal("expected error for path traversal") + } +} + +// --- 
file_write tests --- + +func TestFileWrite_Create(t *testing.T) { + workDir := t.TempDir() + tool := &fileWriteTool{pathValidator: NewPathValidator(workDir)} + + result, err := tool.Execute(context.Background(), toJSON(t, map[string]any{ + "path": "new_file.go", + "content": "package main\n", + })) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + var out map[string]any + if err := json.Unmarshal([]byte(result), &out); err != nil { + t.Fatalf("unmarshal: %v", err) + } + if out["action"] != "created" { + t.Errorf("expected action 'created', got %q", out["action"]) + } + + // Verify file exists. + data, err := os.ReadFile(filepath.Join(workDir, "new_file.go")) + if err != nil { + t.Fatalf("file not created: %v", err) + } + if string(data) != "package main\n" { + t.Errorf("unexpected content: %s", data) + } +} + +func TestFileWrite_Update(t *testing.T) { + workDir := t.TempDir() + writeTestFile(t, workDir, "existing.txt", "old content") + tool := &fileWriteTool{pathValidator: NewPathValidator(workDir)} + + result, err := tool.Execute(context.Background(), toJSON(t, map[string]any{ + "path": "existing.txt", + "content": "new content", + })) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + var out map[string]any + if err := json.Unmarshal([]byte(result), &out); err != nil { + t.Fatalf("unmarshal: %v", err) + } + if out["action"] != "updated" { + t.Errorf("expected action 'updated', got %q", out["action"]) + } +} + +func TestFileWrite_CreatesDirs(t *testing.T) { + workDir := t.TempDir() + tool := &fileWriteTool{pathValidator: NewPathValidator(workDir)} + + _, err := tool.Execute(context.Background(), toJSON(t, map[string]any{ + "path": "deep/nested/dir/file.txt", + "content": "hello", + })) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if _, err := os.Stat(filepath.Join(workDir, "deep", "nested", "dir", "file.txt")); err != nil { + t.Fatalf("file not created in nested dir: %v", err) + } +} + +func 
TestFileWrite_PathTraversal(t *testing.T) { + workDir := t.TempDir() + tool := &fileWriteTool{pathValidator: NewPathValidator(workDir)} + + _, err := tool.Execute(context.Background(), toJSON(t, map[string]any{ + "path": "../../escape.txt", + "content": "evil", + })) + if err == nil { + t.Fatal("expected error for path traversal") + } +} + +// --- file_edit tests --- + +func TestFileEdit_HappyPath(t *testing.T) { + workDir := t.TempDir() + writeTestFile(t, workDir, "edit_me.go", "func foo() {\n\treturn 1\n}\n") + + tool := &fileEditTool{pathValidator: NewPathValidator(workDir)} + result, err := tool.Execute(context.Background(), toJSON(t, map[string]any{ + "path": "edit_me.go", + "old_text": "return 1", + "new_text": "return 42", + })) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + // Verify diff output. + if result == "" { + t.Fatal("expected diff output") + } + + // Verify file was updated. + data, _ := os.ReadFile(filepath.Join(workDir, "edit_me.go")) + if got := string(data); got != "func foo() {\n\treturn 42\n}\n" { + t.Errorf("unexpected content: %s", got) + } +} + +func TestFileEdit_NotFound(t *testing.T) { + workDir := t.TempDir() + writeTestFile(t, workDir, "test.txt", "hello world") + + tool := &fileEditTool{pathValidator: NewPathValidator(workDir)} + _, err := tool.Execute(context.Background(), toJSON(t, map[string]any{ + "path": "test.txt", + "old_text": "nonexistent string", + "new_text": "replacement", + })) + if err == nil { + t.Fatal("expected error for old_text not found") + } +} + +func TestFileEdit_AmbiguousMatch(t *testing.T) { + workDir := t.TempDir() + writeTestFile(t, workDir, "test.txt", "foo bar foo") + + tool := &fileEditTool{pathValidator: NewPathValidator(workDir)} + _, err := tool.Execute(context.Background(), toJSON(t, map[string]any{ + "path": "test.txt", + "old_text": "foo", + "new_text": "baz", + })) + if err == nil { + t.Fatal("expected error for ambiguous match") + } +} + +// --- file_patch tests --- + +func 
TestFilePatch_MultipleOps(t *testing.T) { + workDir := t.TempDir() + writeTestFile(t, workDir, "a.txt", "content A") + + tool := &filePatchTool{pathValidator: NewPathValidator(workDir)} + result, err := tool.Execute(context.Background(), toJSON(t, map[string]any{ + "operations": []map[string]any{ + {"action": "add", "path": "b.txt", "content": "content B"}, + {"action": "update", "path": "a.txt", "content": "updated A"}, + {"action": "move", "path": "b.txt", "new_path": "c.txt"}, + }, + })) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if result == "" { + t.Fatal("expected result") + } + + // Verify a.txt updated. + data, _ := os.ReadFile(filepath.Join(workDir, "a.txt")) + if string(data) != "updated A" { + t.Errorf("a.txt not updated: %s", data) + } + + // Verify c.txt exists (moved from b.txt). + if _, err := os.Stat(filepath.Join(workDir, "c.txt")); err != nil { + t.Fatal("c.txt should exist after move") + } + + // Verify b.txt no longer exists. + if _, err := os.Stat(filepath.Join(workDir, "b.txt")); err == nil { + t.Fatal("b.txt should not exist after move") + } +} + +func TestFilePatch_Delete(t *testing.T) { + workDir := t.TempDir() + writeTestFile(t, workDir, "delete_me.txt", "goodbye") + + tool := &filePatchTool{pathValidator: NewPathValidator(workDir)} + _, err := tool.Execute(context.Background(), toJSON(t, map[string]any{ + "operations": []map[string]any{ + {"action": "delete", "path": "delete_me.txt"}, + }, + })) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if _, err := os.Stat(filepath.Join(workDir, "delete_me.txt")); err == nil { + t.Fatal("file should be deleted") + } +} + +func TestFilePatch_PathTraversal(t *testing.T) { + workDir := t.TempDir() + tool := &filePatchTool{pathValidator: NewPathValidator(workDir)} + + _, err := tool.Execute(context.Background(), toJSON(t, map[string]any{ + "operations": []map[string]any{ + {"action": "add", "path": "../../evil.txt", "content": "bad"}, + }, + })) + if err == nil { 
+ t.Fatal("expected error for path traversal") + } +} + +// --- bash_execute tests --- + +func TestBashExecute_HappyPath(t *testing.T) { + workDir := t.TempDir() + tool := &bashExecuteTool{workDir: workDir} + + result, err := tool.Execute(context.Background(), toJSON(t, map[string]any{ + "command": "echo hello", + })) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + var out map[string]any + if err := json.Unmarshal([]byte(result), &out); err != nil { + t.Fatalf("unmarshal: %v", err) + } + if out["stdout"] != "hello" { + t.Errorf("expected stdout 'hello', got %q", out["stdout"]) + } + if out["exit_code"] != float64(0) { + t.Errorf("expected exit_code 0, got %v", out["exit_code"]) + } +} + +func TestBashExecute_ExitCode(t *testing.T) { + workDir := t.TempDir() + tool := &bashExecuteTool{workDir: workDir} + + result, err := tool.Execute(context.Background(), toJSON(t, map[string]any{ + "command": "exit 42", + })) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + var out map[string]any + if err := json.Unmarshal([]byte(result), &out); err != nil { + t.Fatalf("unmarshal: %v", err) + } + if out["exit_code"] != float64(42) { + t.Errorf("expected exit_code 42, got %v", out["exit_code"]) + } +} + +func TestBashExecute_DangerousCommandBlocked(t *testing.T) { + workDir := t.TempDir() + tool := &bashExecuteTool{workDir: workDir} + + tests := []struct { + name string + command string + }{ + {"sudo", "sudo rm -rf /"}, + {"rm_rf_root", "rm -rf /"}, + {"fork_bomb", ":(){ :|:& };:"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + _, err := tool.Execute(context.Background(), toJSON(t, map[string]any{ + "command": tt.command, + })) + if err == nil { + t.Fatal("expected error for dangerous command") + } + }) + } +} + +func TestBashExecute_Timeout(t *testing.T) { + workDir := t.TempDir() + tool := &bashExecuteTool{workDir: workDir} + + result, err := tool.Execute(context.Background(), toJSON(t, map[string]any{ + "command": 
"sleep 10", + "timeout": 1, + })) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + var out map[string]any + if err := json.Unmarshal([]byte(result), &out); err != nil { + t.Fatalf("unmarshal: %v", err) + } + if out["exit_code"] != float64(124) { + // Process may be killed with different exit codes, just verify it's non-zero + if out["exit_code"] == float64(0) { + t.Error("expected non-zero exit code for timed out command") + } + } +} + +func TestBashExecute_Pipes(t *testing.T) { + workDir := t.TempDir() + tool := &bashExecuteTool{workDir: workDir} + + result, err := tool.Execute(context.Background(), toJSON(t, map[string]any{ + "command": "echo 'hello world' | tr ' ' '\\n' | wc -l", + })) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + var out map[string]any + if err := json.Unmarshal([]byte(result), &out); err != nil { + t.Fatalf("unmarshal: %v", err) + } + if out["exit_code"] != float64(0) { + t.Errorf("expected exit_code 0, got %v", out["exit_code"]) + } +} + +// --- grep_search tests --- + +func TestGrepSearch_HappyPath(t *testing.T) { + workDir := t.TempDir() + writeTestFile(t, workDir, "main.go", "package main\n\nfunc main() {\n\tfmt.Println(\"hello\")\n}\n") + writeTestFile(t, workDir, "lib.go", "package main\n\nfunc helper() {}\n") + + tool := &grepSearchTool{pathValidator: NewPathValidator(workDir)} + result, err := tool.Execute(context.Background(), toJSON(t, map[string]any{ + "pattern": "func.*main", + })) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if result == "(no matches found)" { + t.Fatal("expected matches") + } +} + +func TestGrepSearch_NoMatch(t *testing.T) { + workDir := t.TempDir() + writeTestFile(t, workDir, "test.txt", "hello world") + + tool := &grepSearchTool{pathValidator: NewPathValidator(workDir)} + result, err := tool.Execute(context.Background(), toJSON(t, map[string]any{ + "pattern": "nonexistent_pattern_xyz", + })) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + 
if result != "(no matches found)" { + t.Errorf("expected no matches, got: %s", result) + } +} + +func TestGrepSearch_InvalidRegex(t *testing.T) { + workDir := t.TempDir() + writeTestFile(t, workDir, "test.txt", "hello") + + tool := &grepSearchTool{pathValidator: NewPathValidator(workDir)} + // Use Go fallback by searching. Invalid regex should fail. + _, err := tool.Execute(context.Background(), toJSON(t, map[string]any{ + "pattern": "[invalid", + })) + // rg might handle this differently, so just ensure it doesn't panic. + _ = err +} + +func TestGrepSearch_WithInclude(t *testing.T) { + workDir := t.TempDir() + writeTestFile(t, workDir, "main.go", "func main() {}") + writeTestFile(t, workDir, "main.py", "def main(): pass") + + tool := &grepSearchTool{pathValidator: NewPathValidator(workDir)} + result, err := tool.Execute(context.Background(), toJSON(t, map[string]any{ + "pattern": "main", + "include": "*.go", + })) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if result == "(no matches found)" { + t.Fatal("expected matches for .go files") + } +} + +func TestGrepSearch_ExcludePattern(t *testing.T) { + workDir := t.TempDir() + writeTestFile(t, workDir, "main.go", "func main() { apiKey := \"\" }") + writeTestFile(t, workDir, "main_test.go", "func TestMain() { apiKey := \"test\" }") + + tool := &grepSearchTool{pathValidator: NewPathValidator(workDir)} + result, err := tool.Execute(context.Background(), toJSON(t, map[string]any{ + "pattern": "apiKey", + "exclude": "*_test.go", + })) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if result == "(no matches found)" { + t.Fatal("expected matches from main.go") + } + if strings.Contains(result, "main_test.go") { + t.Error("expected test file to be excluded") + } + if !strings.Contains(result, "main.go") { + t.Error("expected main.go to be included") + } +} + +func TestGrepSearch_ContextLines(t *testing.T) { + workDir := t.TempDir() + writeTestFile(t, workDir, "schema.ts", 
"line1\nline2\napiKey: z.string()\nline4\nline5\n") + + tool := &grepSearchTool{pathValidator: NewPathValidator(workDir)} + result, err := tool.Execute(context.Background(), toJSON(t, map[string]any{ + "pattern": "apiKey", + "context": 1, + })) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + // Should contain context lines around the match. + if !strings.Contains(result, "line2") { + t.Error("expected before-context line 'line2'") + } + if !strings.Contains(result, "apiKey") { + t.Error("expected matching line") + } + if !strings.Contains(result, "line4") { + t.Error("expected after-context line 'line4'") + } +} + +func TestGrepSearch_MaxResultsConsistency(t *testing.T) { + workDir := t.TempDir() + // Create a file with many matching lines. + var sb strings.Builder + for i := 0; i < 100; i++ { + fmt.Fprintf(&sb, "match line %d\n", i) + } + writeTestFile(t, workDir, "many.txt", sb.String()) + + tool := &grepSearchTool{pathValidator: NewPathValidator(workDir)} + result, err := tool.Execute(context.Background(), toJSON(t, map[string]any{ + "pattern": "match", + "max_results": 10, + })) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + // Count output lines (excluding trailing empty line). + lines := strings.Split(strings.TrimRight(result, "\n"), "\n") + // Should be at most 10 lines plus possibly a truncation notice. 
+ if len(lines) > 11 { + t.Errorf("expected at most ~10 result lines, got %d", len(lines)) + } +} + +func TestGrepSearch_ExcludeAndIncludeTogether(t *testing.T) { + workDir := t.TempDir() + writeTestFile(t, workDir, "app.ts", "const x = 1") + writeTestFile(t, workDir, "app.test.ts", "const x = 2") + writeTestFile(t, workDir, "app.go", "var x = 3") + + tool := &grepSearchTool{pathValidator: NewPathValidator(workDir)} + result, err := tool.Execute(context.Background(), toJSON(t, map[string]any{ + "pattern": "x", + "include": "*.ts", + "exclude": "*.test.ts", + })) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !strings.Contains(result, "app.ts") { + t.Error("expected app.ts in results") + } + if strings.Contains(result, "app.test.ts") { + t.Error("expected app.test.ts to be excluded") + } + if strings.Contains(result, "app.go") { + t.Error("expected app.go to be excluded by include filter") + } +} + +// --- glob_search tests --- + +func TestGlobSearch_HappyPath(t *testing.T) { + workDir := t.TempDir() + writeTestFile(t, workDir, "main.go", "package main") + writeTestFile(t, workDir, "lib.go", "package main") + writeTestFile(t, workDir, "readme.md", "# readme") + + tool := &globSearchTool{pathValidator: NewPathValidator(workDir)} + result, err := tool.Execute(context.Background(), toJSON(t, map[string]any{ + "pattern": "*.go", + })) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if result == "(no matches found)" { + t.Fatal("expected matches") + } +} + +func TestGlobSearch_DoublestarPattern(t *testing.T) { + workDir := t.TempDir() + _ = os.MkdirAll(filepath.Join(workDir, "src", "pkg"), 0o755) + writeTestFile(t, workDir, "src/main.go", "package main") + writeTestFile(t, workDir, "src/pkg/lib.go", "package pkg") + + tool := &globSearchTool{pathValidator: NewPathValidator(workDir)} + result, err := tool.Execute(context.Background(), toJSON(t, map[string]any{ + "pattern": "**/*.go", + })) + if err != nil { + t.Fatalf("unexpected error: 
%v", err) + } + if result == "(no matches found)" { + t.Fatal("expected matches") + } +} + +func TestGlobSearch_NoMatch(t *testing.T) { + workDir := t.TempDir() + writeTestFile(t, workDir, "test.txt", "hello") + + tool := &globSearchTool{pathValidator: NewPathValidator(workDir)} + result, err := tool.Execute(context.Background(), toJSON(t, map[string]any{ + "pattern": "*.xyz", + })) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if result != "(no matches found)" { + t.Errorf("expected no matches, got: %s", result) + } +} + +// --- directory_tree tests --- + +func TestDirectoryTree_HappyPath(t *testing.T) { + workDir := t.TempDir() + _ = os.MkdirAll(filepath.Join(workDir, "src", "pkg"), 0o755) + writeTestFile(t, workDir, "src/main.go", "package main") + writeTestFile(t, workDir, "src/pkg/lib.go", "package pkg") + writeTestFile(t, workDir, "README.md", "# readme") + + tool := &directoryTreeTool{pathValidator: NewPathValidator(workDir)} + result, err := tool.Execute(context.Background(), toJSON(t, map[string]any{})) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if result == "" { + t.Fatal("expected tree output") + } +} + +func TestDirectoryTree_SkipsDotGit(t *testing.T) { + workDir := t.TempDir() + _ = os.MkdirAll(filepath.Join(workDir, ".git", "objects"), 0o755) + writeTestFile(t, workDir, "main.go", "package main") + + tool := &directoryTreeTool{pathValidator: NewPathValidator(workDir)} + result, err := tool.Execute(context.Background(), toJSON(t, map[string]any{})) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + // .git should not appear in output. + if containsString(result, ".git") { + t.Error("expected .git to be excluded from tree") + } +} + +func TestDirectoryTree_ShowsFileSizes(t *testing.T) { + workDir := t.TempDir() + // Create a file larger than 1KB. + bigContent := strings.Repeat("x", 2048) + writeTestFile(t, workDir, "big.txt", bigContent) + // Create a small file (< 1KB, no size shown). 
+ writeTestFile(t, workDir, "small.txt", "tiny") + + tool := &directoryTreeTool{pathValidator: NewPathValidator(workDir)} + result, err := tool.Execute(context.Background(), toJSON(t, map[string]any{})) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !strings.Contains(result, "big.txt (2KB)") { + t.Errorf("expected file size for big.txt, got:\n%s", result) + } + // Small file should not have a size suffix. + if strings.Contains(result, "small.txt (") { + t.Errorf("expected no size suffix for small.txt, got:\n%s", result) + } +} + +func TestDirectoryTree_PathTraversal(t *testing.T) { + workDir := t.TempDir() + tool := &directoryTreeTool{pathValidator: NewPathValidator(workDir)} + + _, err := tool.Execute(context.Background(), toJSON(t, map[string]any{"path": "../.."})) + if err == nil { + t.Fatal("expected error for path traversal") + } +} + +// --- truncate tests --- + +func TestTruncateOutput_ShortString(t *testing.T) { + input := "short string" + if got := TruncateOutput(input); got != input { + t.Errorf("expected no truncation, got %q", got) + } +} + +func TestTruncateOutput_TooManyLines(t *testing.T) { + var sb strings.Builder + for i := 0; i < MaxOutputLines+100; i++ { + sb.WriteString("line\n") + } + result := TruncateOutput(sb.String()) + if !containsString(result, "output truncated") { + t.Error("expected truncation notice") + } +} + +func TestTruncateOutput_TooManyBytes(t *testing.T) { + input := strings.Repeat("x", MaxOutputBytes+1000) + result := TruncateOutput(input) + if !containsString(result, "output truncated") { + t.Error("expected truncation notice") + } +} + +// --- helpers --- + +func writeTestFile(t *testing.T, dir, name, content string) { + t.Helper() + fullPath := filepath.Join(dir, name) + if err := os.MkdirAll(filepath.Dir(fullPath), 0o755); err != nil { + t.Fatalf("creating dirs: %v", err) + } + if err := os.WriteFile(fullPath, []byte(content), 0o644); err != nil { + t.Fatalf("writing test file: %v", err) + } +} + +func 
toJSON(t *testing.T, v any) json.RawMessage { + t.Helper() + data, err := json.Marshal(v) + if err != nil { + t.Fatalf("marshalling JSON: %v", err) + } + return data +} + +func containsString(s, substr string) bool { + return strings.Contains(s, substr) +} diff --git a/forge-core/tools/builtins/directory_tree.go b/forge-core/tools/builtins/directory_tree.go new file mode 100644 index 0000000..e4b6a6e --- /dev/null +++ b/forge-core/tools/builtins/directory_tree.go @@ -0,0 +1,127 @@ +package builtins + +import ( + "context" + "encoding/json" + "fmt" + "os" + "path/filepath" + "strings" + + "github.com/initializ/forge/forge-core/tools" +) + +type directoryTreeTool struct { + pathValidator *PathValidator +} + +func (t *directoryTreeTool) Name() string { return "directory_tree" } +func (t *directoryTreeTool) Description() string { + return "Display a tree-formatted directory listing showing the structure of files and directories. Useful for understanding project layout." +} +func (t *directoryTreeTool) Category() tools.Category { return tools.CategoryBuiltin } + +func (t *directoryTreeTool) InputSchema() json.RawMessage { + return json.RawMessage(`{ + "type": "object", + "properties": { + "path": { + "type": "string", + "description": "Directory path (relative to project root). Default: project root" + }, + "max_depth": { + "type": "integer", + "description": "Maximum depth to traverse. 
Default: 3" + } + } + }`) +} + +func (t *directoryTreeTool) Execute(_ context.Context, args json.RawMessage) (string, error) { + var input struct { + Path string `json:"path"` + MaxDepth int `json:"max_depth"` + } + if err := json.Unmarshal(args, &input); err != nil { + return "", fmt.Errorf("invalid arguments: %w", err) + } + + resolved, err := t.pathValidator.Resolve(input.Path) + if err != nil { + return "", err + } + + info, err := os.Stat(resolved) + if err != nil { + return "", fmt.Errorf("cannot access %q: %w", input.Path, err) + } + if !info.IsDir() { + return "", fmt.Errorf("%q is not a directory", input.Path) + } + + maxDepth := input.MaxDepth + if maxDepth <= 0 { + maxDepth = 3 + } + + var sb strings.Builder + relRoot, _ := filepath.Rel(t.pathValidator.WorkDir(), resolved) + if relRoot == "." { + relRoot = filepath.Base(resolved) + } + sb.WriteString(relRoot + "/\n") + + t.buildTree(&sb, resolved, "", 0, maxDepth) + + return TruncateOutput(sb.String()), nil +} + +func (t *directoryTreeTool) buildTree(sb *strings.Builder, dir, prefix string, depth, maxDepth int) { + if depth >= maxDepth { + return + } + + entries, err := os.ReadDir(dir) + if err != nil { + return + } + + // Filter out skipped directories. + var visible []os.DirEntry + for _, entry := range entries { + if entry.IsDir() && skipDirs[entry.Name()] { + continue + } + // Skip hidden files/dirs (starting with .). 
+ if strings.HasPrefix(entry.Name(), ".") { + continue + } + visible = append(visible, entry) + } + + for i, entry := range visible { + isLast := i == len(visible)-1 + connector := "├── " + childPrefix := prefix + "│ " + if isLast { + connector = "└── " + childPrefix = prefix + " " + } + + if entry.IsDir() { + fmt.Fprintf(sb, "%s%s%s/\n", prefix, connector, entry.Name()) + t.buildTree(sb, filepath.Join(dir, entry.Name()), childPrefix, depth+1, maxDepth) + } else { + sizeStr := "" + if info, infoErr := entry.Info(); infoErr == nil && info != nil { + bytes := info.Size() + if bytes >= 1024*1024 { + sizeStr = fmt.Sprintf(" (%dMB)", bytes/(1024*1024)) + } else if bytes >= 1024 { + sizeStr = fmt.Sprintf(" (%dKB)", bytes/1024) + } + } + fmt.Fprintf(sb, "%s%s%s%s\n", prefix, connector, entry.Name(), sizeStr) + } + } +} diff --git a/forge-core/tools/builtins/file_edit.go b/forge-core/tools/builtins/file_edit.go new file mode 100644 index 0000000..4f31351 --- /dev/null +++ b/forge-core/tools/builtins/file_edit.go @@ -0,0 +1,111 @@ +package builtins + +import ( + "context" + "encoding/json" + "fmt" + "os" + "strings" + + "github.com/initializ/forge/forge-core/tools" +) + +type fileEditTool struct { + pathValidator *PathValidator +} + +func (t *fileEditTool) Name() string { return "file_edit" } +func (t *fileEditTool) Description() string { + return "Edit a file by replacing an exact string match with new text. The old_text must match exactly one location in the file. Returns a unified diff of the change. Always read a file before editing to get exact text." 
+}
+func (t *fileEditTool) Category() tools.Category { return tools.CategoryBuiltin }
+
+func (t *fileEditTool) InputSchema() json.RawMessage {
+	return json.RawMessage(`{
+		"type": "object",
+		"properties": {
+			"path": {
+				"type": "string",
+				"description": "File path (relative to project root or absolute within project)"
+			},
+			"old_text": {
+				"type": "string",
+				"description": "The exact text to find and replace (must be unique in the file)"
+			},
+			"new_text": {
+				"type": "string",
+				"description": "The replacement text"
+			}
+		},
+		"required": ["path", "old_text", "new_text"]
+	}`)
+}
+
+func (t *fileEditTool) Execute(_ context.Context, args json.RawMessage) (string, error) {
+	var input struct {
+		Path    string `json:"path"`
+		OldText string `json:"old_text"`
+		NewText string `json:"new_text"`
+	}
+	if err := json.Unmarshal(args, &input); err != nil {
+		return "", fmt.Errorf("invalid arguments: %w", err)
+	}
+
+	if strings.TrimSpace(input.Path) == "" {
+		return "", fmt.Errorf("path is required")
+	}
+	if input.OldText == "" {
+		return "", fmt.Errorf("old_text is required")
+	}
+	if input.OldText == input.NewText {
+		return "", fmt.Errorf("old_text and new_text are identical")
+	}
+
+	resolved, err := t.pathValidator.Resolve(input.Path)
+	if err != nil {
+		return "", err
+	}
+
+	data, err := os.ReadFile(resolved)
+	if err != nil {
+		return "", fmt.Errorf("reading file: %w", err)
+	}
+
+	content := string(data)
+	count := strings.Count(content, input.OldText)
+	if count == 0 {
+		return "", fmt.Errorf("old_text not found in %s", input.Path)
+	}
+	if count > 1 {
+		return "", fmt.Errorf("old_text found %d times in %s — must be unique. Provide more surrounding context to make it unique", count, input.Path)
+	}
+
+	// Perform replacement, preserving the file's original permission
+	// bits rather than forcing 0o644 (matters for executable scripts).
+	newContent := strings.Replace(content, input.OldText, input.NewText, 1)
+	mode := os.FileMode(0o644)
+	if info, statErr := os.Stat(resolved); statErr == nil {
+		mode = info.Mode().Perm()
+	}
+	if err := os.WriteFile(resolved, []byte(newContent), mode); err != nil {
+		return "", fmt.Errorf("writing file: %w", err)
+	}
+
+	// Return a diff-style summary of the change.
+	return generateDiff(input.Path, input.OldText, input.NewText), nil
+}
+
+// generateDiff creates a unified-diff-like representation of the change.
+// Note: this is a simplified removed/added listing, not a true unified diff
+// with hunk headers; it is meant for human review in tool output.
+func generateDiff(path, oldText, newText string) string {
+	var sb strings.Builder
+	fmt.Fprintf(&sb, "--- %s\n+++ %s\n", path, path)
+	for _, line := range strings.Split(oldText, "\n") {
+		fmt.Fprintf(&sb, "-%s\n", line)
+	}
+	for _, line := range strings.Split(newText, "\n") {
+		fmt.Fprintf(&sb, "+%s\n", line)
+	}
+	return sb.String()
+}
diff --git a/forge-core/tools/builtins/file_patch.go b/forge-core/tools/builtins/file_patch.go
new file mode 100644
index 0000000..dd2c39b
--- /dev/null
+++ b/forge-core/tools/builtins/file_patch.go
@@ -0,0 +1,169 @@
+package builtins
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+
+	"github.com/initializ/forge/forge-core/tools"
+)
+
+type filePatchTool struct {
+	pathValidator *PathValidator
+}
+
+func (t *filePatchTool) Name() string { return "file_patch" }
+func (t *filePatchTool) Description() string {
+	return "Perform batch file operations in a single call. Supports add (create), update (overwrite), delete (remove), and move (rename) actions. All paths are validated before any changes are applied."
+}
+func (t *filePatchTool) Category() tools.Category { return tools.CategoryBuiltin }
+
+func (t *filePatchTool) InputSchema() json.RawMessage {
+	return json.RawMessage(`{
+		"type": "object",
+		"properties": {
+			"operations": {
+				"type": "array",
+				"description": "List of file operations to perform",
+				"items": {
+					"type": "object",
+					"properties": {
+						"action": {
+							"type": "string",
+							"enum": ["add", "update", "delete", "move"],
+							"description": "The operation to perform"
+						},
+						"path": {
+							"type": "string",
+							"description": "File path for the operation"
+						},
+						"content": {
+							"type": "string",
+							"description": "File content (required for add and update)"
+						},
+						"new_path": {
+							"type": "string",
+							"description": "Destination path (required for move)"
+						}
+					},
+					"required": ["action", "path"]
+				}
+			}
+		},
+		"required": ["operations"]
+	}`)
+}
+
+type patchOperation struct {
+	Action  string `json:"action"`
+	Path    string `json:"path"`
+	Content string `json:"content"`
+	NewPath string `json:"new_path"`
+}
+
+func (t *filePatchTool) Execute(_ context.Context, args json.RawMessage) (string, error) {
+	var input struct {
+		Operations []patchOperation `json:"operations"`
+	}
+	if err := json.Unmarshal(args, &input); err != nil {
+		return "", fmt.Errorf("invalid arguments: %w", err)
+	}
+
+	if len(input.Operations) == 0 {
+		return "", fmt.Errorf("at least one operation is required")
+	}
+
+	// Phase 1: Validate all paths upfront before applying any changes.
+	type resolvedOp struct {
+		op          patchOperation
+		resolved    string
+		newResolved string
+	}
+	ops := make([]resolvedOp, len(input.Operations))
+
+	for i, op := range input.Operations {
+		if strings.TrimSpace(op.Path) == "" {
+			return "", fmt.Errorf("operation %d: path is required", i)
+		}
+
+		resolved, err := t.pathValidator.Resolve(op.Path)
+		if err != nil {
+			return "", fmt.Errorf("operation %d: %w", i, err)
+		}
+		ops[i] = resolvedOp{op: op, resolved: resolved}
+
+		switch op.Action {
+		case "add", "update":
+			// content is expected; existence is checked at apply time
+		case "delete":
+			// no extra validation
+		case "move":
+			if strings.TrimSpace(op.NewPath) == "" {
+				return "", fmt.Errorf("operation %d: new_path is required for move", i)
+			}
+			newResolved, err := t.pathValidator.Resolve(op.NewPath)
+			if err != nil {
+				return "", fmt.Errorf("operation %d: new_path %w", i, err)
+			}
+			ops[i].newResolved = newResolved
+		default:
+			return "", fmt.Errorf("operation %d: unknown action %q (use add, update, delete, or move)", i, op.Action)
+		}
+	}
+
+	// Phase 2: Apply operations.
+	var results []map[string]string
+	for _, rop := range ops {
+		result := map[string]string{
+			"action": rop.op.Action,
+			"path":   rop.op.Path,
+		}
+		switch rop.op.Action {
+		case "add":
+			// "add" means create: refuse to clobber an existing file.
+			if _, statErr := os.Stat(rop.resolved); statErr == nil {
+				return "", fmt.Errorf("creating %s: file already exists (use update)", rop.op.Path)
+			}
+			if err := os.MkdirAll(filepath.Dir(rop.resolved), 0o755); err != nil {
+				return "", fmt.Errorf("creating directories for %s: %w", rop.op.Path, err)
+			}
+			if err := os.WriteFile(rop.resolved, []byte(rop.op.Content), 0o644); err != nil {
+				return "", fmt.Errorf("creating %s: %w", rop.op.Path, err)
+			}
+			result["status"] = "created"
+		case "update":
+			// "update" requires an existing file; otherwise it would act like "add".
+			if _, statErr := os.Stat(rop.resolved); statErr != nil {
+				return "", fmt.Errorf("updating %s: %w", rop.op.Path, statErr)
+			}
+			if err := os.WriteFile(rop.resolved, []byte(rop.op.Content), 0o644); err != nil {
+				return "", fmt.Errorf("updating %s: %w", rop.op.Path, err)
+			}
+			result["status"] = "updated"
+		case "delete":
+			if err := os.Remove(rop.resolved); err != nil {
+				return "", fmt.Errorf("deleting %s: %w", rop.op.Path, err)
+			}
+			result["status"] = "deleted"
+		case "move":
+			if err := os.MkdirAll(filepath.Dir(rop.newResolved), 0o755); err != nil {
+				return "", fmt.Errorf("creating directories for %s: %w", rop.op.NewPath, err)
+			}
+			if err := os.Rename(rop.resolved, rop.newResolved); err != nil {
+				return "", fmt.Errorf("moving %s to %s: %w", rop.op.Path, rop.op.NewPath, err)
+			}
+			result["status"] = "moved"
+			result["new_path"] = rop.op.NewPath
+		}
+
+		results = append(results, result)
+	}
+
+	out, _ := json.Marshal(map[string]any{
+		"operations": results,
+		"total":      len(results),
+	})
+	return string(out), nil
+}
diff --git a/forge-core/tools/builtins/file_read.go b/forge-core/tools/builtins/file_read.go
new file mode 100644
index 0000000..7af58a6
--- /dev/null
+++ b/forge-core/tools/builtins/file_read.go
@@ -0,0 +1,136 @@
+package builtins
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+
+	"github.com/initializ/forge/forge-core/tools"
+)
+
+type fileReadTool struct {
+	pathValidator *PathValidator
+}
+
+func (t *fileReadTool)
Name() string { return "file_read" } +func (t *fileReadTool) Description() string { + return "Read a file's contents with optional line offset and limit, or list a directory's entries. Returns numbered lines (cat -n style) for files, or a listing with name, type, and size for directories." +} +func (t *fileReadTool) Category() tools.Category { return tools.CategoryBuiltin } + +func (t *fileReadTool) InputSchema() json.RawMessage { + return json.RawMessage(`{ + "type": "object", + "properties": { + "path": { + "type": "string", + "description": "File or directory path (relative to project root or absolute within project)" + }, + "offset": { + "type": "integer", + "description": "Line number to start reading from (1-based). Default: 1" + }, + "limit": { + "type": "integer", + "description": "Maximum number of lines to read. Default: 2000" + } + }, + "required": ["path"] + }`) +} + +func (t *fileReadTool) Execute(_ context.Context, args json.RawMessage) (string, error) { + var input struct { + Path string `json:"path"` + Offset int `json:"offset"` + Limit int `json:"limit"` + } + if err := json.Unmarshal(args, &input); err != nil { + return "", fmt.Errorf("invalid arguments: %w", err) + } + + resolved, err := t.pathValidator.Resolve(input.Path) + if err != nil { + return "", err + } + + info, err := os.Stat(resolved) + if err != nil { + return "", fmt.Errorf("cannot access %q: %w", input.Path, err) + } + + if info.IsDir() { + return t.listDirectory(resolved) + } + + return t.readFile(resolved, input.Offset, input.Limit) +} + +func (t *fileReadTool) readFile(path string, offset, limit int) (string, error) { + data, err := os.ReadFile(path) + if err != nil { + return "", fmt.Errorf("reading file: %w", err) + } + + if offset <= 0 { + offset = 1 + } + if limit <= 0 { + limit = MaxOutputLines + } + + lines := strings.Split(string(data), "\n") + totalLines := len(lines) + + // Convert to 0-based index. 
+ start := offset - 1 + if start >= totalLines { + return fmt.Sprintf("(file has %d lines, offset %d is past end)", totalLines, offset), nil + } + + end := min(start+limit, totalLines) + + var sb strings.Builder + for i := start; i < end; i++ { + fmt.Fprintf(&sb, "%6d\t%s\n", i+1, lines[i]) + } + + result := sb.String() + if end < totalLines { + result += fmt.Sprintf("\n... (%d more lines not shown)", totalLines-end) + } + + return TruncateOutput(result), nil +} + +func (t *fileReadTool) listDirectory(path string) (string, error) { + entries, err := os.ReadDir(path) + if err != nil { + return "", fmt.Errorf("reading directory: %w", err) + } + + var sb strings.Builder + for _, entry := range entries { + info, infoErr := entry.Info() + if infoErr != nil { + continue + } + entryType := "file" + if entry.IsDir() { + entryType = "dir" + } else if info.Mode()&os.ModeSymlink != 0 { + entryType = "link" + } + relPath, _ := filepath.Rel(t.pathValidator.WorkDir(), filepath.Join(path, entry.Name())) + fmt.Fprintf(&sb, "%-6s %10d %s\n", entryType, info.Size(), relPath) + } + + if sb.Len() == 0 { + return "(empty directory)", nil + } + + return TruncateOutput(sb.String()), nil +} diff --git a/forge-core/tools/builtins/file_write.go b/forge-core/tools/builtins/file_write.go new file mode 100644 index 0000000..17f253d --- /dev/null +++ b/forge-core/tools/builtins/file_write.go @@ -0,0 +1,81 @@ +package builtins + +import ( + "context" + "encoding/json" + "fmt" + "os" + "path/filepath" + "strings" + + "github.com/initializ/forge/forge-core/tools" +) + +type fileWriteTool struct { + pathValidator *PathValidator +} + +func (t *fileWriteTool) Name() string { return "file_write" } +func (t *fileWriteTool) Description() string { + return "Create or overwrite a file in the project directory. Creates intermediate directories as needed. Use file_edit for modifying existing files instead of overwriting them entirely." 
+} +func (t *fileWriteTool) Category() tools.Category { return tools.CategoryBuiltin } + +func (t *fileWriteTool) InputSchema() json.RawMessage { + return json.RawMessage(`{ + "type": "object", + "properties": { + "path": { + "type": "string", + "description": "File path (relative to project root or absolute within project)" + }, + "content": { + "type": "string", + "description": "The full file content to write" + } + }, + "required": ["path", "content"] + }`) +} + +func (t *fileWriteTool) Execute(_ context.Context, args json.RawMessage) (string, error) { + var input struct { + Path string `json:"path"` + Content string `json:"content"` + } + if err := json.Unmarshal(args, &input); err != nil { + return "", fmt.Errorf("invalid arguments: %w", err) + } + + if strings.TrimSpace(input.Path) == "" { + return "", fmt.Errorf("path is required") + } + + resolved, err := t.pathValidator.Resolve(input.Path) + if err != nil { + return "", err + } + + // Determine if creating or updating. + action := "created" + if _, statErr := os.Stat(resolved); statErr == nil { + action = "updated" + } + + // Create intermediate directories. 
+ dir := filepath.Dir(resolved) + if err := os.MkdirAll(dir, 0o755); err != nil { + return "", fmt.Errorf("creating directories: %w", err) + } + + if err := os.WriteFile(resolved, []byte(input.Content), 0o644); err != nil { + return "", fmt.Errorf("writing file: %w", err) + } + + result, _ := json.Marshal(map[string]any{ + "path": input.Path, + "action": action, + "bytes": len(input.Content), + }) + return string(result), nil +} diff --git a/forge-core/tools/builtins/glob_search.go b/forge-core/tools/builtins/glob_search.go new file mode 100644 index 0000000..539a558 --- /dev/null +++ b/forge-core/tools/builtins/glob_search.go @@ -0,0 +1,161 @@ +package builtins + +import ( + "context" + "encoding/json" + "fmt" + "os" + "path/filepath" + "sort" + "strings" + + "github.com/initializ/forge/forge-core/tools" +) + +type globSearchTool struct { + pathValidator *PathValidator +} + +func (t *globSearchTool) Name() string { return "glob_search" } +func (t *globSearchTool) Description() string { + return "Find files by glob pattern (e.g. '**/*.go', 'src/**/*.ts'). Returns matching file paths sorted by modification time (most recent first)." +} +func (t *globSearchTool) Category() tools.Category { return tools.CategoryBuiltin } + +func (t *globSearchTool) InputSchema() json.RawMessage { + return json.RawMessage(`{ + "type": "object", + "properties": { + "pattern": { + "type": "string", + "description": "Glob pattern to match files (supports ** for recursive matching)" + }, + "path": { + "type": "string", + "description": "Directory to search in (relative to project root). Default: project root" + }, + "max_results": { + "type": "integer", + "description": "Maximum number of results to return. 
Default: 100" + } + }, + "required": ["pattern"] + }`) +} + +func (t *globSearchTool) Execute(_ context.Context, args json.RawMessage) (string, error) { + var input struct { + Pattern string `json:"pattern"` + Path string `json:"path"` + MaxResults int `json:"max_results"` + } + if err := json.Unmarshal(args, &input); err != nil { + return "", fmt.Errorf("invalid arguments: %w", err) + } + + if strings.TrimSpace(input.Pattern) == "" { + return "", fmt.Errorf("pattern is required") + } + + searchPath, err := t.pathValidator.Resolve(input.Path) + if err != nil { + return "", err + } + + maxResults := input.MaxResults + if maxResults <= 0 { + maxResults = 100 + } + + // Check if pattern uses ** for recursive matching. + hasDoublestar := strings.Contains(input.Pattern, "**") + + // Extract the base filename pattern from the glob. + // For "**/*.go", the file pattern is "*.go". + filePattern := input.Pattern + if hasDoublestar { + parts := strings.Split(input.Pattern, "**") + if len(parts) > 1 { + filePattern = strings.TrimPrefix(parts[len(parts)-1], "/") + filePattern = strings.TrimPrefix(filePattern, string(filepath.Separator)) + } + } + + type fileEntry struct { + path string + modTime int64 + } + + var matches []fileEntry + + walkErr := filepath.WalkDir(searchPath, func(path string, d os.DirEntry, err error) error { + if err != nil { + return nil + } + if d.IsDir() { + if skipDirs[d.Name()] { + return filepath.SkipDir + } + return nil + } + + relPath, relErr := filepath.Rel(t.pathValidator.WorkDir(), path) + if relErr != nil { + return nil + } + + var matched bool + if hasDoublestar { + // For ** patterns, match the filename against the file portion. + if filePattern == "" { + matched = true + } else { + matched, _ = filepath.Match(filePattern, d.Name()) + } + } else { + // For non-** patterns, match against the relative path from the search dir. 
+ relFromSearch, _ := filepath.Rel(searchPath, path) + matched, _ = filepath.Match(input.Pattern, relFromSearch) + if !matched { + // Also try matching just the filename. + matched, _ = filepath.Match(input.Pattern, d.Name()) + } + } + + if matched { + info, infoErr := d.Info() + if infoErr == nil { + matches = append(matches, fileEntry{ + path: relPath, + modTime: info.ModTime().UnixNano(), + }) + } + } + return nil + }) + + if walkErr != nil { + return "", fmt.Errorf("search error: %w", walkErr) + } + + if len(matches) == 0 { + return "(no matches found)", nil + } + + // Sort by modification time, most recent first. + sort.Slice(matches, func(i, j int) bool { + return matches[i].modTime > matches[j].modTime + }) + + if len(matches) > maxResults { + matches = matches[:maxResults] + } + + var sb strings.Builder + for _, m := range matches { + sb.WriteString(m.path) + sb.WriteByte('\n') + } + + return sb.String(), nil +} diff --git a/forge-core/tools/builtins/grep_search.go b/forge-core/tools/builtins/grep_search.go new file mode 100644 index 0000000..02052b8 --- /dev/null +++ b/forge-core/tools/builtins/grep_search.go @@ -0,0 +1,295 @@ +package builtins + +import ( + "bufio" + "bytes" + "context" + "encoding/json" + "fmt" + "os" + "os/exec" + "path/filepath" + "regexp" + "strings" + + "github.com/initializ/forge/forge-core/tools" +) + +// Directories to skip during search. +var skipDirs = map[string]bool{ + ".git": true, + "node_modules": true, + "vendor": true, + "__pycache__": true, + ".venv": true, + "dist": true, + "build": true, +} + +type grepSearchTool struct { + pathValidator *PathValidator +} + +func (t *grepSearchTool) Name() string { return "grep_search" } +func (t *grepSearchTool) Description() string { + return "Search file contents using a regex pattern. Uses ripgrep (rg) if available, otherwise falls back to a Go-based search. Returns matches in file:line:content format." 
+} +func (t *grepSearchTool) Category() tools.Category { return tools.CategoryBuiltin } + +func (t *grepSearchTool) InputSchema() json.RawMessage { + return json.RawMessage(`{ + "type": "object", + "properties": { + "pattern": { + "type": "string", + "description": "Regex pattern to search for" + }, + "path": { + "type": "string", + "description": "Directory or file to search in (relative to project root). Default: project root" + }, + "include": { + "type": "string", + "description": "Glob pattern to filter files (e.g. '*.go', '*.ts')" + }, + "exclude": { + "type": "string", + "description": "Glob pattern to exclude files (e.g. '*.test.ts', 'test/**')" + }, + "max_results": { + "type": "integer", + "description": "Maximum number of output lines to return. Default: 50" + }, + "context": { + "type": "integer", + "description": "Number of context lines to show before and after each match. Default: 0" + } + }, + "required": ["pattern"] + }`) +} + +func (t *grepSearchTool) Execute(_ context.Context, args json.RawMessage) (string, error) { + var input struct { + Pattern string `json:"pattern"` + Path string `json:"path"` + Include string `json:"include"` + Exclude string `json:"exclude"` + MaxResults int `json:"max_results"` + Context int `json:"context"` + } + if err := json.Unmarshal(args, &input); err != nil { + return "", fmt.Errorf("invalid arguments: %w", err) + } + + if strings.TrimSpace(input.Pattern) == "" { + return "", fmt.Errorf("pattern is required") + } + + searchPath, err := t.pathValidator.Resolve(input.Path) + if err != nil { + return "", err + } + + maxResults := input.MaxResults + if maxResults <= 0 { + maxResults = 50 + } + + // Try ripgrep first. + if rgPath, lookErr := exec.LookPath("rg"); lookErr == nil { + result, rgErr := t.searchWithRipgrep(rgPath, searchPath, input.Pattern, input.Include, input.Exclude, input.Context, maxResults) + if rgErr == nil { + return result, nil + } + // Fall through to Go-based search on ripgrep error. 
+ } + + return t.searchWithGo(searchPath, input.Pattern, input.Include, input.Exclude, input.Context, maxResults) +} + +func (t *grepSearchTool) searchWithRipgrep(rgPath, searchPath, pattern, include, exclude string, contextLines, maxResults int) (string, error) { + args := []string{ + "--no-heading", + "--line-number", + "--color", "never", + } + if include != "" { + args = append(args, "--glob", include) + } + if exclude != "" { + args = append(args, "--glob", "!"+exclude) + } + if contextLines > 0 { + args = append(args, "-C", fmt.Sprintf("%d", contextLines)) + } + args = append(args, pattern, searchPath) + + cmd := exec.Command(rgPath, args...) + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + + err := cmd.Run() + if err != nil { + // Exit code 1 means no matches — not an error. + if exitErr, ok := err.(*exec.ExitError); ok && exitErr.ExitCode() == 1 { + return "(no matches found)", nil + } + return "", fmt.Errorf("ripgrep error: %s", stderr.String()) + } + + result := stdout.String() + if result == "" { + return "(no matches found)", nil + } + + // Make paths relative to workDir. + result = t.relativizePaths(result) + + // Enforce total output line limit. + lines := strings.Split(result, "\n") + if len(lines) > maxResults { + result = strings.Join(lines[:maxResults], "\n") + "\n... 
(more results not shown)" + } + + return TruncateOutput(result), nil +} + +func (t *grepSearchTool) searchWithGo(searchPath, pattern, include, exclude string, contextLines, maxResults int) (string, error) { + re, err := regexp.Compile(pattern) + if err != nil { + return "", fmt.Errorf("invalid regex: %w", err) + } + + var sb strings.Builder + totalLines := 0 + + walkErr := filepath.WalkDir(searchPath, func(path string, d os.DirEntry, err error) error { + if err != nil { + return nil // skip inaccessible entries + } + if d.IsDir() { + if skipDirs[d.Name()] { + return filepath.SkipDir + } + return nil + } + if totalLines >= maxResults { + return filepath.SkipAll + } + + // Apply include filter. + if include != "" { + matched, matchErr := filepath.Match(include, d.Name()) + if matchErr != nil || !matched { + return nil + } + } + + // Apply exclude filter. + if exclude != "" { + matched, _ := filepath.Match(exclude, d.Name()) + if matched { + return nil + } + } + + // Skip binary files (check first 512 bytes). + if isBinaryFile(path) { + return nil + } + + relPath, _ := filepath.Rel(t.pathValidator.WorkDir(), path) + f, openErr := os.Open(path) + if openErr != nil { + return nil + } + defer func() { _ = f.Close() }() + + // Read all lines for context support. + var allLines []string + scanner := bufio.NewScanner(f) + for scanner.Scan() { + allLines = append(allLines, scanner.Text()) + } + + // Find matching lines and emit with context. + lastPrinted := -1 + for lineIdx, line := range allLines { + if !re.MatchString(line) { + continue + } + if totalLines >= maxResults { + break + } + + startCtx := lineIdx - contextLines + if startCtx < 0 { + startCtx = 0 + } + endCtx := lineIdx + contextLines + if endCtx >= len(allLines) { + endCtx = len(allLines) - 1 + } + + // Add group separator if there's a gap from last printed block. 
+ if contextLines > 0 && lastPrinted >= 0 && startCtx > lastPrinted+1 { + sb.WriteString("--\n") + totalLines++ + if totalLines >= maxResults { + break + } + } + + for i := startCtx; i <= endCtx; i++ { + if i <= lastPrinted { + continue + } + if totalLines >= maxResults { + break + } + sep := "-" + if i == lineIdx { + sep = ":" + } + fmt.Fprintf(&sb, "%s:%d%s%s\n", relPath, i+1, sep, allLines[i]) + totalLines++ + lastPrinted = i + } + } + return nil + }) + + if walkErr != nil { + return "", fmt.Errorf("search error: %w", walkErr) + } + + if sb.Len() == 0 { + return "(no matches found)", nil + } + + return TruncateOutput(sb.String()), nil +} + +func (t *grepSearchTool) relativizePaths(output string) string { + prefix := t.pathValidator.WorkDir() + string(filepath.Separator) + return strings.ReplaceAll(output, prefix, "") +} + +// isBinaryFile checks if a file appears to be binary by reading the first 512 bytes. +func isBinaryFile(path string) bool { + f, err := os.Open(path) + if err != nil { + return false + } + defer func() { _ = f.Close() }() + + buf := make([]byte, 512) + n, err := f.Read(buf) + if err != nil || n == 0 { + return false + } + + return bytes.ContainsRune(buf[:n], 0) +} diff --git a/forge-core/tools/builtins/pathutil.go b/forge-core/tools/builtins/pathutil.go new file mode 100644 index 0000000..fde62a6 --- /dev/null +++ b/forge-core/tools/builtins/pathutil.go @@ -0,0 +1,59 @@ +package builtins + +import ( + "fmt" + "os" + "path/filepath" + "strings" +) + +// PathValidator provides path confinement to a working directory. +// All resolved paths are guaranteed to be within workDir. +type PathValidator struct { + workDir string // absolute path +} + +// NewPathValidator creates a PathValidator for the given working directory. 
+func NewPathValidator(workDir string) *PathValidator { + abs, err := filepath.Abs(workDir) + if err != nil { + abs = workDir + } + return &PathValidator{workDir: abs} +} + +// Resolve converts a relative or absolute path to an absolute path within workDir. +// It returns an error if the resolved path escapes the working directory. +func (v *PathValidator) Resolve(path string) (string, error) { + if strings.TrimSpace(path) == "" { + return v.workDir, nil + } + + var resolved string + if filepath.IsAbs(path) { + resolved = filepath.Clean(path) + } else { + resolved = filepath.Clean(filepath.Join(v.workDir, path)) + // If the path doesn't exist but workspace/ does, use that. + // This handles the common case where the LLM passes "myrepo" instead + // of "workspace/myrepo" for cloned repositories. + if _, err := os.Stat(resolved); os.IsNotExist(err) { + wsPath := filepath.Clean(filepath.Join(v.workDir, "workspace", path)) + if _, wsErr := os.Stat(wsPath); wsErr == nil { + resolved = wsPath + } + } + } + + // Ensure the resolved path is within workDir. + if resolved != v.workDir && !strings.HasPrefix(resolved, v.workDir+string(filepath.Separator)) { + return "", fmt.Errorf("path %q resolves outside the working directory", path) + } + + return resolved, nil +} + +// WorkDir returns the absolute working directory. 
+func (v *PathValidator) WorkDir() string { + return v.workDir +} diff --git a/forge-core/tools/builtins/pathutil_test.go b/forge-core/tools/builtins/pathutil_test.go new file mode 100644 index 0000000..523fc4a --- /dev/null +++ b/forge-core/tools/builtins/pathutil_test.go @@ -0,0 +1,105 @@ +package builtins + +import ( + "os" + "path/filepath" + "testing" +) + +func TestPathValidator_Resolve(t *testing.T) { + workDir := t.TempDir() + + pv := NewPathValidator(workDir) + + tests := []struct { + name string + path string + wantErr bool + wantAbs string // expected absolute path (empty = just check no error) + }{ + { + name: "empty path returns workDir", + path: "", + wantAbs: workDir, + }, + { + name: "relative path", + path: "foo/bar.txt", + wantAbs: filepath.Join(workDir, "foo", "bar.txt"), + }, + { + name: "absolute path within workDir", + path: filepath.Join(workDir, "src", "main.go"), + wantAbs: filepath.Join(workDir, "src", "main.go"), + }, + { + name: "dot path returns workDir", + path: ".", + wantAbs: workDir, + }, + { + name: "path traversal blocked", + path: "../../../etc/passwd", + wantErr: true, + }, + { + name: "absolute path outside workDir blocked", + path: "/etc/passwd", + wantErr: true, + }, + { + name: "sneaky traversal blocked", + path: "foo/../../..", + wantErr: true, + }, + { + name: "dot-dot in middle resolved safely", + path: "foo/../bar.txt", + wantAbs: filepath.Join(workDir, "bar.txt"), + }, + } + + // Test workspace/ fallback: when "myrepo" doesn't exist but "workspace/myrepo" does, + // Resolve should return the workspace path. 
+ t.Run("workspace fallback", func(t *testing.T) { + wsDir := filepath.Join(workDir, "workspace", "myrepo") + if err := os.MkdirAll(wsDir, 0o755); err != nil { + t.Fatal(err) + } + + got, err := pv.Resolve("myrepo") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if got != wsDir { + t.Errorf("got %q, want %q (workspace fallback)", got, wsDir) + } + + // "workspace/myrepo" should also work directly + got2, err2 := pv.Resolve("workspace/myrepo") + if err2 != nil { + t.Fatalf("unexpected error: %v", err2) + } + if got2 != wsDir { + t.Errorf("got %q, want %q (direct workspace path)", got2, wsDir) + } + }) + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := pv.Resolve(tt.path) + if tt.wantErr { + if err == nil { + t.Errorf("expected error, got path %q", got) + } + return + } + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if tt.wantAbs != "" && got != tt.wantAbs { + t.Errorf("got %q, want %q", got, tt.wantAbs) + } + }) + } +} diff --git a/forge-core/tools/builtins/register.go b/forge-core/tools/builtins/register.go index 17e218b..0e060ac 100644 --- a/forge-core/tools/builtins/register.go +++ b/forge-core/tools/builtins/register.go @@ -35,3 +35,91 @@ func GetByName(name string) tools.Tool { } return nil } + +// CodeAgentSearchTools returns search/exploration tools (grep, glob, tree). +// These are safe read-only tools for exploring codebases. +func CodeAgentSearchTools(workDir string) []tools.Tool { + pv := NewPathValidator(workDir) + return []tools.Tool{ + &grepSearchTool{pathValidator: pv}, + &globSearchTool{pathValidator: pv}, + &directoryTreeTool{pathValidator: pv}, + } +} + +// RegisterCodeAgentSearchTools registers search/exploration tools. +func RegisterCodeAgentSearchTools(reg *tools.Registry, workDir string) error { + for _, t := range CodeAgentSearchTools(workDir) { + if err := reg.Register(t); err != nil { + return err + } + } + return nil +} + +// CodeAgentBashTool returns the bash execution tool. 
+func CodeAgentBashTool(workDir string) tools.Tool { + return &bashExecuteTool{workDir: workDir} +} + +// RegisterCodeAgentBashTool registers the bash execution tool. +func RegisterCodeAgentBashTool(reg *tools.Registry, workDir string) error { + return reg.Register(CodeAgentBashTool(workDir)) +} + +// CodeAgentReadTools returns read-only coding tools (file_read + search). +func CodeAgentReadTools(workDir string) []tools.Tool { + pv := NewPathValidator(workDir) + return []tools.Tool{ + &fileReadTool{pathValidator: pv}, + &grepSearchTool{pathValidator: pv}, + &globSearchTool{pathValidator: pv}, + &directoryTreeTool{pathValidator: pv}, + } +} + +// CodeAgentWriteTools returns write/execute tools. +func CodeAgentWriteTools(workDir string) []tools.Tool { + pv := NewPathValidator(workDir) + return []tools.Tool{ + &fileWriteTool{pathValidator: pv}, + &fileEditTool{pathValidator: pv}, + &filePatchTool{pathValidator: pv}, + &bashExecuteTool{workDir: workDir}, + } +} + +// CodeAgentTools returns all coding agent tools (read + write). +func CodeAgentTools(workDir string) []tools.Tool { + return append(CodeAgentReadTools(workDir), CodeAgentWriteTools(workDir)...) +} + +// RegisterCodeAgentReadTools registers only the read-only coding tools. +func RegisterCodeAgentReadTools(reg *tools.Registry, workDir string) error { + for _, t := range CodeAgentReadTools(workDir) { + if err := reg.Register(t); err != nil { + return err + } + } + return nil +} + +// RegisterCodeAgentWriteTools registers the write/execute coding tools. +func RegisterCodeAgentWriteTools(reg *tools.Registry, workDir string) error { + for _, t := range CodeAgentWriteTools(workDir) { + if err := reg.Register(t); err != nil { + return err + } + } + return nil +} + +// RegisterCodeAgentTools registers all coding agent tools with the given registry. 
+func RegisterCodeAgentTools(reg *tools.Registry, workDir string) error { + for _, t := range CodeAgentTools(workDir) { + if err := reg.Register(t); err != nil { + return err + } + } + return nil +} diff --git a/forge-core/tools/builtins/truncate.go b/forge-core/tools/builtins/truncate.go new file mode 100644 index 0000000..ccf1676 --- /dev/null +++ b/forge-core/tools/builtins/truncate.go @@ -0,0 +1,39 @@ +package builtins + +import "strings" + +const ( + // MaxOutputLines is the maximum number of lines returned by tool output. + MaxOutputLines = 2000 + // MaxOutputBytes is the maximum size of tool output in bytes. + MaxOutputBytes = 50 * 1024 +) + +// TruncateOutput truncates a string to MaxOutputLines and MaxOutputBytes, +// appending a truncation notice if the output was trimmed. +func TruncateOutput(s string) string { + if len(s) == 0 { + return s + } + + truncated := false + + // Truncate by byte size first. + if len(s) > MaxOutputBytes { + s = s[:MaxOutputBytes] + truncated = true + } + + // Truncate by line count. + lines := strings.SplitAfter(s, "\n") + if len(lines) > MaxOutputLines { + lines = lines[:MaxOutputLines] + truncated = true + s = strings.Join(lines, "") + } + + if truncated { + s = strings.TrimRight(s, "\n") + "\n\n... 
(output truncated)" + } + return s +} diff --git a/forge-plugins/channels/telegram/telegram.go b/forge-plugins/channels/telegram/telegram.go index 20156e5..a114ffe 100644 --- a/forge-plugins/channels/telegram/telegram.go +++ b/forge-plugins/channels/telegram/telegram.go @@ -19,10 +19,12 @@ import ( ) const ( - defaultWebhookPort = 3001 - defaultWebhookPath = "/telegram/webhook" - telegramAPIBase = "https://api.telegram.org" - pollingTimeout = 30 // seconds for long polling + defaultWebhookPort = 3001 + defaultWebhookPath = "/telegram/webhook" + telegramAPIBase = "https://api.telegram.org" + pollingTimeout = 30 // seconds for long polling + handlerTimeout = 10 * time.Minute + longRunningThreshold = 15 * time.Second ) // Plugin implements channels.ChannelPlugin for Telegram. @@ -136,19 +138,7 @@ func (p *Plugin) makeWebhookHandler(handler channels.EventHandler) http.HandlerF w.WriteHeader(http.StatusOK) - go func() { - ctx := context.Background() - stopTyping := p.startTypingIndicator(ctx, event.WorkspaceID) - resp, err := handler(ctx, event) - stopTyping() - if err != nil { - fmt.Printf("telegram: handler error: %v\n", err) - return - } - if err := p.SendResponse(event, resp); err != nil { - fmt.Printf("telegram: send response error: %v\n", err) - } - }() + p.handleEvent(event, handler) } } @@ -192,22 +182,48 @@ func (p *Plugin) startPolling(ctx context.Context, handler channels.EventHandler continue } - go func() { - stopTyping := p.startTypingIndicator(ctx, event.WorkspaceID) - resp, err := handler(ctx, event) - stopTyping() - if err != nil { - fmt.Printf("telegram: handler error: %v\n", err) - return - } - if err := p.SendResponse(event, resp); err != nil { - fmt.Printf("telegram: send response error: %v\n", err) - } - }() + p.handleEvent(event, handler) } } } +// handleEvent runs the handler in a background goroutine with an independent +// timeout context, typing indicator, and interim messaging for long-running tasks. 
+func (p *Plugin) handleEvent(event *channels.ChannelEvent, handler channels.EventHandler) { + go func() { + taskCtx, taskCancel := context.WithTimeout(context.Background(), handlerTimeout) + defer taskCancel() + + stopTyping := p.startTypingIndicator(taskCtx, event.WorkspaceID) + + // Send an interim message if the task takes longer than the threshold. + done := make(chan struct{}) + go func() { + select { + case <-time.After(longRunningThreshold): + _ = p.sendMessage(map[string]any{ + "chat_id": event.WorkspaceID, + "text": "Working on it \u2014 I'll send the result when ready.", + "reply_to_message_id": event.MessageID, + }) + case <-done: + } + }() + + resp, err := handler(taskCtx, event) + close(done) + stopTyping() + + if err != nil { + fmt.Printf("telegram: handler error: %v\n", err) + return + } + if err := p.SendResponse(event, resp); err != nil { + fmt.Printf("telegram: send response error: %v\n", err) + } + }() +} + func (p *Plugin) getUpdates(ctx context.Context, offset int64) ([]telegramUpdate, error) { url := fmt.Sprintf("%s/bot%s/getUpdates?offset=%d&timeout=%d", p.apiBase, p.botToken, offset, pollingTimeout) diff --git a/forge-skills/contract/types.go b/forge-skills/contract/types.go index ea1597d..010ffd4 100644 --- a/forge-skills/contract/types.go +++ b/forge-skills/contract/types.go @@ -46,6 +46,7 @@ type ForgeSkillMeta struct { Requires *SkillRequirements `yaml:"requires,omitempty" json:"requires,omitempty"` EgressDomains []string `yaml:"egress_domains,omitempty" json:"egress_domains,omitempty"` DeniedTools []string `yaml:"denied_tools,omitempty" json:"denied_tools,omitempty"` + WorkflowPhase string `yaml:"workflow_phase,omitempty" json:"workflow_phase,omitempty"` } // SkillRequirements declares CLI binaries and environment variables a skill needs. 
@@ -96,6 +97,7 @@ type AggregatedRequirements struct { MaxTimeoutHint int // maximum timeout_hint across all skills (seconds) DeniedTools []string // union of denied tools across skills, deduplicated, sorted EgressDomains []string // union of egress domains across skills, deduplicated, sorted + WorkflowPhases []string // union of workflow_phase values across skills, deduplicated, sorted } // DerivedCLIConfig holds auto-derived cli_execute configuration from skill requirements. @@ -105,6 +107,7 @@ type DerivedCLIConfig struct { TimeoutHint int // suggested timeout in seconds (0 = use default) DeniedTools []string // tools to remove from registry before LLM execution EgressDomains []string // additional egress domains from skills + WorkflowPhases []string // workflow phases from skills (edit, finalize, query) } // TrustLevel indicates the trust classification of a skill. diff --git a/forge-skills/local/embedded/code-agent/SKILL.md b/forge-skills/local/embedded/code-agent/SKILL.md new file mode 100644 index 0000000..81992b7 --- /dev/null +++ b/forge-skills/local/embedded/code-agent/SKILL.md @@ -0,0 +1,282 @@ +--- +name: code-agent +icon: 💻 +category: developer +tags: + - coding + - development + - debugging + - refactoring +description: General-purpose coding agent that reads, writes, and edits code, and searches codebases. 
+metadata: + forge: + workflow_phase: edit + requires: + bins: + - bash + - jq + env: + required: [] + one_of: [] + optional: [] + egress_domains: + # Node.js / npm + - registry.npmjs.org + # Tailwind CSS CDN (used by scaffold templates) + - cdn.tailwindcss.com + # Python / pip + - pypi.org + - files.pythonhosted.org + # Go modules + - proxy.golang.org + - sum.golang.org + - storage.googleapis.com + # Maven Central (Spring Boot) + - repo.maven.apache.org + - repo1.maven.org + denied_tools: + - bash_execute + - file_write + - file_edit + - file_patch + - file_read + - schedule_set + - schedule_delete + - schedule_list + - schedule_history + timeout_hint: 120 +--- + +# Code Agent + +You are an autonomous coding agent. You EXECUTE — you do NOT describe, plan, or ask. + +## ABSOLUTE RULES (DO NOT VIOLATE) + +1. **Every response MUST include tool calls.** A response with only text is a failure. If you have something to say, say it AND call tools in the same response. + +2. **NEVER say "I'll do X now" without doing X.** No planning text. No "Let me patch that." JUST DO IT — call the tools. + +3. **NEVER ask for confirmation.** Do not ask "Should I proceed?" or "Would you like me to...?" — just act. + +4. **NEVER output code in markdown blocks.** You have file tools. Use them. + +5. **Complete the ENTIRE request in ONE turn.** Scaffold + write all files + run — all in a single response. + +6. **ONE project per app. NEVER create multiple projects for a single application.** Full-stack apps use ONE project where the backend serves the frontend. + +7. **NEVER scaffold over existing code.** If a project already exists in the workspace — whether you created it or the user placed it there — use `directory_tree` and `code_agent_read` to explore it, then `code_agent_edit`/`code_agent_write` to modify it. Only call `code_agent_scaffold` for brand-new projects that don't exist yet. 
+ +## Iteration Rules (CRITICAL) + +When continuing a conversation about an existing project: +- **DO NOT create a new project.** The project already exists — modify it in place. +- **DO NOT call `code_agent_scaffold`.** The skeleton already exists. +- Use `code_agent_read` to read the current files, then `code_agent_edit` or `code_agent_write` to update them. +- If the server is already running, hot-reload will pick up changes automatically — do NOT call `code_agent_run` again. +- If the user asks to switch frameworks (e.g., "add a Go backend"), rewrite files in the SAME project directory. Do NOT create a second project. + +## Full-Stack Architecture (CRITICAL) + +Every backend framework scaffold includes a `static/` directory for frontend files. The backend serves both API routes AND the frontend UI. + +**NEVER create separate projects for frontend and backend.** Use ONE project: + +| Framework | Frontend Location | API Prefix | How It Works | +|-----------|------------------|------------|--------------| +| `node` | `public/` | `/api/` | Express serves static files from `public/` | +| `python` | `static/` | `/api/` | FastAPI mounts `StaticFiles` from `static/` | +| `golang` | `static/` | `/api/` | Gin serves `static/` directory | +| `spring-boot` | `src/main/resources/static/` | `/api/` | Spring Boot auto-serves from resources/static | + +For full-stack apps: +1. Scaffold with the backend framework +2. Write API routes in the backend code +3. Write HTML/JS/CSS in the frontend location above +4. Frontend JS fetches from `/api/...` endpoints +5. ONE `code_agent_run` starts everything + +## One-Shot Workflow + +### New Project (nothing exists yet) +``` +1. code_agent_scaffold → create skeleton +2. code_agent_write → write ALL source files (call multiple times) +3. code_agent_run → install deps + start server + open browser +4. 
Brief summary + URL +``` + +### Existing Codebase (first time seeing it) +When the user asks you to work on code that already exists in the workspace: +``` +1. directory_tree → discover project structure +2. code_agent_read → read key files to understand the codebase +3. code_agent_edit → apply changes (or code_agent_write for new files) +4. code_agent_run → start the server if not already running +5. Brief summary of changes +``` +**NEVER scaffold over existing code.** Explore it first, then modify in place. + +### Modify Existing Project (continuing conversation) +``` +1. code_agent_read → read file(s) to change +2. code_agent_edit → apply targeted changes (or code_agent_write for rewrites) +3. Brief summary of changes +``` +Do NOT call `code_agent_run` again if the server is already running — hot-reload handles it. + +Do NOT stop after step 1. Complete ALL steps in ONE response. + +## Tool Reference + +| Tool | When to Use | +|------|-------------| +| `code_agent_scaffold` | Bootstrap a NEW project only (never for existing projects) | +| `code_agent_write` | Create or overwrite files | +| `code_agent_edit` | Surgical text replacement in existing files | +| `code_agent_read` | Read a file or list directory | +| `code_agent_run` | Install deps + start server + open browser (call once) | +| `grep_search` | Search file contents by regex (supports include/exclude globs and context lines) | +| `glob_search` | Find files by name pattern | +| `directory_tree` | Show project directory tree | + +### Rules + +- All `project_dir` values are relative names (e.g., `my-app`), NOT absolute paths +- All `file_path` values are relative to `project_dir` (e.g., `src/main.jsx`) +- For frontend frameworks (react, vue, vanilla): only modify files under `src/` — never modify `src/main.jsx` +- Use Tailwind CSS utility classes for styling (loaded via CDN) + +## Scaffold Conventions (DO NOT VIOLATE) + +These rules prevent build errors: + +1. 
**NEVER modify `src/main.jsx`** (React/Vue) — it is the entry point +2. **ALWAYS use named exports**: `export function ComponentName() {}`, NEVER `export default` +3. **Use Tailwind CSS classes** for all styling — the CDN is pre-loaded +4. Only modify `src/App.jsx` (or `src/App.vue`) and create new component files under `src/` + +## Safety + +- All file operations are confined to the project directory. Path traversal is blocked. +- Read files before editing to avoid mistakes. +- Do not create git commits unless explicitly asked. + +## Tool: code_agent_scaffold + +Bootstrap a new project skeleton. ONLY for new projects — never call on existing ones. + +**Input:** + +| Parameter | Type | Required | Description | +|-----------|------|----------|-------------| +| project_name | string | yes | Project directory name (e.g., `my-app`) | +| framework | string | yes | One of: `react`, `vue`, `vanilla`, `node`, `python`, `golang`, `spring-boot` | +| title | string | no | Display title (defaults to project_name) | +| force | boolean | no | Overwrite existing project (default: false) | + +**Frameworks:** + +| Framework | Stack | Port | Frontend Dir | +|-----------|-------|------|-------------| +| `react` | Vite + React 19 + Tailwind | 3000 | `src/` | +| `vue` | Vite + Vue 3 + Tailwind | 3000 | `src/` | +| `vanilla` | Vite + vanilla JS + Tailwind | 3000 | `src/` | +| `node` | Express.js | 3000 | `public/` | +| `python` | FastAPI + uvicorn | 8000 | `static/` | +| `golang` | Go + Gin | 8080 | `static/` | +| `spring-boot` | Spring Boot + Maven | 8080 | `src/main/resources/static/` | + +**Output:** + +```json +{ + "status": "created", + "project_name": "my-app", + "framework": "react", + "project_dir": "/path/to/workspace/my-app", + "files": ["package.json", "vite.config.js", "index.html", "src/main.jsx", "src/App.jsx", ".gitignore"] +} +``` + +## Tool: code_agent_write + +Write or update a file. Creates directories automatically. 
+ +**Input:** + +| Parameter | Type | Required | Description | +|-----------|------|----------|-------------| +| project_dir | string | yes | Project directory name | +| file_path | string | yes | Relative path (e.g., `src/App.jsx`) | +| content | string | yes | Complete file content | + +**Output:** + +```json +{"path": "src/App.jsx", "action": "created", "size": 312} +``` + +## Tool: code_agent_read + +Read a file or list directory contents. Large files are auto-truncated to 300 lines — use offset/limit to read other sections. + +**Input:** + +| Parameter | Type | Required | Description | +|-----------|------|----------|-------------| +| project_dir | string | yes | Project directory name | +| file_path | string | yes | Relative path, or `"."` for directory listing | +| offset | integer | no | Line number to start reading (1-based). Default: 1 | +| limit | integer | no | Maximum lines to return. Default: 300. Large files are auto-truncated. | + +**Output (file):** + +```json +{"path": "src/App.jsx", "content": "...", "size": 245, "total_lines": 50, "offset": 1, "limit": 300, "truncated": false, "modified": "2025-01-15T10:30:00Z"} +``` + +**Output (directory):** + +```json +{"path": ".", "type": "directory", "files": ["package.json", "src/App.jsx"]} +``` + +## Tool: code_agent_edit + +Surgical text replacement. `old_text` must match exactly once. + +**Input:** + +| Parameter | Type | Required | Description | +|-----------|------|----------|-------------| +| project_dir | string | yes | Project directory name | +| file_path | string | yes | Relative path | +| old_text | string | yes | Exact text to find (must match once) | +| new_text | string | yes | Replacement text | + +**Output:** + +```json +{"path": "src/App.jsx", "action": "edited", "size": 320, "diff": "..."} +``` + +## Tool: code_agent_run + +Install deps, start server, open browser. Auto-detects project type. + +Call **once** after writing all files. Server stays running — hot-reload handles changes. 
+ +**Input:** + +| Parameter | Type | Required | Description | +|-----------|------|----------|-------------| +| project_dir | string | yes | Project directory name | + +**Output:** + +```json +{"status": "running", "url": "http://localhost:3000", "pid": 12345, "project_dir": "/path/to/my-app", "install": "installed", "type": "node", "command": "npm run dev"} +``` + +Supported: Node.js (package.json), Python (requirements.txt), Go (go.mod), Spring Boot (pom.xml), static HTML (index.html). diff --git a/forge-skills/local/embedded/code-agent/scripts/code-agent-edit.sh b/forge-skills/local/embedded/code-agent/scripts/code-agent-edit.sh new file mode 100755 index 0000000..6457d1d --- /dev/null +++ b/forge-skills/local/embedded/code-agent/scripts/code-agent-edit.sh @@ -0,0 +1,111 @@ +#!/usr/bin/env bash +# code-agent-edit.sh — Surgical string replacement in a project file. +# Usage: ./code-agent-edit.sh '{"project_dir": "my-app", "file_path": "src/App.jsx", "old_text": "Count: 0", "new_text": "Clicks: 0"}' +# +# Requires: jq +set -euo pipefail + +# --- Read input --- +INPUT="${1:-}" +if [ -z "$INPUT" ]; then + echo '{"error": "usage: code-agent-edit.sh {\"project_dir\": \"...\", \"file_path\": \"...\", \"old_text\": \"...\", \"new_text\": \"...\"}"}' >&2 + exit 1 +fi +if ! 
printf '%s' "$INPUT" | jq empty 2>/dev/null; then + echo '{"error": "invalid JSON input"}' >&2 + exit 1 +fi + +PROJECT_DIR=$(printf '%s' "$INPUT" | jq -r '.project_dir // empty') +FILE_PATH=$(printf '%s' "$INPUT" | jq -r '.file_path // empty') +OLD_TEXT=$(printf '%s' "$INPUT" | jq -r '.old_text // empty') +NEW_TEXT=$(printf '%s' "$INPUT" | jq -r '.new_text // empty') + +if [ -z "$PROJECT_DIR" ]; then + echo '{"error": "project_dir is required"}' >&2 + exit 1 +fi +if [ -z "$FILE_PATH" ]; then + echo '{"error": "file_path is required"}' >&2 + exit 1 +fi +if [ -z "$OLD_TEXT" ]; then + echo '{"error": "old_text is required"}' >&2 + exit 1 +fi + +# --- Path traversal prevention --- +case "$FILE_PATH" in + /*|*..*) + echo '{"error": "file_path must be relative and must not contain .."}' >&2 + exit 1 + ;; +esac + +# --- Resolve project_dir --- +# Strip workspace/ prefix if present (avoids double-prefix when LLM passes "workspace/foo") +PROJECT_DIR="${PROJECT_DIR#workspace/}" +if [ "${PROJECT_DIR:0:1}" != "/" ]; then + PROJECT_DIR="$(pwd)/workspace/$PROJECT_DIR" +fi + +if [ ! -d "$PROJECT_DIR" ]; then + echo "{\"error\": \"project directory not found: $PROJECT_DIR\"}" >&2 + exit 1 +fi + +RESOLVED_PROJECT=$(cd "$PROJECT_DIR" && pwd) +FULL_PATH="$RESOLVED_PROJECT/$FILE_PATH" + +# Verify path stays within project +RESOLVED_FULL=$(cd "$(dirname "$FULL_PATH")" 2>/dev/null && pwd)/$(basename "$FULL_PATH") 2>/dev/null || true +case "$RESOLVED_FULL" in + "$RESOLVED_PROJECT"/*) + ;; + *) + echo '{"error": "file_path resolves outside project_dir"}' >&2 + exit 1 + ;; +esac + +if [ ! -f "$FULL_PATH" ]; then + echo '{"error": "file not found: '"$FILE_PATH"'"}' >&2 + exit 1 +fi + +# --- Read original file --- +ORIGINAL=$(cat "$FULL_PATH") + +# --- Count occurrences and perform replacement --- +# Use jq -Rs (raw slurp) to handle multi-line old_text/new_text correctly. 
+# The previous awk-based counting failed on multi-line strings because awk +# processes line-by-line and -v assignment cannot contain literal newlines. +COUNT=$(jq -Rs --arg old "$OLD_TEXT" '(split($old) | length) - 1' < "$FULL_PATH") + +if [ "$COUNT" -eq 0 ]; then + echo '{"error": "old_text not found in file"}' >&2 + exit 1 +fi + +if [ "$COUNT" -gt 1 ]; then + jq -n --arg count "$COUNT" \ + '{error: "old_text found multiple times — be more specific to match exactly once", occurrences: ($count | tonumber)}' >&2 + exit 1 +fi + +# Exactly 1 match — replace first occurrence and write back +jq -Rsj --arg old "$OLD_TEXT" --arg new "$NEW_TEXT" \ + 'split($old) | .[0] + $new + (.[1:] | join($old))' \ + < "$FULL_PATH" > "$FULL_PATH.tmp" +mv "$FULL_PATH.tmp" "$FULL_PATH" + +# --- Generate diff --- +DIFF=$(diff -u <(echo "$ORIGINAL") <(cat "$FULL_PATH") 2>/dev/null || true) +MODIFIED_SIZE=$(wc -c < "$FULL_PATH" | tr -d ' ') + +jq -n \ + --arg path "$FILE_PATH" \ + --arg action "edited" \ + --arg size "$MODIFIED_SIZE" \ + --arg diff "$DIFF" \ + '{path: $path, action: $action, size: ($size | tonumber), diff: $diff}' diff --git a/forge-skills/local/embedded/code-agent/scripts/code-agent-read.sh b/forge-skills/local/embedded/code-agent/scripts/code-agent-read.sh new file mode 100755 index 0000000..b5f074b --- /dev/null +++ b/forge-skills/local/embedded/code-agent/scripts/code-agent-read.sh @@ -0,0 +1,128 @@ +#!/usr/bin/env bash +# code-agent-read.sh — Read a file or list a project directory. +# Usage: ./code-agent-read.sh '{"project_dir": "my-app", "file_path": "src/App.jsx"}' +# ./code-agent-read.sh '{"project_dir": "my-app", "file_path": "."}' +# +# Requires: jq +set -euo pipefail + +# --- Read input --- +INPUT="${1:-}" +if [ -z "$INPUT" ]; then + echo '{"error": "usage: code-agent-read.sh {\"project_dir\": \"...\", \"file_path\": \"...\"}"}' >&2 + exit 1 +fi + +if ! 
printf '%s' "$INPUT" | jq empty 2>/dev/null; then + echo '{"error": "invalid JSON input"}' >&2 + exit 1 +fi + +# --- Extract fields --- +PROJECT_DIR=$(printf '%s' "$INPUT" | jq -r '.project_dir // empty') +FILE_PATH=$(printf '%s' "$INPUT" | jq -r '.file_path // empty') +OFFSET=$(printf '%s' "$INPUT" | jq -r '.offset // 1') +LIMIT=$(printf '%s' "$INPUT" | jq -r '.limit // 300') + +if [ -z "$PROJECT_DIR" ]; then + echo '{"error": "project_dir is required"}' >&2 + exit 1 +fi +if [ -z "$FILE_PATH" ]; then + echo '{"error": "file_path is required"}' >&2 + exit 1 +fi + +# --- Path traversal prevention --- +case "$FILE_PATH" in + /*|*..*) + echo '{"error": "file_path must be relative and must not contain .."}' >&2 + exit 1 + ;; +esac + +# --- Resolve project_dir (relative paths resolve within workspace/) --- +# Strip workspace/ prefix if present (avoids double-prefix when LLM passes "workspace/foo") +PROJECT_DIR="${PROJECT_DIR#workspace/}" +if [ "${PROJECT_DIR:0:1}" != "/" ]; then + PROJECT_DIR="$(pwd)/workspace/$PROJECT_DIR" +fi + +if [ ! -d "$PROJECT_DIR" ]; then + echo "{\"error\": \"project directory not found: $PROJECT_DIR\"}" >&2 + exit 1 +fi + +RESOLVED_PROJECT=$(cd "$PROJECT_DIR" && pwd) + +# --- Directory listing --- +if [ "$FILE_PATH" = "." ]; then + FILES=$(cd "$RESOLVED_PROJECT" && find . -type f \ + ! -path './node_modules/*' \ + ! -path './.git/*' \ + ! -path './dist/*' \ + ! -path './__pycache__/*' \ + ! -path './venv/*' \ + ! -path './.venv/*' \ + ! 
-path './vendor/*' \ + -maxdepth 5 | sed 's|^\./||' | sort) + echo "$FILES" | jq -R -s '{ + path: ".", + type: "directory", + files: (split("\n") | map(select(length > 0))) + }' + exit 0 +fi + +# --- File read --- +FULL_PATH="$RESOLVED_PROJECT/$FILE_PATH" + +# Verify resolved path is still under project dir +RESOLVED_FULL=$(cd "$(dirname "$FULL_PATH")" 2>/dev/null && pwd)/$(basename "$FULL_PATH") 2>/dev/null || true +case "$RESOLVED_FULL" in + "$RESOLVED_PROJECT"/*) + ;; + *) + echo '{"error": "file_path resolves outside project_dir"}' >&2 + exit 1 + ;; +esac + +if [ ! -f "$FULL_PATH" ]; then + echo '{"error": "file not found: '"$FILE_PATH"'"}' >&2 + exit 1 +fi + +SIZE=$(wc -c < "$FULL_PATH" | tr -d ' ') +TOTAL_LINES=$(wc -l < "$FULL_PATH" | tr -d ' ') +# Ensure TOTAL_LINES is at least 1 for non-empty files +if [ "$TOTAL_LINES" -eq 0 ] && [ "$SIZE" -gt 0 ]; then + TOTAL_LINES=1 +fi + +END=$((OFFSET + LIMIT - 1)) +if [ "$END" -gt "$TOTAL_LINES" ]; then + END=$TOTAL_LINES +fi + +CONTENT=$(sed -n "${OFFSET},${END}p" "$FULL_PATH") +TRUNCATED="false" +if [ "$END" -lt "$TOTAL_LINES" ]; then + TRUNCATED="true" + CONTENT="${CONTENT} + +[FILE TRUNCATED: showing lines ${OFFSET}-${END} of ${TOTAL_LINES}. 
Use offset/limit to read other sections.]" +fi + +MODIFIED=$(stat -f '%Sm' -t '%Y-%m-%dT%H:%M:%SZ' "$FULL_PATH" 2>/dev/null || stat --format='%y' "$FULL_PATH" 2>/dev/null || echo "unknown") + +jq -n \ + --arg path "$FILE_PATH" \ + --arg content "$CONTENT" \ + --argjson size "$SIZE" \ + --argjson total_lines "$TOTAL_LINES" \ + --argjson offset "$OFFSET" \ + --argjson limit "$LIMIT" \ + --argjson truncated "$TRUNCATED" \ + --arg modified "$MODIFIED" \ + '{path: $path, content: $content, size: $size, total_lines: $total_lines, offset: $offset, limit: $limit, truncated: $truncated, modified: $modified}' diff --git a/forge-skills/local/embedded/code-agent/scripts/code-agent-run.sh b/forge-skills/local/embedded/code-agent/scripts/code-agent-run.sh new file mode 100755 index 0000000..71f040c --- /dev/null +++ b/forge-skills/local/embedded/code-agent/scripts/code-agent-run.sh @@ -0,0 +1,325 @@ +#!/usr/bin/env bash +# code-agent-run.sh — Install dependencies and start the dev server. +# Detects project type (Node, Python, Go, Spring Boot, static HTML) automatically. +# Usage: ./code-agent-run.sh '{"project_dir": "my-app"}' +# +# Requires: jq +set -euo pipefail + +# --- Read input --- +INPUT="${1:-}" +if [ -z "$INPUT" ]; then + echo '{"error": "usage: code-agent-run.sh {\"project_dir\": \"...\"}"}' >&2 + exit 1 +fi + +if ! echo "$INPUT" | jq empty 2>/dev/null; then + echo '{"error": "invalid JSON input"}' >&2 + exit 1 +fi + +# --- Extract fields --- +PROJECT_DIR=$(echo "$INPUT" | jq -r '.project_dir // empty') + +if [ -z "$PROJECT_DIR" ]; then + echo '{"error": "project_dir is required"}' >&2 + exit 1 +fi + +# --- Resolve path --- +# Relative paths resolve within workspace/ subdirectory (where code-agent file tools operate). +# Strip workspace/ prefix if present (avoids double-prefix when LLM passes "workspace/foo") +PROJECT_DIR="${PROJECT_DIR#workspace/}" +if [ "${PROJECT_DIR:0:1}" != "/" ]; then + PROJECT_DIR="$(pwd)/workspace/$PROJECT_DIR" +fi + +if [ ! 
-d "$PROJECT_DIR" ]; then + echo "{\"error\": \"project directory not found: $PROJECT_DIR\"}" >&2 + exit 1 +fi + +cd "$PROJECT_DIR" + +# --- Helper: open URL in browser --- +open_browser() { + local url="$1" + case "$(uname -s)" in + Darwin) open "$url" 2>/dev/null || true ;; + Linux) xdg-open "$url" 2>/dev/null || true ;; + esac +} + +# --- Detect project type and run --- + +# ===================== +# Node.js (package.json) +# ===================== +if [ -f "package.json" ]; then + # Install dependencies if needed + INSTALL_STATUS="skipped" + if [ ! -d "node_modules" ]; then + INSTALL_STATUS="installed" + if ! npm install --loglevel=error > .forge-install.log 2>&1; then + INSTALL_ERR=$(tail -5 .forge-install.log 2>/dev/null || echo "unknown error") + jq -n --arg err "npm install failed" --arg details "$INSTALL_ERR" \ + '{error: $err, details: $details}' >&2 + exit 1 + fi + fi + + # Determine start command + DEV_CMD="npm run dev" + if node -e "const p=require('./package.json'); process.exit(p.scripts && p.scripts.dev ? 0 : 1)" 2>/dev/null; then + DEV_CMD="npm run dev" + elif node -e "const p=require('./package.json'); process.exit(p.scripts && p.scripts.start ? 0 : 1)" 2>/dev/null; then + DEV_CMD="npm start" + else + DEV_CMD="npx vite --open" + fi + + # Start dev server in background + DEV_LOG="$PROJECT_DIR/.forge-dev.log" + nohup $DEV_CMD > "$DEV_LOG" 2>&1 & + DEV_PID=$! + + # Wait for server to start + SERVER_URL="http://localhost:3000" + SERVER_READY=false + for i in 1 2 3 4 5 6 7 8; do + sleep 1 + if ! kill -0 "$DEV_PID" 2>/dev/null; then + DEV_ERR=$(tail -10 "$DEV_LOG" 2>/dev/null || echo "process exited") + jq -n --arg err "dev server failed to start" --arg details "$DEV_ERR" \ + '{error: $err, details: $details}' >&2 + exit 1 + fi + # Try to extract URL from output (works for Vite, Next.js, CRA, etc.) 
+ ACTUAL_URL=$(grep -oE 'https?://localhost:[0-9]+' "$DEV_LOG" 2>/dev/null | head -1 || true) + if [ -n "$ACTUAL_URL" ]; then + SERVER_URL="$ACTUAL_URL" + SERVER_READY=true + break + fi + done + + # Open browser if server detected a URL, otherwise try to open the default + open_browser "$SERVER_URL" + + jq -n \ + --arg status "running" \ + --arg url "$SERVER_URL" \ + --arg pid "$DEV_PID" \ + --arg project_dir "$PROJECT_DIR" \ + --arg install "$INSTALL_STATUS" \ + --arg type "node" \ + --arg cmd "$DEV_CMD" \ + '{status: $status, url: $url, pid: ($pid | tonumber), project_dir: $project_dir, install: $install, type: $type, command: $cmd}' + exit 0 +fi + +# ===================== +# Python +# ===================== +if [ -f "requirements.txt" ] || [ -f "pyproject.toml" ] || [ -f "setup.py" ]; then + # Install dependencies + INSTALL_STATUS="skipped" + if [ -f "requirements.txt" ]; then + INSTALL_STATUS="installed" + if ! pip install -r requirements.txt > .forge-install.log 2>&1; then + INSTALL_ERR=$(tail -5 .forge-install.log 2>/dev/null || echo "unknown error") + jq -n --arg err "pip install failed" --arg details "$INSTALL_ERR" \ + '{error: $err, details: $details}' >&2 + exit 1 + fi + fi + + # Detect entry point + DEV_CMD="" + PORT=8000 + if [ -f "manage.py" ]; then + DEV_CMD="python manage.py runserver 0.0.0.0:$PORT" + elif [ -f "app.py" ]; then + DEV_CMD="python app.py" + PORT=5000 + elif [ -f "main.py" ]; then + DEV_CMD="python main.py" + else + # Fallback: try uvicorn or flask + if grep -q "fastapi\|uvicorn" requirements.txt 2>/dev/null; then + DEV_CMD="uvicorn main:app --reload --port $PORT" + elif grep -q "flask" requirements.txt 2>/dev/null; then + DEV_CMD="flask run --port $PORT" + else + DEV_CMD="python -m http.server $PORT" + fi + fi + + DEV_LOG="$PROJECT_DIR/.forge-dev.log" + nohup $DEV_CMD > "$DEV_LOG" 2>&1 & + DEV_PID=$! + + SERVER_URL="http://localhost:$PORT" + sleep 2 + + if ! 
kill -0 "$DEV_PID" 2>/dev/null; then + DEV_ERR=$(tail -10 "$DEV_LOG" 2>/dev/null || echo "process exited") + jq -n --arg err "server failed to start" --arg details "$DEV_ERR" \ + '{error: $err, details: $details}' >&2 + exit 1 + fi + + open_browser "$SERVER_URL" + + jq -n \ + --arg status "running" \ + --arg url "$SERVER_URL" \ + --arg pid "$DEV_PID" \ + --arg project_dir "$PROJECT_DIR" \ + --arg install "$INSTALL_STATUS" \ + --arg type "python" \ + --arg cmd "$DEV_CMD" \ + '{status: $status, url: $url, pid: ($pid | tonumber), project_dir: $project_dir, install: $install, type: $type, command: $cmd}' + exit 0 +fi + +# ===================== +# Go +# ===================== +if [ -f "go.mod" ]; then + # Download dependencies + INSTALL_STATUS="skipped" + if ! [ -d "vendor" ] && ! go env GOMODCACHE | xargs test -d 2>/dev/null; then + INSTALL_STATUS="installed" + fi + if ! go mod download > .forge-install.log 2>&1; then + INSTALL_ERR=$(tail -5 .forge-install.log 2>/dev/null || echo "unknown error") + jq -n --arg err "go mod download failed" --arg details "$INSTALL_ERR" \ + '{error: $err, details: $details}' >&2 + exit 1 + fi + + DEV_LOG="$PROJECT_DIR/.forge-dev.log" + nohup go run . > "$DEV_LOG" 2>&1 & + DEV_PID=$! + PORT=8080 + + # Wait for server to start (Go compiles first, may take a few seconds) + SERVER_READY=false + for i in 1 2 3 4 5 6 7 8 9 10; do + sleep 1 + if ! 
kill -0 "$DEV_PID" 2>/dev/null; then + DEV_ERR=$(tail -10 "$DEV_LOG" 2>/dev/null || echo "process exited") + jq -n --arg err "go run failed" --arg details "$DEV_ERR" \ + '{error: $err, details: $details}' >&2 + exit 1 + fi + ACTUAL_URL=$(grep -oE 'https?://[^[:space:]]+' "$DEV_LOG" 2>/dev/null | head -1 || true) + if [ -n "$ACTUAL_URL" ]; then + SERVER_READY=true + break + fi + done + + SERVER_URL="${ACTUAL_URL:-http://localhost:$PORT}" + open_browser "$SERVER_URL" + + jq -n \ + --arg status "running" \ + --arg url "$SERVER_URL" \ + --arg pid "$DEV_PID" \ + --arg project_dir "$PROJECT_DIR" \ + --arg install "$INSTALL_STATUS" \ + --arg type "go" \ + --arg cmd "go run ." \ + '{status: $status, url: $url, pid: ($pid | tonumber), project_dir: $project_dir, install: $install, type: $type, command: $cmd}' + exit 0 +fi + +# ===================== +# Spring Boot (pom.xml) +# ===================== +if [ -f "pom.xml" ]; then + # Determine Maven command + MVN_CMD="mvn" + if [ -f "mvnw" ]; then + chmod +x mvnw + MVN_CMD="./mvnw" + fi + + # Install dependencies + INSTALL_STATUS="installed" + if ! $MVN_CMD dependency:resolve -q > .forge-install.log 2>&1; then + INSTALL_ERR=$(tail -10 .forge-install.log 2>/dev/null || echo "unknown error") + jq -n --arg err "maven dependency install failed" --arg details "$INSTALL_ERR" \ + '{error: $err, details: $details}' >&2 + exit 1 + fi + + DEV_LOG="$PROJECT_DIR/.forge-dev.log" + DEV_CMD="$MVN_CMD spring-boot:run" + nohup $DEV_CMD > "$DEV_LOG" 2>&1 & + DEV_PID=$! + PORT=8080 + + # Spring Boot takes longer to start — wait up to 30 seconds + SERVER_READY=false + for i in $(seq 1 30); do + sleep 1 + if ! 
kill -0 "$DEV_PID" 2>/dev/null; then + DEV_ERR=$(tail -15 "$DEV_LOG" 2>/dev/null || echo "process exited") + jq -n --arg err "spring-boot:run failed" --arg details "$DEV_ERR" \ + '{error: $err, details: $details}' >&2 + exit 1 + fi + # Spring Boot logs: "Tomcat started on port 8080" or "Started Application in X seconds" + if grep -qE 'Started \w+ in|Tomcat started on port' "$DEV_LOG" 2>/dev/null; then + SERVER_READY=true + break + fi + done + + ACTUAL_URL=$(grep -oE 'https?://[^[:space:]]+' "$DEV_LOG" 2>/dev/null | head -1 || true) + SERVER_URL="${ACTUAL_URL:-http://localhost:$PORT}" + open_browser "$SERVER_URL" + + jq -n \ + --arg status "running" \ + --arg url "$SERVER_URL" \ + --arg pid "$DEV_PID" \ + --arg project_dir "$PROJECT_DIR" \ + --arg install "$INSTALL_STATUS" \ + --arg type "spring-boot" \ + --arg cmd "$DEV_CMD" \ + '{status: $status, url: $url, pid: ($pid | tonumber), project_dir: $project_dir, install: $install, type: $type, command: $cmd}' + exit 0 +fi + +# ===================== +# Static HTML (fallback) +# ===================== +if [ -f "index.html" ]; then + PORT=8080 + DEV_LOG="$PROJECT_DIR/.forge-dev.log" + nohup python3 -m http.server "$PORT" > "$DEV_LOG" 2>&1 & + DEV_PID=$! 
+ sleep 1 + + SERVER_URL="http://localhost:$PORT" + open_browser "$SERVER_URL" + + jq -n \ + --arg status "running" \ + --arg url "$SERVER_URL" \ + --arg pid "$DEV_PID" \ + --arg project_dir "$PROJECT_DIR" \ + --arg install "n/a" \ + --arg type "static" \ + --arg cmd "python3 -m http.server $PORT" \ + '{status: $status, url: $url, pid: ($pid | tonumber), project_dir: $project_dir, install: $install, type: $type, command: $cmd}' + exit 0 +fi + +# No known project type +echo '{"error": "could not detect project type — no package.json, requirements.txt, go.mod, pom.xml, or index.html found"}' >&2 +exit 1 diff --git a/forge-skills/local/embedded/code-agent/scripts/code-agent-scaffold.sh b/forge-skills/local/embedded/code-agent/scripts/code-agent-scaffold.sh new file mode 100755 index 0000000..7febaa2 --- /dev/null +++ b/forge-skills/local/embedded/code-agent/scripts/code-agent-scaffold.sh @@ -0,0 +1,674 @@ +#!/usr/bin/env bash +# code-agent-scaffold.sh — Scaffold a project with a known-good skeleton. +# Usage: ./code-agent-scaffold.sh '{"project_name": "my-app", "framework": "react"}' +# +# Supported frameworks: react, vue, vanilla, node, python, golang, spring-boot +# Requires: jq +set -euo pipefail + +# --- Read input --- +INPUT="${1:-}" +if [ -z "$INPUT" ]; then + echo '{"error": "usage: code-agent-scaffold.sh {\"project_name\": \"...\", \"framework\": \"react\"}"}' >&2 + exit 1 +fi +if ! echo "$INPUT" | jq empty 2>/dev/null; then + echo '{"error": "invalid JSON input"}' >&2 + exit 1 +fi + +PROJECT_NAME=$(echo "$INPUT" | jq -r '.project_name // empty') +FRAMEWORK=$(echo "$INPUT" | jq -r '.framework // empty') +TITLE=$(echo "$INPUT" | jq -r '.title // empty') + +if [ -z "$PROJECT_NAME" ]; then + echo '{"error": "project_name is required"}' >&2 + exit 1 +fi +if [ -z "$FRAMEWORK" ]; then + echo '{"error": "framework is required. 
Options: react, vue, vanilla, node, python, golang, spring-boot"}' >&2 + exit 1 +fi +if [ -z "$TITLE" ]; then + TITLE="$PROJECT_NAME" +fi + +# --- Resolve output directory within workspace/ --- +OUTPUT_DIR="$(pwd)/workspace/$PROJECT_NAME" + +if [ -d "$OUTPUT_DIR" ] && [ "$(ls -A "$OUTPUT_DIR" 2>/dev/null)" ]; then + FORCE=$(echo "$INPUT" | jq -r '.force // false') + if [ "$FORCE" != "true" ]; then + echo '{"error": "project directory already exists and is not empty; set force: true to overwrite"}' >&2 + exit 1 + fi +fi + +mkdir -p "$OUTPUT_DIR" + +# Track created files +CREATED_FILES=() +write_file() { + local relpath="$1" + local content="$2" + local fullpath="$OUTPUT_DIR/$relpath" + mkdir -p "$(dirname "$fullpath")" + echo "$content" > "$fullpath" + CREATED_FILES+=("$relpath") +} + +# --- Framework templates --- + +scaffold_react() { + write_file "package.json" "{ + \"name\": \"$PROJECT_NAME\", + \"private\": true, + \"version\": \"0.1.0\", + \"type\": \"module\", + \"scripts\": { + \"dev\": \"vite --open\", + \"build\": \"vite build\", + \"preview\": \"vite preview\" + }, + \"dependencies\": { + \"react\": \"^19.0.0\", + \"react-dom\": \"^19.0.0\" + }, + \"devDependencies\": { + \"@vitejs/plugin-react\": \"^4.4.0\", + \"vite\": \"^6.0.0\" + } +}" + + write_file "vite.config.js" "import { defineConfig } from 'vite' +import react from '@vitejs/plugin-react' + +export default defineConfig({ + plugins: [react()], + server: { + port: 3000, + open: true, + }, +})" + + write_file "index.html" " + + + + + $TITLE + + + +
+ + +" + + write_file "src/main.jsx" "import { StrictMode } from 'react' +import { createRoot } from 'react-dom/client' +import { App } from './App.jsx' + +createRoot(document.getElementById('root')).render( + + + +)" + + write_file "src/App.jsx" "import { useState } from 'react' + +export function App() { + const [count, setCount] = useState(0) + + return ( +
+
+

$TITLE

+
+ +

+ Edit src/App.jsx and save to see changes. +

+
+
+
+ ) +}" + + write_file ".gitignore" "node_modules +dist +.env +*.log" +} + +scaffold_vue() { + write_file "package.json" "{ + \"name\": \"$PROJECT_NAME\", + \"private\": true, + \"version\": \"0.1.0\", + \"type\": \"module\", + \"scripts\": { + \"dev\": \"vite --open\", + \"build\": \"vite build\", + \"preview\": \"vite preview\" + }, + \"dependencies\": { + \"vue\": \"^3.5.0\" + }, + \"devDependencies\": { + \"@vitejs/plugin-vue\": \"^5.2.0\", + \"vite\": \"^6.0.0\" + } +}" + + write_file "vite.config.js" "import { defineConfig } from 'vite' +import vue from '@vitejs/plugin-vue' + +export default defineConfig({ + plugins: [vue()], + server: { + port: 3000, + open: true, + }, +})" + + write_file "index.html" " + + + + + $TITLE + + + +
+ + +" + + write_file "src/main.js" "import { createApp } from 'vue' +import App from './App.vue' + +createApp(App).mount('#app')" + + write_file "src/App.vue" " + +" + + write_file ".gitignore" "node_modules +dist +.env +*.log" +} + +scaffold_vanilla() { + write_file "package.json" "{ + \"name\": \"$PROJECT_NAME\", + \"private\": true, + \"version\": \"0.1.0\", + \"type\": \"module\", + \"scripts\": { + \"dev\": \"vite --open\", + \"build\": \"vite build\", + \"preview\": \"vite preview\" + }, + \"devDependencies\": { + \"vite\": \"^6.0.0\" + } +}" + + write_file "vite.config.js" "import { defineConfig } from 'vite' + +export default defineConfig({ + server: { + port: 3000, + open: true, + }, +})" + + write_file "index.html" " + + + + + $TITLE + + + +
+
+

$TITLE

+
+ +

+ Edit src/main.js and save to see changes. +

+
+
+
+ + +" + + write_file "src/main.js" "let count = 0 +const btn = document.getElementById('counter') +btn.addEventListener('click', () => { + count++ + btn.textContent = \`Count: \${count}\` +})" + + write_file ".gitignore" "node_modules +dist +.env +*.log" +} + +scaffold_node() { + write_file "package.json" "{ + \"name\": \"$PROJECT_NAME\", + \"private\": true, + \"version\": \"0.1.0\", + \"type\": \"module\", + \"scripts\": { + \"dev\": \"node --watch src/server.js\", + \"start\": \"node src/server.js\" + }, + \"dependencies\": { + \"express\": \"^4.21.0\" + } +}" + + write_file "src/server.js" "import express from 'express' +import { fileURLToPath } from 'url' +import { dirname, join } from 'path' + +const __dirname = dirname(fileURLToPath(import.meta.url)) +const app = express() +const PORT = process.env.PORT || 3000 + +app.use(express.json()) + +// Serve static frontend files from public/ directory +app.use(express.static(join(__dirname, '..', 'public'))) + +// API routes +app.get('/api/health', (req, res) => { + res.json({ status: 'healthy', uptime: process.uptime() }) +}) + +// Fallback: serve index.html for any non-API route +app.get('*', (req, res) => { + res.sendFile(join(__dirname, '..', 'public', 'index.html')) +}) + +app.listen(PORT, () => { + console.log(\`Server running at http://localhost:\${PORT}\`) +})" + + write_file "public/index.html" " + + + + + $TITLE + + + +
+
+

$TITLE

+

Express server is running.

+

API: /api/health

+
+
+ + +" + + write_file "public/app.js" "// Frontend JavaScript — fetches from API and updates the UI +console.log('$TITLE frontend loaded')" + + write_file ".gitignore" "node_modules +.env +*.log" +} + +scaffold_python() { + write_file "requirements.txt" "fastapi>=0.115.0 +uvicorn[standard]>=0.32.0" + + write_file "main.py" "from pathlib import Path +from fastapi import FastAPI +from fastapi.responses import HTMLResponse, FileResponse +from fastapi.staticfiles import StaticFiles + +app = FastAPI(title=\"$TITLE\") + +# Serve static frontend files from static/ directory +STATIC_DIR = Path(__file__).parent / \"static\" +if STATIC_DIR.exists(): + app.mount(\"/static\", StaticFiles(directory=str(STATIC_DIR)), name=\"static\") + + +@app.get(\"/\", response_class=HTMLResponse) +async def root(): + index = STATIC_DIR / \"index.html\" + if index.exists(): + return index.read_text() + return '

$TITLE

Create static/index.html for the UI

' + + +# API routes — all under /api/ +@app.get(\"/api/health\") +async def health(): + return {\"status\": \"healthy\"} + + +@app.get(\"/api/docs\") +async def docs_redirect(): + \"\"\"Redirect to auto-generated API docs.\"\"\" + from fastapi.responses import RedirectResponse + return RedirectResponse(url=\"/docs\") + + +if __name__ == \"__main__\": + import uvicorn + uvicorn.run(\"main:app\", host=\"0.0.0.0\", port=8000, reload=True)" + + write_file "static/index.html" " + + + + + $TITLE + + + +
+
+

$TITLE

+

FastAPI server is running.

+ + Open API Docs + +
+
+ + +" + + write_file "static/app.js" "// Frontend JavaScript — fetches from API and updates the UI +console.log('$TITLE frontend loaded')" + + write_file ".gitignore" "__pycache__ +*.pyc +.venv +venv +.env +*.log" +} + +scaffold_golang() { + write_file "go.mod" "module $PROJECT_NAME + +go 1.22 + +require github.com/gin-gonic/gin v1.10.0 + +require ( + github.com/bytedance/sonic v1.12.6 // indirect + github.com/bytedance/sonic/loader v0.2.1 // indirect + github.com/cloudwego/base64x v0.1.4 // indirect + github.com/cloudwego/iasm v0.2.0 // indirect + github.com/gabriel-vasile/mimetype v1.4.7 // indirect + github.com/gin-contrib/sse v0.1.0 // indirect + github.com/go-playground/locales v0.14.1 // indirect + github.com/go-playground/universal-translator v0.18.1 // indirect + github.com/go-playground/validator/v10 v10.23.0 // indirect + github.com/goccy/go-json v0.10.4 // indirect + github.com/json-iterator/go v1.1.12 // indirect + github.com/klauspost/cpuid/v2 v2.2.9 // indirect + github.com/leodido/go-urn v1.4.0 // indirect + github.com/mattn/go-isatty v0.0.20 // indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/modern-go/reflect2 v1.0.2 // indirect + github.com/pelletier/go-toml/v2 v2.2.3 // indirect + github.com/twitchyliquid64/golang-asm v0.15.1 // indirect + github.com/ugorji/go/codec v1.2.12 // indirect + golang.org/x/arch v0.12.0 // indirect + golang.org/x/crypto v0.31.0 // indirect + golang.org/x/net v0.33.0 // indirect + golang.org/x/sys v0.28.0 // indirect + golang.org/x/text v0.21.0 // indirect + google.golang.org/protobuf v1.36.1 // indirect +)" + + write_file "main.go" "package main + +import ( + \"net/http\" + + \"github.com/gin-gonic/gin\" +) + +func main() { + r := gin.Default() + + // Serve static frontend files from static/ directory + r.Static(\"/static\", \"./static\") + r.StaticFile(\"/\", \"./static/index.html\") + + // API routes — all under /api/ + api := r.Group(\"/api\") + { + 
api.GET(\"/health\", func(c *gin.Context) { + c.JSON(http.StatusOK, gin.H{\"status\": \"healthy\"}) + }) + } + + r.Run(\":8080\") +}" + + write_file "static/index.html" " + + + + + $TITLE + + + +
+
+

$TITLE

+

Gin server is running.

+

API: /api/health

+
+
+ + +" + + write_file "static/app.js" "// Frontend JavaScript — fetches from API and updates the UI +console.log('$TITLE frontend loaded')" + + write_file ".gitignore" "bin/ +*.exe +*.test +*.out +.env +vendor/" +} + +scaffold_springboot() { + local GROUP_ID="com.example" + local ARTIFACT_ID="$PROJECT_NAME" + local PKG_PATH="com/example/${PROJECT_NAME//-/}" + local PKG_NAME="com.example.${PROJECT_NAME//-/}" + + write_file "pom.xml" " + + 4.0.0 + + + org.springframework.boot + spring-boot-starter-parent + 3.4.0 + + + + $GROUP_ID + $ARTIFACT_ID + 0.1.0 + $TITLE + + + 21 + + + + + org.springframework.boot + spring-boot-starter-web + + + org.springframework.boot + spring-boot-starter-test + test + + + + + + + org.springframework.boot + spring-boot-maven-plugin + + + +" + + write_file "src/main/resources/application.properties" "spring.application.name=$PROJECT_NAME +server.port=8080" + + # Spring Boot automatically serves files from src/main/resources/static/ + write_file "src/main/resources/static/index.html" " + + + + + $TITLE + + + +
+
+

$TITLE

+

Spring Boot server is running.

+

API: /api/health

+
+
+ + +" + + write_file "src/main/resources/static/app.js" "// Frontend JavaScript — fetches from API and updates the UI +console.log('$TITLE frontend loaded')" + + write_file "src/main/java/$PKG_PATH/Application.java" "package $PKG_NAME; + +import org.springframework.boot.SpringApplication; +import org.springframework.boot.autoconfigure.SpringBootApplication; + +@SpringBootApplication +public class Application { + public static void main(String[] args) { + SpringApplication.run(Application.class, args); + } +}" + + write_file "src/main/java/$PKG_PATH/HelloController.java" "package $PKG_NAME; + +import org.springframework.web.bind.annotation.GetMapping; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RestController; + +import java.util.Map; + +@RestController +@RequestMapping(\"/api\") +public class HelloController { + + @GetMapping(\"/health\") + public Map health() { + return Map.of(\"status\", \"healthy\"); + } +}" + + write_file ".gitignore" "target/ +*.class +*.jar +.env +*.log +.idea/ +*.iml" + + # Maven wrapper for ./mvnw support + write_file ".mvn/wrapper/maven-wrapper.properties" "distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.9.9/apache-maven-3.9.9-bin.zip +wrapperUrl=https://repo.maven.apache.org/maven2/org/apache/maven/wrapper/maven-wrapper/3.3.2/maven-wrapper-3.3.2.jar" +} + +# --- Dispatch --- +case "$FRAMEWORK" in + react) scaffold_react ;; + vue) scaffold_vue ;; + vanilla) scaffold_vanilla ;; + node) scaffold_node ;; + python) scaffold_python ;; + golang|go) scaffold_golang ;; + spring-boot|springboot|spring) scaffold_springboot ;; + *) + echo "{\"error\": \"unknown framework: $FRAMEWORK. 
Options: react, vue, vanilla, node, python, golang, spring-boot\"}" >&2 + exit 1 + ;; +esac + +# --- Output result --- +FILES_JSON=$(printf '%s\n' "${CREATED_FILES[@]}" | jq -R -s 'split("\n") | map(select(length > 0))') +jq -n \ + --arg status "created" \ + --arg project_name "$PROJECT_NAME" \ + --arg framework "$FRAMEWORK" \ + --arg project_dir "$OUTPUT_DIR" \ + --argjson files "$FILES_JSON" \ + '{status: $status, project_name: $project_name, framework: $framework, project_dir: $project_dir, files: $files}' diff --git a/forge-skills/local/embedded/code-agent/scripts/code-agent-write.sh b/forge-skills/local/embedded/code-agent/scripts/code-agent-write.sh new file mode 100755 index 0000000..e8249d8 --- /dev/null +++ b/forge-skills/local/embedded/code-agent/scripts/code-agent-write.sh @@ -0,0 +1,89 @@ +#!/usr/bin/env bash +# code-agent-write.sh — Write or update a file in a project. +# Usage: ./code-agent-write.sh '{"project_dir": "my-app", "file_path": "src/App.jsx", "content": "..."}' +# +# Requires: jq +set -euo pipefail + +# --- Read input --- +INPUT="${1:-}" +if [ -z "$INPUT" ]; then + echo '{"error": "usage: code-agent-write.sh {\"project_dir\": \"...\", \"file_path\": \"...\", \"content\": \"...\"}"}' >&2 + exit 1 +fi + +if ! printf '%s' "$INPUT" | jq empty 2>/dev/null; then + echo '{"error": "invalid JSON input"}' >&2 + exit 1 +fi + +# --- Extract fields --- +PROJECT_DIR=$(printf '%s' "$INPUT" | jq -r '.project_dir // empty') +FILE_PATH=$(printf '%s' "$INPUT" | jq -r '.file_path // empty') + +if [ -z "$PROJECT_DIR" ]; then + echo '{"error": "project_dir is required"}' >&2 + exit 1 +fi +if [ -z "$FILE_PATH" ]; then + echo '{"error": "file_path is required"}' >&2 + exit 1 +fi +# Content can be empty (e.g. empty file), so check existence not emptiness +if ! 
printf '%s' "$INPUT" | jq -e 'has("content")' >/dev/null 2>&1; then + echo '{"error": "content is required"}' >&2 + exit 1 +fi + +# --- Path traversal prevention --- +case "$FILE_PATH" in + /*|*..*) + echo '{"error": "file_path must be relative and must not contain .."}' >&2 + exit 1 + ;; +esac + +# --- Resolve project_dir (relative paths resolve within workspace/) --- +# Strip workspace/ prefix if present (avoids double-prefix when LLM passes "workspace/foo") +PROJECT_DIR="${PROJECT_DIR#workspace/}" +if [ "${PROJECT_DIR:0:1}" != "/" ]; then + PROJECT_DIR="$(pwd)/workspace/$PROJECT_DIR" +fi + +# Create project dir if it doesn't exist +mkdir -p "$PROJECT_DIR" + +RESOLVED_PROJECT=$(cd "$PROJECT_DIR" && pwd) +FULL_PATH="$RESOLVED_PROJECT/$FILE_PATH" + +# Create parent directory +PARENT_DIR=$(dirname "$FULL_PATH") +mkdir -p "$PARENT_DIR" + +# Verify resolved path is still under project dir +RESOLVED_PARENT=$(cd "$PARENT_DIR" && pwd) +case "$RESOLVED_PARENT" in + "$RESOLVED_PROJECT"|"$RESOLVED_PROJECT"/*) + ;; + *) + echo '{"error": "file_path resolves outside project_dir"}' >&2 + exit 1 + ;; +esac + +# --- Determine action --- +ACTION="created" +if [ -f "$FULL_PATH" ]; then + ACTION="updated" +fi + +# --- Write file --- +printf '%s' "$INPUT" | jq -r '.content' > "$FULL_PATH" + +SIZE=$(wc -c < "$FULL_PATH" | tr -d ' ') + +jq -n \ + --arg path "$FILE_PATH" \ + --arg action "$ACTION" \ + --arg size "$SIZE" \ + '{path: $path, action: $action, size: ($size | tonumber)}' diff --git a/forge-skills/local/embedded/github/SKILL.md b/forge-skills/local/embedded/github/SKILL.md index 7dd0fed..d2a7681 100644 --- a/forge-skills/local/embedded/github/SKILL.md +++ b/forge-skills/local/embedded/github/SKILL.md @@ -7,21 +7,118 @@ tags: - issues - pull-requests - repositories -description: Create issues, PRs, and query repositories + - git +description: Create issues, PRs, clone repos, and manage git workflows metadata: forge: + workflow_phase: finalize requires: bins: - gh + - git + - 
jq env: - required: - - GH_TOKEN + required: [] one_of: [] - optional: [] + optional: + - GH_TOKEN egress_domains: - api.github.com - github.com --- +## System Prompt + +You have access to GitHub and git tools. You MUST use these tools for all git and GitHub operations. Do NOT use cli_execute or bash to run git commands directly. + +**When asked to clone, checkout, or work with a GitHub repository, ALWAYS start by calling `github_clone`.** This is the ONLY way to clone repositories. Do NOT attempt to use cli_execute, bash, or any other tool to run `git clone` directly. + +The `repo` parameter accepts any of these formats: +- `owner/repo` (e.g. `initializ-mk/openclaw`) +- SSH URL: `git@github.com:owner/repo.git` +- HTTPS URL: `https://github.com/owner/repo.git` + +**Required workflow for code changes:** + +1. `github_clone` — clone the repository (auto-creates a feature branch) +2. Explore: use `directory_tree`, `grep_search`, `glob_search`, `code_agent_read` to understand the codebase +3. Edit: use `code_agent_edit` or `code_agent_write` to make changes +4. `github_status` — review what changed before committing +5. `github_commit` — stage and commit changes +6. `github_push` — push the feature branch to remote +7. `github_create_pr` — create a pull request + +**File path convention:** +- `github_clone` clones repos into `workspace/`. The returned `project_dir` (e.g. `openclaw`) is the directory name inside `workspace/`. +- ALL tools that accept `project_dir` (github tools, code-agent tools) accept BOTH `openclaw` and `workspace/openclaw` — the `workspace/` prefix is stripped automatically. +- For `directory_tree`, `grep_search`, `glob_search` use `workspace/` as the `path` (e.g. `workspace/openclaw`). + +**You MUST complete the entire workflow — do NOT stop after exploring.** +When asked to fix a bug or make changes, you must: explore → understand → edit → commit → push → create PR. Do NOT stop after step 2 to report findings. Complete ALL steps in ONE session. 
Only stop early if you genuinely cannot determine what to change. + +**Exploration strategy — bug fixes:** +1. `directory_tree` to understand project structure. +2. `grep_search` for the error message, config key, or symptom from the bug description. +3. **Trace to the origin:** follow the error/value through call sites until you find where it is first produced or validated. Do not stop at the first file that mentions the symptom. +4. **Read what you will change:** before editing a function, read its implementation. Before replacing a function call, read both the old and new function to confirm the new one handles the same inputs correctly. +5. **Find a working reference:** if similar functionality works elsewhere in the codebase (e.g., another provider, another endpoint), read how it handles the same input. Replicate that approach, not a different one. +6. Form your hypothesis with evidence, then edit. +7. **Verify your fix:** after editing, trace the specific failing input through your new code path. Read the functions your new code calls and confirm they handle the input type that was failing (e.g., objects, not just strings). If your fix adds types but doesn't change runtime behavior, it is wrong. + +**Exploration strategy — features and refactors:** +1. `directory_tree` to understand project structure. +2. `grep_search` for existing patterns similar to what you need to add (2-3 searches). +3. Read the file(s) where you will add or modify code. +4. Follow existing conventions, then edit immediately. 
+ +**Do NOT:** +- Edit test files first — always fix the source code first, then update tests to match +- Read files unrelated to the error path or the code you plan to change +- Pattern-match on function names without reading their implementations +- Replace a function call with another without verifying both handle the same input types (e.g., objects vs strings) +- Keep searching after you have traced the error to its origin or found the insertion point +- Consider a fix complete without tracing the failing input through the new code to confirm it reaches the correct code path + +**Branch safety rules:** +- All work happens on feature branches — never on main/master. +- `github_clone` automatically creates a feature branch after cloning. +- `github_commit`, `github_push`, and `github_checkout` refuse to operate on main/master. +- Always use `github_status` before committing to review what changed. + +## Tool: github_clone + +Clone a GitHub repository and create a feature branch. + +**Input:** repo (string: owner/repo, SSH URL, or HTTPS URL), branch (string, optional: branch name — auto-generated if omitted), project_dir (string, optional: directory name — defaults to repo name) +**Output:** `{status, repo, branch, project_dir}` + +## Tool: github_status + +Show git status for a cloned project. + +**Input:** project_dir (string: project directory name) +**Output:** `{branch, modified[], staged[], untracked[], ahead, behind}` + +## Tool: github_commit + +Stage and commit changes on a feature branch. Refuses to commit on main/master. + +**Input:** project_dir (string), message (string: commit message), files (string[], optional: specific files to stage — stages all if omitted) +**Output:** `{sha, branch, files_changed}` + +## Tool: github_push + +Push a feature branch to the remote. Refuses to push main/master. 
+ +**Input:** project_dir (string), branch (string, optional: defaults to current branch) +**Output:** `{status, branch, sha, remote}` + +## Tool: github_checkout + +Switch to or create a branch. Refuses to switch to main/master. + +**Input:** project_dir (string), branch (string: target branch name), create (boolean, optional: create new branch — default false) +**Output:** `{status, branch}` + ## Tool: github_create_issue Create a GitHub issue. diff --git a/forge-skills/local/embedded/github/scripts/github-checkout.sh b/forge-skills/local/embedded/github/scripts/github-checkout.sh new file mode 100755 index 0000000..6692a1f --- /dev/null +++ b/forge-skills/local/embedded/github/scripts/github-checkout.sh @@ -0,0 +1,71 @@ +#!/usr/bin/env bash +# github-checkout.sh — Switch or create a branch in a project. +# Usage: ./github-checkout.sh '{"project_dir": "my-app", "branch": "feat/new-feature", "create": true}' +# +# Requires: git, jq +set -euo pipefail + +# --- Read input --- +INPUT="${1:-}" +if [ -z "$INPUT" ]; then + echo '{"error": "usage: github-checkout.sh {\"project_dir\": \"...\", \"branch\": \"...\", \"create\": true|false}"}' >&2 + exit 1 +fi +if ! 
printf '%s' "$INPUT" | jq empty 2>/dev/null; then + echo '{"error": "invalid JSON input"}' >&2 + exit 1 +fi + +PROJECT_DIR=$(printf '%s' "$INPUT" | jq -r '.project_dir // empty') +BRANCH=$(printf '%s' "$INPUT" | jq -r '.branch // empty') +CREATE=$(printf '%s' "$INPUT" | jq -r '.create // false') + +if [ -z "$PROJECT_DIR" ]; then + echo '{"error": "project_dir is required"}' >&2 + exit 1 +fi +if [ -z "$BRANCH" ]; then + echo '{"error": "branch is required"}' >&2 + exit 1 +fi + +# --- Path traversal prevention --- +case "$PROJECT_DIR" in + /*|*..*) + echo '{"error": "project_dir must be relative and must not contain .."}' >&2 + exit 1 + ;; +esac + +# --- Resolve workspace --- +# Strip workspace/ prefix if present (avoids double-prefix) +PROJECT_DIR="${PROJECT_DIR#workspace/}" +WORKSPACE="$(pwd)/workspace" +TARGET="$WORKSPACE/$PROJECT_DIR" + +if [ ! -d "$TARGET/.git" ]; then + echo "{\"error\": \"not a git repository: workspace/$PROJECT_DIR\"}" >&2 + exit 1 +fi + +cd "$TARGET" + +# --- Protected branch guard --- +case "$BRANCH" in + main|master) + echo '{"error": "refusing to switch to protected branch: '"$BRANCH"'. Stay on a feature branch."}' >&2 + exit 1 + ;; +esac + +# --- Checkout --- +if [ "$CREATE" = "true" ]; then + git checkout -b "$BRANCH" --quiet +else + git checkout "$BRANCH" --quiet +fi + +jq -n \ + --arg status "switched" \ + --arg branch "$BRANCH" \ + '{status: $status, branch: $branch}' diff --git a/forge-skills/local/embedded/github/scripts/github-clone.sh b/forge-skills/local/embedded/github/scripts/github-clone.sh new file mode 100755 index 0000000..cfe4da7 --- /dev/null +++ b/forge-skills/local/embedded/github/scripts/github-clone.sh @@ -0,0 +1,90 @@ +#!/usr/bin/env bash +# github-clone.sh — Clone a GitHub repository and create a feature branch. 
+# Usage: ./github-clone.sh '{"repo": "owner/repo", "branch": "feat/my-change", "project_dir": "my-app"}' +# +# Requires: gh, git, jq +set -euo pipefail + +# --- Read input --- +INPUT="${1:-}" +if [ -z "$INPUT" ]; then + echo '{"error": "usage: github-clone.sh {\"repo\": \"owner/repo\", \"branch\": \"...\", \"project_dir\": \"...\"}"}' >&2 + exit 1 +fi +if ! printf '%s' "$INPUT" | jq empty 2>/dev/null; then + echo '{"error": "invalid JSON input"}' >&2 + exit 1 +fi + +REPO=$(printf '%s' "$INPUT" | jq -r '.repo // empty') +BRANCH=$(printf '%s' "$INPUT" | jq -r '.branch // empty') +PROJECT_DIR=$(printf '%s' "$INPUT" | jq -r '.project_dir // empty') + +if [ -z "$REPO" ]; then + echo '{"error": "repo is required (e.g. owner/repo, git@github.com:owner/repo.git, or https://github.com/owner/repo.git)"}' >&2 + exit 1 +fi + +# --- Normalize repo format --- +# Convert SSH URL: git@github.com:owner/repo.git → owner/repo +if [[ "$REPO" == git@github.com:* ]]; then + REPO="${REPO#git@github.com:}" + REPO="${REPO%.git}" +fi +# Convert HTTPS URL: https://github.com/owner/repo.git → owner/repo +if [[ "$REPO" == https://github.com/* ]]; then + REPO="${REPO#https://github.com/}" + REPO="${REPO%.git}" +fi + +# Default project_dir to the repo name portion +if [ -z "$PROJECT_DIR" ]; then + PROJECT_DIR=$(basename "$REPO") +fi + +# --- Path traversal prevention --- +case "$PROJECT_DIR" in + /*|*..*) + echo '{"error": "project_dir must be relative and must not contain .."}' >&2 + exit 1 + ;; +esac + +# --- Resolve workspace --- +# Strip workspace/ prefix if present (avoids double-prefix) +PROJECT_DIR="${PROJECT_DIR#workspace/}" +WORKSPACE="$(pwd)/workspace" +mkdir -p "$WORKSPACE" +TARGET="$WORKSPACE/$PROJECT_DIR" + +if [ -d "$TARGET" ]; then + echo "{\"error\": \"directory already exists: workspace/$PROJECT_DIR\"}" >&2 + exit 1 +fi + +# --- Clone via gh (uses GH_TOKEN automatically) --- +if ! 
gh repo clone "$REPO" "$TARGET" -- --quiet 2>/dev/null; then + echo "{\"error\": \"failed to clone $REPO\"}" >&2 + exit 1 +fi + +cd "$TARGET" + +# --- Create feature branch --- +if [ -z "$BRANCH" ]; then + BRANCH="forge/$(date +%Y%m%d)-$$-$RANDOM" +fi + +# Refuse to stay on main/master +git checkout -b "$BRANCH" --quiet + +# --- Configure git user at repo level --- +git config user.email "266392669+useforgeai@users.noreply.github.com" +git config user.name "Forge Agent" + +jq -n \ + --arg status "cloned" \ + --arg repo "$REPO" \ + --arg branch "$BRANCH" \ + --arg project_dir "$PROJECT_DIR" \ + '{status: $status, repo: $repo, branch: $branch, project_dir: $project_dir}' diff --git a/forge-skills/local/embedded/github/scripts/github-commit.sh b/forge-skills/local/embedded/github/scripts/github-commit.sh new file mode 100755 index 0000000..d46f130 --- /dev/null +++ b/forge-skills/local/embedded/github/scripts/github-commit.sh @@ -0,0 +1,111 @@ +#!/usr/bin/env bash +# github-commit.sh — Stage and commit changes on a feature branch. +# Usage: ./github-commit.sh '{"project_dir": "my-app", "message": "fix: resolve login bug", "files": ["src/auth.go"]}' +# +# Requires: git, jq +set -euo pipefail + +# --- Read input --- +INPUT="${1:-}" +if [ -z "$INPUT" ]; then + echo '{"error": "usage: github-commit.sh {\"project_dir\": \"...\", \"message\": \"...\", \"files\": [...]}"}' >&2 + exit 1 +fi +if ! 
printf '%s' "$INPUT" | jq empty 2>/dev/null; then + echo '{"error": "invalid JSON input"}' >&2 + exit 1 +fi + +PROJECT_DIR=$(printf '%s' "$INPUT" | jq -r '.project_dir // empty') +MESSAGE=$(printf '%s' "$INPUT" | jq -r '.message // empty') + +if [ -z "$PROJECT_DIR" ]; then + echo '{"error": "project_dir is required"}' >&2 + exit 1 +fi +if [ -z "$MESSAGE" ]; then + echo '{"error": "message is required"}' >&2 + exit 1 +fi + +# --- Path traversal prevention --- +case "$PROJECT_DIR" in + /*|*..*) + echo '{"error": "project_dir must be relative and must not contain .."}' >&2 + exit 1 + ;; +esac + +# --- Resolve workspace --- +# Strip workspace/ prefix if present (avoids double-prefix) +PROJECT_DIR="${PROJECT_DIR#workspace/}" +WORKSPACE="$(pwd)/workspace" +TARGET="$WORKSPACE/$PROJECT_DIR" + +if [ ! -d "$TARGET/.git" ]; then + echo "{\"error\": \"not a git repository: workspace/$PROJECT_DIR\"}" >&2 + exit 1 +fi + +cd "$TARGET" + +# --- Protected branch guard --- +BRANCH=$(git branch --show-current) +case "$BRANCH" in + main|master) + echo '{"error": "refusing to commit on protected branch: '"$BRANCH"'. Create or switch to a feature branch first."}' >&2 + exit 1 + ;; +esac + +# --- Configure git user (repo-level, idempotent) --- +git config user.email "266392669+useforgeai@users.noreply.github.com" +git config user.name "Forge Agent" + +# --- Stage files --- +# Normalize: accept files as a string (single file, space-separated, or newline-separated) or array. +# The LLM sometimes passes "files": "path" or "files": "a b c" instead of "files": ["a","b","c"]. +NORMALIZED_INPUT=$(printf '%s' "$INPUT" | jq ' + if (.files | type) == "string" then + .files = (.files | split("\n") | map(split(" ")) | flatten | map(select(length > 0))) + else . 
end') +HAS_FILES=$(printf '%s' "$NORMALIZED_INPUT" | jq 'has("files") and (.files | length > 0)') +if [ "$HAS_FILES" = "true" ]; then + # Stage specific files + while IFS= read -r file; do + # Validate each file path + case "$file" in + /*|*..*) + echo "{\"error\": \"file path must be relative and must not contain ..: $file\"}" >&2 + exit 1 + ;; + esac + git add -- "$file" + done < <(printf '%s' "$NORMALIZED_INPUT" | jq -r '.files[]') +else + # Stage all changes + git add -A +fi + +# --- Check for staged changes --- +if git diff --cached --quiet; then + echo '{"error": "no changes staged to commit"}' >&2 + exit 1 +fi + +# --- Append co-authored-by trailer --- +FULL_MESSAGE="$MESSAGE + +Co-authored-by: Forge Agent <266392669+useforgeai@users.noreply.github.com>" + +# --- Commit --- +git commit -m "$FULL_MESSAGE" --quiet + +SHA=$(git rev-parse --short HEAD) +FILES_CHANGED=$(git diff-tree --no-commit-id --name-only -r HEAD | wc -l | tr -d ' ') + +jq -n \ + --arg sha "$SHA" \ + --arg branch "$BRANCH" \ + --arg files_changed "$FILES_CHANGED" \ + '{sha: $sha, branch: $branch, files_changed: ($files_changed | tonumber)}' diff --git a/forge-skills/local/embedded/github/scripts/github-create-pr.sh b/forge-skills/local/embedded/github/scripts/github-create-pr.sh new file mode 100755 index 0000000..0e7d71e --- /dev/null +++ b/forge-skills/local/embedded/github/scripts/github-create-pr.sh @@ -0,0 +1,73 @@ +#!/usr/bin/env bash +# github-create-pr.sh — Create a pull request on GitHub. +# Usage: ./github-create-pr.sh '{"repo": "owner/repo", "title": "Fix bug", "body": "Description", "head": "feat/branch", "base": "main"}' +# +# Requires: gh, git, jq +set -euo pipefail + +# --- Read input --- +INPUT="${1:-}" +if [ -z "$INPUT" ]; then + echo '{"error": "usage: github-create-pr.sh {\"repo\": \"...\", \"title\": \"...\", \"body\": \"...\", \"head\": \"...\", \"base\": \"...\"}"}' >&2 + exit 1 +fi +if ! 
printf '%s' "$INPUT" | jq empty 2>/dev/null; then + echo '{"error": "invalid JSON input"}' >&2 + exit 1 +fi + +REPO=$(printf '%s' "$INPUT" | jq -r '.repo // empty') +TITLE=$(printf '%s' "$INPUT" | jq -r '.title // empty') +BODY=$(printf '%s' "$INPUT" | jq -r '.body // empty') +HEAD=$(printf '%s' "$INPUT" | jq -r '.head // empty') +BASE=$(printf '%s' "$INPUT" | jq -r '.base // empty') + +if [ -z "$REPO" ]; then + echo '{"error": "repo is required"}' >&2 + exit 1 +fi +if [ -z "$TITLE" ]; then + echo '{"error": "title is required"}' >&2 + exit 1 +fi +if [ -z "$HEAD" ]; then + echo '{"error": "head branch is required"}' >&2 + exit 1 +fi + +# --- Normalize repo format --- +# Convert SSH URL: git@github.com:owner/repo.git → owner/repo +if [[ "$REPO" == git@github.com:* ]]; then + REPO="${REPO#git@github.com:}" + REPO="${REPO%.git}" +fi +# Convert HTTPS URL: https://github.com/owner/repo.git → owner/repo +if [[ "$REPO" == https://github.com/* ]]; then + REPO="${REPO#https://github.com/}" + REPO="${REPO%.git}" +fi + +# Default base branch to main +if [ -z "$BASE" ]; then + BASE="main" +fi + +# --- Create PR via gh CLI --- +PR_URL=$(gh pr create \ + --repo "$REPO" \ + --title "$TITLE" \ + --body "$BODY" \ + --head "$HEAD" \ + --base "$BASE" 2>&1) || GH_STATUS=$? + +if [ "${GH_STATUS:-0}" -ne 0 ]; then + echo "{\"error\": \"failed to create PR: $PR_URL\"}" >&2 + exit 1 +fi + +jq -n \ + --arg url "$PR_URL" \ + --arg repo "$REPO" \ + --arg head "$HEAD" \ + --arg base "$BASE" \ + '{status: "created", url: $url, repo: $repo, head: $head, base: $base}' diff --git a/forge-skills/local/embedded/github/scripts/github-push.sh b/forge-skills/local/embedded/github/scripts/github-push.sh new file mode 100755 index 0000000..c2a8ae9 --- /dev/null +++ b/forge-skills/local/embedded/github/scripts/github-push.sh @@ -0,0 +1,77 @@ +#!/usr/bin/env bash +# github-push.sh — Push a feature branch to the remote. 
+# Usage: ./github-push.sh '{"project_dir": "my-app", "branch": "feat/my-change"}' +# +# Requires: git, jq +set -euo pipefail + +# --- Read input --- +INPUT="${1:-}" +if [ -z "$INPUT" ]; then + echo '{"error": "usage: github-push.sh {\"project_dir\": \"...\", \"branch\": \"...\"}"}' >&2 + exit 1 +fi +if ! printf '%s' "$INPUT" | jq empty 2>/dev/null; then + echo '{"error": "invalid JSON input"}' >&2 + exit 1 +fi + +PROJECT_DIR=$(printf '%s' "$INPUT" | jq -r '.project_dir // empty') +BRANCH=$(printf '%s' "$INPUT" | jq -r '.branch // empty') + +if [ -z "$PROJECT_DIR" ]; then + echo '{"error": "project_dir is required"}' >&2 + exit 1 +fi + +# --- Path traversal prevention --- +case "$PROJECT_DIR" in + /*|*..*) + echo '{"error": "project_dir must be relative and must not contain .."}' >&2 + exit 1 + ;; +esac + +# --- Resolve workspace --- +# Strip workspace/ prefix if present (avoids double-prefix) +PROJECT_DIR="${PROJECT_DIR#workspace/}" +WORKSPACE="$(pwd)/workspace" +TARGET="$WORKSPACE/$PROJECT_DIR" + +if [ ! -d "$TARGET/.git" ]; then + echo "{\"error\": \"not a git repository: workspace/$PROJECT_DIR\"}" >&2 + exit 1 +fi + +cd "$TARGET" + +# Default to current branch +if [ -z "$BRANCH" ]; then + BRANCH=$(git branch --show-current) +fi + +# --- Protected branch guard --- +case "$BRANCH" in + main|master) + echo '{"error": "refusing to push to protected branch: '"$BRANCH"'. 
Use a feature branch instead."}' >&2 + exit 1 + ;; +esac + +# --- Configure git credential helper for GH_TOKEN --- +if [ -n "${GH_TOKEN:-}" ]; then + git -c credential.helper="!f() { echo \"protocol=https\"; echo \"host=github.com\"; echo \"username=x-access-token\"; echo \"password=$GH_TOKEN\"; }; f" \ + push -u origin "$BRANCH" --quiet 2>&1 +else + git push -u origin "$BRANCH" --quiet 2>&1 +fi + +SHA=$(git rev-parse --short HEAD) +REMOTE=$(git remote get-url origin 2>/dev/null || echo "origin") + +jq -n \ + --arg status "pushed" \ + --arg branch "$BRANCH" \ + --arg sha "$SHA" \ + --arg remote "$REMOTE" \ + '{status: $status, branch: $branch, sha: $sha, remote: $remote}' diff --git a/forge-skills/local/embedded/github/scripts/github-status.sh b/forge-skills/local/embedded/github/scripts/github-status.sh new file mode 100755 index 0000000..1da0298 --- /dev/null +++ b/forge-skills/local/embedded/github/scripts/github-status.sh @@ -0,0 +1,74 @@ +#!/usr/bin/env bash +# github-status.sh — Show git status for a project. +# Usage: ./github-status.sh '{"project_dir": "my-app"}' +# +# Requires: git, jq +set -euo pipefail + +# --- Read input --- +INPUT="${1:-}" +if [ -z "$INPUT" ]; then + echo '{"error": "usage: github-status.sh {\"project_dir\": \"...\"}"}' >&2 + exit 1 +fi +if ! printf '%s' "$INPUT" | jq empty 2>/dev/null; then + echo '{"error": "invalid JSON input"}' >&2 + exit 1 +fi + +PROJECT_DIR=$(printf '%s' "$INPUT" | jq -r '.project_dir // empty') + +if [ -z "$PROJECT_DIR" ]; then + echo '{"error": "project_dir is required"}' >&2 + exit 1 +fi + +# --- Path traversal prevention --- +case "$PROJECT_DIR" in + /*|*..*) + echo '{"error": "project_dir must be relative and must not contain .."}' >&2 + exit 1 + ;; +esac + +# --- Resolve workspace --- +# Strip workspace/ prefix if present (avoids double-prefix) +PROJECT_DIR="${PROJECT_DIR#workspace/}" +WORKSPACE="$(pwd)/workspace" +TARGET="$WORKSPACE/$PROJECT_DIR" + +if [ ! 
-d "$TARGET/.git" ]; then + echo "{\"error\": \"not a git repository: workspace/$PROJECT_DIR\"}" >&2 + exit 1 +fi + +cd "$TARGET" + +# --- Gather status --- +BRANCH=$(git branch --show-current) + +# Modified (unstaged) +MODIFIED=$(git diff --name-only 2>/dev/null | jq -R -s 'split("\n") | map(select(. != ""))') + +# Staged +STAGED=$(git diff --cached --name-only 2>/dev/null | jq -R -s 'split("\n") | map(select(. != ""))') + +# Untracked +UNTRACKED=$(git ls-files --others --exclude-standard 2>/dev/null | jq -R -s 'split("\n") | map(select(. != ""))') + +# Ahead/behind upstream (if tracking branch exists) +AHEAD=0 +BEHIND=0 +if git rev-parse --abbrev-ref '@{upstream}' >/dev/null 2>&1; then + AHEAD=$(git rev-list --count '@{upstream}..HEAD' 2>/dev/null || echo 0) + BEHIND=$(git rev-list --count 'HEAD..@{upstream}' 2>/dev/null || echo 0) +fi + +jq -n \ + --arg branch "$BRANCH" \ + --argjson modified "$MODIFIED" \ + --argjson staged "$STAGED" \ + --argjson untracked "$UNTRACKED" \ + --arg ahead "$AHEAD" \ + --arg behind "$BEHIND" \ + '{branch: $branch, modified: $modified, staged: $staged, untracked: $untracked, ahead: ($ahead | tonumber), behind: ($behind | tonumber)}' diff --git a/forge-skills/local/registry_embedded_test.go b/forge-skills/local/registry_embedded_test.go index 7247e00..0bf0f5f 100644 --- a/forge-skills/local/registry_embedded_test.go +++ b/forge-skills/local/registry_embedded_test.go @@ -16,12 +16,12 @@ func TestEmbeddedRegistry_DiscoverAll(t *testing.T) { t.Fatalf("List error: %v", err) } - if len(skills) != 12 { + if len(skills) != 13 { names := make([]string, len(skills)) for i, s := range skills { names[i] = s.Name } - t.Fatalf("expected 12 skills, got %d: %v", len(skills), names) + t.Fatalf("expected 13 skills, got %d: %v", len(skills), names) } // Verify all expected skills are present @@ -31,7 +31,8 @@ func TestEmbeddedRegistry_DiscoverAll(t *testing.T) { hasBins bool hasEgress bool }{ - "github": {displayName: "Github", hasEnv: true, hasBins: 
true, hasEgress: true}, + "code-agent": {displayName: "Code Agent", hasEnv: false, hasBins: false, hasEgress: false}, + "github": {displayName: "Github", hasEnv: false, hasBins: true, hasEgress: true}, "weather": {displayName: "Weather", hasEnv: false, hasBins: true, hasEgress: true}, "tavily-search": {displayName: "Tavily Search", hasEnv: true, hasBins: true, hasEgress: true}, "tavily-research": {displayName: "Tavily Research", hasEnv: true, hasBins: true, hasEgress: true}, @@ -42,7 +43,7 @@ func TestEmbeddedRegistry_DiscoverAll(t *testing.T) { "codegen-react": {displayName: "Codegen React", hasEnv: false, hasBins: true, hasEgress: true}, "codegen-html": {displayName: "Codegen Html", hasEnv: false, hasBins: true, hasEgress: true}, "k8s-pod-rightsizer": {displayName: "K8s Pod Rightsizer", hasEnv: false, hasBins: true, hasEgress: false}, - "k8s-cost-visibility": {displayName: "K8s Cost Visibility", hasEnv: false, hasBins: true, hasEgress: true}, + "k8s-cost-visibility": {displayName: "K8s Cost Visibility", hasEnv: false, hasBins: true, hasEgress: true}, } for _, s := range skills { @@ -79,17 +80,18 @@ func TestEmbeddedRegistry_GitHubDetails(t *testing.T) { if s == nil { t.Fatal("Get(\"github\") returned nil") } - if s.Description != "Create issues, PRs, and query repositories" { + if s.Description != "Create issues, PRs, clone repos, and manage git workflows" { t.Errorf("Description = %q", s.Description) } if s.Icon != "🐙" { t.Errorf("Icon = %q, want 🐙", s.Icon) } - if len(s.RequiredEnv) != 1 || s.RequiredEnv[0] != "GH_TOKEN" { - t.Errorf("RequiredEnv = %v", s.RequiredEnv) + if len(s.RequiredEnv) != 0 { + t.Errorf("RequiredEnv = %v, want empty (GH_TOKEN is optional)", s.RequiredEnv) } - if len(s.RequiredBins) != 1 || s.RequiredBins[0] != "gh" { - t.Errorf("RequiredBins = %v", s.RequiredBins) + expectedBins := []string{"gh", "git", "jq"} + if len(s.RequiredBins) != len(expectedBins) { + t.Errorf("RequiredBins = %v, want %v", s.RequiredBins, expectedBins) } 
foundDomain := false diff --git a/forge-skills/requirements/derive.go b/forge-skills/requirements/derive.go index be32a23..50d6603 100644 --- a/forge-skills/requirements/derive.go +++ b/forge-skills/requirements/derive.go @@ -6,8 +6,16 @@ import ( "github.com/initializ/forge/forge-skills/contract" ) +// deniedShells lists shell interpreters that must never appear in the +// cli_execute allowlist. Shells bypass the no-shell exec.Command security +// model, so they are excluded even when a skill declares them in requires.bins. +var deniedShells = map[string]bool{ + "bash": true, "sh": true, "zsh": true, "dash": true, + "ksh": true, "csh": true, "tcsh": true, "fish": true, +} + // DeriveCLIConfig produces cli_execute configuration from aggregated requirements. -// AllowedBinaries = reqs.Bins, EnvPassthrough = union of all env vars. +// AllowedBinaries = reqs.Bins (minus shell interpreters), EnvPassthrough = union of all env vars. func DeriveCLIConfig(reqs *contract.AggregatedRequirements) *contract.DerivedCLIConfig { if reqs == nil { return &contract.DerivedCLIConfig{} @@ -35,12 +43,23 @@ func DeriveCLIConfig(reqs *contract.AggregatedRequirements) *contract.DerivedCLI sort.Strings(envPass) } + // Filter out shell interpreters — they are blocked by cli_execute anyway + // but including them confuses the LLM (they appear in the enum/description + // yet always fail, causing the LLM to attempt shell commands via cli_execute). 
+ var bins []string + for _, b := range reqs.Bins { + if !deniedShells[b] { + bins = append(bins, b) + } + } + return &contract.DerivedCLIConfig{ - AllowedBinaries: reqs.Bins, // already sorted from AggregateRequirements + AllowedBinaries: bins, EnvPassthrough: envPass, TimeoutHint: reqs.MaxTimeoutHint, - DeniedTools: reqs.DeniedTools, // already sorted from AggregateRequirements - EgressDomains: reqs.EgressDomains, // already sorted from AggregateRequirements + DeniedTools: reqs.DeniedTools, // already sorted from AggregateRequirements + EgressDomains: reqs.EgressDomains, // already sorted from AggregateRequirements + WorkflowPhases: reqs.WorkflowPhases, // already sorted from AggregateRequirements } } diff --git a/forge-skills/requirements/derive_test.go b/forge-skills/requirements/derive_test.go index bbf9c52..f4e8ce6 100644 --- a/forge-skills/requirements/derive_test.go +++ b/forge-skills/requirements/derive_test.go @@ -36,6 +36,40 @@ func TestDerive_Basic(t *testing.T) { } } +func TestDerive_FiltersShellInterpreters(t *testing.T) { + reqs := &contract.AggregatedRequirements{ + Bins: []string{"bash", "curl", "gh", "jq", "sh", "zsh"}, + } + + cfg := DeriveCLIConfig(reqs) + + // bash, sh, zsh should be filtered out + expected := []string{"curl", "gh", "jq"} + if len(cfg.AllowedBinaries) != len(expected) { + t.Fatalf("AllowedBinaries = %v, want %v", cfg.AllowedBinaries, expected) + } + for i, v := range expected { + if cfg.AllowedBinaries[i] != v { + t.Errorf("AllowedBinaries[%d] = %q, want %q", i, cfg.AllowedBinaries[i], v) + } + } +} + +func TestDerive_WorkflowPhasesPassthrough(t *testing.T) { + reqs := &contract.AggregatedRequirements{ + WorkflowPhases: []string{"edit", "finalize"}, + } + + cfg := DeriveCLIConfig(reqs) + + if len(cfg.WorkflowPhases) != 2 { + t.Fatalf("WorkflowPhases = %v, want 2 items", cfg.WorkflowPhases) + } + if cfg.WorkflowPhases[0] != "edit" || cfg.WorkflowPhases[1] != "finalize" { + t.Errorf("WorkflowPhases = %v, want [edit finalize]", 
cfg.WorkflowPhases) + } +} + func TestMerge_ExplicitOverrides(t *testing.T) { explicit := &contract.DerivedCLIConfig{ AllowedBinaries: []string{"python"}, diff --git a/forge-skills/requirements/requirements.go b/forge-skills/requirements/requirements.go index 5ee21a9..ae2b83a 100644 --- a/forge-skills/requirements/requirements.go +++ b/forge-skills/requirements/requirements.go @@ -19,6 +19,7 @@ func AggregateRequirements(entries []contract.SkillEntry) *contract.AggregatedRe optSet := make(map[string]bool) deniedSet := make(map[string]bool) egressSet := make(map[string]bool) + phaseSet := make(map[string]bool) var oneOfGroups [][]string for _, e := range entries { @@ -43,6 +44,11 @@ func AggregateRequirements(entries []contract.SkillEntry) *contract.AggregatedRe } } } + if raw, ok := forgeMap["workflow_phase"]; ok { + if s, ok := raw.(string); ok && s != "" { + phaseSet[s] = true + } + } } } @@ -73,10 +79,11 @@ func AggregateRequirements(entries []contract.SkillEntry) *contract.AggregatedRe } agg := &contract.AggregatedRequirements{ - Bins: sortedKeys(binSet), - EnvOneOf: oneOfGroups, - DeniedTools: sortedKeys(deniedSet), - EgressDomains: sortedKeys(egressSet), + Bins: sortedKeys(binSet), + EnvOneOf: oneOfGroups, + DeniedTools: sortedKeys(deniedSet), + EgressDomains: sortedKeys(egressSet), + WorkflowPhases: sortedKeys(phaseSet), } agg.EnvRequired = sortedKeys(reqSet) agg.EnvOptional = sortedKeys(optSet) diff --git a/forge-skills/requirements/requirements_test.go b/forge-skills/requirements/requirements_test.go index c0d21e6..00db3b4 100644 --- a/forge-skills/requirements/requirements_test.go +++ b/forge-skills/requirements/requirements_test.go @@ -160,6 +160,54 @@ func TestAggregate_DeniedToolsCollected(t *testing.T) { } } +func TestAggregate_WorkflowPhaseCollected(t *testing.T) { + entries := []contract.SkillEntry{ + { + Name: "code-agent", + Metadata: &contract.SkillMetadata{ + Metadata: map[string]map[string]any{ + "forge": { + "workflow_phase": "edit", + }, + }, 
+ }, + }, + { + Name: "github", + Metadata: &contract.SkillMetadata{ + Metadata: map[string]map[string]any{ + "forge": { + "workflow_phase": "finalize", + }, + }, + }, + }, + { + Name: "k8s-cost", + // No workflow_phase — should not contribute + Metadata: &contract.SkillMetadata{ + Metadata: map[string]map[string]any{ + "forge": { + "requires": map[string]any{}, + }, + }, + }, + }, + } + + reqs := AggregateRequirements(entries) + // Should be deduplicated and sorted: edit, finalize + if len(reqs.WorkflowPhases) != 2 { + t.Fatalf("WorkflowPhases = %v, want 2 items", reqs.WorkflowPhases) + } + expected := []string{"edit", "finalize"} + for i, v := range expected { + if reqs.WorkflowPhases[i] != v { + t.Errorf("WorkflowPhases[%d] = %q, want %q", i, reqs.WorkflowPhases[i], v) + } + } +} + func TestAggregate_NoRequirements(t *testing.T) { entries := []contract.SkillEntry{ {Name: "a"}, diff --git a/forge-ui/handlers_create.go b/forge-ui/handlers_create.go index 62fcd60..781bc36 100644 --- a/forge-ui/handlers_create.go +++ b/forge-ui/handlers_create.go @@ -29,20 +29,21 @@ func (s *UIServer) handleGetWizardMeta(w http.ResponseWriter, _ *http.Request) { // Per-provider model lists meta.ProviderModels = map[string]ProviderModels{ "openai": { - Default: "gpt-5.2-2025-12-11", + Default: "gpt-5.4", NeedsKey: true, HasOAuth: true, SupportsOrgID: true, APIKey: []ModelOption{ - {DisplayName: "GPT 5.2", ModelID: "gpt-5.2-2025-12-11"}, - {DisplayName: "GPT 5 Mini", ModelID: "gpt-5-mini-2025-08-07"}, - {DisplayName: "GPT 5 Nano", ModelID: "gpt-5-nano-2025-08-07"}, - {DisplayName: "GPT 4.1 Mini", ModelID: "gpt-4.1-mini-2025-04-14"}, + {DisplayName: "GPT 5.4", ModelID: "gpt-5.4"}, + {DisplayName: "GPT 5 Mini", ModelID: "gpt-5-mini"}, + {DisplayName: "GPT 5 Nano", ModelID: "gpt-5-nano"}, + {DisplayName: "GPT 4.1", ModelID: "gpt-4.1"}, }, OAuth: []ModelOption{ - {DisplayName: "GPT 5.3 Codex", ModelID: "gpt-5.3-codex"}, - {DisplayName: "GPT 5.2", ModelID: "gpt-5.2-2025-12-11"}, - 
{DisplayName: "GPT 5.2 Codex", ModelID: "gpt-5.2-codex"}, + {DisplayName: "GPT 5.4", ModelID: "gpt-5.4"}, + {DisplayName: "GPT 5 Mini", ModelID: "gpt-5-mini"}, + {DisplayName: "GPT 5 Nano", ModelID: "gpt-5-nano"}, + {DisplayName: "GPT 4.1", ModelID: "gpt-4.1"}, }, }, "anthropic": { diff --git a/skills/code-agent/SKILL.md b/skills/code-agent/SKILL.md new file mode 100644 index 0000000..e716a35 --- /dev/null +++ b/skills/code-agent/SKILL.md @@ -0,0 +1,279 @@ +--- +name: code-agent +icon: 💻 +category: developer +tags: + - coding + - development + - debugging + - refactoring +description: General-purpose coding agent that reads, writes, and edits code, and searches codebases. +metadata: + forge: + requires: + bins: + - bash + - jq + env: + required: [] + one_of: [] + optional: [] + egress_domains: + # Node.js / npm + - registry.npmjs.org + # Tailwind CSS CDN (used by scaffold templates) + - cdn.tailwindcss.com + # Python / pip + - pypi.org + - files.pythonhosted.org + # Go modules + - proxy.golang.org + - sum.golang.org + - storage.googleapis.com + # Maven Central (Spring Boot) + - repo.maven.apache.org + - repo1.maven.org + denied_tools: + - bash_execute + - file_write + - file_edit + - file_patch + - file_read + - schedule_set + - schedule_delete + - schedule_list + - schedule_history + timeout_hint: 120 +--- + +# Code Agent + +You are an autonomous coding agent. You EXECUTE — you do NOT describe, plan, or ask. + +## ABSOLUTE RULES (DO NOT VIOLATE) + +1. **Every response MUST include tool calls.** A response with only text is a failure. If you have something to say, say it AND call tools in the same response. + +2. **NEVER say "I'll do X now" without doing X.** No planning text. No "Let me patch that." JUST DO IT — call the tools. + +3. **NEVER ask for confirmation.** Do not ask "Should I proceed?" or "Would you like me to...?" — just act. + +4. **NEVER output code in markdown blocks.** You have file tools. Use them. + +5. 
**Complete the ENTIRE request in ONE turn.** Scaffold + write all files + run — all in a single response. + +6. **ONE project per app. NEVER create multiple projects for a single application.** Full-stack apps use ONE project where the backend serves the frontend. + +7. **NEVER scaffold over existing code.** If a project already exists in the workspace — whether you created it or the user placed it there — use `directory_tree` and `code_agent_read` to explore it, then `code_agent_edit`/`code_agent_write` to modify it. Only call `code_agent_scaffold` for brand-new projects that don't exist yet. + +## Iteration Rules (CRITICAL) + +When continuing a conversation about an existing project: +- **DO NOT create a new project.** The project already exists — modify it in place. +- **DO NOT call `code_agent_scaffold`.** The skeleton already exists. +- Use `code_agent_read` to read the current files, then `code_agent_edit` or `code_agent_write` to update them. +- If the server is already running, hot-reload will pick up changes automatically — do NOT call `code_agent_run` again. +- If the user asks to switch frameworks (e.g., "add a Go backend"), rewrite files in the SAME project directory. Do NOT create a second project. + +## Full-Stack Architecture (CRITICAL) + +Every backend framework scaffold includes a `static/` directory for frontend files. The backend serves both API routes AND the frontend UI. + +**NEVER create separate projects for frontend and backend.** Use ONE project: + +| Framework | Frontend Location | API Prefix | How It Works | +|-----------|------------------|------------|--------------| +| `node` | `public/` | `/api/` | Express serves static files from `public/` | +| `python` | `static/` | `/api/` | FastAPI mounts `StaticFiles` from `static/` | +| `golang` | `static/` | `/api/` | Gin serves `static/` directory | +| `spring-boot` | `src/main/resources/static/` | `/api/` | Spring Boot auto-serves from resources/static | + +For full-stack apps: +1. 
Scaffold with the backend framework +2. Write API routes in the backend code +3. Write HTML/JS/CSS in the frontend location above +4. Frontend JS fetches from `/api/...` endpoints +5. ONE `code_agent_run` starts everything + +## One-Shot Workflow + +### New Project (nothing exists yet) +``` +1. code_agent_scaffold → create skeleton +2. code_agent_write → write ALL source files (call multiple times) +3. code_agent_run → install deps + start server + open browser +4. Brief summary + URL +``` + +### Existing Codebase (first time seeing it) +When the user asks you to work on code that already exists in the workspace: +``` +1. directory_tree → discover project structure +2. code_agent_read → read key files to understand the codebase +3. code_agent_edit → apply changes (or code_agent_write for new files) +4. code_agent_run → start the server if not already running +5. Brief summary of changes +``` +**NEVER scaffold over existing code.** Explore it first, then modify in place. + +### Modify Existing Project (continuing conversation) +``` +1. code_agent_read → read file(s) to change +2. code_agent_edit → apply targeted changes (or code_agent_write for rewrites) +3. Brief summary of changes +``` +Do NOT call `code_agent_run` again if the server is already running — hot-reload handles it. + +Do NOT stop after step 1. Complete ALL steps in ONE response. 
+ +## Tool Reference + +| Tool | When to Use | +|------|-------------| +| `code_agent_scaffold` | Bootstrap a NEW project only (never for existing projects) | +| `code_agent_write` | Create or overwrite files | +| `code_agent_edit` | Surgical text replacement in existing files | +| `code_agent_read` | Read a file or list directory | +| `code_agent_run` | Install deps + start server + open browser (call once) | +| `grep_search` | Search file contents by regex | +| `glob_search` | Find files by name pattern | +| `directory_tree` | Show project directory tree | + +### Rules + +- All `project_dir` values are relative names (e.g., `my-app`), NOT absolute paths +- All `file_path` values are relative to `project_dir` (e.g., `src/main.jsx`) +- For frontend frameworks (react, vue, vanilla): only modify files under `src/` — never modify `src/main.jsx` +- Use Tailwind CSS utility classes for styling (loaded via CDN) + +## Scaffold Conventions (DO NOT VIOLATE) + +These rules prevent build errors: + +1. **NEVER modify `src/main.jsx`** (React/Vue) — it is the entry point +2. **ALWAYS use named exports**: `export function ComponentName() {}`, NEVER `export default` +3. **Use Tailwind CSS classes** for all styling — the CDN is pre-loaded +4. Only modify `src/App.jsx` (or `src/App.vue`) and create new component files under `src/` + +## Safety + +- All file operations are confined to the project directory. Path traversal is blocked. +- Read files before editing to avoid mistakes. +- Do not create git commits unless explicitly asked. + +## Tool: code_agent_scaffold + +Bootstrap a new project skeleton. ONLY for new projects — never call on existing ones. 
+ +**Input:** + +| Parameter | Type | Required | Description | +|-----------|------|----------|-------------| +| project_name | string | yes | Project directory name (e.g., `my-app`) | +| framework | string | yes | One of: `react`, `vue`, `vanilla`, `node`, `python`, `golang`, `spring-boot` | +| title | string | no | Display title (defaults to project_name) | +| force | boolean | no | Overwrite existing project (default: false) | + +**Frameworks:** + +| Framework | Stack | Port | Frontend Dir | +|-----------|-------|------|-------------| +| `react` | Vite + React 19 + Tailwind | 5173 | `src/` | +| `vue` | Vite + Vue 3 + Tailwind | 5173 | `src/` | +| `vanilla` | Vite + vanilla JS + Tailwind | 5173 | `src/` | +| `node` | Express.js | 3000 | `public/` | +| `python` | FastAPI + uvicorn | 8000 | `static/` | +| `golang` | Go + Gin | 8080 | `static/` | +| `spring-boot` | Spring Boot + Maven | 8080 | `src/main/resources/static/` | + +**Output:** + +```json +{ + "status": "created", + "project_name": "my-app", + "framework": "react", + "project_dir": "/path/to/workspace/my-app", + "files": ["package.json", "vite.config.js", "index.html", "src/main.jsx", "src/App.jsx", ".gitignore"] +} +``` + +## Tool: code_agent_write + +Write or update a file. Creates directories automatically. + +**Input:** + +| Parameter | Type | Required | Description | +|-----------|------|----------|-------------| +| project_dir | string | yes | Project directory name | +| file_path | string | yes | Relative path (e.g., `src/App.jsx`) | +| content | string | yes | Complete file content | + +**Output:** + +```json +{"path": "src/App.jsx", "action": "created", "size": 312} +``` + +## Tool: code_agent_read + +Read a file or list directory contents. 
+ +**Input:** + +| Parameter | Type | Required | Description | +|-----------|------|----------|-------------| +| project_dir | string | yes | Project directory name | +| file_path | string | yes | Relative path, or `"."` for directory listing | + +**Output (file):** + +```json +{"path": "src/App.jsx", "content": "...", "size": 245, "modified": "2025-01-15T10:30:00Z"} +``` + +**Output (directory):** + +```json +{"path": ".", "type": "directory", "files": ["package.json", "src/App.jsx"]} +``` + +## Tool: code_agent_edit + +Surgical text replacement. `old_text` must match exactly once. + +**Input:** + +| Parameter | Type | Required | Description | +|-----------|------|----------|-------------| +| project_dir | string | yes | Project directory name | +| file_path | string | yes | Relative path | +| old_text | string | yes | Exact text to find (must match once) | +| new_text | string | yes | Replacement text | + +**Output:** + +```json +{"path": "src/App.jsx", "action": "edited", "size": 320, "diff": "..."} +``` + +## Tool: code_agent_run + +Install deps, start server, open browser. Auto-detects project type. + +Call **once** after writing all files. Server stays running — hot-reload handles changes. + +**Input:** + +| Parameter | Type | Required | Description | +|-----------|------|----------|-------------| +| project_dir | string | yes | Project directory name | + +**Output:** + +```json +{"status": "running", "url": "http://localhost:3000", "pid": 12345, "project_dir": "/path/to/my-app", "install": "installed", "type": "node", "command": "npm run dev"} +``` + +Supported: Node.js (package.json), Python (requirements.txt), Go (go.mod), Spring Boot (pom.xml), static HTML (index.html). 
diff --git a/skills/code-agent/scripts/code-agent-edit.sh b/skills/code-agent/scripts/code-agent-edit.sh new file mode 100755 index 0000000..be32180 --- /dev/null +++ b/skills/code-agent/scripts/code-agent-edit.sh @@ -0,0 +1,112 @@ +#!/usr/bin/env bash +# code-agent-edit.sh — Surgical string replacement in a project file. +# Usage: ./code-agent-edit.sh '{"project_dir": "my-app", "file_path": "src/App.jsx", "old_text": "Count: 0", "new_text": "Clicks: 0"}' +# +# Requires: jq +set -euo pipefail + +# --- Read input --- +INPUT="${1:-}" +if [ -z "$INPUT" ]; then + echo '{"error": "usage: code-agent-edit.sh {\"project_dir\": \"...\", \"file_path\": \"...\", \"old_text\": \"...\", \"new_text\": \"...\"}"}' >&2 + exit 1 +fi +if ! echo "$INPUT" | jq empty 2>/dev/null; then + echo '{"error": "invalid JSON input"}' >&2 + exit 1 +fi + +PROJECT_DIR=$(echo "$INPUT" | jq -r '.project_dir // empty') +FILE_PATH=$(echo "$INPUT" | jq -r '.file_path // empty') +OLD_TEXT=$(echo "$INPUT" | jq -r '.old_text // empty') +NEW_TEXT=$(echo "$INPUT" | jq -r '.new_text // empty') + +if [ -z "$PROJECT_DIR" ]; then + echo '{"error": "project_dir is required"}' >&2 + exit 1 +fi +if [ -z "$FILE_PATH" ]; then + echo '{"error": "file_path is required"}' >&2 + exit 1 +fi +if [ -z "$OLD_TEXT" ]; then + echo '{"error": "old_text is required"}' >&2 + exit 1 +fi + +# --- Path traversal prevention --- +case "$FILE_PATH" in + /*|*..*) + echo '{"error": "file_path must be relative and must not contain .."}' >&2 + exit 1 + ;; +esac + +# --- Resolve project_dir --- +if [ "${PROJECT_DIR:0:1}" != "/" ]; then + PROJECT_DIR="$(pwd)/workspace/$PROJECT_DIR" +fi + +if [ ! 
-d "$PROJECT_DIR" ]; then + echo "{\"error\": \"project directory not found: $PROJECT_DIR\"}" >&2 + exit 1 +fi + +RESOLVED_PROJECT=$(cd "$PROJECT_DIR" && pwd) +FULL_PATH="$RESOLVED_PROJECT/$FILE_PATH" + +# Verify path stays within project +RESOLVED_FULL=$(cd "$(dirname "$FULL_PATH")" 2>/dev/null && pwd)/$(basename "$FULL_PATH") 2>/dev/null || true +case "$RESOLVED_FULL" in + "$RESOLVED_PROJECT"/*) + ;; + *) + echo '{"error": "file_path resolves outside project_dir"}' >&2 + exit 1 + ;; +esac + +if [ ! -f "$FULL_PATH" ]; then + echo '{"error": "file not found: '"$FILE_PATH"'"}' >&2 + exit 1 +fi + +# --- Read original file --- +ORIGINAL=$(cat "$FULL_PATH") + +# --- Count occurrences of old_text --- +# Use awk for reliable substring counting (handles special chars better than grep) +COUNT=$(awk -v pat="$OLD_TEXT" 'BEGIN{c=0} {while(i=index($0,pat)){c++;$0=substr($0,i+length(pat))}} END{print c}' "$FULL_PATH") + +if [ "$COUNT" -eq 0 ]; then + echo '{"error": "old_text not found in file"}' >&2 + exit 1 +fi +if [ "$COUNT" -gt 1 ]; then + jq -n --arg count "$COUNT" \ + '{error: "old_text found multiple times — be more specific to match exactly once", occurrences: ($count | tonumber)}' >&2 + exit 1 +fi + +# --- Perform replacement --- +# Use python for reliable multi-line string replacement (avoids sed escaping issues) +python3 -c " +import sys, json +inp = json.loads(sys.argv[1]) +with open(sys.argv[2], 'r') as f: + content = f.read() +content = content.replace(inp['old_text'], inp['new_text'], 1) +with open(sys.argv[2], 'w') as f: + f.write(content) +" "$INPUT" "$FULL_PATH" + +# --- Generate diff --- +DIFF=$(diff -u <(echo "$ORIGINAL") <(cat "$FULL_PATH") 2>/dev/null || true) +MODIFIED_SIZE=$(wc -c < "$FULL_PATH" | tr -d ' ') + +jq -n \ + --arg path "$FILE_PATH" \ + --arg action "edited" \ + --arg size "$MODIFIED_SIZE" \ + --arg diff "$DIFF" \ + '{path: $path, action: $action, size: ($size | tonumber), diff: $diff}' diff --git 
a/skills/code-agent/scripts/code-agent-read.sh b/skills/code-agent/scripts/code-agent-read.sh new file mode 100755 index 0000000..608b10b --- /dev/null +++ b/skills/code-agent/scripts/code-agent-read.sh @@ -0,0 +1,101 @@ +#!/usr/bin/env bash +# code-agent-read.sh — Read a file or list a project directory. +# Usage: ./code-agent-read.sh '{"project_dir": "my-app", "file_path": "src/App.jsx"}' +# ./code-agent-read.sh '{"project_dir": "my-app", "file_path": "."}' +# +# Requires: jq +set -euo pipefail + +# --- Read input --- +INPUT="${1:-}" +if [ -z "$INPUT" ]; then + echo '{"error": "usage: code-agent-read.sh {\"project_dir\": \"...\", \"file_path\": \"...\"}"}' >&2 + exit 1 +fi + +if ! echo "$INPUT" | jq empty 2>/dev/null; then + echo '{"error": "invalid JSON input"}' >&2 + exit 1 +fi + +# --- Extract fields --- +PROJECT_DIR=$(echo "$INPUT" | jq -r '.project_dir // empty') +FILE_PATH=$(echo "$INPUT" | jq -r '.file_path // empty') + +if [ -z "$PROJECT_DIR" ]; then + echo '{"error": "project_dir is required"}' >&2 + exit 1 +fi +if [ -z "$FILE_PATH" ]; then + echo '{"error": "file_path is required"}' >&2 + exit 1 +fi + +# --- Path traversal prevention --- +case "$FILE_PATH" in + /*|*..*) + echo '{"error": "file_path must be relative and must not contain .."}' >&2 + exit 1 + ;; +esac + +# --- Resolve project_dir (relative paths resolve within workspace/) --- +if [ "${PROJECT_DIR:0:1}" != "/" ]; then + PROJECT_DIR="$(pwd)/workspace/$PROJECT_DIR" +fi + +if [ ! -d "$PROJECT_DIR" ]; then + echo "{\"error\": \"project directory not found: $PROJECT_DIR\"}" >&2 + exit 1 +fi + +RESOLVED_PROJECT=$(cd "$PROJECT_DIR" && pwd) + +# --- Directory listing --- +if [ "$FILE_PATH" = "." ]; then + FILES=$(cd "$RESOLVED_PROJECT" && find . -type f \ + ! -path './node_modules/*' \ + ! -path './.git/*' \ + ! -path './dist/*' \ + ! -path './__pycache__/*' \ + ! -path './venv/*' \ + ! -path './.venv/*' \ + ! 
-path './vendor/*' \ + -maxdepth 5 | sed 's|^\./||' | sort) + echo "$FILES" | jq -R -s '{ + path: ".", + type: "directory", + files: (split("\n") | map(select(length > 0))) + }' + exit 0 +fi + +# --- File read --- +FULL_PATH="$RESOLVED_PROJECT/$FILE_PATH" + +# Verify resolved path is still under project dir +RESOLVED_FULL=$(cd "$(dirname "$FULL_PATH")" 2>/dev/null && pwd)/$(basename "$FULL_PATH") 2>/dev/null || true +case "$RESOLVED_FULL" in + "$RESOLVED_PROJECT"/*) + ;; + *) + echo '{"error": "file_path resolves outside project_dir"}' >&2 + exit 1 + ;; +esac + +if [ ! -f "$FULL_PATH" ]; then + echo '{"error": "file not found: '"$FILE_PATH"'"}' >&2 + exit 1 +fi + +CONTENT=$(cat "$FULL_PATH") +SIZE=$(wc -c < "$FULL_PATH" | tr -d ' ') +MODIFIED=$(stat -f '%Sm' -t '%Y-%m-%dT%H:%M:%SZ' "$FULL_PATH" 2>/dev/null || stat --format='%y' "$FULL_PATH" 2>/dev/null || echo "unknown") + +jq -n \ + --arg path "$FILE_PATH" \ + --arg content "$CONTENT" \ + --arg size "$SIZE" \ + --arg modified "$MODIFIED" \ + '{path: $path, content: $content, size: ($size | tonumber), modified: $modified}' diff --git a/skills/code-agent/scripts/code-agent-run.sh b/skills/code-agent/scripts/code-agent-run.sh new file mode 100755 index 0000000..f9524f9 --- /dev/null +++ b/skills/code-agent/scripts/code-agent-run.sh @@ -0,0 +1,323 @@ +#!/usr/bin/env bash +# code-agent-run.sh — Install dependencies and start the dev server. +# Detects project type (Node, Python, Go, Spring Boot, static HTML) automatically. +# Usage: ./code-agent-run.sh '{"project_dir": "my-app"}' +# +# Requires: jq +set -euo pipefail + +# --- Read input --- +INPUT="${1:-}" +if [ -z "$INPUT" ]; then + echo '{"error": "usage: code-agent-run.sh {\"project_dir\": \"...\"}"}' >&2 + exit 1 +fi + +if ! 
echo "$INPUT" | jq empty 2>/dev/null; then + echo '{"error": "invalid JSON input"}' >&2 + exit 1 +fi + +# --- Extract fields --- +PROJECT_DIR=$(echo "$INPUT" | jq -r '.project_dir // empty') + +if [ -z "$PROJECT_DIR" ]; then + echo '{"error": "project_dir is required"}' >&2 + exit 1 +fi + +# --- Resolve path --- +# Relative paths resolve within workspace/ subdirectory (where code-agent file tools operate). +if [ "${PROJECT_DIR:0:1}" != "/" ]; then + PROJECT_DIR="$(pwd)/workspace/$PROJECT_DIR" +fi + +if [ ! -d "$PROJECT_DIR" ]; then + echo "{\"error\": \"project directory not found: $PROJECT_DIR\"}" >&2 + exit 1 +fi + +cd "$PROJECT_DIR" + +# --- Helper: open URL in browser --- +open_browser() { + local url="$1" + case "$(uname -s)" in + Darwin) open "$url" 2>/dev/null || true ;; + Linux) xdg-open "$url" 2>/dev/null || true ;; + esac +} + +# --- Detect project type and run --- + +# ===================== +# Node.js (package.json) +# ===================== +if [ -f "package.json" ]; then + # Install dependencies if needed + INSTALL_STATUS="skipped" + if [ ! -d "node_modules" ]; then + INSTALL_STATUS="installed" + if ! npm install --loglevel=error > .forge-install.log 2>&1; then + INSTALL_ERR=$(tail -5 .forge-install.log 2>/dev/null || echo "unknown error") + jq -n --arg err "npm install failed" --arg details "$INSTALL_ERR" \ + '{error: $err, details: $details}' >&2 + exit 1 + fi + fi + + # Determine start command + DEV_CMD="npm run dev" + if node -e "const p=require('./package.json'); process.exit(p.scripts && p.scripts.dev ? 0 : 1)" 2>/dev/null; then + DEV_CMD="npm run dev" + elif node -e "const p=require('./package.json'); process.exit(p.scripts && p.scripts.start ? 0 : 1)" 2>/dev/null; then + DEV_CMD="npm start" + else + DEV_CMD="npx vite --open" + fi + + # Start dev server in background + DEV_LOG="$PROJECT_DIR/.forge-dev.log" + nohup $DEV_CMD > "$DEV_LOG" 2>&1 & + DEV_PID=$! 
+ + # Wait for server to start + SERVER_URL="http://localhost:3000" + SERVER_READY=false + for i in 1 2 3 4 5 6 7 8; do + sleep 1 + if ! kill -0 "$DEV_PID" 2>/dev/null; then + DEV_ERR=$(tail -10 "$DEV_LOG" 2>/dev/null || echo "process exited") + jq -n --arg err "dev server failed to start" --arg details "$DEV_ERR" \ + '{error: $err, details: $details}' >&2 + exit 1 + fi + # Try to extract URL from output (works for Vite, Next.js, CRA, etc.) + ACTUAL_URL=$(grep -oE 'https?://localhost:[0-9]+' "$DEV_LOG" 2>/dev/null | head -1 || true) + if [ -n "$ACTUAL_URL" ]; then + SERVER_URL="$ACTUAL_URL" + SERVER_READY=true + break + fi + done + + # Open browser if server detected a URL, otherwise try to open the default + open_browser "$SERVER_URL" + + jq -n \ + --arg status "running" \ + --arg url "$SERVER_URL" \ + --arg pid "$DEV_PID" \ + --arg project_dir "$PROJECT_DIR" \ + --arg install "$INSTALL_STATUS" \ + --arg type "node" \ + --arg cmd "$DEV_CMD" \ + '{status: $status, url: $url, pid: ($pid | tonumber), project_dir: $project_dir, install: $install, type: $type, command: $cmd}' + exit 0 +fi + +# ===================== +# Python +# ===================== +if [ -f "requirements.txt" ] || [ -f "pyproject.toml" ] || [ -f "setup.py" ]; then + # Install dependencies + INSTALL_STATUS="skipped" + if [ -f "requirements.txt" ]; then + INSTALL_STATUS="installed" + if ! 
pip install -r requirements.txt > .forge-install.log 2>&1; then + INSTALL_ERR=$(tail -5 .forge-install.log 2>/dev/null || echo "unknown error") + jq -n --arg err "pip install failed" --arg details "$INSTALL_ERR" \ + '{error: $err, details: $details}' >&2 + exit 1 + fi + fi + + # Detect entry point + DEV_CMD="" + PORT=8000 + if [ -f "manage.py" ]; then + DEV_CMD="python manage.py runserver 0.0.0.0:$PORT" + elif [ -f "app.py" ]; then + DEV_CMD="python app.py" + PORT=5000 + elif [ -f "main.py" ]; then + DEV_CMD="python main.py" + else + # Fallback: try uvicorn or flask + if grep -q "fastapi\|uvicorn" requirements.txt 2>/dev/null; then + DEV_CMD="uvicorn main:app --reload --port $PORT" + elif grep -q "flask" requirements.txt 2>/dev/null; then + DEV_CMD="flask run --port $PORT" + else + DEV_CMD="python -m http.server $PORT" + fi + fi + + DEV_LOG="$PROJECT_DIR/.forge-dev.log" + nohup $DEV_CMD > "$DEV_LOG" 2>&1 & + DEV_PID=$! + + SERVER_URL="http://localhost:$PORT" + sleep 2 + + if ! kill -0 "$DEV_PID" 2>/dev/null; then + DEV_ERR=$(tail -10 "$DEV_LOG" 2>/dev/null || echo "process exited") + jq -n --arg err "server failed to start" --arg details "$DEV_ERR" \ + '{error: $err, details: $details}' >&2 + exit 1 + fi + + open_browser "$SERVER_URL" + + jq -n \ + --arg status "running" \ + --arg url "$SERVER_URL" \ + --arg pid "$DEV_PID" \ + --arg project_dir "$PROJECT_DIR" \ + --arg install "$INSTALL_STATUS" \ + --arg type "python" \ + --arg cmd "$DEV_CMD" \ + '{status: $status, url: $url, pid: ($pid | tonumber), project_dir: $project_dir, install: $install, type: $type, command: $cmd}' + exit 0 +fi + +# ===================== +# Go +# ===================== +if [ -f "go.mod" ]; then + # Download dependencies + INSTALL_STATUS="skipped" + if ! [ -d "vendor" ] && ! go env GOMODCACHE | xargs test -d 2>/dev/null; then + INSTALL_STATUS="installed" + fi + if ! 
go mod download > .forge-install.log 2>&1; then + INSTALL_ERR=$(tail -5 .forge-install.log 2>/dev/null || echo "unknown error") + jq -n --arg err "go mod download failed" --arg details "$INSTALL_ERR" \ + '{error: $err, details: $details}' >&2 + exit 1 + fi + + DEV_LOG="$PROJECT_DIR/.forge-dev.log" + nohup go run . > "$DEV_LOG" 2>&1 & + DEV_PID=$! + PORT=8080 + + # Wait for server to start (Go compiles first, may take a few seconds) + SERVER_READY=false + for i in 1 2 3 4 5 6 7 8 9 10; do + sleep 1 + if ! kill -0 "$DEV_PID" 2>/dev/null; then + DEV_ERR=$(tail -10 "$DEV_LOG" 2>/dev/null || echo "process exited") + jq -n --arg err "go run failed" --arg details "$DEV_ERR" \ + '{error: $err, details: $details}' >&2 + exit 1 + fi + ACTUAL_URL=$(grep -oE 'https?://[^[:space:]]+' "$DEV_LOG" 2>/dev/null | head -1 || true) + if [ -n "$ACTUAL_URL" ]; then + SERVER_READY=true + break + fi + done + + SERVER_URL="${ACTUAL_URL:-http://localhost:$PORT}" + open_browser "$SERVER_URL" + + jq -n \ + --arg status "running" \ + --arg url "$SERVER_URL" \ + --arg pid "$DEV_PID" \ + --arg project_dir "$PROJECT_DIR" \ + --arg install "$INSTALL_STATUS" \ + --arg type "go" \ + --arg cmd "go run ." \ + '{status: $status, url: $url, pid: ($pid | tonumber), project_dir: $project_dir, install: $install, type: $type, command: $cmd}' + exit 0 +fi + +# ===================== +# Spring Boot (pom.xml) +# ===================== +if [ -f "pom.xml" ]; then + # Determine Maven command + MVN_CMD="mvn" + if [ -f "mvnw" ]; then + chmod +x mvnw + MVN_CMD="./mvnw" + fi + + # Install dependencies + INSTALL_STATUS="installed" + if ! 
$MVN_CMD dependency:resolve -q > .forge-install.log 2>&1; then + INSTALL_ERR=$(tail -10 .forge-install.log 2>/dev/null || echo "unknown error") + jq -n --arg err "maven dependency install failed" --arg details "$INSTALL_ERR" \ + '{error: $err, details: $details}' >&2 + exit 1 + fi + + DEV_LOG="$PROJECT_DIR/.forge-dev.log" + DEV_CMD="$MVN_CMD spring-boot:run" + nohup $DEV_CMD > "$DEV_LOG" 2>&1 & + DEV_PID=$! + PORT=8080 + + # Spring Boot takes longer to start — wait up to 30 seconds + SERVER_READY=false + for i in $(seq 1 30); do + sleep 1 + if ! kill -0 "$DEV_PID" 2>/dev/null; then + DEV_ERR=$(tail -15 "$DEV_LOG" 2>/dev/null || echo "process exited") + jq -n --arg err "spring-boot:run failed" --arg details "$DEV_ERR" \ + '{error: $err, details: $details}' >&2 + exit 1 + fi + # Spring Boot logs: "Tomcat started on port 8080" or "Started Application in X seconds" + if grep -qE 'Started \w+ in|Tomcat started on port' "$DEV_LOG" 2>/dev/null; then + SERVER_READY=true + break + fi + done + + ACTUAL_URL=$(grep -oE 'https?://[^[:space:]]+' "$DEV_LOG" 2>/dev/null | head -1 || true) + SERVER_URL="${ACTUAL_URL:-http://localhost:$PORT}" + open_browser "$SERVER_URL" + + jq -n \ + --arg status "running" \ + --arg url "$SERVER_URL" \ + --arg pid "$DEV_PID" \ + --arg project_dir "$PROJECT_DIR" \ + --arg install "$INSTALL_STATUS" \ + --arg type "spring-boot" \ + --arg cmd "$DEV_CMD" \ + '{status: $status, url: $url, pid: ($pid | tonumber), project_dir: $project_dir, install: $install, type: $type, command: $cmd}' + exit 0 +fi + +# ===================== +# Static HTML (fallback) +# ===================== +if [ -f "index.html" ]; then + PORT=8080 + DEV_LOG="$PROJECT_DIR/.forge-dev.log" + nohup python3 -m http.server "$PORT" > "$DEV_LOG" 2>&1 & + DEV_PID=$! 
+ sleep 1 + + SERVER_URL="http://localhost:$PORT" + open_browser "$SERVER_URL" + + jq -n \ + --arg status "running" \ + --arg url "$SERVER_URL" \ + --arg pid "$DEV_PID" \ + --arg project_dir "$PROJECT_DIR" \ + --arg install "n/a" \ + --arg type "static" \ + --arg cmd "python3 -m http.server $PORT" \ + '{status: $status, url: $url, pid: ($pid | tonumber), project_dir: $project_dir, install: $install, type: $type, command: $cmd}' + exit 0 +fi + +# No known project type +echo '{"error": "could not detect project type — no package.json, requirements.txt, go.mod, pom.xml, or index.html found"}' >&2 +exit 1 diff --git a/skills/code-agent/scripts/code-agent-scaffold.sh b/skills/code-agent/scripts/code-agent-scaffold.sh new file mode 100755 index 0000000..7febaa2 --- /dev/null +++ b/skills/code-agent/scripts/code-agent-scaffold.sh @@ -0,0 +1,674 @@ +#!/usr/bin/env bash +# code-agent-scaffold.sh — Scaffold a project with a known-good skeleton. +# Usage: ./code-agent-scaffold.sh '{"project_name": "my-app", "framework": "react"}' +# +# Supported frameworks: react, vue, vanilla, node, python, golang, spring-boot +# Requires: jq +set -euo pipefail + +# --- Read input --- +INPUT="${1:-}" +if [ -z "$INPUT" ]; then + echo '{"error": "usage: code-agent-scaffold.sh {\"project_name\": \"...\", \"framework\": \"react\"}"}' >&2 + exit 1 +fi +if ! echo "$INPUT" | jq empty 2>/dev/null; then + echo '{"error": "invalid JSON input"}' >&2 + exit 1 +fi + +PROJECT_NAME=$(echo "$INPUT" | jq -r '.project_name // empty') +FRAMEWORK=$(echo "$INPUT" | jq -r '.framework // empty') +TITLE=$(echo "$INPUT" | jq -r '.title // empty') + +if [ -z "$PROJECT_NAME" ]; then + echo '{"error": "project_name is required"}' >&2 + exit 1 +fi +if [ -z "$FRAMEWORK" ]; then + echo '{"error": "framework is required. 
Options: react, vue, vanilla, node, python, golang, spring-boot"}' >&2 + exit 1 +fi +if [ -z "$TITLE" ]; then + TITLE="$PROJECT_NAME" +fi + +# --- Resolve output directory within workspace/ --- +OUTPUT_DIR="$(pwd)/workspace/$PROJECT_NAME" + +if [ -d "$OUTPUT_DIR" ] && [ "$(ls -A "$OUTPUT_DIR" 2>/dev/null)" ]; then + FORCE=$(echo "$INPUT" | jq -r '.force // false') + if [ "$FORCE" != "true" ]; then + echo '{"error": "project directory already exists and is not empty; set force: true to overwrite"}' >&2 + exit 1 + fi +fi + +mkdir -p "$OUTPUT_DIR" + +# Track created files +CREATED_FILES=() +write_file() { + local relpath="$1" + local content="$2" + local fullpath="$OUTPUT_DIR/$relpath" + mkdir -p "$(dirname "$fullpath")" + echo "$content" > "$fullpath" + CREATED_FILES+=("$relpath") +} + +# --- Framework templates --- + +scaffold_react() { + write_file "package.json" "{ + \"name\": \"$PROJECT_NAME\", + \"private\": true, + \"version\": \"0.1.0\", + \"type\": \"module\", + \"scripts\": { + \"dev\": \"vite --open\", + \"build\": \"vite build\", + \"preview\": \"vite preview\" + }, + \"dependencies\": { + \"react\": \"^19.0.0\", + \"react-dom\": \"^19.0.0\" + }, + \"devDependencies\": { + \"@vitejs/plugin-react\": \"^4.4.0\", + \"vite\": \"^6.0.0\" + } +}" + + write_file "vite.config.js" "import { defineConfig } from 'vite' +import react from '@vitejs/plugin-react' + +export default defineConfig({ + plugins: [react()], + server: { + port: 3000, + open: true, + }, +})" + + write_file "index.html" " + + + + + $TITLE + + + +
+ + +" + + write_file "src/main.jsx" "import { StrictMode } from 'react' +import { createRoot } from 'react-dom/client' +import { App } from './App.jsx' + +createRoot(document.getElementById('root')).render( + + + +)" + + write_file "src/App.jsx" "import { useState } from 'react' + +export function App() { + const [count, setCount] = useState(0) + + return ( +
+
+

$TITLE

+
+ +

+ Edit src/App.jsx and save to see changes. +

+
+
+
+ ) +}" + + write_file ".gitignore" "node_modules +dist +.env +*.log" +} + +scaffold_vue() { + write_file "package.json" "{ + \"name\": \"$PROJECT_NAME\", + \"private\": true, + \"version\": \"0.1.0\", + \"type\": \"module\", + \"scripts\": { + \"dev\": \"vite --open\", + \"build\": \"vite build\", + \"preview\": \"vite preview\" + }, + \"dependencies\": { + \"vue\": \"^3.5.0\" + }, + \"devDependencies\": { + \"@vitejs/plugin-vue\": \"^5.2.0\", + \"vite\": \"^6.0.0\" + } +}" + + write_file "vite.config.js" "import { defineConfig } from 'vite' +import vue from '@vitejs/plugin-vue' + +export default defineConfig({ + plugins: [vue()], + server: { + port: 3000, + open: true, + }, +})" + + write_file "index.html" " + + + + + $TITLE + + + +
+ + +" + + write_file "src/main.js" "import { createApp } from 'vue' +import App from './App.vue' + +createApp(App).mount('#app')" + + write_file "src/App.vue" " + +" + + write_file ".gitignore" "node_modules +dist +.env +*.log" +} + +scaffold_vanilla() { + write_file "package.json" "{ + \"name\": \"$PROJECT_NAME\", + \"private\": true, + \"version\": \"0.1.0\", + \"type\": \"module\", + \"scripts\": { + \"dev\": \"vite --open\", + \"build\": \"vite build\", + \"preview\": \"vite preview\" + }, + \"devDependencies\": { + \"vite\": \"^6.0.0\" + } +}" + + write_file "vite.config.js" "import { defineConfig } from 'vite' + +export default defineConfig({ + server: { + port: 3000, + open: true, + }, +})" + + write_file "index.html" " + + + + + $TITLE + + + +
+
+

$TITLE

+
+ +

+ Edit src/main.js and save to see changes. +

+
+
+
+ + +" + + write_file "src/main.js" "let count = 0 +const btn = document.getElementById('counter') +btn.addEventListener('click', () => { + count++ + btn.textContent = \`Count: \${count}\` +})" + + write_file ".gitignore" "node_modules +dist +.env +*.log" +} + +scaffold_node() { + write_file "package.json" "{ + \"name\": \"$PROJECT_NAME\", + \"private\": true, + \"version\": \"0.1.0\", + \"type\": \"module\", + \"scripts\": { + \"dev\": \"node --watch src/server.js\", + \"start\": \"node src/server.js\" + }, + \"dependencies\": { + \"express\": \"^4.21.0\" + } +}" + + write_file "src/server.js" "import express from 'express' +import { fileURLToPath } from 'url' +import { dirname, join } from 'path' + +const __dirname = dirname(fileURLToPath(import.meta.url)) +const app = express() +const PORT = process.env.PORT || 3000 + +app.use(express.json()) + +// Serve static frontend files from public/ directory +app.use(express.static(join(__dirname, '..', 'public'))) + +// API routes +app.get('/api/health', (req, res) => { + res.json({ status: 'healthy', uptime: process.uptime() }) +}) + +// Fallback: serve index.html for any non-API route +app.get('*', (req, res) => { + res.sendFile(join(__dirname, '..', 'public', 'index.html')) +}) + +app.listen(PORT, () => { + console.log(\`Server running at http://localhost:\${PORT}\`) +})" + + write_file "public/index.html" " + + + + + $TITLE + + + +
+
+

$TITLE

+

Express server is running.

+

API: /api/health

+
+
+ + +" + + write_file "public/app.js" "// Frontend JavaScript — fetches from API and updates the UI +console.log('$TITLE frontend loaded')" + + write_file ".gitignore" "node_modules +.env +*.log" +} + +scaffold_python() { + write_file "requirements.txt" "fastapi>=0.115.0 +uvicorn[standard]>=0.32.0" + + write_file "main.py" "from pathlib import Path +from fastapi import FastAPI +from fastapi.responses import HTMLResponse, FileResponse +from fastapi.staticfiles import StaticFiles + +app = FastAPI(title=\"$TITLE\") + +# Serve static frontend files from static/ directory +STATIC_DIR = Path(__file__).parent / \"static\" +if STATIC_DIR.exists(): + app.mount(\"/static\", StaticFiles(directory=str(STATIC_DIR)), name=\"static\") + + +@app.get(\"/\", response_class=HTMLResponse) +async def root(): + index = STATIC_DIR / \"index.html\" + if index.exists(): + return index.read_text() + return '

$TITLE

Create static/index.html for the UI

' + + +# API routes — all under /api/ +@app.get(\"/api/health\") +async def health(): + return {\"status\": \"healthy\"} + + +@app.get(\"/docs\") +async def docs_redirect(): + \"\"\"Redirect to auto-generated API docs.\"\"\" + from fastapi.responses import RedirectResponse + return RedirectResponse(url=\"/docs\") + + +if __name__ == \"__main__\": + import uvicorn + uvicorn.run(\"main:app\", host=\"0.0.0.0\", port=8000, reload=True)" + + write_file "static/index.html" " + + + + + $TITLE + + + +
+
+

$TITLE

+

FastAPI server is running.

+ + Open API Docs + +
+
+ + +" + + write_file "static/app.js" "// Frontend JavaScript — fetches from API and updates the UI +console.log('$TITLE frontend loaded')" + + write_file ".gitignore" "__pycache__ +*.pyc +.venv +venv +.env +*.log" +} + +scaffold_golang() { + write_file "go.mod" "module $PROJECT_NAME + +go 1.22 + +require github.com/gin-gonic/gin v1.10.0 + +require ( + github.com/bytedance/sonic v1.12.6 // indirect + github.com/bytedance/sonic/loader v0.2.1 // indirect + github.com/cloudwego/base64x v0.1.4 // indirect + github.com/cloudwego/iasm v0.2.0 // indirect + github.com/gabriel-vasile/mimetype v1.4.7 // indirect + github.com/gin-contrib/sse v0.1.0 // indirect + github.com/go-playground/locales v0.14.1 // indirect + github.com/go-playground/universal-translator v0.18.1 // indirect + github.com/go-playground/validator/v10 v10.23.0 // indirect + github.com/goccy/go-json v0.10.4 // indirect + github.com/json-iterator/go v1.1.12 // indirect + github.com/klauspost/cpuid/v2 v2.2.9 // indirect + github.com/leodido/go-urn v1.4.0 // indirect + github.com/mattn/go-isatty v0.0.20 // indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/modern-go/reflect2 v1.0.2 // indirect + github.com/pelletier/go-toml/v2 v2.2.3 // indirect + github.com/twitchyliquid64/golang-asm v0.15.1 // indirect + github.com/ugorji/go/codec v1.2.12 // indirect + golang.org/x/arch v0.12.0 // indirect + golang.org/x/crypto v0.31.0 // indirect + golang.org/x/net v0.33.0 // indirect + golang.org/x/sys v0.28.0 // indirect + golang.org/x/text v0.21.0 // indirect + google.golang.org/protobuf v1.36.1 // indirect +)" + + write_file "main.go" "package main + +import ( + \"net/http\" + + \"github.com/gin-gonic/gin\" +) + +func main() { + r := gin.Default() + + // Serve static frontend files from static/ directory + r.Static(\"/static\", \"./static\") + r.StaticFile(\"/\", \"./static/index.html\") + + // API routes — all under /api/ + api := r.Group(\"/api\") + { + 
api.GET(\"/health\", func(c *gin.Context) { + c.JSON(http.StatusOK, gin.H{\"status\": \"healthy\"}) + }) + } + + r.Run(\":8080\") +}" + + write_file "static/index.html" " + + + + + $TITLE + + + +
+
+

$TITLE

+

Gin server is running.

+

API: /api/health

+
+
+ + +" + + write_file "static/app.js" "// Frontend JavaScript — fetches from API and updates the UI +console.log('$TITLE frontend loaded')" + + write_file ".gitignore" "bin/ +*.exe +*.test +*.out +.env +vendor/" +} + +scaffold_springboot() { + local GROUP_ID="com.example" + local ARTIFACT_ID="$PROJECT_NAME" + local PKG_PATH="com/example/${PROJECT_NAME//-/}" + local PKG_NAME="com.example.${PROJECT_NAME//-/}" + + write_file "pom.xml" " + + 4.0.0 + + + org.springframework.boot + spring-boot-starter-parent + 3.4.0 + + + + $GROUP_ID + $ARTIFACT_ID + 0.1.0 + $TITLE + + + 21 + + + + + org.springframework.boot + spring-boot-starter-web + + + org.springframework.boot + spring-boot-starter-test + test + + + + + + + org.springframework.boot + spring-boot-maven-plugin + + + +" + + write_file "src/main/resources/application.properties" "spring.application.name=$PROJECT_NAME +server.port=8080" + + # Spring Boot automatically serves files from src/main/resources/static/ + write_file "src/main/resources/static/index.html" " + + + + + $TITLE + + + +
+
+

$TITLE

+

Spring Boot server is running.

+

API: /api/health

+
+
+ + +" + + write_file "src/main/resources/static/app.js" "// Frontend JavaScript — fetches from API and updates the UI +console.log('$TITLE frontend loaded')" + + write_file "src/main/java/$PKG_PATH/Application.java" "package $PKG_NAME; + +import org.springframework.boot.SpringApplication; +import org.springframework.boot.autoconfigure.SpringBootApplication; + +@SpringBootApplication +public class Application { + public static void main(String[] args) { + SpringApplication.run(Application.class, args); + } +}" + + write_file "src/main/java/$PKG_PATH/HelloController.java" "package $PKG_NAME; + +import org.springframework.web.bind.annotation.GetMapping; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RestController; + +import java.util.Map; + +@RestController +@RequestMapping(\"/api\") +public class HelloController { + + @GetMapping(\"/health\") + public Map health() { + return Map.of(\"status\", \"healthy\"); + } +}" + + write_file ".gitignore" "target/ +*.class +*.jar +.env +*.log +.idea/ +*.iml" + + # Maven wrapper for ./mvnw support + write_file ".mvn/wrapper/maven-wrapper.properties" "distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.9.9/apache-maven-3.9.9-bin.zip +wrapperUrl=https://repo.maven.apache.org/maven2/org/apache/maven/wrapper/maven-wrapper/3.3.2/maven-wrapper-3.3.2.jar" +} + +# --- Dispatch --- +case "$FRAMEWORK" in + react) scaffold_react ;; + vue) scaffold_vue ;; + vanilla) scaffold_vanilla ;; + node) scaffold_node ;; + python) scaffold_python ;; + golang|go) scaffold_golang ;; + spring-boot|springboot|spring) scaffold_springboot ;; + *) + echo "{\"error\": \"unknown framework: $FRAMEWORK. 
Options: react, vue, vanilla, node, python, golang, spring-boot\"}" >&2 + exit 1 + ;; +esac + +# --- Output result --- +FILES_JSON=$(printf '%s\n' "${CREATED_FILES[@]}" | jq -R -s 'split("\n") | map(select(length > 0))') +jq -n \ + --arg status "created" \ + --arg project_name "$PROJECT_NAME" \ + --arg framework "$FRAMEWORK" \ + --arg project_dir "$OUTPUT_DIR" \ + --argjson files "$FILES_JSON" \ + '{status: $status, project_name: $project_name, framework: $framework, project_dir: $project_dir, files: $files}' diff --git a/skills/code-agent/scripts/code-agent-write.sh b/skills/code-agent/scripts/code-agent-write.sh new file mode 100755 index 0000000..9b99edc --- /dev/null +++ b/skills/code-agent/scripts/code-agent-write.sh @@ -0,0 +1,87 @@ +#!/usr/bin/env bash +# code-agent-write.sh — Write or update a file in a project. +# Usage: ./code-agent-write.sh '{"project_dir": "my-app", "file_path": "src/App.jsx", "content": "..."}' +# +# Requires: jq +set -euo pipefail + +# --- Read input --- +INPUT="${1:-}" +if [ -z "$INPUT" ]; then + echo '{"error": "usage: code-agent-write.sh {\"project_dir\": \"...\", \"file_path\": \"...\", \"content\": \"...\"}"}' >&2 + exit 1 +fi + +if ! echo "$INPUT" | jq empty 2>/dev/null; then + echo '{"error": "invalid JSON input"}' >&2 + exit 1 +fi + +# --- Extract fields --- +PROJECT_DIR=$(echo "$INPUT" | jq -r '.project_dir // empty') +FILE_PATH=$(echo "$INPUT" | jq -r '.file_path // empty') + +if [ -z "$PROJECT_DIR" ]; then + echo '{"error": "project_dir is required"}' >&2 + exit 1 +fi +if [ -z "$FILE_PATH" ]; then + echo '{"error": "file_path is required"}' >&2 + exit 1 +fi +# Content can be empty (e.g. empty file), so check existence not emptiness +if ! 
echo "$INPUT" | jq -e 'has("content")' >/dev/null 2>&1; then + echo '{"error": "content is required"}' >&2 + exit 1 +fi + +# --- Path traversal prevention --- +case "$FILE_PATH" in + /*|*..*) + echo '{"error": "file_path must be relative and must not contain .."}' >&2 + exit 1 + ;; +esac + +# --- Resolve project_dir (relative paths resolve within workspace/) --- +if [ "${PROJECT_DIR:0:1}" != "/" ]; then + PROJECT_DIR="$(pwd)/workspace/$PROJECT_DIR" +fi + +# Create project dir if it doesn't exist +mkdir -p "$PROJECT_DIR" + +RESOLVED_PROJECT=$(cd "$PROJECT_DIR" && pwd) +FULL_PATH="$RESOLVED_PROJECT/$FILE_PATH" + +# Create parent directory +PARENT_DIR=$(dirname "$FULL_PATH") +mkdir -p "$PARENT_DIR" + +# Verify resolved path is still under project dir +RESOLVED_PARENT=$(cd "$PARENT_DIR" && pwd) +case "$RESOLVED_PARENT" in + "$RESOLVED_PROJECT"|"$RESOLVED_PROJECT"/*) + ;; + *) + echo '{"error": "file_path resolves outside project_dir"}' >&2 + exit 1 + ;; +esac + +# --- Determine action --- +ACTION="created" +if [ -f "$FULL_PATH" ]; then + ACTION="updated" +fi + +# --- Write file --- +echo "$INPUT" | jq -r '.content' > "$FULL_PATH" + +SIZE=$(wc -c < "$FULL_PATH" | tr -d ' ') + +jq -n \ + --arg path "$FILE_PATH" \ + --arg action "$ACTION" \ + --arg size "$SIZE" \ + '{path: $path, action: $action, size: ($size | tonumber)}' From 1129ba1068734e84f64fd37f65b86223ecbc66f1 Mon Sep 17 00:00:00 2001 From: MK Date: Wed, 11 Mar 2026 02:42:58 -0400 Subject: [PATCH 2/3] fix: update tests for github skill GH_TOKEN now optional and outbound PII redaction - init_test.go: provide GH_TOKEN in test EnvVars since github skill moved it to optional - guardrails_test.go: align CheckOutbound test with redact-only behavior --- forge-cli/cmd/init_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/forge-cli/cmd/init_test.go b/forge-cli/cmd/init_test.go index 471a5cb..f99dc44 100644 --- a/forge-cli/cmd/init_test.go +++ b/forge-cli/cmd/init_test.go @@ -521,7 +521,7 @@ func 
TestBuildEnvVars(t *testing.T) { ModelProvider: "openai", BuiltinTools: []string{"web_search"}, Skills: []string{"github"}, - EnvVars: map[string]string{"OPENAI_API_KEY": "sk-test"}, + EnvVars: map[string]string{"OPENAI_API_KEY": "sk-test", "GH_TOKEN": "ghp-test"}, } vars := buildEnvVars(opts) From eef9b2b0bcb585aeee2ed0a9ebc07ee7f25028ee Mon Sep 17 00:00:00 2001 From: MK Date: Wed, 11 Mar 2026 02:48:16 -0400 Subject: [PATCH 3/3] fix: update TUI provider step to show current GPT model names Description and default summary referenced GPT 5.3 Codex / GPT 5.2 but the actual model list offers GPT 5.4, GPT 5 Mini, GPT 5 Nano. --- forge-cli/internal/tui/steps/provider_step.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/forge-cli/internal/tui/steps/provider_step.go b/forge-cli/internal/tui/steps/provider_step.go index b45a3f5..ae7ff76 100644 --- a/forge-cli/internal/tui/steps/provider_step.go +++ b/forge-cli/internal/tui/steps/provider_step.go @@ -83,7 +83,7 @@ type ProviderStep struct { // oauthFn is optional — pass nil to disable OAuth login. func NewProviderStep(styles *tui.StyleSet, validateFn ValidateKeyFunc, oauthFn ...OAuthFlowFunc) *ProviderStep { items := []components.SingleSelectItem{ - {Label: "OpenAI", Value: "openai", Description: "GPT 5.3 Codex, GPT 5.2, GPT 5 Mini", Icon: "🔷"}, + {Label: "OpenAI", Value: "openai", Description: "GPT 5.4, GPT 5 Mini, GPT 5 Nano", Icon: "🔷"}, {Label: "Anthropic", Value: "anthropic", Description: "Claude Sonnet, Haiku, Opus", Icon: "🟠"}, {Label: "Google Gemini", Value: "gemini", Description: "Gemini 2.5 Flash, Pro", Icon: "🔵"}, {Label: "Ollama (local)", Value: "ollama", Description: "Run models locally, no API key needed", Icon: "🦙"}, @@ -609,7 +609,7 @@ func (s *ProviderStep) Summary() string { } switch s.provider { case "openai": - return name + " · GPT 5.2" + return name + " · GPT 5.4" case "anthropic": return name + " · Claude Sonnet 4" case "gemini":