Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion forge-cli/cmd/init.go
Original file line number Diff line number Diff line change
Expand Up @@ -1016,7 +1016,7 @@ func buildTemplateData(opts *initOptions) templateData {
func defaultModelNameForProvider(provider string) string {
switch provider {
case "openai":
return "gpt-5.2-2025-12-11"
return "gpt-5.4"
case "anthropic":
return "claude-sonnet-4-20250514"
case "gemini":
Expand Down
4 changes: 2 additions & 2 deletions forge-cli/cmd/init_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -521,7 +521,7 @@ func TestBuildEnvVars(t *testing.T) {
ModelProvider: "openai",
BuiltinTools: []string{"web_search"},
Skills: []string{"github"},
EnvVars: map[string]string{"OPENAI_API_KEY": "sk-test"},
EnvVars: map[string]string{"OPENAI_API_KEY": "sk-test", "GH_TOKEN": "ghp-test"},
}
vars := buildEnvVars(opts)

Expand Down Expand Up @@ -580,7 +580,7 @@ func TestBuildTemplateData_DefaultModels(t *testing.T) {
provider string
expectedModel string
}{
{"openai", "gpt-5.2-2025-12-11"},
{"openai", "gpt-5.4"},
{"anthropic", "claude-sonnet-4-20250514"},
{"gemini", "gemini-2.5-flash"},
{"ollama", "llama3"},
Expand Down
19 changes: 10 additions & 9 deletions forge-cli/internal/tui/steps/provider_step.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,17 +40,18 @@ type modelOption struct {

// openAIOAuthModels are available when using browser-based OAuth login.
var openAIOAuthModels = []modelOption{
{DisplayName: "GPT 5.3 Codex", ModelID: "gpt-5.3-codex"},
{DisplayName: "GPT 5.2", ModelID: "gpt-5.2-2025-12-11"},
{DisplayName: "GPT 5.2 Codex", ModelID: "gpt-5.2-codex"},
{DisplayName: "GPT 5.4", ModelID: "gpt-5.4"},
{DisplayName: "GPT 5 Mini", ModelID: "gpt-5-mini"},
{DisplayName: "GPT 5 Nano", ModelID: "gpt-5-nano"},
{DisplayName: "GPT 4.1", ModelID: "gpt-4.1"},
}

// openAIAPIKeyModels are available when using an API key.
var openAIAPIKeyModels = []modelOption{
{DisplayName: "GPT 5.2", ModelID: "gpt-5.2-2025-12-11"},
{DisplayName: "GPT 5 Mini", ModelID: "gpt-5-mini-2025-08-07"},
{DisplayName: "GPT 5 Nano", ModelID: "gpt-5-nano-2025-08-07"},
{DisplayName: "GPT 4.1 Mini", ModelID: "gpt-4.1-mini-2025-04-14"},
{DisplayName: "GPT 5.4", ModelID: "gpt-5.4"},
{DisplayName: "GPT 5 Mini", ModelID: "gpt-5-mini"},
{DisplayName: "GPT 5 Nano", ModelID: "gpt-5-nano"},
{DisplayName: "GPT 4.1", ModelID: "gpt-4.1"},
}

// ProviderStep handles model provider selection and API key entry.
Expand Down Expand Up @@ -82,7 +83,7 @@ type ProviderStep struct {
// oauthFn is optional — pass nil to disable OAuth login.
func NewProviderStep(styles *tui.StyleSet, validateFn ValidateKeyFunc, oauthFn ...OAuthFlowFunc) *ProviderStep {
items := []components.SingleSelectItem{
{Label: "OpenAI", Value: "openai", Description: "GPT 5.3 Codex, GPT 5.2, GPT 5 Mini", Icon: "🔷"},
{Label: "OpenAI", Value: "openai", Description: "GPT 5.4, GPT 5 Mini, GPT 5 Nano", Icon: "🔷"},
{Label: "Anthropic", Value: "anthropic", Description: "Claude Sonnet, Haiku, Opus", Icon: "🟠"},
{Label: "Google Gemini", Value: "gemini", Description: "Gemini 2.5 Flash, Pro", Icon: "🔵"},
{Label: "Ollama (local)", Value: "ollama", Description: "Run models locally, no API key needed", Icon: "🦙"},
Expand Down Expand Up @@ -608,7 +609,7 @@ func (s *ProviderStep) Summary() string {
}
switch s.provider {
case "openai":
return name + " · GPT 5.2"
return name + " · GPT 5.4"
case "anthropic":
return name + " · Claude Sonnet 4"
case "gemini":
Expand Down
2 changes: 1 addition & 1 deletion forge-cli/internal/tui/wizard.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ type WizardContext struct {
Provider string
APIKey string
AuthMethod string // "apikey" or "oauth" — how the user authenticated
ModelName string // selected model ID (e.g. "gpt-5.3-codex")
ModelName string // selected model ID (e.g. "gpt-5.4")
OrganizationID string // OpenAI enterprise organization ID
Fallbacks []FallbackProvider
Channel string
Expand Down
114 changes: 98 additions & 16 deletions forge-cli/runtime/runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,34 @@ type RunnerConfig struct {
// result to the appropriate channel (e.g. Slack, Telegram).
type ScheduleNotifier func(ctx context.Context, channel, target string, response *a2a.Message) error

// codeAgentDirective is appended to the system prompt when the code-agent
// skill is active (see the hasSkill check in Run). It instructs the LLM to
// always call tools in every response rather than replying with text alone.
// NOTE: this is a runtime prompt string — edits here change agent behavior.
const codeAgentDirective = `## Code Agent — MANDATORY RULES

You are a coding agent. Every response MUST include tool calls. NEVER respond with only text.

FORBIDDEN:
- Respond with "I'll do X now" or "Let me X" without calling tools in the same response
- Output code in markdown blocks for the user to copy-paste
- Ask the user for permission or confirmation before acting
- Describe what you plan to do without simultaneously doing it
- Read files unrelated to the error path or code you plan to change
- Edit test files before fixing the source code — always fix source first, then update tests

REQUIRED:
- New project → code_agent_scaffold → code_agent_write (all files) → code_agent_run
- Modify existing code → search + trace error origin + read functions to change → code_agent_edit or code_agent_write
- Any request → ACT IMMEDIATELY with tools. Write ALL files and run in ONE turn.

EXPLORATION RULES:
Bug fixes: search for the error message → trace to its origin (not just where it surfaces) → read functions you plan to call or replace → edit.
Features: search for similar patterns (2-3 searches) → read files you plan to modify → edit.
Both: complete the workflow (commit/push/PR if applicable).
Do NOT read files unrelated to the error path or code you plan to change. Do NOT replace function calls without reading both the old and new function.

VERIFY BUG FIXES:
After editing, trace the failing input through your new code. Read the functions your fix calls — confirm they handle the type that was failing. If the codebase has a working path for similar logic (e.g., another provider), your fix must use the same approach. Type annotations alone do not fix runtime bugs.`

// Runner orchestrates the local A2A development server.
type Runner struct {
cfg RunnerConfig
Expand Down Expand Up @@ -258,6 +286,24 @@ func (r *Runner) Run(ctx context.Context) error {
r.logger.Warn("failed to register builtin tools", map[string]any{"error": err.Error()})
}

// Register search/exploration tools (grep, glob, tree).
// When code-agent skill is active, scope them to workspace/ so searches
// default to cloned repos. Otherwise scope to the main working directory.
searchRoot := r.cfg.WorkDir
if r.hasSkill("code-agent") {
codeDir := filepath.Join(r.cfg.WorkDir, "workspace")
if mkErr := os.MkdirAll(codeDir, 0o755); mkErr != nil {
r.logger.Warn("failed to create code workspace directory", map[string]any{"error": mkErr.Error()})
}
searchRoot = codeDir
r.logger.Info("code-agent skill detected: workspace ready", map[string]any{"workspace": codeDir})
// Script tools (code_agent_read, code_agent_write, code_agent_run)
// are registered by registerSkillTools() from SKILL.md ## Tool: entries.
}
if err := builtins.RegisterCodeAgentSearchTools(reg, searchRoot); err != nil {
r.logger.Warn("failed to register search tools", map[string]any{"error": err.Error()})
}

// Register read_skill tool for lazy-loading skill instructions
readSkill := builtins.NewReadSkillTool(r.cfg.WorkDir)
if regErr := reg.Register(readSkill); regErr != nil {
Expand Down Expand Up @@ -397,15 +443,25 @@ func (r *Runner) Run(ctx context.Context) error {
charBudget = coreruntime.ContextBudgetForModel(mc.Client.Model)
}

// Build system prompt; append code-agent tool directives if those tools are registered.
sysPrompt := r.buildSystemPrompt()
if r.hasSkill("code-agent") {
sysPrompt += "\n\n" + codeAgentDirective
}

execCfg := coreruntime.LLMExecutorConfig{
Client: llmClient,
Tools: reg,
Hooks: hooks,
SystemPrompt: r.buildSystemPrompt(),
Logger: r.logger,
ModelName: mc.Client.Model,
CharBudget: charBudget,
FilesDir: filepath.Join(r.cfg.WorkDir, ".forge", "files"),
Client: llmClient,
Tools: reg,
Hooks: hooks,
SystemPrompt: sysPrompt,
Logger: r.logger,
ModelName: mc.Client.Model,
MaxIterations: 100,
CharBudget: charBudget,
FilesDir: filepath.Join(r.cfg.WorkDir, ".forge", "files"),
}
if r.derivedCLIConfig != nil {
execCfg.WorkflowPhases = r.derivedCLIConfig.WorkflowPhases
}

// Initialize memory persistence (enabled by default).
Expand Down Expand Up @@ -1647,6 +1703,28 @@ func ensureGitignore(workDir string) {
os.WriteFile(gitignorePath, []byte(content+entry), 0644) //nolint:errcheck
}

// hasSkill checks whether a skill with the given name is present in the project's
// discovered skill files. Checks both ## Tool: entry names and frontmatter name.
// hasSkill reports whether a skill called name exists in the project's
// discovered skill files. A skill matches either via its frontmatter name
// (used by skills that declare no ## Tool: entries) or via the name of any
// individual ## Tool: entry. Files that fail to parse are skipped silently.
func (r *Runner) hasSkill(name string) bool {
	for _, path := range r.discoverSkillFiles() {
		entries, meta, err := cliskills.ParseFileWithMetadata(path)
		if err != nil {
			// Unparseable skill file — ignore and keep scanning the rest.
			continue
		}
		// Frontmatter-level match first (covers tool-less skills).
		if meta != nil && meta.Name == name {
			return true
		}
		// Then each parsed tool entry.
		for i := range entries {
			if entries[i].Name == name {
				return true
			}
		}
	}
	return false
}

// discoverSkillFiles returns all skill file paths from both flat and subdirectory formats,
// plus the main SKILL.md (or custom path from forge.yaml).
func (r *Runner) discoverSkillFiles() []string {
Expand Down Expand Up @@ -1788,7 +1866,7 @@ func (r *Runner) buildSkillCatalog() string {

var catalogEntries []string
for _, match := range matches {
entries, _, err := cliskills.ParseFileWithMetadata(match)
entries, meta, err := cliskills.ParseFileWithMetadata(match)
if err != nil {
continue
}
Expand All @@ -1799,6 +1877,16 @@ func (r *Runner) buildSkillCatalog() string {
catalogSkillDir = filepath.Base(filepath.Dir(match))
}

// If no ## Tool: entries were parsed but frontmatter has name+description,
// create a synthetic entry so the skill appears in the catalog summary.
if len(entries) == 0 && meta != nil && meta.Name != "" && meta.Description != "" {
entries = []contract.SkillEntry{{
Name: meta.Name,
Description: meta.Description,
Metadata: meta,
}}
}

for _, entry := range entries {
// Skip skills that have scripts (already registered as tools)
scriptName := strings.ReplaceAll(entry.Name, "_", "-")
Expand Down Expand Up @@ -1831,13 +1919,6 @@ func (r *Runner) buildSkillCatalog() string {
line += " (uses cli_execute)"
}
catalogEntries = append(catalogEntries, line)

// Include full skill instructions when available
if entry.Body != "" {
catalogEntries = append(catalogEntries, "")
catalogEntries = append(catalogEntries, entry.Body)
catalogEntries = append(catalogEntries, "")
}
}
}
}
Expand All @@ -1848,6 +1929,7 @@ func (r *Runner) buildSkillCatalog() string {

var b strings.Builder
b.WriteString("## Available Skills\n\n")
b.WriteString("Use `read_skill` to load full instructions for a skill before using it.\n\n")
for _, entry := range catalogEntries {
b.WriteString(entry)
b.WriteString("\n")
Expand Down
31 changes: 24 additions & 7 deletions forge-core/forgecore_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -794,6 +794,15 @@ func TestNewRuntime_WithToolCalling(t *testing.T) {
},
FinishReason: "stop",
},
// The agent loop sends a continuation nudge after the first stop.
// Without workflow phases configured, only 1 nudge fires.
{
Message: llm.ChatMessage{
Role: llm.RoleAssistant,
Content: "I fetched the URL and got: ok",
},
FinishReason: "stop",
},
},
}

Expand Down Expand Up @@ -825,9 +834,9 @@ func TestNewRuntime_WithToolCalling(t *testing.T) {
t.Errorf("response text = %q, want 'I fetched the URL and got: ok'", resp.Parts[0].Text)
}

// Should have made 2 LLM calls
if toolCallClient.callIdx != 2 {
t.Errorf("LLM was called %d times, want 2", toolCallClient.callIdx)
// Should have made 3 LLM calls (tool call + stop + 1 continuation nudge)
if toolCallClient.callIdx != 3 {
t.Errorf("LLM was called %d times, want 3", toolCallClient.callIdx)
}
}

Expand Down Expand Up @@ -1064,7 +1073,7 @@ func TestNewRuntime_LLMError(t *testing.T) {
}

func TestNewRuntime_DefaultMaxIterations(t *testing.T) {
// If MaxIterations is 0, should default to 10
// If MaxIterations is 0, should default to 50
client := &mockLLMClient{
response: &llm.ChatResponse{
Message: llm.ChatMessage{
Expand All @@ -1077,7 +1086,7 @@ func TestNewRuntime_DefaultMaxIterations(t *testing.T) {

executor := NewRuntime(RuntimeConfig{
LLMClient: client,
// MaxIterations: 0 -> defaults to 10
// MaxIterations: 0 -> defaults to 50
})

if executor == nil {
Expand Down Expand Up @@ -1325,6 +1334,14 @@ func TestIntegration_CompileWithToolCallLoop(t *testing.T) {
},
FinishReason: "stop",
},
// Continuation nudge: without workflow phases, only 1 nudge fires.
{
Message: llm.ChatMessage{
Role: llm.RoleAssistant,
Content: "Found and fetched the result",
},
FinishReason: "stop",
},
},
}

Expand Down Expand Up @@ -1354,8 +1371,8 @@ func TestIntegration_CompileWithToolCallLoop(t *testing.T) {
if resp.Parts[0].Text != "Found and fetched the result" {
t.Errorf("response text = %q", resp.Parts[0].Text)
}
if toolCallClient.callIdx != 3 {
t.Errorf("LLM was called %d times, want 3", toolCallClient.callIdx)
if toolCallClient.callIdx != 4 {
t.Errorf("LLM was called %d times, want 4", toolCallClient.callIdx)
}
}

Expand Down
18 changes: 17 additions & 1 deletion forge-core/llm/providers/responses.go
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ func (c *ResponsesClient) buildRequest(req *llm.ChatRequest, stream bool) respon
inputs = append(inputs, responsesInput{
Type: "function_call",
CallID: tc.ID,
ID: tc.ID,
ID: responsesItemID(tc.ID),
Name: tc.Function.Name,
Arguments: tc.Function.Arguments,
})
Expand All @@ -249,6 +249,13 @@ func (c *ResponsesClient) buildRequest(req *llm.ChatRequest, stream bool) respon
})
}

// The Responses API requires the instructions field. If no system
// message was provided (e.g. summarization calls), use a minimal default
// so the request doesn't fail with "Instructions are required".
if instructions == "" {
instructions = "You are a helpful assistant."
}

r := responsesRequest{
Model: model,
Instructions: instructions,
Expand Down Expand Up @@ -321,6 +328,15 @@ type streamCompleted struct {
Response responsesResponse `json:"response"`
}

// responsesItemID ensures a tool call ID has the "fc_" prefix required by
// the Responses API for function_call item IDs.
// responsesItemID normalizes a tool call ID into the "fc_" form the
// Responses API requires for function_call item IDs. IDs that already
// carry the prefix are returned unchanged; otherwise any leading "call_"
// is dropped and "fc_" is prepended.
func responsesItemID(id string) string {
	const prefix = "fc_"
	if strings.HasPrefix(id, prefix) {
		return id
	}
	return prefix + strings.TrimPrefix(id, "call_")
}

func (c *ResponsesClient) readStream(r io.Reader, ch chan<- llm.StreamDelta) {
// Track function calls being built so we can emit them with correct IDs
type pendingFC struct {
Expand Down
2 changes: 1 addition & 1 deletion forge-core/runtime/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ func ResolveModelConfig(cfg *types.ForgeConfig, envVars map[string]string, provi
func defaultModelForProvider(provider string) string {
switch provider {
case "openai":
return "gpt-5.2-2025-12-11"
return "gpt-5.4"
case "anthropic":
return "claude-sonnet-4-20250514"
case "gemini":
Expand Down
6 changes: 3 additions & 3 deletions forge-core/runtime/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ func TestResolveModelConfig_OrgIDFromYAML(t *testing.T) {
cfg := &types.ForgeConfig{
Model: types.ModelRef{
Provider: "openai",
Name: "gpt-5.2-2025-12-11",
Name: "gpt-5.4",
OrganizationID: "org-yaml-123",
},
}
Expand All @@ -201,7 +201,7 @@ func TestResolveModelConfig_OrgIDEnvOverridesYAML(t *testing.T) {
cfg := &types.ForgeConfig{
Model: types.ModelRef{
Provider: "openai",
Name: "gpt-5.2-2025-12-11",
Name: "gpt-5.4",
OrganizationID: "org-yaml-123",
},
}
Expand Down Expand Up @@ -303,7 +303,7 @@ func TestDefaultModelForProvider(t *testing.T) {
provider string
expected string
}{
{"openai", "gpt-5.2-2025-12-11"},
{"openai", "gpt-5.4"},
{"anthropic", "claude-sonnet-4-20250514"},
{"gemini", "gemini-2.5-flash"},
{"ollama", "llama3"},
Expand Down
Loading
Loading