Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion forge-cli/cmd/init.go
Original file line number Diff line number Diff line change
Expand Up @@ -1016,7 +1016,7 @@ func buildTemplateData(opts *initOptions) templateData {
func defaultModelNameForProvider(provider string) string {
switch provider {
case "openai":
return "gpt-5.2-2025-12-11"
return "gpt-5.4"
case "anthropic":
return "claude-sonnet-4-20250514"
case "gemini":
Expand Down
4 changes: 2 additions & 2 deletions forge-cli/cmd/init_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -521,7 +521,7 @@ func TestBuildEnvVars(t *testing.T) {
ModelProvider: "openai",
BuiltinTools: []string{"web_search"},
Skills: []string{"github"},
EnvVars: map[string]string{"OPENAI_API_KEY": "sk-test"},
EnvVars: map[string]string{"OPENAI_API_KEY": "sk-test", "GH_TOKEN": "ghp-test"},
}
vars := buildEnvVars(opts)

Expand Down Expand Up @@ -580,7 +580,7 @@ func TestBuildTemplateData_DefaultModels(t *testing.T) {
provider string
expectedModel string
}{
{"openai", "gpt-5.2-2025-12-11"},
{"openai", "gpt-5.4"},
{"anthropic", "claude-sonnet-4-20250514"},
{"gemini", "gemini-2.5-flash"},
{"ollama", "llama3"},
Expand Down
19 changes: 10 additions & 9 deletions forge-cli/internal/tui/steps/provider_step.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,17 +40,18 @@ type modelOption struct {

// openAIOAuthModels are available when using browser-based OAuth login.
var openAIOAuthModels = []modelOption{
{DisplayName: "GPT 5.3 Codex", ModelID: "gpt-5.3-codex"},
{DisplayName: "GPT 5.2", ModelID: "gpt-5.2-2025-12-11"},
{DisplayName: "GPT 5.2 Codex", ModelID: "gpt-5.2-codex"},
{DisplayName: "GPT 5.4", ModelID: "gpt-5.4"},
{DisplayName: "GPT 5 Mini", ModelID: "gpt-5-mini"},
{DisplayName: "GPT 5 Nano", ModelID: "gpt-5-nano"},
{DisplayName: "GPT 4.1", ModelID: "gpt-4.1"},
}

// openAIAPIKeyModels are available when using an API key.
var openAIAPIKeyModels = []modelOption{
{DisplayName: "GPT 5.2", ModelID: "gpt-5.2-2025-12-11"},
{DisplayName: "GPT 5 Mini", ModelID: "gpt-5-mini-2025-08-07"},
{DisplayName: "GPT 5 Nano", ModelID: "gpt-5-nano-2025-08-07"},
{DisplayName: "GPT 4.1 Mini", ModelID: "gpt-4.1-mini-2025-04-14"},
{DisplayName: "GPT 5.4", ModelID: "gpt-5.4"},
{DisplayName: "GPT 5 Mini", ModelID: "gpt-5-mini"},
{DisplayName: "GPT 5 Nano", ModelID: "gpt-5-nano"},
{DisplayName: "GPT 4.1", ModelID: "gpt-4.1"},
}

// ProviderStep handles model provider selection and API key entry.
Expand Down Expand Up @@ -82,7 +83,7 @@ type ProviderStep struct {
// oauthFn is optional — pass nil to disable OAuth login.
func NewProviderStep(styles *tui.StyleSet, validateFn ValidateKeyFunc, oauthFn ...OAuthFlowFunc) *ProviderStep {
items := []components.SingleSelectItem{
{Label: "OpenAI", Value: "openai", Description: "GPT 5.3 Codex, GPT 5.2, GPT 5 Mini", Icon: "🔷"},
{Label: "OpenAI", Value: "openai", Description: "GPT 5.4, GPT 5 Mini, GPT 5 Nano", Icon: "🔷"},
{Label: "Anthropic", Value: "anthropic", Description: "Claude Sonnet, Haiku, Opus", Icon: "🟠"},
{Label: "Google Gemini", Value: "gemini", Description: "Gemini 2.5 Flash, Pro", Icon: "🔵"},
{Label: "Ollama (local)", Value: "ollama", Description: "Run models locally, no API key needed", Icon: "🦙"},
Expand Down Expand Up @@ -608,7 +609,7 @@ func (s *ProviderStep) Summary() string {
}
switch s.provider {
case "openai":
return name + " · GPT 5.2"
return name + " · GPT 5.4"
case "anthropic":
return name + " · Claude Sonnet 4"
case "gemini":
Expand Down
2 changes: 1 addition & 1 deletion forge-cli/internal/tui/wizard.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ type WizardContext struct {
Provider string
APIKey string
AuthMethod string // "apikey" or "oauth" — how the user authenticated
ModelName string // selected model ID (e.g. "gpt-5.3-codex")
ModelName string // selected model ID (e.g. "gpt-5.4")
OrganizationID string // OpenAI enterprise organization ID
Fallbacks []FallbackProvider
Channel string
Expand Down
114 changes: 98 additions & 16 deletions forge-cli/runtime/runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,34 @@ type RunnerConfig struct {
// result to the appropriate channel (e.g. Slack, Telegram).
type ScheduleNotifier func(ctx context.Context, channel, target string, response *a2a.Message) error

// codeAgentDirective is appended to the system prompt when the code-agent
// skill is active (see the hasSkill check in Run). It instructs the LLM to
// always call tools in every response rather than replying with text alone.
// NOTE: this is a runtime prompt string — edits here change agent behavior.
const codeAgentDirective = `## Code Agent — MANDATORY RULES

You are a coding agent. Every response MUST include tool calls. NEVER respond with only text.

FORBIDDEN:
- Respond with "I'll do X now" or "Let me X" without calling tools in the same response
- Output code in markdown blocks for the user to copy-paste
- Ask the user for permission or confirmation before acting
- Describe what you plan to do without simultaneously doing it
- Read files unrelated to the error path or code you plan to change
- Edit test files before fixing the source code — always fix source first, then update tests

REQUIRED:
- New project → code_agent_scaffold → code_agent_write (all files) → code_agent_run
- Modify existing code → search + trace error origin + read functions to change → code_agent_edit or code_agent_write
- Any request → ACT IMMEDIATELY with tools. Write ALL files and run in ONE turn.

EXPLORATION RULES:
Bug fixes: search for the error message → trace to its origin (not just where it surfaces) → read functions you plan to call or replace → edit.
Features: search for similar patterns (2-3 searches) → read files you plan to modify → edit.
Both: complete the workflow (commit/push/PR if applicable).
Do NOT read files unrelated to the error path or code you plan to change. Do NOT replace function calls without reading both the old and new function.

VERIFY BUG FIXES:
After editing, trace the failing input through your new code. Read the functions your fix calls — confirm they handle the type that was failing. If the codebase has a working path for similar logic (e.g., another provider), your fix must use the same approach. Type annotations alone do not fix runtime bugs.`

// Runner orchestrates the local A2A development server.
type Runner struct {
cfg RunnerConfig
Expand Down Expand Up @@ -258,6 +286,24 @@ func (r *Runner) Run(ctx context.Context) error {
r.logger.Warn("failed to register builtin tools", map[string]any{"error": err.Error()})
}

// Register search/exploration tools (grep, glob, tree).
// When code-agent skill is active, scope them to workspace/ so searches
// default to cloned repos. Otherwise scope to the main working directory.
searchRoot := r.cfg.WorkDir
if r.hasSkill("code-agent") {
codeDir := filepath.Join(r.cfg.WorkDir, "workspace")
if mkErr := os.MkdirAll(codeDir, 0o755); mkErr != nil {
r.logger.Warn("failed to create code workspace directory", map[string]any{"error": mkErr.Error()})
}
searchRoot = codeDir
r.logger.Info("code-agent skill detected: workspace ready", map[string]any{"workspace": codeDir})
// Script tools (code_agent_read, code_agent_write, code_agent_run)
// are registered by registerSkillTools() from SKILL.md ## Tool: entries.
}
if err := builtins.RegisterCodeAgentSearchTools(reg, searchRoot); err != nil {
r.logger.Warn("failed to register search tools", map[string]any{"error": err.Error()})
}

// Register read_skill tool for lazy-loading skill instructions
readSkill := builtins.NewReadSkillTool(r.cfg.WorkDir)
if regErr := reg.Register(readSkill); regErr != nil {
Expand Down Expand Up @@ -397,15 +443,25 @@ func (r *Runner) Run(ctx context.Context) error {
charBudget = coreruntime.ContextBudgetForModel(mc.Client.Model)
}

// Build system prompt; append code-agent tool directives if those tools are registered.
sysPrompt := r.buildSystemPrompt()
if r.hasSkill("code-agent") {
sysPrompt += "\n\n" + codeAgentDirective
}

execCfg := coreruntime.LLMExecutorConfig{
Client: llmClient,
Tools: reg,
Hooks: hooks,
SystemPrompt: r.buildSystemPrompt(),
Logger: r.logger,
ModelName: mc.Client.Model,
CharBudget: charBudget,
FilesDir: filepath.Join(r.cfg.WorkDir, ".forge", "files"),
Client: llmClient,
Tools: reg,
Hooks: hooks,
SystemPrompt: sysPrompt,
Logger: r.logger,
ModelName: mc.Client.Model,
MaxIterations: 100,
CharBudget: charBudget,
FilesDir: filepath.Join(r.cfg.WorkDir, ".forge", "files"),
}
if r.derivedCLIConfig != nil {
execCfg.WorkflowPhases = r.derivedCLIConfig.WorkflowPhases
}

// Initialize memory persistence (enabled by default).
Expand Down Expand Up @@ -1647,6 +1703,28 @@ func ensureGitignore(workDir string) {
os.WriteFile(gitignorePath, []byte(content+entry), 0644) //nolint:errcheck
}

// hasSkill checks whether a skill with the given name is present in the project's
// discovered skill files. Checks both ## Tool: entry names and frontmatter name.
// hasSkill reports whether a skill called name exists in the project's
// discovered skill files. A skill matches either via its frontmatter name
// (used by skills that declare no ## Tool: entries) or via the name of any
// individual ## Tool: entry. Files that fail to parse are skipped silently.
func (r *Runner) hasSkill(name string) bool {
	for _, path := range r.discoverSkillFiles() {
		entries, meta, err := cliskills.ParseFileWithMetadata(path)
		if err != nil {
			// Unparseable skill file — ignore and keep scanning the rest.
			continue
		}
		// Frontmatter-level match first (covers tool-less skills).
		if meta != nil && meta.Name == name {
			return true
		}
		// Then each parsed tool entry.
		for i := range entries {
			if entries[i].Name == name {
				return true
			}
		}
	}
	return false
}

// discoverSkillFiles returns all skill file paths from both flat and subdirectory formats,
// plus the main SKILL.md (or custom path from forge.yaml).
func (r *Runner) discoverSkillFiles() []string {
Expand Down Expand Up @@ -1788,7 +1866,7 @@ func (r *Runner) buildSkillCatalog() string {

var catalogEntries []string
for _, match := range matches {
entries, _, err := cliskills.ParseFileWithMetadata(match)
entries, meta, err := cliskills.ParseFileWithMetadata(match)
if err != nil {
continue
}
Expand All @@ -1799,6 +1877,16 @@ func (r *Runner) buildSkillCatalog() string {
catalogSkillDir = filepath.Base(filepath.Dir(match))
}

// If no ## Tool: entries were parsed but frontmatter has name+description,
// create a synthetic entry so the skill appears in the catalog summary.
if len(entries) == 0 && meta != nil && meta.Name != "" && meta.Description != "" {
entries = []contract.SkillEntry{{
Name: meta.Name,
Description: meta.Description,
Metadata: meta,
}}
}

for _, entry := range entries {
// Skip skills that have scripts (already registered as tools)
scriptName := strings.ReplaceAll(entry.Name, "_", "-")
Expand Down Expand Up @@ -1831,13 +1919,6 @@ func (r *Runner) buildSkillCatalog() string {
line += " (uses cli_execute)"
}
catalogEntries = append(catalogEntries, line)

// Include full skill instructions when available
if entry.Body != "" {
catalogEntries = append(catalogEntries, "")
catalogEntries = append(catalogEntries, entry.Body)
catalogEntries = append(catalogEntries, "")
}
}
}
}
Expand All @@ -1848,6 +1929,7 @@ func (r *Runner) buildSkillCatalog() string {

var b strings.Builder
b.WriteString("## Available Skills\n\n")
b.WriteString("Use `read_skill` to load full instructions for a skill before using it.\n\n")
for _, entry := range catalogEntries {
b.WriteString(entry)
b.WriteString("\n")
Expand Down
31 changes: 24 additions & 7 deletions forge-core/forgecore_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -794,6 +794,15 @@ func TestNewRuntime_WithToolCalling(t *testing.T) {
},
FinishReason: "stop",
},
// The agent loop sends a continuation nudge after the first stop.
// Without workflow phases configured, only 1 nudge fires.
{
Message: llm.ChatMessage{
Role: llm.RoleAssistant,
Content: "I fetched the URL and got: ok",
},
FinishReason: "stop",
},
},
}

Expand Down Expand Up @@ -825,9 +834,9 @@ func TestNewRuntime_WithToolCalling(t *testing.T) {
t.Errorf("response text = %q, want 'I fetched the URL and got: ok'", resp.Parts[0].Text)
}

// Should have made 2 LLM calls
if toolCallClient.callIdx != 2 {
t.Errorf("LLM was called %d times, want 2", toolCallClient.callIdx)
// Should have made 3 LLM calls (tool call + stop + 1 continuation nudge)
if toolCallClient.callIdx != 3 {
t.Errorf("LLM was called %d times, want 3", toolCallClient.callIdx)
}
}

Expand Down Expand Up @@ -1064,7 +1073,7 @@ func TestNewRuntime_LLMError(t *testing.T) {
}

func TestNewRuntime_DefaultMaxIterations(t *testing.T) {
// If MaxIterations is 0, should default to 10
// If MaxIterations is 0, should default to 50
client := &mockLLMClient{
response: &llm.ChatResponse{
Message: llm.ChatMessage{
Expand All @@ -1077,7 +1086,7 @@ func TestNewRuntime_DefaultMaxIterations(t *testing.T) {

executor := NewRuntime(RuntimeConfig{
LLMClient: client,
// MaxIterations: 0 -> defaults to 10
// MaxIterations: 0 -> defaults to 50
})

if executor == nil {
Expand Down Expand Up @@ -1325,6 +1334,14 @@ func TestIntegration_CompileWithToolCallLoop(t *testing.T) {
},
FinishReason: "stop",
},
// Continuation nudge: without workflow phases, only 1 nudge fires.
{
Message: llm.ChatMessage{
Role: llm.RoleAssistant,
Content: "Found and fetched the result",
},
FinishReason: "stop",
},
},
}

Expand Down Expand Up @@ -1354,8 +1371,8 @@ func TestIntegration_CompileWithToolCallLoop(t *testing.T) {
if resp.Parts[0].Text != "Found and fetched the result" {
t.Errorf("response text = %q", resp.Parts[0].Text)
}
if toolCallClient.callIdx != 3 {
t.Errorf("LLM was called %d times, want 3", toolCallClient.callIdx)
if toolCallClient.callIdx != 4 {
t.Errorf("LLM was called %d times, want 4", toolCallClient.callIdx)
}
}

Expand Down
18 changes: 17 additions & 1 deletion forge-core/llm/providers/responses.go
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ func (c *ResponsesClient) buildRequest(req *llm.ChatRequest, stream bool) respon
inputs = append(inputs, responsesInput{
Type: "function_call",
CallID: tc.ID,
ID: tc.ID,
ID: responsesItemID(tc.ID),
Name: tc.Function.Name,
Arguments: tc.Function.Arguments,
})
Expand All @@ -249,6 +249,13 @@ func (c *ResponsesClient) buildRequest(req *llm.ChatRequest, stream bool) respon
})
}

// The Responses API requires the instructions field. If no system
// message was provided (e.g. summarization calls), use a minimal default
// so the request doesn't fail with "Instructions are required".
if instructions == "" {
instructions = "You are a helpful assistant."
}

r := responsesRequest{
Model: model,
Instructions: instructions,
Expand Down Expand Up @@ -321,6 +328,15 @@ type streamCompleted struct {
Response responsesResponse `json:"response"`
}

// responsesItemID ensures a tool call ID has the "fc_" prefix required by
// the Responses API for function_call item IDs.
// responsesItemID normalizes a tool call ID into the "fc_" form the
// Responses API requires for function_call item IDs. IDs that already
// carry the prefix are returned unchanged; otherwise any leading "call_"
// is dropped and "fc_" is prepended.
func responsesItemID(id string) string {
	const prefix = "fc_"
	if strings.HasPrefix(id, prefix) {
		return id
	}
	return prefix + strings.TrimPrefix(id, "call_")
}

func (c *ResponsesClient) readStream(r io.Reader, ch chan<- llm.StreamDelta) {
// Track function calls being built so we can emit them with correct IDs
type pendingFC struct {
Expand Down
2 changes: 1 addition & 1 deletion forge-core/runtime/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ func ResolveModelConfig(cfg *types.ForgeConfig, envVars map[string]string, provi
func defaultModelForProvider(provider string) string {
switch provider {
case "openai":
return "gpt-5.2-2025-12-11"
return "gpt-5.4"
case "anthropic":
return "claude-sonnet-4-20250514"
case "gemini":
Expand Down
6 changes: 3 additions & 3 deletions forge-core/runtime/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ func TestResolveModelConfig_OrgIDFromYAML(t *testing.T) {
cfg := &types.ForgeConfig{
Model: types.ModelRef{
Provider: "openai",
Name: "gpt-5.2-2025-12-11",
Name: "gpt-5.4",
OrganizationID: "org-yaml-123",
},
}
Expand All @@ -201,7 +201,7 @@ func TestResolveModelConfig_OrgIDEnvOverridesYAML(t *testing.T) {
cfg := &types.ForgeConfig{
Model: types.ModelRef{
Provider: "openai",
Name: "gpt-5.2-2025-12-11",
Name: "gpt-5.4",
OrganizationID: "org-yaml-123",
},
}
Expand Down Expand Up @@ -303,7 +303,7 @@ func TestDefaultModelForProvider(t *testing.T) {
provider string
expected string
}{
{"openai", "gpt-5.2-2025-12-11"},
{"openai", "gpt-5.4"},
{"anthropic", "claude-sonnet-4-20250514"},
{"gemini", "gemini-2.5-flash"},
{"ollama", "llama3"},
Expand Down
Loading
Loading