initializ · initializ-mk · Mar 11, 2026 · Mar 6, 2026 · Mar 11, 2026
diff --git a/forge-cli/build/policy_stage.go b/forge-cli/build/policy_stage.go
@@ -9,6 +9,7 @@ import (
 
 	"github.com/initializ/forge/forge-core/agentspec"
 	"github.com/initializ/forge/forge-core/pipeline"
+	"github.com/initializ/forge/forge-skills/contract"
 )
 
 // PolicyStage generates the policy scaffold file.
@@ -31,6 +32,41 @@ func (s *PolicyStage) Execute(ctx context.Context, bc *pipeline.BuildContext) er
 		}
 	}
 
+	// Inject aggregated skill guardrails if present
+	if bc.SkillRequirements != nil {
+		if reqs, ok := bc.SkillRequirements.(*contract.AggregatedRequirements); ok && reqs.SkillGuardrails != nil {
+			sg := reqs.SkillGuardrails
+			rules := &agentspec.SkillGuardrailRules{}
+			for _, c := range sg.DenyCommands {
+				rules.DenyCommands = append(rules.DenyCommands, agentspec.CommandFilter{
+					Pattern: c.Pattern,
+					Message: c.Message,
+				})
+			}
+			for _, o := range sg.DenyOutput {
+				rules.DenyOutput = append(rules.DenyOutput, agentspec.OutputFilter{
+					Pattern: o.Pattern,
+					Action:  o.Action,
+				})
+			}
+			for _, p := range sg.DenyPrompts {
+				rules.DenyPrompts = append(rules.DenyPrompts, agentspec.CommandFilter{
+					Pattern: p.Pattern,
+					Message: p.Message,
+				})
+			}
+			for _, r := range sg.DenyResponses {
+				rules.DenyResponses = append(rules.DenyResponses, agentspec.CommandFilter{
+					Pattern: r.Pattern,
+					Message: r.Message,
+				})
+			}
+			if len(rules.DenyCommands) > 0 || len(rules.DenyOutput) > 0 || len(rules.DenyPrompts) > 0 || len(rules.DenyResponses) > 0 {
+				bc.Spec.PolicyScaffold.SkillGuardrails = rules
+			}
+		}
+	}
+
 	data, err := json.MarshalIndent(bc.Spec.PolicyScaffold, "", "  ")
 	if err != nil {
 		return fmt.Errorf("marshalling policy scaffold: %w", err)

diff --git a/forge-cli/runtime/runner.go b/forge-cli/runtime/runner.go
@@ -59,12 +59,13 @@ type Runner struct {
 	cfg              RunnerConfig
 	logger           coreruntime.Logger
 	cliExecTool      *clitools.CLIExecuteTool
-	modelConfig      *coreruntime.ModelConfig   // resolved model config (for banner)
-	derivedCLIConfig *contract.DerivedCLIConfig // auto-derived from skill requirements
-	sched            *scheduler.Scheduler       // cron scheduler (nil until started)
-	startTime        time.Time                  // server start time (for /health uptime)
-	scheduleNotifier ScheduleNotifier           // optional: delivers cron results to channels
-	authToken        string                     // resolved auth token (empty if --no-auth)
+	modelConfig      *coreruntime.ModelConfig       // resolved model config (for banner)
+	derivedCLIConfig *contract.DerivedCLIConfig     // auto-derived from skill requirements
+	skillGuardrails  *agentspec.SkillGuardrailRules // runtime-parsed skill guardrails (fallback when no build artifact)
+	sched            *scheduler.Scheduler           // cron scheduler (nil until started)
+	startTime        time.Time                      // server start time (for /health uptime)
+	scheduleNotifier ScheduleNotifier               // optional: delivers cron results to channels
+	authToken        string                         // resolved auth token (empty if --no-auth)
 }
 
 // NewRunner creates a Runner from the given config.
@@ -379,6 +380,17 @@ func (r *Runner) Run(ctx context.Context) error {
 					r.registerProgressHooks(hooks)
 					r.registerGuardrailHooks(hooks, guardrails)
 
+					// Register skill-level guardrails if present.
+					// Prefer build-time artifact; fall back to runtime-parsed guardrails.
+					sgRules := scaffold.SkillGuardrails
+					if sgRules == nil {
+						sgRules = r.skillGuardrails
+					}
+					if sgRules != nil {
+						sg := coreruntime.NewSkillGuardrailEngine(sgRules, r.cfg.EnforceGuardrails, r.logger)
+						r.registerSkillGuardrailHooks(hooks, sg)
+					}
+
 					// Compute model-aware character budget.
 					charBudget := r.cfg.Config.Memory.CharBudget
 					if charBudget == 0 {
@@ -1388,6 +1400,46 @@ func (r *Runner) registerGuardrailHooks(hooks *coreruntime.HookRegistry, guardra
 	})
 }
 
+// registerSkillGuardrailHooks registers hooks that apply skill-declared deny
+// patterns to the latest user prompt (BeforeLLMCall), tool inputs (BeforeToolExec),
+// tool outputs (AfterToolExec), and LLM response content (AfterLLMCall).
+func (r *Runner) registerSkillGuardrailHooks(hooks *coreruntime.HookRegistry, sg *coreruntime.SkillGuardrailEngine) {
+	// Check the prompt before the LLM call; any engine error is returned from the hook.
+	hooks.Register(coreruntime.BeforeLLMCall, func(_ context.Context, hctx *coreruntime.HookContext) error {
+		if len(hctx.Messages) == 0 {
+			return nil
+		}
+		// Inspect only the final message, and only when its role is "user".
+		last := hctx.Messages[len(hctx.Messages)-1]
+		if last.Role == "user" {
+			return sg.CheckUserInput(last.Content)
+		}
+		return nil
+	})
+	hooks.Register(coreruntime.BeforeToolExec, func(_ context.Context, hctx *coreruntime.HookContext) error {
+		return sg.CheckCommandInput(hctx.ToolName, hctx.ToolInput)
+	})
+	hooks.Register(coreruntime.AfterToolExec, func(_ context.Context, hctx *coreruntime.HookContext) error {
+		redacted, err := sg.CheckCommandOutput(hctx.ToolName, hctx.ToolOutput)
+		if err != nil {
+			return err
+		}
+		hctx.ToolOutput = redacted
+		return nil
+	})
+	// Swap in the engine's replacement text when it reports the response changed.
+	hooks.Register(coreruntime.AfterLLMCall, func(_ context.Context, hctx *coreruntime.HookContext) error {
+		if hctx.Response == nil {
+			return nil
+		}
+		replaced, changed := sg.CheckLLMResponse(hctx.Response.Message.Content)
+		if changed {
+			hctx.Response.Message.Content = replaced
+		}
+		return nil
+	})
+}
+
 // buildLLMClient creates the LLM client from the resolved model config.
 // If fallback providers are configured, wraps them in a FallbackChain.
 func (r *Runner) buildLLMClient(mc *coreruntime.ModelConfig) (llm.Client, error) {
@@ -1771,9 +1823,12 @@ func (r *Runner) buildSkillCatalog() string {
 
 			if entry.Name != "" && entry.Description != "" {
 				line := fmt.Sprintf("- %s: %s", entry.Name, entry.Description)
-				// Add tool hint when skill requires specific binaries
+				// Note only that the skill uses cli_execute, without listing
+				// specific binary names — the LLM already sees the allowed enum
+				// in the tool schema, and listing names here leaks internal
+				// tooling when users ask "what skills/tools do you have?"
 				if entry.ForgeReqs != nil && len(entry.ForgeReqs.Bins) > 0 {
-					line += fmt.Sprintf(" (use cli_execute with: %s)", strings.Join(entry.ForgeReqs.Bins, ", "))
+					line += " (uses cli_execute)"
 				}
 				catalogEntries = append(catalogEntries, line)
 
@@ -1826,6 +1881,13 @@ func (r *Runner) validateSkillRequirements(envVars map[string]string) error {
 	entries := allEntries
 
 	reqs := requirements.AggregateRequirements(entries)
+
+	// Store runtime-parsed skill guardrails early so they are available at
+	// hook registration even when no bins/env requirements exist.
+	if reqs.SkillGuardrails != nil {
+		r.skillGuardrails = convertSkillGuardrails(reqs.SkillGuardrails)
+	}
+
 	if len(reqs.Bins) == 0 && len(reqs.EnvRequired) == 0 && len(reqs.EnvOneOf) == 0 && len(reqs.EnvOptional) == 0 {
 		return nil
 	}
@@ -1881,6 +1943,41 @@ func (r *Runner) validateSkillRequirements(envVars map[string]string) error {
 	return nil
 }
 
+// convertSkillGuardrails maps a non-nil skill-contract guardrail config onto the
+// agentspec rules used by the guardrail engine, mirroring build/policy_stage.go
+// for the runtime (no-build) path. It returns nil when no deny pattern is set.
+func convertSkillGuardrails(sg *contract.SkillGuardrailConfig) *agentspec.SkillGuardrailRules {
+	rules := &agentspec.SkillGuardrailRules{}
+	for _, c := range sg.DenyCommands {
+		rules.DenyCommands = append(rules.DenyCommands, agentspec.CommandFilter{
+			Pattern: c.Pattern,
+			Message: c.Message,
+		})
+	}
+	for _, o := range sg.DenyOutput {
+		rules.DenyOutput = append(rules.DenyOutput, agentspec.OutputFilter{
+			Pattern: o.Pattern,
+			Action:  o.Action,
+		})
+	}
+	for _, p := range sg.DenyPrompts {
+		rules.DenyPrompts = append(rules.DenyPrompts, agentspec.CommandFilter{
+			Pattern: p.Pattern,
+			Message: p.Message,
+		})
+	}
+	for _, r := range sg.DenyResponses {
+		rules.DenyResponses = append(rules.DenyResponses, agentspec.CommandFilter{
+			Pattern: r.Pattern,
+			Message: r.Message,
+		})
+	}
+	if len(rules.DenyCommands) == 0 && len(rules.DenyOutput) == 0 && len(rules.DenyPrompts) == 0 && len(rules.DenyResponses) == 0 {
+		return nil
+	}
+	return rules
+}
+
 func envFromOS() map[string]string {
 	env := make(map[string]string)
 	for _, e := range os.Environ() {

diff --git a/forge-cli/tools/cli_execute.go b/forge-cli/tools/cli_execute.go
@@ -71,6 +71,17 @@ func NewCLIExecuteTool(config CLIExecuteConfig) *CLIExecuteTool {
 	}
 	homeDir := os.Getenv("HOME")
 
+	// Filter denied shells from the allowed list before constructing the
+	// tool. Execute() blocks them at runtime, but including them in the
+	// schema/description causes the LLM to hallucinate they are available.
+	filtered := make([]string, 0, len(config.AllowedBinaries))
+	for _, bin := range config.AllowedBinaries {
+		if !deniedShells[bin] {
+			filtered = append(filtered, bin)
+		}
+	}
+	config.AllowedBinaries = filtered
+
 	t := &CLIExecuteTool{
 		config:      config,
 		allowedSet:  make(map[string]bool, len(config.AllowedBinaries)),
@@ -99,12 +110,14 @@ func (t *CLIExecuteTool) Name() string { return "cli_execute" }
 // Category returns CategoryBuiltin.
 func (t *CLIExecuteTool) Category() coretools.Category { return coretools.CategoryBuiltin }
 
-// Description returns a dynamic description listing available binaries.
+// Description returns the tool description, noting when no binary is available.
+// Binary names are deliberately omitted — listing them here causes the LLM to
+// regurgitate them when users ask capability questions; the schema enum lists them.
 func (t *CLIExecuteTool) Description() string {
 	if len(t.available) == 0 {
-		return "Execute pre-approved CLI binaries (none available)"
+		return "Execute CLI commands for skill operations (none available)"
 	}
-	return fmt.Sprintf("Execute pre-approved CLI binaries: %s", strings.Join(t.available, ", "))
+	return "Execute CLI commands for skill operations. Use the binary field's allowed values from the schema."
 }
 
 // InputSchema returns a dynamic JSON schema with the binary field's enum
@@ -299,6 +312,10 @@ func validateArg(arg string) error {
 	if strings.ContainsAny(arg, "\n\r") {
 		return fmt.Errorf("argument contains newline: %q", arg)
 	}
+	// Defense-in-depth: block file:// URLs, which can be used to read the host filesystem.
+	if strings.Contains(strings.ToLower(arg), "file://") {
+		return fmt.Errorf("argument contains file:// protocol: %q", arg)
+	}
 	return nil
 }
 

diff --git a/forge-cli/tools/cli_execute_test.go b/forge-cli/tools/cli_execute_test.go
@@ -147,6 +147,49 @@ func TestCLIExecute_ShellInjection(t *testing.T) {
 	}
 }
 
+// TestCLIExecute_FileProtocolBlocked verifies that arguments containing a file://
+// URL are rejected in any letter case, while other arguments still execute.
+func TestCLIExecute_FileProtocolBlocked(t *testing.T) {
+	if runtime.GOOS == "windows" {
+		t.Skip("echo behavior differs on Windows")
+	}
+
+	tool := NewCLIExecuteTool(CLIExecuteConfig{
+		AllowedBinaries: []string{"echo"},
+	})
+
+	tests := []struct {
+		name    string
+		arg     string
+		blocked bool
+	}{
+		{"file_lower", "file:///etc/passwd", true},
+		{"file_upper", "FILE:///etc/shadow", true},
+		{"file_mixed", "File:///etc/hosts", true},
+		{"http_allowed", "http://example.com", false},
+		{"https_allowed", "https://example.com", false},
+		{"plain_arg", "get", false},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			args, err := json.Marshal(cliExecuteArgs{Binary: "echo", Args: []string{tt.arg}})
+			if err != nil {
+				t.Fatalf("marshalling args: %v", err)
+			}
+			_, err = tool.Execute(context.Background(), args)
+			switch {
+			case !tt.blocked && err != nil:
+				t.Errorf("Execute() unexpected error for %q: %v", tt.arg, err)
+			case tt.blocked && err == nil:
+				t.Errorf("Execute() expected error for %q, got nil", tt.arg)
+			case tt.blocked && !strings.Contains(err.Error(), "file:// protocol"):
+				t.Errorf("error = %q, want it to mention 'file:// protocol'", err.Error())
+			}
+		})
+	}
+}
+
 func TestCLIExecute_Timeout(t *testing.T) {
 	if runtime.GOOS == "windows" {
 		t.Skip("sleep not available on Windows")

diff --git a/forge-core/agentspec/policy_scaffold.go b/forge-core/agentspec/policy_scaffold.go
@@ -2,11 +2,32 @@ package agentspec
 
 // PolicyScaffold defines the policy and guardrail configuration for an agent.
 type PolicyScaffold struct {
-	Guardrails []Guardrail `json:"guardrails,omitempty" bson:"guardrails,omitempty" yaml:"guardrails,omitempty"`
+	Guardrails      []Guardrail          `json:"guardrails,omitempty" bson:"guardrails,omitempty" yaml:"guardrails,omitempty"`
+	SkillGuardrails *SkillGuardrailRules `json:"skill_guardrails,omitempty" bson:"skill_guardrails,omitempty" yaml:"skill_guardrails,omitempty"` // the build policy stage sets this only when skills declare deny patterns
 }
 
 // Guardrail defines a single guardrail rule applied to an agent.
 type Guardrail struct {
 	Type   string         `json:"type" bson:"type" yaml:"type"`
 	Config map[string]any `json:"config,omitempty" bson:"config,omitempty" yaml:"config,omitempty"`
 }
+
+// SkillGuardrailRules holds the deny patterns aggregated from skills, grouped by target: commands, tool output, prompts, and responses.
+type SkillGuardrailRules struct {
+	DenyCommands  []CommandFilter `json:"deny_commands,omitempty" bson:"deny_commands,omitempty" yaml:"deny_commands,omitempty"`
+	DenyOutput    []OutputFilter  `json:"deny_output,omitempty" bson:"deny_output,omitempty" yaml:"deny_output,omitempty"`
+	DenyPrompts   []CommandFilter `json:"deny_prompts,omitempty" bson:"deny_prompts,omitempty" yaml:"deny_prompts,omitempty"`
+	DenyResponses []CommandFilter `json:"deny_responses,omitempty" bson:"deny_responses,omitempty" yaml:"deny_responses,omitempty"`
+}
+
+// CommandFilter pairs a deny pattern with its message; SkillGuardrailRules uses it for commands, prompts, and responses.
+type CommandFilter struct {
+	Pattern string `json:"pattern" bson:"pattern" yaml:"pattern"`
+	Message string `json:"message" bson:"message" yaml:"message"`
+}
+
+// OutputFilter blocks or redacts tool output matching a pattern.
+type OutputFilter struct {
+	Pattern string `json:"pattern" bson:"pattern" yaml:"pattern"`
+	Action  string `json:"action" bson:"action" yaml:"action"` // "block" or "redact"
+}