Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions forge-cli/build/policy_stage.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (

"github.com/initializ/forge/forge-core/agentspec"
"github.com/initializ/forge/forge-core/pipeline"
"github.com/initializ/forge/forge-skills/contract"
)

// PolicyStage generates the policy scaffold file.
Expand All @@ -31,6 +32,41 @@ func (s *PolicyStage) Execute(ctx context.Context, bc *pipeline.BuildContext) er
}
}

// Inject aggregated skill guardrails if present
if bc.SkillRequirements != nil {
if reqs, ok := bc.SkillRequirements.(*contract.AggregatedRequirements); ok && reqs.SkillGuardrails != nil {
sg := reqs.SkillGuardrails
rules := &agentspec.SkillGuardrailRules{}
for _, c := range sg.DenyCommands {
rules.DenyCommands = append(rules.DenyCommands, agentspec.CommandFilter{
Pattern: c.Pattern,
Message: c.Message,
})
}
for _, o := range sg.DenyOutput {
rules.DenyOutput = append(rules.DenyOutput, agentspec.OutputFilter{
Pattern: o.Pattern,
Action: o.Action,
})
}
for _, p := range sg.DenyPrompts {
rules.DenyPrompts = append(rules.DenyPrompts, agentspec.CommandFilter{
Pattern: p.Pattern,
Message: p.Message,
})
}
for _, r := range sg.DenyResponses {
rules.DenyResponses = append(rules.DenyResponses, agentspec.CommandFilter{
Pattern: r.Pattern,
Message: r.Message,
})
}
if len(rules.DenyCommands) > 0 || len(rules.DenyOutput) > 0 || len(rules.DenyPrompts) > 0 || len(rules.DenyResponses) > 0 {
bc.Spec.PolicyScaffold.SkillGuardrails = rules
}
}
}

data, err := json.MarshalIndent(bc.Spec.PolicyScaffold, "", " ")
if err != nil {
return fmt.Errorf("marshalling policy scaffold: %w", err)
Expand Down
113 changes: 105 additions & 8 deletions forge-cli/runtime/runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,12 +59,13 @@ type Runner struct {
cfg RunnerConfig
logger coreruntime.Logger
cliExecTool *clitools.CLIExecuteTool
modelConfig *coreruntime.ModelConfig // resolved model config (for banner)
derivedCLIConfig *contract.DerivedCLIConfig // auto-derived from skill requirements
sched *scheduler.Scheduler // cron scheduler (nil until started)
startTime time.Time // server start time (for /health uptime)
scheduleNotifier ScheduleNotifier // optional: delivers cron results to channels
authToken string // resolved auth token (empty if --no-auth)
modelConfig *coreruntime.ModelConfig // resolved model config (for banner)
derivedCLIConfig *contract.DerivedCLIConfig // auto-derived from skill requirements
skillGuardrails *agentspec.SkillGuardrailRules // runtime-parsed skill guardrails (fallback when no build artifact)
sched *scheduler.Scheduler // cron scheduler (nil until started)
startTime time.Time // server start time (for /health uptime)
scheduleNotifier ScheduleNotifier // optional: delivers cron results to channels
authToken string // resolved auth token (empty if --no-auth)
}

// NewRunner creates a Runner from the given config.
Expand Down Expand Up @@ -379,6 +380,17 @@ func (r *Runner) Run(ctx context.Context) error {
r.registerProgressHooks(hooks)
r.registerGuardrailHooks(hooks, guardrails)

// Register skill-level guardrails if present.
// Prefer build-time artifact; fall back to runtime-parsed guardrails.
sgRules := scaffold.SkillGuardrails
if sgRules == nil {
sgRules = r.skillGuardrails
}
if sgRules != nil {
sg := coreruntime.NewSkillGuardrailEngine(sgRules, r.cfg.EnforceGuardrails, r.logger)
r.registerSkillGuardrailHooks(hooks, sg)
}

// Compute model-aware character budget.
charBudget := r.cfg.Config.Memory.CharBudget
if charBudget == 0 {
Expand Down Expand Up @@ -1388,6 +1400,46 @@ func (r *Runner) registerGuardrailHooks(hooks *coreruntime.HookRegistry, guardra
})
}

// registerSkillGuardrailHooks attaches the skill guardrail engine to four hook
// points, in this order:
//
//   - BeforeLLMCall: the most recent message, when its role is "user", is
//     passed to sg.CheckUserInput and any error is returned to the caller.
//   - BeforeToolExec: the tool name and input are passed to
//     sg.CheckCommandInput.
//   - AfterToolExec: the tool output is passed to sg.CheckCommandOutput; on
//     success the hook context's output is replaced with the returned value.
//   - AfterLLMCall: the response content is passed to sg.CheckLLMResponse and
//     overwritten when the engine reports a change.
func (r *Runner) registerSkillGuardrailHooks(hooks *coreruntime.HookRegistry, sg *coreruntime.SkillGuardrailEngine) {
	// Screen the newest user prompt before the LLM sees it.
	screenPrompt := func(_ context.Context, hctx *coreruntime.HookContext) error {
		count := len(hctx.Messages)
		if count == 0 {
			return nil
		}
		// Only the latest message is inspected, and only if the user sent it.
		latest := hctx.Messages[count-1]
		if latest.Role != "user" {
			return nil
		}
		return sg.CheckUserInput(latest.Content)
	}

	// Check a tool invocation before it runs.
	screenCommand := func(_ context.Context, hctx *coreruntime.HookContext) error {
		return sg.CheckCommandInput(hctx.ToolName, hctx.ToolInput)
	}

	// Check tool output; the engine's returned text replaces the original
	// output unless it reports an error.
	screenOutput := func(_ context.Context, hctx *coreruntime.HookContext) error {
		cleaned, err := sg.CheckCommandOutput(hctx.ToolName, hctx.ToolOutput)
		if err != nil {
			return err
		}
		hctx.ToolOutput = cleaned
		return nil
	}

	// Rewrite LLM responses that enumerate binary names or internal tooling.
	screenResponse := func(_ context.Context, hctx *coreruntime.HookContext) error {
		if hctx.Response == nil {
			return nil
		}
		if rewritten, changed := sg.CheckLLMResponse(hctx.Response.Message.Content); changed {
			hctx.Response.Message.Content = rewritten
		}
		return nil
	}

	hooks.Register(coreruntime.BeforeLLMCall, screenPrompt)
	hooks.Register(coreruntime.BeforeToolExec, screenCommand)
	hooks.Register(coreruntime.AfterToolExec, screenOutput)
	hooks.Register(coreruntime.AfterLLMCall, screenResponse)
}

// buildLLMClient creates the LLM client from the resolved model config.
// If fallback providers are configured, wraps them in a FallbackChain.
func (r *Runner) buildLLMClient(mc *coreruntime.ModelConfig) (llm.Client, error) {
Expand Down Expand Up @@ -1771,9 +1823,12 @@ func (r *Runner) buildSkillCatalog() string {

if entry.Name != "" && entry.Description != "" {
line := fmt.Sprintf("- %s: %s", entry.Name, entry.Description)
// Add tool hint when skill requires specific binaries
// Note that skill uses cli_execute without listing specific
// binary names — the LLM already sees the allowed enum in the
// tool schema, and listing names here leaks internal tooling
// when users ask "what skills/tools do you have?"
if entry.ForgeReqs != nil && len(entry.ForgeReqs.Bins) > 0 {
line += fmt.Sprintf(" (use cli_execute with: %s)", strings.Join(entry.ForgeReqs.Bins, ", "))
line += " (uses cli_execute)"
}
catalogEntries = append(catalogEntries, line)

Expand Down Expand Up @@ -1826,6 +1881,13 @@ func (r *Runner) validateSkillRequirements(envVars map[string]string) error {
entries := allEntries

reqs := requirements.AggregateRequirements(entries)

// Store runtime-parsed skill guardrails early so they are available at
// hook registration even when no bins/env requirements exist.
if reqs.SkillGuardrails != nil {
r.skillGuardrails = convertSkillGuardrails(reqs.SkillGuardrails)
}

if len(reqs.Bins) == 0 && len(reqs.EnvRequired) == 0 && len(reqs.EnvOneOf) == 0 && len(reqs.EnvOptional) == 0 {
return nil
}
Expand Down Expand Up @@ -1881,6 +1943,41 @@ func (r *Runner) validateSkillRequirements(envVars map[string]string) error {
return nil
}

// convertSkillGuardrails converts skill-contract guardrail config into the
// agentspec representation used by the guardrail engine. This mirrors the
// conversion in build/policy_stage.go for the runtime (no-build) path.
//
// It returns nil when sg is nil or when sg carries no deny rules at all, so
// callers can treat a nil result as "no skill guardrails to enforce".
func convertSkillGuardrails(sg *contract.SkillGuardrailConfig) *agentspec.SkillGuardrailRules {
	// Guard against a nil config: ranging over sg's fields would otherwise
	// dereference a nil pointer and panic.
	if sg == nil {
		return nil
	}

	rules := &agentspec.SkillGuardrailRules{}
	for _, c := range sg.DenyCommands {
		rules.DenyCommands = append(rules.DenyCommands, agentspec.CommandFilter{
			Pattern: c.Pattern,
			Message: c.Message,
		})
	}
	for _, o := range sg.DenyOutput {
		rules.DenyOutput = append(rules.DenyOutput, agentspec.OutputFilter{
			Pattern: o.Pattern,
			Action:  o.Action,
		})
	}
	for _, p := range sg.DenyPrompts {
		rules.DenyPrompts = append(rules.DenyPrompts, agentspec.CommandFilter{
			Pattern: p.Pattern,
			Message: p.Message,
		})
	}
	for _, r := range sg.DenyResponses {
		rules.DenyResponses = append(rules.DenyResponses, agentspec.CommandFilter{
			Pattern: r.Pattern,
			Message: r.Message,
		})
	}

	// An entirely empty rule set is reported as nil rather than as an empty
	// struct.
	if len(rules.DenyCommands) == 0 && len(rules.DenyOutput) == 0 && len(rules.DenyPrompts) == 0 && len(rules.DenyResponses) == 0 {
		return nil
	}
	return rules
}

func envFromOS() map[string]string {
env := make(map[string]string)
for _, e := range os.Environ() {
Expand Down
23 changes: 20 additions & 3 deletions forge-cli/tools/cli_execute.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,17 @@ func NewCLIExecuteTool(config CLIExecuteConfig) *CLIExecuteTool {
}
homeDir := os.Getenv("HOME")

// Filter denied shells from the allowed list before constructing the
// tool. Execute() blocks them at runtime, but including them in the
// schema/description causes the LLM to hallucinate they are available.
filtered := make([]string, 0, len(config.AllowedBinaries))
for _, bin := range config.AllowedBinaries {
if !deniedShells[bin] {
filtered = append(filtered, bin)
}
}
config.AllowedBinaries = filtered

t := &CLIExecuteTool{
config: config,
allowedSet: make(map[string]bool, len(config.AllowedBinaries)),
Expand Down Expand Up @@ -99,12 +110,14 @@ func (t *CLIExecuteTool) Name() string { return "cli_execute" }
// Category reports the tool's category, which is always
// coretools.CategoryBuiltin.
func (t *CLIExecuteTool) Category() coretools.Category {
	return coretools.CategoryBuiltin
}

// Description returns a description of the tool. Binary names are deliberately
// omitted — listing them here causes the LLM to regurgitate them when users
// ask capability questions. The LLM discovers allowed binaries from the schema enum.
//
// When no binaries are available the description says so explicitly.
func (t *CLIExecuteTool) Description() string {
	if len(t.available) == 0 {
		return "Execute CLI commands for skill operations (none available)"
	}
	return "Execute CLI commands for skill operations. Use the binary field's allowed values from the schema."
}

// InputSchema returns a dynamic JSON schema with the binary field's enum
Expand Down Expand Up @@ -299,6 +312,10 @@ func validateArg(arg string) error {
if strings.ContainsAny(arg, "\n\r") {
return fmt.Errorf("argument contains newline: %q", arg)
}
// Defense-in-depth: block file:// URLs which can read the host filesystem.
if strings.Contains(strings.ToLower(arg), "file://") {
return fmt.Errorf("argument contains file:// protocol: %q", arg)
}
return nil
}

Expand Down
43 changes: 43 additions & 0 deletions forge-cli/tools/cli_execute_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,49 @@ func TestCLIExecute_ShellInjection(t *testing.T) {
}
}

// TestCLIExecute_FileProtocolBlocked verifies that Execute rejects arguments
// containing a file:// URL regardless of letter case, with an error that
// mentions "file:// protocol", while http/https URLs and plain arguments are
// accepted.
func TestCLIExecute_FileProtocolBlocked(t *testing.T) {
	if runtime.GOOS == "windows" {
		t.Skip("echo behavior differs on Windows")
	}

	tool := NewCLIExecuteTool(CLIExecuteConfig{
		AllowedBinaries: []string{"echo"},
	})

	tests := []struct {
		name    string
		arg     string
		blocked bool
	}{
		{"file_lower", "file:///etc/passwd", true},
		{"file_upper", "FILE:///etc/shadow", true},
		{"file_mixed", "File:///etc/hosts", true},
		{"http_allowed", "http://example.com", false},
		{"https_allowed", "https://example.com", false},
		{"plain_arg", "get", false},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			input, err := json.Marshal(cliExecuteArgs{
				Binary: "echo",
				Args:   []string{tt.arg},
			})
			if err != nil {
				// A marshalling failure would make the Execute result
				// meaningless, so stop this subtest immediately.
				t.Fatalf("marshalling args for %q: %v", tt.arg, err)
			}

			_, err = tool.Execute(context.Background(), input)
			switch {
			case tt.blocked && err == nil:
				t.Errorf("Execute() expected error for %q, got nil", tt.arg)
			case tt.blocked && !strings.Contains(err.Error(), "file:// protocol"):
				// err is non-nil here: the previous case handled nil.
				t.Errorf("error = %q, want it to mention 'file:// protocol'", err.Error())
			case !tt.blocked && err != nil:
				t.Errorf("Execute() unexpected error for %q: %v", tt.arg, err)
			}
		})
	}
}

func TestCLIExecute_Timeout(t *testing.T) {
if runtime.GOOS == "windows" {
t.Skip("sleep not available on Windows")
Expand Down
23 changes: 22 additions & 1 deletion forge-core/agentspec/policy_scaffold.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,32 @@ package agentspec

// PolicyScaffold defines the policy and guardrail configuration for an agent.
type PolicyScaffold struct {
	// Guardrails lists the guardrail rules applied to the agent.
	Guardrails []Guardrail `json:"guardrails,omitempty" bson:"guardrails,omitempty" yaml:"guardrails,omitempty"`
	// SkillGuardrails holds the aggregated skill-level deny patterns; it is
	// omitted from serialized output when nil.
	SkillGuardrails *SkillGuardrailRules `json:"skill_guardrails,omitempty" bson:"skill_guardrails,omitempty" yaml:"skill_guardrails,omitempty"`
}

// Guardrail describes one guardrail rule attached to an agent.
type Guardrail struct {
	// Type names the kind of guardrail; it is always serialized.
	Type string `json:"type" bson:"type" yaml:"type"`
	// Config carries free-form settings for the guardrail and is omitted from
	// serialized output when empty.
	Config map[string]any `json:"config,omitempty" bson:"config,omitempty" yaml:"config,omitempty"`
}

// SkillGuardrailRules holds aggregated skill-level deny patterns.
//
// The bson and yaml tags mirror the json keys so that every serialization of
// the enclosing PolicyScaffold uses the same field names.
type SkillGuardrailRules struct {
	// DenyCommands lists filters applied to commands.
	DenyCommands []CommandFilter `json:"deny_commands,omitempty" bson:"deny_commands,omitempty" yaml:"deny_commands,omitempty"`
	// DenyOutput lists filters applied to tool output.
	DenyOutput []OutputFilter `json:"deny_output,omitempty" bson:"deny_output,omitempty" yaml:"deny_output,omitempty"`
	// DenyPrompts lists filters applied to user prompts.
	DenyPrompts []CommandFilter `json:"deny_prompts,omitempty" bson:"deny_prompts,omitempty" yaml:"deny_prompts,omitempty"`
	// DenyResponses lists filters applied to LLM responses.
	DenyResponses []CommandFilter `json:"deny_responses,omitempty" bson:"deny_responses,omitempty" yaml:"deny_responses,omitempty"`
}

// CommandFilter pairs a deny pattern with the message reported when it
// matches. SkillGuardrailRules uses it for commands, prompts, and responses.
//
// The bson and yaml tags mirror the json keys so all serializations agree.
type CommandFilter struct {
	// Pattern is the deny pattern to match against.
	Pattern string `json:"pattern" bson:"pattern" yaml:"pattern"`
	// Message is the text associated with a match of Pattern.
	Message string `json:"message" bson:"message" yaml:"message"`
}

// OutputFilter blocks or redacts tool output matching a pattern.
//
// The bson and yaml tags mirror the json keys so all serializations agree.
type OutputFilter struct {
	// Pattern is the deny pattern to match against tool output.
	Pattern string `json:"pattern" bson:"pattern" yaml:"pattern"`
	// Action selects what happens on a match: "block" or "redact".
	Action string `json:"action" bson:"action" yaml:"action"`
}
Loading
Loading