diff --git a/examples/curl_fetch.clad.toml b/examples/curl_fetch.clad.toml index 46efb48..90e8b27 100644 --- a/examples/curl_fetch.clad.toml +++ b/examples/curl_fetch.clad.toml @@ -19,6 +19,9 @@ required = true type = "url" schemes = ["http", "https"] scope_check = true +# curl fetches this URL — default-deny internal/SSRF hosts (loopback, private, +# cloud metadata). Use an allowlist if internal fetches are genuinely required. +block_internal = true description = "URL to fetch" [args.method] diff --git a/examples/nmap_scan.clad.toml b/examples/nmap_scan.clad.toml index 60e608b..67ea6b7 100644 --- a/examples/nmap_scan.clad.toml +++ b/examples/nmap_scan.clad.toml @@ -24,6 +24,9 @@ hash = "sha256" position = 1 required = true type = "scope_target" +# nmap connects to this target — default-deny internal/SSRF ranges (loopback, +# private, cloud metadata). Use an allowlist for legitimate internal scans. +block_internal = true description = "Target CIDR, IP, or hostname" [args.scan_type] diff --git a/rust/src/lib.rs b/rust/src/lib.rs index 1190b11..14f0895 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -108,6 +108,38 @@ pub fn parse_manifest(toml_str: &str) -> Result { Ok(manifest) } +/// Binaries that connect to a network destination derived from their arguments +/// — the ones for which an unconstrained `scope_target`/`url` is an SSRF surface. +const EGRESS_BINARIES: &[&str] = &[ + "curl", "wget", "nmap", "nc", "ncat", "netcat", "masscan", "telnet", "ssh", + "ping", "fping", "httpie", "http", "wfuzz", "ffuf", "hydra", "nikto", +]; + +fn command_basename(s: &str) -> String { + s.rsplit(['/', '\\']).next().unwrap_or(s).to_ascii_lowercase() +} + +/// True if the manifest's command dispatches one of the known egress binaries +/// (checked across `tool.binary`, `command.exec[0]`, and `command.template`). +fn command_invokes_egress_binary(m: &Manifest) -> bool { + let mut names: Vec = Vec::new(); + if !m.tool.binary.is_empty() { + names.push(command_basename(&m.tool.binary)); + } + if let Some(first) = m.command.exec.as_ref().and_then(|e| e.first()) { + names.push(command_basename(first)); + } + if let Some(first) = m + .command + .template + .as_ref() + .and_then(|t| t.split_whitespace().next()) + { + names.push(command_basename(first)); + } + names.iter().any(|n| EGRESS_BINARIES.contains(&n.as_str())) +} + /// Validate internal consistency of a parsed manifest. fn validate_manifest(manifest: &Manifest) -> Result<(), ToolCladError> { // Validate dispatch mode. @@ -160,6 +192,21 @@ fn validate_manifest(manifest: &Manifest) -> Result<(), ToolCladError> { } } + // Egress safety: a tool that connects to a network target must default-deny + // internal/SSRF addresses. If the command invokes a known network-egress + // binary and a scope_target/url arg does not set block_internal, refuse the + // manifest. (HTTP/MCP/browser backends are separate egress surfaces.) + if command_invokes_egress_binary(manifest) { + for (name, def) in &manifest.args { + if matches!(def.type_name.as_str(), "scope_target" | "url") && !def.block_internal { + return Err(ToolCladError::ManifestError(format!( + "argument '{name}' is a network target for an egress binary but block_internal is not set; \ + set block_internal = true to default-deny internal/SSRF targets" + ))); + } + } + } + // Validate that mapping keys reference existing args. if let Some(ref mappings) = manifest.command.mappings { for arg_name in mappings.keys() { @@ -438,6 +485,7 @@ risk_tier = "low" position = 1 required = true type = "scope_target" +block_internal = true description = "Target" [args.scan_type] @@ -476,6 +524,50 @@ type = "object" assert_eq!(mappings["scan_type"]["ping"], "-sn -PE"); } + #[test] + fn test_egress_binary_requires_block_internal() { + // curl on a url arg WITHOUT block_internal must be refused at load. + let bad = r#" +[tool] +name = "fetch" +version = "1.0.0" +binary = "curl" +description = "x" +[args.url] +position = 1 +required = true +type = "url" +[command] +template = "curl {url}" +[output] +format = "text" +"#; + let err = parse_manifest(bad).unwrap_err().to_string(); + assert!(err.contains("block_internal"), "unexpected: {err}"); + + // Same manifest WITH block_internal loads fine. + let good = bad.replace("type = \"url\"", "type = \"url\"\nblock_internal = true"); + assert!(parse_manifest(&good).is_ok()); + + // Non-egress binary (whois) does not require the flag. + let whois = r#" +[tool] +name = "whois" +version = "1.0.0" +binary = "whois" +description = "x" +[args.target] +position = 1 +required = true +type = "scope_target" +[command] +template = "whois {target}" +[output] +format = "text" +"#; + assert!(parse_manifest(whois).is_ok()); + } + #[test] fn test_manifest_missing_command() { let toml_str = r#" diff --git a/rust/src/validator.rs b/rust/src/validator.rs index 557e1d1..8c9001e 100644 --- a/rust/src/validator.rs +++ b/rust/src/validator.rs @@ -69,7 +69,7 @@ pub fn validate_arg(name: &str, def: &ArgDef, value: &str) -> Result validate_boolean(name, val), "enum" => validate_enum(name, def, val), "scope_target" => validate_scope_target(name, def, val), - "url" => validate_url(name, val), + "url" => validate_url(name, def, val), "path" => validate_path(name, val), "ip_address" => validate_ip_address(name, val), "cidr" => validate_cidr(name, val), @@ -296,26 +296,14 @@ fn validate_scope_target(name: &str, def: &ArgDef, val: &str) -> Result() { - if def.block_internal { - if let Some(reason) = non_public_ip_reason(&IpAddr::V4(base)) { - return Err(ToolCladError::ValidationError(format!( - "argument '{name}' scope_target CIDR base {reason} (blocked by block_internal policy)" - ))); - } - } + enforce_ip_policy(name, &IpAddr::V4(base), def.block_internal)?; return Ok(val.to_string()); } } @@ -371,6 +359,48 @@ fn tld_not_all_numeric(val: &str) -> bool { } } +/// Reason if `ip` must be blocked regardless of policy: link-local +/// (169.254.0.0/16, fe80::/10), which includes the cloud-metadata endpoint +/// 169.254.169.254. No agent tool has a legitimate reason to reach these, and +/// the blast radius (instance-credential theft) is maximal — so block always, +/// even when `block_internal` is off. v4-mapped IPv6 is unwrapped. +fn always_blocked_ip_reason(ip: &IpAddr) -> Option<&'static str> { + let v4 = match ip { + IpAddr::V4(a) => Some(*a), + IpAddr::V6(a) => a.to_ipv4_mapped(), + }; + if let Some(a) = v4 { + return a + .is_link_local() + .then_some("is a link-local / cloud-metadata address (169.254.0.0/16) — always blocked"); + } + if let IpAddr::V6(a) = ip { + if (a.segments()[0] & 0xffc0) == 0xfe80 { + return Some("is an IPv6 link-local address (fe80::/10) — always blocked"); + } + } + None +} + +/// Enforce the egress policy for a resolved target IP: link-local / metadata is +/// always blocked; the remaining non-public ranges are blocked only under the +/// opt-in `block_internal` policy. +fn enforce_ip_policy(name: &str, ip: &IpAddr, block_internal: bool) -> Result<(), ToolCladError> { + if let Some(reason) = always_blocked_ip_reason(ip) { + return Err(ToolCladError::ValidationError(format!( + "argument '{name}' target {reason}" + ))); + } + if block_internal { + if let Some(reason) = non_public_ip_reason(ip) { + return Err(ToolCladError::ValidationError(format!( + "argument '{name}' target {reason} (blocked by block_internal policy)" + ))); + } + } + Ok(()) +} + /// Returns a reason string if `ip` is a non-public address that SSRF defenses /// should block: loopback, private, link-local (incl. cloud metadata), etc. /// IPv6 v4-mapped addresses are unwrapped so `::ffff:127.0.0.1` is caught. @@ -421,15 +451,34 @@ fn has_punycode_label(host: &str) -> bool { .any(|label| label.len() >= 4 && label.as_bytes()[..4].eq_ignore_ascii_case(b"xn--")) } -fn validate_url(name: &str, val: &str) -> Result { +fn validate_url(name: &str, def: &ArgDef, val: &str) -> Result { reject_injection(name, val)?; - if URL_RE.is_match(val) { - Ok(val.to_string()) - } else { - Err(ToolCladError::ValidationError(format!( + if !URL_RE.is_match(val) { + return Err(ToolCladError::ValidationError(format!( "argument '{name}' is not a valid URL" - ))) + ))); + } + // Apply the same host hygiene as scope_target — curl/wget-style fetchers are + // the prime SSRF vector. URL_RE guarantees `scheme://host[/path]` with a + // simple host (no port/userinfo/brackets), so the host is everything between + // "://" and the first "/". + let host = val + .split_once("://") + .map(|(_, rest)| rest.split('/').next().unwrap_or("")) + .unwrap_or(""); + if has_punycode_label(host) { + return Err(ToolCladError::ValidationError(format!( + "argument '{name}' URL host must not contain punycode (xn--) labels" + ))); + } + if let Some(ip) = parse_canonical_ip(host) { + enforce_ip_policy(name, &ip, def.block_internal)?; + } else if looks_like_ip_literal(host) { + return Err(ToolCladError::ValidationError(format!( + "argument '{name}' URL host looks like a non-canonical IP literal; use dotted-quad IPv4 or a hostname" + ))); } + Ok(val.to_string()) } fn validate_path(name: &str, val: &str) -> Result { @@ -874,32 +923,40 @@ mod tests { #[test] fn test_scope_target_block_internal_off_allows_internal_by_default() { - // Default policy is permissive: tools that legitimately target internal - // hosts keep working. block_internal must be opted into. + // Default policy is permissive for loopback/private — tools that + // legitimately target internal hosts keep working without opt-in. let def = make_arg("scope_target"); assert!(validate_arg("t", &def, "127.0.0.1").is_ok()); - assert!(validate_arg("t", &def, "169.254.169.254").is_ok()); assert!(validate_arg("t", &def, "10.0.0.5").is_ok()); assert!(validate_arg("t", &def, "::1").is_ok()); + // ...but link-local / cloud metadata is blocked UNCONDITIONALLY. + assert!(validate_arg("t", &def, "169.254.169.254").is_err()); + } + + #[test] + fn test_scope_target_blocks_metadata_unconditionally() { + let def = make_arg("scope_target"); // block_internal = false + for v in ["169.254.169.254", "169.254.0.1", "::ffff:169.254.169.254", "fe80::1"] { + let err = validate_arg("t", &def, v).unwrap_err().to_string(); + assert!(err.contains("always blocked"), "{v} -> {err}"); + } } #[test] fn test_scope_target_block_internal_on_rejects_non_public() { let def = blocking_arg(); - // loopback / private / link-local (cloud metadata) / unspecified for v in [ "127.0.0.1", "10.0.0.5", "192.168.1.1", - "169.254.169.254", // IMDS + "169.254.169.254", // IMDS (always-block path) "0.0.0.0", "::1", "::ffff:127.0.0.1", // v4-mapped loopback must be unwrapped "fe80::1", // IPv6 link-local "fc00::1", // IPv6 unique-local ] { - let err = validate_arg("t", &def, v).unwrap_err().to_string(); - assert!(err.contains("block_internal"), "{v} -> {err}"); + assert!(validate_arg("t", &def, v).is_err(), "{v} should be blocked"); } // Public addresses and hostnames still pass under the policy. assert!(validate_arg("t", &def, "93.184.216.34").is_ok()); @@ -908,6 +965,26 @@ mod tests { assert!(validate_arg("t", &def, "10.0.0.0/24").is_err()); } + #[test] + fn test_url_host_hardening() { + let public = make_arg("url"); + // valid public URL passes + assert!(validate_arg("u", &public, "https://example.com/path").is_ok()); + assert!(validate_arg("u", &public, "http://93.184.216.34/x").is_ok()); + // cloud metadata blocked unconditionally, even without block_internal + assert!(validate_arg("u", &public, "http://169.254.169.254/latest/meta-data/").is_err()); + // obfuscated IP hosts rejected + assert!(validate_arg("u", &public, "http://0x7f000001/").is_err()); + assert!(validate_arg("u", &public, "http://2130706433/").is_err()); + // punycode host rejected + assert!(validate_arg("u", &public, "http://xn--example-9c.com/").is_err()); + // loopback allowed only when block_internal is off + assert!(validate_arg("u", &public, "http://127.0.0.1/").is_ok()); + let blocking = ArgDef { type_name: "url".to_string(), block_internal: true, ..Default::default() }; + assert!(validate_arg("u", &blocking, "http://127.0.0.1/").is_err()); + assert!(validate_arg("u", &blocking, "http://10.0.0.5/x").is_err()); + } + #[test] fn test_scope_target_specific_traversal_message() { // ../../etc/passwd previously collapsed into the generic