diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..d93600e --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,44 @@ +name: CI + +on: + push: + branches: [main] + pull_request: + +env: + CARGO_TERM_COLOR: always + RUSTFLAGS: "-D warnings" + +jobs: + build-test-lint: + name: build / test / lint + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Install stable Rust toolchain + uses: dtolnay/rust-toolchain@stable + with: + components: rustfmt, clippy + + - name: Cache cargo registry and target + uses: actions/cache@v4 + with: + path: | + ~/.cargo/registry + ~/.cargo/git + target + key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} + restore-keys: ${{ runner.os }}-cargo- + + - name: Check formatting + run: cargo fmt --all -- --check + + - name: Clippy (deny warnings) + run: cargo clippy --all-targets -- -D warnings + + - name: Build + run: cargo build --verbose + + - name: Test + run: cargo test --verbose diff --git a/README.md b/README.md index 4356d9e..f7487f0 100644 --- a/README.md +++ b/README.md @@ -59,6 +59,7 @@ None of the hardware specifics are required for the current CPU path — the age - ✅ NPU driver unblocked — patched `amdxdna.ko` loads on cold boot, `xrt-smi` + `flm validate` green. See [`PHASE-2-RECON.md`](PHASE-2-RECON.md). - ⛔ NPU **inference** still blocked — FastFlowLM can't handle protocol-7 opcodes required by Qwen3/GGUF models. CPU path remains active until this unblocks. - ⚠️ Nuclei on the target hardware CPU still times out on full template sweeps with large host lists; `--nuclei-cap` flag added to limit input hosts -- ⚠️ No automated tests yet. All verification has been manual end-to-end runs. +- ✅ Unit tests cover the pure core logic: scope matching, the tool-output parsers (subfinder/httpx/nuclei/dnsx/ffuf), finding dedup + severity classification, and the LLM response parser (`cargo test` — 38 tests). 
End-to-end behavior against live targets is still verified manually. +- ✅ CI runs `cargo fmt --check`, `cargo clippy -D warnings`, build, and test on every push/PR (`.github/workflows/ci.yml`). See [`RESEARCH.md`](RESEARCH.md) for the research brief this project is based on, including references to comparable pentest agents (Shannon, PentestGPT, PentAGI, CAI), NPU backend options (FastFlowLM, ort crate + Vitis AI EP), and an honest gap analysis. diff --git a/src/agent/react_loop.rs b/src/agent/react_loop.rs index 7e39269..84e8322 100644 --- a/src/agent/react_loop.rs +++ b/src/agent/react_loop.rs @@ -17,8 +17,8 @@ use super::state::PreflightReport; use crate::scope::{host_in_scope, normalize_host}; use crate::tools::{ exec_dnsx, exec_ffuf, exec_httpx, exec_nuclei, exec_subfinder, nuclei_templates_root, - parse_dnsx_output, parse_ffuf_output, resolve_wordlist, select_interesting_urls, - ToolExecution, ToolKind, + parse_dnsx_output, parse_ffuf_output, resolve_wordlist, select_interesting_urls, ToolExecution, + ToolKind, }; use super::state::{preview, RunRecord, StepRecord}; @@ -47,7 +47,11 @@ pub async fn run_recon(cli: &Cli, domain: &str) -> Result<()> { println!("[*] nuclei cap : {}", cfg.nuclei_cap); println!( "[*] dedup : {}", - if cfg.no_dedup { "off (--no-dedup)" } else { "on" } + if cfg.no_dedup { + "off (--no-dedup)" + } else { + "on" + } ); println!(); @@ -369,8 +373,8 @@ pub async fn run_recon(cli: &Cli, domain: &str) -> Result<()> { // Suspicious-low threshold: if scoped had >50 // hosts and dnsx resolved <2%, treat as DNS // failure rather than a valid filter. - let suspicious_low = scoped.len() > 50 - && kept * 50 < scoped.len(); + let suspicious_low = + scoped.len() > 50 && kept * 50 < scoped.len(); if kept == 0 || suspicious_low { println!( "[!] 
dnsx resolved {}/{} — suspiciously low, falling back to unfiltered list", @@ -416,10 +420,8 @@ pub async fn run_recon(cli: &Cli, domain: &str) -> Result<()> { // Prefer explicit URLs from the LLM; otherwise pull from httpx. // When falling back to httpx, run the interesting-host // heuristic so nuclei only scans the top N URLs. - let explicit_urls: Option> = args - .get("urls") - .and_then(|h| h.as_array()) - .map(|arr| { + let explicit_urls: Option> = + args.get("urls").and_then(|h| h.as_array()).map(|arr| { arr.iter() .filter_map(|v| v.as_str().map(String::from)) .collect() @@ -507,10 +509,7 @@ pub async fn run_recon(cli: &Cli, domain: &str) -> Result<()> { // Active mode: ffuf must pick one live URL at a time. // Prefer the LLM's explicit "url" arg; otherwise fall // back to the first scoped URL from last httpx run. - let llm_url = args - .get("url") - .and_then(|u| u.as_str()) - .map(String::from); + let llm_url = args.get("url").and_then(|u| u.as_str()).map(String::from); let target_url = llm_url.unwrap_or_else(|| { last_httpx_urls .iter() @@ -524,11 +523,15 @@ pub async fn run_recon(cli: &Cli, domain: &str) -> Result<()> { args: args.clone(), stdout: String::new(), stderr: String::new(), - error: Some("no URL supplied and no live httpx URL available".into()), + error: Some( + "no URL supplied and no live httpx URL available".into(), + ), duration_ms: t0.elapsed().as_millis(), } } else if !host_in_scope(&target_url, &cfg.scope_patterns) { - println!("[!] scope guard: ffuf URL '{target_url}' not in scope, skipping"); + println!( + "[!] 
scope guard: ffuf URL '{target_url}' not in scope, skipping" + ); ToolExecution { tool: kind, args: args.clone(), @@ -540,7 +543,11 @@ pub async fn run_recon(cli: &Cli, domain: &str) -> Result<()> { } else { match resolve_wordlist(cfg.ffuf_wordlist.as_deref()) { Ok((wl, _is_tmp)) => { - println!("[>] ffuf path-fuzzing {} (wordlist: {})", target_url, wl.display()); + println!( + "[>] ffuf path-fuzzing {} (wordlist: {})", + target_url, + wl.display() + ); match exec_ffuf(&target_url, &wl).await { Ok((so, se)) => ToolExecution { tool: kind, @@ -665,11 +672,8 @@ pub async fn run_recon(cli: &Cli, domain: &str) -> Result<()> { ) } ToolKind::Httpx => { - let urls: Vec = last_httpx_urls - .iter() - .take(10) - .cloned() - .collect(); + let urls: Vec = + last_httpx_urls.iter().take(10).cloned().collect(); format!( "{} live hosts responded. First {}: {}", line_count, @@ -678,20 +682,14 @@ pub async fn run_recon(cli: &Cli, domain: &str) -> Result<()> { ) } ToolKind::Nuclei => { - let n = all_findings - .iter() - .filter(|f| f.kind == "nuclei") - .count(); + let n = all_findings.iter().filter(|f| f.kind == "nuclei").count(); format!( "nuclei scan complete: {} JSONL lines, {} parsed findings. Next step should be done.", line_count, n ) } ToolKind::Ffuf => { - let n = all_findings - .iter() - .filter(|f| f.kind == "ffuf") - .count(); + let n = all_findings.iter().filter(|f| f.kind == "ffuf").count(); format!( "ffuf path-fuzz complete: {} parsed findings. Next step should be done unless you want to fuzz another live host.", n @@ -706,7 +704,9 @@ pub async fn run_recon(cli: &Cli, domain: &str) -> Result<()> { }); messages.push(ChatMessage { role: "user".into(), - content: format!("{observation}\n\nWhat next? Respond with a single JSON action."), + content: format!( + "{observation}\n\nWhat next? Respond with a single JSON action." 
+ ), }); steps.push(StepRecord { @@ -746,7 +746,7 @@ pub async fn run_recon(cli: &Cli, domain: &str) -> Result<()> { } // Sort raw findings by severity desc for report rendering. - all_findings.sort_by(|a, b| b.severity.cmp(&a.severity)); + all_findings.sort_by_key(|f| std::cmp::Reverse(f.severity)); let raw_findings_view = all_findings.clone(); // Dedup (default) or passthrough. @@ -796,8 +796,7 @@ pub async fn run_recon(cli: &Cli, domain: &str) -> Result<()> { std::fs::write(&findings_path, serde_json::to_string_pretty(&record)?) .context("write findings json")?; - std::fs::write(&report_path, render_report(&record)) - .context("write markdown report")?; + std::fs::write(&report_path, render_report(&record)).context("write markdown report")?; println!(); println!("========== AGENT SUMMARY =========="); diff --git a/src/config.rs b/src/config.rs index 8997d17..e87978c 100644 --- a/src/config.rs +++ b/src/config.rs @@ -151,12 +151,7 @@ impl Config { active, ffuf_wordlist, .. - } => ( - org.clone(), - asn.clone(), - *active, - ffuf_wordlist.clone(), - ), + } => (org.clone(), asn.clone(), *active, ffuf_wordlist.clone()), }; Self { model, diff --git a/src/findings/mod.rs b/src/findings/mod.rs index 4d1a9f1..d44b1a0 100644 --- a/src/findings/mod.rs +++ b/src/findings/mod.rs @@ -3,6 +3,6 @@ pub mod models; pub mod parse; -pub use models::{DedupedFinding, Finding, Severity}; pub use models::dedup_findings; +pub use models::{DedupedFinding, Finding, Severity}; pub use parse::{extract_hosts_from_subfinder, parse_httpx_output, parse_nuclei_output}; diff --git a/src/findings/models.rs b/src/findings/models.rs index b8a4724..c7d5c78 100644 --- a/src/findings/models.rs +++ b/src/findings/models.rs @@ -124,3 +124,67 @@ pub fn dedup_findings(raw: &[Finding]) -> Vec { out.sort_by(|a, b| b.severity.cmp(&a.severity).then(b.count.cmp(&a.count))); out } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn severity_orders_low_to_high() { + assert!(Severity::Info < 
Severity::Low); + assert!(Severity::Low < Severity::Medium); + assert!(Severity::Medium < Severity::High); + assert!(Severity::High < Severity::Critical); + } + + #[test] + fn from_str_loose_handles_aliases_and_unknowns() { + assert_eq!(Severity::from_str_loose("CRITICAL"), Severity::Critical); + assert_eq!(Severity::from_str_loose(" high "), Severity::High); + assert_eq!(Severity::from_str_loose("moderate"), Severity::Medium); + assert_eq!(Severity::from_str_loose("medium"), Severity::Medium); + assert_eq!(Severity::from_str_loose("low"), Severity::Low); + // anything unrecognized falls back to Info + assert_eq!(Severity::from_str_loose("bogus"), Severity::Info); + assert_eq!(Severity::from_str_loose(""), Severity::Info); + } + + #[test] + fn dedup_folds_identical_kind_and_details() { + let raw = vec![ + Finding::new(Severity::Low, "http-probe", "a.example.com", "same"), + Finding::new(Severity::High, "http-probe", "b.example.com", "same"), + Finding::new(Severity::Low, "http-probe", "a.example.com", "same"), + ]; + let deduped = dedup_findings(&raw); + assert_eq!(deduped.len(), 1); + let d = &deduped[0]; + // severity promoted to the max of the group + assert_eq!(d.severity, Severity::High); + // count reflects number of source rows, not unique targets + assert_eq!(d.count, 3); + // targets deduped, insertion order preserved + assert_eq!( + d.targets, + vec!["a.example.com".to_string(), "b.example.com".to_string()] + ); + } + + #[test] + fn dedup_keeps_distinct_groups_and_sorts_by_severity() { + let raw = vec![ + Finding::new(Severity::Info, "http-probe", "x", "low-thing"), + Finding::new(Severity::Critical, "nuclei", "y", "bad-thing"), + ]; + let deduped = dedup_findings(&raw); + assert_eq!(deduped.len(), 2); + // sorted severity desc -> Critical first + assert_eq!(deduped[0].severity, Severity::Critical); + assert_eq!(deduped[1].severity, Severity::Info); + } + + #[test] + fn dedup_empty_input_yields_empty() { + assert!(dedup_findings(&[]).is_empty()); + } +} 
diff --git a/src/findings/parse.rs b/src/findings/parse.rs index b08a07b..99aeca3 100644 --- a/src/findings/parse.rs +++ b/src/findings/parse.rs @@ -35,10 +35,7 @@ pub fn parse_httpx_output(stdout: &str) -> (Vec, Vec) { .and_then(|x| x.as_str()) .map(String::from) .unwrap_or_else(|| normalize_host(&url)); - let status = v - .get("status_code") - .and_then(|x| x.as_i64()) - .unwrap_or(0); + let status = v.get("status_code").and_then(|x| x.as_i64()).unwrap_or(0); let title = v.get("title").and_then(|x| x.as_str()).unwrap_or(""); let tech: Vec = v .get("tech") @@ -71,12 +68,7 @@ pub fn parse_httpx_output(stdout: &str) -> (Vec, Vec) { title.chars().take(80).collect::(), tech.join(", ") ); - findings.push(Finding::new( - sev, - "http-probe", - host, - details, - )); + findings.push(Finding::new(sev, "http-probe", host, details)); } (live_urls, findings) } @@ -103,10 +95,7 @@ pub fn parse_nuclei_output(stdout: &str) -> Vec { .and_then(|x| x.as_str()) .unwrap_or("unknown") .to_string(); - let template_id = v - .get("template-id") - .and_then(|x| x.as_str()) - .unwrap_or(""); + let template_id = v.get("template-id").and_then(|x| x.as_str()).unwrap_or(""); let matched = v .get("matched-at") .and_then(|x| x.as_str()) @@ -123,3 +112,89 @@ pub fn parse_nuclei_output(stdout: &str) -> Vec { } out } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn subfinder_extraction_trims_and_drops_blanks() { + let stdout = " api.example.com \n\nexample.com\n \nwww.example.com\n"; + let hosts = extract_hosts_from_subfinder(stdout); + assert_eq!( + hosts, + vec![ + "api.example.com".to_string(), + "example.com".to_string(), + "www.example.com".to_string(), + ] + ); + } + + #[test] + fn httpx_parses_url_status_and_severity() { + let stdout = concat!( + r#"{"url":"https://example.com","host":"example.com","status_code":200,"title":"Home","tech":["nginx"]}"#, + "\n", + r#"{"url":"https://example.com/admin","host":"example.com","status_code":401,"title":"Admin Login"}"#, + "\n", + 
r#"{"url":"https://example.com/plain","host":"example.com","status_code":200,"title":""}"#, + "\n", + "not json, should be skipped\n", + ); + let (urls, findings) = parse_httpx_output(stdout); + assert_eq!( + urls, + vec![ + "https://example.com".to_string(), + "https://example.com/admin".to_string(), + "https://example.com/plain".to_string(), + ] + ); + assert_eq!(findings.len(), 3); + // tech disclosed but no admin hint -> Low + assert_eq!(findings[0].severity, Severity::Low); + // admin/login title -> Medium + assert_eq!(findings[1].severity, Severity::Medium); + // no tech, no admin hint -> Info + assert_eq!(findings[2].severity, Severity::Info); + assert_eq!(findings[0].kind, "http-probe"); + } + + #[test] + fn httpx_derives_host_from_url_when_absent() { + let stdout = r#"{"url":"https://derived.example.com:8443/x","status_code":200}"#; + let (_urls, findings) = parse_httpx_output(stdout); + assert_eq!(findings.len(), 1); + // host field missing -> normalized from url (scheme/port/path stripped) + assert_eq!(findings[0].target, "derived.example.com"); + } + + #[test] + fn nuclei_parses_severity_name_and_target() { + let stdout = concat!( + r#"{"template-id":"CVE-2021-1234","info":{"name":"Example RCE","severity":"critical"},"matched-at":"https://example.com/x"}"#, + "\n", + r#"{"template-id":"tech-detect","info":{"name":"Tech Detect","severity":"info"},"host":"example.com"}"#, + "\n", + "garbage\n", + ); + let findings = parse_nuclei_output(stdout); + assert_eq!(findings.len(), 2); + assert_eq!(findings[0].severity, Severity::Critical); + assert_eq!(findings[0].target, "https://example.com/x"); + assert_eq!(findings[0].details, "Example RCE [CVE-2021-1234]"); + // falls back to "host" when "matched-at" absent + assert_eq!(findings[1].target, "example.com"); + assert_eq!(findings[1].severity, Severity::Info); + } + + #[test] + fn nuclei_defaults_unknown_fields_gracefully() { + let stdout = r#"{"template-id":"t1"}"#; + let findings = parse_nuclei_output(stdout); 
+ assert_eq!(findings.len(), 1); + assert_eq!(findings[0].severity, Severity::Info); + assert_eq!(findings[0].details, "unknown [t1]"); + } +} diff --git a/src/llm/parser.rs b/src/llm/parser.rs index c7a8d90..c0bf248 100644 --- a/src/llm/parser.rs +++ b/src/llm/parser.rs @@ -130,3 +130,111 @@ pub fn parse_action(raw: &str) -> Option { None } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn strip_think_removes_balanced_blocks() { + let s = "beforereasoning hereafter"; + assert_eq!(strip_think(s), "beforeafter"); + } + + #[test] + fn strip_think_handles_multiple_blocks() { + let s = "akeep1bkeep2"; + assert_eq!(strip_think(s), "keep1keep2"); + } + + #[test] + fn strip_think_truncates_unclosed_block() { + let s = "keepnever closed"; + assert_eq!(strip_think(s), "keep"); + } + + #[test] + fn parse_tool_call_simple_schema() { + let raw = r#"{"tool":"subfinder","arguments":{"domain":"example.com"}}"#; + match parse_action(raw) { + Some(AgentAction::Tool { name, args }) => { + assert_eq!(name, "subfinder"); + assert_eq!(args["domain"], "example.com"); + } + other => panic!("expected Tool, got {other:?}"), + } + } + + #[test] + fn parse_tool_call_accepts_args_alias() { + let raw = r#"{"tool":"httpx","args":{"cap":10}}"#; + match parse_action(raw) { + Some(AgentAction::Tool { name, args }) => { + assert_eq!(name, "httpx"); + assert_eq!(args["cap"], 10); + } + other => panic!("expected Tool, got {other:?}"), + } + } + + #[test] + fn parse_tool_call_strips_think_and_code_fence() { + let raw = "I should run subfinder first.\nHere is my action:\n```json\n{\"tool\":\"subfinder\",\"arguments\":{\"domain\":\"example.com\"}}\n```\n"; + match parse_action(raw) { + Some(AgentAction::Tool { name, .. 
}) => assert_eq!(name, "subfinder"), + other => panic!("expected Tool, got {other:?}"), + } + } + + #[test] + fn parse_openai_tool_calls_with_stringified_args() { + let raw = r#"{"tool_calls":[{"function":{"name":"nuclei","arguments":"{\"urls\":[\"https://example.com\"]}"}}]}"#; + match parse_action(raw) { + Some(AgentAction::Tool { name, args }) => { + assert_eq!(name, "nuclei"); + assert_eq!(args["urls"][0], "https://example.com"); + } + other => panic!("expected Tool, got {other:?}"), + } + } + + #[test] + fn parse_done_action_with_next_steps() { + let raw = r#"{"action":"done","summary":"All done.","next_steps":["rescan","report"]}"#; + match parse_action(raw) { + Some(AgentAction::Done { + summary, + next_steps, + }) => { + assert_eq!(summary, "All done."); + assert_eq!(next_steps, vec!["rescan", "report"]); + } + other => panic!("expected Done, got {other:?}"), + } + } + + #[test] + fn parse_done_accepts_stop_and_finish_aliases() { + for action in ["stop", "finish"] { + let raw = format!(r#"{{"action":"{action}","summary":"x"}}"#); + assert!(matches!(parse_action(&raw), Some(AgentAction::Done { .. }))); + } + } + + #[test] + fn parse_returns_none_for_unparseable() { + assert!(parse_action("no json here at all").is_none()); + assert!(parse_action("").is_none()); + // valid JSON but no recognized schema + assert!(parse_action(r#"{"foo":"bar"}"#).is_none()); + } + + #[test] + fn extract_json_picks_first_balanced_object_amid_prose() { + let raw = "Sure! {\"tool\":\"subfinder\",\"arguments\":{}} -- let me know."; + match parse_action(raw) { + Some(AgentAction::Tool { name, .. 
}) => assert_eq!(name, "subfinder"), + other => panic!("expected Tool, got {other:?}"), + } + } +} diff --git a/src/preflight/pius.rs b/src/preflight/pius.rs index c169fac..2c3cf06 100644 --- a/src/preflight/pius.rs +++ b/src/preflight/pius.rs @@ -217,9 +217,12 @@ pub async fn run_pius( let mut cmd = Command::new(&bin); cmd.arg("run") - .arg("--org").arg(org) - .arg("--mode").arg("passive") - .arg("--output").arg("ndjson"); + .arg("--org") + .arg(org) + .arg("--mode") + .arg("passive") + .arg("--output") + .arg("ndjson"); if let Some(d) = domain_hint { cmd.arg("--domain").arg(d); diff --git a/src/report/generator.rs b/src/report/generator.rs index ae35ece..7acfc89 100644 --- a/src/report/generator.rs +++ b/src/report/generator.rs @@ -64,7 +64,7 @@ pub fn render_report(r: &RunRecord) -> String { conf )); } - out.push_str("\n"); + out.push('\n'); } out.push_str("### CIDR blocks\n\n"); @@ -81,7 +81,7 @@ pub fn render_report(r: &RunRecord) -> String { c.asn.as_deref().unwrap_or("—") )); } - out.push_str("\n"); + out.push('\n'); } out.push_str("### GitHub organizations\n\n"); @@ -97,7 +97,7 @@ pub fn render_report(r: &RunRecord) -> String { .unwrap_or_else(|| "—".to_string()); out.push_str(&format!("| `{}` | {} | {} |\n", g.login, g.name, conf)); } - out.push_str("\n"); + out.push('\n'); } // Key-gated plugin status: render only when there's something @@ -128,7 +128,7 @@ pub fn render_report(r: &RunRecord) -> String { s.note, )); } - out.push_str("\n"); + out.push('\n'); let missing_keys: Vec<&str> = actionable .iter() .filter(|s| s.status == "skipped_no_key") @@ -197,7 +197,7 @@ pub fn render_report(r: &RunRecord) -> String { expand_sections.push((i + 1, f)); } } - out.push_str("\n"); + out.push('\n'); // Per-finding target lists for the collapsed entries. 
for (idx, f) in expand_sections { out.push_str(&format!( @@ -242,7 +242,7 @@ pub fn render_report(r: &RunRecord) -> String { out.push_str(&format!("- `{t}` — {desc}\n")); } } - out.push_str("\n"); + out.push('\n'); // Step detail out.push_str("## Step Detail\n\n"); @@ -273,10 +273,12 @@ pub fn render_report(r: &RunRecord) -> String { } } else { out.push_str("- Review the discovered subdomains for staging/dev/admin/internal hosts that should not be exposed.\n"); - out.push_str("- Promote medium+ nuclei findings into a formal report with reproduction steps.\n"); + out.push_str( + "- Promote medium+ nuclei findings into a formal report with reproduction steps.\n", + ); out.push_str("- For interesting HTTP titles (admin/login panels), run targeted directory enumeration (ffuf) on a case-by-case basis.\n"); } out.push_str("\n---\n\n"); out.push_str("_Generated by AgentSpyBoo Phase 3 (CPU-track + Pius preflight) — https://github.com/Peterc3-dev (private)_\n"); out -} \ No newline at end of file +} diff --git a/src/scope.rs b/src/scope.rs index 1d92c96..ae467a6 100644 --- a/src/scope.rs +++ b/src/scope.rs @@ -37,3 +37,64 @@ pub fn normalize_host(raw: &str) -> String { } s } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn normalize_strips_scheme_path_and_port() { + assert_eq!(normalize_host("https://Example.com/foo?bar"), "example.com"); + assert_eq!(normalize_host("http://example.com:8080/"), "example.com"); + assert_eq!(normalize_host(" EXAMPLE.com "), "example.com"); + assert_eq!(normalize_host("example.com:443"), "example.com"); + } + + #[test] + fn normalize_keeps_non_port_colon_suffix() { + // A trailing colon segment that is not all-digits must be preserved + // (e.g. an IPv6-ish or malformed host should not be silently truncated). 
+ assert_eq!(normalize_host("host:notaport"), "host:notaport"); + } + + #[test] + fn bare_host_matches_exactly() { + let patterns = vec!["example.com".to_string()]; + assert!(host_in_scope("example.com", &patterns)); + assert!(host_in_scope("https://example.com/path", &patterns)); + assert!(!host_in_scope("evil.com", &patterns)); + // bare pattern must not match a subdomain + assert!(!host_in_scope("sub.example.com", &patterns)); + } + + #[test] + fn wildcard_matches_apex_and_subdomains() { + let patterns = vec!["*.example.com".to_string()]; + // "*." matches the apex itself... + assert!(host_in_scope("example.com", &patterns)); + // ...and any subdomain + assert!(host_in_scope("api.example.com", &patterns)); + assert!(host_in_scope("a.b.example.com", &patterns)); + // but not a different registrable domain that merely ends similarly + assert!(!host_in_scope("notexample.com", &patterns)); + assert!(!host_in_scope("example.com.evil.com", &patterns)); + } + + #[test] + fn empty_patterns_match_nothing() { + assert!(!host_in_scope("example.com", &[])); + } + + #[test] + fn matching_is_case_insensitive() { + let patterns = vec!["Example.COM".to_string()]; + // Only the host is lowercased before comparison; patterns are compared + // as-is. Callers are expected to supply lowercase patterns, so an + // uppercase pattern can never match a normalized (lowercase) host. + // Pin that contract: the lowercased host must NOT match "Example.COM". + assert!(!host_in_scope("example.com", &patterns)); + // The supported path: lowercase pattern, any-case host. 
+ let patterns = vec!["example.com".to_string()]; + assert!(host_in_scope("EXAMPLE.COM", &patterns)); + } +} diff --git a/src/tools/dnsx.rs b/src/tools/dnsx.rs index 16eada1..3af4fdf 100644 --- a/src/tools/dnsx.rs +++ b/src/tools/dnsx.rs @@ -55,3 +55,33 @@ pub fn parse_dnsx_output(stdout: &str) -> Vec { .filter(|s| !s.is_empty()) .collect() } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn parses_bare_hostnames() { + let out = "api.example.com\nwww.example.com\n"; + assert_eq!( + parse_dnsx_output(out), + vec!["api.example.com".to_string(), "www.example.com".to_string()] + ); + } + + #[test] + fn takes_first_token_of_host_record_pairs() { + // dnsx may emit "host [A] [1.2.3.4]" style lines; we keep just the host. + let out = "api.example.com [A] [1.2.3.4]\nwww.example.com [CNAME] [edge.example.net]\n"; + assert_eq!( + parse_dnsx_output(out), + vec!["api.example.com".to_string(), "www.example.com".to_string()] + ); + } + + #[test] + fn strips_trailing_dot_and_drops_blanks() { + let out = "fqdn.example.com.\n\n \n"; + assert_eq!(parse_dnsx_output(out), vec!["fqdn.example.com".to_string()]); + } +} diff --git a/src/tools/ffuf.rs b/src/tools/ffuf.rs index cf2af42..2274b64 100644 --- a/src/tools/ffuf.rs +++ b/src/tools/ffuf.rs @@ -43,7 +43,8 @@ pub fn resolve_wordlist(user_path: Option<&str>) -> Result<(PathBuf, bool)> { } return Ok((path, false)); } - let tmp = std::env::temp_dir().join(format!("agentspyboo-ffuf-mini-{}.txt", std::process::id())); + let tmp = + std::env::temp_dir().join(format!("agentspyboo-ffuf-mini-{}.txt", std::process::id())); std::fs::write(&tmp, BUNDLED_WORDLIST) .with_context(|| format!("write bundled wordlist to {}", tmp.display()))?; Ok((tmp, true)) @@ -172,12 +173,27 @@ pub fn parse_ffuf_output(stdout: &str, target_host: &str) -> Vec Severity::High, @@ -221,6 +237,9 @@ mod parse_tests { fn parse_ffuf_handles_empty_or_malformed() { assert_eq!(parse_ffuf_output("", "x.com").len(), 0); assert_eq!(parse_ffuf_output("not json", 
"x.com").len(), 0); - assert_eq!(parse_ffuf_output(r#"{"unrelated":"shape"}"#, "x.com").len(), 0); + assert_eq!( + parse_ffuf_output(r#"{"unrelated":"shape"}"#, "x.com").len(), + 0 + ); } } diff --git a/src/tools/nuclei.rs b/src/tools/nuclei.rs index dde5092..b8a2130 100644 --- a/src/tools/nuclei.rs +++ b/src/tools/nuclei.rs @@ -45,7 +45,10 @@ pub async fn exec_nuclei(urls: &[String]) -> Result<(String, String)> { tmpl_args.push(path.to_string_lossy().into_owned()); } if tmpl_args.is_empty() { - bail!("no curated nuclei template dirs exist under {}", root.display()); + bail!( + "no curated nuclei template dirs exist under {}", + root.display() + ); } let tmp = std::env::temp_dir().join(format!("agentspyboo-nuclei-{}.txt", std::process::id())); @@ -89,8 +92,8 @@ pub async fn exec_nuclei(urls: &[String]) -> Result<(String, String)> { /// /// Returns at most `cap` URLs. Pure function — takes stdout, no side effects. pub fn select_interesting_urls(httpx_stdout: &str, cap: usize) -> Vec { - use serde_json::Value; use crate::scope::normalize_host; + use serde_json::Value; #[derive(Default)] struct Row { @@ -116,10 +119,7 @@ pub fn select_interesting_urls(httpx_stdout: &str, cap: usize) -> Vec { if url.is_empty() { continue; } - let status = v - .get("status_code") - .and_then(|x| x.as_i64()) - .unwrap_or(0); + let status = v.get("status_code").and_then(|x| x.as_i64()).unwrap_or(0); let title = v .get("title") .and_then(|x| x.as_str()) @@ -148,9 +148,21 @@ pub fn select_interesting_urls(httpx_stdout: &str, cap: usize) -> Vec { score -= 40; } let juicy = [ - "admin", "api", "auth", "login", "signin", "sign in", "internal", - "dashboard", "console", "staging", "dev", "jenkins", "grafana", - "kibana", "phpmyadmin", + "admin", + "api", + "auth", + "login", + "signin", + "sign in", + "internal", + "dashboard", + "console", + "staging", + "dev", + "jenkins", + "grafana", + "kibana", + "phpmyadmin", ]; for kw in juicy { if title.contains(kw) || host_l.contains(kw) {