Skip to content

Commit 80fc122

Browse files
committed
feat: improved score, and detection filters
1 parent b23d197 commit 80fc122

File tree

9 files changed

+379
-311
lines changed

9 files changed

+379
-311
lines changed

Cargo.lock

Lines changed: 7 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ wildmatch = "2.5.0"
3030
lru = "0.16.2"
3131
sysinfo = "0.37.2"
3232
bloomfilter = "3.0"
33+
ipnet = "2"
3334

3435
[profile.release]
3536
codegen-units = 1

README.md

Lines changed: 52 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
- **Scan multiple detection categories** 🔍:
1010

1111
- **Network**: Detect potentially malicious IPv4/IPv6 addresses and URLs
12-
- **Crypto**: Find cryptographic implementations and sensitive material
1312
- **Malicious**: Identify suspicious code patterns (backdoors, exploits, etc.)
1413
- **Obfuscation**: Detect obfuscated code, high entropy, and suspicious naming patterns
1514

@@ -24,7 +23,7 @@
2423
- **Multi-threading**: Parallel processing of files for faster scanning
2524
- **Fast mode**: Stop processing after first suspicious item
2625
- **Path filtering**: Include or exclude paths matching patterns
27-
- **Custom ignore lists**: Skip specified suspicious or crypto keywords
26+
- **Custom ignore lists**: Skip specified suspicious keywords
2827

2928
## ⚙️ Installation
3029

@@ -55,7 +54,6 @@ collapsescanner directory
5554

5655
# Different detection modes
5756
collapsescanner file.jar --mode network
58-
collapsescanner file.jar --mode crypto
5957
collapsescanner file.jar --mode malicious
6058
collapsescanner file.jar --mode obfuscation
6159

@@ -78,24 +76,26 @@ collapsescanner file.jar --threads 8
7876
collapsescanner file.jar --exclude "assets/**" --exclude "*.log" --find "com/example/*"
7977

8078
# Skip specific keywords
81-
collapsescanner file.jar --ignore_keywords_file ignore_keywords.txt
79+
collapsescanner file.jar --ignore_keywords ignore_keywords.txt
8280
```
8381

8482
## 🔍 Command-line Options
8583

86-
| Option | Description |
87-
| ------------------------ | ----------------------------------------------------------------------------------- |
88-
| `path` | Path to a JAR file, class file, or directory to scan |
89-
| `--mode` | Detection mode: `network`, `crypto`, `malicious`, `obfuscation`, or `all` (default) |
90-
| `--extract` | Extract all resources from JAR files |
91-
| `--strings` | Extract all strings from class files |
92-
| `--output` | Specify the output directory (default: ./extracted) |
93-
| `--json` | Export results in JSON format |
94-
| `-v, --verbose` | Enable verbose output (shows size/entropy, etc.) |
95-
| `--threads` | Number of threads to use for parallel processing (0 = automatic based on CPU cores) |
96-
| `--exclude` | Exclude paths matching the wildcard pattern (can be used multiple times) |
97-
| `--find` | Only scan paths matching the wildcard pattern (can be used multiple times) |
98-
| `--ignore_keywords_file` | Path to a .txt file with keywords to ignore (one per line) |
84+
| Option | Description |
85+
| ------------------- | ----------------------------------------------------------------------------------- |
86+
| `path` | Path to a JAR file, class file, or directory to scan |
87+
| `--mode` | Detection mode: `network`, `malicious`, `obfuscation`, or `all` (default) |
88+
| `--extract` | Extract all resources from JAR files |
89+
| `--strings` | Extract all strings from class files |
90+
| `--output` | Specify the output directory (default: ./extracted) |
91+
| `--json` | Export results in JSON format |
92+
| `-v, --verbose` | Enable verbose output (shows size/entropy, etc.) |
93+
| `--threads` | Number of threads to use for parallel processing (0 = automatic based on CPU cores) |
94+
| `--exclude` | Exclude paths matching the wildcard pattern (can be used multiple times) |
95+
| `--find` | Only scan paths matching the wildcard pattern (can be used multiple times) |
96+
| `--ignore_keywords` | Path to a .txt file with keywords to ignore (one per line) |
97+
| `--show` | Print a detailed findings report to the terminal (useful for interactive runs) |
98+
| `--max_file_size` | Maximum file size to scan (in MB). Files larger than this will be skipped. |
9999

100100
## 🛡️ Detection Capabilities
101101

@@ -114,7 +114,6 @@ CollapseScanner analyzes Java class files to find:
114114
- Key management and password handling
115115

116116
- **Obfuscation indicators**:
117-
- Excessively long names
118117
- Suspicious character sequences
119118
- Unicode characters in identifiers
120119
- High entropy (potentially obfuscated) files
@@ -151,30 +150,43 @@ cargo run --bin remapper input.jar output.jar
151150
<details><summary>📋 Example Output</summary>
152151

153152
```
154-
==== CollapseScanner - Enhanced Analysis ====
155-
🎯 Target: suspicious.jar
156-
🔧 Mode: All
157-
🚀 Starting scan...
158-
159-
⚠️ Findings Report:
153+
╔══════════════════════════════════════════════════════════════════════════════╗
154+
║ FINDINGS REPORT ║
155+
╚══════════════════════════════════════════════════════════════════════════════╝
160156
161157
📄 File: suspicious.jar/com/example/malicious/Payload.class
162-
🌐 IPv4 Address: 192.168.1.100
163-
🌐 IPv6 Address: 9e53:c40f:5969:6a04:68b6:2c98:5c80:25fb
164-
🔗 URL: http://malicious-domain.com/c2
165-
🔒 Crypto Keyword: 'encrypt' in "AES encryption used here"
166-
❗ Suspicious Keyword: 'payload' in "Executing payload"
167-
168-
==== Scan Summary ====
169-
📈 Total Findings: 4
170-
- Crypto Keyword: 1
171-
- IPv4 Address: 1
172-
- IPv6 Address: 1
173-
- Suspicious Keyword: 1
174-
- URL: 1
175-
176-
📦 Resources extracted to ./extracted
177-
🔤 Strings extracted to ./extracted
158+
🌐 IPv4 Address: 192.168.1.100
159+
🔗 URL: http://malicious-domain.com/c2
160+
🤖 Discord Webhook: https://discord.com/api/webhooks/12345/abcdef
161+
❗ Suspicious Keyword: 'payload' in "Executing payload"
162+
🔥 High Entropy: Very High entropy value: 7.85 (threshold: 7.20) - suggests possible encryption or compression
163+
164+
╔══════════════════════════════════════════════════════════════════════════════╗
165+
║ SCAN SUMMARY ║
166+
╚══════════════════════════════════════════════════════════════════════════════╝
167+
168+
📊 Total Findings: 5 | Files with Findings: 1 | Risk Level: HIGH RISK (9/10)
169+
⏱️ Scan Time: 1.23s | Total Files Scanned: 12 | Processing Rate: 9.8 files/sec
170+
171+
🔍 Findings Breakdown:
172+
173+
🌐 IPv4 Address (1)
174+
• 192.168.1.100
175+
176+
🔗 URL (1)
177+
• http://malicious-domain.com/c2
178+
179+
🤖 Discord Webhook (1)
180+
• https://discord.com/api/webhooks/12345/abcdef
181+
182+
❗ Suspicious Keyword (1)
183+
• 'payload' in "Executing payload"
184+
185+
🔥 High Entropy (1)
186+
• Very High entropy value: 7.85
187+
188+
👻 Custom JVM Warning: Files with unusual magic bytes detected. These may require a custom ClassLoader.
189+
178190
```
179191

180192
</details>

src/database.rs

Lines changed: 0 additions & 43 deletions
This file was deleted.

src/detection.rs

Lines changed: 2 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
1-
use regex::Regex;
21
use std::collections::HashSet;
32

4-
pub const NAME_LENGTH_THRESHOLD: usize = 100;
53
pub const ENTROPY_THRESHOLD: f64 = 7.2;
64

75
lazy_static::lazy_static! {
@@ -19,26 +17,13 @@ lazy_static::lazy_static! {
1917
"discord.com",
2018
"discordapp.com",
2119
"pastebin.com",
20+
"bit.ly",
21+
"tinyurl.com",
2222
]
2323
.iter()
2424
.map(|&s| s.to_lowercase())
2525
.collect()
2626
};
27-
28-
pub static ref IP_REGEX: Regex =
29-
Regex::new(r"\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b").unwrap();
30-
31-
pub static ref IPV6_REGEX: Regex =
32-
Regex::new(r"(?i)\b(?:[0-9a-f]{1,4}:){2,7}[0-9a-f]{1,4}\b").unwrap();
33-
34-
pub static ref URL_REGEX: Regex =
35-
Regex::new(r#"(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'".,<>?«»""'']))"#).unwrap();
36-
37-
pub static ref CRYPTO_REGEX: Regex =
38-
Regex::new(r"(?i)\b(aes|des|rsa|md5|sha[1-9]*-?\d*|blowfish|twofish|pgp|gpg|cipher|keystore|keygenerator|secretkey|password|encrypt|decrypt|hash|salt|ivParameterSpec|SecureRandom)\b").unwrap();
39-
40-
pub static ref MALICIOUS_PATTERN_REGEX: Regex =
41-
Regex::new(r"(?i)\b(backdoor|exploit|payload|shellcode|bypass|rootkit|keylog|rat\b|trojan|malware|spyware|meterpreter|cobaltstrike|powershell|cmd\.exe|Runtime\.getRuntime\(\)\.exec|ProcessBuilder|loadLibrary|download|upload|socket\(|bind\(|connect\(|URL\(|URLConnection|Class\.forName|defineClass|getMethod|unsafe|jndi|ldap|rmi|base64|decode)\b").unwrap();
4227
}
4328

4429
pub fn is_cached_safe_string(s: &str) -> bool {

src/filters.rs

Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
use ipnet::IpNet;
2+
use regex::Regex;
3+
use std::collections::HashSet;
4+
use std::net::IpAddr;
5+
6+
lazy_static::lazy_static! {
    /// Matches dotted-quad IPv4 addresses whose four octets are each in `0..=255`.
    pub static ref IP_REGEX: Regex = Regex::new(r"\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b")
        .expect("IP_REGEX is a valid pattern");

    /// Fast IPv6 heuristic: three to eight groups of 1-4 hex digits separated by
    /// single colons. `::` zero-compression is not handled, so compressed
    /// addresses match only partially or not at all.
    pub static ref IPV6_REGEX: Regex = Regex::new(r"(?i)\b(?:[0-9a-f]{1,4}:){2,7}[0-9a-f]{1,4}\b")
        .expect("IPV6_REGEX is a valid pattern");

    /// URL detection regex (captures many common URL forms).
    ///
    /// The pattern is kept on a single line on purpose: a raw string that wraps
    /// inside a character class embeds the line break (and any indentation) into
    /// the pattern itself.
    pub static ref URL_REGEX: Regex = Regex::new(r#"(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'".,<>?«»""']))"#)
        .expect("URL_REGEX is a valid pattern");

    /// Generic malicious / suspicious pattern keywords (case-insensitive).
    ///
    /// NOTE(review): the trailing `\b` also applies to the alternatives that end
    /// in `\(` (`socket\(`, `bind\(`, `connect\(`, `URL\(`). Because `(` is not a
    /// word character, those alternatives match only when a word character
    /// follows the parenthesis. Confirm whether that is intended.
    pub static ref MALICIOUS_PATTERN_REGEX: Regex = Regex::new(r"(?i)\b(payload|powershell|cmd\.exe|Runtime\.getRuntime\(\)\.exec|ProcessBuilder|loadLibrary|socket\(|bind\(|connect\(|URL\(|URLConnection|Class\.forName|defineClass|getMethod|jndi|ldap|rmi)\b")
        .expect("MALICIOUS_PATTERN_REGEX is a valid pattern");

    /// Known "good" links / domains.
    ///
    /// Most entries are host names; a few are path fragments
    /// (`cabaletta/baritone`, `com/viaversion/`, `paulscode/sound/`).
    /// How callers match against this list is not visible from this module.
    pub static ref GOOD_LINKS: Vec<String> = [
        "account.mojang.com",
        "aka.ms",
        "apache.org",
        "api.mojang.com",
        "api.spiget.org",
        "authserver.mojang.com",
        "bugs.mojang.com",
        "cabaletta/baritone",
        "ci.viaversion.com",
        "com/viaversion/",
        "docs.advntr.dev",
        "dominos.com",
        "dump.viaversion.com",
        "eclipse.org",
        // NOTE(review): presumably meant `java.sun.com` - confirm before changing.
        "java.sun.org",
        "jo0001.github.io",
        "logging.apache.org",
        "login.live.com",
        "lwjgl.org",
        "minecraft.net",
        "minecraft.org",
        "minotar.net",
        "mojang.com",
        "netty.io",
        "optifine.net",
        "paulscode/sound/",
        "s.optifine.net",
        "sessionserver.mojang.com",
        "shader-tutorial.dev",
        "snoop.minecraft.net",
        "tools.ietf.org",
        "viaversion.com",
        "www.openssl.org",
        "www.rfc-editor.org",
        "www.slf4j.org",
        "www.w3.org",
        "yaml.org",
        "openssl.org",
        "yggdrasil-auth-session-staging.mojang.zone",
    ]
    .iter()
    .map(|link| link.to_string())
    .collect();

    /// Known "good" / unreachable / reserved IPs and ranges, as text.
    ///
    /// Entries containing `/` are CIDR ranges and feed `GOOD_IP_NETWORKS`;
    /// all other entries are single addresses and feed `GOOD_IP_ADDRS`.
    pub static ref GOOD_IPS: HashSet<&'static str> = [
        // Unspecified / non-routable single addresses
        "0.0.0.0",
        "::",
        // Loopback: the whole 127.0.0.0/8 block is loopback, not only 127.0.0.1.
        // The single-address entries are kept for callers that look them up by string.
        "127.0.0.1",
        "127.0.0.0/8",
        "::1",
        // Broadcast
        "255.255.255.255",
        // Link-local (often unreachable from other networks)
        "169.254.0.0/16",
        // Documentation / TEST-NET ranges (RFC 5737) - used in examples/tests
        "192.0.2.0/24",
        "198.51.100.0/24",
        "203.0.113.0/24",
        // Private address ranges (commonly non-public)
        "10.0.0.0/8",
        "172.16.0.0/12",
        "192.168.0.0/16",
        // Minecraft UDP multicast address
        "224.0.2.60",
        // DNS resolvers
        "8.8.8.8",
        "8.8.4.4",
        "1.1.1.1",
        "9.9.9.9",
    ]
    .iter()
    .copied()
    .collect();

    /// Single-address entries of `GOOD_IPS`, parsed once for exact lookups.
    /// Entries that fail to parse are skipped.
    pub static ref GOOD_IP_ADDRS: HashSet<IpAddr> = GOOD_IPS
        .iter()
        .filter(|entry| !entry.contains('/'))
        .filter_map(|entry| entry.parse::<IpAddr>().ok())
        .collect();

    /// CIDR entries of `GOOD_IPS`, parsed once for containment checks.
    /// Entries that fail to parse are skipped.
    pub static ref GOOD_IP_NETWORKS: Vec<IpNet> = GOOD_IPS
        .iter()
        .filter(|entry| entry.contains('/'))
        .filter_map(|entry| entry.parse::<IpNet>().ok())
        .collect();
}
127+
128+
pub fn is_known_good_ip(ip: &str) -> bool {
129+
if let Ok(addr) = ip.parse::<IpAddr>() {
130+
if GOOD_IP_ADDRS.contains(&addr) {
131+
return true;
132+
}
133+
134+
for net in GOOD_IP_NETWORKS.iter() {
135+
if net.contains(&addr) {
136+
return true;
137+
}
138+
}
139+
}
140+
false
141+
}

0 commit comments

Comments
 (0)