Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions bbot/core/helpers/misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -1118,6 +1118,32 @@ def str_or_file(s):
yield s


# An inline comment starts at a ``#`` that is directly preceded by whitespace.
_comment_re = re.compile(r"\s#")


def strip_comments(line):
    """Strip #-style comments from a line.

    Handles full-line comments (``# ...``) and inline comments (``target # ...``).
    The ``#`` must be preceded by whitespace to count as an inline comment,
    so URL fragments like ``http://example.com/page#section`` are preserved.

    When an inline comment is removed, any whitespace separating the value
    from the comment is removed as well, however many characters it spans.
    Lines that contain no comment are returned unchanged.

    Args:
        line (str): A single line of text, e.g. one entry from a target file.

    Returns:
        str: The line without its comment. A full-line comment yields ``""``.

    Examples:
        >>> strip_comments("evilcorp.com # main domain")
        'evilcorp.com'
        >>> strip_comments("evilcorp.com    # aligned comment")
        'evilcorp.com'
        >>> strip_comments("# full line comment")
        ''
        >>> strip_comments("http://example.com/page#section")
        'http://example.com/page#section'
    """
    # A line whose first non-blank character is "#" is entirely a comment.
    if line.lstrip().startswith("#"):
        return ""
    m = _comment_re.search(line)
    if m is None:
        return line
    # The pattern only consumes the single whitespace character right before
    # the "#", so right-strip to drop the rest of the padding run as well.
    return line[: m.start()].rstrip()


# Separator pattern for chained entries: any single whitespace character or a comma.
split_regex = re.compile(r"[\s,]")


Expand All @@ -1128,6 +1154,7 @@ def chain_lists(
remove_blank=True,
validate=False,
validate_chars='<>:"/\\|?*)',
_strip_comments=False,
):
"""Chains together list elements, allowing for entries separated by commas.

Expand All @@ -1143,6 +1170,7 @@ def chain_lists(
remove_blank (bool, optional): Whether to remove blank entries from the list. Defaults to True.
validate (bool, optional): Whether to perform validation for undesirable characters. Defaults to False.
validate_chars (str, optional): When performing validation, what additional set of characters to block (blocks non-printable ascii automatically). Defaults to '<>:"/\\|?*)'
_strip_comments (bool, optional): Whether to strip ``#``-style comments from entries and file lines. Defaults to False.

Returns:
list: The list of chained elements.
Expand All @@ -1161,6 +1189,8 @@ def chain_lists(
l = [l]
final_list = {}
for entry in l:
if _strip_comments:
entry = strip_comments(entry)
for s in split_regex.split(entry):
f = s.strip()
if validate:
Expand All @@ -1172,6 +1202,8 @@ def chain_lists(
new_msg = str(msg).format(filename=f_path)
log.info(new_msg)
for line in str_or_file(f):
if _strip_comments:
line = strip_comments(line)
final_list[line] = None
else:
final_list[f] = None
Expand Down
6 changes: 3 additions & 3 deletions bbot/scanner/preset/args.py
Original file line number Diff line number Diff line change
Expand Up @@ -403,14 +403,14 @@ def sanitize_args(self):
self.parsed.exclude_modules = chain_lists(self.parsed.exclude_modules)
self.parsed.output_modules = chain_lists(self.parsed.output_modules)
self.parsed.targets = chain_lists(
self.parsed.targets, try_files=True, msg="Reading targets from file: {filename}"
self.parsed.targets, try_files=True, msg="Reading targets from file: {filename}", _strip_comments=True
)
if self.parsed.seeds is not None:
self.parsed.seeds = chain_lists(
self.parsed.seeds, try_files=True, msg="Reading seeds from file: {filename}"
self.parsed.seeds, try_files=True, msg="Reading seeds from file: {filename}", _strip_comments=True
)
self.parsed.blacklist = chain_lists(
self.parsed.blacklist, try_files=True, msg="Reading blacklist from file: {filename}"
self.parsed.blacklist, try_files=True, msg="Reading blacklist from file: {filename}", _strip_comments=True
)
self.parsed.flags = chain_lists(self.parsed.flags)
self.parsed.exclude_flags = chain_lists(self.parsed.exclude_flags)
Expand Down
3 changes: 3 additions & 0 deletions bbot/scanner/preset/preset.py
Original file line number Diff line number Diff line change
Expand Up @@ -673,20 +673,23 @@ def from_dict(cls, preset_dict, name=None, _exclude=None, _log=False):
cls._resolve_file_entries(target_vals),
try_files=True,
msg="Reading targets from preset file: {filename}",
_strip_comments=True,
)
seeds = preset_dict.get("seeds")
if seeds is not None:
seeds = chain_lists(
cls._resolve_file_entries(seeds),
try_files=True,
msg="Reading seeds from preset file: {filename}",
_strip_comments=True,
)
blacklist = preset_dict.get("blacklist")
if blacklist is not None:
blacklist = chain_lists(
cls._resolve_file_entries(blacklist),
try_files=True,
msg="Reading blacklist from preset file: {filename}",
_strip_comments=True,
)
new_preset = cls(
*targets,
Expand Down
6 changes: 3 additions & 3 deletions bbot/scanner/target.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def _fnv1a_64(data_strings):
from bbot.errors import *
from bbot.core.event import is_event
from bbot.core.event.helpers import EventSeed, BaseEventSeed
from bbot.core.helpers.misc import is_dns_name, is_ip, is_ip_type
from bbot.core.helpers.misc import is_dns_name, is_ip, is_ip_type, strip_comments

log = logging.getLogger("bbot.core.target")

Expand Down Expand Up @@ -61,8 +61,8 @@ class BaseTarget:
accept_target_types = ["TARGET"]

def __init__(self, *targets, strict_scope=False, acl_mode=False):
# ignore blank targets (sometimes happens as a symptom of .splitlines())
targets = [stripped for t in targets if (stripped := (t.strip() if isinstance(t, str) else t))]
# strip comments and ignore blank targets
targets = [stripped for t in targets if (stripped := (strip_comments(t).strip() if isinstance(t, str) else t))]
self.strict_scope = strict_scope
self._rt = RadixTarget(strict_scope=strict_scope, acl_mode=acl_mode)
self.event_seeds = set()
Expand Down
123 changes: 123 additions & 0 deletions bbot/test/test_step_1/test_target.py
Original file line number Diff line number Diff line change
Expand Up @@ -710,3 +710,126 @@ def test_target_pickle():

# hashes match
assert target.hash == restored.hash


def test_target_comments():
    """Full-line and inline ``#`` comments are both accepted in target strings."""
    from bbot.scanner.target import BBOTTarget

    entries = [
        "# this is a full-line comment",
        "evilcorp.com # main evilcorp domain",
        " # indented comment ",
        "1.2.3.0/24 # internal network",
        "othercorp.com",
    ]
    target = BBOTTarget(target=entries)

    # only the three real entries survive; comment-only lines vanish
    assert len(target.target) == 3

    # with the inline comments removed, the hosts behave like ordinary targets
    for host in ("evilcorp.com", "www.evilcorp.com", "1.2.3.4", "othercorp.com"):
        assert target.in_target(host)

    # words taken from the comment text never become targets
    for word in ("main", "internal"):
        assert not target.in_target(word)


def test_target_comments_url_fragment_not_stripped():
    """A ``#`` that belongs to a URL fragment is not a comment marker.

    URL normalisation in BBOT may discard the fragment itself; what matters
    here is that the URL's host still ends up as a valid target.
    """
    from bbot.scanner.target import BBOTTarget

    url_with_fragment = "http://evilcorp.com/page#section"
    target = BBOTTarget(target=[url_with_fragment])

    host_in_target = target.in_target("evilcorp.com")
    assert host_in_target
    assert len(target.target) == 1


def test_target_comments_blacklist():
    """Blacklist entries accept the same ``#`` comment syntax as targets."""
    from bbot.scanner.target import BBOTTarget

    blacklist_entries = [
        "# don't scan the blog",
        "blog.evilcorp.com # unstable host",
    ]
    target = BBOTTarget(target=["evilcorp.com"], blacklist=blacklist_entries)

    # an ordinary subdomain stays in scope; the blacklisted one does not
    assert target.in_scope("www.evilcorp.com")
    assert not target.in_scope("blog.evilcorp.com")

    # the comment-only line did not create a blacklist entry
    assert len(target.blacklist) == 1


def test_target_comments_seeds():
    """Seed entries accept the same ``#`` comment syntax as targets."""
    from bbot.scanner.target import BBOTTarget

    seed_entries = [
        "# seed comment",
        "evilcorp.com # the main domain",
    ]
    target = BBOTTarget(target=["evilcorp.com"], seeds=seed_entries)

    # the inline comment is gone and the comment-only line added nothing
    assert "evilcorp.com" in target.seeds
    assert len(target.seeds) == 1


def test_target_comments_from_file(tmp_path):
    """Loading a target file through chain_lists removes its comments."""
    from bbot.core.helpers.misc import chain_lists

    file_lines = (
        "# My target list\n",
        "evilcorp.com # main domain\n",
        "\n",
        " # another comment\n",
        "othercorp.com\n",
        "192.168.1.0/24 # lab network\n",
        "http://example.com/page#fragment # with a URL fragment\n",
    )
    target_file = tmp_path / "targets.txt"
    target_file.write_text("".join(file_lines))

    result = chain_lists([str(target_file)], try_files=True, _strip_comments=True)

    # every real entry is present, with its inline comment removed
    for expected in (
        "evilcorp.com",
        "othercorp.com",
        "192.168.1.0/24",
        "http://example.com/page#fragment",
    ):
        assert expected in result

    # neither comment lines nor blank lines made it into the result
    assert all(not r.lstrip().startswith("#") for r in result)
    assert len(result) == 4


def test_strip_comments_helper():
    """Table-driven checks for the strip_comments helper."""
    from bbot.core.helpers.misc import strip_comments

    cases = [
        # full-line comments
        ("# comment", ""),
        (" # indented comment", ""),
        # inline comments
        ("evilcorp.com # main domain", "evilcorp.com"),
        ("1.2.3.0/24\t# tab comment", "1.2.3.0/24"),
        # no comment
        ("evilcorp.com", "evilcorp.com"),
        # URL fragment (no whitespace before #) is preserved
        ("http://example.com/page#section", "http://example.com/page#section"),
        # URL fragment followed by a real inline comment
        ("http://example.com/page#section # a comment", "http://example.com/page#section"),
        # empty / whitespace
        ("", ""),
        (" ", " "),
    ]
    for raw, expected in cases:
        assert strip_comments(raw) == expected
Loading