Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 84 additions & 1 deletion src/hexgraph/sandbox/probes/ghidra_probe.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,8 +124,76 @@ def _commit_marker(marker: str, prog: str) -> None:
# /scratch via _JAVA_OPTIONS so EVERY writable Ghidra/Java path lands on the one tmpfs the
# hardened sandbox guarantees, making the probe self-sufficient under bare --read-only +
# --user 1000 with only /scratch writable. Prepend so a caller-supplied _JAVA_OPTIONS wins.
#
# F13: also let the heap scale with the container. The JVM's default max heap is ~25% of the
# cgroup RAM cap, which OOMs (the "DB buffer" failure) on a 100 MB+ ELF — and the sandbox now
# grants a large artifact a BIGGER `--memory` cap (sandbox/resources.py size-scaling). A RAM
# PERCENTAGE (the JDK is cgroup-aware) self-adjusts to whatever cap THIS container got, so there's
# no hardcoded -Xmx to drift from the cap and it tracks a larger/smaller `resources.sandbox.mem`
# too. ~45% leaves room for the tmpfs (which counts against the same cap) + JVM native overhead.
# Tunable per-run via HEXGRAPH_GHIDRA_HEAP_PCT without rebuilding. A caller-supplied -Xmx (appended
# below) still wins.
# Heap share handed to the JVM as -XX:MaxRAMPercentage. Read once from HEXGRAPH_GHIDRA_HEAP_PCT
# (default "45.0") and passed through verbatim, so it must be a value the JVM accepts.
_GHIDRA_HEAP_PCT = os.environ.get("HEXGRAPH_GHIDRA_HEAP_PCT", "45.0")
# Snapshot the caller's options BEFORE the variable is overwritten so they can be re-appended.
_existing_jopts = os.environ.get("_JAVA_OPTIONS", "")
# One assignment only: our tmpdir + heap-percentage flags first, the caller's options after them.
# .strip() removes the trailing space left behind when the caller supplied no options.
os.environ["_JAVA_OPTIONS"] = (
    f"-Djava.io.tmpdir={SCRATCH} -XX:MaxRAMPercentage={_GHIDRA_HEAP_PCT} {_existing_jopts}"
).strip()

# F13: bound Ghidra's auto-analysis so a 100 MB+ ELF whose FULL analysis would outrun the
# container's wall-clock budget stops GRACEFULLY and SAVES partial results (functions, call graph,
# the postScript still runs) instead of being torn down by the external timeout with nothing
# persisted. We read the budget the host advertised (HEXGRAPH_PROBE_TIMEOUT_S = run_probe's
# wall-clock) and leave headroom for import + save + the postScript, so analysis halts BEFORE the
# kill. Only the COLD import path runs auto-analysis (the warm -process path passes -noanalysis).
GHIDRA_SAVE_OVERHEAD_S = 180  # seconds of headroom kept for import + save + the postScript


def _analysis_timeout_args() -> list[str]:
    """Build the `-analysisTimeoutPerFile <s>` arguments from the advertised wall-clock budget.

    The budget is read from the HEXGRAPH_PROBE_TIMEOUT_S environment variable (seconds; a float
    string is accepted and truncated to an int).

    Returns:
        `[]` when the variable is missing, is not a finite number, or is below 120 s (too small
        to usefully split import/analyze/save). Otherwise `["-analysisTimeoutPerFile", "<budget>"]`
        where budget = max(half the wall-clock, wall-clock - GHIDRA_SAVE_OVERHEAD_S). The
        half-budget floor means a lowered timeout still gets a graceful stop rather than none.
    """
    raw = os.environ.get("HEXGRAPH_PROBE_TIMEOUT_S", "")
    try:
        total = int(float(raw))
    except (TypeError, ValueError, OverflowError):
        # ValueError: empty / non-numeric / NaN. OverflowError: "inf" or "1e999" parse to a float
        # infinity that int() refuses — treat it like any other unusable budget.
        return []
    if total < 120:
        return []
    budget = max(int(total * 0.5), total - GHIDRA_SAVE_OVERHEAD_S)
    return ["-analysisTimeoutPerFile", str(budget)]


# F13: above this size, the cold import runs a "fast profile" preScript (below) that turns off the
# auto-analysis passes that grind for ages on a monolith. Smaller binaries keep FULL analysis.
# The threshold is read ONCE, at import, from HEXGRAPH_GHIDRA_FAST_PROFILE_MB (default "100"). It is
# parsed as a float, so fractional values work, and converted to bytes as MiB (x 1024 x 1024).
# A non-numeric value raises ValueError right here, at import time.
GHIDRA_FAST_PROFILE_BYTES = int(float(os.environ.get("HEXGRAPH_GHIDRA_FAST_PROFILE_MB", "100")) * 1024 * 1024)

# A Jython -preScript (runs BEFORE auto-analysis) that disables the passes proven pathological on a
# 100 MB+ monolith: Call-Fixup Installer (O(n^2) AddressSet — tens of minutes of CPU on a large ELF), the
# <processor> Constant Reference Analyzer + Scalar Operand References (constant propagation over
# every function), and the decompile-EVERY-function passes (Decompiler Parameter ID / Switch
# Analysis) + Aggressive Instruction Finder. The call-graph / reference / function-discovery
# analyzers are KEPT, so recon still gets functions + call graph + strings + basic xrefs; HexGraph
# decompiles on demand (re_decompile_function), so the batch decompile passes aren't needed here.
# Matched by suffix so it's architecture-agnostic ("PowerPC/ARM/x86 … Constant Reference Analyzer").
# Source text of the preScript. Python block structure (the body of `_slow`, the loop, the
# try/except) must be indented inside the literal: the text is written out verbatim and run as a
# script, so un-indented bodies are a syntax error.
#   * `_slow` returns False for any option name containing "." (presumably an analyzer's
#     sub-option rather than its on/off switch — confirm against Ghidra's option naming).
#   * The bare `except:` is deliberate best-effort: one option that cannot be set as a boolean
#     must not abort the whole profile.
FAST_PROFILE_SCRIPT = """# -*- coding: utf-8 -*-
def _slow(name):
    if "." in name:
        return False
    if name in ("Call-Fixup Installer", "Decompiler Parameter ID", "Decompiler Switch Analysis",
                "Aggressive Instruction Finder"):
        return True
    return name.endswith("Constant Reference Analyzer") or name.endswith("Scalar Operand References")

opts = currentProgram.getOptions("Analyzers")
for _n in list(opts.getOptionNames()):
    if _slow(_n):
        try:
            opts.setBoolean(_n, False)
        except:
            pass
"""

# Jython postScript Ghidra runs after auto-analysis. It writes JSON to args[0];
# args[1] (optional) is the focus function to decompile.
Expand Down Expand Up @@ -854,6 +922,19 @@ def main() -> int:
with open(script_path, "w") as fh:
fh.write(script_body)

# F13: a LARGE binary's cold import gets the fast-profile preScript (disables the pathological
# auto-analysis passes); small binaries keep the FULL analysis (no preScript). The WARM path
# runs no auto-analysis, so it never needs it.
pre_script_args = []
try:
_large = artifact is not None and os.path.getsize(artifact) >= GHIDRA_FAST_PROFILE_BYTES
except OSError:
_large = False
if _large:
with open(os.path.join(SCRATCH, "hexgraph_fast_profile.py"), "w") as fh:
fh.write(FAST_PROFILE_SCRIPT)
pre_script_args = ["-preScript", "hexgraph_fast_profile.py"]

# Persistent-project cache (analyze-once / reuse). The host resolves
# <data_dir>/ghidra/<sha256>__<version>/project and bind-mounts it writable here; if a
# prior COLD run already imported the program (a non-empty project dir), reuse it via
Expand Down Expand Up @@ -897,7 +978,9 @@ def main() -> int:
cmd = [
hl, proj_dir, PROJECT_NAME,
"-import", artifact,
*_analysis_timeout_args(), # F13: stop+save before the wall-clock kill on a monolith
"-scriptPath", SCRATCH,
*pre_script_args, # F13: fast-profile preScript for a large binary
"-postScript", script_name, out_path, focus or "", rename_addr, rename_name,
]
if not persistent:
Expand Down
114 changes: 102 additions & 12 deletions src/hexgraph/sandbox/resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,23 @@
SIZE_TIMEOUT_SECONDS_PER_MIB = 5 # added per MiB of artifact above the threshold
SIZE_TIMEOUT_CAP_SECONDS = 3600 # size-scaling alone never pushes the budget past 1 h

# F13 (the OTHER half): a LARGE artifact also needs more container MEMORY and a bigger /scratch
# tmpfs than the shipped 2 GiB / 512 MiB. Ghidra's import + auto-analysis of a 100 MB+ ELF exhausts
# the default heap AND fills the tmpfs it writes its DB/recovery into (TMPDIR=/scratch) — the "DB
# buffer" failure — and the tmpfs counts against the same mem cgroup, squeezing the heap further.
# Unlike the timeout, raising mem/tmpfs has a real HOST cost, so it starts at a HIGHER threshold,
# grows linearly per byte over it, and is bounded by BOTH a hard cap and a fraction of host RAM so
# a big artifact never over-commits the box. Monotonic and never below the configured base — like
# the timeout, scaling only ever widens. The probe sizes Ghidra's JVM heap as a PERCENTAGE of the
# resulting cgroup cap (-XX:MaxRAMPercentage rather than a fixed -Xmx; see
# sandbox/probes/ghidra_probe.py), so a bigger container automatically yields a bigger heap.
# Artifact size (64 MiB) at or below which size_scaled_mem / size_scaled_tmpfs return the
# configured value unchanged.
SIZE_RAM_THRESHOLD_BYTES = 64 * 1024 * 1024 # below this, mem/tmpfs stay the configured defaults
# Linear growth rates, applied to the bytes OVER the threshold. Example: an artifact 64 MiB over
# the threshold asks for +4 GiB of mem and +1.5 GiB of tmpfs, before the caps below are applied.
SIZE_MEM_BYTES_PER_BYTE = 64 # container mem added per artifact byte over the threshold
SIZE_TMPFS_BYTES_PER_BYTE = 24 # /scratch tmpfs added per artifact byte (Ghidra DB/recovery)
# Ceilings on the BONUS: each is added to the configured base to form the cap (cap = base + ceiling).
SIZE_MEM_CAP_BYTES = 16 * 1024 ** 3 # hard ceiling on the mem size-bonus
SIZE_TMPFS_CAP_BYTES = 8 * 1024 ** 3 # hard ceiling on the tmpfs size-bonus
# Applied to MemTotal read from /proc/meminfo; this bound is skipped when MemTotal is unavailable.
SIZE_RAM_HOST_FRACTION = 0.75 # never scale mem past this fraction of host MemTotal
# Applied to the container mem value handed to size_scaled_tmpfs (its `mem_bytes` argument).
SIZE_TMPFS_MEM_FRACTION = 0.5 # tmpfs counts against mem — keep it ≤ half so the heap has room

# The container types that can carry their own per-type override under `resources.<type>`
# (each inherits `resources.default` for any key it doesn't set). Rehosting containers are
# privileged full-system emulators and are deliberately NOT resource-capped here.
Expand Down Expand Up @@ -176,22 +193,95 @@ def size_scaled_timeout(size_bytes: int | None, base_timeout: int) -> int:
return min(scaled, max(base_timeout, SIZE_TIMEOUT_CAP_SECONDS))


def _parse_bytes(token) -> int:
    """Convert a docker size token to a byte count, returning 0 for anything unparseable.

    Accepted forms (case-insensitive, surrounding whitespace ignored): a bare number ('2048'),
    a number with a 'b' suffix ('2048b'), and a number with a binary unit 'k'/'m'/'g', optionally
    followed by 'b' or 'ib' ('2g', '512m', '2gb', '512MiB'). Fractions are allowed ('1.5g').

    Args:
        token: the size value; it is passed through str(), so non-string values are tolerated.

    Returns:
        The size in bytes, or 0 when the token cannot be parsed (including non-finite values such
        as 'inf'). Callers treat 0 as "leave the base unchanged", so a weird Settings value can
        never crash a probe.
    """
    try:
        s = str(token).strip().lower()
        # Drop an optional trailing "ib"/"b" so the unit letter (if any) ends the string.
        if s.endswith("ib"):
            s = s[:-2]
        elif s.endswith("b"):
            s = s[:-1]
        for suffix, mult in (("g", 1024 ** 3), ("m", 1024 ** 2), ("k", 1024)):
            if s.endswith(suffix):
                return int(float(s[:-1]) * mult)
        return int(float(s))
    except (TypeError, ValueError, OverflowError):
        # OverflowError: int() of a float infinity ("inf", "1e999g"); lenient like the rest.
        return 0


def _fmt_mb(nbytes: int) -> str:
    """Render a byte count as a whole-MiB docker size token such as '6144m'.

    The count is floored to MiB and clamped to at least 1, because docker rejects a '0m' token.
    """
    one_mib = 1024 * 1024
    whole_mib = max(1, nbytes // one_mib)
    return f"{whole_mib}m"


def _host_mem_total_bytes() -> int | None:
"""Host RAM (MemTotal) in bytes, or None — used to cap the mem size-bonus so a big artifact
never asks docker for more than a fraction of the box."""
try:
for line in open("/proc/meminfo"):
if line.startswith("MemTotal:"):
return int(line.split()[1]) * 1024
except (OSError, ValueError):
return None
return None


def size_scaled_mem(size_bytes: int | None, base_mem: str) -> str:
    """Container `--memory` for a probe over an artifact of `size_bytes`, scaled up from `base_mem` (F13).

    Args:
        size_bytes: artifact size in bytes, or None when there is no artifact.
        base_mem: the configured docker memory token (e.g. '2g').

    Returns:
        `base_mem` UNCHANGED when `size_bytes` is None/0, is at or below
        `SIZE_RAM_THRESHOLD_BYTES`, or `base_mem` cannot be parsed. Above the threshold the value
        grows linearly (`SIZE_MEM_BYTES_PER_BYTE` per byte over it), bounded by BOTH
        `base + SIZE_MEM_CAP_BYTES` and `SIZE_RAM_HOST_FRACTION` of host RAM (when known), and is
        returned as a MiB token. Only ever widens: the result is never below the configured base.
    """
    base = _parse_bytes(base_mem)
    if not size_bytes or size_bytes <= SIZE_RAM_THRESHOLD_BYTES or base <= 0:
        return base_mem
    target = base + (size_bytes - SIZE_RAM_THRESHOLD_BYTES) * SIZE_MEM_BYTES_PER_BYTE
    cap = base + SIZE_MEM_CAP_BYTES
    host = _host_mem_total_bytes()
    if host:
        cap = min(cap, int(host * SIZE_RAM_HOST_FRACTION))
    # max(base, cap) keeps a host-derived cap that is below the base from shrinking the value.
    scaled = min(target, max(base, cap))
    # _fmt_mb floors to whole MiB. With a base that is not MiB-aligned, a sub-MiB bonus would
    # format to LESS than the base, so only emit the scaled token when the floored value still
    # exceeds the base; otherwise keep the configured token verbatim.
    mib = 1024 * 1024
    if (scaled // mib) * mib <= base:
        return base_mem
    return _fmt_mb(scaled)


def size_scaled_tmpfs(size_bytes: int | None, base_tmpfs: str, mem_bytes: int) -> str:
    """`/scratch` tmpfs size for a probe over an artifact of `size_bytes`, scaled up from `base_tmpfs` (F13).

    Args:
        size_bytes: artifact size in bytes, or None when there is no artifact.
        base_tmpfs: the configured docker tmpfs size token (e.g. '512m').
        mem_bytes: the container memory limit in bytes that the tmpfs must fit inside.

    Returns:
        `base_tmpfs` UNCHANGED when `size_bytes` is None/0, is at or below
        `SIZE_RAM_THRESHOLD_BYTES`, or `base_tmpfs` cannot be parsed. Above the threshold the value
        grows linearly (`SIZE_TMPFS_BYTES_PER_BYTE` per byte over it), capped by
        `base + SIZE_TMPFS_CAP_BYTES` AND `SIZE_TMPFS_MEM_FRACTION` of `mem_bytes` — the tmpfs
        counts against the mem cgroup, so it must leave the JVM heap room. Only ever widens.
    """
    base = _parse_bytes(base_tmpfs)
    if not size_bytes or size_bytes <= SIZE_RAM_THRESHOLD_BYTES or base <= 0:
        return base_tmpfs
    target = base + (size_bytes - SIZE_RAM_THRESHOLD_BYTES) * SIZE_TMPFS_BYTES_PER_BYTE
    cap = min(base + SIZE_TMPFS_CAP_BYTES, int(mem_bytes * SIZE_TMPFS_MEM_FRACTION))
    # max(base, cap) keeps a mem-derived cap that is below the base from shrinking the value.
    scaled = min(target, max(base, cap))
    # _fmt_mb floors to whole MiB; a sub-MiB bonus on a non-MiB-aligned base would format to LESS
    # than the base, so keep the configured token unless the floored value still exceeds it.
    mib = 1024 * 1024
    if (scaled // mib) * mib <= base:
        return base_tmpfs
    return _fmt_mb(scaled)


def resource_spec_for_artifact(artifact, container_type: str = "sandbox") -> ResourceSpec:
    """The resolved ResourceSpec for a probe over `artifact`, size-aware (F13).

    Starts from `resource_spec_for(container_type)` — so a user's `resources.<type>.*` overrides are
    the base/floor this scales up from — and raises `timeout` (≥32 MiB) and, for a genuinely large
    artifact (≥64 MiB), `mem` + `tmpfs` so Ghidra's import/auto-analysis of a 100 MB+ ELF doesn't
    exhaust the heap or fill the DB/recovery tmpfs (the "DB buffer" failure). A small file, a `None`
    artifact (a path-less Channel surface), or an unreadable path yields the base spec verbatim, and
    for an `unconstrained` spec (the user already gave the container the whole box) `mem`/`tmpfs`
    are left untouched. Every knob only ever widens, never shrinks. Used as run_probe's default; the
    probe then sizes Ghidra's JVM heap as a percentage of the resulting cgroup cap. The detached
    fuzz path keeps its own hard-cap spec.

    Args:
        artifact: path of the artifact on disk, or None when the probe mounts no bytes.
        container_type: which `resources.<type>` section supplies the base spec.

    Returns:
        The base spec itself when nothing needs widening, otherwise a copy with `timeout`, `mem`
        and `tmpfs` replaced.
    """
    base = resource_spec_for(container_type)
    try:
        size = os.path.getsize(artifact) if artifact is not None else None
    except OSError:
        # Unreadable path: fall back to the configured spec rather than failing the probe.
        return base
    timeout = size_scaled_timeout(size, base.timeout)
    if base.unconstrained:  # ceilings already dropped — nothing to widen
        mem, tmpfs = base.mem, base.tmpfs
    else:
        mem = size_scaled_mem(size, base.mem)
        # The tmpfs cap is derived from the (possibly scaled) mem, so mem is computed first.
        tmpfs = size_scaled_tmpfs(size, base.tmpfs, _parse_bytes(mem))
    if timeout <= base.timeout and mem == base.mem and tmpfs == base.tmpfs:
        return base
    return replace(base, timeout=timeout, mem=mem, tmpfs=tmpfs)
5 changes: 5 additions & 0 deletions src/hexgraph/sandbox/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -494,6 +494,11 @@ def run_probe(
name = f"hexgraph-{uuid.uuid4().hex[:12]}"
cmd = [
"docker", "run", "--rm", "--name", name,
# Expose THIS run's wall-clock budget to the probe so a long-running tool can stop
# itself GRACEFULLY a little before the external kill and save partial work, rather
# than being torn down with nothing (Ghidra's `-analysisTimeoutPerFile` uses this on a
# huge ELF whose full auto-analysis would outrun the budget — F13). Informational only.
"-e", f"HEXGRAPH_PROBE_TIMEOUT_S={timeout}",
*self._hardening_args(allow_network=allow_network, net_container=net_container,
resources=resources, secret=bool(secret)),
# A channel probe (live target, no bytes at rest) mounts no artifact.
Expand Down
53 changes: 53 additions & 0 deletions tests/test_ghidra_fast_profile.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
"""F13: Ghidra's analysis of a 100 MB+ monolith is bounded two ways — (1) a fast-profile preScript
disables the passes proven pathological on a huge binary (Call-Fixup Installer's O(n^2) AddressSet,
the per-processor Constant Reference Analyzer, the decompile-every-function passes) while KEEPING
the call-graph/reference analyzers; (2) auto-analysis is told to stop+save just under the host's
wall-clock budget. These check the pure host-side logic; the end-to-end behavior is validated
against a real monolith separately. The probe is stdlib-only at import (Ghidra API is lazy)."""

from __future__ import annotations

from hexgraph.sandbox.probes import ghidra_probe as G


def test_analysis_timeout_sits_just_under_the_host_budget(monkeypatch):
    """A roomy 1000 s budget yields an analysis timeout of the budget minus the save overhead."""
    wall_clock = 1000
    monkeypatch.setenv("HEXGRAPH_PROBE_TIMEOUT_S", str(wall_clock))
    expected_budget = wall_clock - G.GHIDRA_SAVE_OVERHEAD_S
    assert G._analysis_timeout_args() == ["-analysisTimeoutPerFile", str(expected_budget)]


def test_small_nontrivial_budget_still_gets_a_graceful_stop(monkeypatch):
    """A lowered timeout (200 s) keeps the graceful stop: the budget floors at half (100 s)."""
    # Subtracting the overhead from 200 s would leave too little, so the half-budget floor
    # applies instead of the stop silently disappearing.
    monkeypatch.setenv("HEXGRAPH_PROBE_TIMEOUT_S", "200")
    args = G._analysis_timeout_args()
    assert args == ["-analysisTimeoutPerFile", "100"]


def test_no_analysis_timeout_when_budget_absent_or_bad(monkeypatch):
    """No timeout argument is produced for a missing, too-small, or non-numeric budget."""
    # No budget advertised -> analysis is left to run unbounded.
    monkeypatch.delenv("HEXGRAPH_PROBE_TIMEOUT_S", raising=False)
    assert G._analysis_timeout_args() == []
    # "90" is under the 120 s minimum; "not-a-number" cannot be parsed at all.
    for unusable in ("90", "not-a-number"):
        monkeypatch.setenv("HEXGRAPH_PROBE_TIMEOUT_S", unusable)
        assert G._analysis_timeout_args() == []


def test_fast_profile_threshold_default_is_100mib():
    """The fast-profile size threshold defaults to 100 MiB, expressed in bytes."""
    one_mib = 1024 * 1024
    assert G.GHIDRA_FAST_PROFILE_BYTES == 100 * one_mib


def test_fast_profile_disables_the_proven_slow_passes():
    """The preScript text names every slow analyzer and switches options off via setBoolean."""
    script = G.FAST_PROFILE_SCRIPT
    exact_names = (
        "Call-Fixup Installer",
        "Decompiler Parameter ID",
        "Decompiler Switch Analysis",
        "Aggressive Instruction Finder",
    )
    not_named = [name for name in exact_names if name not in script]
    assert not not_named
    # processor-agnostic match for the constant-propagation pass ("PowerPC/ARM/x86 … "):
    assert "Constant Reference Analyzer" in script
    assert "Scalar Operand References" in script
    assert "setBoolean" in script
    assert "False" in script


def test_fast_profile_keeps_the_call_graph_analyzers():
    """Analyzers that recon depends on must never appear in the disable script."""
    # Function list + CALL GRAPH + xrefs come from these passes. None of the checked names is a
    # substring of a disabled analyzer's name, so a plain containment test is sufficient.
    script = G.FAST_PROFILE_SCRIPT
    must_keep = ("Subroutine References", "Function ID", "Demangler GNU", "Disassemble Entry Points")
    wrongly_named = [name for name in must_keep if name in script]
    assert not wrongly_named
Loading
Loading