# /scratch via _JAVA_OPTIONS so EVERY writable Ghidra/Java path lands on the one tmpfs the
# hardened sandbox guarantees, making the probe self-sufficient under bare --read-only +
# --user 1000 with only /scratch writable. Prepend so a caller-supplied _JAVA_OPTIONS wins.
#
# F13: also let the heap scale with the container. The JVM's default max heap is ~25% of the
# cgroup RAM cap, which OOMs (the "DB buffer" failure) on a 100 MB+ ELF — and the sandbox now
# grants a large artifact a BIGGER `--memory` cap (sandbox/resources.py size-scaling). A RAM
# PERCENTAGE (the JDK is cgroup-aware) self-adjusts to whatever cap THIS container got, so there's
# no hardcoded -Xmx to drift from the cap and it tracks a larger/smaller `resources.sandbox.mem`
# too. ~45% leaves room for the tmpfs (which counts against the same cap) + JVM native overhead.
# Tunable per-run via HEXGRAPH_GHIDRA_HEAP_PCT without rebuilding. A caller-supplied -Xmx (appended
# below) still wins.
def _ghidra_heap_pct(default: float = 45.0) -> str:
    """Return the value for `-XX:MaxRAMPercentage` as a plain decimal string.

    Reads HEXGRAPH_GHIDRA_HEAP_PCT. The value is interpolated into `_JAVA_OPTIONS`, which the JVM
    parses at startup, so a typo (e.g. "45%") must not reach it: anything that is not a number in
    [1, 100] (including NaN/inf or an unset variable) falls back to `default`.
    """
    raw = os.environ.get("HEXGRAPH_GHIDRA_HEAP_PCT", "")
    try:
        pct = float(raw)
    except (TypeError, ValueError):
        pct = default
    if not 1.0 <= pct <= 100.0:  # a NaN fails this comparison too, so it is rejected here
        pct = default
    # Fixed-point text (never exponent form), trimmed so the default renders as "45.0".
    text = f"{pct:f}".rstrip("0")
    return text + "0" if text.endswith(".") else text


_GHIDRA_HEAP_PCT = _ghidra_heap_pct()
_existing_jopts = os.environ.get("_JAVA_OPTIONS", "")
os.environ["_JAVA_OPTIONS"] = (
    f"-Djava.io.tmpdir={SCRATCH} -XX:MaxRAMPercentage={_GHIDRA_HEAP_PCT} {_existing_jopts}"
).strip()

# F13: bound Ghidra's auto-analysis so a 100 MB+ ELF whose FULL analysis would outrun the
# container's wall-clock budget stops GRACEFULLY and SAVES partial results (functions, call graph,
# the postScript still runs) instead of being torn down by the external timeout with nothing
# persisted.
# We read the budget the host advertised (HEXGRAPH_PROBE_TIMEOUT_S = run_probe's
# wall-clock) and leave headroom for import + save + the postScript, so analysis halts BEFORE the
# kill. Only the COLD import path runs auto-analysis (the warm -process path passes -noanalysis).
GHIDRA_SAVE_OVERHEAD_S = 180


def _analysis_timeout_args() -> list:
    """`-analysisTimeoutPerFile <seconds>` sized just under the host's wall-clock budget so
    analysis stops+saves before the external kill.

    Returns [] when no budget is advertised, when HEXGRAPH_PROBE_TIMEOUT_S is not a finite number,
    or when it is below 120 s (too small to usefully split import/analyze/save). Otherwise the
    analysis budget is the wall-clock minus GHIDRA_SAVE_OVERHEAD_S, but never below half the
    wall-clock, so lowering `resources.sandbox.timeout` can't silently drop the graceful save it's
    meant to provide on a large ELF.
    """
    try:
        # float() first so "600.0" is accepted; int() of inf raises OverflowError and int() of
        # NaN raises ValueError — both mean "no usable budget".
        total = int(float(os.environ.get("HEXGRAPH_PROBE_TIMEOUT_S", "")))
    except (TypeError, ValueError, OverflowError):
        return []
    if total < 120:
        return []
    budget = max(int(total * 0.5), total - GHIDRA_SAVE_OVERHEAD_S)
    return ["-analysisTimeoutPerFile", str(budget)]


def _fast_profile_threshold_bytes(default_mb: float = 100.0) -> int:
    """Fast-profile size threshold in bytes, read from HEXGRAPH_GHIDRA_FAST_PROFILE_MB (MiB).

    This runs at import time, so an unset, unparseable or non-finite value falls back to
    `default_mb` rather than raising and taking the whole probe down.
    """
    try:
        return int(float(os.environ.get("HEXGRAPH_GHIDRA_FAST_PROFILE_MB", "")) * 1024 * 1024)
    except (TypeError, ValueError, OverflowError):
        return int(default_mb * 1024 * 1024)


# F13: above this size, the cold import runs a "fast profile" preScript (below) that turns off the
# auto-analysis passes that grind for ages on a monolith. Smaller binaries keep FULL analysis.
GHIDRA_FAST_PROFILE_BYTES = _fast_profile_threshold_bytes()

# A Jython -preScript (runs BEFORE auto-analysis) that disables the passes proven pathological on a
# 100 MB+ monolith: Call-Fixup Installer (O(n^2) AddressSet — tens of minutes of CPU on a large ELF), the
# Constant Reference Analyzer + Scalar Operand References (constant propagation over
# every function), and the decompile-EVERY-function passes (Decompiler Parameter ID / Switch
# Analysis) + Aggressive Instruction Finder.
The call-graph / reference / function-discovery +# analyzers are KEPT, so recon still gets functions + call graph + strings + basic xrefs; HexGraph +# decompiles on demand (re_decompile_function), so the batch decompile passes aren't needed here. +# Matched by suffix so it's architecture-agnostic ("PowerPC/ARM/x86 … Constant Reference Analyzer"). +FAST_PROFILE_SCRIPT = """# -*- coding: utf-8 -*- +def _slow(name): + if "." in name: + return False + if name in ("Call-Fixup Installer", "Decompiler Parameter ID", "Decompiler Switch Analysis", + "Aggressive Instruction Finder"): + return True + return name.endswith("Constant Reference Analyzer") or name.endswith("Scalar Operand References") + +opts = currentProgram.getOptions("Analyzers") +for _n in list(opts.getOptionNames()): + if _slow(_n): + try: + opts.setBoolean(_n, False) + except: + pass +""" # Jython postScript Ghidra runs after auto-analysis. It writes JSON to args[0]; # args[1] (optional) is the focus function to decompile. @@ -854,6 +922,19 @@ def main() -> int: with open(script_path, "w") as fh: fh.write(script_body) + # F13: a LARGE binary's cold import gets the fast-profile preScript (disables the pathological + # auto-analysis passes); small binaries keep the FULL analysis (no preScript). The WARM path + # runs no auto-analysis, so it never needs it. + pre_script_args = [] + try: + _large = artifact is not None and os.path.getsize(artifact) >= GHIDRA_FAST_PROFILE_BYTES + except OSError: + _large = False + if _large: + with open(os.path.join(SCRATCH, "hexgraph_fast_profile.py"), "w") as fh: + fh.write(FAST_PROFILE_SCRIPT) + pre_script_args = ["-preScript", "hexgraph_fast_profile.py"] + # Persistent-project cache (analyze-once / reuse). 
The host resolves # /ghidra/__/project and bind-mounts it writable here; if a # prior COLD run already imported the program (a non-empty project dir), reuse it via @@ -897,7 +978,9 @@ def main() -> int: cmd = [ hl, proj_dir, PROJECT_NAME, "-import", artifact, + *_analysis_timeout_args(), # F13: stop+save before the wall-clock kill on a monolith "-scriptPath", SCRATCH, + *pre_script_args, # F13: fast-profile preScript for a large binary "-postScript", script_name, out_path, focus or "", rename_addr, rename_name, ] if not persistent: diff --git a/src/hexgraph/sandbox/resources.py b/src/hexgraph/sandbox/resources.py index 91f63f2..9a2afac 100644 --- a/src/hexgraph/sandbox/resources.py +++ b/src/hexgraph/sandbox/resources.py @@ -60,6 +60,23 @@ SIZE_TIMEOUT_SECONDS_PER_MIB = 5 # added per MiB of artifact above the threshold SIZE_TIMEOUT_CAP_SECONDS = 3600 # size-scaling alone never pushes the budget past 1 h +# F13 (the OTHER half): a LARGE artifact also needs more container MEMORY and a bigger /scratch +# tmpfs than the shipped 2 GiB / 512 MiB. Ghidra's import + auto-analysis of a 100 MB+ ELF exhausts +# the default heap AND fills the tmpfs it writes its DB/recovery into (TMPDIR=/scratch) — the "DB +# buffer" failure — and the tmpfs counts against the same mem cgroup, squeezing the heap further. +# Unlike the timeout, raising mem/tmpfs has a real HOST cost, so it starts at a HIGHER threshold, +# grows linearly per byte over it, and is bounded by BOTH a hard cap and a fraction of host RAM so +# a big artifact never over-commits the box. Monotonic and never below the configured base — like +# the timeout, scaling only ever widens. The probe sizes Ghidra's -Xmx from the resulting cgroup +# cap (sandbox/probes/ghidra_probe.py), so a bigger container automatically yields a bigger heap. 
SIZE_RAM_THRESHOLD_BYTES = 64 * 1024 * 1024  # below this, mem/tmpfs stay the configured defaults
SIZE_MEM_BYTES_PER_BYTE = 64  # container mem added per artifact byte over the threshold
SIZE_TMPFS_BYTES_PER_BYTE = 24  # /scratch tmpfs added per artifact byte (Ghidra DB/recovery)
SIZE_MEM_CAP_BYTES = 16 * 1024 ** 3  # hard ceiling on the mem size-bonus
SIZE_TMPFS_CAP_BYTES = 8 * 1024 ** 3  # hard ceiling on the tmpfs size-bonus
SIZE_RAM_HOST_FRACTION = 0.75  # never scale mem past this fraction of host MemTotal
SIZE_TMPFS_MEM_FRACTION = 0.5  # tmpfs counts against mem — keep it ≤ half so the heap has room

_MIB = 1024 * 1024
_SIZE_UNIT_MULTIPLIERS = {"k": 1024, "m": 1024 ** 2, "g": 1024 ** 3}


def _parse_bytes(token) -> int:
    """A docker size token ('2g', '512m', '2gb', '512MiB', '2048') → bytes.

    Lenient: any unparseable or non-finite token yields 0 (the caller then leaves the base
    unchanged), so a weird Settings value can never crash a probe.
    """
    try:
        text = str(token).strip().lower()
        # Drop an optional byte marker first so "2gb"/"2gib" reduce to "2g" and "2048b" to "2048".
        for byte_marker in ("ib", "b"):
            if text.endswith(byte_marker):
                text = text[: -len(byte_marker)]
                break
        multiplier = _SIZE_UNIT_MULTIPLIERS.get(text[-1:], 1)
        if multiplier != 1:
            text = text[:-1]
        return int(float(text) * multiplier)
    except (TypeError, ValueError, OverflowError):  # OverflowError: int() of an infinite float
        return 0


def _fmt_mb(nbytes: int) -> str:
    """Bytes → a docker MiB token (e.g. '6144m'). MiB granularity keeps the value docker-legal and
    readable; floor at 1 MiB so a tiny value never formats to '0m' (which docker rejects)."""
    return f"{max(1, nbytes // _MIB)}m"


def _widened_token(scaled: int, base: int, base_token: str) -> str:
    """`scaled` as a MiB token, or `base_token` untouched when nothing would actually widen.

    `_fmt_mb` floors to whole MiB, so with a base that is not MiB-aligned a slightly larger
    `scaled` could format to LESS than the base; comparing the floored value keeps the
    "only ever widens" guarantee.
    """
    if (scaled // _MIB) * _MIB <= base:
        return base_token
    return _fmt_mb(scaled)


def _host_mem_total_bytes() -> int | None:
    """Host RAM (MemTotal) in bytes, or None — used to cap the mem size-bonus so a big artifact
    never asks docker for more than a fraction of the box."""
    try:
        with open("/proc/meminfo") as meminfo:  # closed deterministically, even on early return
            for line in meminfo:
                if line.startswith("MemTotal:"):
                    # The second field is multiplied by 1024 to get bytes.
                    return int(line.split()[1]) * 1024
    except (OSError, ValueError, IndexError):
        return None
    return None


def size_scaled_mem(size_bytes: int | None, base_mem: str) -> str:
    """Container `--memory` for a probe over an artifact of `size_bytes`, scaled up from `base_mem`
    for a large artifact (F13).

    Returns `base_mem` UNCHANGED at/below `SIZE_RAM_THRESHOLD_BYTES` (or for a None/zero size or an
    unparseable base), so the normal path is untouched. Above it, grows linearly
    (`SIZE_MEM_BYTES_PER_BYTE` per byte over the threshold), bounded by BOTH `SIZE_MEM_CAP_BYTES`
    over the base and `SIZE_RAM_HOST_FRACTION` of host RAM (when it can be read) so a multi-GB
    artifact never over-commits the box. Only ever widens, never shrinks below the configured base.
    """
    base = _parse_bytes(base_mem)
    if not size_bytes or size_bytes <= SIZE_RAM_THRESHOLD_BYTES or base <= 0:
        return base_mem
    target = base + (size_bytes - SIZE_RAM_THRESHOLD_BYTES) * SIZE_MEM_BYTES_PER_BYTE
    cap = base + SIZE_MEM_CAP_BYTES
    host = _host_mem_total_bytes()
    if host:
        cap = min(cap, int(host * SIZE_RAM_HOST_FRACTION))
    # max(base, cap): a host-derived cap below the base must not pull the result under the base.
    scaled = min(target, max(base, cap))
    return _widened_token(scaled, base, base_mem)


def size_scaled_tmpfs(size_bytes: int | None, base_tmpfs: str, mem_bytes: int) -> str:
    """`/scratch` tmpfs size for a probe over an artifact of `size_bytes`, scaled up from
    `base_tmpfs` for a large artifact (F13) so Ghidra's DB/recovery have room.

    Unchanged at/below the threshold. Grows linearly above it, capped by `SIZE_TMPFS_CAP_BYTES`
    over the base AND `SIZE_TMPFS_MEM_FRACTION` of the container mem (`mem_bytes`) — the tmpfs
    counts against the mem cgroup, so it must stay well under it or the JVM heap has nowhere to
    live. Only ever widens.
    """
    base = _parse_bytes(base_tmpfs)
    if not size_bytes or size_bytes <= SIZE_RAM_THRESHOLD_BYTES or base <= 0:
        return base_tmpfs
    target = base + (size_bytes - SIZE_RAM_THRESHOLD_BYTES) * SIZE_TMPFS_BYTES_PER_BYTE
    cap = min(base + SIZE_TMPFS_CAP_BYTES, int(mem_bytes * SIZE_TMPFS_MEM_FRACTION))
    scaled = min(target, max(base, cap))
    return _widened_token(scaled, base, base_tmpfs)


def resource_spec_for_artifact(artifact, container_type: str = "sandbox") -> ResourceSpec:
    """The resolved ResourceSpec for a probe over `artifact`, size-aware (F13).

    Starts from `resource_spec_for(container_type)` — so a user's `resources.<container_type>.*`
    overrides are the base/floor this scales up from — and raises `timeout` (via
    `size_scaled_timeout`) and, for a genuinely large artifact (over `SIZE_RAM_THRESHOLD_BYTES`),
    `mem` + `tmpfs` so Ghidra's import/auto-analysis of a 100 MB+ ELF doesn't exhaust the heap or
    fill the DB/recovery tmpfs (the "DB buffer" failure). A small file, a `None` artifact (a
    path-less Channel surface), or an unreadable path yields the base spec verbatim, and for an
    `unconstrained` base (the user already gave the container the whole box) mem/tmpfs are left
    untouched. Every knob only ever widens, never shrinks. Used as run_probe's default; the probe
    then sizes Ghidra's heap from the resulting cgroup cap. The detached fuzz path keeps its own
    hard-cap spec.
    """
    base = resource_spec_for(container_type)
    try:
        size = os.path.getsize(artifact) if artifact is not None else None
    except OSError:
        return base
    timeout = size_scaled_timeout(size, base.timeout)
    if base.unconstrained:  # ceilings already dropped — nothing to widen
        mem, tmpfs = base.mem, base.tmpfs
    else:
        mem = size_scaled_mem(size, base.mem)
        # The tmpfs cap is derived from the (possibly widened) mem, so compute mem first.
        tmpfs = size_scaled_tmpfs(size, base.tmpfs, _parse_bytes(mem))
    if timeout <= base.timeout and mem == base.mem and tmpfs == base.tmpfs:
        return base
    return replace(base, timeout=timeout, mem=mem, tmpfs=tmpfs)
diff --git a/tests/test_ghidra_fast_profile.py b/tests/test_ghidra_fast_profile.py new file mode 100644 index 0000000..d2de9e6 --- /dev/null +++ b/tests/test_ghidra_fast_profile.py @@ -0,0 +1,53 @@ +"""F13: Ghidra's analysis of a 100 MB+ monolith is bounded two ways — (1) a fast-profile preScript +disables the passes proven pathological on a huge binary (Call-Fixup Installer's O(n^2) AddressSet, +the per-processor Constant Reference Analyzer, the decompile-every-function passes) while KEEPING +the call-graph/reference analyzers; (2) auto-analysis is told to stop+save just under the host's +wall-clock budget. These check the pure host-side logic; the end-to-end behavior is validated +against a real monolith separately. The probe is stdlib-only at import (Ghidra API is lazy).""" + +from __future__ import annotations + +from hexgraph.sandbox.probes import ghidra_probe as G + + +def test_analysis_timeout_sits_just_under_the_host_budget(monkeypatch): + monkeypatch.setenv("HEXGRAPH_PROBE_TIMEOUT_S", "1000") # large: budget = 1000 - overhead + assert G._analysis_timeout_args() == ["-analysisTimeoutPerFile", str(1000 - G.GHIDRA_SAVE_OVERHEAD_S)] + + +def test_small_nontrivial_budget_still_gets_a_graceful_stop(monkeypatch): + # A lowered resources.sandbox.timeout (e.g. 200s) must NOT silently drop the graceful save: + # the budget floors at ~half the wall-clock (here 100s) rather than vanishing. 
+ monkeypatch.setenv("HEXGRAPH_PROBE_TIMEOUT_S", "200") + assert G._analysis_timeout_args() == ["-analysisTimeoutPerFile", "100"] + + +def test_no_analysis_timeout_when_budget_absent_or_bad(monkeypatch): + monkeypatch.delenv("HEXGRAPH_PROBE_TIMEOUT_S", raising=False) + assert G._analysis_timeout_args() == [] # no budget advertised -> let it run + monkeypatch.setenv("HEXGRAPH_PROBE_TIMEOUT_S", "90") # < 120 -> too small to split usefully + assert G._analysis_timeout_args() == [] + monkeypatch.setenv("HEXGRAPH_PROBE_TIMEOUT_S", "not-a-number") + assert G._analysis_timeout_args() == [] + + +def test_fast_profile_threshold_default_is_100mib(): + assert G.GHIDRA_FAST_PROFILE_BYTES == 100 * 1024 * 1024 + + +def test_fast_profile_disables_the_proven_slow_passes(): + s = G.FAST_PROFILE_SCRIPT + for slow in ("Call-Fixup Installer", "Decompiler Parameter ID", "Decompiler Switch Analysis", + "Aggressive Instruction Finder"): + assert slow in s + # processor-agnostic match for the constant-propagation pass ("PowerPC/ARM/x86 … "): + assert "Constant Reference Analyzer" in s and "Scalar Operand References" in s + assert "setBoolean" in s and "False" in s + + +def test_fast_profile_keeps_the_call_graph_analyzers(): + # The recon value (function list + CALL GRAPH + xrefs) depends on these — they must NOT be named + # in the disable script. (Checked names are not substrings of any disabled analyzer name.) 
+ s = G.FAST_PROFILE_SCRIPT + for keep in ("Subroutine References", "Function ID", "Demangler GNU", "Disassemble Entry Points"): + assert keep not in s diff --git a/tests/test_size_aware_resources.py b/tests/test_size_aware_resources.py new file mode 100644 index 0000000..4574a87 --- /dev/null +++ b/tests/test_size_aware_resources.py @@ -0,0 +1,162 @@ +"""F13 (the heap half): a sandbox probe over a LARGE artifact gets more container `--memory` and a +bigger `/scratch` tmpfs (so Ghidra's import/auto-analysis of a 100 MB+ ELF doesn't exhaust the heap +or fill the DB/recovery tmpfs — the "DB buffer" failure), while a normal-size artifact, a path-less +channel, and an explicit (fuzz/poc) spec are untouched. Offline: the scaling helpers are pure, and +the run_probe wiring is checked by capturing the docker flags it builds. +""" + +from __future__ import annotations + +import subprocess +from types import SimpleNamespace + +import pytest + +from hexgraph.sandbox import runner as R +from hexgraph.sandbox.resources import ( + SIZE_MEM_BYTES_PER_BYTE, + SIZE_RAM_THRESHOLD_BYTES, + SIZE_TMPFS_MEM_FRACTION, + ResourceSpec, + _fmt_mb, + _parse_bytes, + resource_spec_for, + resource_spec_for_artifact, + size_scaled_mem, + size_scaled_tmpfs, +) +import hexgraph.sandbox.resources as RS + +MIB = 1024 * 1024 +GIB = 1024 * 1024 * 1024 + + +@pytest.fixture(autouse=True) +def _big_host(monkeypatch): + # Pin host RAM large + deterministic so the host-fraction cap doesn't make tests depend on the box. 
+ monkeypatch.setattr(RS, "_host_mem_total_bytes", lambda: 64 * GIB) + + +# ---- size string parsing ---------------------------------------------------------------- + +def test_parse_and_format_bytes(): + assert _parse_bytes("2g") == 2 * GIB + assert _parse_bytes("512m") == 512 * MIB + assert _parse_bytes("2048") == 2048 + assert _parse_bytes("garbage") == 0 # lenient: unparseable -> 0 (caller keeps base) + assert _fmt_mb(6 * GIB) == "6144m" + assert _fmt_mb(1) == "1m" # floors at 1m, never '0m' (docker rejects 0) + + +# ---- mem / tmpfs scaling ---------------------------------------------------------------- + +def test_mem_unchanged_below_threshold(): + assert size_scaled_mem(SIZE_RAM_THRESHOLD_BYTES, "2g") == "2g" + assert size_scaled_mem(None, "2g") == "2g" + assert size_scaled_mem(8 * MIB, "2g") == "2g" + + +def test_mem_scales_above_threshold(): + size = SIZE_RAM_THRESHOLD_BYTES + 100 * MIB + got = _parse_bytes(size_scaled_mem(size, "2g")) + assert got == 2 * GIB + 100 * MIB * SIZE_MEM_BYTES_PER_BYTE + assert got > 2 * GIB + + +def test_mem_capped_by_host_fraction(monkeypatch): + monkeypatch.setattr(RS, "_host_mem_total_bytes", lambda: 8 * GIB) # small box + huge = SIZE_RAM_THRESHOLD_BYTES + 4 * GIB + assert _parse_bytes(size_scaled_mem(huge, "2g")) <= int(8 * GIB * 0.75) # never over-commit + + +def test_tmpfs_scales_but_stays_under_mem(): + size = SIZE_RAM_THRESHOLD_BYTES + 200 * MIB + mem = _parse_bytes(size_scaled_mem(size, "2g")) + tmpfs = _parse_bytes(size_scaled_tmpfs(size, "512m", mem)) + assert tmpfs > 512 * MIB + assert tmpfs <= int(mem * SIZE_TMPFS_MEM_FRACTION) # tmpfs counts against mem -> leave heap room + + +def test_tmpfs_unchanged_below_threshold(): + assert size_scaled_tmpfs(8 * MIB, "512m", 2 * GIB) == "512m" + + +# ---- resource_spec_for_artifact: composes timeout + mem + tmpfs -------------------------- + +def test_spec_unchanged_for_small_artifact(tmp_path): + base = resource_spec_for("sandbox") + f = tmp_path / "small.bin" + 
f.write_bytes(b"\x00" * (4 * MIB)) + assert resource_spec_for_artifact(f, "sandbox") == base + + +def test_spec_widens_mem_and_tmpfs_for_large_artifact(tmp_path): + base = resource_spec_for("sandbox") + f = tmp_path / "big.bin" + with open(f, "wb") as fh: + fh.truncate(SIZE_RAM_THRESHOLD_BYTES + 150 * MIB) # sparse — no real disk + spec = resource_spec_for_artifact(f, "sandbox") + assert _parse_bytes(spec.mem) > _parse_bytes(base.mem) + assert _parse_bytes(spec.tmpfs) > _parse_bytes(base.tmpfs) + assert spec.timeout > base.timeout + assert (spec.cpus, spec.pids, spec.unconstrained) == (base.cpus, base.pids, base.unconstrained) + + +def test_unconstrained_spec_is_left_alone(tmp_path, monkeypatch): + monkeypatch.setattr(RS, "resource_spec_for", + lambda ct="default": ResourceSpec(unconstrained=True)) + f = tmp_path / "big.bin" + with open(f, "wb") as fh: + fh.truncate(SIZE_RAM_THRESHOLD_BYTES + 200 * MIB) + spec = resource_spec_for_artifact(f, "sandbox") + assert spec.mem == ResourceSpec().mem and spec.tmpfs == ResourceSpec().tmpfs # ceilings already dropped + + +# ---- run_probe wiring: the scaled mem/tmpfs reach the docker flags ----------------------- + +def _capture_docker_cmd(monkeypatch, runner, artifact, **kw): + captured = {} + + def fake_run(cmd, *a, **k): + if len(cmd) > 1 and cmd[1] == "run": + captured["cmd"] = cmd + raise subprocess.TimeoutExpired(cmd, k.get("timeout")) + return SimpleNamespace(returncode=0, stdout="", stderr="") + + monkeypatch.setattr(R.subprocess, "run", fake_run) + with pytest.raises(R.SandboxTimeout): + runner.run_probe("ghidra_probe.py", artifact, **kw) + return captured["cmd"] + + +def test_run_probe_passes_scaled_mem_and_tmpfs_for_a_large_artifact(tmp_path, monkeypatch): + runner = R.SandboxRunner(image="hexgraph-bogus:nope") + f = tmp_path / "mono.elf" + with open(f, "wb") as fh: + fh.truncate(SIZE_RAM_THRESHOLD_BYTES + 150 * MIB) + cmd = _capture_docker_cmd(monkeypatch, runner, f) + joined = " ".join(cmd) + mem = 
cmd[cmd.index("--memory") + 1] + assert _parse_bytes(mem) > 2 * GIB # widened past the 2g default + assert "size=" in joined and "/scratch" in joined # tmpfs present + tmpfs_tok = next(p for p in cmd if p.startswith("/scratch:")) + assert _parse_bytes(tmpfs_tok.split("size=")[1]) > 512 * MIB + + +def test_run_probe_small_artifact_keeps_base_mem(tmp_path, monkeypatch): + runner = R.SandboxRunner(image="hexgraph-bogus:nope") + base_mem = resource_spec_for("sandbox").mem + f = tmp_path / "small.elf" + f.write_bytes(b"\x00" * (8 * MIB)) + cmd = _capture_docker_cmd(monkeypatch, runner, f) + assert cmd[cmd.index("--memory") + 1] == base_mem # unchanged for a normal binary + + +def test_run_probe_advertises_its_deadline_to_the_probe(tmp_path, monkeypatch): + # F13: run_probe exposes its wall-clock budget (-e HEXGRAPH_PROBE_TIMEOUT_S) so a long-running + # probe (Ghidra) can stop+save just before the external kill instead of dying with nothing. + runner = R.SandboxRunner(image="hexgraph-bogus:nope") + f = tmp_path / "x.elf" + f.write_bytes(b"\x00" * (8 * MIB)) + cmd = _capture_docker_cmd(monkeypatch, runner, f) + assert f"HEXGRAPH_PROBE_TIMEOUT_S={resource_spec_for('sandbox').timeout}" in cmd diff --git a/tests/test_size_aware_timeout.py b/tests/test_size_aware_timeout.py index e50f874..ca6b3db 100644 --- a/tests/test_size_aware_timeout.py +++ b/tests/test_size_aware_timeout.py @@ -81,13 +81,17 @@ def test_spec_unchanged_for_small_artifact(tmp_path): assert resource_spec_for_artifact(small, "sandbox") == base # identical, timeout incl. -def test_spec_only_timeout_widens_for_large_artifact(tmp_path): +def test_spec_widens_only_timeout_in_the_medium_band(tmp_path): + # Between the 32 MiB timeout threshold and the (higher) 64 MiB mem/tmpfs threshold, ONLY the + # timeout widens — a medium binary gets more wall-clock but not the heavier mem/tmpfs bump + # (those are reserved for genuinely large artifacts, F13 heap-half). 48 MiB sits in that band. 
base = resource_spec_for("sandbox") - big = _sparse_file(tmp_path, "big.bin", SIZE_TIMEOUT_THRESHOLD_BYTES + 100 * MIB) - spec = resource_spec_for_artifact(big, "sandbox") - assert spec.timeout == size_scaled_timeout(SIZE_TIMEOUT_THRESHOLD_BYTES + 100 * MIB, base.timeout) + size = SIZE_TIMEOUT_THRESHOLD_BYTES + 16 * MIB # 48 MiB: > timeout threshold, < ram threshold + med = _sparse_file(tmp_path, "med.bin", size) + spec = resource_spec_for_artifact(med, "sandbox") + assert spec.timeout == size_scaled_timeout(size, base.timeout) assert spec.timeout > base.timeout - # Everything that is NOT the timeout is byte-for-byte the configured ceiling. + # mem/cpu/pids/tmpfs are all byte-for-byte the configured ceiling in this band. assert (spec.mem, spec.cpus, spec.pids, spec.tmpfs, spec.unconstrained) == ( base.mem, base.cpus, base.pids, base.tmpfs, base.unconstrained)