diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..5b75667
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,18 @@
+.git
+.agents
+.codex
+.deps
+.pytest_cache
+.venv
+__pycache__/
+*.pyc
+backend/lattigo/
+frontend/dacapo/
+frontend/log/
+input_constants/
+input_data/
+mlirs_execute/
+mlirs_output/
+old_backup/
+qbps_cache*/
+repro_results/
diff --git a/.gitignore b/.gitignore
index 4a3b01d..460c66b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,4 +9,6 @@ qbps_cache*/
 frontend/dacapo/
 frontend/log/
 backend/lattigo/
-.venv/
\ No newline at end of file
+.venv/
+.deps/
+repro_results/
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..78c7526
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,39 @@
+FROM python:3.11-slim
+
+SHELL ["/bin/bash", "-o", "pipefail", "-c"]
+
+ENV PYTHONDONTWRITEBYTECODE=1 \
+    PYTHONUNBUFFERED=1 \
+    PIP_NO_CACHE_DIR=1
+
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+        bash \
+        build-essential \
+        ca-certificates \
+        clang \
+        cmake \
+        git \
+        golang-go \
+        lld \
+        ninja-build && \
+    rm -rf /var/lib/apt/lists/*
+
+WORKDIR /opt/orbit
+
+COPY requirements.txt requirements-dev.txt ./
+RUN python -m pip install --upgrade pip && \
+    python -m pip install -r requirements-dev.txt
+
+ARG USER_ID=1000
+ARG GROUP_ID=1000
+RUN groupadd --gid "${GROUP_ID}" orbit && \
+    useradd --uid "${USER_ID}" --gid "${GROUP_ID}" --create-home --shell /bin/bash orbit
+# Set ownership at copy time: a later `chown -R` would duplicate the whole tree in a new layer.
+COPY --chown=orbit:orbit . .
+RUN chmod +x scripts/reproduce.sh scripts/setup_dependencies.sh && \
+    chown orbit:orbit /opt/orbit
+
+USER orbit
+
+CMD ["./scripts/reproduce.sh", "quick"]
diff --git a/README.md b/README.md
index 4ea4b4f..6999404 100644
--- a/README.md
+++ b/README.md
@@ -19,7 +19,7 @@ Run the following commands to install them:
 pip install -r requirements.txt
 ```
 
-For `gurobipy`, you have to apply for a Gurobi license. It is free to apply for an unlimited-use Gurobi Optimizer license for academic use.
+Orbit uses Gurobi (`gurobipy`) to solve its ILPs. The `gurobipy` wheel ships a bundled size-limited license that is sufficient for every partitioned benchmark, so most runs need no extra setup. Only the `--nopart` configurations build a single large ILP that exceeds the size-limited cap; for those, apply for a free unlimited-use Gurobi Optimizer license for academic use and point `GRB_LICENSE_FILE` at it.
 
 ### Frontend and Backend
 
@@ -27,6 +27,138 @@ Orbit uses Dacapo's frontend interface and Lattigo as backend. To evaluate the c
 
 Check `frontend/README.md` and `backend/README.md` for installation instructions.
 
+### Docker and reproducibility scripts
+
+The repository ships a `Dockerfile` that provides a self-contained Python
+environment for running the checked-in tests and the compilation benchmarks.
+It uses Gurobi's bundled size-limited license, so no academic license is
+required for the standard (partitioned) benchmarks.
+
+#### 1. Build the image
+
+```bash
+docker build \
+  --build-arg USER_ID="$(id -u)" \
+  --build-arg GROUP_ID="$(id -g)" \
+  -t orbit:dev .
+```
+
+The `USER_ID`/`GROUP_ID` build args make the in-container `orbit` user match
+your host user, so files written to mounted volumes are owned by you. Building
+only creates the image; it does not run anything yet.
+
+#### 2. Run the container (auto-runs the quick check)
+
+The image's default command is `./scripts/reproduce.sh quick`, so starting a
+container with no extra arguments runs `pytest` plus a smoke compilation of
+`mlirs_input/motivation.mlir` with the toy cost model. Mount `repro_results/`
+and `mlirs_output/` to keep the logs and compiled MLIR on the host:
+
+```bash
+mkdir -p repro_results mlirs_output
+docker run --rm \
+  -v "$PWD/repro_results:/opt/orbit/repro_results" \
+  -v "$PWD/mlirs_output:/opt/orbit/mlirs_output" \
+  orbit:dev
+```
+
+Results are written under `repro_results/<timestamp>/summary.txt`.
+
+#### 3. Run other modes
+
+Override the default command to run any other `reproduce.sh` mode in the
+container, for example:
+
+```bash
+docker run --rm \
+  -v "$PWD/repro_results:/opt/orbit/repro_results" \
+  -v "$PWD/mlirs_output:/opt/orbit/mlirs_output" \
+  orbit:dev ./scripts/reproduce.sh compile-base
+```
+
+Available modes:
+
+```text
+quick                   pytest + motivation smoke compile (default)
+tests                   pytest only
+benchmark-smoke         compile mlirs_input/motivation.mlir
+compile-base            base configuration suite   (n=64k, Lm=16, Sw=40)
+compile-16k             n=16k configuration suite
+compile-lm12            Lm=12 configuration suite
+compile-sw51            Sw=51 configuration suite
+compile-sim-vari        simulated-variadic cost-model suite
+compile-micro-comppart  compression/partitioning micro suite
+compile-micro-bypass    bypass-handling micro suite
+compile-micro-reqbp     cross-benchmark QBP-reuse micro suite
+compile-all             every suite above
+```
+
+Set `ORBIT_THREADS` to control the solver thread count (default 2), e.g.
+`docker run -e ORBIT_THREADS=8 ... orbit:dev ./scripts/reproduce.sh compile-base`.
+
+#### Gurobi license
+
+Orbit solves its ILPs with Gurobi (`gurobipy`). Because Orbit partitions each
+problem into small per-partition ILPs, every partitioned benchmark fits within
+Gurobi's bundled **size-limited license** and runs without an academic license.
+Only the `--nopart` configurations (partitioning disabled) build a single large
+ILP that exceeds the size-limited cap and therefore require a full/academic
+Gurobi license. To use an academic license, mount your `gurobi.lic` into the
+container and point `GRB_LICENSE_FILE` at it:
+
+```bash
+docker run --rm \
+  -v "$HOME/gurobi.lic:/opt/orbit/gurobi.lic:ro" \
+  -e GRB_LICENSE_FILE=/opt/orbit/gurobi.lic \
+  -v "$PWD/repro_results:/opt/orbit/repro_results" \
+  -v "$PWD/mlirs_output:/opt/orbit/mlirs_output" \
+  orbit:dev ./scripts/reproduce.sh compile-micro-comppart
+```
+
+#### Execution (optional)
+
+The container is compile-and-estimate by default. To actually **execute** a
+compiled MLIR under FHE you need the Lattigo backend plus data generated by the
+Dacapo frontend; neither is baked into the image (the frontend in particular
+builds LLVM/MLIR and SEAL from source, so it is large and long-running). The
+full chain is:
+
+```bash
+# 1. Build the Lattigo backend (network required). Mount the repo so the build
+#    persists on the host; you only do this once.
+docker run --rm -v "$PWD:/opt/orbit" -e HOME=/tmp \
+  orbit:dev ./scripts/setup_dependencies.sh backend
+
+# 2. Build the Dacapo frontend and generate the execution data. This produces
+#    input_constants/<bench>_hecate.cst and input_data/<n>k/<model>/<act>/...
+#    The MLIR inputs in mlirs_input/ are already checked in; this step adds the
+#    plaintext constants and the CIFAR-10 samples (GBs) that are not committed.
+docker run --rm -v "$PWD:/opt/orbit" -e HOME=/tmp \
+  orbit:dev ./scripts/setup_dependencies.sh frontend
+docker run --rm -v "$PWD:/opt/orbit" -e HOME=/tmp -w /opt/orbit/frontend/dacapo \
+  orbit:dev bash -c '../dacapo_patch/gen_all_mlirs.sh && python3 examples/tests/gen_input_data.py 10'
+
+# 3. Evaluate a compiled benchmark through the backend. The MLIR is compiled on
+#    demand for the default configuration if it does not already exist.
+docker run --rm -v "$PWD:/opt/orbit" -e HOME=/tmp \
+  -v "$PWD/mlirs_execute:/opt/orbit/mlirs_execute" \
+  orbit:dev ./scripts/reproduce.sh execute \
+    --model ResNet --act SiLU --n 64 --Lm 16 --Sw 40 --run 0
+```
+
+`execute` accepts `--model --act --n --Lm --Sw` (required) and optional
+`--run <id>`, `--cmt <comment>`, `--Csw <scale>`, and `--plain` (plaintext mode).
+It fails fast with the exact missing paths if the backend or the
+`input_data/`/`input_constants/` files are absent. Results are written under
+`mlirs_execute/orbit/<n>/<model>/<act>/`. See `frontend/README.md` and
+`backend/README.md` for full details.
+
+If you only need the backend toolchain (e.g. data is already present):
+
+```bash
+./scripts/setup_dependencies.sh backend
+```
+
 ## Usage
 
 ### Compilation on one benchmark
diff --git a/requirements.txt b/requirements.txt
index 17c1707..c87dca4 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,5 +3,4 @@ numpy==1.25.2
 matplotlib==3.8.1
 more-itertools==8.10.0
 joblib==1.5.1
-gurobipy==12.0.2
-pulp>=2.7.0
\ No newline at end of file
+gurobipy==12.0.2
\ No newline at end of file
diff --git a/scripts/gurobi_license_sweep.sh b/scripts/gurobi_license_sweep.sh
new file mode 100755
index 0000000..4770ad8
--- /dev/null
+++ b/scripts/gurobi_license_sweep.sh
@@ -0,0 +1,89 @@
+#!/usr/bin/env bash
+# Sweep every micro/macro benchmark with the size-limited (non-academic) Gurobi
+# license and record which ones fail because a partition exceeds the limit.
+#
+# Classification (per config):
+#   PASS         run_orbit.py exited 0 (.mlir produced)
+#   LICENSE_FAIL solver hit the size-limited license error (model too large)
+#   TIMEOUT      did not finish within PER_TASK_TIMEOUT (slow, NOT a license issue)
+#   ERROR(n)     other non-zero exit
+set -u
+
+ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+cd "$ROOT"
+
+OUT="${ORBIT_RESULTS_DIR:-repro_results}/gurobi_sweep"
+mkdir -p "$OUT"
+RES="$OUT/results.tsv"
+: > "$RES"
+PER_TASK_TIMEOUT="${PER_TASK_TIMEOUT:-1800}"
+TASK_THREADS="${TASK_THREADS:-4}"
+PARALLEL="${PARALLEL:-12}"
+# run LABEL ARGS...: run one run_orbit.py config under a time limit, log to $OUT/LABEL.log, append its verdict to $RES.
+run() {
+    local label="$1"; shift  # after the shift, "$@" holds only the run_orbit.py arguments
+    local log="$OUT/${label}.log"
+    local start end code verdict
+    start=$(date +%s)
+    timeout "$PER_TASK_TIMEOUT" python3 scripts/optimizer/orbit/run_orbit.py "$@" \
+        --threads "$TASK_THREADS" >"$log" 2>&1
+    code=$?  # captured immediately, before another command overwrites $?
+    end=$(date +%s)
+    if [ "$code" -eq 0 ]; then
+        verdict="PASS"
+    elif grep -qiE 'too large for size-limited|size-limited license|exceeds the limit' "$log"; then  # checked before the timeout case, so a license message in the log takes precedence
+        verdict="LICENSE_FAIL"
+    elif [ "$code" -eq 124 ]; then  # 124 is the status timeout(1) returns when the time limit was hit
+        verdict="TIMEOUT"
+    else
+        verdict="ERROR($code)"
+    fi
+    printf '%s\t%s\t%ss\n' "$label" "$verdict" "$((end-start))" >> "$RES"  # one TSV row: label, verdict, elapsed seconds
+}
+export -f run
+export OUT RES PER_TASK_TIMEOUT TASK_THREADS ROOT
+
+# Build the full job list: "label -- args..."
+jobs_file="$OUT/jobs.txt"
+: > "$jobs_file"
+emit() { printf '%s\n' "$*" >> "$jobs_file"; }
+
+for m in AlexNet MobileNet SqueezeNet VGG16 ResNet; do for a in SiLU ReLU; do
+    emit "base_${m}_${a} -- --model $m --act $a --n 64 --Lm 16 --Sw 40"
+done; done
+for m in AlexNet SqueezeNet VGG16 ResNet; do for a in SiLU ReLU; do
+    emit "16k_${m}_${a} -- --model $m --act $a --n 16 --Lm 16 --Sw 40"
+done; done
+for m in AlexNet MobileNet SqueezeNet VGG16 ResNet; do for a in SiLU ReLU; do
+    emit "lm12_${m}_${a} -- --model $m --act $a --n 64 --Lm 12 --Sw 40"
+done; done
+for m in AlexNet MobileNet SqueezeNet VGG16 ResNet; do for a in SiLU ReLU; do
+    emit "sw51_${m}_${a} -- --model $m --act $a --n 64 --Lm 16 --Sw 51"
+done; done
+for m in AlexNet MobileNet SqueezeNet VGG16 ResNet; do for a in SiLU ReLU; do
+    emit "simvari_${m}_${a} -- --model $m --act $a --n 64 --Lm 16 --Sw 40 --sim-vari"
+done; done
+emit "micro-comppart_comp_part -- --model CompPart --act SiLU --n 64 --Lm 16 --Sw 40"
+emit "micro-comppart_comp_nopart -- --model CompPart --act SiLU --n 64 --Lm 16 --Sw 40 --nopart"
+emit "micro-comppart_nocomp_part -- --model CompPart --act SiLU --n 64 --Lm 16 --Sw 40 --nocomp"
+emit "micro-comppart_nocomp_nopart -- --model CompPart --act SiLU --n 64 --Lm 16 --Sw 40 --nocomp --nopart"
+emit "micro-bypass_ResNet_ReLU_64 -- --model ResNet --act ReLU --n 64 --Lm 16 --Sw 40 --nobypass"
+emit "micro-bypass_ResNet_SiLU_64 -- --model ResNet --act SiLU --n 64 --Lm 16 --Sw 40 --nobypass"
+emit "micro-bypass_ResNet_SiLU_16 -- --model ResNet --act SiLU --n 16 --Lm 16 --Sw 40 --nobypass"
+emit "micro-bypass_ResNet_ReLU_16 -- --model ResNet --act ReLU --n 16 --Lm 16 --Sw 40 --nobypass"
+for m in AlexNet SqueezeNet VGG16 ResNet; do for a in SiLU ReLU; do
+    emit "micro-reqbp_${m}_${a} -- --model $m --act $a --n 16 --Lm 16 --Sw 40 --qbp"
+done; done
+
+echo "Total jobs: $(wc -l < "$jobs_file"), parallelism: $PARALLEL, threads/task: $TASK_THREADS, timeout: ${PER_TASK_TIMEOUT}s"
+
+# Run in parallel. Each line: "label -- args"
+xargs -P "$PARALLEL" -I {} bash -c '
+    line="$1"
+    label="${line%% -- *}"
+    args="${line#* -- }"
+    run "$label" $args  # unquoted on purpose: split the stored args into separate CLI words
+' _ {} < "$jobs_file"  # pass the job line as $1 rather than splicing it into the script text
+
+echo "=== DONE. $(wc -l < "$RES") results in $RES ==="
+sort "$RES"
diff --git a/scripts/optimizer/orbit/ilp_core.py b/scripts/optimizer/orbit/ilp_core.py
index d680ba4..333b262 100644
--- a/scripts/optimizer/orbit/ilp_core.py
+++ b/scripts/optimizer/orbit/ilp_core.py
@@ -1,7 +1,6 @@
 from __future__ import annotations
 
-import re
-from typing import Any, Union
+from typing import Any
 
 from ...tdag import *
 from ...assignment import *
@@ -9,152 +8,8 @@
 from ...visualize import *
 from ...params.params import Params
 
-try:
-    import pulp
-except ImportError:  # pragma: no cover
-    pulp = None  # type: ignore
 
-
-def _pulp_safe_name(s: str, max_len: int = 240) -> str:
-    """PuLP warns if problem/constraint names contain spaces or some punctuation."""
-    t = re.sub(r"\s+", "_", str(s).strip())
-    t = re.sub(r"[^\w.\-]+", "_", t)
-    t = re.sub(r"_+", "_", t).strip("_")
-    return (t or "orbit_ilp")[:max_len]
-
-
-def _pulp_r_upper(params: Params, smax: int) -> int:
-    """Conservative upper bound on rescale-use integer variables (required by CBC)."""
-    return max(128, params.lvl_ub * (smax // max(params.Sf, 1) + 4))
-
-
-class PulpVarPool:
-    """Variable pool for PuLP (CBC) — mirrors Gurobi :class:`VarPool` with explicit bounds on integers."""
-
-    def __init__(self, tdag: Tdag, params: Params, model: Any):
-        if pulp is None:
-            raise ImportError("PuLP is required for ilp_solver='pulp'. Install with: pip install pulp")
-        self.params = params
-        self.Smax = params.Sf + 2 * params.Sw
-        self.model = model
-        self.r_ub = _pulp_r_upper(params, self.Smax)
-        bts_lb = params.bts_lb
-        sf = params.Sf
-        # Big-M values for indicator linearization (Gurobi addGenConstrIndicator replacement)
-        self.M1 = float(self.Smax + sf * max(0, bts_lb - 1) + sf * params.lvl_ub + 1000.0)
-        self.M2 = float(params.bts_lb + params.lvl_ub + 100.0)
-        self.M3 = float(max(self.Smax - sf, sf - params.Sw) + 100.0)
-        self.M4 = float(params.lvl_ub + self.r_ub + 1000.0)
-        self.M5 = float(self.Smax + sf * self.r_ub + 10000.0)
-
-        self.vars: dict[str, Any] = {}
-        self.total_cost: list[Any] = []
-        self.rescale_cost = None
-
-        for v in tdag.nodes:
-            if tdag.nodes[v]['op'] == 'constant':
-                continue
-            self.vars[f"v_lvl_in_{v}"] = pulp.LpVariable(f"v_lvl_in_{v}", lowBound=1, upBound=params.lvl_ub, cat=pulp.LpInteger)
-            self.vars[f"v_scl_in_{v}"] = pulp.LpVariable(f"v_scl_in_{v}", lowBound=params.Sw, upBound=self.Smax, cat=pulp.LpInteger)
-            self.vars[f"v_lvl_out_{v}"] = pulp.LpVariable(f"v_lvl_out_{v}", lowBound=1, upBound=params.lvl_ub, cat=pulp.LpInteger)
-            self.vars[f"v_scl_out_{v}"] = pulp.LpVariable(f"v_scl_out_{v}", lowBound=params.Sw, upBound=self.Smax, cat=pulp.LpInteger)
-            self.vars[f"v_use_r_{v}"] = pulp.LpVariable(f"v_use_r_{v}", lowBound=0, upBound=self.r_ub, cat=pulp.LpInteger)
-            self.vars[f"v_use_b_{v}"] = pulp.LpVariable(f"v_use_b_{v}", cat=pulp.LpBinary)
-
-        for u, v in tdag.edges:
-            if tdag.nodes[u]['op'] == 'constant':
-                continue
-            edge_label = self.get_edge_label(u, v)
-            self.vars[f"e_lvl_in_{edge_label}"] = self.vars[f"v_lvl_out_{u}"]
-            self.vars[f"e_scl_in_{edge_label}"] = self.vars[f"v_scl_out_{u}"]
-            self.vars[f"e_lvl_out_{edge_label}"] = self.vars[f"v_lvl_in_{v}"]
-            if tdag.nodes[v]['op'] == 'mul':
-                self.vars[f"e_scl_out_{edge_label}"] = pulp.LpVariable(
-                    f"e_scl_out_{edge_label}", lowBound=params.Sw, upBound=self.Smax, cat=pulp.LpInteger
-                )
-            else:
-                self.vars[f"e_scl_out_{edge_label}"] = self.vars[f"v_scl_in_{v}"]
-            self.vars[f"e_use_r_{edge_label}"] = pulp.LpVariable(
-                f"e_use_r_{edge_label}", lowBound=0, upBound=self.r_ub, cat=pulp.LpInteger
-            )
-
-        for u in tdag.nodes:
-            if tdag.nodes[u]['op'] != 'constant':
-                continue
-            assert tdag.out_degree(u) == 1, f"Constant node {u} should have exactly one output"
-            v = list(tdag.successors(u))[0]
-            self.vars[f"v_lvl_out_{u}"] = self.vars[f"v_lvl_in_{v}"]
-            if tdag.nodes[v]['op'] == 'mul':
-                self.vars[f"v_scl_out_{u}"] = pulp.LpVariable(
-                    f"v_scl_out_{u}", lowBound=params.Csw, upBound=params.Csw, cat=pulp.LpInteger
-                )
-            else:
-                self.vars[f"v_scl_out_{u}"] = self.vars[f"v_scl_in_{v}"]
-            edge_label = self.get_edge_label(u, v)
-            self.vars[f"e_lvl_out_{edge_label}"] = self.vars[f"v_lvl_out_{u}"]
-            self.vars[f"e_scl_out_{edge_label}"] = self.vars[f"v_scl_out_{u}"]
-
-    def get_edge_label(self, u: str, v: str) -> str:
-        return f"({u}_{v})"
-
-    def var_lvl(self, v: str | tuple[str, str], type: str) -> Any:
-        if isinstance(v, str):
-            return self.vars[f"v_lvl_{type}_{v}"]
-        edge_label = self.get_edge_label(v[0], v[1])
-        return self.vars[f"e_lvl_{type}_{edge_label}"]
-
-    def var_scl(self, v: str | tuple[str, str], type: str) -> Any:
-        if isinstance(v, str):
-            return self.vars[f"v_scl_{type}_{v}"]
-        edge_label = self.get_edge_label(v[0], v[1])
-        return self.vars[f"e_scl_{type}_{edge_label}"]
-
-    def var_use(self, v: str | tuple[str, str], type: str) -> Any:
-        if isinstance(v, str):
-            return self.vars[f"v_use_{type}_{v}"]
-        edge_label = self.get_edge_label(v[0], v[1])
-        return self.vars[f"e_use_{type}_{edge_label}"]
-
-
-def add_ilp_constraints_pulp(tdag: Tdag, vp: PulpVarPool):
-    """Same semantics as Gurobi :func:`add_ilp_constraints` using Big-M linearization for indicators."""
-    prob = vp.model
-    params = vp.params
-    sf = params.Sf
-    bts_lb = params.bts_lb
-
-    for v in tdag.nodes:
-        if tdag.nodes[v]['op'] != 'mul':
-            continue
-        ilist = list(tdag.predecessors(v))
-        if len(ilist) == 1:
-            i0, i1 = ilist[0], ilist[0]
-        else:
-            i0, i1 = ilist[0], ilist[1]
-        prob += vp.var_scl(v, 'in') == vp.var_scl((i0, v), 'out') + vp.var_scl((i1, v), 'out'), _pulp_safe_name(f"scl_mul_{v}")
-
-    for v in tdag.nodes:
-        if tdag.nodes[v]['op'] == 'constant':
-            continue
-        b = vp.var_use(v, 'b')
-        s_in, l_in = vp.var_scl(v, 'in'), vp.var_lvl(v, 'in')
-        l_out, s_out = vp.var_lvl(v, 'out'), vp.var_scl(v, 'out')
-        r = vp.var_use(v, 'r')
-        prob += s_in - sf * (l_in - bts_lb + 1) <= vp.M1 * (1 - b), _pulp_safe_name(f"bts_scl_in_{v}")
-        prob += (bts_lb + 1) - l_out <= vp.M2 * (1 - b), _pulp_safe_name(f"bts_lvl_{v}")
-        prob += sf - s_out <= vp.M3 * (1 - b), _pulp_safe_name(f"bts_scl_out_{v}")
-        prob += l_out - l_in + r <= vp.M4 * b, _pulp_safe_name(f"nobts_lvl_{v}")
-        prob += s_in - sf * r - s_out <= vp.M5 * b, _pulp_safe_name(f"nobts_scl_{v}")
-
-    for u, v in tdag.edges:
-        if tdag.nodes[u]['op'] == 'constant':
-            continue
-        edge_label = vp.get_edge_label(u, v)
-        prob += vp.var_lvl((u, v), 'out') <= vp.var_lvl((u, v), 'in') - vp.var_use((u, v), 'r'), _pulp_safe_name(f"edge_lvl_{edge_label}")
-        prob += vp.var_scl((u, v), 'out') >= vp.var_scl((u, v), 'in') - sf * vp.var_use((u, v), 'r'), _pulp_safe_name(f"edge_scl_{edge_label}")
-
-
-def add_ilp_linear_cost(tdag: Tdag, vp: Union[Any, PulpVarPool], le: LatencyEstimator):
+def add_ilp_linear_cost(tdag: Tdag, vp: Any, le: LatencyEstimator):
     cost_rescale_s = le.lin_op_lmaps['rescale_single']
     cost_bts_s = le.lin_op_lmaps['bootstrap_single']
     vp.rescale_cost = cost_rescale_s[1]
@@ -180,104 +35,29 @@ def add_ilp_linear_cost(tdag: Tdag, vp: Union[Any, PulpVarPool], le: LatencyEsti
             vp.total_cost.append(double_cnt * (this_cost[0] * vp.var_lvl(v, 'in') + this_cost[1]))
 
 
-def add_ilp_io_budgets_pulp(tdag: Tdag, vp: PulpVarPool, io_budgets):
-    prob = vp.model
-    params = vp.params
-    v_in = list(tdag.inputs)[0]
-    v_out = list(tdag.outputs)[0]
-    if 'in_lvl' in io_budgets and io_budgets['in_lvl'] >= 0:
-        in_lvl = io_budgets['in_lvl']
-        prob += vp.var_lvl(v_in, 'in') == in_lvl, _pulp_safe_name(f"input_level_{in_lvl}")
-    if 'in_scl' in io_budgets and io_budgets['in_scl'] >= 0:
-        in_scl = io_budgets['in_scl']
-        prob += vp.var_scl(v_in, 'in') == in_scl, _pulp_safe_name(f"input_scale_{in_scl}")
-    if 'out_lvl' in io_budgets and io_budgets['out_lvl'] >= 0:
-        out_lvl = io_budgets['out_lvl']
-        prob += vp.var_lvl(v_out, 'out') == out_lvl, _pulp_safe_name(f"output_level_{out_lvl}")
-
-    prob += vp.var_scl(v_out, 'out') <= params.Sf * (vp.var_lvl(v_out, 'out') + 1) - 7, _pulp_safe_name("lattigo_output_scale_level_relation")
-
-    vp.total_cost.append(0.2 * vp.rescale_cost * tdag.get_full_size() * vp.var_scl(v_out, 'out'))
-
-
-def _pulp_cbc_solver(num_threads: int):
-    return pulp.PULP_CBC_CMD(msg=0, threads=num_threads, gapRel=0.01)
-
-
-def solve_ilp_core_pulp(vp: PulpVarPool, num_threads: int):
-    prob = vp.model
-    prob += pulp.lpSum(vp.total_cost)
-    prob.solve(_pulp_cbc_solver(num_threads))
-
-
-def solve_ilp_core_bypass_pulp(tdag: Tdag, vp: PulpVarPool, io_budgets, num_threads: int):
-    prob = vp.model
-    v_main_o = io_budgets['maino_v']
-    assert v_main_o in tdag.nodes, f"Main output node {v_main_o} not in Tdag {tdag.name}"
-    main_dag_size = io_budgets['main_dag_size']
-    main_qbp_cost = io_budgets['main_qbp_cost']
-    all_out = list(tdag.outputs)[0]
-    solver = _pulp_cbc_solver(num_threads)
-
-    min_cost = None
-    min_cost_ls = None
-
-    for (mo_lvl, mo_scl), main_cost in main_qbp_cost.items():
-        c_lvl = _pulp_safe_name(f"bypass_main_lvl_{mo_lvl}_{mo_scl}")
-        c_scl = _pulp_safe_name(f"bypass_main_scl_{mo_lvl}_{mo_scl}")
-        prob += vp.var_lvl(v_main_o, 'in') == mo_lvl, c_lvl
-        prob += vp.var_scl(v_main_o, 'in') == mo_scl, c_scl
-        prob.solve(solver)
-        if pulp.LpStatus[prob.status] == 'Optimal':
-            s_out = pulp.value(vp.var_scl(all_out, 'out'))
-            if s_out is None:
-                s_out = 0.0
-            ft_cost = 0.2 * vp.rescale_cost * main_dag_size * round(float(s_out))
-            obj = float(pulp.value(prob.objective) or 0.0)
-            this_cost = obj + main_cost + ft_cost
-            if (min_cost is None) or (this_cost < min_cost):
-                min_cost = this_cost
-                min_cost_ls = (mo_lvl, mo_scl)
-        del prob.constraints[c_lvl]
-        del prob.constraints[c_scl]
-
-    if min_cost_ls is None:
-        return
-    prob += vp.var_lvl(v_main_o, 'in') == min_cost_ls[0], _pulp_safe_name("bypass_final_lvl")
-    prob += vp.var_scl(v_main_o, 'in') == min_cost_ls[1], _pulp_safe_name("bypass_final_scl")
-    prob.solve(solver)
-
-
-def _var_sol(x: Any, use_pulp: bool) -> float:
-    if use_pulp:
-        if pulp is None:
-            raise RuntimeError("PuLP not available")
-        v = pulp.value(x)
-        if v is None:
-            raise ValueError("Missing PuLP variable value")
-        return float(v)
+def _var_sol(x: Any) -> float:
     return float(x.X)
 
 
-def decode_ilp_sol(tdag: Tdag, vp: Any, *, use_pulp: bool = False) -> Assign:
+def decode_ilp_sol(tdag: Tdag, vp: Any) -> Assign:
     assign = Assign(tdag)
     params = vp.params
     for v in tdag.nodes:
         if tdag.nodes[v]['op'] == 'input':
             # input nodes, need to store in-level/scale
-            assign.v_lvl_in[v] = round(_var_sol(vp.var_lvl(v, 'in'), use_pulp))
-            assign.v_scl_in[v] = round(_var_sol(vp.var_scl(v, 'in'), use_pulp))
+            assign.v_lvl_in[v] = round(_var_sol(vp.var_lvl(v, 'in')))
+            assign.v_scl_in[v] = round(_var_sol(vp.var_scl(v, 'in')))
             assert assign.v_scl_in[v] >= params.Sw, f"Node {v} input scale {assign.v_scl_in[v]} below Sw={params.Sw}"
-        assign.v_lvl_out[v] = round(_var_sol(vp.var_lvl(v, 'out'), use_pulp))
-        assign.v_scl_out[v] = round(_var_sol(vp.var_scl(v, 'out'), use_pulp))
+        assign.v_lvl_out[v] = round(_var_sol(vp.var_lvl(v, 'out')))
+        assign.v_scl_out[v] = round(_var_sol(vp.var_scl(v, 'out')))
         if tdag.nodes[v]['op'] != 'constant':
             assert assign.v_scl_out[v] >= params.Sw, f"Node {v} output scale {assign.v_scl_out[v]} below Sw={params.Sw}"
 
     for u, v in tdag.edges:
         if tdag.nodes[u]['op'] == 'constant':
             continue
-        assign.e_lvl_out[(u, v)] = round(_var_sol(vp.var_lvl((u, v), 'out'), use_pulp))
-        assign.e_scl_out[(u, v)] = round(_var_sol(vp.var_scl((u, v), 'out'), use_pulp))
+        assign.e_lvl_out[(u, v)] = round(_var_sol(vp.var_lvl((u, v), 'out')))
+        assign.e_scl_out[(u, v)] = round(_var_sol(vp.var_scl((u, v), 'out')))
         assert assign.e_scl_out[(u, v)] >= params.Sw, f"Edge ({u},{v}) output scale {assign.e_scl_out[(u,v)]} below Sw={params.Sw}"
 
     return assign
@@ -290,20 +70,6 @@ def solve_ilp(
     task_name: str,
     num_threads: int,
     params: Params,
-) -> tuple[Assign | None, float | None]:
-    solver = getattr(params, "ilp_solver", "gurobi")
-    if solver == "pulp":
-        return _solve_ilp_pulp(tdag, io_budgets, le, task_name, num_threads, params)
-    return _solve_ilp_gurobi(tdag, io_budgets, le, task_name, num_threads, params)
-
-
-def _solve_ilp_gurobi(
-    tdag: Tdag,
-    io_budgets: dict,
-    le: LatencyEstimator,
-    task_name: str,
-    num_threads: int,
-    params: Params,
 ) -> tuple[Assign | None, float | None]:
     try:
         import gurobipy as gp
@@ -317,7 +83,7 @@ def _solve_ilp_gurobi(
         )
     except ImportError as e:
         raise ImportError(
-            "gurobipy is required for ilp_solver='gurobi'. Install gurobipy or use ilp_solver='pulp' (PuLP + CBC)."
+            "gurobipy is required to run Orbit. Install it with: pip install gurobipy"
         ) from e
 
     model = gp.Model(task_name)
@@ -333,41 +99,7 @@ def _solve_ilp_gurobi(
 
     if not gurobi_has_solution(model):
         return None, None
-    assign = decode_ilp_sol(tdag, vp, use_pulp=False)
-    if 'maino_v' in io_budgets:
-        v_main_o = io_budgets['maino_v']
-        mo_lvl = assign.v_lvl_in[v_main_o]
-        mo_scl = assign.v_scl_in[v_main_o]
-        assert (mo_lvl, mo_scl) in io_budgets['main_qbp_cost'], f"Main output (lvl, scl)=({mo_lvl}, {mo_scl}) not in provided QBP costs"
-
-    assign_cost = estimate_assign(assign, le)
-    return assign, assign_cost
-
-
-def _solve_ilp_pulp(
-    tdag: Tdag,
-    io_budgets: dict,
-    le: LatencyEstimator,
-    task_name: str,
-    num_threads: int,
-    params: Params,
-) -> tuple[Assign | None, float | None]:
-    if pulp is None:
-        raise ImportError("PuLP is required for ilp_solver='pulp'. Install with: pip install pulp")
-    prob = pulp.LpProblem(_pulp_safe_name(task_name), pulp.LpMinimize)
-    vp = PulpVarPool(tdag, params, prob)
-
-    add_ilp_constraints_pulp(tdag, vp)
-    add_ilp_linear_cost(tdag, vp, le)
-    add_ilp_io_budgets_pulp(tdag, vp, io_budgets)
-    if 'maino_v' in io_budgets:
-        solve_ilp_core_bypass_pulp(tdag, vp, io_budgets, num_threads)
-    else:
-        solve_ilp_core_pulp(vp, num_threads)
-
-    if pulp.LpStatus[prob.status] != 'Optimal':
-        return None, None
-    assign = decode_ilp_sol(tdag, vp, use_pulp=True)
+    assign = decode_ilp_sol(tdag, vp)
     if 'maino_v' in io_budgets:
         v_main_o = io_budgets['maino_v']
         mo_lvl = assign.v_lvl_in[v_main_o]
diff --git a/scripts/optimizer/orbit/ilp_worker.py b/scripts/optimizer/orbit/ilp_worker.py
index 855cc4e..39e8b2e 100644
--- a/scripts/optimizer/orbit/ilp_worker.py
+++ b/scripts/optimizer/orbit/ilp_worker.py
@@ -31,7 +31,6 @@ def _lsabts_worker(self, ilp_threads: int, pdag: Tdag, task_queue: Queue, lock:
                     mq = io_budget["main_qbp_cost"]
                     key_part = "_".join(f"{a}_{b}" for a, b in sorted(mq.keys()))
                     io_budget_name += f"_main_qbp_{key_part}"
-                # No spaces: PuLP/CBC rejects spaces in problem names.
                 task_name = f"Partition_{pdag.name}_{io_budget_name}"
                 pasn, pasn_cost = solve_ilp(pdag, io_budget, self.le, task_name, ilp_threads, self.params)
                 if pasn is not None:
diff --git a/scripts/optimizer/orbit/optimizer.py b/scripts/optimizer/orbit/optimizer.py
index 9615e4b..e63d93a 100644
--- a/scripts/optimizer/orbit/optimizer.py
+++ b/scripts/optimizer/orbit/optimizer.py
@@ -109,8 +109,6 @@ def main():
     parser.add_argument('--enable-reqbp', action='store_true', help='Enable QBP cross-bench reusing')
     parser.add_argument('--bypass-dep', type=int, default=15, help='Bypass dependency level (default: 15)')
     parser.add_argument('--threads', type=int, default=16, help='Number of threads (default: 16)')
-    parser.add_argument('--ilp-solver', type=str, default=None, choices=['gurobi', 'pulp'],
-                        help='MILP backend: gurobipy (default) or PuLP with CBC (no Gurobi license required)')
     parser.add_argument('--netname', type=str, default="", help='Network name for qbp reusing purposes (default: mlirs_input/<netname>.mlir)')
     
     args = parser.parse_args()
@@ -125,8 +123,8 @@ def main():
         netname = os.path.splitext(os.path.basename(args.inputfile))[0]
     params = Params(args.costjson, "Orbit", mode="compile", 
                     Sw=args.waterscale, CSw=args.constantscale, bpsdepth=bypass_dep, threads=args.threads, 
-                    comp=not args.no_compress, part=not args.no_partition, reqbp=args.enable_reqbp, 
-                    netname=netname, ilp_solver=args.ilp_solver)
+                    comp=not args.no_compress, part=not args.no_partition, reqbp=args.enable_reqbp,
+                    netname=netname)
     if args.maxlevel is not None:
         params.lvl_ub = args.maxlevel
     if args.btsupperbound is not None:
diff --git a/scripts/optimizer/orbit/run_orbit.py b/scripts/optimizer/orbit/run_orbit.py
index 6c46738..2c3cbd4 100644
--- a/scripts/optimizer/orbit/run_orbit.py
+++ b/scripts/optimizer/orbit/run_orbit.py
@@ -15,7 +15,8 @@
     parser.add_argument('--nocomp', action='store_true', help='Disable Compression')
     parser.add_argument('--nopart', action='store_true', help='Disable Partitioning')
     parser.add_argument('--sim-vari', action='store_true', help='Use simulated variadic bootstrapping cost model')
-    
+    parser.add_argument('--threads', type=int, default=None, help='Number of solver threads passed to optimizer.py')
+
     args = parser.parse_args()
     benchmark = args.model+args.act+str(args.n)+"k"
     this_n = args.Lm if args.Lm != 16 else args.n
@@ -50,7 +51,9 @@
         cmds.append("--no-partition")
     if args.qbp:
         cmds.append("--enable-reqbp")
-        
+    if args.threads is not None:
+        cmds += ["--threads", str(args.threads)]
+
     with open(f"{result_dir}{outname}.txt", "w", buffering=1) as stdout_file, \
         open(f"{result_dir}{outname}.err", "w", buffering=1) as stderr_file:
         process = subprocess.Popen(
@@ -62,4 +65,5 @@
         if exit_code != 0:
             with open(f"{result_dir}{outname}.err", "r") as err_file:
                 error_msg = err_file.read()
-            print(f"Subprocess exited with code {exit_code}.\n    Error message:\n{error_msg}")
\ No newline at end of file
+            print(f"Subprocess exited with code {exit_code}.\n    Error message:\n{error_msg}")
+            raise SystemExit(exit_code)
diff --git a/scripts/params/params.py b/scripts/params/params.py
index 84b1871..f7bba17 100644
--- a/scripts/params/params.py
+++ b/scripts/params/params.py
@@ -2,7 +2,7 @@
 import numpy as np
 
 class Params:
-    def __init__(self, le_json, sysname, mode, Sw=None, CSw=None, bpsdepth=None, threads=None, comp=None, part=None, reqbp=None, netname=None, ilp_solver=None):
+    def __init__(self, le_json, sysname, mode, Sw=None, CSw=None, bpsdepth=None, threads=None, comp=None, part=None, reqbp=None, netname=None):
         if le_json is None:
             return # should be filled later
         json_parsed = {}
@@ -35,10 +35,7 @@ def __init__(self, le_json, sysname, mode, Sw=None, CSw=None, bpsdepth=None, thr
         self.part = part if part is not None else True
         self.reqbp = reqbp if reqbp is not None else False
         self.netname = netname if netname is not None else ""
-        self.ilp_solver = ilp_solver if ilp_solver is not None else json_parsed.get("ilp_solver", "gurobi")
-        if self.ilp_solver not in ("gurobi", "pulp"):
-            raise ValueError(f"ilp_solver must be 'gurobi' or 'pulp', got {self.ilp_solver!r}")
-        
+
         self.trunc_val = 1 # truncation value for latency estimation
         self.dacapo_mlir_in = True  # need to revert the input MLIR level
         self.dacapo_mlir_out = True # need to revert the output MLIR level
diff --git a/scripts/reproduce.sh b/scripts/reproduce.sh
new file mode 100755
index 0000000..d9173e9
--- /dev/null
+++ b/scripts/reproduce.sh
@@ -0,0 +1,384 @@
+#!/usr/bin/env bash
+set -euo pipefail  # abort on errors, unset variables, and failures anywhere in a pipeline
+
+ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"  # parent of this script's directory
+cd "$ROOT"
+
+MODE="${1:-quick}"  # first positional argument selects the mode
+THREADS="${ORBIT_THREADS:-2}"
+RESULT_ROOT="${ORBIT_RESULTS_DIR:-repro_results}"
+RUN_ID="${ORBIT_RUN_ID:-$(date -u +%Y%m%dT%H%M%SZ)}"  # UTC timestamp unless overridden
+OUT_DIR="$RESULT_ROOT/$RUN_ID"
+SUMMARY="$OUT_DIR/summary.txt"
+
+mkdir -p "$OUT_DIR"  # created before dispatch, so every mode (even help) gets a result dir
+
+usage() {  # Print the mode list, execute syntax, and environment overrides to stdout.
+    cat <<'EOF'
+Usage:
+  scripts/reproduce.sh [mode]
+
+Modes:
+  quick                  Run pytest and a small optimizer benchmark smoke test.
+  tests                  Run pytest only.
+  benchmark-smoke        Compile mlirs_input/motivation.mlir with the toy cost model.
+  deps                   Install Python test/runtime dependencies with pip.
+  compile-base           Run the README base compilation suite.
+  compile-16k            Run the README 16k compilation suite.
+  compile-lm12           Run the README Lm=12 compilation suite.
+  compile-sw51           Run the README Sw=51 compilation suite.
+  compile-sim-vari       Run the simulated-variadic compilation suite.
+  compile-micro-comppart Run the compression/partitioning micro suite.
+  compile-micro-bypass   Run the bypass micro suite.
+  compile-micro-reqbp    Run the qbp reuse micro suite.
+  compile-all            Run all compilation suites above.
+  execute                Evaluate a compiled MLIR through the Lattigo backend.
+
+Execute usage:
+  scripts/reproduce.sh execute --model M --act A --n N --Lm L --Sw S \
+                               [--run ID] [--cmt CMT] [--Csw CSW] [--plain]
+
+  Requires the Lattigo backend (scripts/setup_dependencies.sh backend) and the
+  frontend-generated input_data/ and input_constants/ files (not shipped; mount
+  them). The MLIR is compiled on demand for the default configuration if absent.
+
+Orbit solves its ILPs with Gurobi (gurobipy). The image ships Gurobi's bundled
+size-limited license, which is enough for every partitioned benchmark; only the
+--nopart configurations need a full/academic Gurobi license.
+
+Environment:
+  ORBIT_THREADS          Solver threads. Default: 2.
+  ORBIT_RESULTS_DIR      Result log root. Default: repro_results.
+  ORBIT_RUN_ID           Result subdirectory name. Default: UTC timestamp.
+EOF
+}
+
+timestamp() {  # Emit the current UTC time as YYYY-MM-DDTHH:MM:SSZ.
+    date -u '+%Y-%m-%dT%H:%M:%SZ'
+}
+
+record() {  # Print all arguments as one line and append the same line to $SUMMARY.
+    printf '%s\n' "$*" | tee -a "$SUMMARY"
+}
+
+run_logged() {
+    # Run "<name> <cmd> [args...]": capture stdout+stderr in $OUT_DIR/<name>.log and
+    # record RUN/PASS/FAIL; on failure echo the log tail and return the exit status.
+    local label="$1" logfile="$OUT_DIR/$1.log" rc=0
+    shift
+    record "[$(timestamp)] RUN $*"
+    "$@" >"$logfile" 2>&1 || rc=$?
+    if (( rc == 0 )); then
+        record "[$(timestamp)] PASS $label"
+        return 0
+    fi
+    record "[$(timestamp)] FAIL $label (exit $rc)"
+    record "Last 40 lines from $logfile:"
+    tail -n 40 "$logfile" | tee -a "$SUMMARY"
+    return "$rc"
+}
+
+summarize_optimizer_log() {
+    # Copy the key optimizer result lines of log file $1 into the summary.
+    # Returns 1 if the log does not exist; a log with no matching line is only noted.
+    local path="$1"
+    local wanted='Built original DAG|After Compression|Final assignment latency|Final tdag latency|Orbit Compilation time'
+    [[ -f "$path" ]] || { record "Missing optimizer log: $path"; return 1; }
+    record "Optimizer summary from $path:"
+    if ! grep -E "$wanted" "$path" | tee -a "$SUMMARY"; then
+        record "No optimizer summary lines found in $path"
+    fi
+}
+
+run_tests() {  # Run pytest quietly via run_logged, then copy the log's last 5 lines to the summary.
+    run_logged pytest python3 -m pytest -q
+    record "Pytest summary:"
+    tail -5 "$OUT_DIR/pytest.log" | tee -a "$SUMMARY"
+}
+
+run_benchmark_smoke() {
+    # Compile the motivation example with the toy cost model and summarize its log.
+    local cmd=(
+        python3 -u -m scripts.optimizer.orbit.optimizer
+        --inputfile mlirs_input/motivation.mlir
+        --outputfile mlirs_output/repro/motivation.mlir
+        --costjson cost_models/toy_backend.json --waterscale 40 --threads "$THREADS"
+    )
+    run_logged benchmark-smoke "${cmd[@]}"
+    summarize_optimizer_log "$OUT_DIR/benchmark-smoke.log"
+}
+
+run_orbit_bench() {
+    # Compile one benchmark through run_orbit.py and summarize the optimizer log.
+    # Usage: run_orbit_bench <model> <act> <n> <Lm> <Sw> [run_orbit.py flags...]
+    # Fails if the compile fails, the log is missing, or the .err file is non-empty.
+    local model="$1" act="$2" n="$3" Lm="$4" Sw="$5"
+    shift 5
+    local flags=("$@")
+
+    # Mirror the flag-dependent parts of the output name so the logs can be located.
+    # NOTE(review): value-taking flags (e.g. the --Csw that run_execute forwards) are
+    # not reflected in $outname below — confirm against run_orbit.py's naming.
+    local bypass="bypass" qbp="noqbp" comp="comp" part="part" simvari=""
+    local flag
+    # ${flags[@]+...} keeps an empty flag list from tripping `set -u` on bash < 4.4.
+    for flag in ${flags[@]+"${flags[@]}"}; do
+        case "$flag" in
+            --nobypass) bypass="nobypass" ;;
+            --qbp) qbp="qbp" ;;
+            --nocomp) comp="nocomp" ;;
+            --nopart) part="nopart" ;;
+            --sim-vari) simvari="_simvari" ;;
+        esac
+    done
+
+    # Logs are looked up under Lm instead of n when Lm is not 16 (run_orbit.py's this_n rule).
+    local this_n="$n"
+    if [[ "$Lm" != "16" ]]; then
+        this_n="$Lm"
+    fi
+
+    local outname="orbit_${model}${act}${n}k_Lm${Lm}_Sw${Sw}_${bypass}_${qbp}_${comp}_${part}${simvari}"
+    local orbit_log="mlirs_output/orbit/${model}/${Sw}/${act}/${this_n}/${outname}.txt"
+    local orbit_err="${orbit_log%.txt}.err"
+
+    run_logged "compile-${outname}" \
+        python3 scripts/optimizer/orbit/run_orbit.py \
+            --model "$model" \
+            --act "$act" \
+            --n "$n" \
+            --Lm "$Lm" \
+            --Sw "$Sw" \
+            --threads "$THREADS" \
+            ${flags[@]+"${flags[@]}"}
+
+    summarize_optimizer_log "$orbit_log"
+    if [[ -s "$orbit_err" ]]; then
+        record "Non-empty stderr log: $orbit_err"
+        tail -40 "$orbit_err" | tee -a "$SUMMARY"
+        return 1
+    fi
+}
+
+run_compile_suite() {  # Run every benchmark of suite $1 through run_orbit_bench; returns 2 for an unknown suite.
+    local suite="$1"
+    local model
+    local act
+    local no_comp
+    local no_part
+
+    case "$suite" in  # run_orbit_bench positional order: model act n Lm Sw [flags...]
+        base)  # n=64, Lm=16, Sw=40
+            for model in AlexNet MobileNet SqueezeNet VGG16 ResNet; do
+                for act in SiLU ReLU; do
+                    run_orbit_bench "$model" "$act" 64 16 40
+                done
+            done
+            ;;
+        16k)  # n=16; MobileNet is not part of this suite
+            for model in AlexNet SqueezeNet VGG16 ResNet; do
+                for act in SiLU ReLU; do
+                    run_orbit_bench "$model" "$act" 16 16 40
+                done
+            done
+            ;;
+        lm12)  # same as base but Lm=12
+            for model in AlexNet MobileNet SqueezeNet VGG16 ResNet; do
+                for act in SiLU ReLU; do
+                    run_orbit_bench "$model" "$act" 64 12 40
+                done
+            done
+            ;;
+        sw51)  # same as base but Sw=51
+            for model in AlexNet MobileNet SqueezeNet VGG16 ResNet; do
+                for act in SiLU ReLU; do
+                    run_orbit_bench "$model" "$act" 64 16 51
+                done
+            done
+            ;;
+        sim-vari)  # same as base plus --sim-vari
+            for model in AlexNet MobileNet SqueezeNet VGG16 ResNet; do
+                for act in SiLU ReLU; do
+                    run_orbit_bench "$model" "$act" 64 16 40 --sim-vari
+                done
+            done
+            ;;
+        micro-comppart)  # all four on/off combinations of --nocomp and --nopart
+            for no_comp in "" "--nocomp"; do
+                for no_part in "" "--nopart"; do
+                    run_orbit_bench CompPart SiLU 64 16 40 ${no_comp:+"$no_comp"} ${no_part:+"$no_part"}  # empty values expand to no argument
+                done
+            done
+            ;;
+        micro-bypass)  # ResNet with --nobypass at n=64, then at n=16
+            for act in ReLU SiLU; do
+                run_orbit_bench ResNet "$act" 64 16 40 --nobypass
+            done
+            for act in SiLU ReLU; do
+                run_orbit_bench ResNet "$act" 16 16 40 --nobypass
+            done
+            ;;
+        micro-reqbp)  # the 16k configurations again, with --qbp
+            for model in AlexNet SqueezeNet VGG16 ResNet; do
+                for act in SiLU ReLU; do
+                    run_orbit_bench "$model" "$act" 16 16 40 --qbp
+                done
+            done
+            ;;
+        *)
+            record "Unknown compilation suite: $suite"
+            return 2
+            ;;
+    esac
+}
+
+run_execute() {
+    # Evaluate a compiled Orbit MLIR through the Lattigo backend.
+    # Returns 2 for bad arguments, 1 when the backend, MLIR, or input data is missing.
+    local model="" act="" n="" Lm="" Sw="" run="0" cmt="bypass_noqbp_comp_part" Csw="" plain=""
+    while [[ $# -gt 0 ]]; do
+        case "$1" in
+            --model|--act|--n|--Lm|--Sw|--run|--cmt|--Csw)
+                if [[ $# -lt 2 ]]; then  # otherwise "$2" aborts the script under `set -u`
+                    record "Option $1 requires a value"
+                    return 2
+                fi
+                printf -v "${1#--}" '%s' "$2"  # option names equal the local variable names
+                shift 2
+                ;;
+            --plain) plain="--plain"; shift ;;
+            *) record "Unknown execute option: $1"; return 2 ;;
+        esac
+    done
+
+    if [[ -z "$model" || -z "$act" || -z "$n" || -z "$Lm" || -z "$Sw" ]]; then
+        record "execute requires --model --act --n --Lm --Sw (and optional --run --cmt --Csw --plain)"
+        return 2
+    fi
+
+    # Backend must be built first (kept out of the image; see setup_dependencies.sh).
+    local lowering="backend/lattigo/lowering"
+    if [[ ! -f "$lowering/run_orbit_one_eval.py" ]]; then
+        record "Lattigo backend not found at $lowering."
+        record "Build it first (network required), e.g. inside the container with a persisted volume:"
+        record "  ./scripts/setup_dependencies.sh backend"
+        return 1
+    fi
+
+    local benchmark="${model}${act}${n}k" this_n="$n"
+    if [[ "$Lm" != "16" ]]; then
+        this_n="$Lm"
+    fi
+
+    # Compile the MLIR on demand for the default configuration if it is missing.
+    local outname="orbit_${benchmark}_Lm${Lm}_Sw${Sw}"
+    [[ -n "$Csw" ]] && outname="${outname}_Csw${Csw}"
+    outname="${outname}_${cmt}"
+    local mlir="mlirs_output/orbit/${model}/${Sw}/${act}/${this_n}/${outname}.mlir"
+    if [[ ! -f "$mlir" ]]; then
+        if [[ "$cmt" == "bypass_noqbp_comp_part" ]]; then
+            record "Compiled MLIR missing; compiling it first: $mlir"
+            run_orbit_bench "$model" "$act" "$n" "$Lm" "$Sw" ${Csw:+--Csw "$Csw"}
+        else
+            record "Compiled MLIR not found for cmt='$cmt': $mlir"
+            record "Compile it first with the matching scripts/reproduce.sh compile-* mode."
+            return 1
+        fi
+    fi
+
+    # Frontend-generated data is required and is not shipped in the image.
+    local data_model="$model"
+    if [[ "$model" == "CompPart" ]]; then
+        data_model="ResNet"   # CompPart reuses ResNet inputs/constants
+    fi
+    local cst="input_constants/${data_model}${act}${n}k_hecate.cst"
+    local inp="input_data/${n}k/${data_model,,}/${act,,}/inputs/input${run}.txt"
+    if [[ ! -f "$cst" || ! -f "$inp" ]]; then
+        record "Execution data not found:"
+        [[ -f "$cst" ]] || record "  missing constants: $cst"
+        [[ -f "$inp" ]] || record "  missing input sample: $inp"
+        record "These are generated by the frontend (GBs) and must be provided/mounted."
+        record "See backend/README.md and frontend/README.md."
+        return 1
+    fi
+
+    run_logged "execute-${outname}_run${run}${plain:+_pl}" \
+        bash -c 'cd "$1" && shift && python3 run_orbit_one_eval.py "$@"' _ "$lowering" \
+            --model "$model" --act "$act" --n "$n" --Lm "$Lm" --Sw "$Sw" \
+            --run "$run" --cmt "$cmt" ${Csw:+--Csw "$Csw"} ${plain:+$plain}
+
+    local exec_log="mlirs_execute/orbit/${n}/${model}/${act}/${outname}_run${run}${plain:+_pl}.log"
+    if [[ -f "$exec_log" ]]; then
+        record "Execution log: $exec_log"
+        tail -20 "$exec_log" | tee -a "$SUMMARY"
+    fi
+}
+
+record "Orbit reproducibility run"  # header lines are written for every mode, including help
+record "Mode: $MODE"
+record "Solver: gurobi"
+record "Threads: $THREADS"
+record "Results: $OUT_DIR"
+
+case "$MODE" in  # dispatch on the first positional argument
+    quick)
+        run_tests
+        run_benchmark_smoke
+        ;;
+    tests)
+        run_tests
+        ;;
+    benchmark-smoke)
+        run_benchmark_smoke
+        ;;
+    deps)
+        run_logged install-python-deps python3 -m pip install -r requirements-dev.txt
+        ;;
+    compile-base)
+        run_compile_suite base
+        ;;
+    compile-16k)
+        run_compile_suite 16k
+        ;;
+    compile-lm12)
+        run_compile_suite lm12
+        ;;
+    compile-sw51)
+        run_compile_suite sw51
+        ;;
+    compile-sim-vari)
+        run_compile_suite sim-vari
+        ;;
+    compile-micro-comppart)
+        run_compile_suite micro-comppart
+        ;;
+    compile-micro-bypass)
+        run_compile_suite micro-bypass
+        ;;
+    compile-micro-reqbp)
+        run_compile_suite micro-reqbp
+        ;;
+    compile-all)  # every suite, one after another
+        run_compile_suite base
+        run_compile_suite 16k
+        run_compile_suite lm12
+        run_compile_suite sw51
+        run_compile_suite sim-vari
+        run_compile_suite micro-comppart
+        run_compile_suite micro-bypass
+        run_compile_suite micro-reqbp
+        ;;
+    execute)
+        run_execute "${@:2}"  # forward everything after the mode argument
+        ;;
+    -h|--help|help)
+        usage
+        ;;
+    *)  # unknown mode: show the help text and exit with status 2
+        usage
+        exit 2
+        ;;
+esac
+
+record "[$(timestamp)] DONE"  # not reached for an unknown mode (exit 2 above)
+record "Summary written to $SUMMARY"
diff --git a/scripts/setup_dependencies.sh b/scripts/setup_dependencies.sh
new file mode 100755
index 0000000..49d0860
--- /dev/null
+++ b/scripts/setup_dependencies.sh
@@ -0,0 +1,152 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+cd "$ROOT"
+# Overridable settings. nproc is GNU-only, so fall back rather than abort under `set -e`.
+MODE="${1:-python}"
+BUILD_JOBS="${ORBIT_BUILD_JOBS:-$(nproc 2>/dev/null || getconf _NPROCESSORS_ONLN 2>/dev/null || echo 1)}"
+DEPS_DIR="${ORBIT_DEPS_DIR:-$ROOT/.deps}"
+
+usage() {  # Print the available setup targets and notes to stdout.
+    cat <<'EOF'
+Usage:
+  scripts/setup_dependencies.sh [python|backend|frontend|all]
+
+Targets:
+  python    Install Python runtime/test dependencies from requirements-dev.txt.
+  backend   Clone Lattigo, apply Orbit's patch, run go mod tidy, and build fhe_binary.
+  frontend  Build LLVM/MLIR, SEAL, and patched Dacapo/Hecate from source.
+  all       Run python, backend, and frontend.
+
+Notes:
+  frontend is large. It builds LLVM/MLIR from source and may take a long time.
+  ORBIT_DEPS_DIR controls where external source trees are cloned. Default: .deps.
+EOF
+}
+
+need_cmd() {
+    # Succeed if `command -v` can resolve $1; otherwise complain on stderr and return 1.
+    command -v "$1" >/dev/null 2>&1 && return 0
+    echo "Missing required command: $1" >&2
+    return 1
+}
+
+install_cmake_build() {  # Install ./build via cmake; non-root users go through sudo.
+    if (( $(id -u) == 0 )); then
+        cmake --install build
+        return
+    fi
+    need_cmd sudo
+    sudo cmake --install build
+}
+
+setup_python() {  # Install the packages listed in requirements-dev.txt with pip.
+    python3 -m pip install -r requirements-dev.txt
+}
+
+setup_backend() {  # Clone Lattigo if absent, run patch_lattigo.sh, then build fhe_binary.
+    need_cmd git
+    need_cmd go
+
+    mkdir -p backend
+    if [[ ! -d backend/lattigo/.git ]]; then  # an existing checkout is reused, not re-cloned
+        git clone https://github.com/tuneinsight/lattigo.git backend/lattigo
+    fi
+
+    (  # subshell keeps the cd local to this step
+        cd backend
+        ./patch_lattigo.sh
+    )
+
+    (
+        cd backend/lattigo
+        go mod tidy
+        cd lowering
+        go build -o fhe_binary ./fhe  # binary is written to backend/lattigo/lowering/fhe_binary
+    )
+}
+
+setup_frontend() {  # Build and install LLVM/MLIR and SEAL, then build the patched Dacapo frontend.
+    need_cmd clang
+    need_cmd clang++
+    need_cmd cmake
+    need_cmd git
+    need_cmd ninja
+    need_cmd python3
+
+    mkdir -p "$DEPS_DIR"
+
+    if [[ ! -d "$DEPS_DIR/llvm-project/.git" ]]; then  # reuse an existing clone
+        git clone https://github.com/llvm/llvm-project.git "$DEPS_DIR/llvm-project"
+    fi
+    (  # subshell keeps the cd local to this step
+        cd "$DEPS_DIR/llvm-project"
+        git checkout llvmorg-18.1.2  # pin the LLVM revision
+        cmake -GNinja -Bbuild \
+            -DCMAKE_C_COMPILER=clang \
+            -DCMAKE_CXX_COMPILER=clang++ \
+            -DCMAKE_BUILD_TYPE=Release \
+            -DLLVM_ENABLE_PROJECTS=mlir \
+            -DLLVM_INSTALL_UTILS=ON \
+            -DLLVM_TARGETS_TO_BUILD=host \
+            llvm
+        cmake --build build --parallel "$BUILD_JOBS"
+        install_cmake_build  # uses sudo when not running as root
+    )
+
+    if [[ ! -d "$DEPS_DIR/SEAL/.git" ]]; then  # reuse an existing clone
+        git clone https://github.com/microsoft/SEAL.git "$DEPS_DIR/SEAL"
+    fi
+    (
+        cd "$DEPS_DIR/SEAL"
+        git checkout 4.0.0  # pin the SEAL revision
+        cmake -S . -B build
+        cmake --build build --parallel "$BUILD_JOBS"
+        install_cmake_build
+    )
+
+    if [[ ! -d frontend/dacapo/.git ]]; then  # reuse an existing clone
+        git clone https://github.com/corelab-src/dacapo.git frontend/dacapo
+    fi
+
+    (
+        cd frontend
+        ./patch_dacapo.sh
+    )
+
+    (
+        cd frontend/dacapo
+        cmake -S . -B build -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++
+        cmake --build build --parallel "$BUILD_JOBS"
+        python3 -m venv .venv
+        . .venv/bin/activate  # the rest of this subshell runs inside the new venv
+        . config.sh  # NOTE(review): sourced under `set -u`; confirm it tolerates unset variables
+        python3 -m pip install -r requirements.txt
+        ./install.sh
+    )
+}
+
+case "$MODE" in  # dispatch on the first positional argument (default: python)
+    python)
+        setup_python
+        ;;
+    backend)
+        setup_backend
+        ;;
+    frontend)
+        setup_frontend
+        ;;
+    all)  # python, then backend, then frontend
+        setup_python
+        setup_backend
+        setup_frontend
+        ;;
+    -h|--help|help)
+        usage
+        ;;
+    *)  # unknown target: show the help text and exit with status 2
+        usage
+        exit 2
+        ;;
+esac
diff --git a/tests/test_cli_pipeline_smoke.py b/tests/test_cli_pipeline_smoke.py
index 984e33f..94c3fb6 100644
--- a/tests/test_cli_pipeline_smoke.py
+++ b/tests/test_cli_pipeline_smoke.py
@@ -46,6 +46,8 @@ def wait(self):
             "--nocomp",
             "--nopart",
             "--sim-vari",
+            "--threads",
+            "2",
         ],
     )
 
@@ -62,6 +64,9 @@ def wait(self):
     assert "--enable-reqbp" in cmd
     assert "--no-compress" in cmd
     assert "--no-partition" in cmd
+    assert "--threads" in cmd
+    assert "2" in cmd
+    assert "--ilp-solver" not in cmd
     assert str(captured["stdout_name"]).endswith(".txt")
     assert str(captured["stderr_name"]).endswith(".err")
 
@@ -151,8 +156,6 @@ def fake_makedirs(path, exist_ok=False):
             "--enable-reqbp",
             "--threads",
             "2",
-            "--ilp-solver",
-            "pulp",
         ],
     )
 
@@ -169,7 +172,7 @@ def fake_makedirs(path, exist_ok=False):
     assert init["part"] is False
     assert init["reqbp"] is True
     assert init["netname"] == "motivation"
-    assert init["ilp_solver"] == "pulp"
+    assert "ilp_solver" not in init
 
     run_call = captured["run"]
     assert run_call["input_file"] == "mlirs_input/motivation.mlir"
diff --git a/tests/test_ilp_pulp_safe_name.py b/tests/test_ilp_pulp_safe_name.py
deleted file mode 100644
index a9fb076..0000000
--- a/tests/test_ilp_pulp_safe_name.py
+++ /dev/null
@@ -1,20 +0,0 @@
-"""PuLP name sanitization used for MILP problem/constraint names."""
-
-from __future__ import annotations
-
-from scripts.optimizer.orbit.ilp_core import _pulp_safe_name
-
-
-def test_pulp_safe_name_strips_spaces():
-    assert _pulp_safe_name("a b c") == "a_b_c"
-    assert " " not in _pulp_safe_name("Task (Partition-x y_z)")
-
-
-def test_pulp_safe_name_non_empty():
-    assert _pulp_safe_name("   ") == "orbit_ilp"
-
-
-def test_pulp_safe_name_truncates():
-    long = "x" * 500
-    out = _pulp_safe_name(long, max_len=50)
-    assert len(out) == 50
diff --git a/tests/test_orbit_integration.py b/tests/test_orbit_integration.py
index 9ba38b4..7517fc6 100644
--- a/tests/test_orbit_integration.py
+++ b/tests/test_orbit_integration.py
@@ -1,19 +1,24 @@
-"""Compressed DAG → orbit_core (PuLP) smoke test."""
+"""Compressed DAG → orbit_core smoke test (Gurobi)."""
 
 from __future__ import annotations
 
+import pytest
+
 from scripts.latency_estimator.latency_estimator import LatencyEstimator
 from scripts.optimizer.orbit.orbit_core import orbit_core
 from scripts.params.params import Params
 from scripts.tdag import addition_squash, auto_compress, build_from_mlir
 
 
-def test_orbit_core_motivation_pulp(motivation_mlir: str, toy_cost_json: str):
+def test_orbit_core_motivation(motivation_mlir: str, toy_cost_json: str):
+    # Orbit solves with Gurobi. The motivation DAG is tiny, so it fits well
+    # within Gurobi's bundled size-limited license (no academic license needed).
+    pytest.importorskip("gurobipy")
+
     params = Params(
         toy_cost_json,
         "Orbit",
         "compile",
-        ilp_solver="pulp",
         threads=2,
         bpsdepth=15,
     )
diff --git a/tests/test_params.py b/tests/test_params.py
index 5d300bc..ad822ce 100644
--- a/tests/test_params.py
+++ b/tests/test_params.py
@@ -1,38 +1,11 @@
-"""Params loading and ilp_solver validation."""
+"""Params loading."""
 
 from __future__ import annotations
 
-import json
-from pathlib import Path
-
-import pytest
-
 from scripts.params.params import Params
 
 
 def test_params_toy_runtime(toy_cost_json: str):
     p = Params(toy_cost_json, "Orbit", "compile")
     assert p.backend == "Toy"
-    assert p.ilp_solver == "gurobi"
     assert p.mode == "compile"
-
-
-def test_params_ilp_solver_override(toy_cost_json: str):
-    p = Params(toy_cost_json, "Orbit", "compile", ilp_solver="pulp")
-    assert p.ilp_solver == "pulp"
-
-
-def test_params_ilp_solver_from_json(tmp_path: Path, toy_cost_json: str):
-    with open(toy_cost_json) as f:
-        data = json.load(f)
-    data["ilp_solver"] = "pulp"
-    jf = tmp_path / "cfg.json"
-    with open(jf, "w") as f:
-        json.dump(data, f)
-    p = Params(str(jf), "Orbit", "compile")
-    assert p.ilp_solver == "pulp"
-
-
-def test_params_rejects_bad_ilp_solver(toy_cost_json: str):
-    with pytest.raises(ValueError, match="ilp_solver"):
-        Params(toy_cost_json, "Orbit", "compile", ilp_solver="not_a_solver")