From ca4a18cbfdb47ab83e99367c217dda42c6833ff4 Mon Sep 17 00:00:00 2001
From: Professor Synapse <131487882+ProfSynapse@users.noreply.github.com>
Date: Fri, 10 Apr 2026 11:50:19 -0400
Subject: [PATCH 1/2] Add Docker-first local runtime bootstrap and eval flow

---
 .agents/skills/fine-tuning/SKILL.md          |  32 ++
 .claude/skills/fine-tuning/SKILL.md          |  32 ++
 .skills/fine-tuning/SKILL.md                 |  32 ++
 Evaluator/vllm_setup.py                      |  86 +--
 docker/bucket-helper/Dockerfile              |  12 +
 docker/bucket-helper/requirements.txt        |   3 +
 docs/plans/local-docker-runtime-plan.md      | 399 ++++++++++++++
 shared/utilities/bucket_artifacts.py         |  74 ++-
 shared/utilities/env.py                      |   8 +
 shared/utilities/paths.py                    |  82 +++
 tuner/backends/evaluation/unsloth_backend.py |  45 +-
 tuner/cli/main.py                            |   8 +
 tuner/cli/parser.py                          |  34 +-
 tuner/cli/router.py                          | 179 ++++--
 tuner/cloud/hf_jobs.py                       |  10 +-
 tuner/discovery/training_runs.py             |  19 +-
 tuner/handlers/__init__.py                   |  61 +--
 tuner/handlers/base.py                       |   4 +-
 tuner/handlers/bucket_handler.py             | 192 ++++++-
 tuner/handlers/docker_handler.py             | 540 +++++++++++++++++++
 tuner/handlers/eval_handler.py               | 415 +++++++++++++-
 tuner/handlers/train_handler.py              | 130 ++++-
 tuner/utils/docker_runtime.py                | 206 +++++++
 23 files changed, 2406 insertions(+), 197 deletions(-)
 create mode 100644 docker/bucket-helper/Dockerfile
 create mode 100644 docker/bucket-helper/requirements.txt
 create mode 100644 docs/plans/local-docker-runtime-plan.md
 create mode 100644 tuner/handlers/docker_handler.py
 create mode 100644 tuner/utils/docker_runtime.py

diff --git a/.agents/skills/fine-tuning/SKILL.md b/.agents/skills/fine-tuning/SKILL.md
index 829d0066..2378edcc 100644
--- a/.agents/skills/fine-tuning/SKILL.md
+++ b/.agents/skills/fine-tuning/SKILL.md
@@ -13,6 +13,13 @@ Train language models with SFT, KTO, and GRPO locally or on supported cloud prov
 | Task | Command |
 |------|---------|
 | Interactive menu | `./run.sh` → Train |
+| Local Docker status | `python tuner.py docker status` |
+| Bootstrap local Docker runtime | `python tuner.py docker bootstrap --docker-target all` |
+| Build Docker bucket helper | `python tuner.py docker build --docker-target bucket` |
+| Pull local Docker runtime | `python tuner.py docker pull --docker-target unsloth` |
+| Smoke test local Docker runtime | `python tuner.py docker smoke --docker-target all` |
+| Local Docker training | `python tuner.py train --runtime docker` |
+| Local Docker evaluation | `python tuner.py eval --runtime docker` |
 | SFT training | `cd Trainers/rtx3090_sft && python train_sft.py --model-size 7b` |
 | KTO training | `cd Trainers/rtx3090_kto && python train_kto.py --model-size 7b` |
 | GRPO training | `cd Trainers/grpo && python train_grpo.py` |
@@ -89,6 +96,9 @@ Use `--tier` on the local SFT and KTO trainers when you want a preset instead of
 - Treat `loss_summary.json` as a supporting artifact, not the canonical final loss metadata file.
 - The ledger should accumulate real model-size / hardware / timing / cost data so future hardware planning can optimize against observed evidence instead of memory.
 - For local trainer iteration, use the checked-in `train_sft.py`, `train_kto.py`, and `train_grpo.py` entrypoints.
+- For Windows local GPU work, prefer Docker Desktop plus `python tuner.py docker smoke --docker-target all` as the first environment check before debugging conda or package drift.
+- For first-time local Docker setup, prefer `python tuner.py docker bootstrap --docker-target all`. It should tell you whether Docker Desktop is installed/running, pull or build the required images, and finish with smoke tests.
+- Prefer `python tuner.py train --runtime docker` and `python tuner.py eval --runtime docker` when you want the CLI to stay Docker-first locally while reusing the checked-in trainer and evaluator entrypoints.
 - For canonical HF experiments, prefer `python tuner.py cloud-pipeline ...` over `cloud-run`.
 - For full train → eval → exact loss → analysis → recommendation runs, prefer `python tuner.py run-experiment ...`.
 - Evolutionary SFT is experimental but now first-class in the cloud experiment path. Prefer a checked-in experiment spec or `cloud-pipeline --train-evolutionary-*` overrides over editing trainer YAMLs by hand.
@@ -102,7 +112,9 @@ Use `--tier` on the local SFT and KTO trainers when you want a preset instead of
 - For in-flight cloud-run health checks, inspect the bucket-backed artifacts first (`training_latest.jsonl`, `stage_summary.json`, `training_lineage.json`, eval/loss partials). Use raw HF logs only as a fallback when the bucket prefix has not started writing yet.
 - For quick bucket spot checks, use `python tuner.py bucket read ...` or `python tuner.py bucket list ...` instead of manual `hf buckets cp` commands.
 - For local inspection or offline diffing, use `python tuner.py bucket pull ...` to sync a bucket-relative path into the current workspace while preserving its relative path.
+- Pulled cloud adapters under `toolset-training-artifacts/runs/...` or `runs/...` should be treated as first-class local runs by `train`/`eval` discovery. Do not spin up a one-off container just because a run originated in HF Jobs.
 - For one-off uploads back into the HF artifact bucket, use `python tuner.py bucket push ...` instead of ad hoc `sync_bucket` snippets.
+- If the active Python lacks modern HF Buckets support, `python tuner.py bucket ...` should fall back to the checked-in Docker bucket helper instead of mutating the main Unsloth environment. Prebuild it with `python tuner.py docker build --docker-target bucket` when you want the fallback path ready ahead of time.
 - For `a100-large` or larger tiers, bias toward aggressive packing. Do not lower batch just because the adapter recipe changed. Start from the highest known-good packed shape for the same model family and only back off after a real OOM or clear instability signal.
 - Treat large unused VRAM on `a100-large` as a mistake, not a comfort margin. If `training_lineage.json` shows tens of GB of reserved headroom, the run is underpacked and the next iteration should push batch size harder even if that risks OOM.
 - For vLLM eval on multi-GPU hardware, prequantized BitsAndBytes base models (for example `*-bnb-4bit`) cannot use tensor parallelism. Do not assume `x4` means vLLM will shard generation across all GPUs; in this path, eval may need to fall back to single-GPU while exact loss still fans out across all visible GPUs afterward.
@@ -172,6 +184,26 @@ See `reference/lora-techniques.md` for full details, integration status, and com
 
 ## Common Patterns
 
+**Bootstrap local Docker on a fresh machine:**
+```bash
+python tuner.py docker bootstrap --docker-target all
+```
+Use this before touching conda if the goal is local GPU training/eval through Docker Desktop. The command should:
+- tell you if Docker Desktop is missing or not running
+- prepare `unsloth`, `vllm`, and the Buckets helper image
+- run smoke tests so you know GPU containers actually work
+
+**Pull a cloud adapter and evaluate it locally through Docker:**
+```bash
+python tuner.py bucket pull \
+  --path runs/hf_jobs/sft/<run-prefix>/final_model \
+  --dest toolset-training-artifacts
+
+python tuner.py eval --runtime docker
+```
+Gotcha:
+- The pulled adapter should now appear in the normal local eval discovery flow. If it does not, check where the pull landed and move it under `toolset-training-artifacts/runs/...` or `runs/...` inside the repo so discovery can find it.
+
 **Quick SFT test run:**
 ```bash
 cd Trainers/rtx3090_sft
diff --git a/.claude/skills/fine-tuning/SKILL.md b/.claude/skills/fine-tuning/SKILL.md
index 829d0066..2378edcc 100644
--- a/.claude/skills/fine-tuning/SKILL.md
+++ b/.claude/skills/fine-tuning/SKILL.md
@@ -13,6 +13,13 @@ Train language models with SFT, KTO, and GRPO locally or on supported cloud prov
 | Task | Command |
 |------|---------|
 | Interactive menu | `./run.sh` → Train |
+| Local Docker status | `python tuner.py docker status` |
+| Bootstrap local Docker runtime | `python tuner.py docker bootstrap --docker-target all` |
+| Build Docker bucket helper | `python tuner.py docker build --docker-target bucket` |
+| Pull local Docker runtime | `python tuner.py docker pull --docker-target unsloth` |
+| Smoke test local Docker runtime | `python tuner.py docker smoke --docker-target all` |
+| Local Docker training | `python tuner.py train --runtime docker` |
+| Local Docker evaluation | `python tuner.py eval --runtime docker` |
 | SFT training | `cd Trainers/rtx3090_sft && python train_sft.py --model-size 7b` |
 | KTO training | `cd Trainers/rtx3090_kto && python train_kto.py --model-size 7b` |
 | GRPO training | `cd Trainers/grpo && python train_grpo.py` |
@@ -89,6 +96,9 @@ Use `--tier` on the local SFT and KTO trainers when you want a preset instead of
 - Treat `loss_summary.json` as a supporting artifact, not the canonical final loss metadata file.
 - The ledger should accumulate real model-size / hardware / timing / cost data so future hardware planning can optimize against observed evidence instead of memory.
 - For local trainer iteration, use the checked-in `train_sft.py`, `train_kto.py`, and `train_grpo.py` entrypoints.
+- For Windows local GPU work, prefer Docker Desktop plus `python tuner.py docker smoke --docker-target all` as the first environment check before debugging conda or package drift.
+- For first-time local Docker setup, prefer `python tuner.py docker bootstrap --docker-target all`. It should tell you whether Docker Desktop is installed/running, pull or build the required images, and finish with smoke tests.
+- Prefer `python tuner.py train --runtime docker` and `python tuner.py eval --runtime docker` when you want the CLI to stay Docker-first locally while reusing the checked-in trainer and evaluator entrypoints.
 - For canonical HF experiments, prefer `python tuner.py cloud-pipeline ...` over `cloud-run`.
 - For full train → eval → exact loss → analysis → recommendation runs, prefer `python tuner.py run-experiment ...`.
 - Evolutionary SFT is experimental but now first-class in the cloud experiment path. Prefer a checked-in experiment spec or `cloud-pipeline --train-evolutionary-*` overrides over editing trainer YAMLs by hand.
@@ -102,7 +112,9 @@ Use `--tier` on the local SFT and KTO trainers when you want a preset instead of
 - For in-flight cloud-run health checks, inspect the bucket-backed artifacts first (`training_latest.jsonl`, `stage_summary.json`, `training_lineage.json`, eval/loss partials). Use raw HF logs only as a fallback when the bucket prefix has not started writing yet.
 - For quick bucket spot checks, use `python tuner.py bucket read ...` or `python tuner.py bucket list ...` instead of manual `hf buckets cp` commands.
 - For local inspection or offline diffing, use `python tuner.py bucket pull ...` to sync a bucket-relative path into the current workspace while preserving its relative path.
+- Pulled cloud adapters under `toolset-training-artifacts/runs/...` or `runs/...` should be treated as first-class local runs by `train`/`eval` discovery. Do not spin up a one-off container just because a run originated in HF Jobs.
 - For one-off uploads back into the HF artifact bucket, use `python tuner.py bucket push ...` instead of ad hoc `sync_bucket` snippets.
+- If the active Python lacks modern HF Buckets support, `python tuner.py bucket ...` should fall back to the checked-in Docker bucket helper instead of mutating the main Unsloth environment. Prebuild it with `python tuner.py docker build --docker-target bucket` when you want the fallback path ready ahead of time.
 - For `a100-large` or larger tiers, bias toward aggressive packing. Do not lower batch just because the adapter recipe changed. Start from the highest known-good packed shape for the same model family and only back off after a real OOM or clear instability signal.
 - Treat large unused VRAM on `a100-large` as a mistake, not a comfort margin. If `training_lineage.json` shows tens of GB of reserved headroom, the run is underpacked and the next iteration should push batch size harder even if that risks OOM.
 - For vLLM eval on multi-GPU hardware, prequantized BitsAndBytes base models (for example `*-bnb-4bit`) cannot use tensor parallelism. Do not assume `x4` means vLLM will shard generation across all GPUs; in this path, eval may need to fall back to single-GPU while exact loss still fans out across all visible GPUs afterward.
@@ -172,6 +184,26 @@ See `reference/lora-techniques.md` for full details, integration status, and com
 
 ## Common Patterns
 
+**Bootstrap local Docker on a fresh machine:**
+```bash
+python tuner.py docker bootstrap --docker-target all
+```
+Use this before touching conda if the goal is local GPU training/eval through Docker Desktop. The command should:
+- tell you if Docker Desktop is missing or not running
+- prepare `unsloth`, `vllm`, and the Buckets helper image
+- run smoke tests so you know GPU containers actually work
+
+**Pull a cloud adapter and evaluate it locally through Docker:**
+```bash
+python tuner.py bucket pull \
+  --path runs/hf_jobs/sft/<run-prefix>/final_model \
+  --dest toolset-training-artifacts
+
+python tuner.py eval --runtime docker
+```
+Gotcha:
+- The pulled adapter should now appear in the normal local eval discovery flow. If it does not, check where the pull landed and move it under `toolset-training-artifacts/runs/...` or `runs/...` inside the repo so discovery can find it.
+
 **Quick SFT test run:**
 ```bash
 cd Trainers/rtx3090_sft
diff --git a/.skills/fine-tuning/SKILL.md b/.skills/fine-tuning/SKILL.md
index 829d0066..2378edcc 100644
--- a/.skills/fine-tuning/SKILL.md
+++ b/.skills/fine-tuning/SKILL.md
@@ -13,6 +13,13 @@ Train language models with SFT, KTO, and GRPO locally or on supported cloud prov
 | Task | Command |
 |------|---------|
 | Interactive menu | `./run.sh` → Train |
+| Local Docker status | `python tuner.py docker status` |
+| Bootstrap local Docker runtime | `python tuner.py docker bootstrap --docker-target all` |
+| Build Docker bucket helper | `python tuner.py docker build --docker-target bucket` |
+| Pull local Docker runtime | `python tuner.py docker pull --docker-target unsloth` |
+| Smoke test local Docker runtime | `python tuner.py docker smoke --docker-target all` |
+| Local Docker training | `python tuner.py train --runtime docker` |
+| Local Docker evaluation | `python tuner.py eval --runtime docker` |
 | SFT training | `cd Trainers/rtx3090_sft && python train_sft.py --model-size 7b` |
 | KTO training | `cd Trainers/rtx3090_kto && python train_kto.py --model-size 7b` |
 | GRPO training | `cd Trainers/grpo && python train_grpo.py` |
@@ -89,6 +96,9 @@ Use `--tier` on the local SFT and KTO trainers when you want a preset instead of
 - Treat `loss_summary.json` as a supporting artifact, not the canonical final loss metadata file.
 - The ledger should accumulate real model-size / hardware / timing / cost data so future hardware planning can optimize against observed evidence instead of memory.
 - For local trainer iteration, use the checked-in `train_sft.py`, `train_kto.py`, and `train_grpo.py` entrypoints.
+- For Windows local GPU work, prefer Docker Desktop plus `python tuner.py docker smoke --docker-target all` as the first environment check before debugging conda or package drift.
+- For first-time local Docker setup, prefer `python tuner.py docker bootstrap --docker-target all`. It should tell you whether Docker Desktop is installed/running, pull or build the required images, and finish with smoke tests.
+- Prefer `python tuner.py train --runtime docker` and `python tuner.py eval --runtime docker` when you want the CLI to stay Docker-first locally while reusing the checked-in trainer and evaluator entrypoints.
 - For canonical HF experiments, prefer `python tuner.py cloud-pipeline ...` over `cloud-run`.
 - For full train → eval → exact loss → analysis → recommendation runs, prefer `python tuner.py run-experiment ...`.
 - Evolutionary SFT is experimental but now first-class in the cloud experiment path. Prefer a checked-in experiment spec or `cloud-pipeline --train-evolutionary-*` overrides over editing trainer YAMLs by hand.
@@ -102,7 +112,9 @@ Use `--tier` on the local SFT and KTO trainers when you want a preset instead of
 - For in-flight cloud-run health checks, inspect the bucket-backed artifacts first (`training_latest.jsonl`, `stage_summary.json`, `training_lineage.json`, eval/loss partials). Use raw HF logs only as a fallback when the bucket prefix has not started writing yet.
 - For quick bucket spot checks, use `python tuner.py bucket read ...` or `python tuner.py bucket list ...` instead of manual `hf buckets cp` commands.
 - For local inspection or offline diffing, use `python tuner.py bucket pull ...` to sync a bucket-relative path into the current workspace while preserving its relative path.
+- Pulled cloud adapters under `toolset-training-artifacts/runs/...` or `runs/...` should be treated as first-class local runs by `train`/`eval` discovery. Do not spin up a one-off container just because a run originated in HF Jobs.
 - For one-off uploads back into the HF artifact bucket, use `python tuner.py bucket push ...` instead of ad hoc `sync_bucket` snippets.
+- If the active Python lacks modern HF Buckets support, `python tuner.py bucket ...` should fall back to the checked-in Docker bucket helper instead of mutating the main Unsloth environment. Prebuild it with `python tuner.py docker build --docker-target bucket` when you want the fallback path ready ahead of time.
 - For `a100-large` or larger tiers, bias toward aggressive packing. Do not lower batch just because the adapter recipe changed. Start from the highest known-good packed shape for the same model family and only back off after a real OOM or clear instability signal.
 - Treat large unused VRAM on `a100-large` as a mistake, not a comfort margin. If `training_lineage.json` shows tens of GB of reserved headroom, the run is underpacked and the next iteration should push batch size harder even if that risks OOM.
 - For vLLM eval on multi-GPU hardware, prequantized BitsAndBytes base models (for example `*-bnb-4bit`) cannot use tensor parallelism. Do not assume `x4` means vLLM will shard generation across all GPUs; in this path, eval may need to fall back to single-GPU while exact loss still fans out across all visible GPUs afterward.
@@ -172,6 +184,26 @@ See `reference/lora-techniques.md` for full details, integration status, and com
 
 ## Common Patterns
 
+**Bootstrap local Docker on a fresh machine:**
+```bash
+python tuner.py docker bootstrap --docker-target all
+```
+Use this before touching conda if the goal is local GPU training/eval through Docker Desktop. The command should:
+- tell you if Docker Desktop is missing or not running
+- prepare `unsloth`, `vllm`, and the Buckets helper image
+- run smoke tests so you know GPU containers actually work
+
+**Pull a cloud adapter and evaluate it locally through Docker:**
+```bash
+python tuner.py bucket pull \
+  --path runs/hf_jobs/sft/<run-prefix>/final_model \
+  --dest toolset-training-artifacts
+
+python tuner.py eval --runtime docker
+```
+Gotcha:
+- The pulled adapter should now appear in the normal local eval discovery flow. If it does not, check where the pull landed and move it under `toolset-training-artifacts/runs/...` or `runs/...` inside the repo so discovery can find it.
+
 **Quick SFT test run:**
 ```bash
 cd Trainers/rtx3090_sft
diff --git a/Evaluator/vllm_setup.py b/Evaluator/vllm_setup.py
index 85770f68..c218850f 100644
--- a/Evaluator/vllm_setup.py
+++ b/Evaluator/vllm_setup.py
@@ -21,7 +21,7 @@
 
 import requests
 
-from shared.utilities.paths import iter_training_output_dirs
+from shared.utilities.paths import iter_training_run_dirs
 
 # ---------------------------------------------------------------------------
 # Constants
@@ -52,6 +52,7 @@ class TrainingRun:
     has_merged_16bit: bool
     has_lora: bool
     model_size: Optional[str] = None
+    source: str = "local_training"
 
     @property
     def display_name(self) -> str:
@@ -238,44 +239,38 @@ def discover_training_runs(base_dir: Optional[Path] = None) -> List[TrainingRun]
     repo_root = base_dir.parent if base_dir.name == "Trainers" else base_dir
 
     for trainer_type in TRAINING_METHODS:
-        for output_dir in iter_training_output_dirs(trainer_type, repo_root):
-            if not output_dir.exists():
+        for run_dir in iter_training_run_dirs(trainer_type, repo_root):
+            if not re.match(r"\d{8}_\d{6}", run_dir.name):
                 continue
 
-            for run_dir in output_dir.iterdir():
-                if not run_dir.is_dir():
-                    continue
-
-                if not re.match(r"\d{8}_\d{6}", run_dir.name):
-                    continue
-
-                has_final_model = (run_dir / "final_model").exists()
-                has_merged_16bit = False
-                has_lora = False
-
-                for subdir in run_dir.iterdir():
-                    if subdir.is_dir():
-                        if (subdir / "merged-16bit").exists():
-                            has_merged_16bit = True
-                        if (subdir / "lora").exists():
-                            has_lora = True
-
-                if has_final_model:
-                    adapter_config = run_dir / "final_model" / "adapter_config.json"
-                    has_lora = has_lora or adapter_config.exists()
-
-                model_size = _detect_model_size(run_dir)
-
-                runs.append(TrainingRun(
-                    path=run_dir,
-                    name=run_dir.name,
-                    timestamp=run_dir.name,
-                    trainer_type=trainer_type,
-                    has_final_model=has_final_model,
-                    has_merged_16bit=has_merged_16bit,
-                    has_lora=has_lora,
-                    model_size=model_size,
-                ))
+            has_final_model = (run_dir / "final_model").exists()
+            has_merged_16bit = False
+            has_lora = False
+
+            for subdir in run_dir.iterdir():
+                if subdir.is_dir():
+                    if (subdir / "merged-16bit").exists():
+                        has_merged_16bit = True
+                    if (subdir / "lora").exists():
+                        has_lora = True
+
+            if has_final_model:
+                adapter_config = run_dir / "final_model" / "adapter_config.json"
+                has_lora = has_lora or adapter_config.exists()
+
+            model_size = _detect_model_size(run_dir)
+
+            runs.append(TrainingRun(
+                path=run_dir,
+                name=run_dir.name,
+                timestamp=run_dir.name,
+                trainer_type=trainer_type,
+                has_final_model=has_final_model,
+                has_merged_16bit=has_merged_16bit,
+                has_lora=has_lora,
+                model_size=model_size,
+                source=_detect_run_source(run_dir),
+            ))
 
     # Sort by timestamp (newest first)
     runs.sort(key=lambda r: r.timestamp, reverse=True)
@@ -299,15 +294,24 @@ def _detect_model_size(run_dir: Path) -> Optional[str]:
             with open(adapter_config) as f:
                 config = json.load(f)
             base_model = config.get("base_model_name_or_path", "")
-            # Extract size from model name
-            for size in ["3b", "7b", "13b", "20b", "70b"]:
-                if size in base_model.lower():
-                    return size.upper()
+            match = re.search(r"(\d+(?:\.\d+)?)\s*([bm])", base_model.lower())
+            if match:
+                return f"{match.group(1)}{match.group(2).upper()}"
         except Exception:
             pass
     return None
 
 
+def _detect_run_source(run_dir: Path) -> str:
+    """Identify whether a run came from local training or imported artifacts."""
+    parts = {part.lower() for part in run_dir.parts}
+    if "toolset-training-artifacts" in parts:
+        return "bucket_pull"
+    if "runs" in parts and "trainers" not in parts:
+        return "cloud_artifact"
+    return "local_training"
+
+
 def discover_huggingface_models() -> List[str]:
     """Return list of recommended base models from HuggingFace.
 
diff --git a/docker/bucket-helper/Dockerfile b/docker/bucket-helper/Dockerfile
new file mode 100644
index 00000000..ea0d99e9
--- /dev/null
+++ b/docker/bucket-helper/Dockerfile
@@ -0,0 +1,12 @@
+FROM python:3.11-slim
+
+ENV PYTHONDONTWRITEBYTECODE=1
+ENV PYTHONUNBUFFERED=1
+
+WORKDIR /opt/bucket-helper
+
+COPY requirements.txt /opt/bucket-helper/requirements.txt
+
+RUN pip install --no-cache-dir -r /opt/bucket-helper/requirements.txt
+
+ENTRYPOINT ["python"]
diff --git a/docker/bucket-helper/requirements.txt b/docker/bucket-helper/requirements.txt
new file mode 100644
index 00000000..debda414
--- /dev/null
+++ b/docker/bucket-helper/requirements.txt
@@ -0,0 +1,3 @@
+huggingface_hub>=1.5.0,<2.0
+python-dotenv>=1.0.0,<2.0
+PyYAML>=6.0,<7.0
diff --git a/docs/plans/local-docker-runtime-plan.md b/docs/plans/local-docker-runtime-plan.md
new file mode 100644
index 00000000..23da4567
--- /dev/null
+++ b/docs/plans/local-docker-runtime-plan.md
@@ -0,0 +1,399 @@
+# Implementation Plan: Local Docker Runtime
+
+> Generated by `/PACT:plan-mode` on 2026-04-09
+> Status: PENDING APPROVAL
+
+<!-- Status Lifecycle:
+     PENDING APPROVAL → APPROVED → IN_PROGRESS → IMPLEMENTED
+                    ↘ SUPERSEDED (if replaced by newer plan)
+                    ↘ BLOCKED (if unresolved conflicts)
+
+     Transition Ownership:
+     - PENDING APPROVAL → APPROVED: User
+     - APPROVED → IN_PROGRESS: Orchestrator
+     - IN_PROGRESS → IMPLEMENTED: Orchestrator
+     - Any → SUPERSEDED: plan-mode
+     - Any → BLOCKED: plan-mode
+-->
+
+<!-- Forward Reference Convention:
+     When deferring work to a later phase, use:
+     "⚠️ Handled during {PHASE_NAME}"
+-->
+
+## Summary
+
+Introduce a first-class local Docker runtime for the repo so users can run the main GPU-heavy workflows through pinned container images instead of the fragile `unsloth_latest` conda environment. The target state is:
+
+- Docker-first local training using official `unsloth/unsloth` images
+- Docker-first local evaluation using either direct Unsloth or a dedicated `vllm/vllm-openai` container
+- host-side CLI orchestration that mounts the repo into containers and writes artifacts back into the existing workspace layout
+- canonical skill/docs updated to recommend Docker Desktop as the default local setup path for Windows users with NVIDIA GPUs
+
+This is not a proposal to containerize every lightweight repo task. The design goal is narrower and more pragmatic:
+
+- containerize GPU execution
+- keep orchestration and editing on the host
+- reduce dependency drift
+- align local and cloud runtime behavior
+
+---
+
+## Specialist Perspectives
+
+### 📋 Preparation Phase
+**Effort**: Medium
+
+#### Research Needed
+- [x] Current local environment setup path in `setup_env.ps1`, `setup_env.sh`, and `Trainers/activate_unsloth_latest.*`
+- [x] Current cloud image/profile abstraction in `Trainers/cloud/cloud_config.yaml` and `tuner/backends/training/cloud/base_cloud.py`
+- [x] Current local evaluation/inference backends in `tuner/handlers/eval_handler.py` and `tuner/handlers/inference_handler.py`
+- [x] Existing repo guidance around Docker and Unsloth images in `.skills/fine-tuning/*` and `docs/prep/*`
+- [x] Current machine state: Docker engine reachable, NVIDIA RTX 3090 available
+- [ ] Confirm best current pinned local training image tag for first rollout
+- [ ] Confirm best current pinned local vLLM image tag for first rollout
+
+#### Current State Findings
+- Local GPU execution is still centered on the `unsloth_latest` conda environment:
+  - `setup_env.ps1`
+  - `Trainers/activate_unsloth_latest.ps1`
+  - `tuner/utils/conda.py`
+- Cloud execution already has a mature image abstraction:
+  - training image profiles: `stable`, `next`
+  - eval image profiles: `stable_unsloth`, `latest_unsloth`, `fast_vllm`
+- The repo already assumes official Docker images are the clean dependency boundary for cloud workflows.
+- Local vLLM is not a first-class runtime yet; current inference guidance is still manual/server-centric.
+- Docker engine is available locally, but long `docker pull` operations need better observability so users can distinguish slow layers from a hung session.
+
+#### Key External References
+- Official Unsloth Docker image: `unsloth/unsloth`
+- Official vLLM Docker image: `vllm/vllm-openai`
+- Docker Desktop for Windows
+- NVIDIA Container Toolkit / Docker GPU support docs
+
+#### Questions to Resolve
+- [ ] Should Docker become the default recommendation for all local GPU users, while conda remains fallback?
+- [ ] Should the first local Docker rollout reuse cloud image profiles directly or introduce a separate `local_image_profiles` section?
+- [ ] Should local Docker training support both interactive shell access and one-shot command execution in v1?
+- [ ] Should local eval default to direct Unsloth or vLLM when both are available?
+- [ ] Should local Docker support WSL path mounts explicitly, or standardize on Windows host mounts first?
+
+---
+
+### 🏗️ Architecture Phase
+**Effort**: High
+
+#### Components Affected
+
+| Component | Change Type | Impact |
+|----------|-------------|--------|
+| `tuner/cli/parser.py` | Modify | Add local Docker runtime/provider flags and commands |
+| `tuner/cli/router.py` | Modify | Route new local Docker flows |
+| `tuner/handlers/train_handler.py` | Modify | Add Docker-backed local training path |
+| `tuner/handlers/eval_handler.py` | Modify | Add Docker-backed local evaluation backend selection |
+| `tuner/handlers/inference_handler.py` | Modify | Replace manual vLLM guidance with managed Docker path |
+| `tuner/utils/conda.py` | Modify later | Demote conda from default local GPU path |
+| `Trainers/cloud/cloud_config.yaml` | Modify | Add reusable local image/runtime config |
+| `shared/` local runtime helpers | New | Common Docker command assembly, mount handling, log streaming |
+| `.skills/fine-tuning/SKILL.md` | Modify | Recommend Docker-first local setup |
+| `.agents/skills/fine-tuning/SKILL.md` | Sync | Mirror canonical skill |
+| `.claude/skills/fine-tuning/SKILL.md` | Sync | Mirror canonical skill |
+| `docs/prep/*` | Modify selectively | Update installation guidance to prefer Docker |
+
+#### Design Approach
+
+The repo should treat local Docker as a new local execution runtime, not as a special case hacked into cloud code.
+
+Core split:
+
+1. **Host orchestration**
+   - CLI argument parsing
+   - config resolution
+   - workspace path selection
+   - output directory management
+   - user messaging
+
+2. **Container execution**
+   - training stack inside `unsloth/unsloth:*`
+   - direct LoRA eval inside `unsloth/unsloth:*`
+   - vLLM serving/eval inside `vllm/vllm-openai:*`
+
+3. **Shared contracts**
+   - image profile resolution
+   - mounted workspace path contract
+   - canonical output directories
+   - structured logs and smoke-test health checks
+
+#### Core Principle: Containerize GPU Execution, Not Everything
+
+The repo should not force every operation through Docker. The goal is to containerize the unstable, GPU-heavy dependency surface:
+
+- `torch`
+- CUDA userspace
+- `transformers`
+- `trl`
+- `peft`
+- `unsloth`
+- `vllm`
+
+Keep lightweight operations on the host:
+
+- editing configs
+- generating plan/docs
+- orchestration logic
+- artifact inspection
+- non-GPU tests
+
+#### Runtime Model
+
+```text
+Host CLI
+  ├── resolve config + image profile
+  ├── prepare bind mounts / output dirs
+  ├── launch Docker container
+  ├── stream logs with plain progress
+  └── persist outputs to repo workspace
+
+Docker Runtime
+  ├── training image: unsloth/unsloth:<pinned-tag>
+  │     └── run repo trainer entrypoints against mounted workspace
+  └── eval image: vllm/vllm-openai:<pinned-tag> or unsloth/unsloth:<pinned-tag>
+        ├── start managed local server if needed
+        └── run evaluator against local endpoint or direct runtime
+```
+
+#### Key Decisions
+
+| Decision | Options | Recommendation | Rationale |
+|----------|---------|----------------|-----------|
+| Local execution default | A) Conda B) Docker C) Hybrid | **C) Hybrid, Docker-first** | Lowest migration risk, best dependency control |
+| Image source | A) Custom repo Dockerfiles B) Official upstream images C) Mixed | **B) Official upstream images first** | Lower maintenance, already used in cloud |
+| Config reuse | A) Separate local config B) Reuse cloud profiles C) Full unification | **B) Reuse cloud profiles with local overrides** | Avoid drift while keeping local-specific knobs |
+| Training runtime | A) Unsloth image B) vLLM image C) Host Python | **A) Unsloth image** | Matches trainer stack |
+| Fast eval runtime | A) Unsloth direct B) vLLM only C) Both | **C) Both** | Stable fallback plus fast path |
+| Docker recommendation | A) Optional docs only B) Default docs path C) Mandatory | **B) Default docs path** | Strong recommendation without blocking edge cases |
+| Pull observability | A) Default docker output B) Plain progress + health checks C) Silent background | **B) Plain progress + health checks** | Users need to see real progress |
+
+#### Interface Contracts
+
+**1. Local runtime selection**
+```text
+python tuner.py train --runtime docker ...
+python tuner.py eval --runtime docker ...
+python tuner.py infer --runtime docker --backend vllm ...
+```
+
+**2. Image profile resolution**
+- Reuse `cloud_config.yaml` image-profile semantics
+- Allow local commands to reference named profiles rather than raw tags
+
+**3. Mount contract**
+- Repo root mounted read/write into container
+- Model cache mounted separately if needed
+- Outputs written back into canonical repo-relative paths
+
+**4. Logging contract**
+- Pulls and runs must stream plain progress
+- CLI should detect common stuck states:
+  - waiting for image pull
+  - no GPU visible in container
+  - image downloaded but container failed immediately
+  - vLLM server boot timeout
+
+---
+
+### 💻 Code Phase
+**Effort**: High
+
+#### Files to Modify
+
+| File | Changes |
+|------|---------|
+| `tuner/cli/parser.py` | Add local Docker flags / commands |
+| `tuner/cli/router.py` | Route local Docker flows |
+| `tuner/handlers/train_handler.py` | Support Docker runtime for local training |
+| `tuner/handlers/eval_handler.py` | Add Docker-backed local eval path |
+| `tuner/handlers/inference_handler.py` | Add managed local Docker vLLM flow |
+| `Trainers/cloud/cloud_config.yaml` | Add local image/runtime config knobs |
+| `.skills/fine-tuning/SKILL.md` | Update local setup guidance |
+| `docs/prep/README.md` | Promote Docker-first path |
+| `docs/prep/UNSLOTH_WINDOWS_INSTALLATION_GUIDE.md` | Align with new repo-standard runtime |
+
+#### Files to Create
+
+| File | Purpose |
+|------|---------|
+| `tuner/utils/docker_runtime.py` | Shared Docker command construction and process handling |
+| `tuner/handlers/docker_handler.py` or equivalent helper | Centralize local Docker orchestration |
+| `scripts/docker/` helpers as needed | Smoke tests, health checks, or wrapper entrypoints |
+
+#### Implementation Sequence
+1. Add a small Docker utility layer for process execution, mounts, GPU args, and log streaming.
+2. Add a local Docker smoke command for Unsloth image viability.
+3. Add a local Docker smoke command for vLLM image viability.
+4. Extend local train flow to run trainers through Docker.
+5. Extend local eval flow to run direct Unsloth eval through Docker.
+6. Extend local inference/eval flow to support managed vLLM Docker server lifecycle.
+7. Update canonical skill and prep docs.
+8. Sync skill trees with `.skills/scripts/sync_skill_trees.py`.
+
+#### Critical Note: Do Not Collapse Cloud and Local Handlers Prematurely
+Cloud and local both use Docker images, but they do not have the same lifecycle:
+
+- cloud launches remote jobs against provider SDKs
+- local launches host-managed Docker processes with bind mounts
+
+Shared pieces should be:
+
+- image profile resolution
+- dependency/runtime policy
+- helper command fragments where appropriate
+
+Not shared:
+
+- provider launch orchestration
+- secrets injection semantics
+- artifact transport semantics
+
+#### Critical Note: Windows Path Handling
+The first local Docker implementation must treat Windows path mounting and log rendering as first-class concerns. A design that only works cleanly from Linux-style repo paths is not sufficient for this repo's current local usage.
+
+---
+
+### 🧪 Test Phase
+**Effort**: Medium-High
+
+#### Test Scenarios
+
+| Scenario | Type | Priority |
+|----------|------|----------|
+| Image profile resolves correctly for local Docker runtime | Unit | P0 |
+| Docker command builder mounts repo paths correctly on Windows | Unit | P0 |
+| Local training runtime selects Unsloth image | Unit | P0 |
+| Local eval runtime selects Unsloth or vLLM image correctly | Unit | P0 |
+| Docker log streamer surfaces pull/run progress clearly | Unit | P0 |
+| Managed local vLLM startup waits for health before eval | Unit | P0 |
+| Unsloth image smoke test imports expected modules | Manual | P0 |
+| vLLM image smoke test starts server with GPU visible | Manual | P0 |
+| Local train dry-run writes outputs into mounted workspace | Manual | P1 |
+| Local eval against adapter path works through Docker | Manual | P1 |
+
+#### Coverage Targets
+- New Docker utility layer: 90%+
+- New local runtime branch logic: 85%+
+- Preserve current handler coverage for non-Docker paths
+
+#### Manual Smoke Test Goals
+- `docker run --gpus all ... nvidia-smi` succeeds
+- Unsloth image can import `unsloth`, `transformers`, and trainer dependencies
+- vLLM image can boot OpenAI-compatible server locally
+- repo-mounted trainer/evaluator commands can execute without path or permission failures
+
+---
+
+## Synthesized Implementation Roadmap
+
+### Phase Sequence
+
+```text
+PREPARE
+  ├── lock first pinned image tags
+  ├── validate Docker Desktop + GPU assumptions
+  └── define local runtime CLI surface
+      ↓
+ARCHITECT
+  ├── finalize host/container boundary
+  ├── finalize config reuse strategy
+  └── finalize logging/health contracts
+      ↓
+CODE
+  ├── docker utility layer
+  ├── local smoke commands
+  ├── local train runtime
+  ├── local eval runtime
+  ├── local vLLM runtime
+  └── docs + skill updates
+      ↓
+TEST
+  ├── unit coverage for command builders and config
+  ├── manual image smoke tests
+  └── end-to-end dry-run validation
+```
+
+### Proposed Commit Sequence
+1. `feat(local-docker): add shared docker runtime utilities`
+2. `feat(local-docker): add local unsloth smoke runtime`
+3. `feat(local-docker): add local docker training path`
+4. `feat(local-docker): add local docker eval and vllm runtime`
+5. `docs(local-docker): make docker-first local setup the default guidance`
+
+---
+
+## Cross-Cutting Concerns
+
+| Concern | Status | Notes |
+|---------|--------|-------|
+| Dependency stability | Strong upside | Main reason for this effort |
+| Windows UX | Needs attention | Mount paths and logs must be excellent |
+| GPU visibility | Needs explicit checks | Must fail fast if Docker GPU support is broken |
+| Disk usage | Significant | Large images and model caches need planning |
+| Startup latency | Significant | Pull time and cold boot should be made visible |
+| Observability | Critical | Users need better progress than silent long-running pulls |
+| Security | Moderate | Local Docker is lower risk than remote, but mounted secrets still matter |
+
+---
+
+## Open Questions
+
+### Require User Decision
+- [ ] Should Docker-first local runtime become the default recommendation immediately, or ship first as an opt-in beta?
+- [ ] Should conda remain documented as a full fallback path or only as an escape hatch for maintainers?
+
+### Require Further Research
+- [ ] Which exact pinned Unsloth image tag should be the first local-default training image?
+- [ ] Which exact pinned vLLM image tag should be the first local-default fast-eval image?
+- [ ] Are Windows host mounts into Docker Desktop performant enough for full training from `F:\Code\Toolset-Training`, or should WSL-path mounting become the recommended path?
+- [ ] What is the best local model cache mount strategy to avoid repeated downloads across containers?
+- [ ] What is the best CLI pattern for interactive shell vs one-shot execution in containers?
+
+---
+
+## Risk Assessment
+
+| Risk | Likelihood | Impact | Mitigation |
+|------|------------|--------|------------|
+| Docker GPU runtime works inconsistently across user machines | Medium | High | Add explicit smoke command and fail-fast diagnostics |
+| Large image pulls feel hung or get interrupted | High | Medium | Force plain progress output and add guidance about expected size/time |
+| Windows bind-mount performance is poor for training | Medium | High | Benchmark; recommend WSL-backed mounts if needed |
+| Local Docker logic duplicates too much cloud logic | Medium | Medium | Share only image/profile resolution and helper utilities |
+| vLLM local lifecycle is flaky | Medium | Medium | Keep direct Unsloth eval as fallback |
+| Docs overpromise “Docker solves everything” | Medium | Medium | Frame Docker as dependency control, not total infra elimination |
+
+---
+
+## Scope Assessment
+
+### In Scope
+- First-class local Docker runtime for GPU-heavy repo workflows
+- Local training via Unsloth container
+- Local direct-adapter eval via Unsloth container
+- Local fast eval / serving via vLLM container
+- Canonical skill/docs update to prefer Docker-first setup
+- Better pull/runtime logs and smoke diagnostics
+
+### Out of Scope
+- Replacing cloud providers with local Docker
+- Rewriting trainer logic to be Docker-native internally
+- Eliminating all host Python paths
+- Building and maintaining custom training images in v1
+- Full Kubernetes or multi-service local orchestration
+
+---
+
+## Success Criteria
+
+- A Windows user with Docker Desktop and a supported NVIDIA GPU can run a repo-provided local smoke command and verify Unsloth container viability without touching conda.
+- A Windows user can run local training and local eval through repo CLI commands that use Docker under the hood.
+- A Windows user can start a managed local vLLM container for eval/inference without assembling manual commands.
+- Canonical skills/docs recommend Docker as the default local GPU runtime.
+- Long-running pulls and container startups provide enough progress detail that “slow” is visibly different from “stuck.”
diff --git a/shared/utilities/bucket_artifacts.py b/shared/utilities/bucket_artifacts.py
index 8c7bd6cc..84b3df8c 100644
--- a/shared/utilities/bucket_artifacts.py
+++ b/shared/utilities/bucket_artifacts.py
@@ -7,15 +7,63 @@
 import json
 import shutil
 from pathlib import Path
+from posixpath import normpath
 from typing import Any, Iterable, TextIO
 
 from huggingface_hub import HfFileSystem
-from huggingface_hub import sync_bucket
 
-from shared.cloud_artifacts import sync_directory_to_hf_bucket, sync_file_to_hf_bucket
 from shared.utilities.env import get_hf_token
 
 
+def _load_sync_bucket():
+    """Lazily fetch ``sync_bucket``; returns ``None`` when the Hub build lacks it."""
+    try:
+        from huggingface_hub import sync_bucket as loader
+    except ImportError:
+        loader = None  # older Hub build without sync_bucket
+    return loader
+
+
+def _strip_hf_scheme(path: str) -> str:
+    """Trim surrounding whitespace and drop a leading ``hf://`` scheme, if present."""
+    scheme = "hf://"
+    cleaned = str(path or "").strip()
+    return cleaned[len(scheme) :] if cleaned.startswith(scheme) else cleaned
+
+
+def _hf_relative_child(root: str, child: str) -> str:
+    """Return ``child`` relative to ``root``; fall back to its basename if outside."""
+    base = normpath(_strip_hf_scheme(root)).rstrip("/")
+    leaf = normpath(_strip_hf_scheme(child)).rstrip("/")
+    if leaf.startswith(base + "/"):
+        return leaf[len(base) + 1 :]
+    return Path(leaf).name
+
+
+def _copy_hf_tree(fs: HfFileSystem, artifact_path: str, target: Path) -> None:
+    """Copy a remote file or directory tree to ``target`` when sync_bucket is unavailable."""
+    if fs.info(artifact_path).get("type", "file") != "directory":
+        # Single file: stream it straight into ``target``.
+        target.parent.mkdir(parents=True, exist_ok=True)
+        with fs.open(artifact_path, "rb") as remote, open(target, "wb") as local:
+            shutil.copyfileobj(remote, local)
+        return
+
+    # Directory: mirror every file found beneath it.
+    target.mkdir(parents=True, exist_ok=True)
+    listing = fs.find(artifact_path, detail=True)
+    if not isinstance(listing, dict):
+        return  # no path -> details mapping to walk
+
+    for remote_path, meta in listing.items():
+        if meta.get("type") == "directory":
+            continue  # parents are created on demand below
+        local_path = target / _hf_relative_child(artifact_path, remote_path)
+        local_path.parent.mkdir(parents=True, exist_ok=True)
+        with fs.open(remote_path, "rb") as remote, open(local_path, "wb") as local:
+            shutil.copyfileobj(remote, local)
+
+
 def build_artifact_path(path: str, *, bucket_id: str | None = None) -> str:
     """Return a local path or fully-qualified HF bucket URI."""
     normalized = str(path or "").strip()
@@ -34,8 +82,17 @@ def _artifact_relative_path(path: str, *, bucket_id: str | None = None) -> Path:
     artifact_path = build_artifact_path(path, bucket_id=bucket_id)
     if artifact_path.startswith("hf://buckets/"):
         remainder = artifact_path[len("hf://buckets/") :]
-        parts = remainder.split("/", 1)
-        relative = parts[1] if len(parts) > 1 else ""
+        normalized_bucket = str(bucket_id or "").strip("/")
+        if normalized_bucket and remainder.startswith(f"{normalized_bucket}/"):
+            relative = remainder[len(normalized_bucket) + 1 :]
+        else:
+            parts = remainder.split("/")
+            if len(parts) >= 3:
+                relative = "/".join(parts[2:])
+            elif len(parts) >= 2:
+                relative = "/".join(parts[1:])
+            else:
+                relative = ""
         return Path(relative)
     local_path = Path(path)
     if local_path.is_absolute():
@@ -178,7 +235,12 @@ def pull_artifacts(
     target.parent.mkdir(parents=True, exist_ok=True)
 
     if artifact_path.startswith("hf://"):
-        sync_bucket(artifact_path, str(target), token=get_hf_token())
+        sync_bucket = _load_sync_bucket()
+        if sync_bucket is not None:
+            sync_bucket(artifact_path, str(target), token=get_hf_token())
+        else:
+            fs = HfFileSystem(token=get_hf_token())
+            _copy_hf_tree(fs, artifact_path, target)
         return target
 
     source = Path(artifact_path)
@@ -226,6 +288,8 @@ def push_artifacts(
     bucket_id: str,
     destination: str | None = None,
 ) -> str:
+    from shared.cloud_artifacts import sync_directory_to_hf_bucket, sync_file_to_hf_bucket
+
     source = Path(path).resolve()
     if not source.exists():
         raise FileNotFoundError(str(source))
diff --git a/shared/utilities/env.py b/shared/utilities/env.py
index c8a3ea18..1d7febd6 100644
--- a/shared/utilities/env.py
+++ b/shared/utilities/env.py
@@ -73,6 +73,14 @@ def get_hf_token() -> Optional[str]:
     Returns:
         HuggingFace token or None
     """
+    for key in ("HF_TOKEN", "HF_API_KEY"):
+        value = os.environ.get(key)
+        if value is None:
+            continue
+        value = value.strip()
+        if value:
+            return value
+    load_env_file()
     for key in ("HF_TOKEN", "HF_API_KEY"):
         value = os.environ.get(key)
         if value is None:
diff --git a/shared/utilities/paths.py b/shared/utilities/paths.py
index 7358d4fc..2deff751 100644
--- a/shared/utilities/paths.py
+++ b/shared/utilities/paths.py
@@ -15,6 +15,10 @@
 
 CANONICAL_OUTPUT_DIRS = {method: f"{method}_output" for method in TRAINING_METHODS}
 LEGACY_OUTPUT_DIRS = {method: f"{method}_output_rtx3090" for method in TRAINING_METHODS}
+LOCAL_ARTIFACT_RUN_ROOTS = (
+    Path("toolset-training-artifacts") / "runs",
+    Path("runs"),
+)
 
 
 def get_project_root() -> Path:
@@ -135,6 +139,84 @@ def iter_training_output_dirs(method: str, repo_root: Optional[Path] = None) ->
     return [preferred_trainer_dir / get_canonical_output_dir_name(normalized)]
 
 
+def get_local_artifact_run_roots(repo_root: Optional[Path] = None) -> list[Path]:
+    """
+    List repo-local directories that may hold pulled cloud training artifacts.
+
+    Every entry of ``LOCAL_ARTIFACT_RUN_ROOTS`` is joined onto ``repo_root`` (the
+    project root when omitted); this mirrors the relative path preserved by
+    ``python tuner.py bucket pull``. Existence is not checked here.
+    """
+    base = repo_root or get_project_root()
+    joined = (base / suffix for suffix in LOCAL_ARTIFACT_RUN_ROOTS)
+
+    # dict.fromkeys drops duplicates while keeping first-seen order.
+    return list(dict.fromkeys(joined))
+
+
+def iter_imported_training_run_dirs(method: str, repo_root: Optional[Path] = None) -> list[Path]:
+    """
+    Return imported cloud run directories for a method.
+
+    Expected artifact layout:
+        <root>/runs/<provider>/<method>/<run_id>/
+    where ``<root>`` is either ``toolset-training-artifacts`` or the repo root.
+
+    Missing or non-directory roots, providers, and method folders are skipped;
+    runs are de-duplicated by resolved path.
+    """
+    normalized = normalize_trainer_method(method)
+    runs: list[Path] = []
+    seen: set[Path] = set()
+
+    for runs_root in get_local_artifact_run_roots(repo_root):
+        # is_dir(), not exists(): iterdir() raises NotADirectoryError on a file.
+        if not runs_root.is_dir():
+            continue
+        for provider_dir in runs_root.iterdir():
+            method_dir = provider_dir / normalized
+            if not method_dir.is_dir():
+                continue
+            for run_dir in method_dir.iterdir():
+                if not run_dir.is_dir():
+                    continue
+                resolved = run_dir.resolve()
+                if resolved not in seen:
+                    seen.add(resolved)
+                    runs.append(run_dir)
+
+    return runs
+
+
+def iter_training_run_dirs(method: str, repo_root: Optional[Path] = None) -> list[Path]:
+    """
+    Return all locally discoverable run directories for a method.
+
+    Native trainer outputs come first, then imported cloud/bucket artifacts.
+    Entries are de-duplicated by resolved path; output roots that are missing
+    or are not directories are skipped.
+    """
+    normalized = normalize_trainer_method(method)
+    runs: list[Path] = []
+    seen: set[Path] = set()
+
+    def _remember(run_dir: Path) -> None:
+        resolved = run_dir.resolve()
+        if resolved not in seen:
+            seen.add(resolved)
+            runs.append(run_dir)
+
+    for output_dir in iter_training_output_dirs(normalized, repo_root):
+        # is_dir(), not exists(): iterdir() raises NotADirectoryError on a file.
+        if output_dir.is_dir():
+            for run_dir in output_dir.iterdir():
+                if run_dir.is_dir():
+                    _remember(run_dir)
+    for run_dir in iter_imported_training_run_dirs(normalized, repo_root):
+        _remember(run_dir)
+    return runs
+
+
 def get_primary_training_output_dir(method: str, repo_root: Optional[Path] = None) -> Path:
     """
     Get the preferred output directory for new runs of a method.
diff --git a/tuner/backends/evaluation/unsloth_backend.py b/tuner/backends/evaluation/unsloth_backend.py
index dee0d5c7..f7a982bb 100644
--- a/tuner/backends/evaluation/unsloth_backend.py
+++ b/tuner/backends/evaluation/unsloth_backend.py
@@ -18,7 +18,7 @@
 from pathlib import Path
 from typing import List, Optional, Tuple
 
-from shared.utilities.paths import iter_training_output_dirs
+from shared.utilities.paths import iter_training_run_dirs
 from .base import IEvaluationBackend
 
 
@@ -62,12 +62,10 @@ def list_models(self) -> List[str]:
         models = []
 
         for method in ("sft", "kto", "grpo"):
-            for output_dir in iter_training_output_dirs(method, self._repo_root):
-                if not output_dir.exists():
-                    continue
-
-                for adapter_config in output_dir.rglob("final_model/adapter_config.json"):
-                    adapter_dir = adapter_config.parent
+            for run_dir in iter_training_run_dirs(method, self._repo_root):
+                adapter_dir = run_dir / "final_model"
+                adapter_config = adapter_dir / "adapter_config.json"
+                if adapter_config.exists():
                     models.append(str(adapter_dir.resolve()))
 
         # Sort by modification time (newest first)
@@ -131,13 +129,8 @@ def get_model_info(self, adapter_path: str) -> dict:
             size_mb = round(adapter_file.stat().st_size / (1024 ** 2), 1)
 
         # Detect trainer type from path
-        trainer_type = "unknown"
-        if "sft_output" in str(path):
-            trainer_type = "sft"
-        elif "kto_output" in str(path):
-            trainer_type = "kto"
-        elif "grpo_output" in str(path):
-            trainer_type = "grpo"
+        trainer_type = self._detect_trainer_type(path)
+        source = self._detect_source(path)
 
         # Extract run timestamp from parent directory
         timestamp = path.parent.name if path.parent else "unknown"
@@ -153,7 +146,31 @@ def get_model_info(self, adapter_path: str) -> dict:
             "base_model_short": base_model_short,
             "size_mb": size_mb,
             "trainer_type": trainer_type,
+            "source": source,
             "timestamp": timestamp,
             "r": config.get("r"),  # LoRA rank
             "lora_alpha": config.get("lora_alpha"),
         }
+
+    @staticmethod
+    def _detect_trainer_type(path: Path) -> str:
+        """Infer sft/kto/grpo from whole directory names in ``path``, else "unknown"."""
+        parts = {part.lower() for part in path.parts}
+        markers = {
+            "sft": {"sft_output", "sft_output_rtx3090", "rtx3090_sft", "sft"},
+            "kto": {"kto_output", "kto_output_rtx3090", "rtx3090_kto", "kto"},
+            "grpo": {"grpo_output", "grpo_output_rtx3090", "rtx3090_grpo", "grpo"},
+        }
+        for trainer_type, candidates in markers.items():
+            if not parts.isdisjoint(candidates):  # some path component matches
+                return trainer_type
+        return "unknown"
+
+    @staticmethod
+    def _detect_source(path: Path) -> str:
+        """Classify where an adapter came from based on its directory names."""
+        names = {component.lower() for component in path.parts}
+        if "toolset-training-artifacts" in names:
+            return "bucket_pull"
+        is_cloud = "runs" in names and "trainers" not in names
+        return "cloud_artifact" if is_cloud else "local_training"
diff --git a/tuner/cli/main.py b/tuner/cli/main.py
index 13c22adc..3deefcaa 100644
--- a/tuner/cli/main.py
+++ b/tuner/cli/main.py
@@ -7,12 +7,20 @@
 """
 
 import sys
+import io
 from pathlib import Path
 from tuner.utils import load_env_file
 from .parser import create_parser
 from .router import route_command
 
 
+if sys.platform == "win32":
+    # reconfigure() changes the encoding in place and keeps line buffering.
+    for _stream in (sys.stdout, sys.stderr):
+        if hasattr(_stream, "reconfigure"):
+            _stream.reconfigure(encoding="utf-8")
+
+
 def main():
     """
     Main CLI entry point.
diff --git a/tuner/cli/parser.py b/tuner/cli/parser.py
index 5aff3805..90f828d9 100644
--- a/tuner/cli/parser.py
+++ b/tuner/cli/parser.py
@@ -67,6 +67,7 @@ def create_parser() -> argparse.ArgumentParser:
 Commands:
   (none)      Interactive menu
   train       Training workflow (SFT, KTO, GRPO)
+  docker      Local Docker runtime helper (status, bootstrap, pull, smoke, build)
   cloud       Cloud training (HF Jobs, Modal, RunPod)
   cloud-run   Config-driven HF cloud job
   cloud-jobs  Inspect or manage live HF Jobs
@@ -109,6 +110,11 @@ def create_parser() -> argparse.ArgumentParser:
 Examples:
   python tuner.py              # Interactive mode
   python tuner.py train        # Go directly to training
+  python tuner.py docker status
+  python tuner.py docker bootstrap --docker-target all
+  python tuner.py docker build --docker-target bucket
+  python tuner.py docker pull --docker-target unsloth
+  python tuner.py docker smoke --docker-target vllm
   python tuner.py cloud        # Cloud training submenu
   python tuner.py eval         # Go directly to evaluation
   python tuner.py synthchat    # Generate or improve data
@@ -138,7 +144,7 @@ def create_parser() -> argparse.ArgumentParser:
     parser.add_argument(
         "command",
         nargs="?",
-        choices=["train", "cloud", "cloud-run", "cloud-jobs", "plan-hardware", "cloud-pipeline", "cloud-eval", "cloud-gym", "cloud-inspect", "bucket", "run-experiment", "analyze-experiment", "eval", "synthchat", "modelops", "ml", "flywheel", "experiment-loop", "surgery", "status", "doctor", "list", "list-runs", "compute-losses", "compare-runs", "judge-sample", "create-experiment", "cloud-compare", "download-experiment"],
+        choices=["train", "docker", "cloud", "cloud-run", "cloud-jobs", "plan-hardware", "cloud-pipeline", "cloud-eval", "cloud-gym", "cloud-inspect", "bucket", "run-experiment", "analyze-experiment", "eval", "synthchat", "modelops", "ml", "flywheel", "experiment-loop", "surgery", "status", "doctor", "list", "list-runs", "compute-losses", "compare-runs", "judge-sample", "create-experiment", "cloud-compare", "download-experiment"],
         help="Command to run (optional, defaults to interactive menu)"
     )
 
@@ -150,7 +156,7 @@ def create_parser() -> argparse.ArgumentParser:
         "subcommand",
         nargs="?",
         default=None,
-        help="Sub-command (e.g., 'datasets' for list, 'train' for ml)"
+        help="Sub-command (e.g., 'datasets' for list, 'train' for ml, 'bootstrap' for docker)"
     )
 
     # Global flags
@@ -166,6 +172,12 @@ def create_parser() -> argparse.ArgumentParser:
         dest="auto_confirm",
         help="Skip confirmation prompts for non-interactive command execution",
     )
+    parser.add_argument(
+        "--runtime",
+        choices=["native", "docker"],
+        default="native",
+        help="Local runtime for train/eval flows. Use 'docker' to run on Docker Desktop instead of the local conda environment."
+    )
 
     # Doctor-specific flags
     parser.add_argument(
@@ -207,6 +219,24 @@ def create_parser() -> argparse.ArgumentParser:
         help="Path to flywheel config YAML (flywheel commands only)"
     )
 
+    # Local Docker runtime flags
+    parser.add_argument(
+        "--docker-target",
+        choices=["unsloth", "vllm", "bucket", "all"],
+        dest="docker_target",
+        help="Docker runtime target for 'docker' command (unsloth, vllm, bucket, or all)"
+    )
+    parser.add_argument(
+        "--docker-image",
+        dest="docker_image",
+        help="Explicit Docker image override for local Docker flows ('docker', 'train --runtime docker', 'eval --runtime docker')"
+    )
+    parser.add_argument(
+        "--docker-profile",
+        dest="docker_profile",
+        help="Named Docker image profile from Trainers/cloud/cloud_config.yaml for local Docker flows"
+    )
+
     # Surgery-specific flags
     parser.add_argument(
         "--surgery-config",
diff --git a/tuner/cli/router.py b/tuner/cli/router.py
index c45b0a57..ad237166 100644
--- a/tuner/cli/router.py
+++ b/tuner/cli/router.py
@@ -68,21 +68,56 @@ def route_command(args: Namespace) -> int:
     """
     # Check for JSON mode - affects error output
     json_mode = getattr(args, 'json', False)
+    command = getattr(args, 'command', None)
+
+    # Special-case Docker helper so unrelated cloud import failures do not block it.
+    if command == 'docker':
+        try:
+            from tuner.handlers.docker_handler import DockerHandler
+        except ImportError as e:
+            error_msg = f"Handlers not yet implemented: {e}"
+            if json_mode:
+                output = {
+                    "success": False,
+                    "error": {
+                        "message": error_msg,
+                        "code": "HANDLER_IMPORT_ERROR",
+                    },
+                    "timestamp": datetime.now().isoformat()
+                }
+                print(json.dumps(output, indent=2))
+            else:
+                print(f"Error: {error_msg}")
+                print("This is expected during migration. Please use tuner_legacy.py instead.")
+            return 1
+        handler = DockerHandler(args=args)
+        return handler.handle()
+
+    if command == 'bucket':
+        try:
+            from tuner.handlers.bucket_handler import BucketHandler
+        except ImportError as e:
+            error_msg = f"Handlers not yet implemented: {e}"
+            if json_mode:
+                output = {
+                    "success": False,
+                    "error": {
+                        "message": error_msg,
+                        "code": "HANDLER_IMPORT_ERROR",
+                    },
+                    "timestamp": datetime.now().isoformat()
+                }
+                print(json.dumps(output, indent=2))
+            else:
+                print(f"Error: {error_msg}")
+            return 1
+        handler = BucketHandler(args=args)
+        return handler.handle()
 
-    # Import handlers (deferred to avoid circular imports)
+    # Import local handlers first so cloud dependency drift does not block local commands.
     try:
         from tuner.handlers.train_handler import TrainHandler
         from tuner.handlers.eval_handler import EvalHandler
-        from tuner.handlers.cloud_pipeline_handler import CloudPipelineHandler
-        from tuner.handlers.hardware_plan_handler import HardwarePlanHandler
-        from tuner.handlers.cloud_eval_handler import CloudEvalHandler
-        from tuner.handlers.cloud_inspect_handler import CloudInspectHandler
-        from tuner.handlers.cloud_jobs_handler import CloudJobsHandler
-        from tuner.handlers.cloud_gym_handler import CloudGymHandler
-        from tuner.handlers.cloud_run_handler import CloudRunHandler
-        from tuner.handlers.bucket_handler import BucketHandler
-        from tuner.handlers.experiment_handler import ExperimentHandler
-        from tuner.handlers.experiment_analysis_handler import ExperimentAnalysisHandler
         from tuner.handlers.synthchat_handler import SynthChatHandler
         from tuner.handlers.modelops_handler import ModelOpsHandler
         from tuner.handlers.ml_handler import MLHandler
@@ -93,7 +128,6 @@ def route_command(args: Namespace) -> int:
         from tuner.handlers.flywheel_handler import FlywheelHandler
         from tuner.handlers.surgery_handler import SurgeryHandler
     except ImportError as e:
-        # Graceful degradation if handlers not yet implemented
         error_msg = f"Handlers not yet implemented: {e}"
         if json_mode:
             output = {
@@ -110,16 +144,13 @@ def route_command(args: Namespace) -> int:
             print("This is expected during migration. Please use tuner_legacy.py instead.")
         return 1
 
-    # Get command from args
-    command = getattr(args, 'command', None)
-
     # JSON mode without command is an error (interactive menu needs input)
     # Exception: status, doctor, and list commands work in JSON mode
     if json_mode and not command:
         output = {
             "success": False,
             "error": {
-                "message": "JSON mode requires a command (train, cloud, cloud-run, cloud-jobs, plan-hardware, cloud-pipeline, cloud-eval, cloud-gym, cloud-inspect, bucket, run-experiment, analyze-experiment, eval, synthchat, modelops, ml, flywheel, surgery, status, doctor, list)",
+                "message": "JSON mode requires a command (train, cloud, cloud-run, cloud-jobs, plan-hardware, cloud-pipeline, cloud-eval, cloud-gym, cloud-inspect, bucket, run-experiment, analyze-experiment, eval, synthchat, modelops, ml, flywheel, docker, surgery, status, doctor, list)",
                 "code": "COMMAND_REQUIRED",
             },
             "timestamp": datetime.now().isoformat()
@@ -151,27 +182,22 @@ def route_command(args: Namespace) -> int:
     # Special handling for ml command (has subcommand and --config)
     if command == 'ml':
         ml_sub = getattr(args, 'subcommand', None)
-        # Map the generic subcommand to ml_subcommand for the handler
         if args is not None:
             args.ml_subcommand = ml_sub
         handler = MLHandler(args=args)
         return handler.handle()
 
-    # Special handling for flywheel command (has subcommand)
     if command == 'flywheel':
         handler = FlywheelHandler(args=args)
         return handler.handle()
 
-    # Autonomous experiment loop
     if command == 'experiment-loop':
         return _handle_experiment_loop(args, json_mode)
 
-    # Surgery command
     if command == 'surgery':
         handler = SurgeryHandler(args=args)
         return handler.handle()
 
-    # Experiment pipeline
     if command == 'compare-runs':
         import subprocess
         import sys
@@ -197,43 +223,102 @@ def route_command(args: Namespace) -> int:
         print(f"Created experiment: {exp.experiment_id}")
         return 0
 
-    # Import cloud handler (conditional - may not have deps)
-    try:
-        from tuner.handlers.cloud_train_handler import CloudTrainHandler
-    except ImportError:
-        CloudTrainHandler = None
-
-    # Map commands to handlers
-    handlers = {
+    local_handlers = {
         'train': TrainHandler,
-        'cloud-pipeline': CloudPipelineHandler,
-        'cloud-run': CloudRunHandler,
-        'cloud-jobs': CloudJobsHandler,
-        'plan-hardware': HardwarePlanHandler,
         'eval': EvalHandler,
-        'cloud-eval': CloudEvalHandler,
-        'cloud-gym': CloudGymHandler,
-        'cloud-inspect': CloudInspectHandler,
-        'bucket': BucketHandler,
-        'run-experiment': ExperimentHandler,
-        'analyze-experiment': ExperimentAnalysisHandler,
         'synthchat': SynthChatHandler,
         'modelops': ModelOpsHandler,
         'ml': MLHandler,
     }
-    if CloudTrainHandler is not None:
-        handlers['cloud'] = CloudTrainHandler
 
-    # Execute handler with args
-    if command and command in handlers:
-        handler_class = handlers[command]
+    if command and command in local_handlers:
+        handler_class = local_handlers[command]
         handler = handler_class(args=args)
         return handler.handle()
-    else:
-        # No command = interactive menu
-        handler = MainMenuHandler(args=args)
+
+    cloud_commands = {
+        'cloud',
+        'cloud-pipeline',
+        'cloud-run',
+        'cloud-jobs',
+        'plan-hardware',
+        'cloud-eval',
+        'cloud-gym',
+        'cloud-inspect',
+        'run-experiment',
+        'analyze-experiment',
+    }
+
+    if command in cloud_commands:
+        try:
+            from tuner.handlers.cloud_pipeline_handler import CloudPipelineHandler
+            from tuner.handlers.hardware_plan_handler import HardwarePlanHandler
+            from tuner.handlers.cloud_eval_handler import CloudEvalHandler
+            from tuner.handlers.cloud_inspect_handler import CloudInspectHandler
+            from tuner.handlers.cloud_jobs_handler import CloudJobsHandler
+            from tuner.handlers.cloud_gym_handler import CloudGymHandler
+            from tuner.handlers.cloud_run_handler import CloudRunHandler
+            from tuner.handlers.experiment_handler import ExperimentHandler
+            from tuner.handlers.experiment_analysis_handler import ExperimentAnalysisHandler
+
+            try:
+                from tuner.handlers.cloud_train_handler import CloudTrainHandler
+            except ImportError:
+                CloudTrainHandler = None
+        except ImportError as e:
+            error_msg = f"Cloud handlers unavailable: {e}"
+            if json_mode:
+                output = {
+                    "success": False,
+                    "error": {
+                        "message": error_msg,
+                        "code": "HANDLER_IMPORT_ERROR",
+                    },
+                    "timestamp": datetime.now().isoformat()
+                }
+                print(json.dumps(output, indent=2))
+            else:
+                print(f"Error: {error_msg}")
+            return 1
+
+        cloud_handlers = {
+            'cloud-pipeline': CloudPipelineHandler,
+            'cloud-run': CloudRunHandler,
+            'cloud-jobs': CloudJobsHandler,
+            'plan-hardware': HardwarePlanHandler,
+            'cloud-eval': CloudEvalHandler,
+            'cloud-gym': CloudGymHandler,
+            'cloud-inspect': CloudInspectHandler,
+            'run-experiment': ExperimentHandler,
+            'analyze-experiment': ExperimentAnalysisHandler,
+        }
+        if CloudTrainHandler is not None:
+            cloud_handlers['cloud'] = CloudTrainHandler
+
+        if command == 'cloud' and CloudTrainHandler is None:
+            error_msg = "Cloud training handler unavailable in the current environment."
+            if json_mode:
+                output = {
+                    "success": False,
+                    "error": {
+                        "message": error_msg,
+                        "code": "HANDLER_IMPORT_ERROR",
+                    },
+                    "timestamp": datetime.now().isoformat()
+                }
+                print(json.dumps(output, indent=2))
+            else:
+                print(f"Error: {error_msg}")
+            return 1
+
+        handler_class = cloud_handlers[command]
+        handler = handler_class(args=args)
         return handler.handle()
 
+    # No command, or one not routed above: fall back to the interactive menu
+    handler = MainMenuHandler(args=args)
+    return handler.handle()
+
 
 def _handle_experiment_loop(args: Namespace, json_mode: bool) -> int:
     """Run the autonomous experiment loop."""
diff --git a/tuner/cloud/hf_jobs.py b/tuner/cloud/hf_jobs.py
index b9322287..15e06b0e 100644
--- a/tuner/cloud/hf_jobs.py
+++ b/tuner/cloud/hf_jobs.py
@@ -8,11 +8,19 @@
 from dataclasses import dataclass, field
 from typing import Any, Dict, Iterable, List, Optional
 
-from shared.cloud_artifacts import normalize_hf_bucket_id
 from shared.utilities.env import get_env_var, get_hf_token
 from tuner.core.exceptions import CloudProviderError
 
 
+def normalize_hf_bucket_id(bucket_id: str) -> str:
+    """Normalize bucket identifiers to the canonical namespace/name form."""
+    normalized = str(bucket_id or "").strip()  # falsy input (None, "") becomes ""
+    for prefix in ("hf://buckets/", "buckets/"):  # tried in order; both may be stripped in one call
+        if normalized.startswith(prefix):
+            normalized = normalized[len(prefix):]
+    return normalized.strip("/")  # drop any leading/trailing slashes that remain
+
+
 @dataclass(frozen=True)
 class RepoCheckoutSpec:
     """Exact repository source needed to reproduce a cloud job."""
diff --git a/tuner/discovery/training_runs.py b/tuner/discovery/training_runs.py
index ed11ae02..e4671ab2 100644
--- a/tuner/discovery/training_runs.py
+++ b/tuner/discovery/training_runs.py
@@ -16,7 +16,7 @@
 from pathlib import Path
 from typing import List
 
-from shared.utilities.paths import iter_training_output_dirs
+from shared.utilities.paths import iter_training_run_dirs
 
 
 class TrainingRunDiscovery:
@@ -100,19 +100,12 @@ def discover(self, trainer_type: str, limit: int = 10) -> List[Path]:
         """
         runs = []
 
-        for output_dir in iter_training_output_dirs(trainer_type, self.repo_root):
-            if not output_dir.exists():
-                continue
+        for d in iter_training_run_dirs(trainer_type, self.repo_root):
+            has_final = (d / "final_model").exists()
+            has_checkpoints = (d / "checkpoints").exists() and any((d / "checkpoints").iterdir())
 
-            for d in sorted(output_dir.iterdir(), key=lambda p: p.stat().st_mtime, reverse=True):
-                if not d.is_dir():
-                    continue
-
-                has_final = (d / "final_model").exists()
-                has_checkpoints = (d / "checkpoints").exists() and any((d / "checkpoints").iterdir())
-
-                if has_final or has_checkpoints:
-                    runs.append(d)
+            if has_final or has_checkpoints:
+                runs.append(d)
 
         runs.sort(key=lambda p: p.stat().st_mtime, reverse=True)
         if limit is not None:
diff --git a/tuner/handlers/__init__.py b/tuner/handlers/__init__.py
index d5d809dc..f87570a9 100644
--- a/tuner/handlers/__init__.py
+++ b/tuner/handlers/__init__.py
@@ -1,38 +1,33 @@
 """
-Command handlers for the Synaptic Tuner CLI.
+Lazy exports for Synaptic Tuner command handlers.
 
-This package contains handler implementations for different CLI commands:
-- TrainHandler: Training workflow orchestration (STUB - to be implemented)
-- UploadHandler: Model upload workflow (STUB - to be implemented)
-- EvalHandler: Evaluation workflow
-- PipelineHandler: Full pipeline (train -> upload -> eval)
-- MainMenuHandler: Interactive main menu
-- SynthChatHandler: Synthetic data generation and improvement
-- StatusHandler: System status overview for AI assistants
-- DoctorHandler: System diagnostics with recommendations and auto-fix
-
-Each handler implements the IHandler interface and can be registered
-with the router for command dispatching.
+Avoid eager imports here. Some handlers pull in heavyweight optional
+dependencies, and importing the package itself should not force every
+runtime path to load them.
 """
 
-from tuner.handlers.train_handler import TrainHandler
-from tuner.handlers.upload_handler import UploadHandler
-from tuner.handlers.eval_handler import EvalHandler
-from tuner.handlers.pipeline_handler import PipelineHandler
-from tuner.handlers.main_menu_handler import MainMenuHandler
-from tuner.handlers.synthchat_handler import SynthChatHandler
-from tuner.handlers.status_handler import StatusHandler
-from tuner.handlers.doctor_handler import DoctorHandler
-from tuner.handlers.ml_handler import MLHandler
+from __future__ import annotations
+
+import importlib
+
+_HANDLER_MODULES = {
+    "TrainHandler": "tuner.handlers.train_handler",
+    "UploadHandler": "tuner.handlers.upload_handler",
+    "EvalHandler": "tuner.handlers.eval_handler",
+    "PipelineHandler": "tuner.handlers.pipeline_handler",
+    "MainMenuHandler": "tuner.handlers.main_menu_handler",
+    "SynthChatHandler": "tuner.handlers.synthchat_handler",
+    "StatusHandler": "tuner.handlers.status_handler",
+    "DoctorHandler": "tuner.handlers.doctor_handler",
+    "MLHandler": "tuner.handlers.ml_handler",
+}
+
+__all__ = list(_HANDLER_MODULES)
+
 
-__all__ = [
-    "TrainHandler",
-    "UploadHandler",
-    "EvalHandler",
-    "PipelineHandler",
-    "MainMenuHandler",
-    "SynthChatHandler",
-    "StatusHandler",
-    "DoctorHandler",
-    "MLHandler",
-]
+def __getattr__(name: str):
+    """Import a handler lazily on first attribute access (PEP 562) and cache it."""
+    if name not in _HANDLER_MODULES:
+        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
+    # Store the class in module globals so later lookups skip __getattr__ entirely.
+    return globals().setdefault(name, getattr(importlib.import_module(_HANDLER_MODULES[name]), name))
diff --git a/tuner/handlers/base.py b/tuner/handlers/base.py
index a20d38b7..6f8f1733 100644
--- a/tuner/handlers/base.py
+++ b/tuner/handlers/base.py
@@ -230,7 +230,7 @@ def output_error(
             try:
                 from tuner.ui import print_error
                 print_error(message)
-            except ImportError:
+            except (ImportError, UnicodeEncodeError):
                 print(f"Error: {message}", file=sys.stderr)
 
     def output_info(self, message: str, data: Optional[Dict[str, Any]] = None) -> None:
@@ -260,7 +260,7 @@ def output_info(self, message: str, data: Optional[Dict[str, Any]] = None) -> No
             try:
                 from tuner.ui import print_info
                 print_info(message)
-            except ImportError:
+            except (ImportError, UnicodeEncodeError):
                 print(f"Info: {message}")
 
     def output_list(
diff --git a/tuner/handlers/bucket_handler.py b/tuner/handlers/bucket_handler.py
index c467eeb3..df937828 100644
--- a/tuner/handlers/bucket_handler.py
+++ b/tuner/handlers/bucket_handler.py
@@ -5,6 +5,8 @@
 from __future__ import annotations
 
 import json
+import os
+import subprocess
 from argparse import Namespace
 from pathlib import Path
 from typing import Any, Optional
@@ -15,6 +17,16 @@
 from tuner.cloud import load_huggingface_hub, resolve_hf_bucket_id
 from tuner.core.exceptions import CloudProviderError
 from tuner.handlers.base import BaseHandler
+from tuner.utils.docker_runtime import (
+    BUCKET_HELPER_ENV_MARKER,
+    BUCKET_HELPER_IMAGE,
+    CONTAINER_REPO_ROOT,
+    bucket_helper_image_present,
+    build_bucket_helper_image_command,
+    build_bucket_helper_run_command,
+    container_repo_path,
+    ensure_docker_cli,
+)
 
 
 class BucketHandler(BaseHandler):
@@ -30,6 +42,173 @@ def can_handle_direct_mode(self) -> bool:
     def _cloud_config_path(self) -> Path:
         return self.repo_root / "Trainers" / "cloud" / "cloud_config.yaml"
 
+    def _native_bucket_support(self) -> tuple[bool, str]:
+        """Check whether the current Python has the Buckets APIs we rely on."""
+        try:
+            import huggingface_hub
+        except ImportError as exc:
+            return False, str(exc)
+
+        version = getattr(huggingface_hub, "__version__", "unknown")
+        missing = [
+            name for name in ("HfFileSystem", "create_bucket")
+            if not hasattr(huggingface_hub, name)
+        ]
+        if missing:
+            return False, f"huggingface_hub {version} does not support required APIs: {', '.join(missing)}"
+        return True, ""
+
+    def _bucket_command_is_remote(self, subcommand: str) -> bool:
+        """Return True when the selected bucket command needs HF Buckets support."""
+        if subcommand in {"push", "pull"}:
+            return True
+        if getattr(self.args, "bucket", None):
+            return True
+
+        path = str(getattr(self.args, "path", "") or "").strip()
+        if not path:
+            return True
+        if path.startswith("hf://"):
+            return True
+
+        local_path = Path(path)
+        if local_path.exists():
+            return False
+        if path.startswith("./") or path.startswith("../") or local_path.is_absolute():
+            return False
+        return True
+
+    def _path_for_helper(self, value: Optional[str], *, local_default_to_repo: bool = False) -> Optional[str]:
+        """Translate a repo-local host path into the helper container mount path."""
+        raw = str(value or "").strip()
+        if not raw:
+            if local_default_to_repo:
+                return str(CONTAINER_REPO_ROOT)
+            return None
+        if raw.startswith("hf://"):
+            return raw
+
+        candidate = Path(raw)
+        treat_as_local = (
+            candidate.exists()
+            or candidate.is_absolute()
+            or raw.startswith("./")
+            or raw.startswith("../")
+            or local_default_to_repo
+        )
+        if not treat_as_local:
+            return raw
+
+        resolved = (candidate if candidate.is_absolute() else (self.repo_root / candidate)).resolve()
+        try:
+            return container_repo_path(resolved, self.repo_root)
+        except ValueError as exc:
+            raise CloudProviderError(
+                f"Bucket helper can only access paths inside the repo workspace: {resolved}"
+            ) from exc
+
+    def _helper_cli_args(self, subcommand: str) -> list[str]:
+        """Build the CLI argument list to forward into the helper container."""
+        args = ["bucket", subcommand]
+        if self.json_mode:
+            args.append("--json")
+
+        bucket_id = getattr(self.args, "bucket", None)
+        if bucket_id:
+            args.extend(["--bucket", str(bucket_id)])
+
+        # Repo-local paths are remapped to the container mount; hf:// URIs and
+        # other values pass through unchanged, identically for every subcommand.
+        # A blank path translates to None and must not be forwarded into argv.
+        path_value = getattr(self.args, "path", None)
+        translated = self._path_for_helper(path_value) if path_value else None
+        if translated:
+            args.extend(["--path", translated])
+
+        dest_value = getattr(self.args, "dest", None)
+        if subcommand == "pull":  # pull always forwards --dest; an empty one maps to the repo mount
+            translated_dest = self._path_for_helper(dest_value, local_default_to_repo=True)
+            args.extend(["--dest", translated_dest])
+        elif dest_value:  # other subcommands forward --dest verbatim
+            args.extend(["--dest", str(dest_value)])
+
+        eval_path = getattr(self.args, "eval_path", None)
+        if eval_path:
+            args.extend(["--eval-path", str(eval_path)])
+
+        loss_path = getattr(self.args, "loss_path", None)
+        if loss_path:
+            args.extend(["--loss-path", str(loss_path)])
+
+        for flag_name, cli_flag in (("tail", "--tail"), ("limit", "--limit")):
+            value = getattr(self.args, flag_name, None)
+            if value is not None:  # 0 is a meaningful value, so test against None
+                args.extend([cli_flag, str(value)])
+
+        for flag_name, cli_flag in (
+            ("jsonl_latest", "--jsonl-latest"),
+            ("pretty", "--pretty"),
+            ("recursive", "--recursive"),
+            ("files_only", "--files-only"),
+            ("dirs_only", "--dirs-only"),
+        ):
+            if bool(getattr(self.args, flag_name, False)):
+                args.append(cli_flag)
+
+        return args
+
+    def _render_helper_output(self, output: str) -> str:
+        """Rewrite helper container repo paths back to the host workspace path."""
+        if not output:
+            return output
+        return output.replace(str(CONTAINER_REPO_ROOT), str(self.repo_root).replace("\\", "/"))
+
+    def _delegate_to_docker_helper(self, subcommand: str) -> int:
+        """Run the bucket command inside the dedicated Docker helper image."""
+        docker_ok, docker_error = ensure_docker_cli()
+        if not docker_ok:
+            raise CloudProviderError(
+                f"{docker_error} The current Python also lacks required HF Buckets APIs."
+            )
+
+        if not bucket_helper_image_present(self.repo_root):
+            if not self.json_mode:
+                print(f"Building Docker bucket helper image: {BUCKET_HELPER_IMAGE}")
+            build = subprocess.run(
+                build_bucket_helper_image_command(self.repo_root),
+                cwd=str(self.repo_root),
+                capture_output=True,
+                text=True,
+                encoding="utf-8",
+                errors="replace",
+            )
+            if build.returncode != 0:
+                raise CloudProviderError(
+                    build.stderr.strip() or build.stdout.strip() or "Failed to build Docker bucket helper image."
+                )
+
+        helper = subprocess.run(
+            build_bucket_helper_run_command(
+                self.repo_root,
+                helper_args=self._helper_cli_args(subcommand),
+            ),
+            cwd=str(self.repo_root),
+            capture_output=True,
+            text=True,
+            encoding="utf-8",
+            errors="replace",
+        )
+        rendered_stdout = self._render_helper_output(helper.stdout or "")
+        rendered_stderr = self._render_helper_output(helper.stderr or "")
+
+        if helper.returncode != 0:
+            message = rendered_stderr.strip() or rendered_stdout.strip() or "Docker bucket helper failed."
+            raise CloudProviderError(message)
+
+        if rendered_stdout.strip():
+            print(rendered_stdout, end="" if rendered_stdout.endswith("\n") else "\n")
+        return 0
+
     def _default_bucket_id(self) -> Optional[str]:
         settings = load_cloud_config(self._cloud_config_path()).get("hf_jobs", {})
         configured = str(settings.get("artifact_identifier", "")).strip()
@@ -352,6 +531,14 @@ def _handle_push(self) -> int:
     def handle(self) -> int:
         try:
             subcommand = str(getattr(self.args, "subcommand", "") or "").strip().lower()
+            if (
+                subcommand in {"read", "list", "pull", "push", "analyze"}
+                and os.getenv(BUCKET_HELPER_ENV_MARKER) != "1"
+                and self._bucket_command_is_remote(subcommand)
+            ):
+                native_ok, _reason = self._native_bucket_support()
+                if not native_ok:
+                    return self._delegate_to_docker_helper(subcommand)
             if subcommand == "read":
                 return self._handle_read()
             if subcommand == "list":
@@ -364,5 +551,8 @@ def handle(self) -> int:
                 return self._handle_analyze()
             raise CloudProviderError("Bucket command requires subcommand 'read', 'list', 'pull', 'push', or 'analyze'.")
         except Exception as exc:
-            self.output_error(str(exc), code="BUCKET_ERROR")
+            try:
+                self.output_error(str(exc), code="BUCKET_ERROR")
+            except UnicodeEncodeError:
+                print(f"Error: {exc}")
             return 1
diff --git a/tuner/handlers/docker_handler.py b/tuner/handlers/docker_handler.py
new file mode 100644
index 00000000..4b4512f9
--- /dev/null
+++ b/tuner/handlers/docker_handler.py
@@ -0,0 +1,540 @@
+"""
+Local Docker runtime helper for Synaptic Tuner.
+
+Location: tuner/handlers/docker_handler.py
+Purpose: Validate and manage local Docker-backed model runtimes
+Used by: Router when 'docker' command is invoked
+"""
+
+from __future__ import annotations
+
+import json
+import shutil
+import subprocess
+from argparse import Namespace
+from pathlib import Path
+from typing import Optional
+
+from shared.utilities.env import get_hf_token
+from tuner.backends.training.cloud.base_cloud import resolve_cloud_image
+from tuner.core.exceptions import CloudProviderError
+from tuner.handlers.base import BaseHandler
+from tuner.utils.docker_runtime import (
+    BUCKET_HELPER_IMAGE,
+    bucket_helper_image_present,
+    build_bucket_helper_image_command,
+)
+
+
+class DockerHandler(BaseHandler):
+    """Handler for ``tuner docker`` subcommands."""
+
+    _SUBCOMMANDS = {
+        "build": "_handle_build",
+        "bootstrap": "_handle_bootstrap",
+        "status": "_handle_status",
+        "pull": "_handle_pull",
+        "smoke": "_handle_smoke",
+    }
+
+    def __init__(self, args: Optional[Namespace] = None):
+        super().__init__(args=args)
+
+    @property
+    def name(self) -> str:
+        return "docker"
+
+    def can_handle_direct_mode(self) -> bool:
+        return True
+
+    @property
+    def cloud_config_path(self) -> Path:
+        return self.repo_root / "Trainers" / "cloud" / "cloud_config.yaml"
+
+    def handle(self) -> int:
+        action = getattr(self.args, "subcommand", None) if self.args else None
+        if not action:
+            action = "status"
+
+        method_name = self._SUBCOMMANDS.get(action)
+        if not method_name:
+            self.output_error(f"Unknown docker subcommand: {action}", code="UNKNOWN_SUBCOMMAND")
+            return 1
+        return getattr(self, method_name)()
+
+    def _ensure_docker_available(self) -> bool:
+        if shutil.which("docker") is None:
+            self.output_error(
+                "Docker CLI not found. Install Docker Desktop first.",
+                code="DOCKER_NOT_FOUND",
+            )
+            return False
+        return True
+
+    def _run(self, cmd: list[str], *, stream: bool = False, log_path: Optional[Path] = None) -> tuple[int, str]:
+        """Run *cmd* from the repo root and return ``(returncode, output)``.
+
+        Without ``stream`` stdout and stderr are captured and concatenated.
+        With ``stream=True`` stderr is merged into stdout, each line is echoed
+        as it arrives (unless in JSON mode) and, when *log_path* is given, also
+        written to that file. The returned output is stripped in both modes.
+        """
+        common = {"cwd": str(self.repo_root), "text": True, "encoding": "utf-8", "errors": "replace"}
+        if not stream:
+            result = subprocess.run(cmd, capture_output=True, **common)
+            output = (result.stdout or "") + (result.stderr or "")
+            return result.returncode, output.strip()
+
+        log_handle = None
+        output_lines: list[str] = []
+        if log_path is not None:
+            log_path.parent.mkdir(parents=True, exist_ok=True)
+            log_handle = log_path.open("w", encoding="utf-8")
+
+        try:
+            # The context manager closes the pipe and waits for the child even
+            # if the loop raises, so an error here cannot leak the process.
+            with subprocess.Popen(
+                cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, bufsize=1, **common
+            ) as process:
+                assert process.stdout is not None
+                for raw_line in process.stdout:
+                    line = raw_line.rstrip()
+                    output_lines.append(line)
+                    if log_handle is not None:
+                        log_handle.write(raw_line)
+                    if self.json_mode:
+                        continue
+                    try:
+                        print(line)
+                    except UnicodeEncodeError:
+                        # Console encoding cannot represent the line; degrade it.
+                        print(line.encode("ascii", "replace").decode("ascii"))
+            return process.returncode, "\n".join(output_lines).strip()
+        finally:
+            if log_handle is not None:
+                log_handle.close()
+
+    def _docker_info(self) -> tuple[int, str]:
+        return self._run(["docker", "info", "--format", "{{.ServerVersion}}"])
+
+    def _resolve_target_images(self, *, target_override: Optional[str] = None) -> list[tuple[str, str, Optional[str]]]:
+        target = target_override or getattr(self.args, "docker_target", None) or "unsloth"  # precedence: override, CLI arg, then "unsloth"
+        if target == "all":
+            targets = ["unsloth", "vllm", "bucket"]
+        else:
+            targets = [target]
+
+        explicit_image = getattr(self.args, "docker_image", None)
+        requested_profile = getattr(self.args, "docker_profile", None)
+        resolved: list[tuple[str, str, Optional[str]]] = []  # (runtime, image, profile) per target
+
+        for runtime in targets:
+            try:
+                if explicit_image and len(targets) > 1:  # NOTE(review): only reachable for target "all"; applies the explicit image to every runtime, bucket included — confirm intended
+                    image, profile = explicit_image, None
+                elif runtime == "bucket":
+                    image, profile = BUCKET_HELPER_IMAGE, "local_build"
+                elif runtime == "unsloth":
+                    image, profile = resolve_cloud_image(
+                        self.cloud_config_path,
+                        explicit_image=explicit_image,
+                        requested_profile=requested_profile,
+                        default_profile="latest_unsloth",
+                        fallback_image=None,
+                        profile_section="eval_image_profiles",
+                    )
+                else:  # any other runtime name is resolved with the "fast_vllm" default profile
+                    image, profile = resolve_cloud_image(
+                        self.cloud_config_path,
+                        explicit_image=explicit_image,
+                        requested_profile=requested_profile,
+                        default_profile="fast_vllm",
+                        fallback_image=None,
+                        profile_section="eval_image_profiles",
+                    )
+            except CloudProviderError as exc:
+                raise RuntimeError(str(exc)) from exc  # _handle_status/_handle_pull catch RuntimeError
+            resolved.append((runtime, image, profile))
+        return resolved
+
+    def _inspect_target_image(self, runtime: str, image: str, profile: Optional[str]) -> dict:
+        if runtime == "bucket":
+            present = bucket_helper_image_present(self.repo_root, image=image)
+            image_output = image if present else ""
+        else:
+            image_code, image_output = self._run(
+                ["docker", "images", "--format", "{{.Repository}}:{{.Tag}}|{{.ID}}|{{.Size}}", image]
+            )
+            present = image_code == 0 and bool(image_output)
+
+        return {
+            "runtime": runtime,
+            "image": image,
+            "profile": profile,
+            "present": present,
+            "local_images": image_output.splitlines() if image_output else [],
+        }
+
+    def _build_bucket_helper_image(self) -> tuple[int, str]:
+        log_path = self.repo_root / "logs" / "bucket-docker-build.log"
+        if not self.json_mode:
+            print(f"Building bucket helper image: {BUCKET_HELPER_IMAGE}")
+        return self._run(
+            build_bucket_helper_image_command(self.repo_root),
+            stream=True,
+            log_path=log_path,
+        )
+
+    def _pull_target(self, runtime: str, image: str, profile: Optional[str]) -> tuple[int, dict]:
+        if runtime == "bucket":
+            code, output = self._build_bucket_helper_image()
+            return code, {
+                "runtime": runtime,
+                "image": image,
+                "profile": profile,
+                "success": code == 0,
+                "log_path": str(self.repo_root / "logs" / "bucket-docker-build.log"),
+                "tail": output.splitlines()[-10:] if output else [],
+            }
+
+        log_path = self.repo_root / "logs" / f"{runtime}-docker-pull.log"
+        self.output_info(f"Pulling {runtime} image: {image}")
+        code, output = self._run(["docker", "pull", image], stream=True, log_path=log_path)
+        return code, {
+            "runtime": runtime,
+            "image": image,
+            "profile": profile,
+            "success": code == 0,
+            "log_path": str(log_path),
+            "tail": output.splitlines()[-10:] if output else [],
+        }
+
+    def _handle_build(self) -> int:
+        if not self._ensure_docker_available():
+            return 1
+
+        target = getattr(self.args, "docker_target", None) or "bucket"
+        if target not in {"bucket", "all"}:
+            self.output_error("Docker build currently supports only the local bucket helper image.", code="DOCKER_BUILD_UNSUPPORTED")
+            return 1
+
+        code, output = self._build_bucket_helper_image()
+        if code != 0:
+            self.output_error("docker build failed for bucket helper image", code="DOCKER_BUILD_FAILED")
+            return 1
+
+        payload = {
+            "runtime": "bucket",
+            "image": BUCKET_HELPER_IMAGE,
+            "present": bucket_helper_image_present(self.repo_root),
+            "tail": output.splitlines()[-10:] if output else [],
+        }
+        self.output(payload, f"Bucket helper image ready: {BUCKET_HELPER_IMAGE}")
+        return 0
+
+    def _handle_status(self) -> int:
+        if not self._ensure_docker_available():
+            return 1
+
+        docker_version_code, docker_version = self._run(["docker", "--version"])
+        info_code, server_version = self._docker_info()
+        if docker_version_code != 0:
+            self.output_error(docker_version or "Failed to run docker --version", code="DOCKER_VERSION_ERROR")
+            return 1
+
+        entries = []
+        try:
+            for runtime, image, profile in self._resolve_target_images():
+                entries.append(self._inspect_target_image(runtime, image, profile))
+        except RuntimeError as exc:
+            self.output_error(str(exc), code="DOCKER_IMAGE_RESOLUTION_ERROR")
+            return 1
+
+        payload = {
+            "docker_cli": docker_version,
+            "docker_engine": server_version if info_code == 0 else "unavailable",
+            "targets": entries,
+        }
+        human = (
+            f"Docker CLI: {docker_version}\n"
+            f"Docker Engine: {server_version if info_code == 0 else 'unavailable'}\n"
+            + "\n".join(
+                f"{item['runtime']}: {item['image']} ({'present' if item['present'] else 'missing'})"
+                for item in entries
+            )
+        )
+        self.output(payload, human)
+        return 0 if info_code == 0 else 1
+
+    def _handle_pull(self) -> int:
+        if not self._ensure_docker_available():
+            return 1
+
+        try:
+            targets = self._resolve_target_images()
+        except RuntimeError as exc:
+            self.output_error(str(exc), code="DOCKER_IMAGE_RESOLUTION_ERROR")
+            return 1
+
+        results = []
+        for runtime, image, profile in targets:
+            code, result = self._pull_target(runtime, image, profile)  # the "bucket" runtime is built locally instead of pulled
+            results.append(result)
+            if code != 0:  # stop at the first failure; remaining targets are not attempted
+                error_code = "DOCKER_BUILD_FAILED" if runtime == "bucket" else "DOCKER_PULL_FAILED"
+                message = (
+                    "docker build failed for bucket helper image"
+                    if runtime == "bucket"
+                    else f"docker pull failed for {image}"
+                )
+                self.output_error(message, code=error_code)
+                if self.json_mode:  # NOTE(review): appears to emit a second payload after output_error — confirm JSON consumers accept two documents
+                    self.output({"results": results}, success=False)
+                return 1
+
+        self.output({"results": results}, "Docker image pull complete.")
+        return 0
+
+    def _unsloth_smoke_command(self, image: str) -> list[str]:  # Build the `docker run` argv for the Unsloth smoke test.
+        repo_mount = str(self.repo_root)
+        smoke_code = (  # one-line Python program executed via `python -c` inside the container
+            "import os, sys, torch; "
+            "print('cuda', torch.cuda.is_available()); "
+            "print('torch', torch.__version__); "
+            "from unsloth import FastLanguageModel; "
+            "print('unsloth-ok'); "
+            "print('repo-mounted', os.path.exists('/workspace/repo')); "
+            "sys.path.insert(0, '/workspace/repo'); "
+            "import tuner; "
+            "print('tuner-ok')"
+        )
+        return [
+            "docker", "run", "--rm", "--gpus", "all",
+            "-v", f"{repo_mount}:/workspace/repo",  # bind-mount the host repo so `import tuner` can be checked
+            "--entrypoint", "python",
+            image,
+            "-c", smoke_code,
+        ]
+
+    def _vllm_smoke_command(self, image: str) -> list[str]:  # Build the `docker run` argv for the vLLM smoke test.
+        smoke_code = (  # imports torch and vllm, then prints CUDA availability and both versions
+            "import torch, vllm; "
+            "print('cuda', torch.cuda.is_available()); "
+            "print('torch', torch.__version__); "
+            "print('vllm', vllm.__version__)"
+        )
+        return [
+            "docker", "run", "--rm", "--gpus", "all",
+            "--entrypoint", "python3",  # this command uses python3, unlike the other two smoke commands
+            image,
+            "-c", smoke_code,
+        ]
+
+    def _bucket_smoke_command(self, image: str) -> list[str]:  # Build the `docker run` argv for the bucket helper smoke test.
+        smoke_code = (  # import check for huggingface_hub/dotenv/yaml plus two huggingface_hub attribute probes
+            "import huggingface_hub, dotenv, yaml; "
+            "print('hf_hub', huggingface_hub.__version__); "
+            "print('has_create_bucket', hasattr(huggingface_hub, 'create_bucket')); "
+            "print('has_hffs', hasattr(huggingface_hub, 'HfFileSystem'))"
+        )
+        return [
+            "docker", "run", "--rm",  # no --gpus flag, unlike the unsloth/vllm smoke commands
+            "--entrypoint", "python",
+            image,
+            "-c", smoke_code,
+        ]
+
+    def _smoke_target(self, runtime: str, image: str, profile: Optional[str]) -> tuple[int, dict]:  # Smoke-test one image; returns (exit code, result dict).
+        if runtime == "bucket" and not bucket_helper_image_present(self.repo_root, image=image):
+            code, _ = self._build_bucket_helper_image()  # a missing bucket helper image is built on demand
+            if code != 0:
+                return 1, {
+                    "runtime": runtime,
+                    "image": image,
+                    "profile": profile,
+                    "success": False,
+                    "output": ["Bucket helper image is missing and could not be built."],
+                }
+
+        if runtime == "unsloth":
+            cmd = self._unsloth_smoke_command(image)
+        elif runtime == "vllm":
+            cmd = self._vllm_smoke_command(image)
+        else:  # every other runtime value gets the bucket helper command
+            cmd = self._bucket_smoke_command(image)
+
+        code, output = self._run(cmd)
+        return code, {
+            "runtime": runtime,
+            "image": image,
+            "profile": profile,
+            "success": code == 0,
+            "output": output.splitlines(),
+        }
+
+    def _bootstrap_guidance(self, *, cli_ok: bool, engine_ok: bool) -> list[str]:  # Next-step hints for the detected Docker state.
+        guidance: list[str] = []
+        if not cli_ok:  # case 1: docker CLI not found
+            guidance.extend(
+                [
+                    "Install Docker Desktop for Windows and leave WSL 2 integration enabled.",
+                    "Start Docker Desktop and wait for the engine status to show Running.",
+                    "Re-run `python tuner.py docker bootstrap --docker-target all`.",
+                ]
+            )
+            return guidance
+
+        if not engine_ok:  # case 2: CLI present but the engine is not reachable
+            guidance.extend(
+                [
+                    "Start Docker Desktop and wait for the engine to finish initializing.",
+                    "If GPU containers are required, confirm the NVIDIA driver is installed on the host.",
+                    "Re-run `python tuner.py docker status` to confirm the engine is reachable.",
+                ]
+            )
+            return guidance
+
+        guidance.extend(  # case 3: CLI and engine both OK; point at the Docker-backed workflows
+            [
+                "Use `python tuner.py train --runtime docker` for local Docker-backed training.",
+                "Use `python tuner.py eval --runtime docker` for local Docker-backed evaluation.",
+                "Use `python tuner.py bucket pull ...` to bring cloud adapters local; pulled runs under `toolset-training-artifacts/runs/...` are now discoverable in local eval flows.",
+            ]
+        )
+        return guidance
+
+    def _handle_bootstrap(self) -> int:  # Check Docker, prepare missing target images, smoke-test each, then print a readiness summary.
+        cli_ok = shutil.which("docker") is not None
+        docker_version = None
+        server_version = None
+        info_code = 1  # treated as "engine unreachable" unless _docker_info() reports success
+
+        if cli_ok:
+            version_code, docker_version_output = self._run(["docker", "--version"])
+            if version_code == 0:
+                docker_version = docker_version_output
+            info_code, server_version_output = self._docker_info()
+            if info_code == 0:
+                server_version = server_version_output
+
+        engine_ok = cli_ok and info_code == 0
+        guidance = self._bootstrap_guidance(cli_ok=cli_ok, engine_ok=engine_ok)
+
+        if not cli_ok or not engine_ok:  # not ready: report what was found plus next steps, exit 1
+            payload = {
+                "docker_cli_found": cli_ok,
+                "docker_cli": docker_version,
+                "docker_engine": server_version,
+                "ready": False,
+                "guidance": guidance,
+            }
+            human_lines = [
+                f"Docker CLI: {docker_version or 'missing'}",
+                f"Docker Engine: {server_version or 'unavailable'}",
+                "",
+                "Next steps:",
+                *[f"  - {line}" for line in guidance],
+            ]
+            self.output(payload, "\n".join(human_lines), success=False)
+            return 1
+
+        try:
+            targets = self._resolve_target_images(
+                target_override=getattr(self.args, "docker_target", None) or "all"  # bootstrap defaults to every target
+            )
+        except RuntimeError as exc:
+            self.output_error(str(exc), code="DOCKER_IMAGE_RESOLUTION_ERROR")
+            return 1
+
+        status_entries = [self._inspect_target_image(runtime, image, profile) for runtime, image, profile in targets]
+        pull_results = []
+        smoke_results = []
+
+        for runtime, image, profile in targets:
+            inspected = next((entry for entry in status_entries if entry["runtime"] == runtime), None)  # match the pre-pull inspection by runtime
+            present = bool(inspected and inspected["present"])
+            if present:  # image already local: record a skipped pull instead of pulling again
+                pull_results.append(
+                    {
+                        "runtime": runtime,
+                        "image": image,
+                        "profile": profile,
+                        "success": True,
+                        "skipped": True,
+                        "reason": "already_present",
+                    }
+                )
+            else:
+                code, result = self._pull_target(runtime, image, profile)
+                pull_results.append(result)
+                if code != 0:
+                    self.output_error(
+                        "Docker bootstrap failed while preparing local images.",
+                        code="DOCKER_BOOTSTRAP_PULL_FAILED",
+                        details={"runtime": runtime, "image": image},
+                    )
+                    return 1
+
+            code, smoke_result = self._smoke_target(runtime, image, profile)  # each image is smoke-tested right after it is available
+            smoke_results.append(smoke_result)
+            if code != 0:
+                self.output_error(
+                    f"{runtime} smoke test failed",
+                    code="DOCKER_BOOTSTRAP_SMOKE_FAILED",
+                    details={"image": image, "output": smoke_result.get("output", [])},
+                )
+                return 1
+
+        hf_token_available = bool(get_hf_token())
+        payload = {
+            "docker_cli_found": True,
+            "docker_cli": docker_version,
+            "docker_engine": server_version,
+            "ready": True,
+            "targets": [self._inspect_target_image(runtime, image, profile) for runtime, image, profile in targets],  # re-inspected after pulls
+            "pull_results": pull_results,
+            "smoke_results": smoke_results,
+            "hf_token_available": hf_token_available,
+            "guidance": guidance,
+        }
+        human_lines = [
+            f"Docker CLI: {docker_version}",
+            f"Docker Engine: {server_version}",
+            "Local Docker runtime is ready:",
+        ]
+        for result in smoke_results:
+            human_lines.append(f"  - {result['runtime']}: {result['image']}")
+        human_lines.append(f"HF_TOKEN available via env/.env: {'yes' if hf_token_available else 'no'}")
+        human_lines.append("Next steps:")
+        human_lines.extend(f"  - {line}" for line in guidance)
+        self.output(payload, "\n".join(human_lines))
+        return 0
+
+    def _handle_smoke(self) -> int:  # Smoke-test every resolved target image; stop and return 1 at the first failure.
+        if not self._ensure_docker_available():
+            return 1
+
+        try:
+            targets = self._resolve_target_images()
+        except RuntimeError as exc:
+            self.output_error(str(exc), code="DOCKER_IMAGE_RESOLUTION_ERROR")
+            return 1
+
+        results = []
+        for runtime, image, profile in targets:
+            code, result = self._smoke_target(runtime, image, profile)
+            results.append(result)
+            if code != 0:
+                self.output_error(
+                    f"{runtime} smoke test failed",
+                    code="DOCKER_SMOKE_FAILED",
+                    details={"image": image, "output": result.get("output", [])},  # captured container output for diagnosis
+                )
+                return 1
+
+        human_lines = ["Docker smoke tests passed:"]
+        for result in results:
+            human_lines.append(f"  {result['runtime']}: {result['image']}")
+        self.output({"results": results}, "\n".join(human_lines))
+        return 0
diff --git a/tuner/handlers/eval_handler.py b/tuner/handlers/eval_handler.py
index e817cd19..ed2b5ca7 100644
--- a/tuner/handlers/eval_handler.py
+++ b/tuner/handlers/eval_handler.py
@@ -19,15 +19,29 @@
 - All output is JSON formatted for programmatic parsing
 """
 
+import json
+import socket
+import subprocess
+import time
+from http.client import RemoteDisconnected
 from argparse import Namespace
 from dataclasses import dataclass, field
 from datetime import datetime
 from pathlib import Path
 from typing import Any, Callable, List, Optional, Tuple
+from urllib.error import URLError
+from urllib.request import urlopen
 
 from tuner.handlers.base import BaseHandler
 from tuner.backends.registry import EvaluationBackendRegistry
 from tuner.discovery import TrainingRunDiscovery, CheckpointDiscovery
+from tuner.utils.docker_runtime import (
+    CONTAINER_REPO_ROOT,
+    build_docker_run_command,
+    container_repo_path,
+    ensure_docker_cli,
+    resolve_eval_image,
+)
 
 # Import shared UI components (delegates to Trainers/shared/ui/)
 from shared.ui import (
@@ -105,17 +119,24 @@ def _get_eval_status(self) -> dict:
 
         Returns dict with available backends, models, and scenarios.
         """
-        # List available backends
+        runtime = getattr(self.args, "runtime", "native") if self.args else "native"
         backends = []
 
-        # Check each backend
-        backend_configs = [
-            ("unsloth", "Unsloth (LoRA - direct)"),
-            ("llamacpp", "llama.cpp (GGUF)"),
-            ("mlc", "MLC/WebLLM (WebGPU)"),
-            ("ollama", "Ollama (local server)"),
-            ("lmstudio", "LM Studio (local server)"),
-        ]
+        if runtime == "docker":
+            docker_ok, docker_error = ensure_docker_cli()
+            backend_configs = [
+                ("unsloth", "Unsloth (Docker - direct LoRA)"),
+                ("vllm", "vLLM (Docker OpenAI server)"),
+            ]
+        else:
+            docker_ok, docker_error = True, ""
+            backend_configs = [
+                ("unsloth", "Unsloth (LoRA - direct)"),
+                ("llamacpp", "llama.cpp (GGUF)"),
+                ("mlc", "MLC/WebLLM (WebGPU)"),
+                ("ollama", "Ollama (local server)"),
+                ("lmstudio", "LM Studio (local server)"),
+            ]
 
         for backend_id, backend_name in backend_configs:
             backend_info = {
@@ -126,18 +147,30 @@ def _get_eval_status(self) -> dict:
             }
 
             try:
-                if backend_id in ("llamacpp", "mlc", "unsloth"):
-                    backend = EvaluationBackendRegistry.get(backend_id, repo_root=self.repo_root)
+                if runtime == "docker":
+                    backend_info["available"] = docker_ok
+                    if backend_id == "unsloth" and docker_ok:
+                        backend = EvaluationBackendRegistry.get("unsloth", repo_root=self.repo_root)
+                        models = backend.list_models()
+                        backend_info["models"] = models[:20] if models else []
+                        backend_info["model_count"] = len(models) if models else 0
+                    elif backend_id == "vllm" and docker_ok:
+                        runs = self._discover_vllm_runs()
+                        backend_info["models"] = [r.display_name for r in runs[:20]]
+                        backend_info["model_count"] = len(runs)
                 else:
-                    backend = EvaluationBackendRegistry.get(backend_id)
+                    if backend_id in ("llamacpp", "mlc", "unsloth"):
+                        backend = EvaluationBackendRegistry.get(backend_id, repo_root=self.repo_root)
+                    else:
+                        backend = EvaluationBackendRegistry.get(backend_id)
 
-                is_connected, _ = backend.validate_connection()
-                backend_info["available"] = is_connected
+                    is_connected, _ = backend.validate_connection()
+                    backend_info["available"] = is_connected
 
-                if is_connected:
-                    models = backend.list_models()
-                    backend_info["models"] = models[:20] if models else []  # Limit for brevity
-                    backend_info["model_count"] = len(models) if models else 0
+                    if is_connected:
+                        models = backend.list_models()
+                        backend_info["models"] = models[:20] if models else []
+                        backend_info["model_count"] = len(models) if models else 0
 
             except (ValueError, Exception):
                 pass
@@ -161,8 +194,11 @@ def _get_eval_status(self) -> dict:
         return {
             "command": "eval",
             "status": "ready",
+            "runtime": runtime,
             "backends": backends,
             "scenarios": scenarios,
+            "docker_available": docker_ok,
+            "docker_error": docker_error or None,
         }
 
     def _list_scenarios(self):
@@ -176,6 +212,16 @@ def _list_scenarios(self):
         discovery = PromptSetDiscovery(repo_root=self.repo_root)
         return discovery.discover_all()
 
+    def _discover_vllm_runs(self):
+        """Return runs discovered under Trainers/ that have a best_model_path or lora_path ([] if Evaluator.vllm_setup is not importable)."""
+        try:
+            from Evaluator.vllm_setup import discover_training_runs  # imported lazily; absence is handled below
+        except ImportError:
+            return []
+
+        runs = discover_training_runs(self.repo_root / "Trainers")
+        return [run for run in runs if run.best_model_path or run.lora_path]
+
     # -- Generic table display infrastructure ----------------------------------
 
     @dataclass
@@ -285,6 +331,7 @@ def _display_lora_models_table(self, backend, models: List[str]) -> None:
             columns=[
                 C("Run"), C("Base Model", style=COLORS["aqua"]),
                 C("Type", style=COLORS["purple"]),
+                C("Source", style="dim"),
                 C("Size", style="dim", justify="right"),
             ],
             row_extractor=lambda i, mp: self._lora_row(backend, mp),
@@ -299,6 +346,7 @@ def _lora_row(backend, model_path: str) -> List[str]:
             info.get("timestamp", "unknown"),
             info.get("base_model_short", "unknown"),
             info.get("trainer_type", "-").upper(),
+            info.get("source", "-"),
             f"{info.get('size_mb', 0):.0f}MB" if info.get("size_mb") else "-",
         ]
 
@@ -308,7 +356,8 @@ def _lora_plain(backend, model_path: str) -> str:
         return (
             f"{info.get('timestamp', 'unknown')} "
             f"({info.get('base_model_short', 'unknown')}) "
-            f"[{info.get('trainer_type', '-').upper()}]"
+            f"[{info.get('trainer_type', '-').upper()}] "
+            f"{info.get('source', '-')}"
         )
 
     def _display_mlc_models_table(self, backend, models: List[str]) -> None:
@@ -350,6 +399,7 @@ def _display_training_runs_table(self, runs: List[Path], trainer_type: str) -> N
             title=f"Available {trainer_type.upper()} Training Runs",
             columns=[
                 C("Run"), C("Has Final", style=COLORS["aqua"], justify="center"),
+                C("Source", style="dim"),
                 C("Checkpoints", style=COLORS["purple"], justify="right"),
             ],
             row_extractor=lambda i, rp: self._training_run_row(rp),
@@ -364,12 +414,18 @@ def _training_run_row(run_path: Path) -> List[str]:
         cp_count = 0
         if checkpoints_dir.exists():
             cp_count = len(list(checkpoints_dir.glob("checkpoint-*")))
-        return [run_path.name, has_final, str(cp_count)]
+        source = "bucket_pull" if "toolset-training-artifacts" in {part.lower() for part in run_path.parts} else (
+            "cloud_artifact" if "runs" in {part.lower() for part in run_path.parts} and "trainers" not in {part.lower() for part in run_path.parts}
+            else "local_training"
+        )
+        return [run_path.name, has_final, source, str(cp_count)]
 
     @staticmethod
     def _training_run_plain(run_path: Path) -> str:
         has_final = "(final)" if (run_path / "final_model").exists() else ""
-        return f"{run_path.name} {has_final}"
+        parts = {part.lower() for part in run_path.parts}
+        source = "bucket_pull" if "toolset-training-artifacts" in parts else ("cloud_artifact" if "runs" in parts and "trainers" not in parts else "local_training")
+        return f"{run_path.name} {has_final} [{source}]"
 
     def _display_checkpoints_table(self, checkpoints: List, trainer_type: str) -> None:
         """Display available checkpoints with metrics in a table."""
@@ -523,6 +579,305 @@ def _display_scenarios_table(self, scenarios) -> None:
         )
         self._display_table(scenarios, spec)
 
+    def _display_vllm_runs_table(self, runs) -> None:
+        """Render `runs` as a table (timestamp, trainer type, source, model name) via _display_table."""
+        C = self._ColumnSpec
+        spec = self._TableSpec(
+            title="Available vLLM Model Candidates",
+            columns=[
+                C("Run"),
+                C("Trainer", style=COLORS["aqua"]),
+                C("Source", style=COLORS["purple"]),
+                C("Model", style="dim"),
+            ],
+            row_extractor=lambda i, run: [
+                run.timestamp,
+                run.trainer_type.upper(),
+                run.source,
+                run.best_model_path.name if run.best_model_path else "LoRA",  # runs without best_model_path are labelled "LoRA"
+            ],
+            plain_formatter=lambda i, run: (  # single-line rendering of the same four fields
+                f"{run.timestamp} [{run.trainer_type.upper()}] "
+                f"{run.source} "
+                f"{run.best_model_path.name if run.best_model_path else 'LoRA'}"
+            ),
+        )
+        self._display_table(runs, spec)
+
+    def _select_vllm_run(self):
+        """Show the discovered vLLM runs and prompt until one is chosen; return it, or None when no runs exist."""
+        runs = self._discover_vllm_runs()
+        if not runs:
+            print_error("No vLLM-compatible training runs found.")
+            print_info("A merged-16bit export or a final_model LoRA adapter is required.")
+            return None
+
+        self._display_vllm_runs_table(runs)
+
+        while True:  # re-prompt until the input is an in-range integer
+            try:
+                sel = prompt(f"Select model run (1-{len(runs)})", "1")
+                idx = int(sel) - 1  # menu is 1-based, list is 0-based
+                if 0 <= idx < len(runs):
+                    return runs[idx]
+            except ValueError:
+                pass
+            print_error("Invalid selection.")
+
+    def _find_available_port(self, preferred: int = 8000) -> int:
+        """Return `preferred` if it binds on 127.0.0.1, else an OS-assigned port; the probe socket is closed, so the port is not held."""
+        for candidate in (preferred, 0):  # binding to port 0 asks the OS to pick a free port
+            try:
+                with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
+                    sock.bind(("127.0.0.1", candidate))
+                    return sock.getsockname()[1]  # actual bound port (differs from `candidate` when it was 0)
+            except OSError:
+                continue
+        return preferred  # both probes failed; fall back to the preferred port anyway
+
+    def _wait_for_vllm_models(self, host: str, port: int, timeout_seconds: int = 300) -> list[str]:
+        """Poll GET /v1/models every 2s until model ids are listed; raise RuntimeError(last error) once the timeout elapses."""
+        deadline = time.monotonic() + timeout_seconds  # monotonic clock: the timeout is unaffected by wall-clock adjustments
+        last_error = "vLLM server did not return any models."
+        while time.monotonic() < deadline:
+            try:
+                with urlopen(f"http://{host}:{port}/v1/models", timeout=5) as response:
+                    payload = json.loads(response.read().decode("utf-8"))
+                model_ids = [item.get("id") for item in payload.get("data", []) if item.get("id")]
+                if model_ids:
+                    return model_ids
+                last_error = "vLLM server is up but returned no models."
+            except (URLError, TimeoutError, json.JSONDecodeError, RemoteDisconnected, ConnectionResetError) as exc:
+                last_error = str(exc)  # expected while the server is still starting; keep polling
+            time.sleep(2)
+        raise RuntimeError(last_error)
+
+    def _run_docker_unsloth_evaluation(self, model: str, scenario) -> int:
+        """Run `python -m Evaluator.cli --backend unsloth` in the resolved Docker image and return its exit code (1 if no image resolves)."""
+        try:
+            image, profile = resolve_eval_image(
+                self.repo_root,
+                runtime="unsloth",
+                explicit_image=getattr(self.args, "docker_image", None),
+                requested_profile=getattr(self.args, "docker_profile", None),
+            )
+        except Exception as exc:
+            print_error(f"Failed to resolve Docker evaluation image: {exc}")
+            return 1
+
+        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+        results_dir = self.repo_root / "Evaluator" / "results"
+        results_dir.mkdir(parents=True, exist_ok=True)  # created on the host before the container runs
+        output_json = results_dir / f"run_{timestamp}.json"
+        output_md = results_dir / f"run_{timestamp}.md"
+
+        cmd = build_docker_run_command(
+            image=image,
+            repo_root=self.repo_root,
+            workdir=str(CONTAINER_REPO_ROOT),
+            entrypoint="python",
+            env={"PYTHONPATH": str(CONTAINER_REPO_ROOT)},
+            command=[
+                "-m",
+                "Evaluator.cli",
+                "--backend",
+                "unsloth",
+                "--model",
+                container_repo_path(Path(model), self.repo_root),  # host paths are passed through container_repo_path
+                "--scenario",
+                scenario.path.name,
+                "--output",
+                container_repo_path(output_json, self.repo_root),
+                "--markdown",
+                container_repo_path(output_md, self.repo_root),
+            ],
+        )
+
+        profile_suffix = f" ({profile})" if profile else ""
+        print_info(f"Running Docker evaluation with: {image}{profile_suffix}")
+        print()
+        result = subprocess.run(cmd, cwd=str(self.repo_root))  # output is not captured, so it streams to the terminal
+
+        if result.returncode == 0:
+            print()
+            print_info(f"Results saved to: {output_json.relative_to(self.repo_root)}")
+            print_info(f"Markdown report: {output_md.relative_to(self.repo_root)}")
+        return result.returncode
+
+    def _run_docker_vllm_evaluation(self, run, scenario) -> int:
+        """Start a detached vLLM container for `run`, wait for /v1/models, evaluate against it, and always remove the container."""
+        try:
+            image, profile = resolve_eval_image(
+                self.repo_root,
+                runtime="vllm",
+                explicit_image=getattr(self.args, "docker_image", None),
+                requested_profile=getattr(self.args, "docker_profile", None),
+            )
+        except Exception as exc:
+            print_error(f"Failed to resolve Docker vLLM image: {exc}")
+            return 1
+
+        model_path = run.best_model_path or run.lora_path  # LoRA-only runs pass discovery; assumes lora_path is the adapter dir - TODO confirm
+        if not model_path:
+            print_error("Selected run does not have a usable model path for vLLM.")
+            return 1
+
+        command = [
+            "--host",
+            "0.0.0.0",
+            "--port",
+            "8000",
+            "--gpu-memory-utilization",
+            "0.9",
+        ]
+        preferred_model_id = None
+
+        adapter_config = Path(model_path) / "adapter_config.json"  # present => serve the base model with this dir as a LoRA module
+        if adapter_config.exists():
+            try:
+                adapter_data = json.loads(adapter_config.read_text(encoding="utf-8"))
+            except json.JSONDecodeError as exc:
+                print_error(f"Failed to parse adapter_config.json: {exc}")
+                return 1
+
+            base_model = adapter_data.get("base_model_name_or_path")
+            if not base_model:
+                print_error("Adapter config is missing base_model_name_or_path.")
+                return 1
+
+            preferred_model_id = f"{run.trainer_type}-{run.timestamp}"
+            command = [
+                "--model",
+                base_model,
+                *command,
+                "--enable-lora",
+                "--max-lora-rank",
+                "64",
+                "--lora-modules",
+                f"{preferred_model_id}={container_repo_path(Path(model_path), self.repo_root)}",
+            ]
+        else:  # no adapter config: serve the directory itself as the model
+            command = [
+                "--model",
+                container_repo_path(Path(model_path), self.repo_root),
+                *command,
+            ]
+
+        host_port = self._find_available_port(8000)
+        container_name = f"tuner-vllm-eval-{datetime.now().strftime('%Y%m%d%H%M%S')}"
+        run_cmd = build_docker_run_command(
+            image=image,
+            repo_root=self.repo_root,
+            publish_ports=[(host_port, 8000)],
+            command=command,
+            name=container_name,
+            detach=True,
+        )
+
+        profile_suffix = f" ({profile})" if profile else ""
+        print_info(f"Starting Docker vLLM server with: {image}{profile_suffix}")
+        print_info(f"Container: {container_name}")
+        print_info(f"Endpoint: http://127.0.0.1:{host_port}/v1")
+        print()
+
+        log_process = None
+        try:
+            start = subprocess.run(run_cmd, cwd=str(self.repo_root), capture_output=True, text=True)
+            if start.returncode != 0:
+                print_error(start.stderr.strip() or start.stdout.strip() or "Failed to start Docker vLLM server.")
+                return 1
+
+            log_process = subprocess.Popen(  # follow container logs in the terminal while waiting for startup
+                ["docker", "logs", "-f", container_name],
+                cwd=str(self.repo_root),
+            )
+
+            model_ids = self._wait_for_vllm_models("127.0.0.1", host_port)
+            model_id = preferred_model_id if preferred_model_id in model_ids else model_ids[0]  # prefer the LoRA module id when served
+            print()
+            print_info(f"vLLM server ready. Using model id: {model_id}")
+            print()
+            return self._run_subprocess_evaluation(
+                "vllm",
+                model_id,
+                scenario,
+                host="127.0.0.1",
+                port=host_port,
+            )
+        except RuntimeError as exc:
+            print_error(f"Docker vLLM server failed to become ready: {exc}")
+            return 1
+        finally:  # runs on every exit path, including the early returns above
+            if log_process is not None and log_process.poll() is None:
+                log_process.terminate()
+            subprocess.run(
+                ["docker", "rm", "-f", container_name],
+                cwd=str(self.repo_root),
+                capture_output=True,
+                text=True,
+            )
+
+    def _handle_docker_eval(self) -> int:
+        """Interactive Docker eval flow: choose backend, model/run and scenario, confirm, then dispatch to the matching runner."""
+        print_header("EVALUATION", "Test your model's performance (Docker runtime)")
+
+        docker_ok, docker_error = ensure_docker_cli()
+        if not docker_ok:
+            print_error(docker_error)
+            return 1
+
+        backend_choice = print_menu([
+            ("unsloth", f"{BOX['star']} Unsloth (Docker - direct LoRA)"),
+            ("vllm", f"{BOX['bullet']} vLLM (Docker - OpenAI server)"),
+        ], "Select backend:")
+
+        if not backend_choice:  # a falsy menu result ends the flow with exit code 0
+            return 0
+
+        if backend_choice == "unsloth":
+            model, _trainer_type = self._select_unsloth_model()
+            if not model:
+                return 0
+            model_label = model
+        else:
+            selected_run = self._select_vllm_run()
+            if selected_run is None:  # _select_vllm_run returns None only when no runs were discovered
+                return 1
+            model = selected_run  # for vLLM, `model` holds the run object rather than a path string
+            model_label = selected_run.display_name
+
+        scenarios = self._list_scenarios()
+        if not scenarios:
+            print_error("No test scenarios found in Evaluator/config/scenarios/")
+            return 1
+
+        self._display_scenarios_table(scenarios)
+        while True:  # re-prompt until the input is an in-range integer
+            try:
+                sel = prompt(f"Select test scenario (1-{len(scenarios)})", "1")
+                idx = int(sel) - 1
+                if 0 <= idx < len(scenarios):
+                    selected = scenarios[idx]
+                    break
+            except ValueError:
+                pass
+            print_error("Invalid selection.")
+
+        print_config({
+            "Runtime": "docker",
+            "Backend": backend_choice,
+            "Model": model_label,
+            "Scenario": f"{selected.name} ({selected.count} tests)",
+        }, "Evaluation Configuration")
+
+        if not confirm("Start evaluation?"):
+            print_info("Evaluation cancelled.")
+            return 0
+
+        if backend_choice == "unsloth":
+            return self._run_docker_unsloth_evaluation(model, selected)
+        return self._run_docker_vllm_evaluation(model, selected)
+
     def handle(self) -> int:
         """
         Execute evaluation workflow.
@@ -538,6 +893,10 @@ def handle(self) -> int:
             self.output(status)
             return 0
 
+        runtime = getattr(self.args, "runtime", "native") if self.args else "native"
+        if runtime == "docker":
+            return self._handle_docker_eval()
+
         print_header("EVALUATION", "Test your model's performance")
 
         # Step 1: Select backend
@@ -802,7 +1161,15 @@ def on_record_dashboard(record):
         passed = sum(1 for r in records if r.passed)
         return 0 if passed == len(records) else 1
 
-    def _run_subprocess_evaluation(self, backend: str, model: str, scenario) -> int:
+    def _run_subprocess_evaluation(
+        self,
+        backend: str,
+        model: str,
+        scenario,
+        *,
+        host: str | None = None,
+        port: int | None = None,
+    ) -> int:
         """
         Fallback: Run evaluation via subprocess.
 
@@ -834,6 +1201,10 @@ def _run_subprocess_evaluation(self, backend: str, model: str, scenario) -> int:
             "--output", str(output_json),
             "--markdown", str(output_md)
         ]
+        if host:
+            cmd.extend(["--host", host])
+        if port:
+            cmd.extend(["--port", str(port)])
 
         print_info(f"Running: {' '.join(cmd)}")
         print()
diff --git a/tuner/handlers/train_handler.py b/tuner/handlers/train_handler.py
index 82f26c69..202e06ef 100644
--- a/tuner/handlers/train_handler.py
+++ b/tuner/handlers/train_handler.py
@@ -10,6 +10,7 @@
 - All output is JSON formatted for programmatic parsing
 """
 
+import shutil
 import subprocess
 from argparse import Namespace
 from pathlib import Path
@@ -17,6 +18,13 @@
 
 from tuner.handlers.base import BaseHandler
 from tuner.backends.registry import TrainingBackendRegistry
+from tuner.utils.docker_runtime import (
+    CONTAINER_REPO_ROOT,
+    build_docker_run_command,
+    container_repo_path,
+    ensure_docker_cli,
+    resolve_training_image,
+)
 from tuner.ui import (
     print_menu,
     print_header,
@@ -75,6 +83,26 @@ def detect_platform() -> str | None:
         return None
 
 
+def detect_docker_platform() -> str | None:
+    """Return ``"rtx"`` when ``nvidia-smi`` lists a GPU on the host, otherwise ``None``."""
+    smi_path = shutil.which("nvidia-smi")
+    if not smi_path:
+        return None
+
+    # Query the driver tooling directly so no host torch import is needed.
+    query = [smi_path, "--query-gpu=name", "--format=csv,noheader"]
+    try:
+        probe = subprocess.run(query, capture_output=True, text=True, timeout=10)
+    except Exception:
+        # Best-effort probe: a hung or failing nvidia-smi counts as "nothing detected".
+        return None
+
+    if probe.returncode != 0:
+        return None
+    gpu_names = probe.stdout.strip()
+    return "rtx" if gpu_names else None
+
+
 class TrainHandler(BaseHandler):
     """
     Handler for training workflow.
@@ -122,11 +150,15 @@ def _get_training_status(self) -> dict:
         has_cuda = False
         has_mlx = False
 
+        runtime = getattr(self.args, "runtime", "native") if self.args else "native"
+
         try:
             import torch
             has_cuda = torch.cuda.is_available()
         except ImportError:
             pass
+        if runtime == "docker" and not has_cuda:
+            has_cuda = detect_docker_platform() == "rtx"
 
         try:
             import mlx.core as mx
@@ -148,13 +180,66 @@ def _get_training_status(self) -> dict:
                 "methods": ["mlx"]
             })
 
+        docker_ok, docker_error = ensure_docker_cli() if runtime == "docker" else (True, "")
+
         return {
             "command": "train",
             "status": "ready" if platforms else "no_platforms",
             "platforms": platforms,
-            "detected_platform": detect_platform(),
+            "detected_platform": detect_platform() if runtime != "docker" else (detect_platform() or detect_docker_platform()),
+            "runtime": runtime,
+            "docker_available": docker_ok,
+            "docker_error": docker_error or None,
         }
 
+    @staticmethod
+    def _script_name_for_config(config) -> str:
+        """Return the trainer script filename for ``config`` (env-config GRPO has its own)."""
+        uses_env_grpo = config.method == "grpo" and config.config_path.name == "env_config.yaml"
+        return "train_env_grpo.py" if uses_env_grpo else f"train_{config.method}.py"
+
+    def _execute_docker_training(self, config) -> int:
+        """Run the trainer script for ``config`` in a Docker container; return its exit code."""
+        try:
+            image, profile = resolve_training_image(
+                self.repo_root,
+                explicit_image=getattr(self.args, "docker_image", None),
+                requested_profile=getattr(self.args, "docker_profile", None),
+            )
+        except Exception as exc:
+            print_error(f"Failed to resolve Docker training image: {exc}")
+            return 1
+
+        script_name = self._script_name_for_config(config)
+        trainer_dir = container_repo_path(config.trainer_dir, self.repo_root)
+        command = [script_name]
+        if script_name == "train_env_grpo.py":
+            command.extend(["--config", container_repo_path(config.config_path, self.repo_root)])
+        cmd = build_docker_run_command(
+            image=image,
+            repo_root=self.repo_root,
+            workdir=trainer_dir,
+            entrypoint="python",
+            env={"PYTHONPATH": CONTAINER_REPO_ROOT.as_posix()},  # POSIX path even on Windows
+            command=command,
+        )
+        profile_suffix = f" ({profile})" if profile else ""
+        print_info(f"Executing training in Docker with: {image}{profile_suffix}")
+        print()
+
+        process = None  # stays None when Popen itself is interrupted or fails
+        try:
+            process = subprocess.Popen(cmd, cwd=str(self.repo_root))
+            return process.wait()
+        except KeyboardInterrupt:
+            print("\nTraining interrupted by user.")
+            if process is not None:
+                process.terminate()
+            return 130
+        except Exception as exc:
+            print_error(f"Docker training execution error: {exc}")
+            return 1
+
     def handle(self) -> int:
         """
         Execute training workflow.
@@ -170,10 +255,15 @@ def handle(self) -> int:
             self.output(status)
             return 0
 
+        runtime = getattr(self.args, "runtime", "native") if self.args else "native"
         print_header("TRAINING", "Select your platform and training method")
+        if runtime == "docker":
+            print_info("Using Docker runtime for local GPU execution.")
 
         # Step 1: Auto-detect or select platform
         platform_choice = detect_platform()
+        if runtime == "docker" and not platform_choice:
+            platform_choice = detect_docker_platform()
 
         if platform_choice:
             platform_name = "NVIDIA GPU (CUDA)" if platform_choice == "rtx" else "Apple Silicon (MLX)"
@@ -195,10 +285,19 @@ def handle(self) -> int:
             return 1
 
         # Step 3: Validate environment
-        is_valid, error = backend.validate_environment()
-        if not is_valid:
-            print_error(f"Environment validation failed: {error}")
-            return 1
+        if runtime == "docker":
+            if platform_choice != "rtx":
+                print_error("Docker runtime currently supports NVIDIA/CUDA local training only.")
+                return 1
+            docker_ok, docker_error = ensure_docker_cli()
+            if not docker_ok:
+                print_error(docker_error)
+                return 1
+        else:
+            is_valid, error = backend.validate_environment()
+            if not is_valid:
+                print_error(f"Environment validation failed: {error}")
+                return 1
 
         # Step 4: Select method (if multiple available)
         methods = backend.get_available_methods()
@@ -238,21 +337,20 @@ def handle(self) -> int:
             print_info("Training cancelled.")
             return 0
 
-        # Step 8: Execute training
-        # Mac uses system python3 (no conda needed), NVIDIA uses conda python
-        if platform_choice == "mac":
-            import shutil
-            python = shutil.which("python3") or "python3"
-        else:
-            python = self.get_conda_python()
-        print_info(f"Executing training with: {python}")
-        print()
-
         # Play training start animation (if available)
         if ASCIIMATICS_AVAILABLE:
             play_training_start(duration_frames=40)
 
-        exit_code = backend.execute(config, python)
+        if runtime == "docker":
+            exit_code = self._execute_docker_training(config)
+        else:
+            if platform_choice == "mac":
+                python = shutil.which("python3") or "python3"
+            else:
+                python = self.get_conda_python()
+            print_info(f"Executing training with: {python}")
+            print()
+            exit_code = backend.execute(config, python)
 
         if exit_code == 0:
             # Play celebration animation on success
diff --git a/tuner/utils/docker_runtime.py b/tuner/utils/docker_runtime.py
new file mode 100644
index 00000000..c20bcba6
--- /dev/null
+++ b/tuner/utils/docker_runtime.py
@@ -0,0 +1,206 @@
+"""
+Shared helpers for local Docker-backed runtimes.
+
+Location: tuner/utils/docker_runtime.py
+Purpose: Resolve local runtime images and build Docker commands
+Used by: docker_handler, train_handler, eval_handler
+"""
+
+from __future__ import annotations
+
+import subprocess
+import shutil
+from pathlib import Path
+from typing import Mapping, Optional, Sequence
+
+from tuner.backends.training.cloud.base_cloud import resolve_cloud_image
+from tuner.core.exceptions import CloudProviderError
+
+CONTAINER_REPO_ROOT = Path("/workspace/repo")  # where the repo checkout is mounted in containers
+BUCKET_HELPER_IMAGE = "toolset-training-bucket-helper:latest"  # default Buckets helper image tag
+BUCKET_HELPER_ENV_MARKER = "TUNER_BUCKET_HELPER_ACTIVE"  # env var set to "1" in helper containers
+
+
+def get_cloud_config_path(repo_root: Path) -> Path:
+    """Locate ``cloud_config.yaml`` under ``Trainers/cloud`` in the given checkout."""
+    return repo_root.joinpath("Trainers", "cloud", "cloud_config.yaml")
+
+
+def get_bucket_helper_dir(repo_root: Path) -> Path:
+    """Locate the ``docker/bucket-helper`` directory inside the given checkout."""
+    return repo_root.joinpath("docker", "bucket-helper")
+
+
+def get_bucket_helper_dockerfile(repo_root: Path) -> Path:
+    """Locate the ``Dockerfile`` inside the Buckets helper directory."""
+    return get_bucket_helper_dir(repo_root).joinpath("Dockerfile")
+
+
+def ensure_docker_cli() -> tuple[bool, str]:
+    """Report whether a ``docker`` executable is on PATH, as ``(ok, error_message)``."""
+    if shutil.which("docker"):
+        return True, ""
+    return False, "Docker CLI not found. Install Docker Desktop first."
+
+
+def bucket_helper_image_present(repo_root: Path, *, image: str = BUCKET_HELPER_IMAGE) -> bool:
+    """Return True if ``docker images`` lists ``image``; False if the probe cannot be run."""
+    probe = ["docker", "images", "--format", "{{.Repository}}:{{.Tag}}", image]
+    try:
+        result = subprocess.run(probe, cwd=str(repo_root), capture_output=True, text=True)
+    except OSError:
+        # No docker binary (or unusable cwd): a presence check should answer "no", not raise.
+        return False
+    return result.returncode == 0 and bool((result.stdout or "").strip())
+
+
+def build_bucket_helper_image_command(
+    repo_root: Path,
+    *,
+    image: str = BUCKET_HELPER_IMAGE,
+) -> list[str]:
+    """Assemble the ``docker build`` argv for the checked-in Buckets helper image."""
+    # Locate the helper directory and its Dockerfile inside the checkout.
+    context_dir = get_bucket_helper_dir(repo_root)
+    dockerfile_path = get_bucket_helper_dockerfile(repo_root)
+
+    argv = ["docker", "build"]
+    # Tag the build with the requested image name.
+    argv += ["-t", image]
+    argv += ["-f", str(dockerfile_path)]
+    # The helper directory is passed last, as the build context.
+    argv.append(str(context_dir))
+    return argv
+
+
+def build_bucket_helper_run_command(
+    repo_root: Path,
+    *,
+    helper_args: Sequence[str],
+    image: str = BUCKET_HELPER_IMAGE,
+    remove: bool = True,
+) -> list[str]:
+    """Build the ``docker run`` argv that runs ``tuner.py`` in the Buckets helper image."""
+    # Container paths must be POSIX; str() of the Path yields backslashes on Windows hosts.
+    container_root = CONTAINER_REPO_ROOT.as_posix()
+    cmd = ["docker", "run"]
+    if remove:
+        cmd.append("--rm")
+    cmd.extend(["-v", f"{repo_root}:{container_root}"])
+    cmd.extend(["-e", f"{BUCKET_HELPER_ENV_MARKER}=1", "-e", f"PYTHONPATH={container_root}"])
+    cmd.extend(["--entrypoint", "python", image])
+    cmd.append((CONTAINER_REPO_ROOT / "tuner.py").as_posix())
+    cmd.extend(helper_args)
+    return cmd
+
+
+def resolve_training_image(
+    repo_root: Path,
+    *,
+    explicit_image: Optional[str] = None,
+    requested_profile: Optional[str] = None,
+) -> tuple[str, Optional[str]]:
+    """Resolve the local training image from ``docker_image_profiles`` (default ``stable``)."""
+    overrides = {"explicit_image": explicit_image, "requested_profile": requested_profile}
+    return resolve_cloud_image(
+        get_cloud_config_path(repo_root),
+        default_profile="stable",
+        fallback_image=None,
+        profile_section="docker_image_profiles",
+        **overrides,
+    )
+
+
+def resolve_eval_image(
+    repo_root: Path,
+    *,
+    runtime: str,
+    explicit_image: Optional[str] = None,
+    requested_profile: Optional[str] = None,
+) -> tuple[str, Optional[str]]:
+    """Resolve the local eval image from ``eval_image_profiles``, defaulting by runtime."""
+    fallback_profile = "stable_unsloth" if runtime != "vllm" else "fast_vllm"
+    overrides = {"explicit_image": explicit_image, "requested_profile": requested_profile}
+    return resolve_cloud_image(
+        get_cloud_config_path(repo_root),
+        default_profile=fallback_profile,
+        fallback_image=None,
+        profile_section="eval_image_profiles",
+        **overrides,
+    )
+
+
+def container_repo_path(host_path: Path, repo_root: Path) -> str:
+    """Translate a host path inside the checkout into its POSIX path under the container mount."""
+    repo_relative = host_path.resolve().relative_to(repo_root.resolve())
+    # relative_to raises ValueError when host_path is not inside repo_root; it propagates.
+    mounted = CONTAINER_REPO_ROOT / repo_relative
+    return mounted.as_posix()
+
+
+def build_docker_run_command(
+    *,
+    image: str,
+    repo_root: Path,
+    command: Sequence[str],
+    workdir: Optional[str] = None,
+    entrypoint: Optional[str] = None,
+    env: Optional[Mapping[str, str]] = None,
+    publish_ports: Optional[Sequence[tuple[int, int]]] = None,
+    gpus: bool = True,
+    name: Optional[str] = None,
+    detach: bool = False,
+    remove: bool = True,
+) -> list[str]:
+    """Assemble a ``docker run`` argv that bind-mounts the repo at ``/workspace/repo``.
+
+    Options are emitted in a fixed order: ``-d``, ``--rm``, ``--name``, ``--gpus all``,
+    the repo bind mount, ``-w``, ``-p`` pairs, ``-e`` pairs, ``--entrypoint``; then the
+    image and finally ``command``. Falsy or empty options are omitted.
+    """
+    argv: list[str] = ["docker", "run"]
+
+    # Boolean switches and the container name come first.
+    argv += ["-d"] if detach else []
+    argv += ["--rm"] if remove else []
+    argv += ["--name", name] if name else []
+    argv += ["--gpus", "all"] if gpus else []
+
+    # The checkout is always mounted, whatever other options are set.
+    argv += ["-v", f"{repo_root}:/workspace/repo"]
+    argv += ["-w", workdir] if workdir else []
+
+    # Port and environment pairs each expand to one flag per entry.
+    for host_port, container_port in publish_ports or ():
+        argv += ["-p", f"{host_port}:{container_port}"]
+    for key, value in (env or {}).items():
+        argv += ["-e", f"{key}={value}"]
+
+    argv += ["--entrypoint", entrypoint] if entrypoint else []
+    argv += [image, *command]
+    return argv
+
+
+def resolve_runtime_image(
+    repo_root: Path,
+    *,
+    command_name: str,
+    runtime: str,
+    explicit_image: Optional[str] = None,
+    requested_profile: Optional[str] = None,
+) -> tuple[str, Optional[str]]:
+    """Dispatch to the train or eval image resolver; raise for any other command name."""
+    # Both resolvers take the same image/profile overrides from the caller.
+    overrides = {
+        "explicit_image": explicit_image,
+        "requested_profile": requested_profile,
+    }
+    if command_name == "train":
+        # resolve_training_image takes no runtime argument, so `runtime` is not forwarded.
+        return resolve_training_image(repo_root, **overrides)
+    if command_name == "eval":
+        return resolve_eval_image(repo_root, runtime=runtime, **overrides)
+
+    # Any other command name has no local Docker image mapping.
+    message = f"Unsupported local Docker runtime command: {command_name}"
+    raise CloudProviderError(message)

From a84246afe02ef21ff4766e1dcb249f4c2ea4611f Mon Sep 17 00:00:00 2001
From: Professor Synapse <131487882+ProfSynapse@users.noreply.github.com>
Date: Fri, 10 Apr 2026 12:06:51 -0400
Subject: [PATCH 2/2] Document Docker-first local workflow

---
 README.md                 | 60 +++++++++++++++++++++++++++++----------
 docs/project-reference.md | 27 ++++++++++++------
 2 files changed, 64 insertions(+), 23 deletions(-)

diff --git a/README.md b/README.md
index 3b301bd5..97df343c 100644
--- a/README.md
+++ b/README.md
@@ -62,24 +62,54 @@ That path can run train -> evaluation -> exact loss -> analysis -> recommendatio
 - `plan-hardware` for blind hardware planning using live HF Jobs flavors and pricing
 - `cloud-gym` to run the vault gym against a trained cloud run
 
-## Recent Updates
-
-- HF Jobs is now the canonical cloud path for train + evaluate, with `cloud-pipeline` handling the common workflow end-to-end.
-- Cloud evaluation writes structured artifacts back into the source run, including `evaluation_results.json`, `evaluation_results.md`, and `evaluation_lineage.json`.
-- Bucket-backed progress is a first-class UX: training and evaluation stream JSONL progress that the local dashboard can replay.
-- `run-experiment` now supports fuller cloud orchestration, including post-training evaluation and exact-loss stages as separate sibling jobs by default.
-- `plan-hardware` and `scripts/hf_jobs_hardware.py` make hardware selection less guessy by using the live HF Jobs hardware surface.
-- Evolutionary SFT is now supported in the cloud experiment path through checked-in specs and `cloud-pipeline --train-*` overrides.
+## Recent Updates
+
+- HF Jobs is now the canonical cloud path for train + evaluate, with `cloud-pipeline` handling the common workflow end-to-end.
+- Cloud evaluation writes structured artifacts back into the source run, including `evaluation_results.json`, `evaluation_results.md`, and `evaluation_lineage.json`.
+- Bucket-backed progress is a first-class UX: training and evaluation stream JSONL progress that the local dashboard can replay.
+- Local NVIDIA workflows now have a first-class Docker path with `python tuner.py docker bootstrap --docker-target all`, `train --runtime docker`, and `eval --runtime docker`.
+- Pulled HF bucket adapters under `toolset-training-artifacts/runs/...` are now discoverable in local Docker eval flows without manual `docker run` commands.
+- `run-experiment` now supports fuller cloud orchestration, including post-training evaluation and exact-loss stages as separate sibling jobs by default.
+- `plan-hardware` and `scripts/hf_jobs_hardware.py` make hardware selection less guessy by using the live HF Jobs hardware surface.
+- Evolutionary SFT is now supported in the cloud experiment path through checked-in specs and `cloud-pipeline --train-*` overrides.
 
 ## Quick Start
 
-| Path | How |
-|------|-----|
-| **Claude Code (recommended)** | Open repo in [Claude Code](https://docs.anthropic.com/en/docs/claude-code) and tell it what you want |
-| **HF Jobs cloud train + eval** | `python tuner.py cloud-pipeline --method sft --preset full` |
-| **Full cloud experiment bundle** | `python tuner.py run-experiment --experiment-spec Trainers/cloud/experiments/<spec>.yaml --yes` |
-| **Interactive CLI** | `./run.sh` (Linux/WSL) or `.\run.ps1` (PowerShell) |
-| **Beginner (no GPU)** | `Trainers/notebooks/sft_colab_beginner.ipynb` in Google Colab |
+| Path | How |
+|------|-----|
+| **Claude Code (recommended)** | Open repo in [Claude Code](https://docs.anthropic.com/en/docs/claude-code) and tell it what you want |
+| **Local Docker setup (Windows/NVIDIA)** | `python tuner.py docker bootstrap --docker-target all` |
+| **Local Docker train** | `python tuner.py train --runtime docker` |
+| **Local Docker eval** | `python tuner.py eval --runtime docker` |
+| **HF Jobs cloud train + eval** | `python tuner.py cloud-pipeline --method sft --preset full` |
+| **Full cloud experiment bundle** | `python tuner.py run-experiment --experiment-spec Trainers/cloud/experiments/<spec>.yaml --yes` |
+| **Interactive CLI** | `./run.sh` (Linux/WSL) or `.\run.ps1` (PowerShell) |
+| **Beginner (no GPU)** | `Trainers/notebooks/sft_colab_beginner.ipynb` in Google Colab |
+
+## Local Docker Workflow
+
+For Windows users with NVIDIA GPUs, the recommended local path is now Docker Desktop, not manual dependency wrangling inside the host training environment.
+
+```bash
+python tuner.py docker bootstrap --docker-target all
+python tuner.py train --runtime docker
+python tuner.py eval --runtime docker
+```
+
+What `docker bootstrap` does:
+- checks whether Docker Desktop is installed and the engine is reachable
+- prepares the local `unsloth`, `vllm`, and bucket-helper images
+- runs smoke tests so you can verify GPU containers work before debugging model code
+
+If you pull a cloud adapter locally, keep it inside the repo under `toolset-training-artifacts/runs/...` and it will show up in local eval discovery:
+
+```bash
+python tuner.py bucket pull \
+  --path runs/hf_jobs/sft/<run-prefix>/final_model \
+  --dest toolset-training-artifacts
+
+python tuner.py eval --runtime docker
+```
 
 ## Using with Claude Code
 
diff --git a/docs/project-reference.md b/docs/project-reference.md
index 9ab6b584..138536e3 100644
--- a/docs/project-reference.md
+++ b/docs/project-reference.md
@@ -7,8 +7,10 @@ Scripts, configuration files, environment variables, data patterns, and platform
 ## Key Bash Scripts
 
 **Root Level:**
-- `run.sh` / `run.ps1` - Main CLI wrappers (auto-activate conda)
-- `setup_env.sh` / `setup_env.ps1` - Environment setup
+- `run.sh` / `run.ps1` - Main CLI wrappers
+- `setup_env.sh` / `setup_env.ps1` - Legacy host-environment setup / fallback path
+- `python tuner.py docker bootstrap --docker-target all` - Preferred local Docker bootstrap for Windows + NVIDIA
+- `python tuner.py docker status|pull|smoke|build` - Local Docker runtime management
 
 **Trainers:**
 - `Trainers/rtx3090_sft/setup.sh` - Full SFT environment setup
@@ -64,6 +66,8 @@ OLLAMA_HOST=http://localhost:11434
 WANDB_API_KEY=your_wandb_key
 ```
 
+The CLI now auto-loads the repo-root `.env` file, including during Docker bootstrap and bucket-helper flows.
+
 ---
 
 ## Data Patterns
@@ -116,8 +120,14 @@ tail -f sft_output_rtx3090/YYYYMMDD_HHMMSS/logs/training_latest.jsonl
 
 **Windows PowerShell:**
 - Use `.ps1` scripts
-- Some multiprocessing limitations
-- Prefer WSL2 if possible
+- Docker Desktop is now the preferred local GPU path
+- Start with `python tuner.py docker bootstrap --docker-target all`
+- Use `python tuner.py train --runtime docker` and `python tuner.py eval --runtime docker`
+- Keep the host conda path as a fallback, not the default recommendation
+
+**Local Docker Artifacts:**
+- Pulled cloud runs under `toolset-training-artifacts/runs/...` are treated as first-class local eval candidates
+- Use `python tuner.py bucket pull --path runs/hf_jobs/<method>/<run-prefix>/final_model --dest toolset-training-artifacts`
 
 ---
 
@@ -125,13 +135,14 @@ tail -f sft_output_rtx3090/YYYYMMDD_HHMMSS/logs/training_latest.jsonl
 
 | Task | Fully Auto | Needs User Input | Notes |
 |------|:----------:|:----------------:|-------|
-| Environment setup | X | | `./setup_env.sh` |
+| Docker bootstrap | X | | `python tuner.py docker bootstrap --docker-target all` |
+| Environment setup (legacy host path) | X | | `./setup_env.sh` |
 | Dependency install | X | | `./run.sh doctor --fix` |
 | List resources | X | | `./run.sh list *` |
 | Dataset validation | X | | `python3 .skills/synethetic-data-generation/scripts/validate_syngen.py` |
 | System diagnostics | X | | `./run.sh doctor` |
-| Training (SFT/KTO) | | X | Needs dataset choice, model size |
-| Evaluation | | X | Needs model path, scenario set |
+| Training (SFT/KTO/GRPO) | | X | Prefer `python tuner.py train --runtime docker` for local NVIDIA |
+| Evaluation | | X | Prefer `python tuner.py eval --runtime docker` for local NVIDIA |
 | Upload to HuggingFace | | X | Needs repo name, HF_TOKEN |
 | Dataset improvement | | X | Needs rubrics, line range |
 | Synthetic data gen | | X | Needs config, teacher model |
@@ -157,4 +168,4 @@ tail -f sft_output_rtx3090/YYYYMMDD_HHMMSS/logs/training_latest.jsonl
 - Run dry runs: `python train_sft.py --dry-run`
 - Validate first: `python3 .skills/synethetic-data-generation/scripts/validate_syngen.py dataset.jsonl`
 
-**Key Principle:** Use the bash scripts (`./run.sh`, `setup.sh`, etc.) rather than direct Python when possible - they handle environment setup, dependency checks, and provide better UX.
+**Key Principle:** For local NVIDIA GPU work, prefer the repo CLI plus Docker Desktop over hand-managed host dependencies. Start with `python tuner.py docker bootstrap --docker-target all`.