diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..7635a93
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,48 @@
+# pytorch/pytorch images ship with Python, pip, and PyTorch pre-installed,
+# so no NVIDIA registry auth or manual CUDA installation is needed.
+FROM pytorch/pytorch:2.9.1-cuda12.8-cudnn9-devel
+
+# Build-arg (not ENV) so noninteractive apt doesn't leak into the runtime env.
+ARG DEBIAN_FRONTEND=noninteractive
+ENV PYTHONUNBUFFERED=1
+ENV PYTORCH_ALLOC_CONF=expandable_segments:True
+
+# LOW_VRAM_MODE=1: cast aggregator to bf16 on CPU before GPU transfer (~2-3 GB VRAM savings).
+# Per original authors: "no measurable quality change" for the aggregator trunk,
+# but the scale-phase RoPE computations are affected on very small GPUs.
+# Build with: docker build --build-arg LOW_VRAM_MODE=1 -t lingbot-map-demo-light .
+ARG LOW_VRAM_MODE=0
+ENV LOW_VRAM_MODE=${LOW_VRAM_MODE}
+
+# System dependencies (Python/pip already present in base image)
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    git \
+    wget \
+    curl \
+    libglib2.0-0 \
+    libgl1 \
+    libsm6 \
+    libxext6 \
+    libxrender-dev \
+    libgomp1 \
+    && rm -rf /var/lib/apt/lists/*
+
+# Copy source and install lingbot-map with visualization extras
+WORKDIR /app
+COPY . .
+RUN pip install --no-cache-dir -e ".[vis]" && \ + pip install --no-cache-dir onnxruntime + +# FlashInfer for efficient KV-cache attention (falls back to SDPA if unavailable) +RUN pip install --no-cache-dir flashinfer-python \ + -i https://flashinfer.ai/whl/cu128/torch2.9/ || \ + echo "WARNING: FlashInfer not installed — demo will use --use_sdpa fallback" + +RUN mkdir -p /model /data/images + +COPY docker/entrypoint.sh /entrypoint.sh +RUN chmod +x /entrypoint.sh + +EXPOSE 8080 + +ENTRYPOINT ["/entrypoint.sh"] diff --git a/README.md b/README.md index d73b1c6..0fc7f90 100644 --- a/README.md +++ b/README.md @@ -208,6 +208,151 @@ python demo.py --model_path /path/to/checkpoint.pt \ `--camera_num_iterations` defaults to `4`; setting it to `1` skips three refinement passes in the camera head (and shrinks its KV cache by 4×). +# 🐳 Docker + +Run the full demo — including model download, inference, and 3D viewer — without any local Python or CUDA setup. + +## Image Design + +| Layer | Detail | +|:---|:---| +| Base image | `pytorch/pytorch:2.9.1-cuda12.8-cudnn9-devel` (public, no auth required) | +| Attention backend | [FlashInfer](https://github.com/flashinfer-ai/flashinfer) for paged KV-cache; auto-falls back to PyTorch SDPA if unavailable | +| Visualisation | [viser](https://github.com/nerfstudio-project/viser) web viewer exposed on port **8080** | +| Model resolution | `docker/entrypoint.sh` checks `/model/` at startup and auto-downloads from HuggingFace when no `.pt` file is found | +| Data access | Images and model weights are provided via **volume mounts** — nothing user-specific is baked into the image | + +``` +lingbot-map-demo +├── /app/ ← source code + built-in example scenes +│ └── example/{church,oxford,university,loop}/ +├── /model/ ← mount a host directory here to cache the model +└── /data/ ← mount your images or video here +``` + +## Prerequisites + +- [Docker](https://docs.docker.com/get-docker/) with the [NVIDIA Container 
Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html) +- An NVIDIA GPU (CUDA 12.8 driver) + +## Build + +```bash +git clone https://github.com/YoshiRi/lingbot-map-docker.git +cd lingbot-map-docker +docker build -t lingbot-map-demo . +``` + +## Try the Built-in Example Scenes + +The four example scenes from `example/` are already baked into the image at `/app/example/`. +No extra data mount is needed — just provide a writable directory for the model cache. + +```bash +# Church (outdoor, sky masking recommended) +docker run --gpus all \ + -v $(pwd)/model:/model \ + -p 8080:8080 \ + lingbot-map-demo \ + --image_folder /app/example/church --mask_sky + +# Oxford +docker run --gpus all \ + -v $(pwd)/model:/model \ + -p 8080:8080 \ + lingbot-map-demo \ + --image_folder /app/example/oxford --mask_sky + +# University +docker run --gpus all \ + -v $(pwd)/model:/model \ + -p 8080:8080 \ + lingbot-map-demo \ + --image_folder /app/example/university --mask_sky + +# Loop (loop-closure trajectory, no sky masking needed) +docker run --gpus all \ + -v $(pwd)/model:/model \ + -p 8080:8080 \ + lingbot-map-demo \ + --image_folder /app/example/loop +``` + +On **first run** the model is downloaded from HuggingFace and cached in `./model/`; subsequent runs start immediately. +Open **http://localhost:8080** in your browser once inference completes. 
+ +## Run with Your Own Images + +Place your images (`.jpg` / `.png`) in a local folder, then mount it: + +```bash +docker run --gpus all \ + -v /path/to/your/images:/data/images \ + -v $(pwd)/model:/model \ + -p 8080:8080 \ + lingbot-map-demo \ + --image_folder /data/images +``` + +## Run with a Video File + +```bash +docker run --gpus all \ + -v /path/to/video.mp4:/data/video.mp4 \ + -v $(pwd)/model:/model \ + -p 8080:8080 \ + lingbot-map-demo \ + --video_path /data/video.mp4 --fps 10 +``` + +## docker-compose + +Edit `docker-compose.yml` to set your image folder and model variant, then: + +```bash +# Put your images in ./images/ +docker compose up +``` + +## Environment Variables + +| Variable | Default | Description | +|:---|:---|:---| +| `HF_MODEL_NAME` | `lingbot-map` | Checkpoint to download: `lingbot-map`, `lingbot-map-long`, or `lingbot-map-stage1` | +| `MODEL_PATH` | *(auto)* | Explicit path to a `.pt` file inside the container (skips auto-download) | +| `MODEL_CACHE_DIR` | `/model` | Directory where the downloaded model is stored | +| `HUGGING_FACE_HUB_TOKEN` | *(none)* | HuggingFace token for gated repos | + +## Tips + +**Use a pre-downloaded model** (avoids HuggingFace download at runtime): +```bash +docker run --gpus all \ + -v /path/to/checkpoint.pt:/model/lingbot-map.pt \ + -v $(pwd)/images:/data/images \ + -p 8080:8080 \ + lingbot-map-demo \ + --image_folder /data/images +``` + +**Limited GPU memory** — add one or both flags: +```bash + --num_scale_frames 2 # reduces activation peak of the initial scale phase + --keyframe_interval 6 # keeps only every 6th frame in KV cache +``` + +**Long sequences (> 3000 frames)** — use windowed mode: +```bash + --mode windowed --window_size 128 +``` + +**Faster inference** — reduce camera head iterations (small accuracy trade-off): +```bash + --camera_num_iterations 1 +``` + +--- + # 📜 License This project is released under the Apache License 2.0. See [LICENSE](LICENSE.txt) file for details. 
diff --git a/demo.py b/demo.py index a8c8636..faa1dc1 100644 --- a/demo.py +++ b/demo.py @@ -20,7 +20,9 @@ import argparse import glob +import json import os +import struct import time # Must be set before `import torch` / any CUDA init. Reduces the reserved-vs-allocated @@ -127,7 +129,7 @@ def load_model(args, device): if args.model_path: print(f"Loading checkpoint: {args.model_path}") - ckpt = torch.load(args.model_path, map_location=device, weights_only=False) + ckpt = torch.load(args.model_path, map_location="cpu", weights_only=False) state_dict = ckpt.get("model", ckpt) missing, unexpected = model.load_state_dict(state_dict, strict=False) if missing: @@ -136,6 +138,13 @@ def load_model(args, device): print(f" Unexpected keys: {len(unexpected)}") print(" Checkpoint loaded.") + if os.environ.get("LOW_VRAM_MODE", "0") == "1" and \ + device.type == "cuda" and getattr(model, "aggregator", None) is not None: + cap = torch.cuda.get_device_capability(device) + dtype = torch.bfloat16 if cap[0] >= 8 else torch.float16 + print(f"[LOW_VRAM_MODE] Casting aggregator to {dtype} on CPU before GPU transfer") + model.aggregator = model.aggregator.to(dtype=dtype) + return model.to(device).eval() @@ -227,6 +236,107 @@ def prepare_for_visualization(predictions, images=None): return vis_predictions +# ============================================================================= +# Export +# ============================================================================= + +def export_results(predictions, images_cpu, output_dir, conf_threshold=0.0, ply_stride=1): + """Save inference results to output_dir. 
+ + Writes three files: + predictions.npz – raw numpy arrays (depth, world_points, extrinsic, intrinsic, images) + pointcloud.ply – merged, confidence-filtered point cloud (binary PLY) + cameras.json – per-frame camera-to-world poses and intrinsics + """ + os.makedirs(output_dir, exist_ok=True) + + # ── NPZ ────────────────────────────────────────────────────────────────── + npz_path = os.path.join(output_dir, "predictions.npz") + save_dict = {} + for k, v in predictions.items(): + if isinstance(v, torch.Tensor): + save_dict[k] = v.cpu().numpy() + elif isinstance(v, np.ndarray): + save_dict[k] = v + if isinstance(images_cpu, torch.Tensor): + images_arr = images_cpu.numpy() + elif isinstance(images_cpu, np.ndarray): + images_arr = images_cpu + else: + images_arr = None + if images_arr is not None: + if images_arr.ndim == 5 and images_arr.shape[0] == 1: + images_arr = images_arr[0] # (1,S,C,H,W) → (S,C,H,W) + save_dict["images"] = images_arr + np.savez_compressed(npz_path, **save_dict) + print(f" Saved predictions → {npz_path}") + + # ── PLY ────────────────────────────────────────────────────────────────── + world_points = save_dict.get("world_points") # (S, H, W, 3) + depth = save_dict.get("depth") # (S, H, W, 1) fallback + depth_conf = save_dict.get("depth_conf") # (S, H, W) + images_np = save_dict.get("images") # (S, 3, H, W) + + if world_points is None and depth is not None: + from lingbot_map.utils.geometry import unproject_depth_map_to_point_map + world_points = unproject_depth_map_to_point_map( + depth, save_dict["extrinsic"], save_dict["intrinsic"] + ) + + if world_points is not None and images_np is not None: + S, H, W = world_points.shape[:3] + colors = images_np.transpose(0, 2, 3, 1) # (S, H, W, 3) + pts_all, col_all = [], [] + st = max(1, int(ply_stride)) + for i in range(S): + pts = world_points[i, ::st, ::st].reshape(-1, 3) + col = (colors[i, ::st, ::st].reshape(-1, 3) * 255).clip(0, 255).astype(np.uint8) + valid = np.isfinite(pts).all(axis=1) + if 
depth_conf is not None: + valid &= depth_conf[i, ::st, ::st].reshape(-1) > conf_threshold + pts_all.append(pts[valid]) + col_all.append(col[valid]) + pts_merged = np.concatenate(pts_all, axis=0).astype(np.float32) + col_merged = np.concatenate(col_all, axis=0) + n = len(pts_merged) + + ply_path = os.path.join(output_dir, "pointcloud.ply") + header = ( + "ply\nformat binary_little_endian 1.0\n" + f"element vertex {n}\n" + "property float x\nproperty float y\nproperty float z\n" + "property uchar red\nproperty uchar green\nproperty uchar blue\n" + "end_header\n" + ).encode() + with open(ply_path, "wb") as f: + f.write(header) + # interleave xyz + rgb tightly + data = np.empty(n, dtype=[("x","f4"),("y","f4"),("z","f4"), + ("r","u1"),("g","u1"),("b","u1")]) + data["x"], data["y"], data["z"] = pts_merged[:,0], pts_merged[:,1], pts_merged[:,2] + data["r"], data["g"], data["b"] = col_merged[:,0], col_merged[:,1], col_merged[:,2] + f.write(data.tobytes()) + print(f" Saved point cloud ({n:,} pts) → {ply_path}") + else: + print(" Skipping PLY export (world_points or images not available)") + + # ── cameras.json ───────────────────────────────────────────────────────── + extrinsic = save_dict.get("extrinsic") # (S, 3, 4) c2w + intrinsic = save_dict.get("intrinsic") # (S, 3, 3) + if extrinsic is not None and intrinsic is not None: + cameras = [] + for i in range(len(extrinsic)): + cameras.append({ + "frame": i, + "c2w": extrinsic[i].tolist(), + "K": intrinsic[i].tolist(), + }) + cam_path = os.path.join(output_dir, "cameras.json") + with open(cam_path, "w") as f: + json.dump(cameras, f, indent=2) + print(f" Saved cameras ({len(cameras)} frames) → {cam_path}") + + # ============================================================================= # Main # ============================================================================= @@ -291,6 +401,12 @@ def main(): parser.add_argument("--export_preprocessed", type=str, default=None, help="Export stride-sampled, resized/cropped images 
to this folder") + # Output + parser.add_argument("--output_dir", type=str, default="/data/output", + help="Directory for exported results (predictions.npz, pointcloud.ply, cameras.json)") + parser.add_argument("--no_viewer", action="store_true", + help="Skip the interactive viewer (export only)") + args = parser.parse_args() assert args.image_folder or args.video_path, \ "Provide --image_folder or --video_path" @@ -398,6 +514,15 @@ def main(): predictions, images_cpu = postprocess(predictions, images_for_post) + # ── Export ─────────────────────────────────────────────────────────────── + print(f"Exporting results to {args.output_dir} ...") + export_results(predictions, images_cpu, args.output_dir, + conf_threshold=args.conf_threshold, ply_stride=args.downsample_factor) + + if args.no_viewer: + print("Viewer skipped (--no_viewer). Done.") + return + # ── Visualize ──────────────────────────────────────────────────────────── try: from lingbot_map.vis import PointCloudViewer diff --git a/docker-compose.lowvram.yml b/docker-compose.lowvram.yml new file mode 100644 index 0000000..634091b --- /dev/null +++ b/docker-compose.lowvram.yml @@ -0,0 +1,29 @@ +services: + lingbot-map: + build: + context: . 
+ args: + LOW_VRAM_MODE: "1" # cast aggregator to bf16 before GPU transfer + image: lingbot-map-demo-light + runtime: nvidia + environment: + - NVIDIA_VISIBLE_DEVICES=all + - HF_MODEL_NAME=lingbot-map + - HUGGING_FACE_HUB_TOKEN=${HUGGING_FACE_HUB_TOKEN:-} + volumes: + - ${IMAGE_HOST_PATH:-./example/oxford}:/data/images # override: IMAGE_HOST_PATH=~/photos docker compose up + - ./model:/model # model cache (downloaded on first run) + - ./output:/data/output # exported results (PLY / NPZ / JSON) + ports: + - "8080:8080" + command: > + --image_folder /data/images + --output_dir /data/output + --mode windowed + --window_size 16 + --overlap_size 8 + --num_scale_frames 4 + --conf_threshold 2.0 + --downsample_factor 4 + --mask_sky + --point_size 0.005 diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..0cac8ba --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,21 @@ +services: + lingbot-map: + build: + context: . + args: + LOW_VRAM_MODE: "0" + image: lingbot-map-demo + runtime: nvidia + environment: + - NVIDIA_VISIBLE_DEVICES=all + - HF_MODEL_NAME=lingbot-map # lingbot-map | lingbot-map-long | lingbot-map-stage1 + - HUGGING_FACE_HUB_TOKEN=${HUGGING_FACE_HUB_TOKEN:-} + volumes: + - ${IMAGE_HOST_PATH:-./example/oxford}:/data/images # override: IMAGE_HOST_PATH=~/photos docker compose up + - ./model:/model # model cache (downloaded on first run) + - ./output:/data/output # exported results (PLY / NPZ / JSON) + ports: + - "8080:8080" + command: > + --image_folder /data/images + --output_dir /data/output diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh new file mode 100644 index 0000000..da8d123 --- /dev/null +++ b/docker/entrypoint.sh @@ -0,0 +1,54 @@ +#!/bin/bash +set -e + +MODEL_CACHE_DIR="${MODEL_CACHE_DIR:-/model}" +HF_MODEL_NAME="${HF_MODEL_NAME:-lingbot-map}" +MODEL_PATH="${MODEL_PATH:-}" + +# ── Resolve model path ──────────────────────────────────────────────────────── +if [ -z "$MODEL_PATH" ]; then + # Look for a matching 
.pt file already in the cache volume
+  MODEL_FILE=$(find "$MODEL_CACHE_DIR" -name "${HF_MODEL_NAME}.pt" -print -quit 2>/dev/null || true)
+  if [ -z "$MODEL_FILE" ]; then
+    MODEL_FILE=$(find "$MODEL_CACHE_DIR" -name "*.pt" -print -quit 2>/dev/null || true)
+  fi
+
+  if [ -z "$MODEL_FILE" ]; then
+    echo "Model not found in ${MODEL_CACHE_DIR}. Downloading '${HF_MODEL_NAME}' from HuggingFace..."
+    # NOTE(review): the heredoc body was lost when this patch was mangled
+    # ("python - </dev/null" is the residue of "python - <<'PYEOF' ... 2>/dev/null").
+    # Reconstructed below — verify repo_id against the project's actual HF namespace.
+    python - <<'PYEOF'
+import os
+from huggingface_hub import hf_hub_download
+
+name = os.environ.get("HF_MODEL_NAME", "lingbot-map")
+cache_dir = os.environ.get("MODEL_CACHE_DIR", "/model")
+token = os.environ.get("HUGGING_FACE_HUB_TOKEN") or None
+hf_hub_download(repo_id=f"lingbot/{name}", filename=f"{name}.pt",
+                local_dir=cache_dir, token=token)
+PYEOF
+    # Re-scan the cache after the download attempt.
+    MODEL_FILE=$(find "$MODEL_CACHE_DIR" -name "${HF_MODEL_NAME}.pt" -print -quit 2>/dev/null || \
+      find "$MODEL_CACHE_DIR" -name "*.pt" -print -quit 2>/dev/null || true)
+  fi
+
+  MODEL_PATH="$MODEL_FILE"
+fi
+
+if [ -z "$MODEL_PATH" ] || [ ! -f "$MODEL_PATH" ]; then
+  echo "ERROR: No model .pt file found. Either:"
+  echo " - Mount a pre-downloaded model: -v /path/to/model.pt:/model/lingbot-map.pt"
+  echo " - Or let auto-download run (requires internet access)"
+  exit 1
+fi
+
+echo "Using model: ${MODEL_PATH}"
+
+# ── Check FlashInfer availability ─────────────────────────────────────────────
+EXTRA_ARGS=""
+python -c "import flashinfer" 2>/dev/null || EXTRA_ARGS="--use_sdpa"
+if [ -n "$EXTRA_ARGS" ]; then
+  echo "FlashInfer not available, using SDPA backend."
+fi + +# ── Launch demo ─────────────────────────────────────────────────────────────── +exec python /app/demo.py \ + --model_path "$MODEL_PATH" \ + $EXTRA_ARGS \ + "$@" diff --git a/lingbot_map/vis/point_cloud_viewer.py b/lingbot_map/vis/point_cloud_viewer.py index a1d698c..87e9ab4 100644 --- a/lingbot_map/vis/point_cloud_viewer.py +++ b/lingbot_map/vis/point_cloud_viewer.py @@ -97,7 +97,7 @@ def __init__( self.size = size self.state_args = state_args self.server = viser.ViserServer(host="0.0.0.0", port=port) - self.server.gui.configure_theme(titlebar_content=None, control_layout="collapsible") + self.server.gui.configure_theme(titlebar_content=None, control_layout="fixed") self.device = device self.conf_list = conf_list self.vis_threshold = vis_threshold @@ -415,8 +415,8 @@ def _(event: viser.GuiEvent) -> None: "Show Camera", initial_value=self.show_camera ) self.vis_threshold_slider = self.server.gui.add_slider( - "Visibility Threshold", min=1.0, max=5.0, step=0.01, - initial_value=self.vis_threshold, + "Visibility Threshold", min=0.0, max=5.0, step=0.01, + initial_value=max(self.vis_threshold, 0.0), ) self.camera_downsample_slider = self.server.gui.add_slider( "Camera Downsample Factor", min=1, max=50, step=1, initial_value=1 diff --git a/tools/analyze_predictions.py b/tools/analyze_predictions.py new file mode 100644 index 0000000..cf3f9d4 --- /dev/null +++ b/tools/analyze_predictions.py @@ -0,0 +1,214 @@ +#!/usr/bin/env python3 +"""Diagnose predictions.npz from demo.py. 
+ +Usage: + python tools/analyze_predictions.py output/predictions.npz + python tools/analyze_predictions.py output/predictions.npz --cameras output/cameras.json + python tools/analyze_predictions.py output/predictions.npz --save report.png +""" +import argparse +import json +import sys +import numpy as np +import matplotlib.pyplot as plt +import matplotlib.gridspec as gridspec + + +def stat(arr, name): + if arr is None: + print(f" {name}: missing") + return + finite = np.isfinite(arr) + n_inf = np.sum(~finite) + valid = arr[finite] + print(f" {name}: shape={arr.shape} dtype={arr.dtype}") + if valid.size: + print(f" range=[{valid.min():.4g}, {valid.max():.4g}] mean={valid.mean():.4g} " + f"nan/inf={n_inf} ({100*n_inf/arr.size:.1f}%)") + else: + print(f" ALL VALUES INVALID (nan/inf)") + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("npz", help="Path to predictions.npz") + parser.add_argument("--cameras", default=None, help="Path to cameras.json (optional)") + parser.add_argument("--save", default=None, help="Save figure to file") + parser.add_argument("--conf_threshold", type=float, default=None, + help="Confidence threshold to simulate (default: show distribution)") + args = parser.parse_args() + + print(f"Loading {args.npz} ...") + d = np.load(args.npz, allow_pickle=False) + keys = list(d.keys()) + print(f"Keys: {keys}\n") + + world_points = d.get("world_points") # (S, H, W, 3) + depth_conf = d.get("depth_conf") if "depth_conf" in keys else ( + d.get("world_points_conf") if "world_points_conf" in keys else None) + images = d.get("images") # (S, 3, H, W) or (S, H, W, 3) + extrinsic = d.get("extrinsic") # (S, 3, 4) + + print("=== Array stats ===") + stat(world_points, "world_points") + stat(depth_conf, "depth_conf / world_points_conf") + stat(images, "images") + stat(extrinsic, "extrinsic") + print() + + if world_points is None: + print("No world_points found — cannot analyse point cloud.") + sys.exit(1) + + S, H, W = 
world_points.shape[:3] + pts_flat = world_points.reshape(-1, 3) + finite_mask = np.isfinite(pts_flat).all(axis=1) + + print("=== Point cloud sanity ===") + print(f" Total pixels : {len(pts_flat):,}") + print(f" Finite points: {finite_mask.sum():,} ({100*finite_mask.mean():.1f}%)") + + if depth_conf is not None: + conf_flat = depth_conf.reshape(-1) + print(f"\n Confidence stats (all):") + pcts = [0, 1, 5, 25, 50, 75, 90, 95, 99, 100] + vals = np.nanpercentile(conf_flat, pcts) + for p, v in zip(pcts, vals): + print(f" p{p:3d}: {v:.4f}") + + for thr in [0.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0]: + n_pass = int((conf_flat[finite_mask] > thr).sum()) + print(f" conf > {thr:.1f}: {n_pass:>8,} pts ({100*n_pass/max(finite_mask.sum(),1):.1f}% of finite)") + + pts_valid = pts_flat[finite_mask] + if len(pts_valid): + print(f"\n X range: [{pts_valid[:,0].min():.4f}, {pts_valid[:,0].max():.4f}]") + print(f" Y range: [{pts_valid[:,1].min():.4f}, {pts_valid[:,1].max():.4f}]") + print(f" Z range: [{pts_valid[:,2].min():.4f}, {pts_valid[:,2].max():.4f}]") + dist = np.linalg.norm(pts_valid, axis=1) + print(f" Distance from origin: min={dist.min():.4f} median={np.median(dist):.4f} " + f"p99={np.percentile(dist,99):.4f} max={dist.max():.4f}") + + if images is not None: + print(f"\n Image pixel range: [{images.min():.4f}, {images.max():.4f}]") + if images.max() <= 1.01: + print(" → pixels appear to be in [0,1] — color mapping should be fine") + elif images.max() <= 255.5: + print(" → pixels appear to be in [0,255] — need /255 before color mapping (possible color bug!)") + else: + print(" WARNING: pixel values outside expected range — color mapping will be wrong") + + # ── Figures ────────────────────────────────────────────────────────────── + cam_positions = None + if args.cameras: + with open(args.cameras) as f: + cams = json.load(f) + cam_positions = np.array([[c["c2w"][0][3], c["c2w"][1][3], c["c2w"][2][3]] for c in cams]) + + if extrinsic is not None and cam_positions is None: + 
cam_positions = extrinsic[:, :3, 3] # translation from c2w + + fig = plt.figure(figsize=(14, 10)) + gs = gridspec.GridSpec(2, 3, figure=fig) + + # Sample points for plotting (avoid OOM on huge arrays) + MAX_PLOT = 50_000 + if depth_conf is not None: + conf_thr = args.conf_threshold if args.conf_threshold is not None else 2.0 + sel = finite_mask & (depth_conf.reshape(-1) > conf_thr) + else: + sel = finite_mask + pts_sel = pts_flat[sel] + if len(pts_sel) > MAX_PLOT: + idx = np.random.choice(len(pts_sel), MAX_PLOT, replace=False) + pts_sel = pts_sel[idx] + print(f"\n Plotting {len(pts_sel):,} points (conf>{args.conf_threshold if args.conf_threshold else 2.0:.1f})") + + # Top-down view (X-Z) + ax1 = fig.add_subplot(gs[0, 0]) + if len(pts_sel): + ax1.scatter(pts_sel[:, 0], pts_sel[:, 2], s=0.5, alpha=0.3, c="steelblue") + if cam_positions is not None: + ax1.plot(cam_positions[:, 0], cam_positions[:, 2], "r-", lw=1, label="cameras") + ax1.scatter(cam_positions[0, 0], cam_positions[0, 2], c="lime", s=60, zorder=5) + ax1.scatter(cam_positions[-1, 0], cam_positions[-1, 2], c="red", s=60, zorder=5) + ax1.legend(fontsize=7) + ax1.set_xlabel("X"); ax1.set_ylabel("Z") + ax1.set_title("Top-down (X-Z)") + ax1.set_aspect("equal") + + # Side view (X-Y) + ax2 = fig.add_subplot(gs[0, 1]) + if len(pts_sel): + ax2.scatter(pts_sel[:, 0], pts_sel[:, 1], s=0.5, alpha=0.3, c="steelblue") + if cam_positions is not None: + ax2.plot(cam_positions[:, 0], cam_positions[:, 1], "r-", lw=1) + ax2.scatter(cam_positions[0, 0], cam_positions[0, 1], c="lime", s=60, zorder=5) + ax2.scatter(cam_positions[-1, 0], cam_positions[-1, 1], c="red", s=60, zorder=5) + ax2.set_xlabel("X"); ax2.set_ylabel("Y") + ax2.set_title("Side (X-Y)") + ax2.set_aspect("equal") + + # Front view (Y-Z) + ax3 = fig.add_subplot(gs[0, 2]) + if len(pts_sel): + ax3.scatter(pts_sel[:, 2], pts_sel[:, 1], s=0.5, alpha=0.3, c="steelblue") + if cam_positions is not None: + ax3.plot(cam_positions[:, 2], cam_positions[:, 1], "r-", lw=1) + 
ax3.set_xlabel("Z"); ax3.set_ylabel("Y") + ax3.set_title("Front (Z-Y)") + ax3.set_aspect("equal") + + # Confidence histogram + ax4 = fig.add_subplot(gs[1, 0]) + if depth_conf is not None: + cf = depth_conf.reshape(-1) + cf_finite = cf[np.isfinite(cf)] + ax4.hist(cf_finite, bins=100, color="steelblue", alpha=0.7) + for thr in [1.0, 2.0, 3.0]: + ax4.axvline(thr, color="red", lw=1, linestyle="--", label=f"thr={thr}") + ax4.set_xlabel("confidence"); ax4.set_ylabel("count") + ax4.set_title("Confidence distribution") + ax4.legend(fontsize=7) + else: + ax4.text(0.5, 0.5, "No confidence data", ha="center", va="center", transform=ax4.transAxes) + + # Distance histogram + ax5 = fig.add_subplot(gs[1, 1]) + if len(pts_valid): + dist = np.linalg.norm(pts_valid, axis=1) + p99 = np.percentile(dist, 99) + ax5.hist(dist[dist < p99 * 2], bins=100, color="darkorange", alpha=0.7) + ax5.axvline(p99, color="red", lw=1, linestyle="--", label=f"p99={p99:.2f}") + ax5.set_xlabel("distance from origin"); ax5.set_ylabel("count") + ax5.set_title("Point distance distribution") + ax5.legend(fontsize=7) + + # Point count per frame + ax6 = fig.add_subplot(gs[1, 2]) + if depth_conf is not None: + conf_thr = args.conf_threshold if args.conf_threshold is not None else 2.0 + counts = [] + for i in range(S): + pts_i = world_points[i].reshape(-1, 3) + fin = np.isfinite(pts_i).all(axis=1) + cnf = depth_conf[i].reshape(-1) > conf_thr + counts.append(int((fin & cnf).sum())) + ax6.bar(range(S), counts, color="mediumseagreen", alpha=0.8) + ax6.set_xlabel("frame"); ax6.set_ylabel("valid points") + ax6.set_title(f"Valid points per frame (conf>{conf_thr:.1f})") + else: + ax6.text(0.5, 0.5, "No confidence data", ha="center", va="center", transform=ax6.transAxes) + + plt.suptitle(f"LingBot-Map predictions analysis (S={S}, H={H}, W={W})", fontsize=12) + plt.tight_layout() + + if args.save: + plt.savefig(args.save, dpi=150) + print(f"\nSaved → {args.save}") + else: + plt.show() + + +if __name__ == "__main__": + 
main() diff --git a/tools/debug_reconstruction.py b/tools/debug_reconstruction.py new file mode 100644 index 0000000..24388b1 --- /dev/null +++ b/tools/debug_reconstruction.py @@ -0,0 +1,286 @@ +#!/usr/bin/env python3 +"""Debug reconstruction quality from predictions.npz. + +Checks: + 1. Are world_points in front of or behind each camera? + 2. Do reprojected points align with the original image? + 3. Depth map plausibility per frame. + +Usage: + python tools/debug_reconstruction.py output/predictions.npz + python tools/debug_reconstruction.py output/predictions.npz --frames 0 10 50 100 + python tools/debug_reconstruction.py output/predictions.npz --save debug/ +""" +import argparse +import os +import numpy as np +import matplotlib.pyplot as plt +import matplotlib.gridspec as gridspec + + +def c2w_to_w2c(c2w_3x4): + """Invert c2w (3x4) → w2c (3x4).""" + R = c2w_3x4[:3, :3] + t = c2w_3x4[:3, 3] + R_inv = R.T + t_inv = -R_inv @ t + w2c = np.eye(4) + w2c[:3, :3] = R_inv + w2c[:3, 3] = t_inv + return w2c[:3, :] # (3, 4) + + +def project_to_camera(world_pts, w2c_3x4, K_3x3): + """world_pts (N,3) → pixel coords (N,2) and depth (N,).""" + R, t = w2c_3x4[:3, :3], w2c_3x4[:3, 3] + cam_pts = (R @ world_pts.T).T + t # (N, 3) in camera space + depth_cam = cam_pts[:, 2] + fx, fy = K_3x3[0, 0], K_3x3[1, 1] + cx, cy = K_3x3[0, 2], K_3x3[1, 2] + u = fx * cam_pts[:, 0] / (cam_pts[:, 2] + 1e-8) + cx + v = fy * cam_pts[:, 1] / (cam_pts[:, 2] + 1e-8) + cy + return np.stack([u, v], axis=1), depth_cam + + +def analyze_frame(frame_idx, world_points, extrinsic, intrinsic, images, + depth_conf, conf_threshold=2.0): + """Return dict of diagnostics for one frame.""" + pts = world_points[frame_idx].reshape(-1, 3) # (H*W, 3) + c2w = np.eye(4); c2w[:3, :] = extrinsic[frame_idx] # (4, 4) + w2c = c2w_to_w2c(extrinsic[frame_idx]) + K = intrinsic[frame_idx] + H, W = world_points.shape[1:3] + + # Camera position and forward direction + cam_pos = extrinsic[frame_idx][:3, 3] + cam_forward = 
extrinsic[frame_idx][:3, 2] # 3rd column of R = Z axis + + # Transform points to camera space + _, depth_cam = project_to_camera(pts, w2c, K) + + finite = np.isfinite(pts).all(axis=1) + if depth_conf is not None: + conf_mask = depth_conf[frame_idx].reshape(-1) > conf_threshold + else: + conf_mask = np.ones(len(pts), dtype=bool) + valid = finite & conf_mask + + n_valid = valid.sum() + n_front = (depth_cam[valid] > 0).sum() + n_behind = (depth_cam[valid] <= 0).sum() + pct_front = 100 * n_front / max(n_valid, 1) + pct_behind = 100 * n_behind / max(n_valid, 1) + depth_median = float(np.median(depth_cam[valid])) if n_valid else float("nan") + depth_p5 = float(np.percentile(depth_cam[valid], 5)) if n_valid else float("nan") + depth_p95 = float(np.percentile(depth_cam[valid], 95)) if n_valid else float("nan") + + # Image for display (C,H,W) → (H,W,C), clip to [0,1] + img_display = None + if images is not None: + img = images[frame_idx] + if img.shape[0] == 3: # (3,H,W) → (H,W,3) + img = img.transpose(1, 2, 0) + img_display = np.clip(img, 0, 1) + + # Reprojection: project valid pts back and compare to pixel grid + uv, _ = project_to_camera(pts[valid], w2c, K) + + return dict( + frame=frame_idx, + cam_pos=cam_pos, + cam_forward=cam_forward, + n_valid=n_valid, + pct_front=pct_front, + pct_behind=pct_behind, + depth_median=depth_median, + depth_p5=depth_p5, + depth_p95=depth_p95, + depth_cam_valid=depth_cam[valid], + world_pts_valid=pts[valid], + uv_reprojected=uv, + img_display=img_display, + H=H, W=W, + ) + + +def plot_frame(ax_row, diag): + """Fill one row of subplots for a single frame.""" + ax_img, ax_depth, ax_reproj, ax_text = ax_row + f = diag["frame"] + + # ── image ────────────────────────────────────────────────────── + if diag["img_display"] is not None: + ax_img.imshow(diag["img_display"]) + ax_img.set_title(f"frame {f}: input image", fontsize=8) + ax_img.axis("off") + + # ── depth histogram ──────────────────────────────────────────── + dc = 
diag["depth_cam_valid"] + if len(dc): + p1, p99 = np.percentile(dc, 1), np.percentile(dc, 99) + ax_depth.hist(np.clip(dc, p1 * 1.5, p99 * 1.5), bins=80, + color="steelblue" if diag["pct_front"] > 90 else "tomato", + alpha=0.8) + ax_depth.axvline(0, color="red", lw=1.5, label="camera plane") + ax_depth.set_xlabel("depth in camera space", fontsize=7) + ax_depth.set_title( + f"front {diag['pct_front']:.0f}% behind {diag['pct_behind']:.0f}%\n" + f"median={diag['depth_median']:.2f} p5={diag['depth_p5']:.2f} p95={diag['depth_p95']:.2f}", + fontsize=7) + ax_depth.legend(fontsize=6) + ax_depth.tick_params(labelsize=6) + + # ── reprojection scatter ─────────────────────────────────────── + uv = diag["uv_reprojected"] + H, W = diag["H"], diag["W"] + if diag["img_display"] is not None: + ax_reproj.imshow(diag["img_display"], alpha=0.5) + in_frame = ((uv[:, 0] >= 0) & (uv[:, 0] < W) & + (uv[:, 1] >= 0) & (uv[:, 1] < H)) + MAX_PTS = 2000 + if in_frame.sum(): + idx = np.random.choice(in_frame.sum(), + min(MAX_PTS, in_frame.sum()), replace=False) + ax_reproj.scatter(uv[in_frame][idx, 0], uv[in_frame][idx, 1], + s=0.3, alpha=0.4, c="lime") + pct_in = 100 * in_frame.mean() + ax_reproj.set_xlim(0, W); ax_reproj.set_ylim(H, 0) + ax_reproj.set_title(f"reprojection {pct_in:.0f}% in frame", fontsize=8) + ax_reproj.axis("off") + + # ── text summary ─────────────────────────────────────────────── + pos = diag["cam_pos"] + fwd = diag["cam_forward"] + txt = (f"frame {f}\n" + f"pos [{pos[0]:.2f}, {pos[1]:.2f}, {pos[2]:.2f}]\n" + f"fwd [{fwd[0]:.2f}, {fwd[1]:.2f}, {fwd[2]:.2f}]\n" + f"valid pts: {diag['n_valid']:,}\n" + f"front: {diag['pct_front']:.1f}%\n" + f"behind: {diag['pct_behind']:.1f}%") + color = "limegreen" if diag["pct_front"] > 90 else \ + "orange" if diag["pct_front"] > 50 else "red" + ax_text.text(0.05, 0.95, txt, transform=ax_text.transAxes, + fontsize=8, va="top", fontfamily="monospace", + bbox=dict(boxstyle="round", facecolor=color, alpha=0.3)) + ax_text.axis("off") + + +def 
main(): + parser = argparse.ArgumentParser() + parser.add_argument("npz", help="Path to predictions.npz") + parser.add_argument("--frames", nargs="+", type=int, default=None, + help="Frame indices to inspect (default: evenly spaced 5 frames)") + parser.add_argument("--conf_threshold", type=float, default=2.0) + parser.add_argument("--save", default=None, + help="Directory to save debug figures") + args = parser.parse_args() + + print(f"Loading {args.npz} ...") + d = np.load(args.npz, allow_pickle=False) + + world_points = d["world_points"] # (S, H, W, 3) + extrinsic = d["extrinsic"] # (S, 3, 4) c2w + intrinsic = d["intrinsic"] # (S, 3, 3) + images = d.get("images") # (S, 3, H, W) or None + depth_conf = (d.get("depth_conf") if "depth_conf" in d + else d.get("world_points_conf") if "world_points_conf" in d + else None) + chunk_scales = d["chunk_scales"] if "chunk_scales" in d else None # (num_windows,) + + S = world_points.shape[0] + + # ── Window alignment scales ──────────────────────────────────── + if chunk_scales is not None: + cs = chunk_scales.reshape(-1) + print(f"\n=== Window alignment scales (chunk_scales) ===") + print(f" num windows : {len(cs)}") + print(f" values : {np.array2string(cs, precision=4, separator=', ')}") + bad = (cs < 0.01) | (cs > 100) + if bad.any(): + print(f" WARNING: windows {np.where(bad)[0].tolist()} have extreme scales " + f"(clamped to 1e-3 or 1e3) — alignment failed for these windows") + else: + print(f" All scales look reasonable (range [{cs.min():.4f}, {cs.max():.4f}])") + + # ── Full-sequence front% scan ───────────────────────────────── + print(f"\n=== Front% scan (every 10th frame) ===") + stride = max(1, S // 50) + scan_frames = list(range(0, S, stride)) + front_pcts = [] + for fi in scan_frames: + pts = world_points[fi].reshape(-1, 3) + c2w = extrinsic[fi] + w2c = c2w_to_w2c(c2w) + _, dc = project_to_camera(pts, w2c, intrinsic[fi]) + finite = np.isfinite(pts).all(axis=1) + conf_ok = depth_conf[fi].reshape(-1) > 
args.conf_threshold if depth_conf is not None else finite + valid = finite & conf_ok + pct = 100 * (dc[valid] > 0).sum() / max(valid.sum(), 1) + front_pcts.append(float(pct)) + + front_arr = np.array(front_pcts) + bad_frames = [scan_frames[i] for i, p in enumerate(front_pcts) if p < 50] + good_frames = [scan_frames[i] for i, p in enumerate(front_pcts) if p >= 90] + print(f" Frames with <50% front (flipped): {bad_frames}") + print(f" Frames with >=90% front (correct): count={len(good_frames)}/{len(scan_frames)}") + + frames = args.frames or [0, S // 4, S // 2, 3 * S // 4, S - 1] + frames = [min(f, S - 1) for f in frames] + print(f"\nDetail frames: {frames}") + + # ── Per-frame text summary ───────────────────────────────────── + print(f"\n{'frame':>6} {'front%':>7} {'behind%':>8} {'depth_median':>12} " + f"{'cam_pos':>30} {'cam_fwd':>30}") + diags = [] + for fi in frames: + diag = analyze_frame(fi, world_points, extrinsic, intrinsic, + images, depth_conf, args.conf_threshold) + diags.append(diag) + fwd = diag["cam_forward"] + print(f"{fi:6d} {diag['pct_front']:7.1f} {diag['pct_behind']:8.1f} " + f"{diag['depth_median']:12.3f} " + f"[{diag['cam_pos'][0]:5.2f},{diag['cam_pos'][1]:5.2f},{diag['cam_pos'][2]:5.2f}] " + f"[{fwd[0]:5.2f},{fwd[1]:5.2f},{fwd[2]:5.2f}]") + + # ── Diagnosis ───────────────────────────────────────────────── + print() + avg_front = np.mean([d["pct_front"] for d in diags]) + pct_bad_windows = 100 * len(bad_frames) / max(len(scan_frames), 1) + if avg_front > 90: + print("✓ Points are mostly in front of cameras — geometry looks correct.") + print(" Blank viewer / bad PLY is likely a density/scale issue, not a logic bug.") + elif pct_bad_windows > 10 and chunk_scales is not None and ((chunk_scales.reshape(-1) < 0.01).any()): + print("✗ Window scale clamped to minimum — depth-ratio alignment failed.") + print(" Likely cause: near-zero or negative depth in overlap frames.") + print(" Fix: increase --overlap_size or --num_scale_frames.") + elif 
pct_bad_windows > 10: + print("✗ Many frames have points behind cameras.") + print(" Pattern: check if bad frames cluster at window boundaries.") + print(" If clustered → windowed stitching issue (overlap too small).") + print(" If scattered → model output inconsistency (try --mode streaming).") + else: + print("△ Partial front/behind mix — possible coordinate convention mismatch.") + + # ── Figures ─────────────────────────────────────────────────── + n = len(diags) + fig, axes = plt.subplots(n, 4, figsize=(16, 4 * n), + gridspec_kw={"width_ratios": [2, 2, 2, 1]}) + if n == 1: + axes = [axes] + for row, diag in zip(axes, diags): + plot_frame(row, diag) + + plt.suptitle("Reconstruction debug: per-frame geometry check", fontsize=11) + plt.tight_layout() + + if args.save: + os.makedirs(args.save, exist_ok=True) + path = os.path.join(args.save, "debug_frames.png") + plt.savefig(path, dpi=120) + print(f"\nSaved → {path}") + else: + plt.show() + + +if __name__ == "__main__": + main() diff --git a/tools/visualize_cameras.py b/tools/visualize_cameras.py new file mode 100644 index 0000000..a790fa1 --- /dev/null +++ b/tools/visualize_cameras.py @@ -0,0 +1,97 @@ +#!/usr/bin/env python3 +"""Visualize camera trajectory from cameras.json produced by demo.py. 
+ +Usage: + python tools/visualize_cameras.py output/cameras.json + python tools/visualize_cameras.py output/cameras.json --save trajectory.png +""" +import argparse +import json +import numpy as np +import matplotlib.pyplot as plt +from mpl_toolkits.mplot3d import Axes3D # noqa: F401 + + +def draw_camera(ax, c2w, scale=0.05, color="steelblue"): + """Draw a small camera frustum: 3 axes + pyramid outline.""" + origin = c2w[:3, 3] + # Unit axes in camera space (right=x, up=-y, forward=z) + axes = c2w[:3, :3] @ np.array([[1, 0, 0], [0, -1, 0], [0, 0, 1]], dtype=float).T + colors = ["red", "green", color] + labels = ["X", "Y", "Z"] + for i, (col, lbl) in enumerate(zip(colors, labels)): + tip = origin + axes[:, i] * scale + ax.plot(*zip(origin, tip), color=col, linewidth=1) + + # Frustum corners (simplified: just 4 corner rays) + corners_cam = np.array([[1, 1, 2], [-1, 1, 2], [-1, -1, 2], [1, -1, 2]], dtype=float) * scale * 0.5 + corners_world = (c2w[:3, :3] @ corners_cam.T).T + origin + for corner in corners_world: + ax.plot(*zip(origin, corner), color=color, linewidth=0.5, alpha=0.5) + # Close the frustum rectangle + rect = np.vstack([corners_world, corners_world[0]]) + ax.plot(rect[:, 0], rect[:, 1], rect[:, 2], color=color, linewidth=0.5, alpha=0.5) + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("cameras_json", help="Path to cameras.json") + parser.add_argument("--save", default=None, help="Save figure to file instead of showing") + parser.add_argument("--skip", type=int, default=1, help="Draw every N-th camera (default 1 = all)") + parser.add_argument("--frustum_scale", type=float, default=None, help="Frustum size (auto if unset)") + args = parser.parse_args() + + with open(args.cameras_json) as f: + cameras = json.load(f) + + positions = np.array([c["c2w"][i][3] for c in cameras for i in range(3)]).reshape(-1, 3) + positions = np.array([[c["c2w"][0][3], c["c2w"][1][3], c["c2w"][2][3]] for c in cameras]) + + span = np.max(positions, 
axis=0) - np.min(positions, axis=0) + scale = float(np.max(span)) * 0.04 if args.frustum_scale is None else args.frustum_scale + + fig = plt.figure(figsize=(10, 8)) + ax = fig.add_subplot(111, projection="3d") + + # Trajectory line + ax.plot(positions[:, 0], positions[:, 1], positions[:, 2], + color="gray", linewidth=1, alpha=0.6, label="trajectory") + + # Start / end markers + ax.scatter(*positions[0], color="lime", s=80, zorder=5, label="start") + ax.scatter(*positions[-1], color="red", s=80, zorder=5, label="end") + + # Camera frustums + for i, cam in enumerate(cameras): + if i % args.skip != 0: + continue + c2w = np.array(cam["c2w"]) # (3, 4) + c2w_4x4 = np.eye(4) + c2w_4x4[:3, :] = c2w + t = i / max(len(cameras) - 1, 1) + color = plt.cm.cool(t) + draw_camera(ax, c2w_4x4, scale=scale, color=color) + + ax.set_xlabel("X") + ax.set_ylabel("Y") + ax.set_zlabel("Z") + ax.set_title(f"Camera trajectory ({len(cameras)} frames)") + ax.legend() + + # Equal aspect ratio + center = positions.mean(axis=0) + half = float(np.max(span)) * 0.55 + ax.set_xlim(center[0] - half, center[0] + half) + ax.set_ylim(center[1] - half, center[1] + half) + ax.set_zlim(center[2] - half, center[2] + half) + + plt.tight_layout() + if args.save: + plt.savefig(args.save, dpi=150) + print(f"Saved → {args.save}") + else: + plt.show() + + +if __name__ == "__main__": + main()