diff --git a/README.md b/README.md
index f7c2f11..f92c1c5 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # QuickSilver Pro CLI
 
-`qsp` — a command-line client for [QuickSilver Pro](https://quicksilverpro.io), an OpenAI-compatible inference API for top open-source LLMs (DeepSeek V3, DeepSeek R1, Qwen 3.5) priced 20% below OpenRouter / Together / Fireworks.
+`qsp` — a command-line client for [QuickSilver Pro](https://quicksilverpro.io), one OpenAI-compatible API for top LLMs (DeepSeek, Qwen, Kimi, Gemini) **and** FLUX text-to-image — billed to a single balance. Open-source chat models run 20% below OpenRouter / Together / Fireworks.
 
 Designed to be **AI-agent friendly**: every command accepts `--json` for structured output, exit codes are reliable, and the API surface is intentionally small.
 
@@ -33,11 +33,12 @@ Python 3.9+. Also exports itself as `quicksilverpro` if you prefer the long name
 ## Quick start
 
 ```bash
-qsp init                     # opens dashboard to get a key, stores it locally
-qsp chat "Write me a haiku"  # one-shot streaming chat (deepseek-v3 by default)
-qsp balance                  # current credits
-qsp models                   # supported models with prices & context length
-qsp status                   # live per-model latency
+qsp init                          # opens dashboard to get a key, stores it locally
+qsp chat "Write me a haiku"       # one-shot streaming chat (deepseek-v4-flash by default)
+qsp image "a fox in the snow"     # text-to-image, saves a file (flux.2-pro by default)
+qsp balance                       # current credits
+qsp models                        # supported models with prices & context length
+qsp status                        # live per-model latency
 ```
 
 ---
@@ -52,6 +53,7 @@ qsp status                   # live per-model latency
 | `qsp balance [--json]` | Credit balance + lifetime spend |
 | `qsp models [--json]` | Available models + pricing + context length |
 | `qsp chat "PROMPT" [-m MODEL] [-s SYS] [--max-tokens N] [--temperature F] [--no-stream] [--json]` | One-shot completion, streams to stdout by default |
+| `qsp image "PROMPT" [-m MODEL] [-o FILE] [--size WxH] [-n N] [--json]` | Text-to-image; saves to a file (`flux.2-pro` by default) |
 | `qsp usage [-n 10] [--json]` | Recent calls + aggregate per-model |
 | `qsp status [--json]` | Live health of API + per-model probes |
 | `qsp keys list [--json]` | Your API keys |
@@ -69,6 +71,8 @@ Every command supports `--json` and prints OpenAI-shaped JSON to stdout with err
 qsp models --json | jq '.[].id'
 qsp usage --json  | jq '.totals.cost'
 qsp chat "Summarize: $DOCUMENT" --json --no-stream | jq -r '.choices[0].message.content'
+qsp image "a fox in the snow" -o fox.png        # saves fox.<ext> — <ext> is the format actually returned (e.g. fox.jpg)
+qsp image "a fox in the snow" --json | jq -r '.data[0].b64_json' | base64 -d > fox.png
 ```
 
 Exit codes: `0` success · `1` remote/operational error · `2` usage / auth error.
@@ -83,8 +87,9 @@ Key stored at `~/.config/quicksilverpro/config.json` (chmod 600). Override with:
 - `QSP_API_URL` — default `https://api.quicksilverpro.io/v1`
 - `QSP_AUTH_URL` — default `https://pay.quicksilverpro.io`
 - `QSP_MODEL` — default model for `qsp chat`
+- `QSP_IMAGE_MODEL` — default model for `qsp image`
 - `QSP_CONFIG_DIR` — where to store config
-- `QSP_HTTP_TIMEOUT` — seconds, default 60
+- `QSP_HTTP_TIMEOUT` — seconds, default 60 (image requests default to 180)
 
 ---
 
diff --git a/pyproject.toml b/pyproject.toml
index a6f88d4..0317d1b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,15 +4,16 @@ build-backend = "hatchling.build"
 
 [project]
 name = "quicksilverpro"
-version = "0.1.2"
-description = "CLI for QuickSilver Pro — OpenAI-compatible API for DeepSeek V3, DeepSeek R1, and Qwen 3.5 at 20% below resellers."
+version = "0.2.0"
+description = "CLI for QuickSilver Pro — one OpenAI-compatible API for DeepSeek, Qwen, Kimi, Gemini chat and FLUX image generation."
 readme = "README.md"
 requires-python = ">=3.9"
 license = { text = "MIT" }
 authors = [{ name = "MachineFi Inc.", email = "hello@quicksilverpro.io" }]
 keywords = [
-  "openai", "deepseek", "qwen", "llm", "inference", "api",
-  "chatgpt", "ai", "cli", "openrouter-alternative", "quicksilverpro",
+  "openai", "deepseek", "qwen", "kimi", "gemini", "flux",
+  "llm", "inference", "api", "chatgpt", "ai", "cli",
+  "image-generation", "text-to-image", "openrouter-alternative", "quicksilverpro",
 ]
 classifiers = [
   "Development Status :: 4 - Beta",
diff --git a/src/quicksilverpro/__init__.py b/src/quicksilverpro/__init__.py
index af6a710..83ffd8b 100644
--- a/src/quicksilverpro/__init__.py
+++ b/src/quicksilverpro/__init__.py
@@ -3,4 +3,4 @@
 See https://quicksilverpro.io for docs. `qsp --help` for quick reference.
 """
 
-__version__ = "0.1.2"
+__version__ = "0.2.0"
diff --git a/src/quicksilverpro/cli.py b/src/quicksilverpro/cli.py
index d3d92bf..f3ffa20 100644
--- a/src/quicksilverpro/cli.py
+++ b/src/quicksilverpro/cli.py
@@ -14,6 +14,7 @@
 
 from __future__ import annotations
 
+import base64
 import json
 import os
 import sys
@@ -33,8 +34,12 @@
 DEFAULT_API_URL  = os.environ.get("QSP_API_URL",  "https://api.quicksilverpro.io/v1")
 DEFAULT_AUTH_URL = os.environ.get("QSP_AUTH_URL", "https://pay.quicksilverpro.io")
 DEFAULT_APP_URL  = os.environ.get("QSP_APP_URL",  "https://quicksilverpro.io")
-DEFAULT_MODEL    = os.environ.get("QSP_MODEL",    "deepseek-v3")
+DEFAULT_MODEL    = os.environ.get("QSP_MODEL",    "deepseek-v4-flash")
+DEFAULT_IMAGE_MODEL = os.environ.get("QSP_IMAGE_MODEL", "flux.2-pro")
 HTTP_TIMEOUT     = float(os.environ.get("QSP_HTTP_TIMEOUT", "60"))
+# Image generation can be slower than chat (no token streaming to mask latency),
+# so give it a roomier default unless the user has pinned QSP_HTTP_TIMEOUT.
+IMAGE_TIMEOUT    = float(os.environ.get("QSP_HTTP_TIMEOUT", "180"))
 
 CONFIG_DIR = Path(os.environ.get("QSP_CONFIG_DIR",
                                  Path.home() / ".config" / "quicksilverpro"))
@@ -266,21 +271,57 @@ def balance(as_json: bool) -> None:
 
 # ────────────────────────── qsp models ──────────────────────────
 
-# Hardcoded so `qsp models` works before the user has signed in. Keep in sync
-# with the backend; tests in CI should catch drift.
+# Per-image prices for image-generation models. The live /models endpoint does
+# not expose per-image pricing (its schema is per-token), so the CLI carries it
+# here so `qsp models` can show image costs whether or not the user is signed in.
+_IMAGE_PRICES: dict[str, float] = {
+    "flux.2-pro":     0.027,
+    "flux.1-schnell": 0.0025,
+}
+
+# Hardcoded so `qsp models` works before the user has signed in. A curated view
+# of the lineup — once signed in, `qsp models` shows the full live catalog
+# (incl. Qwen/Kimi/Gemini variants). Keep prices in sync with the backend; CI
+# smoke-tests this path. Prices verified 2026-05-26.
 _MODELS_FALLBACK: list[dict] = [
+    {"id": "deepseek-v4-flash", "object": "model", "owned_by": "quicksilverpro",
+     "context_length": 1048576,
+     "pricing": {"prompt": "0.0000000800", "completion": "0.0000001600"},
+     "best_for": "fast, cheap default · 1M context"},
+    {"id": "deepseek-v4-pro", "object": "model", "owned_by": "quicksilverpro",
+     "context_length": 1048576,
+     "pricing": {"prompt": "0.0000003480", "completion": "0.0000006960"},
+     "best_for": "frontier reasoning + coding · 1M context"},
     {"id": "deepseek-v3", "object": "model", "owned_by": "quicksilverpro",
      "context_length": 131072,
-     "pricing": {"prompt": "0.0000002400", "completion": "0.0000007000"},
-     "best_for": "chat, coding, structured output"},
+     "pricing": {"prompt": "0.0000001600", "completion": "0.0000006160"},
+     "best_for": "balanced chat, coding, structured output"},
     {"id": "deepseek-r1", "object": "model", "owned_by": "quicksilverpro",
      "context_length": 131072,
-     "pricing": {"prompt": "0.0000004000", "completion": "0.0000017000"},
-     "best_for": "math, multi-step reasoning, logic"},
-    {"id": "qwen3.5-35b", "object": "model", "owned_by": "quicksilverpro",
+     "pricing": {"prompt": "0.0000005600", "completion": "0.0000020000"},
+     "best_for": "deep reasoning, math, logic"},
+    {"id": "qwen3.7-max", "object": "model", "owned_by": "quicksilverpro",
+     "context_length": 1048576,
+     "pricing": {"prompt": "0.0000020000", "completion": "0.0000060000"},
+     "best_for": "flagship Qwen · agentic, long-context"},
+    {"id": "qwen3.6-plus", "object": "model", "owned_by": "quicksilverpro",
+     "context_length": 1048576,
+     "pricing": {"prompt": "0.0000002600", "completion": "0.0000015600"},
+     "best_for": "strong general-purpose · 1M context"},
+    {"id": "qwen3.6-35b", "object": "model", "owned_by": "quicksilverpro",
      "context_length": 262144,
-     "pricing": {"prompt": "0.0000001300", "completion": "0.0000010000"},
-     "best_for": "long-context RAG, summarization (thinking model)"},
+     "pricing": {"prompt": "0.0000001200", "completion": "0.0000008000"},
+     "best_for": "efficient long-context RAG (thinking model)"},
+    {"id": "kimi-k2.6", "object": "model", "owned_by": "quicksilverpro",
+     "context_length": 256000,
+     "pricing": {"prompt": "0.0000005840", "completion": "0.0000027900"},
+     "best_for": "agentic, tool use, long-context"},
+    {"id": "flux.2-pro", "object": "model", "owned_by": "quicksilverpro",
+     "price_per_image": 0.027,
+     "best_for": "high-fidelity text-to-image"},
+    {"id": "flux.1-schnell", "object": "model", "owned_by": "quicksilverpro",
+     "price_per_image": 0.0025,
+     "best_for": "fast, cheap text-to-image"},
 ]
 
 
@@ -308,22 +349,50 @@ def models(as_json: bool) -> None:
     _emit(data, as_json=as_json, table_fn=_print_models)
 
 
+def _is_image_model(m: dict) -> bool:
+    """Image-gen models bill per image, not per token. Detect via an explicit
+    per-image price or the known-id allowlist — deliberately NOT by "missing
+    token pricing", which would misfile any sparse/new chat entry (or a
+    token-priced model like gemini-*-image) into the image table. An image
+    model we haven't catalogued yet just shows in the chat table with '—'
+    prices until it's added to _IMAGE_PRICES."""
+    return m.get("price_per_image") is not None or m.get("id") in _IMAGE_PRICES
+
+
+def _image_price(m: dict) -> float | None:
+    p = m.get("price_per_image")
+    return p if p is not None else _IMAGE_PRICES.get(m.get("id", ""))
+
+
 def _print_models(rows: list[dict]) -> None:
-    t = Table(show_lines=False)
-    t.add_column("id", style="bold")
-    t.add_column("context", justify="right")
-    t.add_column("prompt $/M", justify="right")
-    t.add_column("completion $/M", justify="right")
-    for m in rows:
-        p = m.get("pricing") or {}
-        ctx = m.get("context_length")
-        t.add_row(
-            m.get("id", ""),
-            f"{ctx:,}" if ctx else "—",
-            f"{float(p.get('prompt', 0)) * 1_000_000:.2f}" if p else "—",
-            f"{float(p.get('completion', 0)) * 1_000_000:.2f}" if p else "—",
-        )
-    _out.print(t)
+    chat = [m for m in rows if not _is_image_model(m)]
+    imgs = [m for m in rows if _is_image_model(m)]
+
+    if chat:
+        t = Table(show_lines=False)
+        t.add_column("id", style="bold")
+        t.add_column("context", justify="right")
+        t.add_column("prompt $/M", justify="right")
+        t.add_column("completion $/M", justify="right")
+        for m in chat:
+            p = m.get("pricing") or {}
+            ctx = m.get("context_length")
+            t.add_row(
+                m.get("id", ""),
+                f"{ctx:,}" if ctx else "—",
+                f"{float(p.get('prompt', 0)) * 1_000_000:.2f}" if p else "—",
+                f"{float(p.get('completion', 0)) * 1_000_000:.2f}" if p else "—",
+            )
+        _out.print(t)
+
+    if imgs:
+        t = Table(show_lines=False, title="Image generation")
+        t.add_column("id", style="bold")
+        t.add_column("$/image", justify="right")
+        for m in imgs:
+            price = _image_price(m)
+            t.add_row(m.get("id", ""), f"${price:.4f}" if price is not None else "—")
+        _out.print(t)
 
 
 # ────────────────────────── qsp chat ──────────────────────────
@@ -449,6 +518,155 @@ def _chat_stream(key: str, body: dict) -> None:
         )
 
 
+# ────────────────────────── qsp image ──────────────────────────
+
+# Pick the file extension for a returned image: trust the server's format hint
+# (`output_format`) when it names a known type, else sniff the magic bytes.
+def _img_ext(raw: bytes, fmt_hint: str | None) -> str:
+    if fmt_hint:
+        h = fmt_hint.lower().lstrip(".")
+        if h in ("jpg", "jpeg"):  return "jpg"
+        if h in ("png", "webp", "gif"):  return h
+    if raw[:3] == b"\xff\xd8\xff":              return "jpg"
+    if raw[:8] == b"\x89PNG\r\n\x1a\n":         return "png"
+    if raw[:4] == b"RIFF" and raw[8:12] == b"WEBP":  return "webp"
+    if raw[:4] in (b"GIF8",):                   return "gif"
+    return "png"  # safe default
+
+
+def _image_bytes(item: dict) -> bytes:
+    """An image item carries either inline base64 or a URL. Prefer base64 (what
+    the API returns by default); otherwise download the URL. Raises ValueError
+    on a malformed item or bad payload so callers can fail cleanly rather than
+    tracebacking on an AttributeError / silently writing garbage."""
+    if not isinstance(item, dict):
+        raise ValueError("image item was not an object")
+    b64 = item.get("b64_json")
+    if b64:
+        try:
+            # Tolerate MIME-style line wrapping; reject non-alphabet garbage
+            # (validate=True), which would otherwise decode to empty/garbage.
+            cleaned = "".join(b64.split()) if isinstance(b64, str) else b64
+            raw = base64.b64decode(cleaned, validate=True)
+        except (ValueError, TypeError) as e:
+            raise ValueError(f"could not decode base64 image data ({e})") from e
+        if not raw:
+            raise ValueError("decoded image was empty")
+        return raw
+    url = item.get("url")
+    if url:
+        with httpx.Client(timeout=HTTP_TIMEOUT,
+                          headers={"User-Agent": f"quicksilverpro-cli/{__version__}"},
+                          follow_redirects=True) as c:
+            r = c.get(url)
+        r.raise_for_status()
+        return r.content
+    raise ValueError("image response had neither b64_json nor url")
+
+
+def _image_stem(output: str | None) -> Path:
+    """The shared filename base for an invocation: the user's path minus its
+    extension, or a single timestamped default (computed once so every image in
+    a multi-image run shares the same base)."""
+    return Path(output).with_suffix("") if output else Path(f"qsp-image-{int(time.time())}")
+
+
+def _image_path(stem: Path, index: int, count: int, ext: str) -> Path:
+    """One output path. Single image → stem.<ext>; many → stem-1.<ext>, … . The
+    extension is the *actual* returned format (sniffed per image; the caller
+    warns if the user asked for another) and is appended to the stem's name —
+    not set via with_suffix(), which would clip a dotted stem (v1.2 → v1.jpg)."""
+    if count == 1:
+        return stem.parent / f"{stem.name}.{ext}"
+    return stem.parent / f"{stem.name}-{index + 1}.{ext}"
+
+
+@main.command(help="Generate an image from a text prompt and save it to disk.")
+@click.argument("prompt")
+@click.option("-m", "--model", default=DEFAULT_IMAGE_MODEL, show_default=True,
+              help="Image model, e.g. flux.2-pro or flux.1-schnell.")
+@click.option("-o", "--output", default=None,
+              help="Output file. Default: qsp-image-<ts>.<ext> in the current dir. "
+                   "With -n > 1 a 1-based index is inserted before the extension.")
+@click.option("--size", default=None, help="Image size, e.g. 1024x1024 (model-dependent).")
+@click.option("-n", "--n", "count", type=click.IntRange(min=1, max=10), default=1,
+              show_default=True,
+              help="Number of images. Each is a separate request and a separate charge.")
+@click.option("--json", "as_json", is_flag=True,
+              help="Emit the raw JSON response(s) (incl. base64) instead of saving files.")
+def image(prompt: str, model: str, output: str | None, size: str | None,
+          count: int, as_json: bool) -> None:
+    key = _require_key()
+    body: dict = {"model": model, "prompt": prompt}
+    if size:
+        body["size"] = size
+
+    # The image endpoint accepts only n=1, so for multiple images we loop
+    # client-side rather than sending n>1 (which the server rejects). Images can
+    # take longer than chat with no stream to mask the wait, hence IMAGE_TIMEOUT.
+    #
+    # Each generation is a separate paid request, so we emit/write each result
+    # the moment its request succeeds — never batch then flush. That way a
+    # failure on request 3 of 5 still leaves images 1–2 safely on disk (or
+    # already streamed to stdout) rather than discarding work the user paid for.
+    stem = _image_stem(output)
+    with httpx.Client(
+        base_url=DEFAULT_API_URL, timeout=IMAGE_TIMEOUT,
+        headers={"Authorization": f"Bearer {key}",
+                 "User-Agent": f"quicksilverpro-cli/{__version__}",
+                 "Content-Type": "application/json"},
+        follow_redirects=False,
+    ) as c:
+        for i in range(count):
+            r = c.post("/images/generations", json=body)
+            if r.status_code >= 400:
+                _err.print(f"[red]{_extract_error(r)}[/red]")
+                sys.exit(1)
+            resp = r.json()
+
+            # A 200 with no image — or an unexpected response shape — is still a
+            # failure. Fail loudly rather than silently producing fewer outputs
+            # than the user paid for (or tracebacking on a non-dict response).
+            data = resp.get("data") if isinstance(resp, dict) else None
+            if not isinstance(data, list) or not data:
+                _err.print(f"[red]Image {i + 1} of {count}: server returned no image.[/red]")
+                sys.exit(1)
+
+            if as_json:
+                # Stream each response immediately. One object for n=1 (pretty);
+                # one compact object per line (JSONL) for n>1, so an interrupted
+                # multi-image run still leaves the earlier payloads on stdout.
+                click.echo(json.dumps(resp, indent=2 if count == 1 else None))
+                continue
+
+            # Materialize + write each image under a guard so a bad payload
+            # (invalid base64) or a disk problem on image k exits cleanly with a
+            # message rather than a raw traceback. httpx errors from a URL fetch
+            # propagate to run()'s network handler, as elsewhere in the CLI.
+            try:
+                raw = _image_bytes(data[0])
+                ext = _img_ext(raw, resp.get("output_format"))
+                if i == 0:
+                    _warn_ext_override(output, ext)
+                path = _image_path(stem, i, count, ext)
+                path.write_bytes(raw)
+            except (OSError, ValueError) as e:
+                _err.print(f"[red]Failed to save image {i + 1} of {count}:[/red] {e}")
+                sys.exit(1)
+            _out.print(f"✓ Saved [bold]{path}[/bold] [dim]({len(raw):,} bytes)[/dim]")
+
+
+def _warn_ext_override(output: str | None, ext: str) -> None:
+    """The upstream image format is fixed (currently JPEG) and ignores any
+    requested extension. We always save with the real extension so the file is
+    never mislabeled — but if the user asked for a different one, say so."""
+    if output and (want := Path(output).suffix.lower().lstrip(".")):
+        want = "jpg" if want == "jpeg" else want
+        if want != ext:
+            _err.print(f"[yellow]Note:[/yellow] the image API returned {ext.upper()}; "
+                       f"saved with a .{ext} extension (you requested .{want}).")
+
+
 # ────────────────────────── qsp keys ──────────────────────────
 
 @main.group(help="Manage API keys.")
@@ -672,6 +890,14 @@ def run() -> None:
         # is often noisy and unhelpful to end users.
         _err.print(f"[red]Network error:[/red] {type(e).__name__}: {e}")
         sys.exit(1)
+    except json.JSONDecodeError:
+        # A 2xx with a non-JSON / truncated body (proxy error page, partial
+        # read) would otherwise traceback. Fail like any other remote error.
+        _err.print(
+            f"[red]Unexpected response from QuickSilver Pro[/red] (could not parse). "
+            f"Try again, or check [bold]https://quicksilverpro.io/status[/bold]."
+        )
+        sys.exit(1)
     except click.UsageError as e:
         e.show()
         sys.exit(e.exit_code)