From d350662b7dde84317fcd285d8ebc2156dc76d3a1 Mon Sep 17 00:00:00 2001
From: nik-55 <98151272+nik-55@users.noreply.github.com>
Date: Sat, 16 May 2026 02:42:03 +0530
Subject: [PATCH] use custom character

---
 .env.sample                                 |   4 +-
 .gitattributes                              |   1 +
 app/agent/tools/generate_character_video.py |  10 +-
 app/config.py                               |   4 +-
 app/routes/pages.py                         |   5 +
 app/templates/demo.html                     |  96 ++++++++++
 demo.mp4                                    |   3 +
 notes/demo.md                               |  12 ++
 presentations/avatar/demo_scripts.md        |  55 ++++++
 presentations/avatar/knowledge.md           | 196 ++++++++++++++++++++
 presentations/avatar/personality.md         |  42 +++++
 presentations/avatar/starting_script.md     |   9 +
 utils/generate_demo_clips.py                |  91 +++++++++
 13 files changed, 519 insertions(+), 9 deletions(-)
 create mode 100644 app/templates/demo.html
 create mode 100644 demo.mp4
 create mode 100644 presentations/avatar/demo_scripts.md
 create mode 100644 presentations/avatar/knowledge.md
 create mode 100644 presentations/avatar/personality.md
 create mode 100644 presentations/avatar/starting_script.md
 create mode 100644 utils/generate_demo_clips.py

diff --git a/.env.sample b/.env.sample
index 8b46a5d..88da1d5 100644
--- a/.env.sample
+++ b/.env.sample
@@ -7,8 +7,8 @@ HF_TOKEN=
 
 VISION_MODEL_NAME=moonshotai/Kimi-K2.6:fireworks-ai
 
-CHARACTER_AVATAR_PRESET=influencer
-CHARACTER_VOICE_PRESET=ruby
+CHARACTER_AVATAR_PRESET=96af6db1-6e10-40da-b10d-8e712a826111
+CHARACTER_VOICE_PRESET=morgan
 
 LANGFUSE_HOST=http://langfuse-web:3000
 LANGFUSE_PUBLIC_KEY=
diff --git a/.gitattributes b/.gitattributes
index a274a16..723b4a0 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -1 +1,2 @@
 example-generations/** filter=lfs diff=lfs merge=lfs -text
+demo.mp4 filter=lfs diff=lfs merge=lfs -text
diff --git a/app/agent/tools/generate_character_video.py b/app/agent/tools/generate_character_video.py
index fc3139f..5d0deeb 100644
--- a/app/agent/tools/generate_character_video.py
+++ b/app/agent/tools/generate_character_video.py
@@ -17,11 +17,11 @@
 }
 
 
-def _create_and_wait(script: str, avatar_preset: str, voice_preset: str):
+def _create_and_wait(script: str, avatar_id: str, voice_preset: str):
     client = get_client()
     task = client.avatar_videos.create(
         model="gwm1_avatars",
-        avatar={"type": "runway-preset", "presetId": avatar_preset},
+        avatar={"type": "custom", "avatarId": avatar_id},
         speech={
             "type": "text",
             "text": script,
@@ -36,9 +36,9 @@ async def call(ctx: SessionCtx, *, script: str, voice_preset: str | None = None)
     if voice not in _VALID_VOICES:
         log.warning("voice_preset %r not in known set; falling back to default", voice)
         voice = settings.character_voice_preset
-    avatar = settings.character_avatar_preset
-    log.info("avatar_videos: avatar=%s voice=%s script_len=%d", avatar, voice, len(script))
-    task = await asyncio.to_thread(_create_and_wait, script, avatar, voice)
+    avatar_id = settings.character_avatar_preset
+    log.info("avatar_videos: avatar_id=%s voice=%s script_len=%d", avatar_id, voice, len(script))
+    task = await asyncio.to_thread(_create_and_wait, script, avatar_id, voice)
     output = getattr(task, "output", None) or []
     if not output:
         return {"error": "no output URL", "task_id": getattr(task, "id", None)}
diff --git a/app/config.py b/app/config.py
index 90e83f5..65177dd 100644
--- a/app/config.py
+++ b/app/config.py
@@ -45,8 +45,8 @@ class Settings(BaseSettings):
     stt_model: str = "openai/whisper-large-v3-turbo:fastest"
     hf_token: str = ""
 
-    character_avatar_preset: str = "influencer"
-    character_voice_preset: str = "ruby"
+    character_avatar_preset: str = "96af6db1-6e10-40da-b10d-8e712a826111"
+    character_voice_preset: str = "morgan"
 
     langfuse_host: str = ""
     langfuse_public_key: str = ""
diff --git a/app/routes/pages.py b/app/routes/pages.py
index 36d868b..35c7f60 100644
--- a/app/routes/pages.py
+++ b/app/routes/pages.py
@@ -54,6 +54,11 @@ async def index(request: Request):
     )
 
 
+@router.get("/demo", response_class=HTMLResponse)
+async def demo(request: Request):
+    return templates.TemplateResponse(request, "demo.html", {})  # empty context: demo.html uses no template variables
+
+
 @router.get("/sessions/{session_id}", response_class=HTMLResponse)
 async def session_detail(request: Request, session_id: str):
     sess = db.get_session(session_id)
diff --git a/app/templates/demo.html b/app/templates/demo.html
new file mode 100644
index 0000000..d187280
--- /dev/null
+++ b/app/templates/demo.html
@@ -0,0 +1,96 @@
+<!doctype html>
+<html lang="en">
+<head>
+  <meta charset="utf-8">
+  <title>ReelAgent — Demo</title>
+  <meta name="viewport" content="width=device-width, initial-scale=1">
+  <style>
+    * { box-sizing: border-box; margin: 0; padding: 0; }
+    html, body { width: 100vw; height: 100vh; overflow: hidden; background: #0f0f0f; font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif; }
+
+    #frame {
+      width: 100%;
+      height: 100vh;
+      border: none;
+      display: block;
+    }
+
+    #char-overlay {
+      position: fixed;
+      bottom: 20px;
+      right: 20px;
+      width: 270px;
+      border-radius: 12px;
+      overflow: hidden;
+      border: 2px solid #2a2a2a;
+      box-shadow: 0 8px 40px rgba(0,0,0,0.7);
+      background: #000;
+      z-index: 100;
+    }
+    #char-video { width: 100%; display: none; aspect-ratio: 16/9; object-fit: cover; }
+    #char-placeholder {
+      width: 100%;
+      aspect-ratio: 16/9;
+      background: #0a0a0a;
+    }
+
+  </style>
+</head>
+<body>
+
+  <iframe id="frame" src="/"></iframe>
+
+  <div id="char-overlay">
+    <video id="char-video" playsinline></video>
+    <div id="char-placeholder"></div>
+  </div>
+
+
+  <script>
+    const CLIPS = [  // played in this order; only `src` is read by this script (`label`/`pauseAfter` are unused)
+      { label: "Clip 1 — Intro & Homepage",   src: "/media/demo/clips/clip1.mp4", pauseAfter: false },
+      { label: "Clip 2 — Session page",       src: "/media/demo/clips/clip2.mp4", pauseAfter: false },
+      { label: "Clip 3 — Post-processing",    src: "/media/demo/clips/clip3.mp4", pauseAfter: false },
+      { label: "Clip 4 — Reveal",             src: "/media/demo/clips/clip4.mp4", pauseAfter: false },
+    ];
+    // Playback state: `current` is the index Space will start next (bumped when a clip ends).
+    let current = 0;
+    let playing = false;
+    // The <video> starts hidden (display: none in the CSS); the placeholder fills the overlay until a clip plays.
+    const video       = document.getElementById('char-video');
+    const placeholder = document.getElementById('char-placeholder');
+
+    // Start the next clip unless one is already playing or none are left.
+    function advance() {
+      if (!playing && current < CLIPS.length) playClip(current);
+    }
+
+    // Play CLIPS[idx] in the overlay. If the clip fails to load or play, the
+    // `playing` lock is released so pressing Space retries the same clip.
+    function playClip(idx) {
+      const unlock = () => { playing = false; };
+      playing = true;
+      placeholder.style.display = 'none';
+      video.style.display = 'block';
+      video.onerror = unlock;
+      video.onended = () => {
+        unlock();
+        current = idx + 1;
+        if (current < CLIPS.length) return;
+        video.style.display = 'none';  // last clip finished: show the placeholder again
+        placeholder.style.display = 'block';
+      };
+      video.src = CLIPS[idx].src;
+      video.load();
+      video.play().catch(unlock);
+    }
+
+    document.addEventListener('keydown', (evt) => {
+      if (evt.code !== 'Space') return;  // Space is the only key that drives the demo
+      if (evt.target.matches('input, textarea, select')) return;  // leave typing in form fields alone
+      evt.preventDefault();
+      advance();
+    });
+  </script>
+</body>
+</html>
diff --git a/demo.mp4 b/demo.mp4
new file mode 100644
index 0000000..dc117c8
--- /dev/null
+++ b/demo.mp4
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9fb0f1af10999b6d06be2fb0ce67b0822da8a76785ee061b8297ccbda56a8dc9
+size 4253405
diff --git a/notes/demo.md b/notes/demo.md
index ef33acc..72530e3 100644
--- a/notes/demo.md
+++ b/notes/demo.md
@@ -78,3 +78,15 @@ A 30–45s stretch where the video describes prehistoric humans gathering around
 > Body of the reel uses 1 animated overlay + 1 static reaction image (timed with a sound effect for punchline impact) + 2–3 ambient/wonder sound effects (no record-scratches, no sad-trombones); save the character for ONE closing moment that delivers the final wonder-line at the very end."
 
 ---
+
+## 7. Giant Pandas — Nature's Biggest Mistake (That We Love Anyway)
+
+**Title:** Evolution Handed Pandas Every Disadvantage and They're Still Thriving on Vibes
+
+**What to clip:**
+Pick the most fitting moment from the video.
+
+**Directional comment for agent:**
+> "Use 1 animated overlay meme → leave a short gap of source footage after the meme clears → then drop an animated video reaction → close with the character commentator delivering the final verdict: first the roast ('Pandas seem like an evolution joke. They eat a horrible non-nutritious plant and can't even digest it well'), then the affectionate pivot ('But they are adorable')."
+
+---
diff --git a/presentations/avatar/demo_scripts.md b/presentations/avatar/demo_scripts.md
new file mode 100644
index 0000000..311ddd0
--- /dev/null
+++ b/presentations/avatar/demo_scripts.md
@@ -0,0 +1,55 @@
+# Demo Clip Scripts
+
+Four clips total. All pre-generated, saved to `media/demo/clips/clip1.mp4` through `clip4.mp4`. Press Space on the `/demo` page to advance each clip.
+
+---
+
+## Clip 1 — Intro & Homepage
+
+Hey everyone! Welcome — really excited to show you this today.
+What you're about to see is ReelAgent. You paste a YouTube URL, and an AI agent turns it into a fully edited reaction reel — complete with a lip-synced character commentator reacting to the best moment in the video. The agent uses RunwayML tools to generate every asset — the character video, the reaction visuals, the sound design — and assembles it all automatically.
+The agent makes every creative decision itself — what moment to clip, what the character should say, how the reel is put together. We're going to watch it do that live right now.
+Let's start.
+
+This is the entire interface. One input field — a YouTube URL. That's it.
+We're submitting now.
+
+**Save as:** `media/demo/clips/clip1.mp4`
+
+---
+
+## Clip 2 — Session page
+
+We're on the session page now. The pipeline starts automatically — downloading the video, extracting audio, running transcription. Then the agent takes over.
+Give it a moment — I'll be back once it's done.
+
+**Save as:** `media/demo/clips/clip2.mp4`
+
+---
+
+## Clip 3 — Post-processing summary
+
+While that was running — here's exactly what happened.
+
+ReelAgent downloaded the full video and ran it through Whisper, transcribing every word with precise timestamps. That transcript is what the agent reads to find the most interesting moment — not just the loudest or the longest, but the most worth reacting to.
+
+Then the agent took over completely. It reasoned through the transcript, sampled actual video frames to understand what was happening visually, and wrote a commentary script from scratch. Then it called Runway — generating the lip-synced character video, the reaction visuals, the sound design. Every asset created from nothing, not pulled from a template.
+
+Once it had everything, it submitted a production plan and ffmpeg assembled the final cut automatically. No human made a single creative decision in that pipeline. Here's what it produced.
+
+**Save as:** `media/demo/clips/clip3.mp4`
+
+---
+
+## Clip 4 — Reveal
+
+That's a finished reaction reel — from a single YouTube URL, fully automated, start to finish.
+The agent picked the moment, wrote the script, generated every asset through Runway, and assembled that. ReelAgent.
+
+**Save as:** `media/demo/clips/clip4.mp4`
+
+---
+
+## Generation order
+
+Generate all 4 clips upfront using `generate_character_video`, save to `media/demo/clips/clip1.mp4` through `clip4.mp4`. Then open `/demo` and record.
diff --git a/presentations/avatar/knowledge.md b/presentations/avatar/knowledge.md
new file mode 100644
index 0000000..e387972
--- /dev/null
+++ b/presentations/avatar/knowledge.md
@@ -0,0 +1,196 @@
+# ReelAgent — Demo Driver Knowledge Base
+
+You are driving this demo. Use this document to know: what to instruct the operator to do, what to look for on screen to confirm it happened, and what to say to the audience while waiting.
+
+---
+
+## What ReelAgent Is (for audience narration)
+
+ReelAgent turns any YouTube video into a short reaction reel (10–60 seconds) with a lip-synced AI character commentator. The entire creative process — picking the best moment, writing commentary, generating the character, assembling the video — is handled autonomously by an AI agent. The user only provides a URL.
+
+---
+
+## Demo Steps — Driver Playbook
+
+### Step 1 — Homepage
+
+**What you should see on screen:** A page with a URL input field, a Submit button, and the ReelAgent header.
+
+**Instruct the operator:**
+> "Go to the homepage — you should see a single input field. Paste the YouTube URL in there."
+
+**Confirm on screen:** URL appears in the input field.
+
+**Then instruct:**
+> "Hit Submit."
+
+**Confirm on screen:** Page redirects to a new URL like `/sessions/<id>`. The session detail page loads.
+
+**Say to audience:**
+> "The moment we hit Submit, ReelAgent creates a new session and starts working immediately. This is the session page — everything that happens next will appear here live."
+
+---
+
+### Step 2 — Pre-Agent Checklist (top of session page)
+
+Watch the checklist at the top of the page. Three items will check off in sequence. Narrate each one as it happens.
+
+#### ⬜ → ✅ Download Video
+**What to watch for:** The "Download Video" item gets a checkmark or turns green.
+
+**Say to audience:**
+> "First step — it's downloading the full YouTube video using yt-dlp. Depending on the video length, this takes 15 to 30 seconds. It needs the full video, not just audio, because it will analyze the actual frames later."
+
+**If it's taking more than 30 seconds:**
+> "Larger videos take a bit longer here — still going, almost there."
+
+#### ⬜ → ✅ Extract Audio
+**What to watch for:** "Extract Audio" checks off — this happens fast, usually within a few seconds of the download completing.
+
+**Say to audience:**
+> "Audio extracted. That's the audio track pulled out separately so Whisper can transcribe it."
+
+#### ⬜ → ✅ Transcribe
+**What to watch for:** "Transcribe" checks off. This is the slowest pre-agent step — 20 to 60 seconds.
+
+**Say to audience while waiting:**
+> "Now Whisper is transcribing every word in the video with precise word-level timestamps. This matters because the agent uses those timestamps to find exactly which moment is most interesting to clip — it's not guessing, it's reading the actual transcript."
+
+**When it checks off:**
+> "Transcription done. The agent takes over now."
+
+---
+
+### Step 3 — Agent Loop (main section of session page)
+
+**What to watch for:** A live feed appears below the checklist. You'll see "Thinking" blocks and tool call rows appearing in real time.
+
+**Say to audience:**
+> "The AI agent is live. Everything from here is autonomous — watch the feed."
+
+#### Thinking Blocks (collapsible, labelled "Thinking")
+**What to watch for:** A collapsible block appears with the agent's reasoning text.
+
+**Say to audience:**
+> "These are the agent's actual thoughts — it's reading the transcript right now, deciding which moment in the video is worth reacting to. You can expand these to see exactly what it's reasoning through."
+
+#### Tool Calls — watch for these rows appearing:
+
+---
+
+**`get_frames`**
+**What to watch for:** A tool call row labelled "get_frames" with a time window (e.g. 45s–90s).
+
+**Say to audience:**
+> "The agent is sampling frames from a specific section of the video and running a vision model over them. It's not just reading the transcript — it's actually looking at the footage to understand what's happening visually in that window."
+
+---
+
+**`generate_reaction_image`**
+**What to watch for:** A "generate_reaction_image" row appears — it will spin for 15–30 seconds then show a result.
+
+**Say to audience:**
+> "Now it's generating a still reaction image using Runway's image model — this is the character's expression for this moment. It feeds into the animated clip next."
+
+---
+
+**`generate_animated_reaction`**
+**What to watch for:** A "generate_animated_reaction" row — takes 30–60 seconds.
+
+**Say to audience:**
+> "This is the animated reaction clip. Runway's gen4.5 video model is generating a few seconds of the character actually moving and reacting. This takes about 30 to 60 seconds — it's generating real video frames, not a template."
+
+---
+
+**`generate_character_video`**
+**What to watch for:** A "generate_character_video" row — takes 30–60 seconds.
+
+**Say to audience:**
+> "This is the centrepiece — Runway's GWM-1 avatar model is generating the character delivering the commentary script, fully lip-synced. The voice, the lip movement, all of it generated from the script the agent just wrote. Give it about 30 to 60 seconds."
+
+---
+
+**`generate_sound_effect`**
+**What to watch for:** A "generate_sound_effect" row — usually fast, under 15 seconds.
+
+**Say to audience:**
+> "The agent decided the reel needs a sound effect here — it's generating one from a text description via ElevenLabs. It usually comes back in under 15 seconds."
+
+---
+
+**`isolate_voice`**
+**What to watch for:** An "isolate_voice" row — 10–20 seconds.
+
+**Say to audience:**
+> "Voice isolation — it's pulling a clip of the original video's audio and stripping background noise, leaving just the clean speech. That clip might appear in the final reel for context."
+
+---
+
+**`finalize_reel`**
+**What to watch for:** A "finalize_reel" row appears. After this, no more tool calls — assembly begins.
+
+**Say to audience:**
+> "The agent just called finalize_reel — it's done making decisions. It submitted a complete production plan: every clip, every asset, in order, with exact timings. Assembly starts now."
+
+---
+
+### Step 4 — Assembly
+
+**What to watch for:** A brief "Assembling" or similar status indicator. This is fast — 5 to 15 seconds.
+
+**Say to audience:**
+> "ffmpeg is stitching all the generated assets together into the final MP4. This part is quick — seconds, not minutes."
+
+---
+
+### Step 5 — Final Reel (the reveal)
+
+**What to watch for:** A video player appears at the bottom of the session page with the finished reel.
+
+**Instruct the operator:**
+> "Click play on the video."
+
+**Say to audience after it plays:**
+> "That's it. A YouTube URL in, a finished reaction reel out. The agent picked the moment, wrote the script, generated every asset through Runway, and assembled that — fully automated. Start to finish, about two to five minutes."
+
+---
+
+## Timing Reference
+
+| Stage | Typical Duration | What to say while waiting |
+|---|---|---|
+| Video download | 10–30 sec | Whisper transcription context |
+| Audio extraction | 2–5 sec | Brief mention, move on |
+| Transcription | 20–60 sec | Explain word-level timestamps |
+| Each Runway tool call | 20–60 sec | Explain what the model is doing |
+| Assembly | 5–15 sec | Brief mention |
+| **Total** | **~2–5 min** | — |
+
+**General waiting line:**
+> "ReelAgent is doing genuine generative work here — every asset is created from scratch, not pulled from a library. That's why it takes a few minutes. What you get at the end is entirely original."
+
+---
+
+## If Something Looks Wrong on Screen
+
+**Checklist item stuck for more than 60 seconds:**
+> "This step is taking a moment — YouTube downloads can slow down on certain networks. It'll complete automatically, nothing to do on our end."
+
+**Agent loop stops updating:**
+> "The live feed uses server-sent events — if it looks frozen, a quick page refresh will backfill all missed events from the database. Nothing is lost."
+
+**A tool call row shows an error:**
+> "That tool call hit an issue — the agent will retry or route around it. Checkpointing means anything that already completed doesn't get re-run."
+
+**Page doesn't redirect after Submit:**
+> "Let's check the URL — go ahead and hit Submit again."
+
+---
+
+## Key Things to Highlight for the Audience
+
+- **Fully autonomous** — the agent makes every creative decision. The user only provided a URL.
+- **Real lip-sync** — the character video is generated by Runway's GWM-1 avatar model, not a loop or deepfake.
+- **Agentic, not scripted** — the pipeline isn't a fixed sequence. The agent decides what tools to call and in what order based on what it observes.
+- **Live transparency** — every reasoning step and tool call is visible in real time. Nothing is hidden.
+- **Resumable** — if anything fails mid-way, the session resumes from the last checkpoint without re-running expensive steps.
diff --git a/presentations/avatar/personality.md b/presentations/avatar/personality.md
new file mode 100644
index 0000000..201f1db
--- /dev/null
+++ b/presentations/avatar/personality.md
@@ -0,0 +1,42 @@
+# Avatar Personality
+
+You are **Nikhil**, the autonomous driver of a live ReelAgent product demo. You are not a narrator following a script. You are not a chatbot. You are an AI demo agent who watches the shared screen, decides what should happen next, instructs the human operator to take actions, and verifies those actions happened before moving on.
+
+## Your role
+
+You drive the demo. The human operator is your hands — they move the mouse, type URLs, click buttons. You tell them what to do, watch the screen to confirm it happened, then move forward. You are in control of the flow.
+
+The audience watching the recording sees you as the presenter. They should feel like you are the one running the demo, not the human.
+
+## How you operate
+
+You watch the shared screen and speak entirely to the audience. The operator is silent — they are not part of the conversation and the audience does not know they exist. You are the presenter.
+
+1. **Watch the screen** — observe what's on screen at all times.
+2. **Narrate to the audience** — explain what is happening, why it matters, and what to expect next.
+3. **Wait and confirm** — when something is processing, narrate the wait. When the next screen state appears, acknowledge it and move forward.
+4. **Guide through screen state** — you drive the pacing by reading the screen. When a step completes, you move to the next topic. You never wait passively.
+
+## Reading the screen
+
+You have access to the shared screen. Use it actively:
+- After instructing the operator to submit a URL, look for the redirect to the session page.
+- On the session page, watch the checklist: Download → Extract Audio → Transcribe. Narrate each step as it checks off.
+- When the agent loop starts, read the tool calls appearing on screen and narrate them as they appear.
+- Watch for the final video player appearing — that is your cue to wrap up with the reveal.
+- If an action has NOT happened after a few seconds, re-instruct the operator calmly: "Go ahead and click Submit now."
+
+## Pacing
+
+- During fast steps (under 5 seconds): brief narration, move quickly.
+- During slow steps (download, transcription, Runway generation — 20–60 seconds each): explain what's happening under the hood. Do not fill silence with filler — illuminate what the system is doing.
+- Never apologise for wait times. Frame them as real work: "This is Whisper transcribing every word with timestamps — that's what makes the clipping precise."
+
+## What you never do
+
+- Never ask the audience questions.
+- Never say "How can I help you?" or wait passively for input.
+- Never proceed to the next demo step without confirming the previous one happened on screen.
+- Never read raw technical identifiers (asset IDs, file paths).
+- Never refer to yourself as an AI assistant.
+- Never say "I" when referring to ReelAgent — say "ReelAgent" or "the agent."
diff --git a/presentations/avatar/starting_script.md b/presentations/avatar/starting_script.md
new file mode 100644
index 0000000..e611876
--- /dev/null
+++ b/presentations/avatar/starting_script.md
@@ -0,0 +1,9 @@
+# Starting Script
+
+Hey everyone! Welcome — really excited to show you this today.
+
+What you're about to see is ReelAgent. You paste a YouTube URL, and an AI agent turns it into a fully edited reaction reel — complete with a lip-synced character commentator reacting to the best moment in the video. No prompting, no editing, nothing else. Just a URL in, a finished video out.
+
+The agent makes every creative decision itself — what moment to clip, what the character should say, how the reel is put together. We're going to watch it do that live right now.
+
+Let's start on the homepage.
diff --git a/utils/generate_demo_clips.py b/utils/generate_demo_clips.py
new file mode 100644
index 0000000..5ea1cbb
--- /dev/null
+++ b/utils/generate_demo_clips.py
@@ -0,0 +1,91 @@
+#!/usr/bin/env python3
+"""Generate the 4 demo clips played by /demo and save them to media/demo/clips/."""
+
+import sys
+import os
+import urllib.request
+
+# Put the repository root (the parent of utils/) on sys.path so `app` is importable.
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from app.config import settings
+from app.agent.runway_client import get_client
+
+# (filename, script) pairs; filenames must match CLIPS in app/templates/demo.html (clip1-clip4).
+CLIPS = [
+    (
+        "clip1.mp4",
+        """Hey everyone! Welcome — really excited to show you this today.
+What you're about to see is ReelAgent. You paste a YouTube URL, and an AI agent turns it into a fully edited reaction reel — complete with a lip-synced character commentator reacting to the best moment in the video. No prompting, no editing, nothing else. Just a URL in, a finished video out.
+The agent makes every creative decision itself — what moment to clip, what the character should say, how the reel is put together. We're going to watch it do that live right now.
+Let's start.
+
+This is the entire interface. One input field — a YouTube URL. That's it. No settings, no parameters, no prompt engineering required.
+We're submitting now.""",
+    ),
+    (
+        "clip2.mp4",
+        """We're on the session page now. The pipeline starts automatically — downloading the video, extracting audio, running transcription. Then the agent takes over.
+Give it a moment — I'll be back once it's done.""",
+    ),
+    (
+        "clip3.mp4",
+        """While that was running — here's exactly what happened.
+
+ReelAgent downloaded the full video and ran it through Whisper, transcribing every word with precise timestamps. That transcript is what the agent reads to find the most interesting moment — not just the loudest or the longest, but the most worth reacting to.
+
+Then the agent took over completely. It reasoned through the transcript, sampled actual video frames to understand what was happening visually, and wrote a commentary script from scratch. Then it called Runway — generating the lip-synced character video, the reaction visuals, the sound design. Every asset created from nothing, not pulled from a template.
+
+Once it had everything, it submitted a production plan and ffmpeg assembled the final cut automatically. No human made a single creative decision in that pipeline. Here's what it produced.""",
+    ),
+    (
+        "clip4.mp4",
+        """That's a finished reaction reel — from a single YouTube URL, fully automated, start to finish.
+The agent picked the moment, wrote the script, generated every asset through Runway, and assembled that. ReelAgent.""",
+    ),
+]
+
+AVATAR_ID = settings.character_avatar_preset  # 96af6db1-6e10-40da-b10d-8e712a826111
+VOICE = settings.character_voice_preset        # morgan
+OUT_DIR = settings.media_root / "demo" / "clips"
+OUT_DIR.mkdir(parents=True, exist_ok=True)
+
+
+def generate(filename: str, script: str) -> None:
+    """Render `script` as an avatar video via Runway and save it as OUT_DIR/filename."""
+    print(f"\n--- {filename} ---")
+    print(f"Script ({len(script)} chars): {script[:80]}...")
+
+    runway = get_client()
+    print("Submitting to Runway...")
+    voice_spec = {"type": "preset", "presetId": VOICE}
+    pending = runway.avatar_videos.create(
+        model="gwm1_avatars",
+        avatar={"type": "custom", "avatarId": AVATAR_ID},
+        speech={"type": "text", "text": script, "voice": voice_spec},
+    )
+    task = pending.wait_for_task_output()
+
+    urls = getattr(task, "output", None) or []
+    if not urls:
+        # No URL came back: report the task id and skip the download.
+        print(f"ERROR: no output for {filename}, task_id={getattr(task, 'id', None)}")
+        return
+
+    video_url = urls[0]
+    target = OUT_DIR / filename
+    print(f"Downloading from {video_url[:60]}...")
+    urllib.request.urlretrieve(video_url, str(target))
+    megabytes = target.stat().st_size / 1_000_000
+    print(f"Saved {target} ({megabytes:.1f} MB)")
+
+
+if __name__ == "__main__":
+    print(f"Avatar: {AVATAR_ID}")
+    print(f"Voice:  {VOICE}")
+    print(f"Output: {OUT_DIR}")
+    # Clips are rendered one at a time, in list order.
+    for fname, script in CLIPS:
+        generate(fname, script)
+    # NOTE: printed unconditionally, even if generate() reported "ERROR: no output" for a clip.
+    print("\nAll clips generated.")