vineethvijay · vineethvijay · Mar 14, 2026 · Mar 14, 2026 · Mar 14, 2026
diff --git a/README.md b/README.md
@@ -200,6 +200,7 @@ The **Chunk Generator** supports both CPU encoding (`libx264`) and NVIDIA hardwa
 | POST | `/api/generate_chunk` | Triggers the generator to build new chunks |
 | POST | `/api/skip_to_next` | Skip current chunk and advance to next (audio position preserved) |
 | POST | `/api/skip_to_next_audio` | Skip to the next audio track |
+| POST | `/api/play_chunk` | Play a specific chunk next in the live stream (body: `{"chunk_name": "xyz.mp4"}`) |
 
 ## Troubleshooting
 

diff --git a/app.py b/app.py
@@ -5,6 +5,7 @@
 Pushes pre-generated chunks to RTMP server for continuous live streaming
 """
 
+import html
 import json
 import os
 import re
@@ -98,7 +99,20 @@ def _build_chunks_list(settings=None):
                     try:
                         with open(meta_path, 'r') as _f:
                             meta = _json.load(_f)
-                            source_videos = meta.get('source_videos') or []
+                            raw_sources = meta.get('source_videos') or []
+                            # Normalize: support old [path, ...] and new [{path, model}, ...]
+                            source_videos = []
+                            for item in raw_sources:
+                                if isinstance(item, str):
+                                    source_videos.append({'path': item, 'model': None, 'thumbnail_url': None, 'title': None, 'channel': None})
+                                elif isinstance(item, dict) and 'path' in item:
+                                    source_videos.append({
+                                        'path': item['path'],
+                                        'model': item.get('model'),
+                                        'thumbnail_url': item.get('thumbnail_url'),
+                                        'title': item.get('title'),
+                                        'channel': item.get('channel'),
+                                    })
                             model_info = meta.get('model_info') or []
                             video_codec = meta.get('video_codec')
                             width = meta.get('width')
@@ -271,7 +285,8 @@ def index():
     current_chunk_data = next((c for c in chunks if c['name'] == current_chunk), None) if current_chunk else None
     chunks_excluding_current = [c for c in chunks if c['name'] != current_chunk]
     show_model_column = bool((settings.get('TUBEARCHIVIST_URL') or '').strip() and (settings.get('TUBEARCHIVIST_TOKEN') or '').strip())
-    return render_template('dashboard.html', chunks=chunks, chunks_excluding_current=chunks_excluding_current, current_chunk_data=current_chunk_data, audio_files=audio_files, settings=settings, show_model_column=show_model_column, hls_port=HLS_PORT, sys_info=sys_info, current_chunk=current_chunk, current_audio=current_audio, initial_stream_status=initial_stream_status, stream_stats=stream_stats, initial_chunks_limit=INITIAL_CHUNKS_LIMIT)
+    tubearchivist_url = (settings.get('TUBEARCHIVIST_URL') or '').strip().rstrip('/')
+    return render_template('dashboard.html', chunks=chunks, chunks_excluding_current=chunks_excluding_current, current_chunk_data=current_chunk_data, audio_files=audio_files, settings=settings, show_model_column=show_model_column, tubearchivist_url=tubearchivist_url, hls_port=HLS_PORT, sys_info=sys_info, current_chunk=current_chunk, current_audio=current_audio, initial_stream_status=initial_stream_status, stream_stats=stream_stats, initial_chunks_limit=INITIAL_CHUNKS_LIMIT)
 
 
 def _admin_context():
@@ -394,7 +409,10 @@ def _fetch_og_meta(url: str, timeout: float = 4.0) -> dict:
         if not m_image:
             m_image = re.search(r'<meta[^>]+content=["\']([^"\']+)["\'][^>]+property=["\']og:image["\']', html, re.I)
         if m_title:
-            result['title'] = m_title.group(1).strip()[:120]
+            # `html` is a local string here (the page text searched above) and shadows
+            # the stdlib module, so import the entity decoder under its own name.
+            from html import unescape as _unescape_entities
+            result['title'] = _unescape_entities(m_title.group(1).strip())[:120]
         if m_image:
             result['image'] = m_image.group(1).strip()
     except Exception:
@@ -419,16 +434,20 @@ def _stats_context():
         'total_seconds_streamed': current_status.get('total_seconds_streamed'),
     }
     play_counts = clip_pusher.get_play_counts()
-    # Enrich models with og:title and og:image (cached)
+    # Enrich models with og:title, og:image, and YouTube thumbnail (when video_id available)
     models_enriched = []
-    for model, count in play_counts.get('models', []):
+    for item in play_counts.get('models', []):
+        model, count = item[0], item[1]
+        video_id = item[2] if len(item) > 2 else None
         url = model if model.startswith('http') else 'https://' + model
         meta = _fetch_og_meta(url)
+        title = html.unescape(meta.get('title') or url)
+        thumbnail = f"https://img.youtube.com/vi/{video_id}/hqdefault.jpg" if video_id else meta.get('image')
         models_enriched.append({
             'url': url,
             'count': count,
-            'title': meta.get('title') or url,
-            'image': meta.get('image'),
+            'title': title,
+            'image': thumbnail,
         })
     play_counts = dict(play_counts, models=models_enriched)
     return {'stream_stats': stream_stats, 'play_counts': play_counts}
@@ -743,6 +762,30 @@ def skip_to_next_audio():
     return jsonify({'success': True, 'skipped': skipped})
 
 
+@app.route('/api/play_chunk', methods=['POST'])
+def play_chunk():
+    """Queue a specific chunk to play next in the live stream.
+
+    Expects a JSON object body such as ``{"chunk_name": "xyz.mp4"}``.
+
+    Returns:
+        200 ``{"success": true}`` when ``clip_pusher.play_chunk`` accepts the name;
+        400 when the body is not a JSON object or ``chunk_name`` is missing,
+        empty, or not a string; 404 when ``clip_pusher.play_chunk`` returns
+        a falsy value.
+    """
+    # silent=True: a missing/incorrect Content-Type or malformed JSON yields None
+    # instead of Flask aborting the request with its own error response.
+    data = request.get_json(silent=True)
+    # A valid JSON body may still be a list/string/number, which has no .get().
+    chunk_name = data.get('chunk_name') if isinstance(data, dict) else None
+    if not chunk_name or not isinstance(chunk_name, str):
+        return jsonify({'success': False, 'error': 'Missing or invalid chunk_name'}), 400
+    if not clip_pusher.play_chunk(chunk_name):
+        return jsonify({'success': False, 'error': 'Chunk not found'}), 404
+    return jsonify({'success': True})
+
+
 @app.route('/api/delete_audio', methods=['POST'])
 def delete_audio():
     """Delete an audio file from the filesystem. Requires path within AUDIO_FOLDER."""

diff --git a/clip_pusher.py b/clip_pusher.py
@@ -61,6 +61,8 @@ def __init__(self, chunk_folder: str, rtmp_url: str,
         self._errors         = 0
         self._last_error: Optional[str] = None
         self._streamer_process: Optional[subprocess.Popen] = None
+        self._play_chunk_next: Optional[str] = None
+        self._play_chunk_lock = threading.Lock()
 
         self._load_stream_stats()
 
@@ -160,6 +162,26 @@ def _save_play_counts(self, data: dict) -> None:
         except OSError:
             pass
 
+    def _extract_video_id(self, path: str) -> Optional[str]:
+        """Return the YouTube video ID encoded in a source file name.
+
+        The ID is the file stem when it is exactly 11 characters drawn
+        from ASCII letters, digits, '-' and '_'
+        (e.g. .../UCxxx/dQw4w9WgXcQ.mp4 -> dQw4w9WgXcQ).
+
+        Returns:
+            The stem, or None when it does not have that shape.
+        """
+        stem = os.path.splitext(os.path.basename(path))[0]
+        if len(stem) != 11:
+            return None
+        # str.isalnum() on its own also accepts non-ASCII letters/digits;
+        # checking per character keeps this in line with VIDEO_ID_RE in
+        # scripts/tubearchivist_metadata.py.
+        if all(ch in '-_' or (ch.isascii() and ch.isalnum()) for ch in stem):
+            return stem
+        return None
+
     def _record_play_count(self, chunk_path: str, audio_name: Optional[str]) -> None:
         """Record play count for models (from chunk meta) and audio (current track)."""
         data = self._load_play_counts()
@@ -171,9 +180,31 @@ def _record_play_count(self, chunk_path: str, audio_name: Optional[str]) -> None
             try:
                 with open(meta_path, 'r') as f:
                     meta = json.load(f)
+                raw_sources = meta.get('source_videos') or []
+                model_to_video = {}
+                fallback_vid = None
+                for item in raw_sources:
+                    path = item.get('path') if isinstance(item, dict) else (item if isinstance(item, str) else None)
+                    model = item.get('model') if isinstance(item, dict) else None
+                    if path:
+                        vid = self._extract_video_id(path)
+                        if vid:
+                            if not fallback_vid:
+                                fallback_vid = vid
+                            if model and model not in model_to_video:
+                                model_to_video[model] = vid
                 for m in (meta.get('model_info') or []):
                     if m:
-                        models[m] = models.get(m, 0) + 1
+                        vid = model_to_video.get(m) or fallback_vid
+                        entry = models.get(m)
+                        if isinstance(entry, dict):
+                            entry['count'] = entry.get('count', 0) + 1
+                            if vid:
+                                entry['video_id'] = vid
+                        elif isinstance(entry, (int, float)):
+                            models[m] = {'count': entry + 1, 'video_id': vid} if vid else entry + 1
+                        else:
+                            models[m] = {'count': 1, 'video_id': vid} if vid else 1
             except (json.JSONDecodeError, OSError):
                 pass
 
@@ -189,7 +220,13 @@ def get_play_counts(self) -> dict:
         data = self._load_play_counts()
         models = data.get('models', {})
         audio = data.get('audio', {})
-        top_models = sorted(models.items(), key=lambda x: -x[1])[:20]
+        top_models = []
+        for url, entry in models.items():
+            count = entry.get('count', entry) if isinstance(entry, dict) else entry
+            video_id = entry.get('video_id') if isinstance(entry, dict) else None
+            top_models.append((url, count, video_id))
+        top_models.sort(key=lambda x: -x[1])
+        top_models = top_models[:20]
         top_audio = sorted(audio.items(), key=lambda x: -x[1])[:20]
         return {'models': top_models, 'audio': top_audio}
 
@@ -262,6 +299,25 @@ def skip_to_next_audio(self) -> bool:
             return True
         return False
 
+    def play_chunk(self, chunk_name: str) -> bool:
+        """Request that a specific chunk be streamed next.
+
+        Only the base name of ``chunk_name`` is used, and it must name an
+        existing ``.mp4`` file inside ``self.chunk_folder``. On success the
+        name is stored for the push loop and ``skip_to_next()`` is called.
+
+        Returns:
+            True if the chunk was queued, False if it was rejected.
+        """
+        file_name = os.path.basename(chunk_name)
+        candidate = os.path.join(self.chunk_folder, file_name)
+        if not (file_name.endswith('.mp4') and os.path.isfile(candidate)):
+            return False
+        with self._play_chunk_lock:
+            self._play_chunk_next = file_name
+        self.skip_to_next()
+        return True
+
     # ── Internal ──────────────────────────────────────────────────
 
     def _get_audio_file(self) -> Optional[str]:
@@ -383,6 +433,15 @@ def _push_loop(self):
 
             random.shuffle(chunks)
 
+            with self._play_chunk_lock:
+                next_name = self._play_chunk_next
+                if next_name:
+                    self._play_chunk_next = None
+                    full = os.path.join(self.chunk_folder, next_name)
+                    if full in chunks:
+                        chunks.remove(full)
+                        chunks.insert(0, full)
+
             # Pick one audio track for the whole round; get duration so we can resume position across chunks
             if self._audio_files:
                 if self._persistent_audio_path is None or not os.path.isfile(self._persistent_audio_path):
@@ -437,5 +496,9 @@ def _push_loop(self):
                     except:
                         self._streamer_process.kill()
 
+                with self._play_chunk_lock:
+                    if self._play_chunk_next:
+                        break
+
         print("Clip pusher loop ended")
 
diff --git a/generate_chunk.sh b/generate_chunk.sh
@@ -162,26 +162,44 @@ for i in $(seq 1 "$CHUNKS_PER_RUN"); do
   ffmpeg -y -f concat -safe 0 -i "$CONCAT_LIST" \
     -c copy "$CHUNK_NAME" -loglevel error
 
-  # Write metadata: source videos (full paths), codec, resolution (for dashboard)
+  # Write metadata: source videos (full paths + model per source), codec, resolution (for dashboard)
   META_FILE="$OUTPUT_DIR/${CHUNK_BASE}.meta.json"
   SOURCES_JSON="[]"
-  [ -n "$SOURCE_BASENAMES" ] && SOURCES_JSON=$(echo "$SOURCE_BASENAMES" | sort -u | python3 -c "import sys,json; print(json.dumps([l.strip() for l in sys.stdin if l.strip()]))" 2>/dev/null) || SOURCES_JSON="[]"
-
-  # TubeArchivist metadata (model info from description) — when TUBEARCHIVIST_URL + TOKEN set
-  MODEL_JSON="[]"
-  if [ -n "${TUBEARCHIVIST_URL}" ] && [ -n "${TUBEARCHIVIST_TOKEN}" ]; then
-    TUBE_SCRIPT="${TUBEARCHIVIST_SCRIPT:-/scripts/tubearchivist_metadata.py}"
-    MODELS_TMP=$(mktemp)
-    for path in $(echo "$SOURCE_BASENAMES" | sort -u); do
-      [ -z "$path" ] && continue
-      out=$(python3 "$TUBE_SCRIPT" "$TUBEARCHIVIST_URL" "$TUBEARCHIVIST_TOKEN" "$path" 2>/dev/null || true)
-      model=$(echo "$out" | python3 -c "import sys,json; d=json.load(sys.stdin); m=d.get('model_info'); print(m if m else '')" 2>/dev/null || true)
-      [ -n "$model" ] && echo "$model" >> "$MODELS_TMP"
-    done
-    [ -f "$MODELS_TMP" ] && MODEL_JSON=$(sort -u "$MODELS_TMP" | python3 -c "import sys,json; print(json.dumps([l.strip() for l in sys.stdin if l.strip()]))" 2>/dev/null) || true
-    rm -f "$MODELS_TMP"
+  if [ -n "$SOURCE_BASENAMES" ]; then
+    export TUBEARCHIVIST_URL TUBEARCHIVIST_TOKEN TUBEARCHIVIST_SCRIPT
+    SOURCES_JSON=$(echo "$SOURCE_BASENAMES" | sort -u | python3 -c "
+import sys, json, subprocess, os
+paths = [l.strip() for l in sys.stdin if l.strip()]
+tube_url = (os.environ.get('TUBEARCHIVIST_URL') or '').strip().rstrip('/')
+tube_token = (os.environ.get('TUBEARCHIVIST_TOKEN') or '').strip()
+script = os.environ.get('TUBEARCHIVIST_SCRIPT', '/scripts/tubearchivist_metadata.py')
+sources = []
+for path in paths:
+    model = None
+    thumb = None
+    title = None
+    channel = None
+    if tube_url and tube_token:
+        try:
+            out = subprocess.run([sys.executable, script, tube_url, tube_token, path], capture_output=True, text=True, timeout=12)
+            if out.returncode == 0:
+                d = json.loads(out.stdout or '{}')
+                model = d.get('model_info')
+                thumb = d.get('thumbnail_url')
+                title = d.get('title')
+                channel = d.get('channel')
+        except: pass
+    sources.append({'path': path, 'model': model, 'thumbnail_url': thumb, 'title': title, 'channel': channel})
+print(json.dumps(sources))
+" 2>/dev/null)
+    if [ -z "$SOURCES_JSON" ] || [ "$SOURCES_JSON" = "[]" ]; then
+      SOURCES_JSON=$(echo "$SOURCE_BASENAMES" | sort -u | python3 -c "import sys,json; print(json.dumps([{'path': l.strip(), 'model': None, 'thumbnail_url': None, 'title': None, 'channel': None} for l in sys.stdin if l.strip()]))" 2>/dev/null) || SOURCES_JSON="[]"
+    fi
   fi
 
+  # model_info = unique models from sources (for Models button)
+  MODEL_JSON=$(echo "$SOURCES_JSON" | python3 -c "import sys,json; s=json.load(sys.stdin); m= sorted(set(x.get('model') for x in s if x.get('model'))); print(json.dumps(m))" 2>/dev/null) || MODEL_JSON="[]"
+
   VIDEO_EXTRA=""
   if codec=$(ffprobe -v error -select_streams v:0 -show_entries stream=codec_name -of default=noprint_wrappers=1:nokey=1 "$CHUNK_NAME" 2>/dev/null) && \
      width=$(ffprobe -v error -select_streams v:0 -show_entries stream=width -of default=noprint_wrappers=1:nokey=1 "$CHUNK_NAME" 2>/dev/null) && \

diff --git a/scripts/tubearchivist_metadata.py b/scripts/tubearchivist_metadata.py
@@ -1,11 +1,10 @@
 #!/usr/bin/env python3
 """
-Fetch TubeArchivist video metadata and extract model info from description.
-Model info is typically a line like "Model - https://www.instagram.com/..." in the video description.
+Fetch TubeArchivist video metadata: model from description, thumbnail, title, channel.
 
 Usage:
   tubearchivist_metadata.py <base_url> <token> <video_path_or_id>
-  Prints JSON: {"model_info": "..."} or {"model_info": null} on stdout.
+  Prints JSON: {"model_info": "...", "thumbnail_url": "...", "title": "...", "channel": "..."} on stdout.
   If arg looks like a path (contains /), extracts video_id from path (channel_id/video_id.mp4).
 """
 import json
@@ -15,49 +14,79 @@
 import urllib.request
 from pathlib import Path
 
-# TubeArchivist path: channel_id/video_id.mp4 — video_id is 11 chars
+# TubeArchivist path: channel_id/video_id.mp4 — video_id is 11 chars (YouTube format: alnum, -, _)
+VIDEO_ID_RE = re.compile(r"^[a-zA-Z0-9_-]{11}$")
 MODEL_PATTERN = re.compile(r"Model\s*[-–:]\s*(.+?)(?:\n|$)", re.IGNORECASE | re.DOTALL)
 
 
 def extract_video_id(filepath: str) -> str | None:
     """Extract TubeArchivist video ID from path (e.g. .../UCxxx/abc123.mp4 -> abc123)."""
     stem = Path(filepath).stem
-    if stem and len(stem) == 11 and stem.isalnum():
+    # fullmatch() enforces the 11-character length and the allowed alphabet in one step.
+    if VIDEO_ID_RE.fullmatch(stem):
         return stem
     return None
 
 
-def fetch_model_info(base_url: str, token: str, video_id: str) -> str | None:
-    """Fetch video metadata from TubeArchivist API and extract model line from description."""
+def fetch_video_metadata(base_url: str, token: str, video_id: str) -> dict:
+    """Fetch one video's metadata from the TubeArchivist API.
+
+    Always returns a dict with the keys model_info, thumbnail_url, title and
+    channel. All four are None when the request fails or the response is not
+    a JSON object, so the printed JSON has the same shape on every path.
+    """
+    out = {"model_info": None, "thumbnail_url": None, "title": None, "channel": None}
     url = f"{base_url.rstrip('/')}/api/video/{video_id}/"
     req = urllib.request.Request(url, headers={"Authorization": f"Token {token}"})
     try:
         with urllib.request.urlopen(req, timeout=10) as resp:
             data = json.loads(resp.read().decode())
-    except (urllib.error.HTTPError, urllib.error.URLError, json.JSONDecodeError, OSError):
-        return None
+    except (OSError, ValueError):
+        # OSError covers URLError/HTTPError; ValueError covers JSONDecodeError and
+        # the UnicodeDecodeError that .decode() raises on a non-UTF-8 body.
+        return out
+    if not isinstance(data, dict):
+        return out
     desc = data.get("description") or data.get("description_html") or ""
     desc = re.sub(r"<[^>]+>", "", desc)  # strip HTML if present
     m = MODEL_PATTERN.search(desc)
     if m:
-        return m.group(1).strip()
-    return None
+        out["model_info"] = m.group(1).strip()
+    out["title"] = data.get("title") or data.get("video_title")
+    out["channel"] = data.get("channel_name") or data.get("channel")
+    thumb = (
+        data.get("vid_thumb_url")
+        or data.get("thumbnail_url")
+        or data.get("thumbnail")
+        or data.get("thumbnails")
+    )
+    if isinstance(thumb, list) and thumb:
+        # A list of thumbnails: use the first entry (its "url" when it is a dict).
+        first = thumb[0]
+        thumb = first.get("url") if isinstance(first, dict) else first
+    if isinstance(thumb, str) and thumb:
+        # Server-relative paths are resolved against the TubeArchivist base URL.
+        out["thumbnail_url"] = f"{base_url.rstrip('/')}{thumb}" if thumb.startswith("/") else thumb
+    if not out["thumbnail_url"]:
+        # Nothing usable in the response: fall back to the YouTube thumbnail URL.
+        out["thumbnail_url"] = f"https://img.youtube.com/vi/{video_id}/hqdefault.jpg"
+    return out
 
 
 def main() -> None:
     if len(sys.argv) != 4:
-        print(json.dumps({"model_info": None}))
+        print(json.dumps({"model_info": None, "thumbnail_url": None, "title": None, "channel": None}))
         sys.exit(1)
     _, base_url, token, path_or_id = sys.argv
     if not base_url or not token or not path_or_id:
-        print(json.dumps({"model_info": None}))
+        print(json.dumps({"model_info": None, "thumbnail_url": None, "title": None, "channel": None}))
         sys.exit(1)
     video_id = extract_video_id(path_or_id) if "/" in path_or_id else path_or_id
     if not video_id:
-        print(json.dumps({"model_info": None}))
+        print(json.dumps({"model_info": None, "thumbnail_url": None, "title": None, "channel": None}))
         sys.exit(0)
-    model = fetch_model_info(base_url, token, video_id)
-    print(json.dumps({"model_info": model}))
+    result = fetch_video_metadata(base_url, token, video_id)
+    print(json.dumps(result))
 
 
 if __name__ == "__main__":