diff --git a/README.md b/README.md index fb71da9..5d3b750 100644 --- a/README.md +++ b/README.md @@ -200,6 +200,7 @@ The **Chunk Generator** supports both CPU encoding (`libx264`) and NVIDIA hardwa | POST | `/api/generate_chunk` | Triggers the generator to build new chunks | | POST | `/api/skip_to_next` | Skip current chunk and advance to next (audio position preserved) | | POST | `/api/skip_to_next_audio` | Skip to the next audio track | +| POST | `/api/play_chunk` | Play a specific chunk next in the live stream (body: `{"chunk_name": "xyz.mp4"}`) | ## Troubleshooting diff --git a/app.py b/app.py index 81812a5..0ad585d 100644 --- a/app.py +++ b/app.py @@ -5,6 +5,7 @@ Pushes pre-generated chunks to RTMP server for continuous live streaming """ +import html import json import os import re @@ -98,7 +99,20 @@ def _build_chunks_list(settings=None): try: with open(meta_path, 'r') as _f: meta = _json.load(_f) - source_videos = meta.get('source_videos') or [] + raw_sources = meta.get('source_videos') or [] + # Normalize: support old [path, ...] and new [{path, model}, ...] + source_videos = [] + for item in raw_sources: + if isinstance(item, str): + source_videos.append({'path': item, 'model': None, 'thumbnail_url': None, 'title': None, 'channel': None}) + elif isinstance(item, dict) and 'path' in item: + source_videos.append({ + 'path': item['path'], + 'model': item.get('model'), + 'thumbnail_url': item.get('thumbnail_url'), + 'title': item.get('title'), + 'channel': item.get('channel'), + }) model_info = meta.get('model_info') or [] video_codec = meta.get('video_codec') width = meta.get('width') @@ -271,7 +285,8 @@ def index(): current_chunk_data = next((c for c in chunks if c['name'] == current_chunk), None) if current_chunk else None chunks_excluding_current = [c for c in chunks if c['name'] != current_chunk] show_model_column = bool((settings.get('TUBEARCHIVIST_URL') or '').strip() and (settings.get('TUBEARCHIVIST_TOKEN') or '').strip()) - return render_template('dashboard.html', chunks=chunks, chunks_excluding_current=chunks_excluding_current, current_chunk_data=current_chunk_data, audio_files=audio_files, settings=settings, show_model_column=show_model_column, hls_port=HLS_PORT, sys_info=sys_info, current_chunk=current_chunk, current_audio=current_audio, initial_stream_status=initial_stream_status, stream_stats=stream_stats, initial_chunks_limit=INITIAL_CHUNKS_LIMIT) + tubearchivist_url = (settings.get('TUBEARCHIVIST_URL') or '').strip().rstrip('/') + return render_template('dashboard.html', chunks=chunks, chunks_excluding_current=chunks_excluding_current, current_chunk_data=current_chunk_data, audio_files=audio_files, settings=settings, show_model_column=show_model_column, tubearchivist_url=tubearchivist_url, hls_port=HLS_PORT, sys_info=sys_info, current_chunk=current_chunk, current_audio=current_audio, initial_stream_status=initial_stream_status, stream_stats=stream_stats, initial_chunks_limit=INITIAL_CHUNKS_LIMIT) def _admin_context(): @@ -394,7 +409,7 @@ def _fetch_og_meta(url: str, timeout: float = 4.0) -> dict: if not m_image: m_image = re.search(r']+content=["\']([^"\']+)["\'][^>]+property=["\']og:image["\']', html, re.I) if m_title: - result['title'] = m_title.group(1).strip()[:120] + result['title'] = html.unescape(m_title.group(1).strip())[:120] if m_image: result['image'] = m_image.group(1).strip() except Exception: @@ -419,16 +434,20 @@ def _stats_context(): 'total_seconds_streamed': current_status.get('total_seconds_streamed'), } play_counts = clip_pusher.get_play_counts() - # Enrich models with og:title and og:image (cached) + # Enrich models with og:title, og:image, and YouTube thumbnail (when video_id available) models_enriched = [] - for model, count in play_counts.get('models', []): + for item in play_counts.get('models', []): + model, count = item[0], item[1] + video_id = item[2] if len(item) > 2 else None url = model if model.startswith('http') else 'https://' + model meta = _fetch_og_meta(url) + title = html.unescape(meta.get('title') or url) + thumbnail = f"https://img.youtube.com/vi/{video_id}/hqdefault.jpg" if video_id else meta.get('image') models_enriched.append({ 'url': url, 'count': count, - 'title': meta.get('title') or url, - 'image': meta.get('image'), + 'title': title, + 'image': thumbnail, }) play_counts = dict(play_counts, models=models_enriched) return {'stream_stats': stream_stats, 'play_counts': play_counts} @@ -743,6 +762,19 @@ def skip_to_next_audio(): return jsonify({'success': True, 'skipped': skipped}) +@app.route('/api/play_chunk', methods=['POST']) +def play_chunk(): + """Play a specific chunk next in the live stream.""" + data = request.get_json() + chunk_name = data.get('chunk_name') if data else None + if not chunk_name or not isinstance(chunk_name, str): + return jsonify({'success': False, 'error': 'Missing or invalid chunk_name'}), 400 + ok = clip_pusher.play_chunk(chunk_name) + if not ok: + return jsonify({'success': False, 'error': 'Chunk not found'}), 404 + return jsonify({'success': True}) + + @app.route('/api/delete_audio', methods=['POST']) def delete_audio(): """Delete an audio file from the filesystem. Requires path within AUDIO_FOLDER.""" diff --git a/clip_pusher.py b/clip_pusher.py index c52b903..7fa3d6c 100644 --- a/clip_pusher.py +++ b/clip_pusher.py @@ -61,6 +61,8 @@ def __init__(self, chunk_folder: str, rtmp_url: str, self._errors = 0 self._last_error: Optional[str] = None self._streamer_process: Optional[subprocess.Popen] = None + self._play_chunk_next: Optional[str] = None + self._play_chunk_lock = threading.Lock() self._load_stream_stats() @@ -160,6 +162,13 @@ def _save_play_counts(self, data: dict) -> None: except OSError: pass + def _extract_video_id(self, path: str) -> Optional[str]: + """Extract 11-char YouTube video ID from path (e.g. .../UCxxx/abc123.mp4 -> abc123).""" + stem = os.path.splitext(os.path.basename(path))[0] + if stem and len(stem) == 11 and stem.replace('-', '').replace('_', '').isalnum(): + return stem + return None + def _record_play_count(self, chunk_path: str, audio_name: Optional[str]) -> None: """Record play count for models (from chunk meta) and audio (current track).""" data = self._load_play_counts() @@ -171,9 +180,31 @@ def _record_play_count(self, chunk_path: str, audio_name: Optional[str]) -> None try: with open(meta_path, 'r') as f: meta = json.load(f) + raw_sources = meta.get('source_videos') or [] + model_to_video = {} + fallback_vid = None + for item in raw_sources: + path = item.get('path') if isinstance(item, dict) else (item if isinstance(item, str) else None) + model = item.get('model') if isinstance(item, dict) else None + if path: + vid = self._extract_video_id(path) + if vid: + if not fallback_vid: + fallback_vid = vid + if model and model not in model_to_video: + model_to_video[model] = vid for m in (meta.get('model_info') or []): if m: - models[m] = models.get(m, 0) + 1 + vid = model_to_video.get(m) or fallback_vid + entry = models.get(m) + if isinstance(entry, dict): + entry['count'] = entry.get('count', 0) + 1 + if vid: + entry['video_id'] = vid + elif isinstance(entry, (int, float)): + models[m] = {'count': entry + 1, 'video_id': vid} if vid else entry + 1 + else: + models[m] = {'count': 1, 'video_id': vid} if vid else 1 except (json.JSONDecodeError, OSError): pass @@ -189,7 +220,13 @@ def get_play_counts(self) -> dict: data = self._load_play_counts() models = data.get('models', {}) audio = data.get('audio', {}) - top_models = sorted(models.items(), key=lambda x: -x[1])[:20] + top_models = [] + for url, entry in models.items(): + count = entry.get('count', entry) if isinstance(entry, dict) else entry + video_id = entry.get('video_id') if isinstance(entry, dict) else None + top_models.append((url, count, video_id)) + top_models.sort(key=lambda x: -x[1]) + top_models = top_models[:20] top_audio = sorted(audio.items(), key=lambda x: -x[1])[:20] return {'models': top_models, 'audio': top_audio} @@ -262,6 +299,19 @@ def skip_to_next_audio(self) -> bool: return True return False + def play_chunk(self, chunk_name: str) -> bool: + """Queue a specific chunk to play next in the stream. Stops current chunk if running.""" + base = os.path.basename(chunk_name) + if not base.endswith('.mp4'): + return False + path = os.path.join(self.chunk_folder, base) + if not os.path.isfile(path): + return False + with self._play_chunk_lock: + self._play_chunk_next = base + self.skip_to_next() + return True + # ── Internal ────────────────────────────────────────────────── def _get_audio_file(self) -> Optional[str]: @@ -383,6 +433,15 @@ def _push_loop(self): random.shuffle(chunks) + with self._play_chunk_lock: + next_name = self._play_chunk_next + if next_name: + self._play_chunk_next = None + full = os.path.join(self.chunk_folder, next_name) + if full in chunks: + chunks.remove(full) + chunks.insert(0, full) + # Pick one audio track for the whole round; get duration so we can resume position across chunks if self._audio_files: if self._persistent_audio_path is None or not os.path.isfile(self._persistent_audio_path): @@ -437,5 +496,9 @@ def _push_loop(self): except: self._streamer_process.kill() + with self._play_chunk_lock: + if self._play_chunk_next: + break + print("Clip pusher loop ended") diff --git a/generate_chunk.sh b/generate_chunk.sh index d17d775..1b11d3e 100755 --- a/generate_chunk.sh +++ b/generate_chunk.sh @@ -162,26 +162,44 @@ for i in $(seq 1 "$CHUNKS_PER_RUN"); do ffmpeg -y -f concat -safe 0 -i "$CONCAT_LIST" \ -c copy "$CHUNK_NAME" -loglevel error - # Write metadata: source videos (full paths), codec, resolution (for dashboard) + # Write metadata: source videos (full paths + model per source), codec, resolution (for dashboard) META_FILE="$OUTPUT_DIR/${CHUNK_BASE}.meta.json" SOURCES_JSON="[]" - [ -n "$SOURCE_BASENAMES" ] && SOURCES_JSON=$(echo "$SOURCE_BASENAMES" | sort -u | python3 -c "import sys,json; print(json.dumps([l.strip() for l in sys.stdin if l.strip()]))" 2>/dev/null) || SOURCES_JSON="[]" - - # TubeArchivist metadata (model info from description) — when TUBEARCHIVIST_URL + TOKEN set - MODEL_JSON="[]" - if [ -n "${TUBEARCHIVIST_URL}" ] && [ -n "${TUBEARCHIVIST_TOKEN}" ]; then - TUBE_SCRIPT="${TUBEARCHIVIST_SCRIPT:-/scripts/tubearchivist_metadata.py}" - MODELS_TMP=$(mktemp) - for path in $(echo "$SOURCE_BASENAMES" | sort -u); do - [ -z "$path" ] && continue - out=$(python3 "$TUBE_SCRIPT" "$TUBEARCHIVIST_URL" "$TUBEARCHIVIST_TOKEN" "$path" 2>/dev/null || true) - model=$(echo "$out" | python3 -c "import sys,json; d=json.load(sys.stdin); m=d.get('model_info'); print(m if m else '')" 2>/dev/null || true) - [ -n "$model" ] && echo "$model" >> "$MODELS_TMP" - done - [ -f "$MODELS_TMP" ] && MODEL_JSON=$(sort -u "$MODELS_TMP" | python3 -c "import sys,json; print(json.dumps([l.strip() for l in sys.stdin if l.strip()]))" 2>/dev/null) || true - rm -f "$MODELS_TMP" + if [ -n "$SOURCE_BASENAMES" ]; then + export TUBEARCHIVIST_URL TUBEARCHIVIST_TOKEN TUBEARCHIVIST_SCRIPT + SOURCES_JSON=$(echo "$SOURCE_BASENAMES" | sort -u | python3 -c " +import sys, json, subprocess, os +paths = [l.strip() for l in sys.stdin if l.strip()] +tube_url = (os.environ.get('TUBEARCHIVIST_URL') or '').strip().rstrip('/') +tube_token = (os.environ.get('TUBEARCHIVIST_TOKEN') or '').strip() +script = os.environ.get('TUBEARCHIVIST_SCRIPT', '/scripts/tubearchivist_metadata.py') +sources = [] +for path in paths: + model = None + thumb = None + title = None + channel = None + if tube_url and tube_token: + try: + out = subprocess.run([sys.executable, script, tube_url, tube_token, path], capture_output=True, text=True, timeout=12) + if out.returncode == 0: + d = json.loads(out.stdout or '{}') + model = d.get('model_info') + thumb = d.get('thumbnail_url') + title = d.get('title') + channel = d.get('channel') + except: pass + sources.append({'path': path, 'model': model, 'thumbnail_url': thumb, 'title': title, 'channel': channel}) +print(json.dumps(sources)) +" 2>/dev/null) + if [ -z "$SOURCES_JSON" ] || [ "$SOURCES_JSON" = "[]" ]; then + SOURCES_JSON=$(echo "$SOURCE_BASENAMES" | sort -u | python3 -c "import sys,json; print(json.dumps([{'path': l.strip(), 'model': None, 'thumbnail_url': None, 'title': None, 'channel': None} for l in sys.stdin if l.strip()]))" 2>/dev/null) || SOURCES_JSON="[]" + fi fi + # model_info = unique models from sources (for Models button) + MODEL_JSON=$(echo "$SOURCES_JSON" | python3 -c "import sys,json; s=json.load(sys.stdin); m= sorted(set(x.get('model') for x in s if x.get('model'))); print(json.dumps(m))" 2>/dev/null) || MODEL_JSON="[]" + VIDEO_EXTRA="" if codec=$(ffprobe -v error -select_streams v:0 -show_entries stream=codec_name -of default=noprint_wrappers=1:nokey=1 "$CHUNK_NAME" 2>/dev/null) && \ width=$(ffprobe -v error -select_streams v:0 -show_entries stream=width -of default=noprint_wrappers=1:nokey=1 "$CHUNK_NAME" 2>/dev/null) && \ diff --git a/scripts/tubearchivist_metadata.py b/scripts/tubearchivist_metadata.py index 4145fcf..ef5e016 100644 --- a/scripts/tubearchivist_metadata.py +++ b/scripts/tubearchivist_metadata.py @@ -1,11 +1,10 @@ #!/usr/bin/env python3 """ -Fetch TubeArchivist video metadata and extract model info from description. -Model info is typically a line like "Model - https://www.instagram.com/..." in the video description. +Fetch TubeArchivist video metadata: model from description, thumbnail, title, channel. Usage: tubearchivist_metadata.py - Prints JSON: {"model_info": "..."} or {"model_info": null} on stdout. + Prints JSON: {"model_info": "...", "thumbnail_url": "...", "title": "...", "channel": "..."} on stdout. If arg looks like a path (contains /), extracts video_id from path (channel_id/video_id.mp4). """ import json @@ -15,49 +14,66 @@ import urllib.request from pathlib import Path -# TubeArchivist path: channel_id/video_id.mp4 — video_id is 11 chars +# TubeArchivist path: channel_id/video_id.mp4 — video_id is 11 chars (YouTube format: alnum, -, _) +VIDEO_ID_RE = re.compile(r"^[a-zA-Z0-9_-]{11}$") MODEL_PATTERN = re.compile(r"Model\s*[-–:]\s*(.+?)(?:\n|$)", re.IGNORECASE | re.DOTALL) def extract_video_id(filepath: str) -> str | None: """Extract TubeArchivist video ID from path (e.g. .../UCxxx/abc123.mp4 -> abc123).""" stem = Path(filepath).stem - if stem and len(stem) == 11 and stem.isalnum(): + if stem and len(stem) == 11 and VIDEO_ID_RE.match(stem): return stem return None -def fetch_model_info(base_url: str, token: str, video_id: str) -> str | None: - """Fetch video metadata from TubeArchivist API and extract model line from description.""" +def fetch_video_metadata(base_url: str, token: str, video_id: str) -> dict: + """Fetch video metadata from TubeArchivist API. Returns dict with model_info, thumbnail_url, title, channel.""" url = f"{base_url.rstrip('/')}/api/video/{video_id}/" req = urllib.request.Request(url, headers={"Authorization": f"Token {token}"}) try: with urllib.request.urlopen(req, timeout=10) as resp: data = json.loads(resp.read().decode()) except (urllib.error.HTTPError, urllib.error.URLError, json.JSONDecodeError, OSError): - return None + return {} + out = {"model_info": None, "thumbnail_url": None, "title": None, "channel": None} desc = data.get("description") or data.get("description_html") or "" - desc = re.sub(r"<[^>]+>", "", desc) # strip HTML if present + desc = re.sub(r"<[^>]+>", "", desc) m = MODEL_PATTERN.search(desc) if m: - return m.group(1).strip() - return None + out["model_info"] = m.group(1).strip() + out["title"] = data.get("title") or data.get("video_title") + out["channel"] = data.get("channel_name") or data.get("channel") + thumb = ( + data.get("vid_thumb_url") + or data.get("thumbnail_url") + or data.get("thumbnail") + or data.get("thumbnails") + ) + if isinstance(thumb, str) and thumb: + out["thumbnail_url"] = thumb if thumb.startswith("http") else f"{base_url.rstrip('/')}{thumb}" if thumb.startswith("/") else thumb + elif isinstance(thumb, list) and thumb: + t = thumb[0] + out["thumbnail_url"] = t.get("url") if isinstance(t, dict) else t + if not out["thumbnail_url"]: + out["thumbnail_url"] = f"https://img.youtube.com/vi/{video_id}/hqdefault.jpg" + return out def main() -> None: if len(sys.argv) != 4: - print(json.dumps({"model_info": None})) + print(json.dumps({"model_info": None, "thumbnail_url": None, "title": None, "channel": None})) sys.exit(1) _, base_url, token, path_or_id = sys.argv if not base_url or not token or not path_or_id: - print(json.dumps({"model_info": None})) + print(json.dumps({"model_info": None, "thumbnail_url": None, "title": None, "channel": None})) sys.exit(1) video_id = extract_video_id(path_or_id) if "/" in path_or_id else path_or_id if not video_id: - print(json.dumps({"model_info": None})) + print(json.dumps({"model_info": None, "thumbnail_url": None, "title": None, "channel": None})) sys.exit(0) - model = fetch_model_info(base_url, token, video_id) - print(json.dumps({"model_info": model})) + result = fetch_video_metadata(base_url, token, video_id) + print(json.dumps(result)) if __name__ == "__main__": diff --git a/templates/dashboard.html b/templates/dashboard.html index 4a01953..24a6dda 100644 --- a/templates/dashboard.html +++ b/templates/dashboard.html @@ -33,14 +33,14 @@
+ class="bg-gray-800 rounded-lg shadow-xl border border-gray-600 max-w-4xl w-full max-h-[90vh] flex flex-col">

@@ -122,7 +122,7 @@

Generated Video Chunks

- + {{ current_chunk }} {% if m.source_videos %} @@ -130,6 +130,7 @@

Generated Video Chunks

Models {% endif %} + Now Playing ▶
@@ -143,7 +144,7 @@

Generated Video Chunks

i -