Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,7 @@ The **Chunk Generator** supports both CPU encoding (`libx264`) and NVIDIA hardwa
| POST | `/api/generate_chunk` | Triggers the generator to build new chunks |
| POST | `/api/skip_to_next` | Skip current chunk and advance to next (audio position preserved) |
| POST | `/api/skip_to_next_audio` | Skip to the next audio track |
| POST | `/api/play_chunk` | Play a specific chunk next in the live stream (body: `{"chunk_name": "xyz.mp4"}`) |

## Troubleshooting

Expand Down
46 changes: 39 additions & 7 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
Pushes pre-generated chunks to RTMP server for continuous live streaming
"""

import html
import json
import os
import re
Expand Down Expand Up @@ -98,7 +99,20 @@ def _build_chunks_list(settings=None):
try:
with open(meta_path, 'r') as _f:
meta = _json.load(_f)
source_videos = meta.get('source_videos') or []
raw_sources = meta.get('source_videos') or []
# Normalize: support old [path, ...] and new [{path, model}, ...]
source_videos = []
for item in raw_sources:
if isinstance(item, str):
source_videos.append({'path': item, 'model': None, 'thumbnail_url': None, 'title': None, 'channel': None})
elif isinstance(item, dict) and 'path' in item:
source_videos.append({
'path': item['path'],
'model': item.get('model'),
'thumbnail_url': item.get('thumbnail_url'),
'title': item.get('title'),
'channel': item.get('channel'),
})
model_info = meta.get('model_info') or []
video_codec = meta.get('video_codec')
width = meta.get('width')
Expand Down Expand Up @@ -271,7 +285,8 @@ def index():
current_chunk_data = next((c for c in chunks if c['name'] == current_chunk), None) if current_chunk else None
chunks_excluding_current = [c for c in chunks if c['name'] != current_chunk]
show_model_column = bool((settings.get('TUBEARCHIVIST_URL') or '').strip() and (settings.get('TUBEARCHIVIST_TOKEN') or '').strip())
return render_template('dashboard.html', chunks=chunks, chunks_excluding_current=chunks_excluding_current, current_chunk_data=current_chunk_data, audio_files=audio_files, settings=settings, show_model_column=show_model_column, hls_port=HLS_PORT, sys_info=sys_info, current_chunk=current_chunk, current_audio=current_audio, initial_stream_status=initial_stream_status, stream_stats=stream_stats, initial_chunks_limit=INITIAL_CHUNKS_LIMIT)
tubearchivist_url = (settings.get('TUBEARCHIVIST_URL') or '').strip().rstrip('/')
return render_template('dashboard.html', chunks=chunks, chunks_excluding_current=chunks_excluding_current, current_chunk_data=current_chunk_data, audio_files=audio_files, settings=settings, show_model_column=show_model_column, tubearchivist_url=tubearchivist_url, hls_port=HLS_PORT, sys_info=sys_info, current_chunk=current_chunk, current_audio=current_audio, initial_stream_status=initial_stream_status, stream_stats=stream_stats, initial_chunks_limit=INITIAL_CHUNKS_LIMIT)


def _admin_context():
Expand Down Expand Up @@ -394,7 +409,7 @@ def _fetch_og_meta(url: str, timeout: float = 4.0) -> dict:
if not m_image:
m_image = re.search(r'<meta[^>]+content=["\']([^"\']+)["\'][^>]+property=["\']og:image["\']', html, re.I)
if m_title:
result['title'] = m_title.group(1).strip()[:120]
result['title'] = html.unescape(m_title.group(1).strip())[:120]
if m_image:
result['image'] = m_image.group(1).strip()
except Exception:
Expand All @@ -419,16 +434,20 @@ def _stats_context():
'total_seconds_streamed': current_status.get('total_seconds_streamed'),
}
play_counts = clip_pusher.get_play_counts()
# Enrich models with og:title and og:image (cached)
# Enrich models with og:title, og:image, and YouTube thumbnail (when video_id available)
models_enriched = []
for model, count in play_counts.get('models', []):
for item in play_counts.get('models', []):
model, count = item[0], item[1]
video_id = item[2] if len(item) > 2 else None
url = model if model.startswith('http') else 'https://' + model
meta = _fetch_og_meta(url)
title = html.unescape(meta.get('title') or url)
thumbnail = f"https://img.youtube.com/vi/{video_id}/hqdefault.jpg" if video_id else meta.get('image')
models_enriched.append({
'url': url,
'count': count,
'title': meta.get('title') or url,
'image': meta.get('image'),
'title': title,
'image': thumbnail,
})
play_counts = dict(play_counts, models=models_enriched)
return {'stream_stats': stream_stats, 'play_counts': play_counts}
Expand Down Expand Up @@ -743,6 +762,19 @@ def skip_to_next_audio():
return jsonify({'success': True, 'skipped': skipped})


@app.route('/api/play_chunk', methods=['POST'])
def play_chunk():
    """Play a specific chunk next in the live stream.

    Expects a JSON object body such as ``{"chunk_name": "xyz.mp4"}``.

    Responses:
        400 -- the body is missing, is not valid JSON, is not a JSON object,
               or ``chunk_name`` is absent, empty, or not a string.
        404 -- ``clip_pusher.play_chunk`` returned a falsy result.
        200 -- ``{'success': True}`` otherwise.
    """
    # silent=True makes malformed / non-JSON bodies come back as None so they
    # fall through to the JSON-shaped 400 below instead of the framework's
    # default error response.
    data = request.get_json(silent=True)
    # A valid JSON body may still be a list, string or number; only an object
    # has .get(), so anything else is treated as "no chunk_name supplied".
    chunk_name = data.get('chunk_name') if isinstance(data, dict) else None
    if not chunk_name or not isinstance(chunk_name, str):
        return jsonify({'success': False, 'error': 'Missing or invalid chunk_name'}), 400
    if not clip_pusher.play_chunk(chunk_name):
        return jsonify({'success': False, 'error': 'Chunk not found'}), 404
    return jsonify({'success': True})


@app.route('/api/delete_audio', methods=['POST'])
def delete_audio():
"""Delete an audio file from the filesystem. Requires path within AUDIO_FOLDER."""
Expand Down
67 changes: 65 additions & 2 deletions clip_pusher.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ def __init__(self, chunk_folder: str, rtmp_url: str,
self._errors = 0
self._last_error: Optional[str] = None
self._streamer_process: Optional[subprocess.Popen] = None
self._play_chunk_next: Optional[str] = None
self._play_chunk_lock = threading.Lock()

self._load_stream_stats()

Expand Down Expand Up @@ -160,6 +162,13 @@ def _save_play_counts(self, data: dict) -> None:
except OSError:
pass

def _extract_video_id(self, path: str) -> Optional[str]:
    """Extract an 11-character YouTube video ID from a media path.

    The ID is the file name without its extension, e.g.
    ``.../UCxxx/dQw4w9WgXcQ.mp4`` -> ``dQw4w9WgXcQ``.

    Args:
        path: File path (or bare file name) of a source video.

    Returns:
        The stem when it is exactly 11 characters drawn from
        ``[A-Za-z0-9_-]``; otherwise ``None``.
    """
    # Function-scope import: the file-level import block is not visible from
    # this block, so do not assume ``re`` is already imported there.
    import re

    stem = os.path.splitext(os.path.basename(path))[0]
    # fullmatch with an explicit ASCII class: str.isalnum() would also accept
    # non-ASCII letters/digits and would reject IDs made only of '-' / '_'.
    if re.fullmatch(r'[A-Za-z0-9_-]{11}', stem):
        return stem
    return None

def _record_play_count(self, chunk_path: str, audio_name: Optional[str]) -> None:
"""Record play count for models (from chunk meta) and audio (current track)."""
data = self._load_play_counts()
Expand All @@ -171,9 +180,31 @@ def _record_play_count(self, chunk_path: str, audio_name: Optional[str]) -> None
try:
with open(meta_path, 'r') as f:
meta = json.load(f)
raw_sources = meta.get('source_videos') or []
model_to_video = {}
fallback_vid = None
for item in raw_sources:
path = item.get('path') if isinstance(item, dict) else (item if isinstance(item, str) else None)
model = item.get('model') if isinstance(item, dict) else None
if path:
vid = self._extract_video_id(path)
if vid:
if not fallback_vid:
fallback_vid = vid
if model and model not in model_to_video:
model_to_video[model] = vid
for m in (meta.get('model_info') or []):
if m:
models[m] = models.get(m, 0) + 1
vid = model_to_video.get(m) or fallback_vid
entry = models.get(m)
if isinstance(entry, dict):
entry['count'] = entry.get('count', 0) + 1
if vid:
entry['video_id'] = vid
elif isinstance(entry, (int, float)):
models[m] = {'count': entry + 1, 'video_id': vid} if vid else entry + 1
else:
models[m] = {'count': 1, 'video_id': vid} if vid else 1
except (json.JSONDecodeError, OSError):
pass

Expand All @@ -189,7 +220,13 @@ def get_play_counts(self) -> dict:
data = self._load_play_counts()
models = data.get('models', {})
audio = data.get('audio', {})
top_models = sorted(models.items(), key=lambda x: -x[1])[:20]
top_models = []
for url, entry in models.items():
count = entry.get('count', entry) if isinstance(entry, dict) else entry
video_id = entry.get('video_id') if isinstance(entry, dict) else None
top_models.append((url, count, video_id))
top_models.sort(key=lambda x: -x[1])
top_models = top_models[:20]
top_audio = sorted(audio.items(), key=lambda x: -x[1])[:20]
return {'models': top_models, 'audio': top_audio}

Expand Down Expand Up @@ -262,6 +299,19 @@ def skip_to_next_audio(self) -> bool:
return True
return False

def play_chunk(self, chunk_name: str) -> bool:
    """Queue a specific chunk to play next in the stream.

    Only the base name of ``chunk_name`` is used, so the file is always
    looked up directly inside ``self.chunk_folder`` regardless of any
    directory components in the argument.

    Args:
        chunk_name: File name of the chunk; must end in ``.mp4``.

    Returns:
        True when the chunk was recorded as the next one to play and
        ``skip_to_next()`` was invoked; False when ``chunk_name`` is not a
        non-empty string, does not end in ``.mp4``, or no such file exists
        in the chunk folder.
    """
    # Reject non-strings / empty names up front: os.path.basename raises
    # TypeError on None, which would break the documented bool contract.
    if not isinstance(chunk_name, str) or not chunk_name:
        return False
    base = os.path.basename(chunk_name)
    if not base.endswith('.mp4'):
        return False
    if not os.path.isfile(os.path.join(self.chunk_folder, base)):
        return False
    with self._play_chunk_lock:
        self._play_chunk_next = base
    # Invoked after the lock is released, so the request is already recorded
    # when the skip happens (presumably this interrupts the chunk currently
    # streaming -- confirm against skip_to_next).
    self.skip_to_next()
    return True

# ── Internal ──────────────────────────────────────────────────

def _get_audio_file(self) -> Optional[str]:
Expand Down Expand Up @@ -383,6 +433,15 @@ def _push_loop(self):

random.shuffle(chunks)

with self._play_chunk_lock:
next_name = self._play_chunk_next
if next_name:
self._play_chunk_next = None
full = os.path.join(self.chunk_folder, next_name)
if full in chunks:
chunks.remove(full)
chunks.insert(0, full)

# Pick one audio track for the whole round; get duration so we can resume position across chunks
if self._audio_files:
if self._persistent_audio_path is None or not os.path.isfile(self._persistent_audio_path):
Expand Down Expand Up @@ -437,5 +496,9 @@ def _push_loop(self):
except:
self._streamer_process.kill()

with self._play_chunk_lock:
if self._play_chunk_next:
break

print("Clip pusher loop ended")

50 changes: 34 additions & 16 deletions generate_chunk.sh
Original file line number Diff line number Diff line change
Expand Up @@ -162,26 +162,44 @@ for i in $(seq 1 "$CHUNKS_PER_RUN"); do
ffmpeg -y -f concat -safe 0 -i "$CONCAT_LIST" \
-c copy "$CHUNK_NAME" -loglevel error

# Write metadata: source videos (full paths), codec, resolution (for dashboard)
# Write metadata: source videos (full paths + model per source), codec, resolution (for dashboard)
META_FILE="$OUTPUT_DIR/${CHUNK_BASE}.meta.json"
SOURCES_JSON="[]"
[ -n "$SOURCE_BASENAMES" ] && SOURCES_JSON=$(echo "$SOURCE_BASENAMES" | sort -u | python3 -c "import sys,json; print(json.dumps([l.strip() for l in sys.stdin if l.strip()]))" 2>/dev/null) || SOURCES_JSON="[]"

# TubeArchivist metadata (model info from description) — when TUBEARCHIVIST_URL + TOKEN set
MODEL_JSON="[]"
if [ -n "${TUBEARCHIVIST_URL}" ] && [ -n "${TUBEARCHIVIST_TOKEN}" ]; then
TUBE_SCRIPT="${TUBEARCHIVIST_SCRIPT:-/scripts/tubearchivist_metadata.py}"
MODELS_TMP=$(mktemp)
for path in $(echo "$SOURCE_BASENAMES" | sort -u); do
[ -z "$path" ] && continue
out=$(python3 "$TUBE_SCRIPT" "$TUBEARCHIVIST_URL" "$TUBEARCHIVIST_TOKEN" "$path" 2>/dev/null || true)
model=$(echo "$out" | python3 -c "import sys,json; d=json.load(sys.stdin); m=d.get('model_info'); print(m if m else '')" 2>/dev/null || true)
[ -n "$model" ] && echo "$model" >> "$MODELS_TMP"
done
[ -f "$MODELS_TMP" ] && MODEL_JSON=$(sort -u "$MODELS_TMP" | python3 -c "import sys,json; print(json.dumps([l.strip() for l in sys.stdin if l.strip()]))" 2>/dev/null) || true
rm -f "$MODELS_TMP"
if [ -n "$SOURCE_BASENAMES" ]; then
export TUBEARCHIVIST_URL TUBEARCHIVIST_TOKEN TUBEARCHIVIST_SCRIPT
SOURCES_JSON=$(echo "$SOURCE_BASENAMES" | sort -u | python3 -c "
import sys, json, subprocess, os
paths = [l.strip() for l in sys.stdin if l.strip()]
tube_url = (os.environ.get('TUBEARCHIVIST_URL') or '').strip().rstrip('/')
tube_token = (os.environ.get('TUBEARCHIVIST_TOKEN') or '').strip()
script = os.environ.get('TUBEARCHIVIST_SCRIPT', '/scripts/tubearchivist_metadata.py')
sources = []
for path in paths:
model = None
thumb = None
title = None
channel = None
if tube_url and tube_token:
try:
out = subprocess.run([sys.executable, script, tube_url, tube_token, path], capture_output=True, text=True, timeout=12)
if out.returncode == 0:
d = json.loads(out.stdout or '{}')
model = d.get('model_info')
thumb = d.get('thumbnail_url')
title = d.get('title')
channel = d.get('channel')
except: pass
sources.append({'path': path, 'model': model, 'thumbnail_url': thumb, 'title': title, 'channel': channel})
print(json.dumps(sources))
" 2>/dev/null)
if [ -z "$SOURCES_JSON" ] || [ "$SOURCES_JSON" = "[]" ]; then
SOURCES_JSON=$(echo "$SOURCE_BASENAMES" | sort -u | python3 -c "import sys,json; print(json.dumps([{'path': l.strip(), 'model': None, 'thumbnail_url': None, 'title': None, 'channel': None} for l in sys.stdin if l.strip()]))" 2>/dev/null) || SOURCES_JSON="[]"
fi
fi

# model_info = unique models from sources (for Models button)
MODEL_JSON=$(echo "$SOURCES_JSON" | python3 -c "import sys,json; s=json.load(sys.stdin); m= sorted(set(x.get('model') for x in s if x.get('model'))); print(json.dumps(m))" 2>/dev/null) || MODEL_JSON="[]"

VIDEO_EXTRA=""
if codec=$(ffprobe -v error -select_streams v:0 -show_entries stream=codec_name -of default=noprint_wrappers=1:nokey=1 "$CHUNK_NAME" 2>/dev/null) && \
width=$(ffprobe -v error -select_streams v:0 -show_entries stream=width -of default=noprint_wrappers=1:nokey=1 "$CHUNK_NAME" 2>/dev/null) && \
Expand Down
48 changes: 32 additions & 16 deletions scripts/tubearchivist_metadata.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
#!/usr/bin/env python3
"""
Fetch TubeArchivist video metadata and extract model info from description.
Model info is typically a line like "Model - https://www.instagram.com/..." in the video description.
Fetch TubeArchivist video metadata: model from description, thumbnail, title, channel.

Usage:
tubearchivist_metadata.py <base_url> <token> <video_path_or_id>
Prints JSON: {"model_info": "..."} or {"model_info": null} on stdout.
Prints JSON: {"model_info": "...", "thumbnail_url": "...", "title": "...", "channel": "..."} on stdout.
If arg looks like a path (contains /), extracts video_id from path (channel_id/video_id.mp4).
"""
import json
Expand All @@ -15,49 +14,66 @@
import urllib.request
from pathlib import Path

# TubeArchivist path: channel_id/video_id.mp4 — video_id is 11 chars
# TubeArchivist path: channel_id/video_id.mp4 — video_id is 11 chars (YouTube format: alnum, -, _)
VIDEO_ID_RE = re.compile(r"^[a-zA-Z0-9_-]{11}$")
MODEL_PATTERN = re.compile(r"Model\s*[-–:]\s*(.+?)(?:\n|$)", re.IGNORECASE | re.DOTALL)


def extract_video_id(filepath: str) -> str | None:
"""Extract TubeArchivist video ID from path (e.g. .../UCxxx/abc123.mp4 -> abc123)."""
stem = Path(filepath).stem
if stem and len(stem) == 11 and stem.isalnum():
if stem and len(stem) == 11 and VIDEO_ID_RE.match(stem):
return stem
return None


def fetch_model_info(base_url: str, token: str, video_id: str) -> str | None:
"""Fetch video metadata from TubeArchivist API and extract model line from description."""
def fetch_video_metadata(base_url: str, token: str, video_id: str) -> dict:
"""Fetch video metadata from TubeArchivist API. Returns dict with model_info, thumbnail_url, title, channel."""
url = f"{base_url.rstrip('/')}/api/video/{video_id}/"
req = urllib.request.Request(url, headers={"Authorization": f"Token {token}"})
try:
with urllib.request.urlopen(req, timeout=10) as resp:
data = json.loads(resp.read().decode())
except (urllib.error.HTTPError, urllib.error.URLError, json.JSONDecodeError, OSError):
return None
return {}
out = {"model_info": None, "thumbnail_url": None, "title": None, "channel": None}
desc = data.get("description") or data.get("description_html") or ""
desc = re.sub(r"<[^>]+>", "", desc) # strip HTML if present
desc = re.sub(r"<[^>]+>", "", desc)
m = MODEL_PATTERN.search(desc)
if m:
return m.group(1).strip()
return None
out["model_info"] = m.group(1).strip()
out["title"] = data.get("title") or data.get("video_title")
out["channel"] = data.get("channel_name") or data.get("channel")
thumb = (
data.get("vid_thumb_url")
or data.get("thumbnail_url")
or data.get("thumbnail")
or data.get("thumbnails")
)
if isinstance(thumb, str) and thumb:
out["thumbnail_url"] = thumb if thumb.startswith("http") else f"{base_url.rstrip('/')}{thumb}" if thumb.startswith("/") else thumb
elif isinstance(thumb, list) and thumb:
t = thumb[0]
out["thumbnail_url"] = t.get("url") if isinstance(t, dict) else t
if not out["thumbnail_url"]:
out["thumbnail_url"] = f"https://img.youtube.com/vi/{video_id}/hqdefault.jpg"
return out


def main() -> None:
if len(sys.argv) != 4:
print(json.dumps({"model_info": None}))
print(json.dumps({"model_info": None, "thumbnail_url": None, "title": None, "channel": None}))
sys.exit(1)
_, base_url, token, path_or_id = sys.argv
if not base_url or not token or not path_or_id:
print(json.dumps({"model_info": None}))
print(json.dumps({"model_info": None, "thumbnail_url": None, "title": None, "channel": None}))
sys.exit(1)
video_id = extract_video_id(path_or_id) if "/" in path_or_id else path_or_id
if not video_id:
print(json.dumps({"model_info": None}))
print(json.dumps({"model_info": None, "thumbnail_url": None, "title": None, "channel": None}))
sys.exit(0)
model = fetch_model_info(base_url, token, video_id)
print(json.dumps({"model_info": model}))
result = fetch_video_metadata(base_url, token, video_id)
print(json.dumps(result))


if __name__ == "__main__":
Expand Down
Loading
Loading