diff --git a/.dockerignore b/.dockerignore
index a54022b..95368e8 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -1,22 +1,14 @@
-.env
-.env.*
-!.env.example
-.venv/
-venv/
-__pycache__/
-*.py[cod]
-*$py.class
.git/
-.github/
.gitignore
-.dockerignore
+.env
+.env.example
logs/
downloads/
-download_temp/
+__pycache__/
+**/__pycache__/
+*.pyc
+imghdr.py
+.kilo/
+AGENTS.md
+*.md
ffmpeg/
-token.txt
-egg-socialvideodownload.json
-Dockerfile.pelican
-entrypoint.sh
-README.md
-DOCS.md
diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml
index 643224b..79f3d9e 100644
--- a/.github/workflows/deploy.yml
+++ b/.github/workflows/deploy.yml
@@ -8,7 +8,20 @@ on:
- develop
jobs:
+ validate:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v6
+ - uses: actions/setup-python@v6
+ with:
+ python-version: '3.11'
+ - name: Install dependencies
+ run: pip install -r requirements.txt
+ - name: Verify imports
+ run: python -c "import main"
+
build-and-push:
+ needs: validate
runs-on: ubuntu-latest
permissions:
contents: read
@@ -17,18 +30,11 @@ jobs:
- name: Check out the repo
uses: actions/checkout@v6
- - name: Set up Python
- uses: actions/setup-python@v6
- with:
- python-version: '3.x'
+ - name: Set up QEMU
+ uses: docker/setup-qemu-action@v4
- - name: Install dependencies
- run: pip install -r requirements.txt
-
- - name: Run tests
- run: |
- # Add your test commands here
- echo "No tests to run"
+ - name: Set up Docker Buildx
+ uses: docker/setup-buildx-action@v4
- name: Log in to GitHub Packages
uses: docker/login-action@v4
@@ -45,14 +51,9 @@ jobs:
id: tags
run: |
REPO="ghcr.io/${{ env.repo_name }}"
- if [ -f .env.example ] && grep -q '^VERSION=' .env.example; then
- BOT_VERSION=$(grep '^VERSION=' .env.example | cut -d= -f2 | tr -d '[:space:]')
- else
- BOT_VERSION="unknown"
- fi
- VERSION_SHORT=$(echo "$BOT_VERSION" | sed -E 's/^((V[0-9]+)(\.[0-9]+)?).*/\1/')
if [ "${{ github.ref }}" = "refs/heads/main" ]; then
- echo "tags=${REPO}:latest,${REPO}:${VERSION_SHORT},${REPO}:${BOT_VERSION}" >> $GITHUB_OUTPUT
+ VERSION=$(grep '^VERSION' .env.example | head -1 | sed 's/.*=\(.*\)/\1/')
+ echo "tags=${REPO}:latest,${REPO}:${VERSION}" >> $GITHUB_OUTPUT
elif [ "${{ github.ref }}" = "refs/heads/develop" ]; then
echo "tags=${REPO}:dev" >> $GITHUB_OUTPUT
else
@@ -66,6 +67,7 @@ jobs:
context: .
push: true
tags: ${{ steps.tags.outputs.tags }}
+ platforms: linux/amd64,linux/arm64
- name: Image digest
run: echo ${{ steps.build-and-push.outputs.digest }}
diff --git a/AGENTS.md b/AGENTS.md
index 3fe13d4..a451bf3 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -1,162 +1,169 @@
# Agent Guidelines: SocialVideoDownload.py
## Project Overview
-A **Telegram bot** that downloads videos and music from social media links (YouTube, TikTok, etc.) and sends them back to users. Deployed as a Docker container to GitHub Packages.
+A **Telegram bot** (modular Python application) that downloads videos and music from social media links (YouTube, TikTok, etc.) and sends them back to users. Deployed as a Docker container to GitHub Container Registry.
-- **Language**: Python 3.11 (target, required — Python 3.13+ is incompatible with `python-telegram-bot==13.7`)
-- **Primary file**: `main.py` (~80 lines, orchestration only)
-- **Framework**: `python-telegram-bot==13.7` — **critical**: this is the old v13 synchronous API (`Updater`, `Dispatcher`, `use_context=True`). Do NOT use modern v20+ async patterns (`Application`, `ContextTypes`, etc.); they are incompatible.
-- **Deployment target**: Docker image → `ghcr.io/...` (GitHub Container Registry)
-
-## Project Structure
-
-```
-SocialVideoDownload.py/
-├── main.py # Entry point — token loading, handler registration, polling loop
-├── config.py # Centralized constants (VERSION, FFMPEG_PATH, retention, disk thresholds)
-├── requirements.txt # Python deps
-├── Dockerfile # Multi-stage build (ffmpeg → builder → final)
-├── .env.example # Template for BOT_TOKEN and other env vars
-├── .github/workflows/ # CI/CD (deploy.yml)
-├── commands/ # Telegram command handlers
-│ ├── start.py
-│ ├── help.py
-│ ├── download.py
-│ ├── music.py
-│ ├── stats.py
-│ └── auto_download.py
-└── utils/ # Shared utilities
- ├── token_loader.py # Reads BOT_TOKEN from .env (auto-creates template if missing)
- ├── logger.py # Colored console + file logging
- ├── file_manager.py # Hash-based deduplication (SHA-256 of URLs in downloads/hashes.txt)
- ├── disk_manager.py # Free-space monitoring, emergency cleanup of downloads/
- ├── cache.py # JSON metadata cache (download_temp/cache_metadata.json) with TTL
- ├── retention.py # Sets file mtime to future based on retention policy
- ├── upload.py # Telegram upload (< 35 MB) or external fallback via curl.libriciel.fr
- ├── curl_uploader.py # PycURL-based upload to curl.libriciel.fr with progress callback
- └── progress_file.py # Progress tracking helpers
-```
+- **Language**: Python 3.11 (target)
+- **Architecture**: Modular (commands/, utils/ packages) with entry point `main.py`
+- **Framework**: `python-telegram-bot==13.7` — **critical**: this is the old v13 synchronous API (`Updater`, `Dispatcher`, `use_context=True`). Do NOT use modern v20+ async patterns; they are incompatible.
+- **Deployment**: Docker image → `ghcr.io/OverStyleFR/SocialVideoDownload.py`
+- **CI/CD**: GitHub Actions with multi-arch (`linux/amd64`, `linux/arm64`)
## Essential Commands
| Command | Purpose |
|---------|---------|
-| `python main.py` | Run the bot locally |
+| `python main.py` | Run the bot locally (requires `.env`) |
+| `bash setup.sh` | One-time local setup (venv + pip install + .env) |
+| `docker compose up -d` | Run the Docker container locally |
| `docker build -t socialvideodownload .` | Build Docker image |
-| `docker run -e BOT_TOKEN=your_token socialvideodownload` | Run container (pass token via env) |
+| `docker run -v $(pwd)/.env:/app/.env socialvideodownload` | Run container |
| `pip install -r requirements.txt` | Install dependencies |
| `echo "No tests to run"` | Current test suite (there are **no tests**) |
**No test framework is configured.** The CI workflow explicitly skips tests with a placeholder `echo`.
+## Project Structure
+
+```
+.
+├── main.py # Entry point
+├── config.py # Configuration from .env
+├── commands/
+│ ├── start.py # /start handler
+│ ├── help.py # /help handler
+│ ├── download.py # /download handler
+│ ├── music.py # /music handler
+│ ├── stats.py # /stats handler
+│ ├── auto_download.py # Auto-download via text messages
+│ └── upload.py # Upload helper (now in utils/)
+├── utils/
+│ ├── cache.py # Cache hit tracking
+│ ├── disk_manager.py # Downloads cleanup (retention + emergency)
+│ ├── file_manager.py # Hash-based dedup (sha256 of URL)
+│ ├── logger.py # Console + file logging
+│ ├── progress_file.py # Progress-aware file wrapper for uploads
+│ ├── retention.py # File retention via mtime
+│ ├── token_loader.py # .env token loading
+│ ├── curl_uploader.py # External upload via curl.libriciel.fr
+│ └── upload.py # Telegram file upload with progress
+├── imghdr.py # Compatibility shim (removed from stdlib 3.13+)
+├── Dockerfile # Multi-stage: bookworm base
+├── docker-compose.yml # Local dev container
+├── setup.sh # Standalone setup script
+├── .dockerignore # Excludes ffmpeg/, .kilo/, *.md, etc.
+├── .env.example # Environment template
+├── .gitignore
+├── requirements.txt
+└── AGENTS.md
+```
+
## Architecture & Data Flow
```
Telegram Message
↓
- python-telegram-bot v13 handlers (commands/*.py)
+ python-telegram-bot v13 handlers (commands/*.py)
↓
- yt-dlp Python API (yt_dlp.YoutubeDL)
+ yt-dlp (Python package, not subprocess) → downloads/
.
↓
- downloads/.
- ↓
- upload.py decides:
- ├─ < 35 MB → send via Telegram API (reply_video / reply_audio / reply_document)
- └─ ≥ 35 MB → upload via curl.libriciel.fr with progress updates, return URL
+ [Video] → check retention → bot.send_video()
+ [Music] → ffmpeg extract-audio → .mp3 → retention → bot.send_audio()
```
+- **Modules**: Logic is split into `commands/` (handlers) and `utils/` (infrastructure).
+- **yt-dlp**: Used as a **Python package** (`import yt_dlp`), not a subprocess binary.
- **Synchronous**: The entire bot is sync. All handlers block on I/O. Do not introduce `async`/`await` unless migrating the entire framework.
-- **Subprocess**: Only `ffmpeg` (for `/music` conversion to MP3) spawns an external binary.
-### Caching & Deduplication
-- **URL deduplication**: `utils/file_manager.py` stores SHA-256 hashes of URLs in `downloads/hashes.txt`. Before downloading, the bot checks if the hash exists.
-- **Metadata cache**: `utils/cache.py` maintains `download_temp/cache_metadata.json` with per-entry TTL:
- - Small files (≤ 5 MB) → 24 hours
- - Large files (> 5 MB) → 1 hour
+### Caching & Dedup
+- **Hash-based**: URL → SHA-256 → stored in `downloads/hashes.txt`. Before downloading, `is_already_downloaded()` checks if the hash exists.
+- **File check**: Even if the hash exists, the bot verifies the file still exists on disk (yt-dlp's `prepare_filename`). If missing, it re-downloads and the hash line persists (harmless, duplicates are per-session only).
### Retention Policy
-- `utils/retention.py` sets file `mtime` to a future timestamp based on size/type:
- - MP3 files → long retention (default 24h)
- - Small files (< 4 MB default) → long retention
- - Large files → short retention (default 2h)
-- These values are configurable via `.env` (`SMALL_FILE_SIZE_MB`, `RETENTION_SMALL_HOURS`, `RETENTION_LARGE_HOURS`).
+- Files get their **mtime set to `now + retention`** after download via `set_retention()`.
+- Small files (< `SMALL_FILE_SIZE_MB`) and mp3s: retention = `RETENTION_SMALL_HOURS` (default 24h).
+- Large files: retention = `RETENTION_LARGE_HOURS` (default 2h).
+- `cleanup_by_retention()` removes files whose mtime < now (expired retention).
+- `check_and_clean_if_needed()` tries retention first, then full clear if still low on space.
### Startup Behavior (`main()`)
-1. Loads cache from `download_temp/cache_metadata.json` (creates empty if missing)
-2. Checks free disk space; triggers emergency cleanup if below `MIN_FREE_SPACE_MB` (default 500)
-3. Registers all command handlers on the dispatcher
-4. Sets Telegram bot command menu (`/start`, `/help`, `/download`)
-5. Starts a daemon thread for periodic cleanup every `CLEANUP_INTERVAL_HOURS`
-6. Begins polling
-
-### Periodic Cleanup (`scheduled_cleanup` thread)
-- Runs every `CLEANUP_INTERVAL_HOURS` (default 24h)
-- Calls `clear_downloads()` which empties `downloads/` but preserves `hashes.txt`
-
-## External Dependencies (Binaries)
-
-| Binary | Expected Location | Used For |
-|--------|-------------------|----------|
-| `yt-dlp` | Python package (`yt_dlp`) | Video/audio downloading |
-| `ffmpeg` | `ffmpeg/ffmpeg-7.0.2-amd64-static/ffmpeg` (local) or `/usr/local/bin/ffmpeg` (Docker) | MP3 extraction (`/music`) |
-
-- In Docker, FFmpeg is copied from a multi-stage `ghcr.io/linuxserver/ffmpeg:latest` image.
-- Locally, the bundled static build `ffmpeg/ffmpeg-7.0.2-amd64-static/ffmpeg` is used (path configurable via `.env`).
-
-## Configuration & Secrets
-
-- **Token source**: `.env` file, variable `BOT_TOKEN`. Auto-generated if missing by `utils/token_loader.py`.
-- **No `token.txt`**: The old flat-file approach has been replaced by `python-dotenv`.
-- **Environment variables loaded**:
- - `BOT_TOKEN` — Telegram bot token (required)
- - `VERSION` — Bot version string (default `V.8-7`)
- - `DEVELOPED_BY` — Author string (default `Tom V. | OverStyleFR`)
- - `FFMPEG_PATH` — Path to ffmpeg binary
- - `CLEANUP_INTERVAL_HOURS`, `MIN_FREE_SPACE_MB` — Disk/rotation tuning
- - `SMALL_FILE_SIZE_MB`, `RETENTION_SMALL_HOURS`, `RETENTION_LARGE_HOURS` — Retention tuning
+1. `clear_downloads()` — deletes everything in `downloads/` (fresh start).
+2. `load_cache()` — loads cache tracking from disk.
+3. Background thread: `scheduled_cleanup()` runs `cleanup_by_retention()` every `CLEANUP_INTERVAL_HOURS`.
+
+### File-Size Guard
+Telegram bot API limits: the bot hardcodes a **35 MB** ceiling (`MAX_FILE_SIZE = 35 * 1024 * 1024` in `utils/upload.py`). Files exceeding this are uploaded externally via `curl.libriciel.fr`.
+
+## External Dependencies
+
+| Dependency | Type | Used For |
+|------------|------|----------|
+| `yt-dlp` | Python package (pip) | Video/audio downloading |
+| `ffmpeg` | System binary | Audio extraction (music command) |
+
+- **FFmpeg** is resolved via `config.FFMPEG_PATH`:
+ - Default: `ffmpeg/ffmpeg-7.0.2-amd64-static/ffmpeg` (local dev)
+ - If the configured path doesn't exist: falls back to `"ffmpeg"` (system PATH)
+ - In Docker: copied from `ghcr.io/linuxserver/ffmpeg:latest` into `/usr/local/bin/`
+- **yt-dlp** is a Python dependency (not a vendored binary).
+
+## Configuration & Secrets (`.env`)
+
+| Variable | Default | Description |
+|----------|---------|-------------|
+| `BOT_TOKEN` | *(required)* | Telegram bot token |
+| `VERSION` | `V9.2` | Bot version (used for Docker tags, /stats, /help) — source of truth: `.env` / `.env.example` |
+| `DEVELOPED_BY` | `Tom V. \| OverStyleFR` | Author credit |
+| `FFMPEG_PATH` | see above | Path to ffmpeg binary |
+| `MIN_FREE_SPACE_MB` | `500` | Min free space before emergency cleanup |
+| `CLEANUP_INTERVAL_HOURS` | `24` | Interval between scheduled cleanups |
+| `SMALL_FILE_SIZE_MB` | `4` | Threshold for small/large file retention |
+| `RETENTION_SMALL_HOURS` | `24` | Retention for small files + mp3 |
+| `RETENTION_LARGE_HOURS` | `2` | Retention for large files |
+
+- **Token source**: `.env` file (read via `python-dotenv`). If missing, `token_loader.py` creates a template and exits.
+- **`token.txt`** is deprecated (was used by the old egg-pterodactyl setup). Now `.env` is the sole config source.
+- **Version**: the source of truth is `.env` (via `VERSION=`). `.env.example` is the committed template. The CI reads `VERSION` from `.env.example` for the Docker tags. `config.py` no longer has a hardcoded fallback — if `.env` is missing, the displayed version is `"unknown"`.
## Code Patterns & Conventions
-- **Language**: UI strings and comments are in **French** (e.g., "Téléchargement en cours", "Veuillez patienter..."). Maintain this for user-facing messages.
-- **Logging**: Single colored logger (`utils/logger.py`):
- - `console_logger` (TelegramBot logger) writes to both `logs/YYYY-MM-DD.log` and `StreamHandler`
- - Format: `'%(asctime)s - %(levelname)s - %(message)s'`
- - Category-based ANSI colors (e.g., `[DOWNLOAD]` blue, `[MUSIC]` magenta, `[UPLOAD]` yellow)
-- **Retry logic**: Commands use `max_attempts = 3` / `while attempts < max_attempts` loops.
-- **Error handling**: Broad `except Exception` with logging; generally falls back to retrying or sending an error message to the user.
+- **Language**: UI strings and comments are in **French** (e.g., "Téléchargement en cours"). Maintain this for user-facing messages.
+- **Logging**: Single `console_logger` ("TelegramBot") with colored console output + daily file logs in `logs/`.
+- **Retry logic**: Downloads use a `while attempts < max_attempts` loop with `max_attempts = 3`.
+- **Error handling**: Broad `except Exception` with logging. Some paths use `try/except` inside retry loops.
## Important Gotchas
-1. **Old Telegram API**: If you add new handlers, use v13 semantics:
- - `CommandHandler("cmd", func)` (in v13, `pass_args=True` is implicit via `context.args`)
+1. **Old Telegram API**: Use v13 semantics:
+ - `CommandHandler("cmd", func)` — read arguments from `context.args` (`pass_args` is deprecated with context-based callbacks)
- `MessageHandler(Filters.text & ~Filters.command, func)` for plain text
- - `update.message.chat_id`, `context.bot.send_video(...)` — **not** v20 kwargs.
+ - `update.message.chat_id`, `context.bot.send_video(...)` — **not** v20 patterns.
-2. **File upload limit**: The bot uses a **35 MB** ceiling (`MAX_FILE_SIZE` in `utils/upload.py`). Files above this are uploaded externally via `curl.libriciel.fr` with a PycURL PUT and a 10%-step progress callback message.
+2. **`python-telegram-bot==13.7` + Python ≥3.13**: The vendored urllib3 in PTB breaks on Python ≥3.13. A local `imghdr.py` shim is committed for Python 3.13+ stdlib changes. `urllib3<2` is pinned in `requirements.txt` to avoid removal of `urllib3.contrib.appengine`.
-3. **Python version lock**: `python-telegram-bot==13.7` is **incompatible with Python 3.13+**. Always target Python 3.11. If running locally on a newer OS, install Python 3.11 via `pyenv` or similar.
+3. **CI validate job**: Runs on Python 3.11 (target version). Using newer Python (3.13+) will fail due to PTB compatibility issues.
-4. **Docker setuptools fix**: The Dockerfile explicitly reinstalls `setuptools<71` after wheel install because modern setuptoolsdrops `pkg_resources`, which APScheduler 3.6.3 requires. Do not remove this line.
+4. **Cleanup on startup**: `clear_downloads()` wipes `downloads/` entirely (including `hashes.txt`). This means the hash cache is not persistent across restarts.
-5. **No yt-dlp binary**: `yt-dlp` is installed as a Python package (`yt_dlp`), not a vendored binary. Both `download.py` and `music.py` import and call `yt_dlp.YoutubeDL(...)` directly.
+5. **`egg-socialvideodownload.json`**: **Deleted** and deprecated. Was used for Pterodactyl/Pelican panel integration with `token.txt`. The bot now uses `.env` exclusively.
-6. **Stats command is restricted**: `commands/stats.py` allows only `AUTHORIZED_USER` (username `overstylefr`) or `AUTHORIZED_IDS` (hardcoded Telegram user ID). Unauthorized users get "Accès refusé."
+6. **Branch-based image tags** (`ghcr.io/...`):
+ - `main` → `latest` + VERSION tag
+ - `develop` → `dev`
-7. **CI skips tests**: The GitHub Actions workflow has a placeholder `echo "No tests to run"`. Adding tests requires updating `.github/workflows/deploy.yml`.
+7. **CI has no tests**: The `validate` job only installs dependencies and checks that `import main` succeeds. Adding tests requires updating `.github/workflows/deploy.yml`.
-8. **Branch-based image tags** (`ghcr.io/...`):
- - `main` → `latest` + version tags extracted from `config.py`
- - `develop` → `dev`
- - Other branches → branch name
+8. **Version**: Stored in `config.py` as `VERSION = os.getenv("VERSION", "V9.2")`. The CI reads `VERSION` from `.env.example` via `grep` to tag Docker images.
## Docker Notes
-- Multi-stage build:
- 1. `ffmpeg` stage — copies binaries from `linuxserver/ffmpeg`
- 2. `builder` stage — runs `pip wheel` to create wheels
- 3. Final stage — installs wheels, fixes setuptools, copies FFmpeg, copies source, runs `python main.py`
-- Base image: `python:3.11-slim-bullseye`
-- The local `ffmpeg/` directory is copied into the image but the Dockerfile prefers the stage-copied `/usr/local/bin/ffmpeg`.
+- **Multi-stage build**:
+ 1. `ffmpeg` stage — copies binaries from `linuxserver/ffmpeg:latest`
+ 2. `builder` stage — `pip wheel` on `python:3.11-slim-bookworm`
+ 3. Final stage — installs wheels (excluding setuptools — kept from base image), copies FFmpeg, `COPY . .`, runs `python main.py`
+- **Base image**: `python:3.11-slim-bookworm`
+- **`.dockerignore`** excludes `ffmpeg/`, `.kilo/`, `*.md`, `.env`, etc. to minimize image size.
+- **`docker-compose.yml`** mounts `.env`, `downloads/`, and `logs/` as volumes.
## Git & Branches
@@ -166,10 +173,10 @@ Telegram Message
## When Modifying This Codebase
-- The project is split into modules; add new commands to `commands/` and new utilities to `utils/`.
-- Preserve French user-facing strings.
-- Do not upgrade `python-telegram-bot` without rewriting all handler signatures and startup logic.
-- If adding a new command, remember to `dp.add_handler(CommandHandler("cmd", func))` in `main.py` before `updater.start_polling()`.
-- If you introduce `async`, you must rewrite the entire bot (handlers, dispatcher, updater → ApplicationBuilder). Prefer sync additions to avoid a full migration.
-- Update `VERSION` in `config.py` (and/or `.env`) when shipping meaningful changes.
-- If you touch packaging, verify the Docker build still starts without `ModuleNotFoundError: pkg_resources`.
+- **Preserve French** user-facing strings.
+- **Do not upgrade** `python-telegram-bot` without rewriting all handler signatures (v13 → v20 is a full rewrite).
+- If adding a new command, create the handler in `commands/`, add `dp.add_handler(...)` in `main()` before `updater.start_polling()`.
+- If you introduce `async`, you must rewrite the entire bot (handlers, dispatcher, updater → ApplicationBuilder). Prefer sync additions.
+- **Update `VERSION`** in `.env` / `config.py` default when shipping meaningful changes.
+- **New dependencies**: If a dependency requires a Python feature removed in 3.13+ (like `imghdr`), provide a compatibility shim and commit it (do NOT gitignore).
+- **`urllib3` pin**: Keep `urllib3<2` pinned — PTB v13.7 uses `urllib3.contrib.appengine` which was removed in urllib3 2.x.
diff --git a/Dockerfile b/Dockerfile
index 6a13872..6614c9e 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,28 +1,36 @@
+# --- FFmpeg Stage ---
+FROM ghcr.io/linuxserver/ffmpeg:latest AS ffmpeg
+
+# --- Build Stage ---
FROM python:3.11-slim-bookworm AS builder
-WORKDIR /build
+WORKDIR /app
+
+RUN apt-get update && \
+ apt-get install -y --no-install-recommends build-essential && \
+ rm -rf /var/lib/apt/lists/*
COPY requirements.txt .
-RUN pip wheel --no-cache-dir --wheel-dir /wheels -r requirements.txt
+RUN pip wheel --no-cache-dir --wheel-dir /app/wheels -r requirements.txt
+
+# --- Final Stage ---
FROM python:3.11-slim-bookworm
ENV PYTHONDONTWRITEBYTECODE=1 \
- PYTHONUNBUFFERED=1 \
- PIP_NO_CACHE_DIR=1
+ PYTHONUNBUFFERED=1
WORKDIR /app
-RUN apt-get update && \
- apt-get install -y --no-install-recommends ffmpeg ca-certificates && \
- rm -rf /var/lib/apt/lists/*
+COPY --from=builder /app/wheels /wheels
+COPY --from=ffmpeg /usr/local/bin/ffmpeg /usr/local/bin/ffmpeg
+COPY --from=ffmpeg /usr/local/bin/ffprobe /usr/local/bin/ffprobe
+RUN chmod +x /usr/local/bin/ffmpeg /usr/local/bin/ffprobe
-COPY --from=builder /wheels /wheels
-RUN pip install --no-cache-dir /wheels/* && \
- rm -rf /wheels
+RUN pip install --no-cache $(ls /wheels/*.whl | grep -v setuptools) && rm -rf /wheels
COPY . .
-RUN mkdir -p /app/logs /app/downloads /app/download_temp
+RUN mkdir -p /app/logs /app/downloads
CMD ["python", "main.py"]
diff --git a/commands/download.py b/commands/download.py
index 2093a71..17e8781 100644
--- a/commands/download.py
+++ b/commands/download.py
@@ -1,11 +1,21 @@
# commands/download.py
+import os
import yt_dlp
from utils.logger import console_logger
from utils.file_manager import is_already_downloaded, save_download
from utils.disk_manager import check_and_clean_if_needed
from utils.retention import set_retention
+from utils.cache import add_to_cache, record_cache_hit
from utils.upload import upload_file
+
+def _edit_progress(bot, chat_id, msg_id, text):
+ try:
+ bot.edit_message_text(chat_id=chat_id, message_id=msg_id, text=text)
+ except Exception:
+ pass
+
+
def download(update, context):
args = context.args
if not args:
@@ -14,41 +24,61 @@ def download(update, context):
return
url = args[0]
+ chat_id = update.message.chat_id
+ bot = context.bot
- # Vérification de l'espace disque avant téléchargement
check_and_clean_if_needed()
console_logger.info(f"[DOWNLOAD] Traitement de l'URL: {url} par {update.message.from_user.username}")
+
+ progress_msg = update.message.reply_text(
+ "⏳ Téléchargement en cours...",
+ reply_to_message_id=update.message.message_id
+ )
+ progress_msg_id = progress_msg.message_id
ydl_opts = {'outtmpl': 'downloads/%(title)s.%(ext)s'}
+ should_download = True
+ from_cache = False
+ filename = None
+
if is_already_downloaded(url):
- console_logger.info(f"[DOWNLOAD] Fichier déjà téléchargé pour l'URL: {url} par {update.message.from_user.username}. Récupération du fichier...")
+ console_logger.info(f"[DOWNLOAD] Fichier déjà téléchargé pour l'URL: {url} par {update.message.from_user.username}. Vérification du fichier...")
try:
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
info = ydl.extract_info(url, download=False)
filename = ydl.prepare_filename(info)
- upload_file(update, filename, context)
- console_logger.info(f"[DOWNLOAD] Fichier envoyé pour l'URL: {url} par {update.message.from_user.username}")
+ if os.path.exists(filename):
+ should_download = False
+ from_cache = True
+ set_retention(filename)
+ add_to_cache(url, os.path.getsize(filename))
+ record_cache_hit(url)
+ _edit_progress(bot, chat_id, progress_msg_id, "📦 Utilisation du cache...")
+ else:
+ console_logger.warning(f"[DOWNLOAD] Fichier manquant malgré hash pour l'URL: {url}. Retéléchargement...")
except Exception as e:
- update.message.reply_text("Erreur lors de la récupération du fichier.")
- console_logger.error(f"[DOWNLOAD] Erreur récupération fichier pour l'URL: {url} par {update.message.from_user.username} - {str(e)}")
- return
+ console_logger.error(f"[DOWNLOAD] Erreur récupération infos pour l'URL: {url} - {str(e)}")
- max_attempts = 3
- attempts = 0
- while attempts < max_attempts:
- try:
- console_logger.info(f"[DOWNLOAD] Tentative {attempts + 1} de téléchargement pour l'URL: {url} par {update.message.from_user.username}")
- with yt_dlp.YoutubeDL(ydl_opts) as ydl:
- info = ydl.extract_info(url, download=True)
- filename = ydl.prepare_filename(info)
- save_download(url)
- set_retention(filename)
- console_logger.info(f"[DOWNLOAD] Téléchargement terminé pour l'URL: {url} par {update.message.from_user.username}. Envoi du fichier...")
- upload_file(update, filename, context)
- break
- except Exception as e:
- attempts += 1
- console_logger.error(f"[DOWNLOAD] Tentative {attempts} échouée pour l'URL: {url} par {update.message.from_user.username} - {str(e)}")
- if attempts >= max_attempts:
- update.message.reply_text("Erreur lors du téléchargement après plusieurs tentatives.")
+ if should_download:
+ max_attempts = 3
+ attempts = 0
+ while attempts < max_attempts:
+ try:
+ console_logger.info(f"[DOWNLOAD] Tentative {attempts + 1} de téléchargement pour l'URL: {url} par {update.message.from_user.username}")
+ with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+ info = ydl.extract_info(url, download=True)
+ filename = ydl.prepare_filename(info)
+ save_download(url)
+ set_retention(filename)
+ add_to_cache(url, os.path.getsize(filename))
+ break
+ except Exception as e:
+ attempts += 1
+ console_logger.error(f"[DOWNLOAD] Tentative {attempts} échouée pour l'URL: {url} par {update.message.from_user.username} - {str(e)}")
+ if attempts >= max_attempts:
+ _edit_progress(bot, chat_id, progress_msg_id, "❌ Échec du téléchargement après plusieurs tentatives.")
+ return
+
+ _edit_progress(bot, chat_id, progress_msg_id, "📤 Envoi en cours... 0%")
+ upload_file(update, filename, context, progress_msg_id=progress_msg_id, from_cache=from_cache)
diff --git a/commands/music.py b/commands/music.py
index e5fe788..8726abb 100644
--- a/commands/music.py
+++ b/commands/music.py
@@ -5,9 +5,18 @@
from utils.logger import console_logger
from utils.file_manager import is_already_downloaded, save_download
from utils.retention import set_retention
+from utils.cache import add_to_cache, record_cache_hit
from utils.upload import upload_file
from config import FFMPEG_PATH
+
+def _edit_progress(bot, chat_id, msg_id, text):
+ try:
+ bot.edit_message_text(chat_id=chat_id, message_id=msg_id, text=text)
+ except Exception:
+ pass
+
+
def music(update, context):
args = context.args
if not args:
@@ -16,58 +25,79 @@ def music(update, context):
return
url = args[0]
+ chat_id = update.message.chat_id
+ bot = context.bot
+
console_logger.info(f"[MUSIC] Traitement de l'URL: {url} par {update.message.from_user.username}")
+
+ progress_msg = update.message.reply_text(
+ "⏳ Téléchargement vidéo en cours...",
+ reply_to_message_id=update.message.message_id
+ )
+ progress_msg_id = progress_msg.message_id
ydl_opts = {'outtmpl': 'downloads/%(title)s.%(ext)s'}
+ should_download = True
+ from_cache = False
+ video_file = None
+
if is_already_downloaded(url):
- console_logger.info(f"[MUSIC] Vidéo déjà téléchargée pour l'URL: {url} par {update.message.from_user.username}. Récupération du fichier...")
+ console_logger.info(f"[MUSIC] Vidéo déjà téléchargée pour l'URL: {url} par {update.message.from_user.username}. Vérification du fichier...")
try:
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
info = ydl.extract_info(url, download=False)
video_file = ydl.prepare_filename(info)
- console_logger.info(f"[MUSIC] Vidéo trouvée: {video_file}")
+ if os.path.exists(video_file):
+ should_download = False
+ from_cache = True
+ set_retention(video_file)
+ add_to_cache(url, os.path.getsize(video_file))
+ record_cache_hit(url)
+ _edit_progress(bot, chat_id, progress_msg_id, "📦 Utilisation du cache...")
+ else:
+ console_logger.warning(f"[MUSIC] Vidéo manquante malgré hash pour l'URL: {url}. Retéléchargement...")
except Exception as e:
- update.message.reply_text("Erreur lors de la récupération du fichier vidéo.")
- console_logger.error(f"[MUSIC] Erreur récupération vidéo pour l'URL: {url} par {update.message.from_user.username} - {str(e)}")
+ console_logger.error(f"[MUSIC] Erreur récupération infos pour l'URL: {url} - {str(e)}")
+ _edit_progress(bot, chat_id, progress_msg_id, "❌ Erreur lors de la récupération de la vidéo.")
return
- else:
+
+ if should_download:
max_attempts = 3
attempts = 0
while attempts < max_attempts:
try:
- console_logger.info(f"[MUSIC] Tentative {attempts + 1} de téléchargement de la vidéo pour l'URL: {url} par {update.message.from_user.username}")
+ console_logger.info(f"[MUSIC] Tentative {attempts + 1} de téléchargement pour l'URL: {url} par {update.message.from_user.username}")
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
info = ydl.extract_info(url, download=True)
video_file = ydl.prepare_filename(info)
save_download(url)
- console_logger.info(f"[MUSIC] Téléchargement terminé: {video_file} par {update.message.from_user.username}")
+ set_retention(video_file)
+ add_to_cache(url, os.path.getsize(video_file))
break
except Exception as e:
attempts += 1
console_logger.error(f"[MUSIC] Tentative {attempts} échouée pour l'URL: {url} par {update.message.from_user.username} - {str(e)}")
if attempts >= max_attempts:
- update.message.reply_text("Erreur lors du téléchargement de la vidéo après plusieurs tentatives.")
+ _edit_progress(bot, chat_id, progress_msg_id, "❌ Échec du téléchargement après plusieurs tentatives.")
return
- # Conversion en audio MP3
+ _edit_progress(bot, chat_id, progress_msg_id, "🔄 Conversion audio...")
+
audio_file = os.path.splitext(video_file)[0] + ".mp3"
if os.path.exists(audio_file):
console_logger.info(f"[MUSIC] Fichier audio déjà converti: {audio_file}")
else:
try:
- console_logger.info(f"[MUSIC] Conversion de {video_file} en audio {audio_file} via ffmpeg pour {update.message.from_user.username}...")
stream = ffmpeg.input(video_file)
stream = ffmpeg.output(stream, audio_file, format='mp3', acodec='libmp3lame', audio_bitrate='192k')
ffmpeg.run(stream, cmd=FFMPEG_PATH, quiet=True)
+ set_retention(audio_file)
+ add_to_cache(url + "#audio", os.path.getsize(audio_file))
console_logger.info(f"[MUSIC] Conversion terminée: {audio_file} pour {update.message.from_user.username}")
except Exception as e:
- update.message.reply_text("Erreur lors de la conversion en audio.")
+ _edit_progress(bot, chat_id, progress_msg_id, "❌ Erreur lors de la conversion en audio.")
console_logger.error(f"[MUSIC] Erreur conversion en audio pour {video_file} par {update.message.from_user.username} - {str(e)}")
return
- try:
- console_logger.info(f"[MUSIC] Envoi du fichier audio: {audio_file} pour {update.message.from_user.username}")
- upload_file(update, audio_file, context)
- except Exception as e:
- update.message.reply_text("Erreur lors de l'envoi du fichier audio.")
- console_logger.error(f"[MUSIC] Erreur envoi audio pour {audio_file} par {update.message.from_user.username} - {str(e)}")
+ _edit_progress(bot, chat_id, progress_msg_id, "📤 Envoi en cours... 0%")
+ upload_file(update, audio_file, context, progress_msg_id=progress_msg_id, from_cache=from_cache)
diff --git a/commands/start.py b/commands/start.py
index 5c574b3..ec5cc73 100644
--- a/commands/start.py
+++ b/commands/start.py
@@ -1,4 +1,3 @@
-from telegram import ParseMode
from utils.logger import console_logger
def start(update, context):
@@ -8,5 +7,5 @@ def start(update, context):
"Je suis un bot qui permet de télécharger des vidéos/musiques via des liens de réseaux sociaux (principalement YouTube & TikTok)."
)
- update.message.reply_text(welcome_message, parse_mode=ParseMode.HTML)
+ update.message.reply_text(welcome_message)
console_logger.info(f"[START] Commande /start exécutée par {update.message.from_user.username}")
diff --git a/commands/stats.py b/commands/stats.py
index c1680a4..b176a19 100644
--- a/commands/stats.py
+++ b/commands/stats.py
@@ -1,14 +1,17 @@
from utils.logger import console_logger
-from utils.cache import download_cache
+from utils.cache import cache_stats
from utils.disk_manager import get_free_space_mb
from config import VERSION, DEVELOPED_BY
import os
import time
-import hashlib
import psutil
AUTHORIZED_USER = "overstylefr"
-AUTHORIZED_IDS = {5092023723} # ID Telegram de @overstylefr
+AUTHORIZED_IDS = {5092023723}
+
+
+def _fmt_fr(value, decimals=2):
+    """Format `value` with `decimals` fixed decimals and a comma as decimal separator."""
+    fixed_point = format(value, f".{decimals}f")
+    return fixed_point.replace(".", ",")
def stats(update, context):
@@ -23,35 +26,12 @@ def stats(update, context):
return
# --- Cache Stats ---
- cache_entries = len(download_cache)
- cache_hits = 0
- cache_misses = 0
- cache_expired = 0
- cache_total_size = 0
- cache_small_files = 0
- cache_large_files = 0
-
- for link_hash, (timestamp, file_size) in download_cache.items():
- from utils.cache import get_ttl
- ttl = get_ttl(file_size)
- age = time.time() - timestamp
- if age < ttl:
- cache_hits += 1
- cache_total_size += file_size
- if file_size <= 5 * 1024 * 1024:
- cache_small_files += 1
- else:
- cache_large_files += 1
- else:
- cache_expired += 1
+ cs = cache_stats()
# --- Disk Stats ---
downloads_dir = "downloads"
- download_temp_dir = "download_temp"
total_downloads_size = 0
total_downloads_files = 0
- total_temp_size = 0
- total_temp_files = 0
if os.path.exists(downloads_dir):
for root, dirs, files in os.walk(downloads_dir):
@@ -60,14 +40,7 @@ def stats(update, context):
total_downloads_size += os.path.getsize(fp)
total_downloads_files += 1
- if os.path.exists(download_temp_dir):
- for root, dirs, files in os.walk(download_temp_dir):
- for f in files:
- fp = os.path.join(root, f)
- total_temp_size += os.path.getsize(fp)
- total_temp_files += 1
-
- free_space_mb = get_free_space_mb()
+ free_space_gb = get_free_space_mb() / 1024
# --- Logs Stats ---
logs_dir = "logs"
@@ -85,8 +58,8 @@ def stats(update, context):
uptime_str = f"{int(uptime_seconds // 3600)}h {int((uptime_seconds % 3600) // 60)}m"
cpu_percent = psutil.cpu_percent(interval=0.1)
memory = psutil.virtual_memory()
- memory_used_mb = memory.used / (1024 * 1024)
- memory_total_mb = memory.total / (1024 * 1024)
+ memory_used_gb = memory.used / (1024 ** 3)
+ memory_total_gb = memory.total / (1024 ** 3)
memory_percent = memory.percent
# --- Hash Stats ---
@@ -102,18 +75,18 @@ def stats(update, context):
f"`Version:` {VERSION}\n"
f"`Développé par:` {DEVELOPED_BY}\n\n"
- f"🗂️ *Cache*\n"
- f"`Entrées totales:` {cache_entries}\n"
- f"`Entrées valides:` {cache_hits}\n"
- f"`Entrées expirées:` {cache_expired}\n"
- f"`Petits fichiers (≤5Mo):` {cache_small_files}\n"
- f"`Gros fichiers (>5Mo):` {cache_large_files}\n"
- f"`Taille totale cache:` {cache_total_size / (1024 * 1024):.2f} Mo\n\n"
+ f"🗂️ *Cache (session)*\n"
+ f"`Entrées totales:` {cs['total_entries']}\n"
+ f"`Entrées valides:` {cs['valid']}\n"
+ f"`Entrées expirées:` {cs['expired']}\n"
+ f"`Petits fichiers (≤5Mo):` {cs['small']}\n"
+ f"`Gros fichiers (>5Mo):` {cs['large']}\n"
+ f"`Taille totale cache:` {cs['total_size'] / (1024 * 1024):.2f} Mo\n"
+ f"`Hits cache:` {cs['total_hits']} ({cs['bytes_saved'] / (1024 * 1024):.2f} Mo économisés)\n\n"
f"💾 *Disque*\n"
- f"`Espace libre:` {free_space_mb:.2f} Mo\n"
+ f"`Espace libre:` {_fmt_fr(free_space_gb)} Go\n"
f"`Fichiers downloads:` {total_downloads_files} ({total_downloads_size / (1024 * 1024):.2f} Mo)\n"
- f"`Fichiers temp:` {total_temp_files} ({total_temp_size / (1024 * 1024):.2f} Mo)\n"
f"`URLs enregistrées:` {total_hashes}\n\n"
f"📝 *Logs*\n"
@@ -123,7 +96,7 @@ def stats(update, context):
f"🖥️ *Système*\n"
f"`Uptime:` {uptime_str}\n"
f"`CPU:` {cpu_percent:.1f}%\n"
- f"`RAM:` {memory_used_mb:.1f}/{memory_total_mb:.1f} Mo ({memory_percent}%)\n"
+ f"`RAM:` {_fmt_fr(memory_used_gb, 1)}/{_fmt_fr(memory_total_gb, 1)} Go ({memory_percent}%)\n"
)
update.message.reply_text(msg, parse_mode="Markdown")
diff --git a/config.py b/config.py
index 12f7595..2f58ec3 100644
--- a/config.py
+++ b/config.py
@@ -4,9 +4,12 @@
load_dotenv(".env", override=True)
-VERSION = os.getenv("VERSION", "")
+VERSION = os.getenv("VERSION", "unknown") # Source de verite : .env / .env.example
DEVELOPED_BY = os.getenv("DEVELOPED_BY", "Tom V. | OverStyleFR")
-FFMPEG_PATH = os.getenv("FFMPEG_PATH", "ffmpeg/ffmpeg-7.0.2-amd64-static/ffmpeg") # Change cette valeur si nécessaire (chemin complet vers l'exécutable ffmpeg)
+_FFMPEG_DEFAULT = "ffmpeg/ffmpeg-7.0.2-amd64-static/ffmpeg"
+FFMPEG_PATH = os.getenv("FFMPEG_PATH", _FFMPEG_DEFAULT)
+if not os.path.exists(FFMPEG_PATH):
+ FFMPEG_PATH = "ffmpeg"
# Disk Management Configuration
MIN_FREE_SPACE_MB = int(os.getenv("MIN_FREE_SPACE_MB", 500))
diff --git a/docker-compose.yml b/docker-compose.yml
index 7d01395..a36e13d 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,15 +1,9 @@
-version: '3.8'
-
services:
bot:
- build:
- context: .
- dockerfile: Dockerfile.compose
+ build: .
container_name: socialvideodownload
- env_file:
- - .env
volumes:
- - ./logs:/app/logs
+ - ./.env:/app/.env
- ./downloads:/app/downloads
- - ./download_temp:/app/download_temp
+ - ./logs:/app/logs
restart: unless-stopped
diff --git a/imghdr.py b/imghdr.py
new file mode 100644
index 0000000..f2584ed
--- /dev/null
+++ b/imghdr.py
@@ -0,0 +1,17 @@
+import struct
+
+def what(filename, h=None):
+    """Identify an image format from its leading bytes.
+
+    `h` is the header to inspect; when it is None the first 32 bytes of
+    `filename` are read instead. Returns 'png', 'jpeg', 'gif' or 'webp',
+    or None when the header is shorter than 8 bytes or not recognised.
+    """
+    if h is None:
+        with open(filename, 'rb') as f:
+            h = f.read(32)
+    if h is None or len(h) < 8:
+        return None
+    # (format, magic prefix) pairs, checked in order.
+    signatures = (
+        ('png', b'\x89PNG\r\n\x1a\n'),
+        ('jpeg', b'\xff\xd8'),
+        ('gif', b'GIF87a'),
+        ('gif', b'GIF89a'),
+    )
+    for kind, magic in signatures:
+        if h.startswith(magic):
+            return kind
+    # WebP is a RIFF container whose bytes 8-11 spell "WEBP".
+    if h.startswith(b'RIFF') and h[8:12] == b'WEBP':
+        return 'webp'
+    return None
diff --git a/main.py b/main.py
index cd73c05..27d691d 100644
--- a/main.py
+++ b/main.py
@@ -1,5 +1,4 @@
# main.py
-import os
import threading
import time
from dotenv import load_dotenv
@@ -14,48 +13,34 @@
from utils.cache import load_cache
from utils.token_loader import get_token
from config import CLEANUP_INTERVAL_HOURS
-from utils.disk_manager import clear_downloads, check_and_clean_if_needed
+from utils.disk_manager import clear_downloads, cleanup_by_retention
from utils.logger import console_logger
load_dotenv(".env", override=True)
def scheduled_cleanup():
- """Thread de nettoyage périodique du dossier downloads."""
+ """Thread de nettoyage périodique — respecte la rétention (mtimes)."""
interval_seconds = CLEANUP_INTERVAL_HOURS * 3600
console_logger.info(
f"[CLEANUP] Rotation planifiée activée — nettoyage toutes les {CLEANUP_INTERVAL_HOURS}h."
)
while True:
time.sleep(interval_seconds)
- console_logger.info("[CLEANUP] Nettoyage périodique du dossier downloads...")
- clear_downloads()
+ console_logger.info("[CLEANUP] Nettoyage périodique (rétention)...")
+ cleanup_by_retention()
console_logger.info("[CLEANUP] Nettoyage périodique terminé.")
def main():
- console_logger.info("[INIT] Début de la réinitialisation des dossiers...")
+ console_logger.info("[INIT] Nettoyage du dossier downloads...")
+ clear_downloads()
load_cache()
- # Vérification de l'espace disque au démarrage
- check_and_clean_if_needed()
-
token = get_token()
updater = Updater(token, use_context=True)
dp = updater.dispatcher
- # Set webhook mode. This is often more robust than polling.
- # It requires a public URL to be configured for Telegram to send updates to.
- # For local testing, this might need further configuration or a tunneling service.
- WEBHOOK_MODE = True # Set to True to use webhooks
- if WEBHOOK_MODE:
- # You would typically set a webhook URL here, e.g.:
- # bot = Bot(token)
- # bot.set_webhook("YOUR_WEBHOOK_URL")
- # For now, we'll keep the polling logic but set the flag.
- # If this resolves the conflict, the next step would be to configure webhooks properly.
- console_logger.info("[INIT] Running in webhook mode (polling fallback).")
-
# Enregistrement des handlers pour les commandes du bot
dp.add_handler(CommandHandler("start", start))
dp.add_handler(CommandHandler("help", help_command))
@@ -70,7 +55,7 @@ def main():
bot = updater.bot
bot.set_my_commands([
BotCommand("start", "Pour commencer"),
- BotCommand("help", "Pour obtenir de l\'aide"),
+ BotCommand("help", "Pour obtenir de l'aide"),
BotCommand("download", "Télécharger une vidéo avec yt-dlp"),
])
console_logger.info("[INIT] Menu des commandes configuré.")
@@ -80,13 +65,9 @@ def main():
cleanup_thread.start()
console_logger.info("[INIT] Démarrage du bot et lancement du polling...")
- # If using webhooks, you would typically start them here and not use polling.
- # For now, we'll keep polling for simplicity if WEBHOOK_MODE is set but not fully configured.
updater.start_polling()
updater.idle()
-
-
if __name__ == '__main__':
main()
diff --git a/requirements.txt b/requirements.txt
index 4734ee2..5005b5c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,7 @@
python-telegram-bot==13.7
+urllib3>=1.26,<2
yt-dlp>=2021.12.1
ffmpeg-python>=0.2.0
pycurl>=7.43.0.6
python-dotenv>=1.0.0
psutil>=5.9.0
-setuptools<71
diff --git a/setup.sh b/setup.sh
new file mode 100755
index 0000000..5aaa19f
--- /dev/null
+++ b/setup.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+# Standalone installer: creates a virtualenv in .venv, installs the Python
+# dependencies and bootstraps .env from .env.example when .env is missing.
+set -e
+
+# Work from the directory that contains this script so that .venv,
+# requirements.txt and .env.example resolve wherever the script is called from.
+cd "$(dirname "${BASH_SOURCE[0]}")"
+
+echo "=== SocialVideoDownload.py — Installation autonome ==="
+echo ""
+
+# Create and activate the virtual environment
+python3 -m venv .venv
+source .venv/bin/activate
+
+# Install the dependencies
+pip install -r requirements.txt
+
+# Create the .env file; an existing one is never overwritten
+if [ -f .env ]; then
+    echo "Fichier .env déjà existant, aucun changement."
+elif [ -f .env.example ]; then
+    cp .env.example .env
+    echo "Fichier .env créé à partir de .env.example."
+else
+    # Fail with an explicit message instead of a bare cp error.
+    echo "Erreur : .env.example introuvable, impossible de créer .env." >&2
+    exit 1
+fi
+
+echo ""
+echo "Installation terminée."
+echo ""
+echo "Configurez votre token Telegram dans .env (BOT_TOKEN), puis lancez :"
+echo " source .venv/bin/activate && python main.py"
+echo ""
+echo "Assurez-vous que ffmpeg est accessible dans votre PATH ou défini via FFMPEG_PATH dans .env."
diff --git a/utils/cache.py b/utils/cache.py
index 11f5808..709a6ce 100644
--- a/utils/cache.py
+++ b/utils/cache.py
@@ -1,79 +1,76 @@
-import os
-import json
import time
import hashlib
from utils.logger import console_logger
-# Bot version related constants (assuming these are defined in main.py or a config file)
-# For now, hardcoding as example:
-BOT_VERSION = "V9.1"
-
-# Cache configuration
-SMALL_FILE_THRESHOLD = 5 * 1024 * 1024 # 5 MB
-LONG_TTL = 24 * 3600 # 24 hours for files ≤5MB
-STANDARD_TTL = 1 * 3600 # 1 hour for files >5MB
-AUTHORIZED_USER = "overstylefr"
-CACHE_FILE = "download_temp/cache_metadata.json"
+SMALL_FILE_THRESHOLD = 5 * 1024 * 1024
+LONG_TTL = 24 * 3600
+STANDARD_TTL = 1 * 3600
download_cache = {}
+
def load_cache():
global download_cache
- try:
- with open(CACHE_FILE, 'r') as f:
- download_cache = json.load(f)
- console_logger.info(f'Cache loaded from {CACHE_FILE}')
- except FileNotFoundError:
- console_logger.warning(f'Cache file {CACHE_FILE} not found. Initializing empty cache.')
- download_cache = {}
- except json.JSONDecodeError:
- console_logger.error(f'Error decoding JSON from {CACHE_FILE}. Initializing empty cache.')
- download_cache = {}
- except Exception as e:
- console_logger.error(f'An unexpected error occurred loading cache: {e}')
- download_cache = {}
-
-def save_cache():
- try:
- # Ensure the directory exists
- cache_dir = os.path.dirname(CACHE_FILE)
- if cache_dir and not os.path.exists(cache_dir):
- os.makedirs(cache_dir)
- console_logger.info(f'Created cache directory: {cache_dir}')
-
- with open(CACHE_FILE, 'w') as f:
- json.dump(download_cache, f, indent=4) # Use indent for readability
- console_logger.info(f'Cache saved to {CACHE_FILE}')
- except Exception as e:
- console_logger.error(f'An error occurred saving cache to {CACHE_FILE}: {e}')
+ download_cache.clear()
+ console_logger.info("Cache initialisé (session en mémoire).")
+
def get_ttl(file_size):
return LONG_TTL if file_size <= SMALL_FILE_THRESHOLD else STANDARD_TTL
+
 def is_cache_valid(link_hash):
+ """Return True when `link_hash` has a cache entry younger than its TTL.
+
+ The TTL is the one get_ttl() assigns to the entry's recorded file size.
+ """
 if link_hash not in download_cache:
 return False
- timestamp, size = download_cache[link_hash]
- current_time = time.time()
- ttl = get_ttl(size)
- is_valid = (current_time - timestamp) < ttl
- # console_logger.debug(f"Cache check for hash {link_hash}: valid={is_valid}, age={(current_time - timestamp):.0f}s, ttl={ttl}s")
- return is_valid
+ # Entries are [timestamp, size, hit_count]; the hit counter is not needed here.
+ timestamp, size, _hits = download_cache[link_hash]
+ return (time.time() - timestamp) < get_ttl(size)
+
 def add_to_cache(link, file_size):
- """Adds or updates an entry in the cache."""
- link_hash = hashlib.md5(link.encode()).hexdigest()
- download_cache[link_hash] = (time.time(), file_size)
- save_cache()
+ """Insert or overwrite the cache entry for `link` and return its key.
+
+ The key is the SHA-256 hex digest of `link`; the stored value is the list
+ [current time, file_size, 0], whose last item starts the hit counter at 0.
+ """
+ link_hash = hashlib.sha256(link.encode()).hexdigest()
+ download_cache[link_hash] = [time.time(), file_size, 0]
 return link_hash
-def get_cached_file_path(link_hash):
- """Tries to find the actual file path from the hash. Assumes a known file structure."""
- # This is a placeholder, a more robust solution might be needed if extensions vary wildly
- # or if files are stored with different naming conventions.
- base_path = os.path.join("download_temp", link_hash)
- for ext in ['.mp4', '.mkv', '.webm', '.avi', '.mov', '.mp3', '.m4a', '.ogg', '.wav']:
- if os.path.exists(base_path + ext):
- return base_path + ext
- return None
+def record_cache_hit(link):
+    """Increment the hit counter of the cache entry for `link`.
+
+    Returns True when an entry keyed by the SHA-256 hex digest of `link`
+    exists (its counter, stored at index 2, is incremented), False otherwise.
+    """
+    entry = download_cache.get(hashlib.sha256(link.encode()).hexdigest())
+    if entry is None:
+        return False
+    entry[2] += 1
+    return True
+
+
+def cache_stats():
+    """Summarise the in-memory download cache.
+
+    Returns a dict with the keys:
+      - total_entries: number of entries in download_cache
+      - valid / expired: entries younger / not younger than their TTL (get_ttl)
+      - small / large: valid entries at or below / above SMALL_FILE_THRESHOLD
+      - total_size: summed file size of the valid entries
+      - total_hits: summed hit counters of all entries
+      - bytes_saved: sum of file_size * hit_count over all entries
+    """
+    # Read the clock once so every entry is judged against the same instant.
+    now = time.time()
+    valid = expired = small = large = 0
+    total_size = total_hits = bytes_saved = 0
+
+    for timestamp, file_size, hit_count in download_cache.values():
+        # Hits are accumulated for every entry, expired ones included.
+        total_hits += hit_count
+        bytes_saved += file_size * hit_count
+        if now - timestamp >= get_ttl(file_size):
+            expired += 1
+            continue
+        valid += 1
+        total_size += file_size
+        if file_size <= SMALL_FILE_THRESHOLD:
+            small += 1
+        else:
+            large += 1
+
+    return {
+        "total_entries": len(download_cache),
+        "valid": valid,
+        "expired": expired,
+        "small": small,
+        "large": large,
+        "total_size": total_size,
+        "total_hits": total_hits,
+        "bytes_saved": bytes_saved,
+    }
\ No newline at end of file
diff --git a/utils/curl_uploader.py b/utils/curl_uploader.py
index 6a0dd9d..93f0b94 100644
--- a/utils/curl_uploader.py
+++ b/utils/curl_uploader.py
@@ -44,6 +44,7 @@ def upload_large_file_via_curl(file_path, progress_callback=None):
last_reported = [0]
while attempts < 3:
c = pycurl.Curl()
+ f = None
try:
c.setopt(c.URL, target_url)
c.setopt(c.UPLOAD, 1)
@@ -82,6 +83,10 @@ def progress(download_total, download_now, upload_total, upload_now):
finally:
try:
c.close()
- f.close()
except Exception:
pass
+ if f is not None:
+ try:
+ f.close()
+ except Exception:
+ pass
diff --git a/utils/disk_manager.py b/utils/disk_manager.py
index 5287111..901812a 100644
--- a/utils/disk_manager.py
+++ b/utils/disk_manager.py
@@ -2,49 +2,68 @@
import shutil
from config import MIN_FREE_SPACE_MB
from utils.logger import console_logger
+from utils.retention import is_file_expired
DOWNLOADS_DIR = "downloads"
+HASH_FILE = os.path.join(DOWNLOADS_DIR, "hashes.txt")
def get_free_space_mb() -> float:
- """Retourne l'espace disque libre en Mo sur la partition du dossier downloads."""
stat = shutil.disk_usage(DOWNLOADS_DIR if os.path.exists(DOWNLOADS_DIR) else ".")
return stat.free / (1024 * 1024)
 def clear_downloads():
- """Vide le dossier downloads et recrée sa structure (conserve hashes.txt)."""
- hash_file = os.path.join(DOWNLOADS_DIR, "hashes.txt")
- hashes_backup = None
-
- # Sauvegarde des hashes avant suppression pour éviter les re-téléchargements
- if os.path.exists(hash_file):
- with open(hash_file, "r") as f:
- hashes_backup = f.read()
-
+ """Remove DOWNLOADS_DIR with everything in it and recreate it.
+
+ Unlike the previous version, hashes.txt is no longer backed up and
+ restored: it is deleted together with the downloaded files.
+ """
 if os.path.exists(DOWNLOADS_DIR):
 shutil.rmtree(DOWNLOADS_DIR)
- console_logger.info("[DISK_MANAGER] Dossier downloads vidé.")
+ console_logger.info("[DISK_MANAGER] Dossier downloads entièrement supprimé.")
+ # exist_ok=True makes this a no-op if the directory is already present.
+ os.makedirs(DOWNLOADS_DIR, exist_ok=True)
+
+
+def cleanup_by_retention():
+ """Delete the files in DOWNLOADS_DIR whose retention has expired.
+
+ Only direct children of DOWNLOADS_DIR are examined; hashes.txt and
+ entries that are not regular files are skipped, so hashes.txt itself is
+ left untouched. If DOWNLOADS_DIR does not exist it is created and the
+ function returns immediately.
+ """
+ if not os.path.exists(DOWNLOADS_DIR):
+ os.makedirs(DOWNLOADS_DIR, exist_ok=True)
+ return
- os.makedirs(DOWNLOADS_DIR)
+ # Number of files actually deleted, used for the summary log below.
+ removed = 0
+ for entry in os.listdir(DOWNLOADS_DIR):
+ file_path = os.path.join(DOWNLOADS_DIR, entry)
+ if entry == "hashes.txt" or not os.path.isfile(file_path):
+ continue
+ if is_file_expired(file_path):
+ try:
+ os.remove(file_path)
+ console_logger.info(f"[DISK_MANAGER] Fichier expiré supprimé : {file_path}")
+ removed += 1
+ except Exception as e:
+ # A file that cannot be removed is logged and the scan continues.
+ console_logger.error(f"[DISK_MANAGER] Erreur suppression {file_path}: {e}")
- if hashes_backup is not None:
- with open(hash_file, "w") as f:
- f.write(hashes_backup)
- console_logger.info("[DISK_MANAGER] Fichier hashes.txt restauré après nettoyage.")
+ if removed:
+ console_logger.info(f"[DISK_MANAGER] Nettoyage par rétention terminé — {removed} fichier(s) supprimé(s).")
+ else:
+ console_logger.debug("[DISK_MANAGER] Aucun fichier expiré trouvé.")
 def check_and_clean_if_needed():
- """Vérifie l'espace libre et vide le dossier downloads si le seuil est atteint."""
+ """Free disk space when it drops below MIN_FREE_SPACE_MB.
+
+ Expired files are removed first (cleanup_by_retention); if free space is
+ still below the threshold afterwards, the whole downloads directory is
+ wiped (clear_downloads).
+
+ Returns True when a cleanup was triggered, False when free space was
+ already sufficient.
+ """
 free_mb = get_free_space_mb()
 console_logger.debug(f"[DISK_MANAGER] Espace libre : {free_mb:.1f} Mo (seuil : {MIN_FREE_SPACE_MB} Mo)")
 if free_mb < MIN_FREE_SPACE_MB:
 console_logger.warning(
 f"[DISK_MANAGER] Espace libre insuffisant ({free_mb:.1f} Mo < {MIN_FREE_SPACE_MB} Mo). "
- "Nettoyage d'urgence du dossier downloads..."
+ "Nettoyage par rétention..."
 )
- clear_downloads()
+ cleanup_by_retention()
+ # Measure again: the retention pass may not have freed enough space.
+ free_mb = get_free_space_mb()
+ if free_mb < MIN_FREE_SPACE_MB:
+ console_logger.warning(
+ f"[DISK_MANAGER] Toujours insuffisant après rétention ({free_mb:.1f} Mo). "
+ "Vidage complet du dossier downloads..."
+ )
+ clear_downloads()
 console_logger.info("[DISK_MANAGER] Nettoyage d'urgence terminé.")
 return True
 return False
diff --git a/utils/file_manager.py b/utils/file_manager.py
index 7e86bd0..ab988a8 100644
--- a/utils/file_manager.py
+++ b/utils/file_manager.py
@@ -1,23 +1,10 @@
import os
import hashlib
-import shutil
from utils.logger import console_logger
DOWNLOADS_DIR = "downloads"
HASH_FILE = os.path.join(DOWNLOADS_DIR, "hashes.txt")
-def create_folders():
- console_logger.info("[FILE_MANAGER] Réinitialisation du dossier downloads...")
- if os.path.exists(DOWNLOADS_DIR):
- shutil.rmtree(DOWNLOADS_DIR)
- console_logger.info("[FILE_MANAGER] Dossier downloads supprimé.")
- os.makedirs(DOWNLOADS_DIR)
- console_logger.info("[FILE_MANAGER] Dossier downloads recréé.")
-
- if not os.path.exists("logs"):
- os.makedirs("logs")
- console_logger.info("[FILE_MANAGER] Dossier logs créé.")
-
def compute_hash(url):
hash_value = hashlib.sha256(url.encode('utf-8')).hexdigest()
console_logger.debug(f"[FILE_MANAGER] Hash calculé pour l'URL: {url} -> {hash_value}")
diff --git a/utils/progress_file.py b/utils/progress_file.py
index 67f020a..15b5bb2 100644
--- a/utils/progress_file.py
+++ b/utils/progress_file.py
@@ -3,13 +3,14 @@
from utils.logger import console_logger
class ProgressFile:
- def __init__(self, filename, progress_interval=20):
+ def __init__(self, filename, progress_interval=10, callback=None):
self.filename = filename
self.f = open(filename, "rb")
self.total = os.path.getsize(filename)
self.read_bytes = 0
self.last_percent = 0
- self.progress_interval = progress_interval # Intervalle de 20% par défaut
+ self.progress_interval = progress_interval
+ self.callback = callback
def read(self, size=-1):
data = self.f.read(size)
@@ -19,11 +20,12 @@ def read(self, size=-1):
if percent - self.last_percent >= self.progress_interval:
self.last_percent = percent
console_logger.info(f"[UPLOAD] Envoi du fichier {self.filename} : {percent}% complété.")
+ if self.callback:
+ self.callback(percent)
return data
def close(self):
self.f.close()
def __getattr__(self, attr):
- # Permet d'accéder aux autres attributs du fichier
return getattr(self.f, attr)
diff --git a/utils/retention.py b/utils/retention.py
index 74689b1..9a6d015 100644
--- a/utils/retention.py
+++ b/utils/retention.py
@@ -1,4 +1,5 @@
import os
+import time
from datetime import datetime, timedelta
from utils.logger import console_logger
@@ -31,3 +32,13 @@ def set_retention(file_path: str):
console_logger.debug(f"[RETENTION] Set future mtime for {file_path} ({minutes} min)")
except Exception as e:
console_logger.error(f"[RETENTION] Failed to set mtime for {file_path}: {e}")
+
+def is_file_expired(file_path: str) -> bool:
+    """Tell whether the retention period of `file_path` has elapsed.
+
+    set_retention() stores the deadline as the file's mtime (now + retention),
+    so the file is expired once its mtime lies in the past.
+
+    Returns True for an expired file, and also for a file that does not
+    exist or whose mtime cannot be read.
+    """
+    try:
+        # Read the mtime directly rather than testing os.path.exists() first:
+        # the file may be deleted between the two calls.
+        return os.path.getmtime(file_path) < time.time()
+    except OSError:
+        return True
diff --git a/utils/token_loader.py b/utils/token_loader.py
index d943d06..21b0cb5 100644
--- a/utils/token_loader.py
+++ b/utils/token_loader.py
@@ -2,29 +2,21 @@
from dotenv import load_dotenv
def get_token():
- # Priorité : variable d'environnement directe (Docker, Pelican, CI…)
- token = os.getenv("BOT_TOKEN", "").strip()
- if token and token != "YOUR_TELEGRAM_BOT_TOKEN_HERE":
- return token
-
env_file = ".env"
# Génération automatique du fichier .env s'il n'existe pas
if not os.path.exists(env_file):
- try:
- with open(env_file, "w") as f:
- f.write("# === Configuration du bot Telegram ===\n")
- f.write("BOT_TOKEN=YOUR_TELEGRAM_BOT_TOKEN_HERE\n\n")
- f.write("# === Configuration générale ===\n")
- f.write("VERSION=V.8-7\n")
- f.write("DEVELOPED_BY=Tom V. | OverStyleFR\n")
- f.write("FFMPEG_PATH=ffmpeg/ffmpeg-7.0.2-amd64-static/ffmpeg\n")
- print(f"Le fichier {env_file} a été créé. Veuillez y renseigner votre token Telegram (BOT_TOKEN).")
- except OSError:
- print("Impossible de créer le fichier .env (système de fichiers en lecture seule). Veuillez définir la variable d'environnement BOT_TOKEN.")
+ with open(env_file, "w") as f:
+ f.write("# === Configuration du bot Telegram ===\n")
+ f.write("BOT_TOKEN=YOUR_TELEGRAM_BOT_TOKEN_HERE\n\n")
+ f.write("# === Configuration générale ===\n")
+ f.write("VERSION=V9.2\n")
+ f.write("DEVELOPED_BY=Tom V. | OverStyleFR\n")
+ f.write("FFMPEG_PATH=ffmpeg/ffmpeg-7.0.2-amd64-static/ffmpeg\n")
+ print(f"Le fichier {env_file} a été créé. Veuillez y renseigner votre token Telegram (BOT_TOKEN).")
exit(1)
- load_dotenv(env_file, override=True)
+ load_dotenv(env_file)
token = os.getenv("BOT_TOKEN", "").strip()
diff --git a/utils/upload.py b/utils/upload.py
index 4c044b1..9756b3f 100644
--- a/utils/upload.py
+++ b/utils/upload.py
@@ -1,67 +1,89 @@
# utils/upload.py
import os
from utils.logger import console_logger
+from utils.progress_file import ProgressFile
-def upload_file(update, file_path, context):
- """
- Envoie le fichier via Telegram si sa taille est < 35 Mo.
- Sinon, le fichier est uploadé via curl.libriciel.fr à l'aide de
- upload_large_file_via_curl() et l'URL de téléchargement est renvoyée à l'utilisateur.
- Un callback de progression met à jour un message Telegram tous les 10% avec l'emoji ⏳.
- """
+
+def _edit_progress(bot, chat_id, msg_id, text):
+    """Best-effort update of the progress message `msg_id` in chat `chat_id`.
+
+    Any failure of bot.edit_message_text() is logged at debug level and
+    otherwise ignored, so a failed progress update never interrupts the caller.
+    """
+    try:
+        bot.edit_message_text(chat_id=chat_id, message_id=msg_id, text=text)
+    except Exception as e:
+        console_logger.debug(f"[UPLOAD] Mise à jour du message de progression impossible: {e}")
+
+
+def upload_file(update, file_path, context, progress_msg_id=None, from_cache=False):
+    """Send `file_path` to the chat that issued `update`.
+
+    Files larger than 35 MiB are uploaded with upload_large_file_via_curl()
+    and the resulting URL is sent to the user; smaller files are sent through
+    Telegram as video, audio or document depending on their extension.
+
+    Args:
+        update: incoming update; replies are sent through update.message.
+        file_path: path of the file to send.
+        context: callback context; context.bot edits and deletes the progress message.
+        progress_msg_id: id of an existing progress message to update, or None.
+        from_cache: when True, direct sends carry a "sent from cache" caption.
+
+    Failures are reported to the user with a reply and logged. The progress
+    message, if any, is removed once the transfer has finished or failed.
+    """
     if not os.path.exists(file_path):
         update.message.reply_text("Erreur : Fichier non trouvé.")
         console_logger.error(f"[UPLOAD] Fichier non trouvé: {file_path}")
         return
-    MAX_FILE_SIZE = 35 * 1024 * 1024 # 35 Mo
+    chat_id = update.message.chat_id
+    bot = context.bot
+
+    MAX_FILE_SIZE = 35 * 1024 * 1024
     file_size = os.path.getsize(file_path)
+    caption = "📦 Envoyé depuis le cache" if from_cache else None
+
+    def _remove_progress_message():
+        # Best effort: failing to delete the progress message must neither
+        # turn a successful upload into an error nor escape an except block.
+        if progress_msg_id is None:
+            return
+        try:
+            bot.delete_message(chat_id=chat_id, message_id=progress_msg_id)
+        except Exception as e:
+            console_logger.debug(f"[UPLOAD] Suppression du message de progression impossible: {e}")
+
     if file_size > MAX_FILE_SIZE:
         console_logger.info(f"[UPLOAD] Fichier '{file_path}' trop volumineux ({file_size} octets). Upload externe via curl.libriciel.fr.")
-        progress_msg = update.message.reply_text("Upload externe en cours : 0% ⏳")
-
+        if progress_msg_id is None:
+            progress_msg = update.message.reply_text("Upload externe en cours : 0% ⏳")
+            progress_msg_id = progress_msg.message_id
+        else:
+            _edit_progress(bot, chat_id, progress_msg_id, "Upload externe en cours : 0% ⏳")
+
         def progress_callback(percent):
-            try:
-                context.bot.edit_message_text(
-                    chat_id=update.message.chat_id,
-                    message_id=progress_msg.message_id,
-                    text=f"Upload externe en cours : {percent}% ⏳"
-                )
-            except Exception:
-                pass
+            _edit_progress(bot, chat_id, progress_msg_id,
+                           f"Upload externe en cours : {percent}% ⏳")
         try:
             from utils.curl_uploader import upload_large_file_via_curl
             shareable_url = upload_large_file_via_curl(file_path, progress_callback=progress_callback)
-            context.bot.delete_message(chat_id=update.message.chat_id,
-                                       message_id=progress_msg.message_id)
+            _remove_progress_message()
             update.message.reply_text(
                 f"Le fichier est trop volumineux pour être envoyé directement par Telegram.\n"
                 f"Veuillez le télécharger ici : {shareable_url}"
             )
             console_logger.info(f"[UPLOAD] Upload externe réussi pour '{file_path}' -> {shareable_url}")
         except Exception as e:
-            context.bot.delete_message(chat_id=update.message.chat_id,
-                                       message_id=progress_msg.message_id)
+            _remove_progress_message()
             update.message.reply_text(
                 "Erreur lors de l'upload externe du fichier.\nVeuillez uploader manuellement via https://curl.libriciel.fr/"
             )
             console_logger.error(f"[UPLOAD] Erreur upload externe pour '{file_path}': {str(e)}")
         return
-    # Sinon, envoyer le fichier via l'API Telegram
+    # Direct upload through Telegram, with progress reporting
     ext = os.path.splitext(file_path)[1].lower()
+    if progress_msg_id is not None:
+        _edit_progress(bot, chat_id, progress_msg_id, "📤 Envoi en cours... 0%")
+
+    # Bound before the try block so the finally clause can always test it.
+    progress_file = None
     try:
-        with open(file_path, "rb") as f:
-            if ext in [".mp4", ".mkv", ".avi"]:
-                update.message.reply_video(video=f, reply_to_message_id=update.message.message_id)
-                console_logger.info(f"[UPLOAD] Vidéo envoyée : {file_path}")
-            elif ext in [".mp3", ".wav"]:
-                update.message.reply_audio(audio=f, reply_to_message_id=update.message.message_id)
-                console_logger.info(f"[UPLOAD] Audio envoyé : {file_path}")
-            else:
-                update.message.reply_document(document=f, reply_to_message_id=update.message.message_id)
-                console_logger.info(f"[UPLOAD] Document envoyé : {file_path}")
+        progress_file = ProgressFile(
+            file_path,
+            progress_interval=10,
+            callback=lambda p: _edit_progress(bot, chat_id, progress_msg_id,
+                                              f"📤 Envoi en cours... {p}%") if progress_msg_id else None
+        )
+        if ext in [".mp4", ".mkv", ".avi"]:
+            update.message.reply_video(video=progress_file,
+                                       caption=caption,
+                                       reply_to_message_id=update.message.message_id)
+            console_logger.info(f"[UPLOAD] Vidéo envoyée : {file_path}")
+        elif ext in [".mp3", ".wav"]:
+            update.message.reply_audio(audio=progress_file,
+                                       caption=caption,
+                                       reply_to_message_id=update.message.message_id)
+            console_logger.info(f"[UPLOAD] Audio envoyé : {file_path}")
+        else:
+            update.message.reply_document(document=progress_file,
+                                          caption=caption,
+                                          reply_to_message_id=update.message.message_id)
+            console_logger.info(f"[UPLOAD] Document envoyé : {file_path}")
+        _remove_progress_message()
     except Exception as e:
+        _remove_progress_message()
         update.message.reply_text("Erreur lors de l'envoi du fichier.")
         console_logger.error(f"[UPLOAD] Erreur lors de l'envoi du fichier '{file_path}': {str(e)}")
+    finally:
+        # ProgressFile opens the file in its constructor; close it whether
+        # the send succeeded or not (the previous code relied on `with open`).
+        if progress_file is not None:
+            progress_file.close()