diff --git a/.dockerignore b/.dockerignore index a54022b..95368e8 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,22 +1,14 @@ -.env -.env.* -!.env.example -.venv/ -venv/ -__pycache__/ -*.py[cod] -*$py.class .git/ -.github/ .gitignore -.dockerignore +.env +.env.example logs/ downloads/ -download_temp/ +__pycache__/ +**/__pycache__/ +*.pyc +imghdr.py +.kilo/ +AGENTS.md +*.md ffmpeg/ -token.txt -egg-socialvideodownload.json -Dockerfile.pelican -entrypoint.sh -README.md -DOCS.md diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 643224b..79f3d9e 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -8,7 +8,20 @@ on: - develop jobs: + validate: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v6 + - uses: actions/setup-python@v6 + with: + python-version: '3.11' + - name: Install dependencies + run: pip install -r requirements.txt + - name: Verify imports + run: python -c "import main" + build-and-push: + needs: validate runs-on: ubuntu-latest permissions: contents: read @@ -17,18 +30,11 @@ jobs: - name: Check out the repo uses: actions/checkout@v6 - - name: Set up Python - uses: actions/setup-python@v6 - with: - python-version: '3.x' + - name: Set up QEMU + uses: docker/setup-qemu-action@v4 - - name: Install dependencies - run: pip install -r requirements.txt - - - name: Run tests - run: | - # Add your test commands here - echo "No tests to run" + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v4 - name: Log in to GitHub Packages uses: docker/login-action@v4 @@ -45,14 +51,9 @@ jobs: id: tags run: | REPO="ghcr.io/${{ env.repo_name }}" - if [ -f .env.example ] && grep -q '^VERSION=' .env.example; then - BOT_VERSION=$(grep '^VERSION=' .env.example | cut -d= -f2 | tr -d '[:space:]') - else - BOT_VERSION="unknown" - fi - VERSION_SHORT=$(echo "$BOT_VERSION" | sed -E 's/^((V[0-9]+)(\.[0-9]+)?).*/\1/') if [ "${{ github.ref }}" = "refs/heads/main" ]; then - echo 
"tags=${REPO}:latest,${REPO}:${VERSION_SHORT},${REPO}:${BOT_VERSION}" >> $GITHUB_OUTPUT + VERSION=$(grep '^VERSION' .env.example | head -1 | sed 's/.*=\(.*\)/\1/') + echo "tags=${REPO}:latest,${REPO}:${VERSION}" >> $GITHUB_OUTPUT elif [ "${{ github.ref }}" = "refs/heads/develop" ]; then echo "tags=${REPO}:dev" >> $GITHUB_OUTPUT else @@ -66,6 +67,7 @@ jobs: context: . push: true tags: ${{ steps.tags.outputs.tags }} + platforms: linux/amd64,linux/arm64 - name: Image digest run: echo ${{ steps.build-and-push.outputs.digest }} diff --git a/AGENTS.md b/AGENTS.md index 3fe13d4..a451bf3 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,162 +1,169 @@ # Agent Guidelines: SocialVideoDownload.py ## Project Overview -A **Telegram bot** that downloads videos and music from social media links (YouTube, TikTok, etc.) and sends them back to users. Deployed as a Docker container to GitHub Packages. +A **Telegram bot** (modular Python application) that downloads videos and music from social media links (YouTube, TikTok, etc.) and sends them back to users. Deployed as a Docker container to GitHub Container Registry. -- **Language**: Python 3.11 (target, required — Python 3.13+ is incompatible with `python-telegram-bot==13.7`) -- **Primary file**: `main.py` (~80 lines, orchestration only) -- **Framework**: `python-telegram-bot==13.7` — **critical**: this is the old v13 synchronous API (`Updater`, `Dispatcher`, `use_context=True`). Do NOT use modern v20+ async patterns (`Application`, `ContextTypes`, etc.); they are incompatible. 
-- **Deployment target**: Docker image → `ghcr.io/...` (GitHub Container Registry) - -## Project Structure - -``` -SocialVideoDownload.py/ -├── main.py # Entry point — token loading, handler registration, polling loop -├── config.py # Centralized constants (VERSION, FFMPEG_PATH, retention, disk thresholds) -├── requirements.txt # Python deps -├── Dockerfile # Multi-stage build (ffmpeg → builder → final) -├── .env.example # Template for BOT_TOKEN and other env vars -├── .github/workflows/ # CI/CD (deploy.yml) -├── commands/ # Telegram command handlers -│ ├── start.py -│ ├── help.py -│ ├── download.py -│ ├── music.py -│ ├── stats.py -│ └── auto_download.py -└── utils/ # Shared utilities - ├── token_loader.py # Reads BOT_TOKEN from .env (auto-creates template if missing) - ├── logger.py # Colored console + file logging - ├── file_manager.py # Hash-based deduplication (SHA-256 of URLs in downloads/hashes.txt) - ├── disk_manager.py # Free-space monitoring, emergency cleanup of downloads/ - ├── cache.py # JSON metadata cache (download_temp/cache_metadata.json) with TTL - ├── retention.py # Sets file mtime to future based on retention policy - ├── upload.py # Telegram upload (< 35 MB) or external fallback via curl.libriciel.fr - ├── curl_uploader.py # PycURL-based upload to curl.libriciel.fr with progress callback - └── progress_file.py # Progress tracking helpers -``` +- **Language**: Python 3.11 (target) +- **Architecture**: Modular (commands/, utils/ packages) with entry point `main.py` +- **Framework**: `python-telegram-bot==13.7` — **critical**: this is the old v13 synchronous API (`Updater`, `Dispatcher`, `use_context=True`). Do NOT use modern v20+ async patterns; they are incompatible. 
+- **Deployment**: Docker image → `ghcr.io/OverStyleFR/SocialVideoDownload.py` +- **CI/CD**: GitHub Actions with multi-arch (`linux/amd64`, `linux/arm64`) ## Essential Commands | Command | Purpose | |---------|---------| -| `python main.py` | Run the bot locally | +| `python main.py` | Run the bot locally (requires `.env`) | +| `bash setup.sh` | One-time local setup (venv + pip install + .env) | +| `docker compose up -d` | Run the Docker container locally | | `docker build -t socialvideodownload .` | Build Docker image | -| `docker run -e BOT_TOKEN=your_token socialvideodownload` | Run container (pass token via env) | +| `docker run -v $(pwd)/.env:/app/.env socialvideodownload` | Run container | | `pip install -r requirements.txt` | Install dependencies | | `echo "No tests to run"` | Current test suite (there are **no tests**) | **No test framework is configured.** The CI workflow explicitly skips tests with a placeholder `echo`. +## Project Structure + +``` +. +├── main.py # Entry point +├── config.py # Configuration from .env +├── commands/ +│ ├── start.py # /start handler +│ ├── help.py # /help handler +│ ├── download.py # /download handler +│ ├── music.py # /music handler +│ ├── stats.py # /stats handler +│ ├── auto_download.py # Auto-download via text messages +│ └── upload.py # Upload helper (now in utils/) +├── utils/ +│ ├── cache.py # Cache hit tracking +│ ├── disk_manager.py # Downloads cleanup (retention + emergency) +│ ├── file_manager.py # Hash-based dedup (sha256 of URL) +│ ├── logger.py # Console + file logging +│ ├── progress_file.py # Progress-aware file wrapper for uploads +│ ├── retention.py # File retention via mtime +│ ├── token_loader.py # .env token loading +│ ├── curl_uploader.py # External upload via curl.libriciel.fr +│ └── upload.py # Telegram file upload with progress +├── imghdr.py # Compatibility shim (removed from stdlib 3.13+) +├── Dockerfile # Multi-stage: bookworm base +├── docker-compose.yml # Local dev container +├── setup.sh # 
Standalone setup script +├── .dockerignore # Excludes ffmpeg/, .kilo/, *.md, etc. +├── .env.example # Environment template +├── .gitignore +├── requirements.txt +└── AGENTS.md +``` + ## Architecture & Data Flow ``` Telegram Message ↓ - python-telegram-bot v13 handlers (commands/*.py) + python-telegram-bot v13 handlers (commands/*.py) ↓ - yt-dlp Python API (yt_dlp.YoutubeDL) + yt-dlp (Python package, not subprocess) → downloads/<title>.<ext> ↓ - downloads/<title>.<ext> - ↓ - upload.py decides: - ├─ < 35 MB → send via Telegram API (reply_video / reply_audio / reply_document) - └─ ≥ 35 MB → upload via curl.libriciel.fr with progress updates, return URL + [Video] → check retention → bot.send_video() + [Music] → ffmpeg extract-audio → .mp3 → retention → bot.send_audio() ``` +- **Modules**: Logic is split into `commands/` (handlers) and `utils/` (infrastructure). +- **yt-dlp**: Used as a **Python package** (`import yt_dlp`), not a subprocess binary. - **Synchronous**: The entire bot is sync. All handlers block on I/O. Do not introduce `async`/`await` unless migrating the entire framework. -- **Subprocess**: Only `ffmpeg` (for `/music` conversion to MP3) spawns an external binary. -### Caching & Deduplication -- **URL deduplication**: `utils/file_manager.py` stores SHA-256 hashes of URLs in `downloads/hashes.txt`. Before downloading, the bot checks if the hash exists. -- **Metadata cache**: `utils/cache.py` maintains `download_temp/cache_metadata.json` with per-entry TTL: - - Small files (≤ 5 MB) → 24 hours - - Large files (> 5 MB) → 1 hour +### Caching & Dedup +- **Hash-based**: URL → SHA-256 → stored in `downloads/hashes.txt`. Before downloading, `is_already_downloaded()` checks if the hash exists. +- **File check**: Even if the hash exists, the bot verifies the file still exists on disk (yt-dlp's `prepare_filename`). If missing, it re-downloads and the hash line persists (harmless, duplicates are per-session only). 
### Retention Policy -- `utils/retention.py` sets file `mtime` to a future timestamp based on size/type: - - MP3 files → long retention (default 24h) - - Small files (< 4 MB default) → long retention - - Large files → short retention (default 2h) -- These values are configurable via `.env` (`SMALL_FILE_SIZE_MB`, `RETENTION_SMALL_HOURS`, `RETENTION_LARGE_HOURS`). +- Files get their **mtime set to `now + retention`** after download via `set_retention()`. +- Small files (< `SMALL_FILE_SIZE_MB`) and mp3s: retention = `RETENTION_SMALL_HOURS` (default 24h). +- Large files: retention = `RETENTION_LARGE_HOURS` (default 2h). +- `cleanup_by_retention()` removes files whose mtime < now (expired retention). +- `check_and_clean_if_needed()` tries retention first, then full clear if still low on space. ### Startup Behavior (`main()`) -1. Loads cache from `download_temp/cache_metadata.json` (creates empty if missing) -2. Checks free disk space; triggers emergency cleanup if below `MIN_FREE_SPACE_MB` (default 500) -3. Registers all command handlers on the dispatcher -4. Sets Telegram bot command menu (`/start`, `/help`, `/download`) -5. Starts a daemon thread for periodic cleanup every `CLEANUP_INTERVAL_HOURS` -6. Begins polling - -### Periodic Cleanup (`scheduled_cleanup` thread) -- Runs every `CLEANUP_INTERVAL_HOURS` (default 24h) -- Calls `clear_downloads()` which empties `downloads/` but preserves `hashes.txt` - -## External Dependencies (Binaries) - -| Binary | Expected Location | Used For | -|--------|-------------------|----------| -| `yt-dlp` | Python package (`yt_dlp`) | Video/audio downloading | -| `ffmpeg` | `ffmpeg/ffmpeg-7.0.2-amd64-static/ffmpeg` (local) or `/usr/local/bin/ffmpeg` (Docker) | MP3 extraction (`/music`) | - -- In Docker, FFmpeg is copied from a multi-stage `ghcr.io/linuxserver/ffmpeg:latest` image. -- Locally, the bundled static build `ffmpeg/ffmpeg-7.0.2-amd64-static/ffmpeg` is used (path configurable via `.env`). 
- -## Configuration & Secrets - -- **Token source**: `.env` file, variable `BOT_TOKEN`. Auto-generated if missing by `utils/token_loader.py`. -- **No `token.txt`**: The old flat-file approach has been replaced by `python-dotenv`. -- **Environment variables loaded**: - - `BOT_TOKEN` — Telegram bot token (required) - - `VERSION` — Bot version string (default `V.8-7`) - - `DEVELOPED_BY` — Author string (default `Tom V. | OverStyleFR`) - - `FFMPEG_PATH` — Path to ffmpeg binary - - `CLEANUP_INTERVAL_HOURS`, `MIN_FREE_SPACE_MB` — Disk/rotation tuning - - `SMALL_FILE_SIZE_MB`, `RETENTION_SMALL_HOURS`, `RETENTION_LARGE_HOURS` — Retention tuning +1. `clear_downloads()` — deletes everything in `downloads/` (fresh start). +2. `load_cache()` — loads cache tracking from disk. +3. Background thread: `scheduled_cleanup()` runs `cleanup_by_retention()` every `CLEANUP_INTERVAL_HOURS`. + +### File-Size Guard +Telegram bot API limits: the bot hardcodes a **35 MB** ceiling (`MAX_FILE_SIZE = 35 * 1024 * 1024` in `utils/upload.py`). Files exceeding this are uploaded externally via `curl.libriciel.fr`. + +## External Dependencies + +| Dependency | Type | Used For | +|------------|------|----------| +| `yt-dlp` | Python package (pip) | Video/audio downloading | +| `ffmpeg` | System binary | Audio extraction (music command) | + +- **FFmpeg** is resolved via `config.FFMPEG_PATH`: + - Default: `ffmpeg/ffmpeg-7.0.2-amd64-static/ffmpeg` (local dev) + - If the configured path doesn't exist: falls back to `"ffmpeg"` (system PATH) + - In Docker: copied from `ghcr.io/linuxserver/ffmpeg:latest` into `/usr/local/bin/` +- **yt-dlp** is a Python dependency (not a vendored binary). + +## Configuration & Secrets (`.env`) + +| Variable | Default | Description | +|----------|---------|-------------| +| `BOT_TOKEN` | *(required)* | Telegram bot token | +| `VERSION` | `unknown` if unset (e.g. `V9.2`) | Bot version (used for Docker tags, /stats, /help) — source of truth: `.env` / `.env.example` | +| `DEVELOPED_BY` | `Tom V. 
\| OverStyleFR` | Author credit | +| `FFMPEG_PATH` | see above | Path to ffmpeg binary | +| `MIN_FREE_SPACE_MB` | `500` | Min free space before emergency cleanup | +| `CLEANUP_INTERVAL_HOURS` | `24` | Interval between scheduled cleanups | +| `SMALL_FILE_SIZE_MB` | `4` | Threshold for small/large file retention | +| `RETENTION_SMALL_HOURS` | `24` | Retention for small files + mp3 | +| `RETENTION_LARGE_HOURS` | `2` | Retention for large files | + +- **Token source**: `.env` file (read via `python-dotenv`). If missing, `token_loader.py` creates a template and exits. +- **`token.txt`** is deprecated (was used by the old egg-pterodactyl setup). Now `.env` is the sole config source. +- **Version**: the source of truth is `.env` (via `VERSION=`). `.env.example` is the committed template. The CI reads `VERSION` from `.env.example` for the Docker tags. `config.py` no longer has a hardcoded fallback — if `.env` is missing, the displayed version is `"unknown"`. ## Code Patterns & Conventions -- **Language**: UI strings and comments are in **French** (e.g., "Téléchargement en cours", "Veuillez patienter..."). Maintain this for user-facing messages. -- **Logging**: Single colored logger (`utils/logger.py`): - - `console_logger` (TelegramBot logger) writes to both `logs/YYYY-MM-DD.log` and `StreamHandler` - - Format: `'%(asctime)s - %(levelname)s - %(message)s'` - - Category-based ANSI colors (e.g., `[DOWNLOAD]` blue, `[MUSIC]` magenta, `[UPLOAD]` yellow) -- **Retry logic**: Commands use `max_attempts = 3` / `while attempts < max_attempts` loops. -- **Error handling**: Broad `except Exception` with logging; generally falls back to retrying or sending an error message to the user. +- **Language**: UI strings and comments are in **French** (e.g., "Téléchargement en cours"). Maintain this for user-facing messages. +- **Logging**: Single `console_logger` ("TelegramBot") with colored console output + daily file logs in `logs/`. 
+- **Retry logic**: Downloads use a `while attempts < max_attempts` loop with `max_attempts = 3`. +- **Error handling**: Broad `except Exception` with logging. Some paths use `try/except` inside retry loops. ## Important Gotchas -1. **Old Telegram API**: If you add new handlers, use v13 semantics: - - `CommandHandler("cmd", func)` (in v13, `pass_args=True` is implicit via `context.args`) +1. **Old Telegram API**: Use v13 semantics: + - `CommandHandler("cmd", func)` — read arguments from `context.args` - `MessageHandler(Filters.text & ~Filters.command, func)` for plain text - - `update.message.chat_id`, `context.bot.send_video(...)` — **not** v20 kwargs. + - `update.message.chat_id`, `context.bot.send_video(...)` — **not** v20 patterns. -2. **File upload limit**: The bot uses a **35 MB** ceiling (`MAX_FILE_SIZE` in `utils/upload.py`). Files above this are uploaded externally via `curl.libriciel.fr` with a PycURL PUT and a 10%-step progress callback message. +2. **`python-telegram-bot==13.7` + Python ≥3.13**: The vendored urllib3 in PTB breaks on Python ≥3.13. A local `imghdr.py` shim is committed for Python 3.13+ stdlib changes. `urllib3<2` is pinned in `requirements.txt` to avoid removal of `urllib3.contrib.appengine`. -3. **Python version lock**: `python-telegram-bot==13.7` is **incompatible with Python 3.13+**. Always target Python 3.11. If running locally on a newer OS, install Python 3.11 via `pyenv` or similar. +3. **CI validate job**: Runs on Python 3.11 (target version). Using newer Python (3.13+) will fail due to PTB compatibility issues. -4. **Docker setuptools fix**: The Dockerfile explicitly reinstalls `setuptools<71` after wheel install because modern setuptoolsdrops `pkg_resources`, which APScheduler 3.6.3 requires. Do not remove this line. +4. **Cleanup on startup**: `clear_downloads()` wipes `downloads/` entirely (including `hashes.txt`). This means the hash cache is not persistent across restarts. -5. 
**No yt-dlp binary**: `yt-dlp` is installed as a Python package (`yt_dlp`), not a vendored binary. Both `download.py` and `music.py` import and call `yt_dlp.YoutubeDL(...)` directly. +5. **`egg-socialvideodownload.json`**: **Deleted** and deprecated. Was used for Pterodactyl/Pelican panel integration with `token.txt`. The bot now uses `.env` exclusively. -6. **Stats command is restricted**: `commands/stats.py` allows only `AUTHORIZED_USER` (username `overstylefr`) or `AUTHORIZED_IDS` (hardcoded Telegram user ID). Unauthorized users get "Accès refusé." +6. **Branch-based image tags** (`ghcr.io/...`): + - `main` → `latest` + VERSION tag + - `develop` → `dev` -7. **CI skips tests**: The GitHub Actions workflow has a placeholder `echo "No tests to run"`. Adding tests requires updating `.github/workflows/deploy.yml`. +7. **CI skips tests**: The workflow's `validate` job only installs dependencies and runs `python -c "import main"`; there is no test step. Adding tests requires updating `.github/workflows/deploy.yml`. -8. **Branch-based image tags** (`ghcr.io/...`): - - `main` → `latest` + version tags extracted from `config.py` - - `develop` → `dev` - - Other branches → branch name +8. **Version**: Read in `config.py` as `VERSION = os.getenv("VERSION", "unknown")`. The CI reads `VERSION` from `.env.example` via `grep` to tag Docker images. ## Docker Notes -- Multi-stage build: - 1. `ffmpeg` stage — copies binaries from `linuxserver/ffmpeg` - 2. `builder` stage — runs `pip wheel` to create wheels - 3. Final stage — installs wheels, fixes setuptools, copies FFmpeg, copies source, runs `python main.py` -- Base image: `python:3.11-slim-bullseye` -- The local `ffmpeg/` directory is copied into the image but the Dockerfile prefers the stage-copied `/usr/local/bin/ffmpeg`. +- **Multi-stage build**: + 1. `ffmpeg` stage — copies binaries from `linuxserver/ffmpeg:latest` + 2. `builder` stage — `pip wheel` on `python:3.11-slim-bookworm` + 3. Final stage — installs wheels (excluding setuptools — kept from base image), copies FFmpeg, `COPY . 
.`, runs `python main.py` +- **Base image**: `python:3.11-slim-bookworm` +- **`.dockerignore`** excludes `ffmpeg/`, `.kilo/`, `*.md`, `.env`, etc. to minimize image size. +- **`docker-compose.yml`** mounts `.env`, `downloads/`, and `logs/` as volumes. ## Git & Branches @@ -166,10 +173,10 @@ Telegram Message ## When Modifying This Codebase -- The project is split into modules; add new commands to `commands/` and new utilities to `utils/`. -- Preserve French user-facing strings. -- Do not upgrade `python-telegram-bot` without rewriting all handler signatures and startup logic. -- If adding a new command, remember to `dp.add_handler(CommandHandler("cmd", func))` in `main.py` before `updater.start_polling()`. -- If you introduce `async`, you must rewrite the entire bot (handlers, dispatcher, updater → ApplicationBuilder). Prefer sync additions to avoid a full migration. -- Update `VERSION` in `config.py` (and/or `.env`) when shipping meaningful changes. -- If you touch packaging, verify the Docker build still starts without `ModuleNotFoundError: pkg_resources`. +- **Preserve French** user-facing strings. +- **Do not upgrade** `python-telegram-bot` without rewriting all handler signatures (v13 → v20 is a full rewrite). +- If adding a new command, create the handler in `commands/`, add `dp.add_handler(...)` in `main()` before `updater.start_polling()`. +- If you introduce `async`, you must rewrite the entire bot (handlers, dispatcher, updater → ApplicationBuilder). Prefer sync additions. +- **Update `VERSION`** in `.env` / `config.py` default when shipping meaningful changes. +- **New dependencies**: If a dependency requires a Python feature removed in 3.13+ (like `imghdr`), provide a compatibility shim and commit it (do NOT gitignore). +- **`urllib3` pin**: Keep `urllib3<2` pinned — PTB v13.7 uses `urllib3.contrib.appengine` which was removed in urllib3 2.x. 
diff --git a/Dockerfile b/Dockerfile index 6a13872..6614c9e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,28 +1,36 @@ +# --- FFmpeg Stage --- +FROM ghcr.io/linuxserver/ffmpeg:latest AS ffmpeg + +# --- Build Stage --- FROM python:3.11-slim-bookworm AS builder -WORKDIR /build +WORKDIR /app + +RUN apt-get update && \ + apt-get install -y --no-install-recommends build-essential && \ + rm -rf /var/lib/apt/lists/* COPY requirements.txt . -RUN pip wheel --no-cache-dir --wheel-dir /wheels -r requirements.txt +RUN pip wheel --no-cache-dir --wheel-dir /app/wheels -r requirements.txt + +# --- Final Stage --- FROM python:3.11-slim-bookworm ENV PYTHONDONTWRITEBYTECODE=1 \ - PYTHONUNBUFFERED=1 \ - PIP_NO_CACHE_DIR=1 + PYTHONUNBUFFERED=1 WORKDIR /app -RUN apt-get update && \ - apt-get install -y --no-install-recommends ffmpeg ca-certificates && \ - rm -rf /var/lib/apt/lists/* +COPY --from=builder /app/wheels /wheels +COPY --from=ffmpeg /usr/local/bin/ffmpeg /usr/local/bin/ffmpeg +COPY --from=ffmpeg /usr/local/bin/ffprobe /usr/local/bin/ffprobe +RUN chmod +x /usr/local/bin/ffmpeg /usr/local/bin/ffprobe -COPY --from=builder /wheels /wheels -RUN pip install --no-cache-dir /wheels/* && \ - rm -rf /wheels +RUN pip install --no-cache $(ls /wheels/*.whl | grep -v setuptools) && rm -rf /wheels COPY . . 
-RUN mkdir -p /app/logs /app/downloads /app/download_temp +RUN mkdir -p /app/logs /app/downloads CMD ["python", "main.py"] diff --git a/commands/download.py b/commands/download.py index 2093a71..17e8781 100644 --- a/commands/download.py +++ b/commands/download.py @@ -1,11 +1,21 @@ # commands/download.py +import os import yt_dlp from utils.logger import console_logger from utils.file_manager import is_already_downloaded, save_download from utils.disk_manager import check_and_clean_if_needed from utils.retention import set_retention +from utils.cache import add_to_cache, record_cache_hit from utils.upload import upload_file + +def _edit_progress(bot, chat_id, msg_id, text): + try: + bot.edit_message_text(chat_id=chat_id, message_id=msg_id, text=text) + except Exception: + pass + + def download(update, context): args = context.args if not args: @@ -14,41 +24,61 @@ def download(update, context): return url = args[0] + chat_id = update.message.chat_id + bot = context.bot - # Vérification de l'espace disque avant téléchargement check_and_clean_if_needed() console_logger.info(f"[DOWNLOAD] Traitement de l'URL: {url} par {update.message.from_user.username}") + + progress_msg = update.message.reply_text( + "⏳ Téléchargement en cours...", + reply_to_message_id=update.message.message_id + ) + progress_msg_id = progress_msg.message_id ydl_opts = {'outtmpl': 'downloads/%(title)s.%(ext)s'} + should_download = True + from_cache = False + filename = None + if is_already_downloaded(url): - console_logger.info(f"[DOWNLOAD] Fichier déjà téléchargé pour l'URL: {url} par {update.message.from_user.username}. Récupération du fichier...") + console_logger.info(f"[DOWNLOAD] Fichier déjà téléchargé pour l'URL: {url} par {update.message.from_user.username}. 
Vérification du fichier...") try: with yt_dlp.YoutubeDL(ydl_opts) as ydl: info = ydl.extract_info(url, download=False) filename = ydl.prepare_filename(info) - upload_file(update, filename, context) - console_logger.info(f"[DOWNLOAD] Fichier envoyé pour l'URL: {url} par {update.message.from_user.username}") + if os.path.exists(filename): + should_download = False + from_cache = True + set_retention(filename) + add_to_cache(url, os.path.getsize(filename)) + record_cache_hit(url) + _edit_progress(bot, chat_id, progress_msg_id, "📦 Utilisation du cache...") + else: + console_logger.warning(f"[DOWNLOAD] Fichier manquant malgré hash pour l'URL: {url}. Retéléchargement...") except Exception as e: - update.message.reply_text("Erreur lors de la récupération du fichier.") - console_logger.error(f"[DOWNLOAD] Erreur récupération fichier pour l'URL: {url} par {update.message.from_user.username} - {str(e)}") - return + console_logger.error(f"[DOWNLOAD] Erreur récupération infos pour l'URL: {url} - {str(e)}") - max_attempts = 3 - attempts = 0 - while attempts < max_attempts: - try: - console_logger.info(f"[DOWNLOAD] Tentative {attempts + 1} de téléchargement pour l'URL: {url} par {update.message.from_user.username}") - with yt_dlp.YoutubeDL(ydl_opts) as ydl: - info = ydl.extract_info(url, download=True) - filename = ydl.prepare_filename(info) - save_download(url) - set_retention(filename) - console_logger.info(f"[DOWNLOAD] Téléchargement terminé pour l'URL: {url} par {update.message.from_user.username}. 
Envoi du fichier...") - upload_file(update, filename, context) - break - except Exception as e: - attempts += 1 - console_logger.error(f"[DOWNLOAD] Tentative {attempts} échouée pour l'URL: {url} par {update.message.from_user.username} - {str(e)}") - if attempts >= max_attempts: - update.message.reply_text("Erreur lors du téléchargement après plusieurs tentatives.") + if should_download: + max_attempts = 3 + attempts = 0 + while attempts < max_attempts: + try: + console_logger.info(f"[DOWNLOAD] Tentative {attempts + 1} de téléchargement pour l'URL: {url} par {update.message.from_user.username}") + with yt_dlp.YoutubeDL(ydl_opts) as ydl: + info = ydl.extract_info(url, download=True) + filename = ydl.prepare_filename(info) + save_download(url) + set_retention(filename) + add_to_cache(url, os.path.getsize(filename)) + break + except Exception as e: + attempts += 1 + console_logger.error(f"[DOWNLOAD] Tentative {attempts} échouée pour l'URL: {url} par {update.message.from_user.username} - {str(e)}") + if attempts >= max_attempts: + _edit_progress(bot, chat_id, progress_msg_id, "❌ Échec du téléchargement après plusieurs tentatives.") + return + + _edit_progress(bot, chat_id, progress_msg_id, "📤 Envoi en cours... 
0%") + upload_file(update, filename, context, progress_msg_id=progress_msg_id, from_cache=from_cache) diff --git a/commands/music.py b/commands/music.py index e5fe788..8726abb 100644 --- a/commands/music.py +++ b/commands/music.py @@ -5,9 +5,18 @@ from utils.logger import console_logger from utils.file_manager import is_already_downloaded, save_download from utils.retention import set_retention +from utils.cache import add_to_cache, record_cache_hit from utils.upload import upload_file from config import FFMPEG_PATH + +def _edit_progress(bot, chat_id, msg_id, text): + try: + bot.edit_message_text(chat_id=chat_id, message_id=msg_id, text=text) + except Exception: + pass + + def music(update, context): args = context.args if not args: @@ -16,58 +25,79 @@ def music(update, context): return url = args[0] + chat_id = update.message.chat_id + bot = context.bot + console_logger.info(f"[MUSIC] Traitement de l'URL: {url} par {update.message.from_user.username}") + + progress_msg = update.message.reply_text( + "⏳ Téléchargement vidéo en cours...", + reply_to_message_id=update.message.message_id + ) + progress_msg_id = progress_msg.message_id ydl_opts = {'outtmpl': 'downloads/%(title)s.%(ext)s'} + should_download = True + from_cache = False + video_file = None + if is_already_downloaded(url): - console_logger.info(f"[MUSIC] Vidéo déjà téléchargée pour l'URL: {url} par {update.message.from_user.username}. Récupération du fichier...") + console_logger.info(f"[MUSIC] Vidéo déjà téléchargée pour l'URL: {url} par {update.message.from_user.username}. 
Vérification du fichier...") try: with yt_dlp.YoutubeDL(ydl_opts) as ydl: info = ydl.extract_info(url, download=False) video_file = ydl.prepare_filename(info) - console_logger.info(f"[MUSIC] Vidéo trouvée: {video_file}") + if os.path.exists(video_file): + should_download = False + from_cache = True + set_retention(video_file) + add_to_cache(url, os.path.getsize(video_file)) + record_cache_hit(url) + _edit_progress(bot, chat_id, progress_msg_id, "📦 Utilisation du cache...") + else: + console_logger.warning(f"[MUSIC] Vidéo manquante malgré hash pour l'URL: {url}. Retéléchargement...") except Exception as e: - update.message.reply_text("Erreur lors de la récupération du fichier vidéo.") - console_logger.error(f"[MUSIC] Erreur récupération vidéo pour l'URL: {url} par {update.message.from_user.username} - {str(e)}") + console_logger.error(f"[MUSIC] Erreur récupération infos pour l'URL: {url} - {str(e)}") + _edit_progress(bot, chat_id, progress_msg_id, "❌ Erreur lors de la récupération de la vidéo.") return - else: + + if should_download: max_attempts = 3 attempts = 0 while attempts < max_attempts: try: - console_logger.info(f"[MUSIC] Tentative {attempts + 1} de téléchargement de la vidéo pour l'URL: {url} par {update.message.from_user.username}") + console_logger.info(f"[MUSIC] Tentative {attempts + 1} de téléchargement pour l'URL: {url} par {update.message.from_user.username}") with yt_dlp.YoutubeDL(ydl_opts) as ydl: info = ydl.extract_info(url, download=True) video_file = ydl.prepare_filename(info) save_download(url) - console_logger.info(f"[MUSIC] Téléchargement terminé: {video_file} par {update.message.from_user.username}") + set_retention(video_file) + add_to_cache(url, os.path.getsize(video_file)) break except Exception as e: attempts += 1 console_logger.error(f"[MUSIC] Tentative {attempts} échouée pour l'URL: {url} par {update.message.from_user.username} - {str(e)}") if attempts >= max_attempts: - update.message.reply_text("Erreur lors du téléchargement de la 
vidéo après plusieurs tentatives.") + _edit_progress(bot, chat_id, progress_msg_id, "❌ Échec du téléchargement après plusieurs tentatives.") return - # Conversion en audio MP3 + _edit_progress(bot, chat_id, progress_msg_id, "🔄 Conversion audio...") + audio_file = os.path.splitext(video_file)[0] + ".mp3" if os.path.exists(audio_file): console_logger.info(f"[MUSIC] Fichier audio déjà converti: {audio_file}") else: try: - console_logger.info(f"[MUSIC] Conversion de {video_file} en audio {audio_file} via ffmpeg pour {update.message.from_user.username}...") stream = ffmpeg.input(video_file) stream = ffmpeg.output(stream, audio_file, format='mp3', acodec='libmp3lame', audio_bitrate='192k') ffmpeg.run(stream, cmd=FFMPEG_PATH, quiet=True) + set_retention(audio_file) + add_to_cache(url + "#audio", os.path.getsize(audio_file)) console_logger.info(f"[MUSIC] Conversion terminée: {audio_file} pour {update.message.from_user.username}") except Exception as e: - update.message.reply_text("Erreur lors de la conversion en audio.") + _edit_progress(bot, chat_id, progress_msg_id, "❌ Erreur lors de la conversion en audio.") console_logger.error(f"[MUSIC] Erreur conversion en audio pour {video_file} par {update.message.from_user.username} - {str(e)}") return - try: - console_logger.info(f"[MUSIC] Envoi du fichier audio: {audio_file} pour {update.message.from_user.username}") - upload_file(update, audio_file, context) - except Exception as e: - update.message.reply_text("Erreur lors de l'envoi du fichier audio.") - console_logger.error(f"[MUSIC] Erreur envoi audio pour {audio_file} par {update.message.from_user.username} - {str(e)}") + _edit_progress(bot, chat_id, progress_msg_id, "📤 Envoi en cours... 
0%") + upload_file(update, audio_file, context, progress_msg_id=progress_msg_id, from_cache=from_cache) diff --git a/commands/start.py b/commands/start.py index 5c574b3..ec5cc73 100644 --- a/commands/start.py +++ b/commands/start.py @@ -1,4 +1,3 @@ -from telegram import ParseMode from utils.logger import console_logger def start(update, context): @@ -8,5 +7,5 @@ def start(update, context): "Je suis un bot qui permet de télécharger des vidéos/musiques via des liens de réseaux sociaux (principalement YouTube & TikTok)." ) - update.message.reply_text(welcome_message, parse_mode=ParseMode.HTML) + update.message.reply_text(welcome_message) console_logger.info(f"[START] Commande /start exécutée par {update.message.from_user.username}") diff --git a/commands/stats.py b/commands/stats.py index c1680a4..b176a19 100644 --- a/commands/stats.py +++ b/commands/stats.py @@ -1,14 +1,17 @@ from utils.logger import console_logger -from utils.cache import download_cache +from utils.cache import cache_stats from utils.disk_manager import get_free_space_mb from config import VERSION, DEVELOPED_BY import os import time -import hashlib import psutil AUTHORIZED_USER = "overstylefr" -AUTHORIZED_IDS = {5092023723} # ID Telegram de @overstylefr +AUTHORIZED_IDS = {5092023723} + + +def _fmt_fr(value, decimals=2): + return f"{value:.{decimals}f}".replace(".", ",") def stats(update, context): @@ -23,35 +26,12 @@ def stats(update, context): return # --- Cache Stats --- - cache_entries = len(download_cache) - cache_hits = 0 - cache_misses = 0 - cache_expired = 0 - cache_total_size = 0 - cache_small_files = 0 - cache_large_files = 0 - - for link_hash, (timestamp, file_size) in download_cache.items(): - from utils.cache import get_ttl - ttl = get_ttl(file_size) - age = time.time() - timestamp - if age < ttl: - cache_hits += 1 - cache_total_size += file_size - if file_size <= 5 * 1024 * 1024: - cache_small_files += 1 - else: - cache_large_files += 1 - else: - cache_expired += 1 + cs = cache_stats() 
# --- Disk Stats --- downloads_dir = "downloads" - download_temp_dir = "download_temp" total_downloads_size = 0 total_downloads_files = 0 - total_temp_size = 0 - total_temp_files = 0 if os.path.exists(downloads_dir): for root, dirs, files in os.walk(downloads_dir): @@ -60,14 +40,7 @@ def stats(update, context): total_downloads_size += os.path.getsize(fp) total_downloads_files += 1 - if os.path.exists(download_temp_dir): - for root, dirs, files in os.walk(download_temp_dir): - for f in files: - fp = os.path.join(root, f) - total_temp_size += os.path.getsize(fp) - total_temp_files += 1 - - free_space_mb = get_free_space_mb() + free_space_gb = get_free_space_mb() / 1024 # --- Logs Stats --- logs_dir = "logs" @@ -85,8 +58,8 @@ def stats(update, context): uptime_str = f"{int(uptime_seconds // 3600)}h {int((uptime_seconds % 3600) // 60)}m" cpu_percent = psutil.cpu_percent(interval=0.1) memory = psutil.virtual_memory() - memory_used_mb = memory.used / (1024 * 1024) - memory_total_mb = memory.total / (1024 * 1024) + memory_used_gb = memory.used / (1024 ** 3) + memory_total_gb = memory.total / (1024 ** 3) memory_percent = memory.percent # --- Hash Stats --- @@ -102,18 +75,18 @@ def stats(update, context): f"`Version:` {VERSION}\n" f"`Développé par:` {DEVELOPED_BY}\n\n" - f"🗂️ *Cache*\n" - f"`Entrées totales:` {cache_entries}\n" - f"`Entrées valides:` {cache_hits}\n" - f"`Entrées expirées:` {cache_expired}\n" - f"`Petits fichiers (≤5Mo):` {cache_small_files}\n" - f"`Gros fichiers (>5Mo):` {cache_large_files}\n" - f"`Taille totale cache:` {cache_total_size / (1024 * 1024):.2f} Mo\n\n" + f"🗂️ *Cache (session)*\n" + f"`Entrées totales:` {cs['total_entries']}\n" + f"`Entrées valides:` {cs['valid']}\n" + f"`Entrées expirées:` {cs['expired']}\n" + f"`Petits fichiers (≤5Mo):` {cs['small']}\n" + f"`Gros fichiers (>5Mo):` {cs['large']}\n" + f"`Taille totale cache:` {cs['total_size'] / (1024 * 1024):.2f} Mo\n" + f"`Hits cache:` {cs['total_hits']} ({cs['bytes_saved'] / (1024 * 
1024):.2f} Mo économisés)\n\n" f"💾 *Disque*\n" - f"`Espace libre:` {free_space_mb:.2f} Mo\n" + f"`Espace libre:` {_fmt_fr(free_space_gb)} Go\n" f"`Fichiers downloads:` {total_downloads_files} ({total_downloads_size / (1024 * 1024):.2f} Mo)\n" - f"`Fichiers temp:` {total_temp_files} ({total_temp_size / (1024 * 1024):.2f} Mo)\n" f"`URLs enregistrées:` {total_hashes}\n\n" f"📝 *Logs*\n" @@ -123,7 +96,7 @@ def stats(update, context): f"🖥️ *Système*\n" f"`Uptime:` {uptime_str}\n" f"`CPU:` {cpu_percent:.1f}%\n" - f"`RAM:` {memory_used_mb:.1f}/{memory_total_mb:.1f} Mo ({memory_percent}%)\n" + f"`RAM:` {_fmt_fr(memory_used_gb, 1)}/{_fmt_fr(memory_total_gb, 1)} Go ({memory_percent}%)\n" ) update.message.reply_text(msg, parse_mode="Markdown") diff --git a/config.py b/config.py index 12f7595..2f58ec3 100644 --- a/config.py +++ b/config.py @@ -4,9 +4,12 @@ load_dotenv(".env", override=True) -VERSION = os.getenv("VERSION", "") +VERSION = os.getenv("VERSION", "unknown") # Source de verite : .env / .env.example DEVELOPED_BY = os.getenv("DEVELOPED_BY", "Tom V. | OverStyleFR") -FFMPEG_PATH = os.getenv("FFMPEG_PATH", "ffmpeg/ffmpeg-7.0.2-amd64-static/ffmpeg") # Change cette valeur si nécessaire (chemin complet vers l'exécutable ffmpeg) +_FFMPEG_DEFAULT = "ffmpeg/ffmpeg-7.0.2-amd64-static/ffmpeg" +FFMPEG_PATH = os.getenv("FFMPEG_PATH", _FFMPEG_DEFAULT) +if not os.path.exists(FFMPEG_PATH): + FFMPEG_PATH = "ffmpeg" # Disk Management Configuration MIN_FREE_SPACE_MB = int(os.getenv("MIN_FREE_SPACE_MB", 500)) diff --git a/docker-compose.yml b/docker-compose.yml index 7d01395..a36e13d 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,15 +1,9 @@ -version: '3.8' - services: bot: - build: - context: . - dockerfile: Dockerfile.compose + build: . 
container_name: socialvideodownload - env_file: - - .env volumes: - - ./logs:/app/logs + - ./.env:/app/.env - ./downloads:/app/downloads - - ./download_temp:/app/download_temp + - ./logs:/app/logs restart: unless-stopped diff --git a/imghdr.py b/imghdr.py new file mode 100644 index 0000000..f2584ed --- /dev/null +++ b/imghdr.py @@ -0,0 +1,17 @@ +import struct + +def what(filename, h=None): + if h is None: + with open(filename, 'rb') as f: + h = f.read(32) + if h is None or len(h) < 8: + return None + if h.startswith(b'\x89PNG\r\n\x1a\n'): + return 'png' + if h.startswith(b'\xff\xd8'): + return 'jpeg' + if h.startswith(b'GIF87a') or h.startswith(b'GIF89a'): + return 'gif' + if h.startswith(b'RIFF') and h[8:12] == b'WEBP': + return 'webp' + return None diff --git a/main.py b/main.py index cd73c05..27d691d 100644 --- a/main.py +++ b/main.py @@ -1,5 +1,4 @@ # main.py -import os import threading import time from dotenv import load_dotenv @@ -14,48 +13,34 @@ from utils.cache import load_cache from utils.token_loader import get_token from config import CLEANUP_INTERVAL_HOURS -from utils.disk_manager import clear_downloads, check_and_clean_if_needed +from utils.disk_manager import clear_downloads, cleanup_by_retention from utils.logger import console_logger load_dotenv(".env", override=True) def scheduled_cleanup(): - """Thread de nettoyage périodique du dossier downloads.""" + """Thread de nettoyage périodique — respecte la rétention (mtimes).""" interval_seconds = CLEANUP_INTERVAL_HOURS * 3600 console_logger.info( f"[CLEANUP] Rotation planifiée activée — nettoyage toutes les {CLEANUP_INTERVAL_HOURS}h." 
) while True: time.sleep(interval_seconds) - console_logger.info("[CLEANUP] Nettoyage périodique du dossier downloads...") - clear_downloads() + console_logger.info("[CLEANUP] Nettoyage périodique (rétention)...") + cleanup_by_retention() console_logger.info("[CLEANUP] Nettoyage périodique terminé.") def main(): - console_logger.info("[INIT] Début de la réinitialisation des dossiers...") + console_logger.info("[INIT] Nettoyage du dossier downloads...") + clear_downloads() load_cache() - # Vérification de l'espace disque au démarrage - check_and_clean_if_needed() - token = get_token() updater = Updater(token, use_context=True) dp = updater.dispatcher - # Set webhook mode. This is often more robust than polling. - # It requires a public URL to be configured for Telegram to send updates to. - # For local testing, this might need further configuration or a tunneling service. - WEBHOOK_MODE = True # Set to True to use webhooks - if WEBHOOK_MODE: - # You would typically set a webhook URL here, e.g.: - # bot = Bot(token) - # bot.set_webhook("YOUR_WEBHOOK_URL") - # For now, we'll keep the polling logic but set the flag. - # If this resolves the conflict, the next step would be to configure webhooks properly. - console_logger.info("[INIT] Running in webhook mode (polling fallback).") - # Enregistrement des handlers pour les commandes du bot dp.add_handler(CommandHandler("start", start)) dp.add_handler(CommandHandler("help", help_command)) @@ -70,7 +55,7 @@ def main(): bot = updater.bot bot.set_my_commands([ BotCommand("start", "Pour commencer"), - BotCommand("help", "Pour obtenir de l\'aide"), + BotCommand("help", "Pour obtenir de l'aide"), BotCommand("download", "Télécharger une vidéo avec yt-dlp"), ]) console_logger.info("[INIT] Menu des commandes configuré.") @@ -80,13 +65,9 @@ def main(): cleanup_thread.start() console_logger.info("[INIT] Démarrage du bot et lancement du polling...") - # If using webhooks, you would typically start them here and not use polling. 
- # For now, we'll keep polling for simplicity if WEBHOOK_MODE is set but not fully configured. updater.start_polling() updater.idle() - - if __name__ == '__main__': main() diff --git a/requirements.txt b/requirements.txt index 4734ee2..5005b5c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ python-telegram-bot==13.7 +urllib3>=1.26,<2 yt-dlp>=2021.12.1 ffmpeg-python>=0.2.0 pycurl>=7.43.0.6 python-dotenv>=1.0.0 psutil>=5.9.0 -setuptools<71 diff --git a/setup.sh b/setup.sh new file mode 100755 index 0000000..5aaa19f --- /dev/null +++ b/setup.sh @@ -0,0 +1,28 @@ +#!/bin/bash +set -e + +echo "=== SocialVideoDownload.py — Installation autonome ===" +echo "" + +# Création de l'environnement virtuel +python3 -m venv .venv +source .venv/bin/activate + +# Installation des dépendances +pip install -r requirements.txt + +# Création du fichier .env +if [ ! -f .env ]; then + cp .env.example .env + echo "Fichier .env créé à partir de .env.example." +else + echo "Fichier .env déjà existant, aucun changement." +fi + +echo "" +echo "Installation terminée." +echo "" +echo "Configurez votre token Telegram dans .env (BOT_TOKEN), puis lancez :" +echo " source .venv/bin/activate && python main.py" +echo "" +echo "Assurez-vous que ffmpeg est accessible dans votre PATH ou défini via FFMPEG_PATH dans .env." 
diff --git a/utils/cache.py b/utils/cache.py index 11f5808..709a6ce 100644 --- a/utils/cache.py +++ b/utils/cache.py @@ -1,79 +1,76 @@ -import os -import json import time import hashlib from utils.logger import console_logger -# Bot version related constants (assuming these are defined in main.py or a config file) -# For now, hardcoding as example: -BOT_VERSION = "V9.1" - -# Cache configuration -SMALL_FILE_THRESHOLD = 5 * 1024 * 1024 # 5 MB -LONG_TTL = 24 * 3600 # 24 hours for files ≤5MB -STANDARD_TTL = 1 * 3600 # 1 hour for files >5MB -AUTHORIZED_USER = "overstylefr" -CACHE_FILE = "download_temp/cache_metadata.json" +SMALL_FILE_THRESHOLD = 5 * 1024 * 1024 +LONG_TTL = 24 * 3600 +STANDARD_TTL = 1 * 3600 download_cache = {} + def load_cache(): global download_cache - try: - with open(CACHE_FILE, 'r') as f: - download_cache = json.load(f) - console_logger.info(f'Cache loaded from {CACHE_FILE}') - except FileNotFoundError: - console_logger.warning(f'Cache file {CACHE_FILE} not found. Initializing empty cache.') - download_cache = {} - except json.JSONDecodeError: - console_logger.error(f'Error decoding JSON from {CACHE_FILE}. 
Initializing empty cache.') - download_cache = {} - except Exception as e: - console_logger.error(f'An unexpected error occurred loading cache: {e}') - download_cache = {} - -def save_cache(): - try: - # Ensure the directory exists - cache_dir = os.path.dirname(CACHE_FILE) - if cache_dir and not os.path.exists(cache_dir): - os.makedirs(cache_dir) - console_logger.info(f'Created cache directory: {cache_dir}') - - with open(CACHE_FILE, 'w') as f: - json.dump(download_cache, f, indent=4) # Use indent for readability - console_logger.info(f'Cache saved to {CACHE_FILE}') - except Exception as e: - console_logger.error(f'An error occurred saving cache to {CACHE_FILE}: {e}') + download_cache.clear() + console_logger.info("Cache initialisé (session en mémoire).") + def get_ttl(file_size): return LONG_TTL if file_size <= SMALL_FILE_THRESHOLD else STANDARD_TTL + def is_cache_valid(link_hash): if link_hash not in download_cache: return False - timestamp, size = download_cache[link_hash] - current_time = time.time() - ttl = get_ttl(size) - is_valid = (current_time - timestamp) < ttl - # console_logger.debug(f"Cache check for hash {link_hash}: valid={is_valid}, age={(current_time - timestamp):.0f}s, ttl={ttl}s") - return is_valid + timestamp, size, _hits = download_cache[link_hash] + return (time.time() - timestamp) < get_ttl(size) + def add_to_cache(link, file_size): - """Adds or updates an entry in the cache.""" - link_hash = hashlib.md5(link.encode()).hexdigest() - download_cache[link_hash] = (time.time(), file_size) - save_cache() + link_hash = hashlib.sha256(link.encode()).hexdigest() + download_cache[link_hash] = [time.time(), file_size, 0] return link_hash -def get_cached_file_path(link_hash): - """Tries to find the actual file path from the hash. Assumes a known file structure.""" - # This is a placeholder, a more robust solution might be needed if extensions vary wildly - # or if files are stored with different naming conventions. 
- base_path = os.path.join("download_temp", link_hash) - for ext in ['.mp4', '.mkv', '.webm', '.avi', '.mov', '.mp3', '.m4a', '.ogg', '.wav']: - if os.path.exists(base_path + ext): - return base_path + ext - return None +def record_cache_hit(link): + link_hash = hashlib.sha256(link.encode()).hexdigest() + if link_hash in download_cache: + download_cache[link_hash][2] += 1 + return True + return False + + +def cache_stats(): + total_entries = len(download_cache) + hits = 0 + expired = 0 + total_size = 0 + small = 0 + large = 0 + total_hits = 0 + bytes_saved = 0 + + for timestamp, file_size, hit_count in download_cache.values(): + age = time.time() - timestamp + total_hits += hit_count + bytes_saved += file_size * hit_count + if age < get_ttl(file_size): + hits += 1 + total_size += file_size + if file_size <= SMALL_FILE_THRESHOLD: + small += 1 + else: + large += 1 + else: + expired += 1 + + return { + "total_entries": total_entries, + "valid": hits, + "expired": expired, + "small": small, + "large": large, + "total_size": total_size, + "total_hits": total_hits, + "bytes_saved": bytes_saved, + } \ No newline at end of file diff --git a/utils/curl_uploader.py b/utils/curl_uploader.py index 6a0dd9d..93f0b94 100644 --- a/utils/curl_uploader.py +++ b/utils/curl_uploader.py @@ -44,6 +44,7 @@ def upload_large_file_via_curl(file_path, progress_callback=None): last_reported = [0] while attempts < 3: c = pycurl.Curl() + f = None try: c.setopt(c.URL, target_url) c.setopt(c.UPLOAD, 1) @@ -82,6 +83,10 @@ def progress(download_total, download_now, upload_total, upload_now): finally: try: c.close() - f.close() except Exception: pass + if f is not None: + try: + f.close() + except Exception: + pass diff --git a/utils/disk_manager.py b/utils/disk_manager.py index 5287111..901812a 100644 --- a/utils/disk_manager.py +++ b/utils/disk_manager.py @@ -2,49 +2,68 @@ import shutil from config import MIN_FREE_SPACE_MB from utils.logger import console_logger +from utils.retention import 
is_file_expired DOWNLOADS_DIR = "downloads" +HASH_FILE = os.path.join(DOWNLOADS_DIR, "hashes.txt") def get_free_space_mb() -> float: - """Retourne l'espace disque libre en Mo sur la partition du dossier downloads.""" stat = shutil.disk_usage(DOWNLOADS_DIR if os.path.exists(DOWNLOADS_DIR) else ".") return stat.free / (1024 * 1024) def clear_downloads(): - """Vide le dossier downloads et recrée sa structure (conserve hashes.txt).""" - hash_file = os.path.join(DOWNLOADS_DIR, "hashes.txt") - hashes_backup = None - - # Sauvegarde des hashes avant suppression pour éviter les re-téléchargements - if os.path.exists(hash_file): - with open(hash_file, "r") as f: - hashes_backup = f.read() - + """Vidage complet du dossier downloads (fichiers + hashes.txt). Démarrage frais.""" if os.path.exists(DOWNLOADS_DIR): shutil.rmtree(DOWNLOADS_DIR) - console_logger.info("[DISK_MANAGER] Dossier downloads vidé.") + console_logger.info("[DISK_MANAGER] Dossier downloads entièrement supprimé.") + os.makedirs(DOWNLOADS_DIR, exist_ok=True) + + +def cleanup_by_retention(): + """Supprime les fichiers dont la rétention est expirée et nettoie hashes.txt.""" + if not os.path.exists(DOWNLOADS_DIR): + os.makedirs(DOWNLOADS_DIR, exist_ok=True) + return - os.makedirs(DOWNLOADS_DIR) + removed = 0 + for entry in os.listdir(DOWNLOADS_DIR): + file_path = os.path.join(DOWNLOADS_DIR, entry) + if entry == "hashes.txt" or not os.path.isfile(file_path): + continue + if is_file_expired(file_path): + try: + os.remove(file_path) + console_logger.info(f"[DISK_MANAGER] Fichier expiré supprimé : {file_path}") + removed += 1 + except Exception as e: + console_logger.error(f"[DISK_MANAGER] Erreur suppression {file_path}: {e}") - if hashes_backup is not None: - with open(hash_file, "w") as f: - f.write(hashes_backup) - console_logger.info("[DISK_MANAGER] Fichier hashes.txt restauré après nettoyage.") + if removed: + console_logger.info(f"[DISK_MANAGER] Nettoyage par rétention terminé — {removed} fichier(s) 
supprimé(s).") + else: + console_logger.debug("[DISK_MANAGER] Aucun fichier expiré trouvé.") def check_and_clean_if_needed(): - """Vérifie l'espace libre et vide le dossier downloads si le seuil est atteint.""" + """Vérifie l'espace libre. Nettoie par rétention d'abord, sinon vidage complet.""" free_mb = get_free_space_mb() console_logger.debug(f"[DISK_MANAGER] Espace libre : {free_mb:.1f} Mo (seuil : {MIN_FREE_SPACE_MB} Mo)") if free_mb < MIN_FREE_SPACE_MB: console_logger.warning( f"[DISK_MANAGER] Espace libre insuffisant ({free_mb:.1f} Mo < {MIN_FREE_SPACE_MB} Mo). " - "Nettoyage d'urgence du dossier downloads..." + "Nettoyage par rétention..." ) - clear_downloads() + cleanup_by_retention() + free_mb = get_free_space_mb() + if free_mb < MIN_FREE_SPACE_MB: + console_logger.warning( + f"[DISK_MANAGER] Toujours insuffisant après rétention ({free_mb:.1f} Mo). " + "Vidage complet du dossier downloads..." + ) + clear_downloads() console_logger.info("[DISK_MANAGER] Nettoyage d'urgence terminé.") return True return False diff --git a/utils/file_manager.py b/utils/file_manager.py index 7e86bd0..ab988a8 100644 --- a/utils/file_manager.py +++ b/utils/file_manager.py @@ -1,23 +1,10 @@ import os import hashlib -import shutil from utils.logger import console_logger DOWNLOADS_DIR = "downloads" HASH_FILE = os.path.join(DOWNLOADS_DIR, "hashes.txt") -def create_folders(): - console_logger.info("[FILE_MANAGER] Réinitialisation du dossier downloads...") - if os.path.exists(DOWNLOADS_DIR): - shutil.rmtree(DOWNLOADS_DIR) - console_logger.info("[FILE_MANAGER] Dossier downloads supprimé.") - os.makedirs(DOWNLOADS_DIR) - console_logger.info("[FILE_MANAGER] Dossier downloads recréé.") - - if not os.path.exists("logs"): - os.makedirs("logs") - console_logger.info("[FILE_MANAGER] Dossier logs créé.") - def compute_hash(url): hash_value = hashlib.sha256(url.encode('utf-8')).hexdigest() console_logger.debug(f"[FILE_MANAGER] Hash calculé pour l'URL: {url} -> {hash_value}") diff --git 
a/utils/progress_file.py b/utils/progress_file.py index 67f020a..15b5bb2 100644 --- a/utils/progress_file.py +++ b/utils/progress_file.py @@ -3,13 +3,14 @@ from utils.logger import console_logger class ProgressFile: - def __init__(self, filename, progress_interval=20): + def __init__(self, filename, progress_interval=10, callback=None): self.filename = filename self.f = open(filename, "rb") self.total = os.path.getsize(filename) self.read_bytes = 0 self.last_percent = 0 - self.progress_interval = progress_interval # Intervalle de 20% par défaut + self.progress_interval = progress_interval + self.callback = callback def read(self, size=-1): data = self.f.read(size) @@ -19,11 +20,12 @@ def read(self, size=-1): if percent - self.last_percent >= self.progress_interval: self.last_percent = percent console_logger.info(f"[UPLOAD] Envoi du fichier {self.filename} : {percent}% complété.") + if self.callback: + self.callback(percent) return data def close(self): self.f.close() def __getattr__(self, attr): - # Permet d'accéder aux autres attributs du fichier return getattr(self.f, attr) diff --git a/utils/retention.py b/utils/retention.py index 74689b1..9a6d015 100644 --- a/utils/retention.py +++ b/utils/retention.py @@ -1,4 +1,5 @@ import os +import time from datetime import datetime, timedelta from utils.logger import console_logger @@ -31,3 +32,13 @@ def set_retention(file_path: str): console_logger.debug(f"[RETENTION] Set future mtime for {file_path} ({minutes} min)") except Exception as e: console_logger.error(f"[RETENTION] Failed to set mtime for {file_path}: {e}") + +def is_file_expired(file_path: str) -> bool: + """Check if a file's retention period has expired. + The file's mtime was set to (now + retention) by set_retention(), + so if mtime < now, the retention has elapsed. 
+ """ + if not os.path.exists(file_path): + return True + mtime = os.path.getmtime(file_path) + return mtime < time.time() diff --git a/utils/token_loader.py b/utils/token_loader.py index d943d06..21b0cb5 100644 --- a/utils/token_loader.py +++ b/utils/token_loader.py @@ -2,29 +2,21 @@ from dotenv import load_dotenv def get_token(): - # Priorité : variable d'environnement directe (Docker, Pelican, CI…) - token = os.getenv("BOT_TOKEN", "").strip() - if token and token != "YOUR_TELEGRAM_BOT_TOKEN_HERE": - return token - env_file = ".env" # Génération automatique du fichier .env s'il n'existe pas if not os.path.exists(env_file): - try: - with open(env_file, "w") as f: - f.write("# === Configuration du bot Telegram ===\n") - f.write("BOT_TOKEN=YOUR_TELEGRAM_BOT_TOKEN_HERE\n\n") - f.write("# === Configuration générale ===\n") - f.write("VERSION=V.8-7\n") - f.write("DEVELOPED_BY=Tom V. | OverStyleFR\n") - f.write("FFMPEG_PATH=ffmpeg/ffmpeg-7.0.2-amd64-static/ffmpeg\n") - print(f"Le fichier {env_file} a été créé. Veuillez y renseigner votre token Telegram (BOT_TOKEN).") - except OSError: - print("Impossible de créer le fichier .env (système de fichiers en lecture seule). Veuillez définir la variable d'environnement BOT_TOKEN.") + with open(env_file, "w") as f: + f.write("# === Configuration du bot Telegram ===\n") + f.write("BOT_TOKEN=YOUR_TELEGRAM_BOT_TOKEN_HERE\n\n") + f.write("# === Configuration générale ===\n") + f.write("VERSION=V9.2\n") + f.write("DEVELOPED_BY=Tom V. | OverStyleFR\n") + f.write("FFMPEG_PATH=ffmpeg/ffmpeg-7.0.2-amd64-static/ffmpeg\n") + print(f"Le fichier {env_file} a été créé. 
Veuillez y renseigner votre token Telegram (BOT_TOKEN).") exit(1) - load_dotenv(env_file, override=True) + load_dotenv(env_file) token = os.getenv("BOT_TOKEN", "").strip() diff --git a/utils/upload.py b/utils/upload.py index 4c044b1..9756b3f 100644 --- a/utils/upload.py +++ b/utils/upload.py @@ -1,67 +1,89 @@ # utils/upload.py import os from utils.logger import console_logger +from utils.progress_file import ProgressFile -def upload_file(update, file_path, context): - """ - Envoie le fichier via Telegram si sa taille est < 35 Mo. - Sinon, le fichier est uploadé via curl.libriciel.fr à l'aide de - upload_large_file_via_curl() et l'URL de téléchargement est renvoyée à l'utilisateur. - Un callback de progression met à jour un message Telegram tous les 10% avec l'emoji ⏳. - """ + +def _edit_progress(bot, chat_id, msg_id, text): + try: + bot.edit_message_text(chat_id=chat_id, message_id=msg_id, text=text) + except Exception: + pass + + +def upload_file(update, file_path, context, progress_msg_id=None, from_cache=False): if not os.path.exists(file_path): update.message.reply_text("Erreur : Fichier non trouvé.") console_logger.error(f"[UPLOAD] Fichier non trouvé: {file_path}") return - MAX_FILE_SIZE = 35 * 1024 * 1024 # 35 Mo + chat_id = update.message.chat_id + bot = context.bot + + MAX_FILE_SIZE = 35 * 1024 * 1024 file_size = os.path.getsize(file_path) + caption = "📦 Envoyé depuis le cache" if from_cache else None + if file_size > MAX_FILE_SIZE: console_logger.info(f"[UPLOAD] Fichier '{file_path}' trop volumineux ({file_size} octets). 
Upload externe via curl.libriciel.fr.") - progress_msg = update.message.reply_text("Upload externe en cours : 0% ⏳") - + if progress_msg_id is None: + progress_msg = update.message.reply_text("Upload externe en cours : 0% ⏳") + progress_msg_id = progress_msg.message_id + else: + _edit_progress(bot, chat_id, progress_msg_id, "Upload externe en cours : 0% ⏳") + def progress_callback(percent): - try: - context.bot.edit_message_text( - chat_id=update.message.chat_id, - message_id=progress_msg.message_id, - text=f"Upload externe en cours : {percent}% ⏳" - ) - except Exception: - pass + _edit_progress(bot, chat_id, progress_msg_id, + f"Upload externe en cours : {percent}% ⏳") try: from utils.curl_uploader import upload_large_file_via_curl shareable_url = upload_large_file_via_curl(file_path, progress_callback=progress_callback) - context.bot.delete_message(chat_id=update.message.chat_id, - message_id=progress_msg.message_id) + bot.delete_message(chat_id=chat_id, message_id=progress_msg_id) update.message.reply_text( f"Le fichier est trop volumineux pour être envoyé directement par Telegram.\n" f"Veuillez le télécharger ici : {shareable_url}" ) console_logger.info(f"[UPLOAD] Upload externe réussi pour '{file_path}' -> {shareable_url}") except Exception as e: - context.bot.delete_message(chat_id=update.message.chat_id, - message_id=progress_msg.message_id) + bot.delete_message(chat_id=chat_id, message_id=progress_msg_id) update.message.reply_text( "Erreur lors de l'upload externe du fichier.\nVeuillez uploader manuellement via https://curl.libriciel.fr/" ) console_logger.error(f"[UPLOAD] Erreur upload externe pour '{file_path}': {str(e)}") return - # Sinon, envoyer le fichier via l'API Telegram + # Envoi direct via Telegram avec progression ext = os.path.splitext(file_path)[1].lower() + if progress_msg_id is not None: + _edit_progress(bot, chat_id, progress_msg_id, "📤 Envoi en cours... 
0%") + try: - with open(file_path, "rb") as f: - if ext in [".mp4", ".mkv", ".avi"]: - update.message.reply_video(video=f, reply_to_message_id=update.message.message_id) - console_logger.info(f"[UPLOAD] Vidéo envoyée : {file_path}") - elif ext in [".mp3", ".wav"]: - update.message.reply_audio(audio=f, reply_to_message_id=update.message.message_id) - console_logger.info(f"[UPLOAD] Audio envoyé : {file_path}") - else: - update.message.reply_document(document=f, reply_to_message_id=update.message.message_id) - console_logger.info(f"[UPLOAD] Document envoyé : {file_path}") + progress_file = ProgressFile( + file_path, + progress_interval=10, + callback=lambda p: _edit_progress(bot, chat_id, progress_msg_id, + f"📤 Envoi en cours... {p}%") if progress_msg_id else None + ) + if ext in [".mp4", ".mkv", ".avi"]: + update.message.reply_video(video=progress_file, + caption=caption, + reply_to_message_id=update.message.message_id) + console_logger.info(f"[UPLOAD] Vidéo envoyée : {file_path}") + elif ext in [".mp3", ".wav"]: + update.message.reply_audio(audio=progress_file, + caption=caption, + reply_to_message_id=update.message.message_id) + console_logger.info(f"[UPLOAD] Audio envoyé : {file_path}") + else: + update.message.reply_document(document=progress_file, + caption=caption, + reply_to_message_id=update.message.message_id) + console_logger.info(f"[UPLOAD] Document envoyé : {file_path}") + if progress_msg_id is not None: + bot.delete_message(chat_id=chat_id, message_id=progress_msg_id) except Exception as e: + if progress_msg_id is not None: + bot.delete_message(chat_id=chat_id, message_id=progress_msg_id) update.message.reply_text("Erreur lors de l'envoi du fichier.") console_logger.error(f"[UPLOAD] Erreur lors de l'envoi du fichier '{file_path}': {str(e)}")