diff --git a/plugins/Stash2Plex/README.md b/plugins/Stash2Plex/README.md new file mode 100644 index 00000000..5dab9768 --- /dev/null +++ b/plugins/Stash2Plex/README.md @@ -0,0 +1,156 @@ +# Stash2Plex + +Sync metadata from Stash to Plex with queue-based reliability. + +[![Tests](https://img.shields.io/badge/tests-500%2B-brightgreen)](tests/) +[![Coverage](https://img.shields.io/badge/coverage-%3E80%25-brightgreen)](pytest.ini) +[![License](https://img.shields.io/badge/license-AGPL--3.0-blue)](LICENSE) + +## Overview + +Stash2Plex is a Stash plugin that automatically syncs scene metadata from Stash to Plex. When you update a scene in Stash (title, studio, performers, tags), Stash2Plex queues the change and syncs it to the matching item in your Plex library. + +**Key features:** + +- **Persistent queue** - Jobs survive Stash restarts; nothing is lost if Stash crashes +- **Automatic retry** - Failed syncs retry with exponential backoff +- **Circuit breaker** - Protects Plex from being hammered when it's down +- **Crash recovery** - In-progress jobs automatically resume after restart +- **Performance caching** - Reduces Plex API calls with disk-backed caching +- **Selective sync** - Toggle which metadata fields sync to Plex +- **Sync statistics** - Track success rates and timing with batch summaries +- **Batch processing** - Scene data fetched in single queries for large syncs (v1.1.4) +- **Dynamic timeouts** - Processing timeout scales with queue size (v1.1.3) + +**Use Stash2Plex if you:** + +- Organize your media metadata in Stash +- Want Plex to reflect the same titles, studios, performers, and tags +- Need reliable syncing that handles network issues gracefully + +## Quick Start + +**Prerequisites:** + +- Stash running with [PythonDepManager](https://github.com/stashapp/CommunityScripts/tree/main/plugins/PythonDepManager) plugin installed +- Plex Media Server running and accessible from Stash +- Your Plex authentication token + +### Get Your Plex Token + +1. Open Plex Web App and sign in +2. Open any item in your library +3. Click the three-dot menu and select "Get Info" +4. Click "View XML" +5. In the URL bar, find `X-Plex-Token=YOUR_TOKEN_HERE` + +Alternatively, see [Plex's official guide](https://support.plex.tv/articles/204059436-finding-an-authentication-token-x-plex-token/). + +### Installation + +1. **Download Stash2Plex** to your Stash plugins directory: + + ```bash + cd ~/.stash/plugins + git clone https://github.com/trek-e/Stash2Plex.git + ``` + + Or download and extract the ZIP from the [releases page](https://github.com/trek-e/Stash2Plex/releases). + +2. **Reload plugins** in Stash: + + Settings > Plugins > Reload Plugins + +3. **Configure required settings** in Stash: + + Settings > Plugins > Stash2Plex + + | Setting | Value | + |---------|-------| + | Plex URL | `http://localhost:32400` (or your Plex server address) | + | Plex Token | Your X-Plex-Token from above | + | Plex Library | Name of your Plex library (e.g., `Movies`) | + +4. **Test the sync:** + + - Edit any scene in Stash (change the title slightly) + - Check Plex within 30 seconds - the title should update + +That's it! Stash2Plex is now syncing metadata from Stash to Plex. + +## How It Works + +1. **Hook triggers** - When you update a scene in Stash, Stash2Plex receives a hook event +2. **Job queued** - The sync job is saved to a SQLite-backed persistent queue +3. **Worker syncs** - Background worker matches the scene to Plex and applies metadata +4. 
**Retry on failure** - If Plex is down, the job retries with exponential backoff +5. **Dead letter queue** - Permanently failed jobs (e.g., no Plex match) go to a DLQ for review + +## Documentation + +- [Installation Guide](docs/install.md) - Full setup instructions including Docker +- [Configuration Reference](docs/config.md) - All settings explained +- [Troubleshooting](docs/troubleshoot.md) - Common issues and solutions +- [Architecture](docs/ARCHITECTURE.md) - System design and data flow +- [Changelog](CHANGELOG.md) - Version history + +## Requirements + +- **Stash** - Any recent version +- **Plex Media Server** - Any recent version +- **PythonDepManager** - Stash plugin for managing Python dependencies +- **Python dependencies** - Installed automatically by PythonDepManager: + - `plexapi` - Plex API client + - `pydantic` - Data validation + - `tenacity` - Retry logic + - `persistqueue` - SQLite-backed queue + - `diskcache` - Performance caching + +## Settings Reference + +### Core Settings + +| Setting | Type | Default | Description | +|---------|------|---------|-------------| +| `plex_url` | string | - | Plex server URL (required) | +| `plex_token` | string | - | Plex authentication token (required) | +| `plex_library` | string | - | Plex library name (recommended) | +| `enabled` | boolean | `true` | Enable/disable the plugin | + +### Behavior Settings + +| Setting | Type | Default | Description | +|---------|------|---------|-------------| +| `max_retries` | number | `5` | Max retry attempts before DLQ | +| `poll_interval` | number | `30` | Seconds between queue polls | +| `strict_matching` | boolean | `true` | Skip sync when multiple matches found | +| `preserve_plex_edits` | boolean | `false` | Don't overwrite existing Plex values | +| `connect_timeout` | number | `5` | Plex connection timeout (seconds) | +| `read_timeout` | number | `30` | Plex read timeout (seconds) | + +### Field Sync Toggles + +Control which metadata fields sync from Stash to Plex. All enabled by default. + +| Setting | Type | Default | Description | +|---------|------|---------|-------------| +| `sync_master` | boolean | `true` | Master toggle - when OFF, no fields sync | +| `sync_studio` | boolean | `true` | Sync studio name | +| `sync_summary` | boolean | `true` | Sync summary/details | +| `sync_tagline` | boolean | `true` | Sync tagline | +| `sync_date` | boolean | `true` | Sync release date | +| `sync_performers` | boolean | `true` | Sync performers as actors | +| `sync_tags` | boolean | `true` | Sync tags as genres | +| `sync_poster` | boolean | `true` | Sync poster image | +| `sync_background` | boolean | `true` | Sync background/fanart image | +| `sync_collection` | boolean | `true` | Add to collection by studio name | + +See [Configuration Reference](docs/config.md) for detailed documentation of all settings. + +## Contributing + +Contributions are welcome! See [CONTRIBUTING.md](CONTRIBUTING.md) for development setup and guidelines. + +## License + +This project is licensed under the [GNU Affero General Public License v3.0](LICENSE). diff --git a/plugins/Stash2Plex/Stash2Plex.py b/plugins/Stash2Plex/Stash2Plex.py new file mode 100755 index 00000000..8abd0fa3 --- /dev/null +++ b/plugins/Stash2Plex/Stash2Plex.py @@ -0,0 +1,550 @@ +#!/usr/bin/env python3 +""" +Stash2Plex - Stash plugin for syncing metadata to Plex + +Entry point for the Stash plugin. Initializes queue infrastructure, +starts background worker, and handles Stash hooks. 
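+
+A rough sketch of the JSON this entry point reads from stdin for a hook
+invocation (key names mirror main() and handle_hook() below; values are
+placeholders and Stash may include additional fields):
+
+    {
+        "server_connection": {"Scheme": "http", "Host": "127.0.0.1", "Port": 9999},
+        "args": {
+            "hookContext": {
+                "type": "Scene.Update.Post",
+                "id": 123,
+                "input": {"title": "Updated Title"}
+            }
+        }
+    }
+
+Task runs triggered from the Stash UI arrive with {"args": {"mode": "all"}}
+or {"args": {"mode": "recent"}} instead of a hookContext.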
+""" + +import os +import sys +import json + + +# Stash plugin log levels - prefix format: \x01 + level + \x02 + message +def log_trace(msg): print(f"\x01t\x02[Stash2Plex] {msg}", file=sys.stderr) +def log_debug(msg): print(f"\x01d\x02[Stash2Plex] {msg}", file=sys.stderr) +def log_info(msg): print(f"\x01i\x02[Stash2Plex] {msg}", file=sys.stderr) +def log_warn(msg): print(f"\x01w\x02[Stash2Plex] {msg}", file=sys.stderr) +def log_error(msg): print(f"\x01e\x02[Stash2Plex] {msg}", file=sys.stderr) +def log_progress(p): print(f"\x01p\x02{p}") + + +log_trace("Script starting...") + +# Add plugin directory to path for imports +PLUGIN_DIR = os.path.dirname(os.path.abspath(__file__)) +if PLUGIN_DIR not in sys.path: + sys.path.insert(0, PLUGIN_DIR) + +log_trace(f"PLUGIN_DIR: {PLUGIN_DIR}") + +try: + from sync_queue.manager import QueueManager + from sync_queue.dlq import DeadLetterQueue + from sync_queue.operations import load_sync_timestamps + from worker.processor import SyncWorker + from hooks.handlers import on_scene_update + from validation.config import validate_config, Stash2PlexConfig + from plex.device_identity import configure_plex_device_identity + log_trace("All imports successful") +except ImportError as e: + log_error(f"Import error: {e}") + import traceback + traceback.print_exc() + print(json.dumps({"error": str(e)})) + sys.exit(1) + +# Globals (initialized in main) +queue_manager = None +dlq = None +worker = None +config: Stash2PlexConfig = None +sync_timestamps: dict = None +stash_interface = None + + +def get_plugin_data_dir(): + """ + Get or create plugin data directory. + + Returns: + Path to plugin data directory + """ + # Check for Stash-provided path first + data_dir = os.getenv('STASH_PLUGIN_DATA') + if not data_dir: + # Default to plugin_dir/data + data_dir = os.path.join(PLUGIN_DIR, 'data') + + os.makedirs(data_dir, exist_ok=True) + return data_dir + + +def get_stash_interface(input_data: dict): + """ + Create StashInterface from input data. + + Returns: + StashInterface instance or None if connection fails + """ + try: + from stashapi.stashapp import StashInterface + conn = input_data.get('server_connection', {}) + if conn: + return StashInterface(conn) + except ImportError: + log_warn(" stashapi not installed, cannot fetch settings") + except Exception as e: + log_warn(f" Could not connect to Stash: {e}") + return None + + +def fetch_plugin_settings(stash) -> dict: + """ + Fetch Stash2Plex plugin settings from Stash configuration. + + Args: + stash: StashInterface instance + + Returns: + Dictionary with plugin settings + """ + if not stash: + return {} + + try: + config = stash.get_configuration() + plugins = config.get('plugins', {}) + return plugins.get('Stash2Plex', {}) + except Exception as e: + log_warn(f" Could not fetch settings from Stash: {e}") + return {} + + +def extract_config_from_input(input_data: dict) -> dict: + """ + Extract Plex configuration from Stash input data. + + Fetches plugin settings from Stash GraphQL API using stashapi, + then falls back to environment variables. 
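+
+    Illustrative sketch of the mapping (key names mirror the parsing below;
+    values are placeholders):
+
+        server_connection = {'Scheme': 'http', 'Host': '127.0.0.1', 'Port': 9999}
+        -> config_dict['stash_url'] == 'http://127.0.0.1:9999'
+
+    Settings fetched from Stash (plex_url, plex_token, ...) are merged on top,
+    and the PLEX_URL / PLEX_TOKEN environment variables act as a final
+    fallback for those two keys.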
+ + Args: + input_data: Input data from Stash plugin protocol + + Returns: + Dictionary with config values (may be empty if nothing found) + """ + global stash_interface + config_dict = {} + + # Extract Stash connection info for image fetching + server_conn = input_data.get('server_connection', {}) + if server_conn: + scheme = server_conn.get('Scheme', server_conn.get('scheme', 'http')) + host = server_conn.get('Host', server_conn.get('host', '127.0.0.1')) + port = server_conn.get('Port', server_conn.get('port', 9999)) + config_dict['stash_url'] = f"{scheme}://{host}:{port}" + + # Get session cookie for authentication + session_cookie = server_conn.get('SessionCookie', {}) + if session_cookie: + if isinstance(session_cookie, dict): + cookie_name = session_cookie.get('Name', 'session') + cookie_value = session_cookie.get('Value', '') + config_dict['stash_session_cookie'] = f"{cookie_name}={cookie_value}" + else: + config_dict['stash_session_cookie'] = str(session_cookie) + + # Fetch settings from Stash using stashapi + stash = get_stash_interface(input_data) + stash_interface = stash # Store globally for hook handlers + if stash: + stash_settings = fetch_plugin_settings(stash) + if stash_settings: + log_trace(f"Loaded settings from Stash: {list(stash_settings.keys())}") + config_dict.update(stash_settings) + + # Fallback to environment variables + if 'plex_url' not in config_dict: + env_url = os.getenv('PLEX_URL') + if env_url: + config_dict['plex_url'] = env_url + + if 'plex_token' not in config_dict: + env_token = os.getenv('PLEX_TOKEN') + if env_token: + config_dict['plex_token'] = env_token + + return config_dict + + +def initialize(config_dict: dict = None): + """ + Initialize queue, DLQ, and worker with validated configuration. + + Args: + config_dict: Optional configuration dictionary. If not provided, + will attempt to read from environment variables. 
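+                     A minimal illustrative dict (keys correspond to the
+                     settings in Stash2Plex.yml) looks like
+                     {'plex_url': 'http://localhost:32400',
+                     'plex_token': '<token>', 'plex_library': 'Movies'}.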
+ + Raises: + SystemExit: If configuration validation fails + """ + log_trace("initialize() called") + global queue_manager, dlq, worker, config, sync_timestamps + + # Validate configuration + if config_dict is None: + config_dict = {} + + # If no config provided, try environment variables as fallback + if not config_dict: + env_url = os.getenv('PLEX_URL') + env_token = os.getenv('PLEX_TOKEN') + if env_url: + config_dict['plex_url'] = env_url + if env_token: + config_dict['plex_token'] = env_token + + log_trace(f"Validating config: {list(config_dict.keys())}") + try: + validated_config, error = validate_config(config_dict) + except Exception as e: + log_error(f"validate_config exception: {e}") + import traceback + traceback.print_exc() + raise + + if error: + log_error(f"Configuration error: {error}") + raise SystemExit(1) + + config = validated_config + + # Check if plugin is disabled + if not config.enabled: + log_info("Plugin is disabled via configuration") + return + + data_dir = get_plugin_data_dir() + + # Configure persistent Plex device identity (must be before PlexClient creation) + device_id = configure_plex_device_identity(data_dir) + log_trace(f"Using Plex device ID: {device_id[:8]}...") + + # Load sync timestamps for late update detection + sync_timestamps = load_sync_timestamps(data_dir) + log_trace(f"Loaded {len(sync_timestamps)} sync timestamps") + + # Initialize queue infrastructure + queue_manager = QueueManager(data_dir) + dlq = DeadLetterQueue(data_dir) + + # Start background worker with data_dir for timestamp updates + worker = SyncWorker( + queue_manager.get_queue(), + dlq, + config, + data_dir=data_dir, + max_retries=config.max_retries + ) + worker.start() + + log_info("Initialization complete") + + +def shutdown(): + """Clean shutdown of worker and queue.""" + global worker, queue_manager + + if worker: + worker.stop() + + if queue_manager: + queue_manager.shutdown() + + log_trace("Shutdown complete") + + +def handle_hook(hook_context: dict, stash=None): + """ + Handle incoming Stash hook. 
+ + Expected hook_context structure: + { + "type": "Scene.Update.Post", + "input": {...scene update data...} + } + + Args: + hook_context: Hook context from Stash + stash: StashInterface for API calls + """ + global sync_timestamps + + hook_type = hook_context.get("type", "") + # Scene ID is at top level of hookContext, not inside input + scene_id = hook_context.get("id") + input_data = hook_context.get("input") or {} + + log_trace(f"handle_hook: type={hook_type}, scene_id={scene_id}") + + # Only process Scene.Update.Post with actual user input + # Scene.Create.Post is typically from scans - skip those + # Empty input means scan-triggered update, not user edit + if hook_type == "Scene.Update.Post": + if not input_data: + log_trace(f"Skipping {hook_type} - no input data (likely scan)") + return + + if scene_id: + data_dir = get_plugin_data_dir() + try: + on_scene_update( + scene_id, + input_data, + queue_manager.get_queue(), + data_dir=data_dir, + sync_timestamps=sync_timestamps, + stash=stash + ) + log_trace(f"on_scene_update completed for scene {scene_id}") + except Exception as e: + log_error(f"on_scene_update exception: {e}") + import traceback + traceback.print_exc() + else: + log_warn(f"{hook_type} hook missing scene ID") + elif hook_type == "Scene.Create.Post": + log_trace(f"Skipping {hook_type} - scene creation from scan") + else: + log_trace(f"Unhandled hook type: {hook_type}") + + +def handle_task(task_args: dict, stash=None): + """ + Handle manual task trigger from Stash UI. + + Args: + task_args: Task arguments (mode: 'all' or 'recent') + stash: StashInterface for API calls + """ + from sync_queue.operations import enqueue + + mode = task_args.get('mode', 'recent') + log_info(f"Task starting with mode: {mode}") + + if not stash: + log_error("No Stash connection available") + return + + try: + # Batch query fragment - get all needed data in one call + batch_fragment = """ + id + title + details + date + rating100 + files { path } + studio { name } + performers { name } + tags { name } + paths { screenshot preview } + """ + + # Query scenes based on mode + if mode == 'all': + log_info("Fetching all scenes...") + scenes = stash.find_scenes(fragment=batch_fragment) + else: # recent - last 24 hours + import datetime + yesterday = (datetime.datetime.now() - datetime.timedelta(days=1)).strftime("%Y-%m-%d %H:%M:%S") + log_info(f"Fetching scenes updated since {yesterday}...") + scenes = stash.find_scenes( + f={"updated_at": {"value": yesterday, "modifier": "GREATER_THAN"}}, + fragment=batch_fragment + ) + + if not scenes: + log_info("No scenes found to sync") + return + + log_info(f"Found {len(scenes)} scenes to sync") + + # Queue each scene directly (data already fetched in batch) + queue = queue_manager.get_queue() + queued = 0 + skipped = 0 + + for scene in scenes: + scene_id = scene.get('id') + if not scene_id: + continue + + # Extract file path + files = scene.get('files', []) + if not files: + skipped += 1 + continue + file_path = files[0].get('path') + if not file_path: + skipped += 1 + continue + + # Build job data from batch-fetched scene + job_data = { + 'path': file_path, + 'title': scene.get('title'), + 'details': scene.get('details'), + 'date': scene.get('date'), + 'rating100': scene.get('rating100'), + } + + # Extract nested fields + studio = scene.get('studio') + if studio: + job_data['studio'] = studio.get('name') + + performers = scene.get('performers', []) + if performers: + job_data['performers'] = [p.get('name') for p in performers if p.get('name')] + + tags = 
scene.get('tags', []) + if tags: + job_data['tags'] = [t.get('name') for t in tags if t.get('name')] + + paths = scene.get('paths', {}) + if paths: + if paths.get('screenshot'): + job_data['poster_url'] = paths['screenshot'] + if paths.get('preview'): + job_data['background_url'] = paths['preview'] + + # Enqueue directly (skip on_scene_update to avoid per-scene GraphQL) + try: + enqueue(queue, int(scene_id), "metadata", job_data) + queued += 1 + except Exception as e: + log_warn(f"Failed to queue scene {scene_id}: {e}") + + log_info(f"Queued {queued} scenes for sync" + (f" (skipped {skipped} without files)" if skipped else "")) + + except Exception as e: + log_error(f"Task error: {e}") + import traceback + traceback.print_exc() + + +def is_scan_job_running(stash) -> bool: + """Check if a scan/generate job is running in Stash.""" + if not stash: + return False + try: + # Stash Job type has: id, status, subTasks, description, progress, startTime, endTime, addTime + result = stash.call_GQL(""" + query { jobQueue { status description } } + """) + jobs = result.get('jobQueue', []) if result else [] + # Check description for scan-related keywords + scan_keywords = ['scan', 'auto tag', 'generate', 'identify'] + for job in jobs: + status = (job.get('status') or '').upper() + description = (job.get('description') or '').lower() + if status in ('RUNNING', 'READY') and any(kw in description for kw in scan_keywords): + return True + except Exception: + pass + return False + + +def main(): + """Main entry point for Stash plugin.""" + # Read input from stdin (Stash plugin protocol) + try: + raw_input = sys.stdin.read() + input_data = json.loads(raw_input) + except json.JSONDecodeError as e: + log_error(f"JSON decode error: {e}") + print(json.dumps({"error": f"Invalid JSON input: {e}"})) + sys.exit(1) + + # Check args first - for hooks, check if scan is running before heavy init + args = input_data.get("args", {}) + is_hook = "hookContext" in args + + # For hooks: create minimal stash connection to check for scans + if is_hook: + temp_stash = get_stash_interface(input_data) + if is_scan_job_running(temp_stash): + # Scan running - exit immediately without initialization + print(json.dumps({"output": "ok"})) + return + + # Initialize on first call + global queue_manager, config + if queue_manager is None: + config_dict = extract_config_from_input(input_data) + initialize(config_dict) + + # Check if plugin is disabled (config may have loaded but plugin disabled) + if config and not config.enabled: + print(json.dumps({"output": "disabled"})) + return + + # Handle hook or task + if is_hook: + try: + handle_hook(args["hookContext"], stash=stash_interface) + except Exception as e: + log_error(f"handle_hook exception: {e}") + import traceback + traceback.print_exc() + elif "mode" in args: + try: + handle_task(args, stash=stash_interface) + except Exception as e: + log_error(f"handle_task exception: {e}") + import traceback + traceback.print_exc() + + # Give worker time to process pending jobs before exiting + # Worker thread is daemon, so it dies when main process exits + if worker and queue_manager: + import time + queue = queue_manager.get_queue() + + # Dynamic timeout: ~2 seconds per item, min 30s, max 600s (10 min) + initial_size = queue.size + max_wait = max(30, min(initial_size * 2, 600)) + wait_interval = 0.5 + waited = 0 + last_size = initial_size + + if initial_size > 0: + log_info(f"Processing {initial_size} queued item(s), timeout {max_wait}s...") + + while waited < max_wait: + try: + size = 
queue.size + if size == 0: + break + # Log progress every 10 items processed + if last_size - size >= 10: + log_info(f"Progress: {initial_size - size}/{initial_size} processed, {size} remaining") + last_size = size + time.sleep(wait_interval) + waited += wait_interval + except Exception as e: + log_error(f"Error checking queue: {e}") + break + + if waited >= max_wait and queue.size > 0: + log_warn(f"Timeout after {max_wait}s ({queue.size} items remaining - run 'Sync Recent' task to continue)") + + # Return empty response (success) + print(json.dumps({"output": "ok"})) + + +if __name__ == "__main__": + try: + main() + except KeyboardInterrupt: + shutdown() + sys.exit(0) + except Exception as e: + import traceback + print(json.dumps({"error": str(e)})) + traceback.print_exc() + shutdown() + sys.exit(1) diff --git a/plugins/Stash2Plex/Stash2Plex.yml b/plugins/Stash2Plex/Stash2Plex.yml new file mode 100644 index 00000000..97641b4c --- /dev/null +++ b/plugins/Stash2Plex/Stash2Plex.yml @@ -0,0 +1,110 @@ +# requires: PythonDepManager +name: Stash2Plex +description: Sync metadata from Stash to Plex with queue-based reliability +version: 1.1.4 +url: https://github.com/trek-e/Stash2Plex + +exec: + - python3 + - "{pluginDir}/Stash2Plex.py" + +interface: raw + +hooks: + - name: Sync Scene to Plex + description: Syncs scene metadata to Plex when a scene is manually updated + triggeredBy: + - Scene.Update.Post + +tasks: + - name: Sync All Scenes to Plex + description: Force sync all scenes to Plex (use sparingly - may take a long time) + defaultArgs: + mode: all + - name: Sync Recent Scenes to Plex + description: Sync scenes updated in the last 24 hours + defaultArgs: + mode: recent + +settings: + plex_url: + displayName: Plex URL + description: "URL of your Plex server (e.g., http://localhost:32400)" + type: STRING + plex_token: + displayName: Plex Token + description: "Your Plex authentication token (X-Plex-Token)" + type: STRING + plex_library: + displayName: Plex Library + description: "Name of Plex library to sync (e.g., 'Adult'). Leave empty to search all libraries." 
+ type: STRING + enabled: + displayName: Enabled + description: "Enable or disable the plugin (default: true)" + type: BOOLEAN + max_retries: + displayName: Max Retries + description: "Maximum retry attempts for failed syncs (default: 5)" + type: NUMBER + poll_interval: + displayName: Poll Interval + description: "Seconds between queue polls (default: 30)" + type: NUMBER + strict_matching: + displayName: Strict Matching + description: "Skip sync when multiple Plex matches found (default: true)" + type: BOOLEAN + preserve_plex_edits: + displayName: Preserve Plex Edits + description: "Skip fields that already have values in Plex (default: false)" + type: BOOLEAN + connect_timeout: + displayName: Connect Timeout + description: "Plex connection timeout in seconds (default: 5)" + type: NUMBER + read_timeout: + displayName: Read Timeout + description: "Plex read timeout in seconds (default: 30)" + type: NUMBER + # ----- Field Sync Settings ----- + sync_master: + displayName: "Enable Field Sync" + description: "Master toggle - when OFF, no metadata fields are synced" + type: BOOLEAN + sync_studio: + displayName: "Sync Studio" + description: "Sync studio name from Stash to Plex" + type: BOOLEAN + sync_summary: + displayName: "Sync Summary" + description: "Sync description/details from Stash to Plex" + type: BOOLEAN + sync_tagline: + displayName: "Sync Tagline" + description: "Sync tagline from Stash to Plex" + type: BOOLEAN + sync_date: + displayName: "Sync Date" + description: "Sync release date from Stash to Plex" + type: BOOLEAN + sync_performers: + displayName: "Sync Performers" + description: "Sync performers as actors in Plex" + type: BOOLEAN + sync_tags: + displayName: "Sync Tags" + description: "Sync tags as genres in Plex" + type: BOOLEAN + sync_poster: + displayName: "Sync Poster" + description: "Sync poster image from Stash to Plex" + type: BOOLEAN + sync_background: + displayName: "Sync Background" + description: "Sync background/fanart image from Stash to Plex" + type: BOOLEAN + sync_collection: + displayName: "Sync Collection" + description: "Add items to Plex collection based on studio name" + type: BOOLEAN diff --git a/plugins/Stash2Plex/hooks/__init__.py b/plugins/Stash2Plex/hooks/__init__.py new file mode 100644 index 00000000..0a709f04 --- /dev/null +++ b/plugins/Stash2Plex/hooks/__init__.py @@ -0,0 +1,9 @@ +""" +Hook handlers for Stash events. + +Exports fast, non-blocking event handlers for metadata changes. +""" + +from hooks.handlers import on_scene_update, requires_plex_sync + +__all__ = ['on_scene_update', 'requires_plex_sync'] diff --git a/plugins/Stash2Plex/hooks/handlers.py b/plugins/Stash2Plex/hooks/handlers.py new file mode 100644 index 00000000..a5f1ee81 --- /dev/null +++ b/plugins/Stash2Plex/hooks/handlers.py @@ -0,0 +1,363 @@ +""" +Hook handlers for fast event capture. + +Implements <100ms event handlers that enqueue jobs for background processing. +Includes metadata validation before enqueueing. 
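+
+Typical invocation from the plugin entry point (mirrors handle_hook() in
+Stash2Plex.py; the queue, sync_timestamps and stash objects are created there):
+
+    queued = on_scene_update(scene_id, update_data,
+                             queue_manager.get_queue(),
+                             data_dir=data_dir,
+                             sync_timestamps=sync_timestamps,
+                             stash=stash)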
+""" + +import sys +import time +from typing import Optional + + +# Stash plugin log levels +def log_trace(msg): print(f"\x01t\x02[Stash2Plex Hook] {msg}", file=sys.stderr) +def log_debug(msg): print(f"\x01d\x02[Stash2Plex Hook] {msg}", file=sys.stderr) +def log_info(msg): print(f"\x01i\x02[Stash2Plex Hook] {msg}", file=sys.stderr) +def log_warn(msg): print(f"\x01w\x02[Stash2Plex Hook] {msg}", file=sys.stderr) +def log_error(msg): print(f"\x01e\x02[Stash2Plex Hook] {msg}", file=sys.stderr) + + +try: + from sync_queue.operations import enqueue, load_sync_timestamps +except ImportError: + enqueue = None + load_sync_timestamps = None + +try: + from validation.metadata import validate_metadata +except ImportError: + validate_metadata = None + + +# In-memory tracking of pending scene IDs for deduplication +# Resets on restart - acceptable tradeoff for <100ms hook handler requirement +_pending_scene_ids: set[int] = set() + + +# GraphQL query for fetching complete scene metadata +# stashapi's find_scene uses a minimal fragment that only returns id for nested objects +SCENE_QUERY = """ +query FindScene($id: ID!) { + findScene(id: $id) { + id + title + details + date + rating100 + files { + path + } + studio { + id + name + } + performers { + id + name + } + tags { + id + name + } + paths { + screenshot + preview + } + } +} +""" + + +def mark_scene_pending(scene_id: int) -> None: + """Mark scene as having a pending job in queue.""" + _pending_scene_ids.add(scene_id) + + +def unmark_scene_pending(scene_id: int) -> None: + """Remove scene from pending set (called after job completes).""" + _pending_scene_ids.discard(scene_id) + + +def is_scene_pending(scene_id: int) -> bool: + """Check if scene already has a pending job.""" + return scene_id in _pending_scene_ids + + +def is_scan_running(stash) -> bool: + """ + Check if a library scan or generate job is currently running. + + Args: + stash: StashInterface instance + + Returns: + True if scan/generate job is active + """ + if not stash: + return False + + try: + # Stash Job type has: id, status, subTasks, description, progress, startTime, endTime, addTime + result = stash.call_GQL(""" + query { jobQueue { status description } } + """) + + jobs = result.get('jobQueue', []) if result else [] + + # Check description for scan-related keywords + scan_keywords = ['scan', 'auto tag', 'generate', 'identify'] + for job in jobs: + status = (job.get('status') or '').upper() + description = (job.get('description') or '').lower() + if status in ('RUNNING', 'READY') and any(kw in description for kw in scan_keywords): + log_trace(f"Scan job active: {description}") + return True + + except Exception as e: + log_trace(f"Could not check job queue: {e}") + + return False + + +def requires_plex_sync(update_data: dict) -> bool: + """ + Check if update contains sync-worthy changes. + + Filters out non-metadata updates like play counts, view history, + and file system only changes. 
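+
+    Example (illustrative payloads; the check is a simple key-membership test):
+        >>> requires_plex_sync({'title': 'New Title', 'play_count': 3})
+        True
+        >>> requires_plex_sync({'play_count': 3, 'resume_time': 120})
+        False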
+ + Args: + update_data: Scene update data from Stash hook + + Returns: + True if update contains metadata fields that should sync to Plex + """ + # Metadata fields that trigger Plex sync + sync_fields = [ + 'title', + 'details', + 'studio_id', + 'performer_ids', + 'tag_ids', + 'rating100', + 'date', + 'rating', + 'studio', + 'performers', + 'tags' + ] + + # Check if any sync-worthy field is present in the update + for field in sync_fields: + if field in update_data: + return True + + return False + + +def on_scene_update( + scene_id: int, + update_data: dict, + queue, + data_dir: Optional[str] = None, + sync_timestamps: Optional[dict[int, float]] = None, + stash=None +) -> bool: + """ + Handle scene update event with fast enqueue. + + Target: <100ms execution time. Validates metadata before enqueueing + to ensure clean data enters the queue. + + Args: + scene_id: Stash scene ID + update_data: Scene update data from hook + queue: Queue instance for job storage + data_dir: Plugin data directory for sync timestamps + sync_timestamps: Dict mapping scene_id to last sync timestamp + stash: StashInterface for fetching scene file path + + Returns: + True if job was enqueued, False if filtered out or validation failed + """ + start = time.time() + + # Skip if scan/generate job is running + if is_scan_running(stash): + log_trace(f"Scene {scene_id} skipped - scan job active") + return False + + # Filter non-sync events before enqueueing + if not requires_plex_sync(update_data): + log_trace(f"Scene {scene_id} update filtered (no metadata changes)") + return False + + # Filter: Timestamp comparison for late update detection + if sync_timestamps is not None: + # Try to get updated_at from Stash hook data + # Fallback to current time if field missing (Stash may not always provide it) + stash_updated_at = update_data.get('updated_at') + if stash_updated_at is None: + # Stash didn't provide updated_at - use current time as proxy + # This means we'll re-sync, which is safe (idempotent) + stash_updated_at = time.time() + + last_synced = sync_timestamps.get(scene_id) + if last_synced and stash_updated_at <= last_synced: + log_trace(f"Scene {scene_id} already synced (Stash: {stash_updated_at} <= Last: {last_synced})") + return False + + # Filter: Queue deduplication using in-memory tracking + if is_scene_pending(scene_id): + log_trace(f"Scene {scene_id} already in queue, skipping duplicate") + return False + + # Enqueue job for background processing + if enqueue is None: + log_error("queue.operations not available") + return False + + # Fetch full scene data from Stash - needed for file path and complete metadata + file_path = None + scene_data = {} + if stash: + try: + # Use raw GraphQL call for complete metadata (stashapi's find_scene uses minimal fragment) + scene = None + try: + if hasattr(stash, 'call_GQL'): + result = stash.call_GQL(SCENE_QUERY, {"id": scene_id}) + scene = result.get("findScene") if result else None + elif hasattr(stash, '_callGraphQL'): + result = stash._callGraphQL(SCENE_QUERY, {"id": scene_id}) + scene = result.get("findScene") if result else None + else: + scene = stash.find_scene(scene_id) + except Exception as gql_err: + log_debug(f"GQL call failed: {gql_err}, falling back to find_scene") + scene = stash.find_scene(scene_id) + + if scene: + # Get file path + files = scene.get('files', []) + if files: + file_path = files[0].get('path') + + # Extract full metadata from scene + scene_data['title'] = scene.get('title') + scene_data['details'] = scene.get('details') + 
scene_data['date'] = scene.get('date') + scene_data['rating100'] = scene.get('rating100') + + # Get studio name + studio = scene.get('studio') + if studio: + scene_data['studio'] = studio.get('name') + + # Get performer names + performers = scene.get('performers', []) + if performers: + scene_data['performers'] = [p.get('name') for p in performers if p.get('name')] + + # Get tag names + tags = scene.get('tags', []) + if tags: + scene_data['tags'] = [t.get('name') for t in tags if t.get('name')] + + # Get image paths (poster/background) + paths = scene.get('paths', {}) + if paths: + if paths.get('screenshot'): + scene_data['poster_url'] = paths['screenshot'] + if paths.get('preview'): + scene_data['background_url'] = paths['preview'] + + except Exception as e: + import traceback + log_warn(f"Could not fetch scene {scene_id}: {e}") + traceback.print_exc() + + if not file_path: + log_error(f"No file path for scene {scene_id}, cannot sync to Plex") + return False + + # Merge scene_data with update_data (update_data takes precedence) + # This ensures we have complete metadata even if hook only sent partial update + merged_data = dict(scene_data) + merged_data.update(update_data) + merged_data['path'] = file_path + update_data = merged_data + + # Build validation data from update_data + # Title is required - if missing from update, we need to get it + # For now, if title is missing, we skip validation + title = update_data.get('title') + + if title and validate_metadata is not None: + # Build validation dict with available fields + validation_data = { + 'scene_id': scene_id, + 'title': title, + } + + # Add optional fields if present + for field in ['details', 'rating100', 'date', 'studio', 'performers', 'tags']: + if field in update_data and update_data[field] is not None: + validation_data[field] = update_data[field] + + # Validate and sanitize + validated, error = validate_metadata(validation_data) + + if error: + # Check if it's a missing title error (critical) + if 'title' in error.lower() and ('required' in error.lower() or 'empty' in error.lower()): + log_error(f"Scene {scene_id} validation failed: {error}") + return False + # For other validation errors, log warning but continue with sanitized data + log_warn(f"Scene {scene_id} validation issue: {error}") + + if validated: + # Use validated/sanitized data for job + sanitized_data = { + 'title': validated.title, + } + # Add optional fields from validated model + if validated.details is not None: + sanitized_data['details'] = validated.details + if validated.rating100 is not None: + sanitized_data['rating100'] = validated.rating100 + if validated.date is not None: + sanitized_data['date'] = validated.date + if validated.studio is not None: + sanitized_data['studio'] = validated.studio + if validated.performers is not None: + sanitized_data['performers'] = validated.performers + if validated.tags is not None: + sanitized_data['tags'] = validated.tags + + # Preserve any extra fields from original update_data that we don't validate + for key in ['studio_id', 'performer_ids', 'tag_ids', 'rating', 'path', 'poster_url', 'background_url']: + if key in update_data: + sanitized_data[key] = update_data[key] + + enqueue(queue, scene_id, "metadata", sanitized_data) + else: + # Validation failed critically (title issues), already logged above + return False + else: + # No title in update or validation not available, enqueue as-is + # (title might be in Stash already, worker can lookup) + enqueue(queue, scene_id, "metadata", update_data) + + # Mark scene 
as pending to prevent duplicate enqueues + mark_scene_pending(scene_id) + + # Calculate elapsed time and warn if over target + elapsed_ms = (time.time() - start) * 1000 + log_debug(f"Enqueued sync job for scene {scene_id} in {elapsed_ms:.1f}ms") + + if elapsed_ms > 100: + log_warn(f"Hook handler exceeded 100ms target ({elapsed_ms:.1f}ms)") + + return True diff --git a/plugins/Stash2Plex/plex/__init__.py b/plugins/Stash2Plex/plex/__init__.py new file mode 100644 index 00000000..c2893be2 --- /dev/null +++ b/plugins/Stash2Plex/plex/__init__.py @@ -0,0 +1,36 @@ +""" +Plex API client module for Stash2Plex. + +This module provides the interface for communicating with Plex servers, +including exception handling, timeout configuration, and retry logic. + +Classes: + PlexClient: Wrapper around PlexServer with timeouts and retry + +Exceptions: + PlexTemporaryError: Retry-able Plex errors (network, timeout, 5xx) + PlexPermanentError: Non-retry-able Plex errors (auth, bad request) + PlexNotFound: Item not found in Plex - may appear after library scan + +Functions: + translate_plex_exception: Convert plexapi exceptions to our hierarchy + find_plex_item_by_path: Find Plex item by file path with fallback strategies +""" + +from plex.exceptions import ( + PlexTemporaryError, + PlexPermanentError, + PlexNotFound, + translate_plex_exception, +) +from plex.client import PlexClient +from plex.matcher import find_plex_item_by_path + +__all__ = [ + 'PlexClient', + 'PlexTemporaryError', + 'PlexPermanentError', + 'PlexNotFound', + 'translate_plex_exception', + 'find_plex_item_by_path', +] diff --git a/plugins/Stash2Plex/plex/cache.py b/plugins/Stash2Plex/plex/cache.py new file mode 100644 index 00000000..e6e06c21 --- /dev/null +++ b/plugins/Stash2Plex/plex/cache.py @@ -0,0 +1,595 @@ +""" +Disk-backed caching for Plex library data and match results. + +Provides two cache classes: +- PlexCache: Library data cache (all() results, search results) with TTL expiration +- MatchCache: Path-to-item key mappings with no TTL (invalidate on stale detection) + +Key design decisions: +- SQLite-backed storage via diskcache (same pattern as persist-queue) +- Store only essential item data (key, title, file paths) to avoid memory bloat +- 1-hour TTL for library data (balances freshness vs API savings) +- No TTL for match results (invalidate on failure detection) +- Size limits to prevent unbounded growth + +Example: + >>> from plex.cache import PlexCache, MatchCache + >>> # Library cache for search/all results + >>> lib_cache = PlexCache("/path/to/data_dir") + >>> lib_cache.set_library_items("Movies", plex_library.all()) + >>> # Match cache for path-to-key mappings + >>> match_cache = MatchCache("/path/to/data_dir") + >>> match_cache.set_match("Movies", "/media/movie.mp4", "/library/metadata/123") +""" + +import logging +import os +from typing import Any, Dict, List, Optional + +from diskcache import Cache + +logger = logging.getLogger('Stash2Plex.plex.cache') + + +def _extract_item_data(item: Any) -> Optional[Dict[str, Any]]: + """ + Extract essential fields from a plexapi Video object. + + Only extracts data needed for matching: key, title, and file paths. + This avoids pickling full plexapi objects which can cause memory bloat. + + Args: + item: A plexapi Video object (Movie, Episode, etc.) 
+ + Returns: + Dict with essential fields, or None if extraction fails + + Example: + >>> data = _extract_item_data(plex_item) + >>> print(data) + {'key': '/library/metadata/123', 'title': 'Movie Title', 'file_paths': ['/media/movie.mp4']} + """ + try: + # Extract key and title + key = getattr(item, 'key', None) or getattr(item, 'ratingKey', None) + title = getattr(item, 'title', None) + + if key is None: + logger.debug("Item missing key, skipping") + return None + + # Extract file paths from media[].parts[].file + file_paths = [] + media_list = getattr(item, 'media', None) or [] + for media in media_list: + parts = getattr(media, 'parts', None) or [] + for part in parts: + file_path = getattr(part, 'file', None) + if file_path: + file_paths.append(file_path) + + return { + 'key': str(key), + 'title': title, + 'file_paths': file_paths, + } + except Exception as e: + logger.warning(f"Failed to extract item data: {e}") + return None + + +class PlexCache: + """ + Disk-backed cache for Plex library data. + + Uses diskcache.Cache for SQLite-backed storage with TTL support. + Stores only essential item data (keys, titles, file paths) to avoid + memory bloat from caching full plexapi objects. + + Args: + data_dir: Base directory for cache storage (cache/ subdirectory created) + library_ttl: TTL in seconds for library data (default: 3600 = 1 hour) + size_limit: Maximum cache size in bytes (default: 100MB) + + Example: + >>> cache = PlexCache("/data/plexsync", library_ttl=1800) + >>> # Store items (extracts essential data only) + >>> cache.set_library_items("Movies", plex_library.all()) + >>> # Retrieve later (returns simplified dicts) + >>> items = cache.get_library_items("Movies") + >>> for item in items: + ... print(f"{item['title']}: {item['file_paths']}") + """ + + # Default TTL: 1 hour per RESEARCH.md recommendation + DEFAULT_LIBRARY_TTL = 3600 + + # Default size limit: 100MB + DEFAULT_SIZE_LIMIT = 100 * 1024 * 1024 + + def __init__( + self, + data_dir: str, + library_ttl: int = DEFAULT_LIBRARY_TTL, + size_limit: int = DEFAULT_SIZE_LIMIT, + ) -> None: + """ + Initialize cache in data_dir/cache/ directory. + + Args: + data_dir: Base directory (cache/ subdirectory will be created) + library_ttl: TTL in seconds for library data + size_limit: Maximum cache size in bytes + """ + self._data_dir = data_dir + self._library_ttl = library_ttl + self._size_limit = size_limit + + # Create cache directory + cache_dir = os.path.join(data_dir, 'cache') + os.makedirs(cache_dir, exist_ok=True) + + # Initialize diskcache with size limit + self._cache = Cache(cache_dir, size_limit=size_limit) + + # Enable statistics tracking + self._cache.stats(enable=True) + + # Track custom stats for this session + self._hits = 0 + self._misses = 0 + + logger.debug(f"PlexCache initialized at {cache_dir} (TTL: {library_ttl}s, limit: {size_limit} bytes)") + + def _make_library_key(self, library_name: str) -> str: + """Generate cache key for library items.""" + return f"library:{library_name}:all" + + def _make_search_key(self, library_name: str, title: str) -> str: + """Generate cache key for search results.""" + return f"search:{library_name}:{title}" + + def get_library_items(self, library_name: str) -> Optional[List[Dict[str, Any]]]: + """ + Get cached library items. 
+ + Returns simplified dicts (not plexapi objects) with: + - key: Plex item key + - title: Item title + - file_paths: List of file paths + + Args: + library_name: Name of the Plex library section + + Returns: + List of item dicts if cached, None if cache miss + + Example: + >>> items = cache.get_library_items("Movies") + >>> if items is not None: + ... for item in items: + ... print(f"{item['key']}: {item['title']}") + """ + cache_key = self._make_library_key(library_name) + result = self._cache.get(cache_key) + + if result is not None: + self._hits += 1 + logger.debug(f"Cache hit for library '{library_name}' ({len(result)} items)") + else: + self._misses += 1 + logger.debug(f"Cache miss for library '{library_name}'") + + return result + + def set_library_items(self, library_name: str, items: List[Any]) -> None: + """ + Cache library items, extracting only essential fields. + + Transforms plexapi Video objects into simplified dicts containing + only key, title, and file paths to avoid memory bloat. + + Args: + library_name: Name of the Plex library section + items: List of plexapi Video objects (Movie, Episode, etc.) + + Example: + >>> library = plex.library.section("Movies") + >>> cache.set_library_items("Movies", library.all()) + """ + cache_key = self._make_library_key(library_name) + + # Extract essential data from each item + extracted_items = [] + for item in items: + data = _extract_item_data(item) + if data is not None: + extracted_items.append(data) + + self._cache.set(cache_key, extracted_items, expire=self._library_ttl) + logger.debug(f"Cached {len(extracted_items)} items for library '{library_name}' (TTL: {self._library_ttl}s)") + + def get_search_results(self, library_name: str, title: str) -> Optional[List[Dict[str, Any]]]: + """ + Get cached search results. + + Args: + library_name: Name of the Plex library section + title: Search title + + Returns: + List of item dicts if cached, None if cache miss + + Example: + >>> results = cache.get_search_results("Movies", "Inception") + >>> if results is not None: + ... print(f"Found {len(results)} cached results") + """ + cache_key = self._make_search_key(library_name, title) + result = self._cache.get(cache_key) + + if result is not None: + self._hits += 1 + logger.debug(f"Cache hit for search '{library_name}:{title}' ({len(result)} results)") + else: + self._misses += 1 + logger.debug(f"Cache miss for search '{library_name}:{title}'") + + return result + + def set_search_results(self, library_name: str, title: str, results: List[Any]) -> None: + """ + Cache search results. + + Args: + library_name: Name of the Plex library section + title: Search title + results: List of plexapi Video objects from search + + Example: + >>> results = library.search(title="Inception") + >>> cache.set_search_results("Movies", "Inception", results) + """ + cache_key = self._make_search_key(library_name, title) + + # Extract essential data from each result + extracted_results = [] + for item in results: + data = _extract_item_data(item) + if data is not None: + extracted_results.append(data) + + self._cache.set(cache_key, extracted_results, expire=self._library_ttl) + logger.debug(f"Cached {len(extracted_results)} search results for '{library_name}:{title}'") + + def clear(self) -> None: + """ + Clear all cached data. + + Removes all entries from the cache. 
Use when: + - Plex library has been rescanned + - Cache data appears stale + - Manual cache reset requested + + Example: + >>> cache.clear() + >>> print("Cache cleared") + """ + self._cache.clear() + self._hits = 0 + self._misses = 0 + logger.info("Cache cleared") + + def get_stats(self) -> Dict[str, Any]: + """ + Get cache hit/miss statistics. + + Returns session statistics for monitoring cache effectiveness. + + Returns: + Dict with keys: + - hits: Number of cache hits this session + - misses: Number of cache misses this session + - hit_rate: Hit rate as percentage (0-100) + - size: Current cache size in bytes + - count: Number of cached items + + Example: + >>> stats = cache.get_stats() + >>> print(f"Hit rate: {stats['hit_rate']:.1f}%") + """ + total = self._hits + self._misses + hit_rate = (self._hits / total * 100) if total > 0 else 0.0 + + # Get diskcache internal stats + try: + size = self._cache.volume() + except Exception: + size = 0 + + try: + count = len(self._cache) + except Exception: + count = 0 + + return { + 'hits': self._hits, + 'misses': self._misses, + 'hit_rate': hit_rate, + 'size': size, + 'count': count, + } + + def close(self) -> None: + """ + Close the cache connection. + + Should be called when done using the cache to release resources. + + Example: + >>> cache = PlexCache("/data") + >>> try: + ... # use cache + ... pass + ... finally: + ... cache.close() + """ + self._cache.close() + logger.debug("Cache closed") + + def __repr__(self) -> str: + """Return string representation of cache.""" + stats = self.get_stats() + return ( + f"PlexCache(data_dir={self._data_dir!r}, " + f"ttl={self._library_ttl}, " + f"items={stats['count']}, " + f"hit_rate={stats['hit_rate']:.1f}%)" + ) + + +class MatchCache: + """ + Cache for file path to Plex item key mappings. + + Stores path-to-key mappings with no TTL expiration (matches are stable + until library changes). Supports auto-invalidation on match failure + to detect stale cache entries. + + Key differences from PlexCache: + - No TTL expiration (matches are stable until library changes) + - Keys are f"match:{library_name}:{file_path}" (lowercased path) + - Stores only the Plex item key (string), not full item data + - Auto-invalidate on match failure (stale cache detection) + + Args: + data_dir: Base directory for cache storage (match_cache/ subdirectory created) + size_limit: Maximum cache size in bytes (default: 50MB) + + Example: + >>> from plex.cache import MatchCache + >>> cache = MatchCache("/data/plexsync") + >>> cache.set_match("Movies", "/media/movie.mp4", "/library/metadata/123") + >>> key = cache.get_match("Movies", "/media/movie.mp4") + >>> print(key) # /library/metadata/123 + """ + + # Default size limit: 50MB + DEFAULT_SIZE_LIMIT = 50 * 1024 * 1024 + + def __init__( + self, + data_dir: str, + size_limit: int = DEFAULT_SIZE_LIMIT, + ) -> None: + """ + Initialize cache in data_dir/match_cache/ directory. 
+ + Args: + data_dir: Base directory (match_cache/ subdirectory will be created) + size_limit: Maximum cache size in bytes + """ + self._data_dir = data_dir + self._size_limit = size_limit + + # Create cache directory + cache_dir = os.path.join(data_dir, 'match_cache') + os.makedirs(cache_dir, exist_ok=True) + + # Initialize diskcache with size limit + self._cache = Cache(cache_dir, size_limit=size_limit) + + # Enable statistics tracking + self._cache.stats(enable=True) + + # Track custom stats for this session + self._hits = 0 + self._misses = 0 + + logger.debug(f"MatchCache initialized at {cache_dir} (limit: {size_limit} bytes)") + + def _make_key(self, library_name: str, file_path: str) -> str: + """Generate cache key for match result.""" + # Use lowercase file path for case-insensitive matching consistency + return f"match:{library_name}:{file_path.lower()}" + + def get_match(self, library_name: str, file_path: str) -> Optional[str]: + """ + Get cached Plex item key for file path. + + Args: + library_name: Name of the Plex library section + file_path: File path from Stash (lowercased for matching) + + Returns: + Plex item key (string) if cached, None if cache miss + + Example: + >>> key = cache.get_match("Movies", "/media/movie.mp4") + >>> if key is not None: + ... # Use key to fetch item directly: library.fetchItem(key) + ... pass + """ + cache_key = self._make_key(library_name, file_path) + result = self._cache.get(cache_key) + + if result is not None: + self._hits += 1 + logger.debug(f"Match cache hit for '{file_path}' -> {result}") + else: + self._misses += 1 + logger.debug(f"Match cache miss for '{file_path}'") + + return result + + def set_match(self, library_name: str, file_path: str, plex_key: str) -> None: + """ + Cache path to Plex item key mapping (no TTL). + + Stores the mapping indefinitely until manually invalidated or + cache size limit triggers eviction. + + Args: + library_name: Name of the Plex library section + file_path: File path from Stash + plex_key: Plex item key (e.g., "/library/metadata/12345") + + Example: + >>> cache.set_match("Movies", "/media/movie.mp4", "/library/metadata/123") + """ + cache_key = self._make_key(library_name, file_path) + # No expire = permanent until invalidated or evicted + self._cache.set(cache_key, plex_key) + logger.debug(f"Cached match '{file_path}' -> {plex_key}") + + def invalidate(self, library_name: str, file_path: str) -> None: + """ + Invalidate a specific match (e.g., on PlexNotFound). + + Use when a cached match is discovered to be stale (item no longer + exists at that key, or match was incorrect). + + Args: + library_name: Name of the Plex library section + file_path: File path to invalidate + + Example: + >>> # Item not found using cached key - invalidate + >>> cache.invalidate("Movies", "/media/movie.mp4") + """ + cache_key = self._make_key(library_name, file_path) + try: + del self._cache[cache_key] + logger.debug(f"Invalidated match for '{file_path}'") + except KeyError: + # Key wasn't cached, nothing to invalidate + pass + + def invalidate_library(self, library_name: str) -> None: + """ + Invalidate all matches for a library (e.g., after library scan). + + Use when a library has been rescanned and all cached matches + may be stale. 
+ + Args: + library_name: Name of the Plex library section + + Example: + >>> # After library scan, clear all matches for that library + >>> cache.invalidate_library("Movies") + """ + prefix = f"match:{library_name}:" + invalidated = 0 + # Iterate over all keys and delete matching ones + for key in list(self._cache): + if key.startswith(prefix): + try: + del self._cache[key] + invalidated += 1 + except KeyError: + pass + logger.info(f"Invalidated {invalidated} matches for library '{library_name}'") + + def clear(self) -> None: + """ + Clear all match cache data. + + Removes all path-to-key mappings. Use when: + - All Plex libraries have been rescanned + - Cache data appears corrupted + - Manual cache reset requested + + Example: + >>> cache.clear() + """ + self._cache.clear() + self._hits = 0 + self._misses = 0 + logger.info("Match cache cleared") + + def get_stats(self) -> Dict[str, Any]: + """ + Get cache hit/miss statistics. + + Returns session statistics for monitoring cache effectiveness. + + Returns: + Dict with keys: + - hits: Number of cache hits this session + - misses: Number of cache misses this session + - hit_rate: Hit rate as percentage (0-100) + - size: Current cache size in bytes + - count: Number of cached matches + + Example: + >>> stats = cache.get_stats() + >>> print(f"Match cache hit rate: {stats['hit_rate']:.1f}%") + """ + total = self._hits + self._misses + hit_rate = (self._hits / total * 100) if total > 0 else 0.0 + + # Get diskcache internal stats + try: + size = self._cache.volume() + except Exception: + size = 0 + + try: + count = len(self._cache) + except Exception: + count = 0 + + return { + 'hits': self._hits, + 'misses': self._misses, + 'hit_rate': hit_rate, + 'size': size, + 'count': count, + } + + def close(self) -> None: + """ + Close the cache connection. + + Should be called when done using the cache to release resources. + + Example: + >>> cache = MatchCache("/data") + >>> try: + ... # use cache + ... pass + ... finally: + ... cache.close() + """ + self._cache.close() + logger.debug("Match cache closed") + + def __repr__(self) -> str: + """Return string representation of cache.""" + stats = self.get_stats() + return ( + f"MatchCache(data_dir={self._data_dir!r}, " + f"matches={stats['count']}, " + f"hit_rate={stats['hit_rate']:.1f}%)" + ) diff --git a/plugins/Stash2Plex/plex/client.py b/plugins/Stash2Plex/plex/client.py new file mode 100644 index 00000000..5da501b0 --- /dev/null +++ b/plugins/Stash2Plex/plex/client.py @@ -0,0 +1,187 @@ +""" +PlexClient wrapper with explicit timeouts and retry logic. + +Provides a PlexClient class that wraps plexapi.server.PlexServer with: +- Explicit timeout configuration (no infinite hangs) +- Tenacity retry for connection errors (handles network blips) +- Lazy PlexServer initialization +- Exception translation to Phase 2 hierarchy +""" + +import logging +from typing import Any, Optional, Tuple, Type, TYPE_CHECKING + +from tenacity import ( + retry, + retry_if_exception_type, + wait_exponential_jitter, + stop_after_attempt, + before_sleep_log, +) + +from plex.exceptions import translate_plex_exception + +if TYPE_CHECKING: + from plexapi.server import PlexServer + +logger = logging.getLogger('Stash2Plex.plex.client') + + +def _get_retriable_exceptions() -> Tuple[Type[Exception], ...]: + """ + Build tuple of retriable exceptions lazily. + + This avoids importing requests at module load time, which would + trigger urllib3 and conflict with the project's queue/ module + shadowing Python's stdlib queue module. 
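+
+    Returns:
+        Tuple of exception classes treated as retriable: the built-in
+        ConnectionError, TimeoutError and OSError plus
+        requests.exceptions.ConnectionError and requests.exceptions.Timeout.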
+ """ + import requests.exceptions + return ( + ConnectionError, + TimeoutError, + OSError, + requests.exceptions.ConnectionError, + requests.exceptions.Timeout, + ) + + +# Connection errors that warrant immediate retry +# These are network-level failures that may resolve quickly +# Lazily initialized to avoid import issues +RETRIABLE_EXCEPTIONS: Tuple[Type[Exception], ...] = ( + ConnectionError, + TimeoutError, + OSError, +) + + +class PlexClient: + """ + Wrapper around PlexServer with timeout and retry configuration. + + Provides reliable Plex communication by: + - Configuring explicit timeouts (no infinite hangs) + - Retrying connection errors with exponential backoff and jitter + - Lazy initialization (connection on first use) + - Exception translation to Phase 2 error hierarchy + + Args: + url: Plex server URL (e.g., http://192.168.1.100:32400) + token: Plex authentication token + connect_timeout: Connection timeout in seconds (default: 5.0) + read_timeout: Read timeout in seconds (default: 30.0) + + Example: + >>> from plex.client import PlexClient + >>> client = PlexClient( + ... url="http://plex:32400", + ... token="your-token", + ... timeout=10 + ... ) + >>> library = client.get_library("Movies") + """ + + # Class-level cache for full retriable exceptions tuple + _retriable_exceptions: Optional[Tuple[Type[Exception], ...]] = None + + def __init__( + self, + url: str, + token: str, + connect_timeout: float = 5.0, + read_timeout: float = 30.0, + ) -> None: + self._url = url + self._token = token + self._connect_timeout = connect_timeout + self._read_timeout = read_timeout + self._server: Optional["PlexServer"] = None + + @classmethod + def _get_retriable_exceptions(cls) -> Tuple[Type[Exception], ...]: + """Get full retriable exceptions tuple, including requests exceptions.""" + if cls._retriable_exceptions is None: + cls._retriable_exceptions = _get_retriable_exceptions() + return cls._retriable_exceptions + + def _get_server(self) -> "PlexServer": + """ + Create and return PlexServer connection with retry on network errors. + + Uses tenacity retry for automatic retry on connection errors: + - 3 attempts maximum + - Exponential backoff: 100ms initial, 400ms max, with 100ms jitter + - Logs warning before each retry attempt + + Returns: + Connected PlexServer instance + + Raises: + PlexTemporaryError: On connection/timeout errors after all retries + PlexPermanentError: On authentication failures + """ + from plexapi.server import PlexServer + + # Get full retriable exceptions including requests types + retriable = self._get_retriable_exceptions() + + @retry( + retry=retry_if_exception_type(retriable), + wait=wait_exponential_jitter(initial=0.1, max=0.4, jitter=0.1), + stop=stop_after_attempt(3), + before_sleep=before_sleep_log(logger, logging.WARNING), + reraise=True, + ) + def connect(): + try: + logger.debug(f"Connecting to Plex server at {self._url}") + server = PlexServer( + baseurl=self._url, + token=self._token, + timeout=self._read_timeout, + ) + logger.debug(f"Connected to Plex server: {server.friendlyName}") + return server + except retriable: + # Let tenacity handle retry + raise + except Exception as exc: + # Translate other exceptions to our hierarchy + raise translate_plex_exception(exc) from exc + + return connect() + + @property + def server(self) -> "PlexServer": + """ + Get the PlexServer instance, connecting lazily if needed. 
+ + Returns: + Connected PlexServer instance + + Raises: + PlexTemporaryError: On connection/timeout errors + PlexPermanentError: On authentication failures + """ + if self._server is None: + self._server = self._get_server() + return self._server + + def get_library(self, section_name: str) -> Any: + """ + Get a library section by name. + + Args: + section_name: Name of the library section (e.g., "Movies", "TV Shows") + + Returns: + LibrarySection instance + + Raises: + PlexNotFound: If section doesn't exist + PlexTemporaryError: On connection errors + """ + try: + return self.server.library.section(section_name) + except Exception as exc: + raise translate_plex_exception(exc) from exc diff --git a/plugins/Stash2Plex/plex/device_identity.py b/plugins/Stash2Plex/plex/device_identity.py new file mode 100644 index 00000000..0e242f50 --- /dev/null +++ b/plugins/Stash2Plex/plex/device_identity.py @@ -0,0 +1,103 @@ +""" +Device identity management for persistent Plex device recognition. + +This module ensures Stash2Plex appears as a consistent, named device in Plex +rather than generating "new device" notifications on each connection. + +The issue: plexapi defaults to using a MAC-address-derived UUID which can +change between runs, causing Plex to show "new device connected" notifications +and cluttering the Plex devices list. + +The fix: Generate a UUID once, persist it to disk, and configure plexapi +to use this persistent identifier along with friendly device names. + +CRITICAL: configure_plex_device_identity() MUST be called before any +PlexServer connections are created, as plexapi captures the identifier +values when building connection headers. +""" + +import json +import logging +import os +import uuid + +logger = logging.getLogger('Stash2Plex.device') + + +def load_or_create_device_id(data_dir: str) -> str: + """ + Load persistent device ID or create a new one. + + The device ID ensures Plex recognizes Stash2Plex as the same device + across restarts, avoiding "new device" notifications. + + Args: + data_dir: Path to plugin data directory for persisting device_id.json + + Returns: + Device ID string (UUID format) + """ + # Ensure data directory exists + os.makedirs(data_dir, exist_ok=True) + + id_file = os.path.join(data_dir, 'device_id.json') + + # Try to load existing device ID + if os.path.exists(id_file): + try: + with open(id_file, 'r') as f: + data = json.load(f) + device_id = data.get('device_id') + if device_id: + logger.debug(f"Loaded existing device ID: {device_id[:8]}...") + return device_id + except (json.JSONDecodeError, KeyError, IOError) as e: + logger.warning(f"Corrupt device_id.json, regenerating: {e}") + + # Generate new UUID v4 (random, good for device IDs) + device_id = str(uuid.uuid4()) + logger.info(f"Generated new device ID: {device_id[:8]}...") + + # Persist for future use + with open(id_file, 'w') as f: + json.dump({'device_id': device_id}, f, indent=2) + + return device_id + + +def configure_plex_device_identity(data_dir: str) -> str: + """ + Configure plexapi module-level variables for device identification. + + This sets: + - X_PLEX_IDENTIFIER: Persistent UUID for device recognition + - X_PLEX_PRODUCT: Product name shown in Plex UI + - X_PLEX_DEVICE_NAME: Device name shown in Plex UI + + MUST be called before any PlexServer connections are created. + The plexapi module captures these values when building connection + headers, so changing them after connection has no effect. 
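+
+    Illustrative call order (the data directory is a placeholder):
+
+        device_id = configure_plex_device_identity("/data/Stash2Plex")
+        client = PlexClient(url=plex_url, token=plex_token)  # headers already set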
+ + Args: + data_dir: Path to plugin data directory for device_id.json + + Returns: + The device ID being used + """ + device_id = load_or_create_device_id(data_dir) + + # Import plexapi at call time to avoid import order issues + import plexapi + import plexapi.config + + # Set module-level variables (affects all future connections) + plexapi.X_PLEX_IDENTIFIER = device_id + plexapi.X_PLEX_PRODUCT = 'Stash2Plex' + plexapi.X_PLEX_DEVICE_NAME = 'Stash2Plex Plugin' + + # Rebuild BASE_HEADERS to pick up new values + plexapi.BASE_HEADERS = plexapi.config.reset_base_headers() + + logger.debug(f"Configured Plex device identity: {device_id[:8]}...") + + return device_id diff --git a/plugins/Stash2Plex/plex/exceptions.py b/plugins/Stash2Plex/plex/exceptions.py new file mode 100644 index 00000000..490b9bd2 --- /dev/null +++ b/plugins/Stash2Plex/plex/exceptions.py @@ -0,0 +1,122 @@ +""" +Plex exception hierarchy for error classification. + +Integrates with Phase 2 error classification by subclassing TransientError +and PermanentError. This ensures proper retry/DLQ routing via the worker. + +Exception classes: + PlexTemporaryError: Retry-able errors (network, timeout, 5xx, rate limits) + PlexPermanentError: Non-retry-able errors (auth, bad request) + PlexNotFound: Item not in Plex library - may appear after scan + +Functions: + translate_plex_exception: Convert plexapi/requests exceptions to hierarchy +""" + +from worker.processor import TransientError, PermanentError + + +class PlexTemporaryError(TransientError): + """ + Retry-able Plex errors. + + Used for errors that may resolve with retry: + - Network connectivity issues + - Timeout errors + - Server overload (5xx, rate limiting) + """ + pass + + +class PlexPermanentError(PermanentError): + """ + Non-retry-able Plex errors. + + Used for errors that won't resolve with retry: + - Authentication failures (invalid token) + - Bad requests (malformed data) + - Permission errors + """ + pass + + +class PlexNotFound(TransientError): + """ + Item not found in Plex library. + + Distinct from PlexTemporaryError to allow different retry timing. + Plex library scanning can take minutes to hours, so an item may + appear after the library scan completes. + + This is treated as transient because the file may exist in the + filesystem but Plex hasn't indexed it yet. + """ + pass + + +def translate_plex_exception(exc: Exception) -> Exception: + """ + Translate PlexAPI or requests exception to Phase 2 hierarchy. 
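+
+    Example (illustrative):
+        >>> err = translate_plex_exception(TimeoutError("read timed out"))
+        >>> isinstance(err, PlexTemporaryError)
+        True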
+ + Handles: + - plexapi.exceptions (Unauthorized, NotFound, BadRequest) + - requests.exceptions (ConnectionError, Timeout) + - HTTP status codes via response attribute + - Unknown errors default to transient (safer) + + Args: + exc: The original exception from PlexAPI or requests + + Returns: + A PlexTemporaryError, PlexPermanentError, or PlexNotFound instance + """ + # Import plexapi exceptions lazily to avoid hard dependency at module load + try: + from plexapi.exceptions import Unauthorized, NotFound, BadRequest + has_plexapi = True + except ImportError: + has_plexapi = False + Unauthorized = NotFound = BadRequest = None + + # Import requests exceptions + try: + import requests.exceptions + has_requests = True + except ImportError: + has_requests = False + + # Handle PlexAPI exceptions + if has_plexapi: + if Unauthorized is not None and isinstance(exc, Unauthorized): + return PlexPermanentError(f"Authentication failed: {exc}") + if NotFound is not None and isinstance(exc, NotFound): + return PlexNotFound(f"Item not found in Plex: {exc}") + if BadRequest is not None and isinstance(exc, BadRequest): + return PlexPermanentError(f"Bad request to Plex: {exc}") + + # Handle requests/network exceptions + if has_requests: + if isinstance(exc, requests.exceptions.ConnectionError): + return PlexTemporaryError(f"Connection error: {exc}") + if isinstance(exc, requests.exceptions.Timeout): + return PlexTemporaryError(f"Timeout error: {exc}") + + # Handle base Python network exceptions + if isinstance(exc, (ConnectionError, TimeoutError, OSError)): + return PlexTemporaryError(f"Connection error: {exc}") + + # Handle HTTP errors with response attribute (status codes) + if hasattr(exc, 'response') and exc.response is not None: + status = getattr(exc.response, 'status_code', None) + if status is not None: + if status == 401: + return PlexPermanentError(f"Unauthorized (401): {exc}") + if status == 404: + return PlexNotFound(f"Not found (404): {exc}") + if status in (429, 500, 502, 503, 504): + return PlexTemporaryError(f"Server error ({status}): {exc}") + if 400 <= status < 500: + return PlexPermanentError(f"Client error ({status}): {exc}") + + # Unknown errors default to transient (safer, allows retry) + return PlexTemporaryError(f"Unknown Plex error: {exc}") diff --git a/plugins/Stash2Plex/plex/matcher.py b/plugins/Stash2Plex/plex/matcher.py new file mode 100644 index 00000000..1529313c --- /dev/null +++ b/plugins/Stash2Plex/plex/matcher.py @@ -0,0 +1,363 @@ +""" +Plex item matching logic with optional caching support. + +Fast path: title search derived from filename, then verify by filename match. +Slow fallback: scan all items if title search fails. + +Caching support (optional): +- library_cache: PlexCache for library items and search results +- match_cache: MatchCache for path-to-item key mappings + +When caches are provided: +1. Check match_cache for existing path-to-key mapping +2. If hit, fetch item directly via library.fetchItem(key) (1 API call vs search) +3. If miss or stale, fall back to search/all logic +4. After successful match, store in match_cache + +Cache stores simplified dicts, but matching returns actual plexapi Video objects. 
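+
+A minimal sketch of the cached fast path described above (names follow this
+module; error handling and cache invalidation are omitted):
+
+    key = match_cache.get_match(library.title, stash_path)
+    if key is not None:
+        item = library.fetchItem(key)   # one API call instead of a search
+    else:
+        ...  # fall back to title search / full scan, then match_cache.set_match()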
+""" + +from enum import Enum +import logging +import re +import sys +from pathlib import Path +from typing import Optional, TYPE_CHECKING + +if TYPE_CHECKING: + from plexapi.library import LibrarySection + from plexapi.video import Video + from plex.cache import PlexCache, MatchCache + +logger = logging.getLogger('Stash2Plex.plex.matcher') + +# Stash plugin log levels +def log_debug(msg): print(f"\x01d\x02[Stash2Plex Matcher] {msg}", file=sys.stderr) +def log_info(msg): print(f"\x01i\x02[Stash2Plex Matcher] {msg}", file=sys.stderr) +def log_warn(msg): print(f"\x01w\x02[Stash2Plex Matcher] {msg}", file=sys.stderr) + + +def _cached_item_has_file(item_data: dict, filename: str, case_insensitive: bool = True) -> bool: + """ + Check if cached item data has a file matching the given filename. + + Args: + item_data: Dict with 'file_paths' list from cache + filename: Filename to match (not full path) + case_insensitive: If True, compare case-insensitively + + Returns: + True if item has matching filename + """ + file_paths = item_data.get('file_paths', []) + compare_name = filename.lower() if case_insensitive else filename + + for file_path in file_paths: + cached_name = Path(file_path).name + if case_insensitive: + cached_name = cached_name.lower() + if cached_name == compare_name: + return True + return False + + +def _item_has_file(item, path_or_filename: str, exact: bool = True, case_insensitive: bool = False) -> bool: + """ + Check if a Plex item has a media file matching the given path or filename. + + Args: + item: Plex video item + path_or_filename: Full path or just filename to match + exact: If True, match full path; if False, match filename only + case_insensitive: If True, compare case-insensitively + + Returns: + True if item has matching file + """ + try: + if not hasattr(item, 'media') or not item.media: + return False + + compare_val = path_or_filename.lower() if case_insensitive else path_or_filename + + for media in item.media: + if not hasattr(media, 'parts') or not media.parts: + continue + for part in media.parts: + if not hasattr(part, 'file') or not part.file: + continue + + file_path = part.file + if case_insensitive: + file_path = file_path.lower() + + if exact: + if file_path == compare_val: + return True + else: + # Match filename only + if file_path.endswith('/' + compare_val) or file_path.endswith('\\' + compare_val): + return True + # Also check without path separator for edge cases + if Path(file_path).name == (Path(path_or_filename).name if not case_insensitive else Path(path_or_filename).name.lower()): + return True + except Exception as e: + logger.debug(f"Error checking item files: {e}") + + return False + + +class MatchConfidence(Enum): + HIGH = "high" # Single unique match - auto-sync safe + LOW = "low" # Multiple candidates - needs review + + +def find_plex_item_by_path( + library: "LibrarySection", + stash_path: str, + plex_path_prefix: Optional[str] = None, + stash_path_prefix: Optional[str] = None, +) -> Optional["Video"]: + """ + Find Plex item matching a Stash file path. + + Fast path: title search then verify filename. + Slow fallback: scan all items by filename if title search fails. 
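+
+    Illustrative call (the path is a placeholder):
+
+        item = find_plex_item_by_path(library, "/media/Example Scene - 1080p.mp4")
+        if item is None:
+            ...  # no match found, or more than one candidate matched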
+ + Args: + library: Plex library section to search + stash_path: File path from Stash + plex_path_prefix: Unused, kept for API compatibility + stash_path_prefix: Unused, kept for API compatibility + + Returns: + Matching Plex item or None if not found / ambiguous + """ + path = Path(stash_path) + filename = path.name + filename_lower = filename.lower() + + # Derive title variants from filename + title_search = path.stem + title_search = re.sub(r'\s*[-_]?\s*(WEBDL|WEB-DL|WEBRip|HDTV|BluRay|BDRip|DVDRip|720p|1080p|2160p|4K|HDR|DV).*$', '', title_search, flags=re.IGNORECASE) + title_base = re.sub(r'\s*[-_]?\s*\d{4}-\d{2}-\d{2}\s*$', '', title_search) + + matches = [] + + # Fast path: title search + try: + for title in [title_search, title_base]: + if matches: + break + results = library.search(title=title) + for item in results: + if _item_has_file(item, filename_lower, exact=False, case_insensitive=True): + matches.append(item) + except Exception as e: + logger.warning(f"Title search failed: {e}") + + # Slow fallback if needed + if not matches: + try: + all_items = library.all() + for item in all_items: + if _item_has_file(item, filename_lower, exact=False, case_insensitive=True): + matches.append(item) + except Exception as e: + logger.warning(f"Scan failed: {e}") + + if len(matches) == 1: + return matches[0] + return None + + +def find_plex_items_with_confidence( + library: "LibrarySection", + stash_path: str, + plex_path_prefix: Optional[str] = None, + stash_path_prefix: Optional[str] = None, + library_cache: Optional["PlexCache"] = None, + match_cache: Optional["MatchCache"] = None, +) -> tuple[MatchConfidence, Optional["Video"], list["Video"]]: + """ + Find Plex item with confidence scoring and optional caching. + + Execution order with caches: + 1. Check match_cache for existing path-to-key mapping + 2. If hit, fetch item directly via library.fetchItem(key) (1 API call) + 3. If stale (item not found), invalidate cache and continue + 4. Fall back to title search, then all-items scan + 5. On successful match, store in match_cache for next time + + Args: + library: Plex library section to search + stash_path: File path from Stash + plex_path_prefix: Unused, kept for API compatibility + stash_path_prefix: Unused, kept for API compatibility + library_cache: Optional PlexCache for library/search result caching + match_cache: Optional MatchCache for path-to-key mapping caching + + Returns: + Tuple of (confidence, best_match_or_none, all_candidates): + - HIGH confidence + item: Single unique match found + - LOW confidence + None: Multiple ambiguous matches (candidates list populated) + - Raises PlexNotFound if no matches at all + + Raises: + PlexNotFound: When no matching items found (allows retry logic) + + Example: + >>> # Without caches (backward compatible) + >>> confidence, item, candidates = find_plex_items_with_confidence(library, path) + + >>> # With caches (optimized) + >>> from plex.cache import PlexCache, MatchCache + >>> lib_cache = PlexCache("/data") + >>> match_cache = MatchCache("/data") + >>> confidence, item, candidates = find_plex_items_with_confidence( + ... library, path, library_cache=lib_cache, match_cache=match_cache + ... 
) + """ + from plex.exceptions import PlexNotFound + + path = Path(stash_path) + filename = path.name + filename_lower = filename.lower() + library_name = library.title + + # Check match_cache for existing mapping (fastest path) + if match_cache is not None: + cached_key = match_cache.get_match(library_name, stash_path) + if cached_key is not None: + log_debug(f"Match cache hit: {stash_path} -> {cached_key}") + try: + # Fetch item directly by key (1 API call vs search) + item = library.fetchItem(cached_key) + if item is not None: + log_info(f"Cache hit: {item.title}") + return (MatchConfidence.HIGH, item, [item]) + except Exception as e: + # Item not found at cached key - cache is stale + log_debug(f"Cached key stale, invalidating: {e}") + match_cache.invalidate(library_name, stash_path) + + # Derive title variants from filename + title_search = path.stem + # Remove quality suffix (handles "WEBDL", "- WEBDL", "WEBDL-2160p", etc.) + title_search = re.sub(r'\s*[-_]?\s*(WEBDL|WEB-DL|WEBRip|HDTV|BluRay|BDRip|DVDRip|720p|1080p|2160p|4K|HDR|DV).*$', '', title_search, flags=re.IGNORECASE) + # Remove date suffix like "- 2026-01-30" or "2026-01-30" + title_base = re.sub(r'\s*[-_]?\s*\d{4}-\d{2}-\d{2}\s*$', '', title_search) + + log_debug(f"Searching '{library_name}' for: {filename}") + log_debug(f"Title variants: '{title_search}' / '{title_base}'") + + candidates = [] + + # Fast path: title search (with optional library_cache) + try: + for title in [title_search, title_base]: + if candidates: + break + log_debug(f"Title search: '{title}'") + + # Check library_cache for search results + cached_results = None + if library_cache is not None: + cached_results = library_cache.get_search_results(library_name, title) + + if cached_results is not None: + # Cache hit - match against cached data, then fetch actual items + log_debug(f"Search cache hit: {len(cached_results)} results") + for item_data in cached_results: + if _cached_item_has_file(item_data, filename_lower, case_insensitive=True): + # Found match in cache - fetch actual item + try: + actual_item = library.fetchItem(item_data['key']) + if actual_item is not None: + candidates.append(actual_item) + log_info(f"Found (cached): {actual_item.title}") + except Exception as e: + log_debug(f"Failed to fetch cached item {item_data['key']}: {e}") + else: + # Cache miss - do actual search + results = library.search(title=title) + log_debug(f"Got {len(results)} title matches") + + # Cache the results for next time + if library_cache is not None: + library_cache.set_search_results(library_name, title, results) + + for item in results: + if _item_has_file(item, filename_lower, exact=False, case_insensitive=True): + candidates.append(item) + log_info(f"Found: {item.title}") + except Exception as e: + log_warn(f"Title search failed: {e}") + + # Slow fallback: scan all items if title search found nothing + if not candidates: + try: + log_debug(f"Fallback: scanning all items...") + + # Check library_cache for all items + cached_items = None + if library_cache is not None: + cached_items = library_cache.get_library_items(library_name) + + if cached_items is not None: + # Cache hit - match against cached data, then fetch actual items + log_debug(f"Library cache hit: {len(cached_items)} items") + for item_data in cached_items: + if _cached_item_has_file(item_data, filename_lower, case_insensitive=True): + # Found match in cache - fetch actual item + try: + actual_item = library.fetchItem(item_data['key']) + if actual_item is not None: + candidates.append(actual_item) 
+ log_info(f"Found (cached): {actual_item.title}") + except Exception as e: + log_debug(f"Failed to fetch cached item {item_data['key']}: {e}") + else: + # Cache miss - do actual library scan + all_items = library.all() + log_debug(f"Scanning {len(all_items)} items...") + + # Cache the items for next time + if library_cache is not None: + library_cache.set_library_items(library_name, all_items) + + for item in all_items: + if _item_has_file(item, filename_lower, exact=False, case_insensitive=True): + candidates.append(item) + log_info(f"Found: {item.title}") + except Exception as e: + log_warn(f"Scan failed: {e}") + + # Scoring logic + if len(candidates) == 0: + raise PlexNotFound(f"No Plex item found for filename: {filename}") + elif len(candidates) == 1: + logger.debug(f"HIGH confidence match for {filename}") + # Store in match_cache for next time + if match_cache is not None: + item = candidates[0] + item_key = getattr(item, 'key', None) or getattr(item, 'ratingKey', None) + if item_key: + match_cache.set_match(library_name, stash_path, str(item_key)) + return (MatchConfidence.HIGH, candidates[0], candidates) + else: + # Multiple matches - log warning with candidate paths + candidate_paths = [] + for item in candidates: + try: + if hasattr(item, 'media') and item.media: + if hasattr(item.media[0], 'parts') and item.media[0].parts: + candidate_paths.append(item.media[0].parts[0].file) + except (IndexError, AttributeError): + candidate_paths.append("") + + logger.warning( + f"LOW confidence match for '{filename}': " + f"{len(candidates)} candidates found - {candidate_paths}" + ) + return (MatchConfidence.LOW, None, candidates) diff --git a/plugins/Stash2Plex/plex/timing.py b/plugins/Stash2Plex/plex/timing.py new file mode 100644 index 00000000..bdc70bda --- /dev/null +++ b/plugins/Stash2Plex/plex/timing.py @@ -0,0 +1,108 @@ +""" +Timing utilities for performance measurement. + +Provides decorators to log operation duration and identify bottlenecks. +""" + +import logging +import sys +import time +from functools import wraps +from typing import Any, Callable + +logger = logging.getLogger('Stash2Plex.plex.timing') + + +def timed(func: Callable) -> Callable: + """ + Decorator to log function execution time. + + Logs at DEBUG level for fast operations (<1s), + INFO level for slower operations (>=1s). + + Args: + func: Function to wrap with timing + + Returns: + Wrapped function that logs execution time + + Example: + >>> @timed + ... def slow_function(): + ... time.sleep(0.5) + ... return "done" + >>> slow_function() # Logs: slow_function took 0.501s + """ + @wraps(func) + def wrapper(*args, **kwargs) -> Any: + start = time.perf_counter() + try: + result = func(*args, **kwargs) + return result + finally: + elapsed = time.perf_counter() - start + level = logging.INFO if elapsed >= 1.0 else logging.DEBUG + logger.log(level, f"{func.__name__} took {elapsed:.3f}s") + return wrapper + + +class OperationTimer: + """ + Context manager for timing code blocks. + + Logs at DEBUG level for fast operations (<1s), + INFO level for slower operations (>=1s). + + Attributes: + operation_name: Name of the operation being timed + start_time: Time when context was entered + elapsed: Elapsed time in seconds (available after exit) + + Example: + >>> with OperationTimer("database query"): + ... result = db.query() + # Logs: database query took 0.123s + + >>> timer = OperationTimer("fetch") + >>> with timer: + ... 
data = fetch_data() + >>> print(f"Fetch took {timer.elapsed:.3f}s") + """ + + def __init__(self, operation_name: str) -> None: + """ + Initialize operation timer. + + Args: + operation_name: Descriptive name for the operation + """ + self.operation_name = operation_name + self.start_time: float = 0.0 + self.elapsed: float = 0.0 + + def __enter__(self) -> "OperationTimer": + """Start timing the operation.""" + self.start_time = time.perf_counter() + return self + + def __exit__(self, *args) -> None: + """Stop timing and log the duration.""" + self.elapsed = time.perf_counter() - self.start_time + level = logging.INFO if self.elapsed >= 1.0 else logging.DEBUG + logger.log(level, f"{self.operation_name} took {self.elapsed:.3f}s") + + +def log_timing(msg: str) -> None: + """ + Log timing message with Stash plugin format. + + Uses Stash's plugin log format for DEBUG level timing messages. + + Args: + msg: Timing message to log + + Example: + >>> log_timing("Search completed in 0.5s") + # Outputs: \x01d\x02[Stash2Plex Timing] Search completed in 0.5s + """ + print(f"\x01d\x02[Stash2Plex Timing] {msg}", file=sys.stderr) diff --git a/plugins/Stash2Plex/requirements.txt b/plugins/Stash2Plex/requirements.txt new file mode 100644 index 00000000..bb21c607 --- /dev/null +++ b/plugins/Stash2Plex/requirements.txt @@ -0,0 +1,6 @@ +persist-queue>=1.1.0 +stashapi +plexapi>=4.17.0 +tenacity>=9.0.0 +pydantic>=2.0.0 +diskcache>=5.6.0 diff --git a/plugins/Stash2Plex/sync_queue/__init__.py b/plugins/Stash2Plex/sync_queue/__init__.py new file mode 100644 index 00000000..060211bd --- /dev/null +++ b/plugins/Stash2Plex/sync_queue/__init__.py @@ -0,0 +1,32 @@ +""" +Persistent Queue Module + +Provides durable job queue infrastructure for Stash2Plex using SQLite-backed +persistence. Jobs survive process restarts, crashes, and Plex outages. +""" + +from sync_queue.manager import QueueManager +from sync_queue.models import SyncJob +from sync_queue.dlq import DeadLetterQueue + +# Operations will be imported after operations.py is created +try: + from sync_queue.operations import enqueue, get_pending, ack_job, nack_job, fail_job, get_stats + __all__ = [ + 'QueueManager', + 'SyncJob', + 'DeadLetterQueue', + 'enqueue', + 'get_pending', + 'ack_job', + 'nack_job', + 'fail_job', + 'get_stats', + ] +except ImportError: + # operations.py not yet created + __all__ = [ + 'QueueManager', + 'SyncJob', + 'DeadLetterQueue', + ] diff --git a/plugins/Stash2Plex/sync_queue/dlq.py b/plugins/Stash2Plex/sync_queue/dlq.py new file mode 100644 index 00000000..181132aa --- /dev/null +++ b/plugins/Stash2Plex/sync_queue/dlq.py @@ -0,0 +1,203 @@ +""" +Dead Letter Queue Module + +Stores permanently failed jobs for manual review and debugging. When jobs fail +beyond retry limits or encounter permanent errors, they're moved to the DLQ +with full error context preserved. 
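+
+A minimal usage sketch (the data directory is a placeholder):
+
+    dlq = DeadLetterQueue("/data/Stash2Plex")
+    dlq.add(job, error, retry_count=5)        # called once retries are exhausted
+    for entry in dlq.get_recent(limit=10):    # summaries for manual review
+        print(entry["error_type"], entry["error_message"])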
+""" + +import sqlite3 +import pickle +import traceback +import logging +from typing import Optional + +# Use standard logging for DLQ operations +log = logging.getLogger(__name__) + + +class DeadLetterQueue: + """Dead letter queue for permanently failed sync jobs""" + + def __init__(self, data_dir: str): + """ + Initialize dead letter queue + + Args: + data_dir: Directory for DLQ database (same as main queue) + """ + import os + self.db_path = os.path.join(data_dir, 'dlq.db') + self._setup_schema() + + def _setup_schema(self): + """Create dead_letters table and indexes if not exists""" + conn = sqlite3.connect(self.db_path) + + # Create table + conn.execute(''' + CREATE TABLE IF NOT EXISTS dead_letters ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + job_id INTEGER, + scene_id INTEGER, + job_data BLOB, + error_type TEXT, + error_message TEXT, + stack_trace TEXT, + retry_count INTEGER, + failed_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP + ) + ''') + + # Create indexes for efficient querying + conn.execute('CREATE INDEX IF NOT EXISTS idx_failed_at ON dead_letters(failed_at)') + conn.execute('CREATE INDEX IF NOT EXISTS idx_scene_id ON dead_letters(scene_id)') + + conn.commit() + conn.close() + + def _get_connection(self): + """Get SQLite connection (context manager pattern)""" + return sqlite3.connect(self.db_path) + + def add(self, job: dict, error: Exception, retry_count: int): + """ + Add failed job to DLQ + + Args: + job: Full job dict (includes pqid, scene_id, data) + error: Exception that caused failure + retry_count: Number of retry attempts before failure + """ + with self._get_connection() as conn: + conn.execute( + '''INSERT INTO dead_letters + (job_id, scene_id, job_data, error_type, error_message, stack_trace, retry_count) + VALUES (?, ?, ?, ?, ?, ?, ?)''', + ( + job.get('pqid'), + job.get('scene_id'), + pickle.dumps(job), + type(error).__name__, + str(error), + traceback.format_exc(), + retry_count + ) + ) + conn.commit() + + log.warning(f"Job {job.get('pqid')} moved to DLQ after {retry_count} retries: {type(error).__name__}") + + def get_recent(self, limit: int = 10) -> list[dict]: + """ + Get recent failed jobs (summary, no full job_data) + + Args: + limit: Maximum number of entries to return + + Returns: + List of dicts with: id, job_id, scene_id, error_type, error_message, failed_at + """ + with self._get_connection() as conn: + conn.row_factory = sqlite3.Row + cursor = conn.execute( + '''SELECT id, job_id, scene_id, error_type, error_message, failed_at + FROM dead_letters + ORDER BY failed_at DESC + LIMIT ?''', + (limit,) + ) + results = [dict(row) for row in cursor.fetchall()] + + return results + + def get_by_id(self, dlq_id: int) -> Optional[dict]: + """ + Get full job details by DLQ ID (including unpickled job_data) + + Args: + dlq_id: Dead letter queue entry ID + + Returns: + Full job dict, or None if not found + """ + with self._get_connection() as conn: + cursor = conn.execute( + 'SELECT job_data FROM dead_letters WHERE id = ?', + (dlq_id,) + ) + row = cursor.fetchone() + + if row is None: + return None + + # Unpickle job_data + return pickle.loads(row[0]) + + def get_count(self) -> int: + """ + Get total count of DLQ entries + + Returns: + Number of failed jobs in DLQ + """ + with self._get_connection() as conn: + cursor = conn.execute('SELECT COUNT(*) FROM dead_letters') + count = cursor.fetchone()[0] + + return count + + def delete_older_than(self, days: int = 30): + """ + Delete DLQ entries older than specified days + + Args: + days: Retention period (default 30 days) + 
""" + with self._get_connection() as conn: + cursor = conn.execute( + "DELETE FROM dead_letters WHERE failed_at < datetime('now', '-' || ? || ' days')", + (days,) + ) + count = cursor.rowcount + conn.commit() + + log.info(f"Removed {count} DLQ entries older than {days} days") + + def get_error_summary(self) -> dict[str, int]: + """ + Get count of DLQ entries grouped by error type. + + Returns: + Dict mapping error_type to count, e.g., {"PlexNotFound": 3, "PermanentError": 2} + """ + with self._get_connection() as conn: + cursor = conn.execute( + 'SELECT error_type, COUNT(*) as count FROM dead_letters GROUP BY error_type' + ) + return {row[0]: row[1] for row in cursor.fetchall()} + + +if __name__ == "__main__": + import tempfile + import os + + with tempfile.TemporaryDirectory() as tmpdir: + dlq = DeadLetterQueue(tmpdir) + + # Add a failed job + test_job = {"pqid": 1, "scene_id": 123, "data": {"title": "Test"}} + test_error = ValueError("Plex API error: 404 Not Found") + dlq.add(test_job, test_error, retry_count=5) + + # Query + assert dlq.get_count() == 1 + recent = dlq.get_recent(limit=5) + assert len(recent) == 1 + assert recent[0]["scene_id"] == 123 + + # Get full job + full = dlq.get_by_id(recent[0]["id"]) + assert full["scene_id"] == 123 + + print("DLQ integration test PASSED") diff --git a/plugins/Stash2Plex/sync_queue/manager.py b/plugins/Stash2Plex/sync_queue/manager.py new file mode 100644 index 00000000..053a3101 --- /dev/null +++ b/plugins/Stash2Plex/sync_queue/manager.py @@ -0,0 +1,92 @@ +""" +Queue manager for persistent job storage. + +Handles queue initialization, lifecycle management, and shutdown. +""" + +import os +from typing import Optional +try: + import persistqueue +except ImportError: + persistqueue = None # Will fail at runtime with clear error + + +class QueueManager: + """ + Manages SQLite-backed persistent queue lifecycle. + + Stores queue in Stash plugin data directory. Queue survives + process restarts and crashes with auto_resume=True. + """ + + def __init__(self, data_dir: Optional[str] = None): + """ + Initialize queue manager. + + Args: + data_dir: Directory for queue storage. Defaults to + $STASH_PLUGIN_DATA or ~/.stash/plugins/Stash2Plex/data + """ + if persistqueue is None: + raise ImportError( + "persist-queue not installed. " + "Run: pip install persist-queue>=1.1.0" + ) + + # Determine data directory + if data_dir is None: + # Try environment variable first + stash_data = os.getenv('STASH_PLUGIN_DATA') + if stash_data: + data_dir = stash_data + else: + # Fallback to default location + home = os.path.expanduser('~') + data_dir = os.path.join(home, '.stash', 'plugins', 'Stash2Plex', 'data') + + self.data_dir = data_dir + self.queue_path = os.path.join(data_dir, 'queue') + + # Create directory if needed + os.makedirs(self.queue_path, exist_ok=True) + + # Initialize queue + self._queue = self._init_queue() + + print(f"Queue initialized at {self.queue_path}") + + def _init_queue(self) -> 'persistqueue.SQLiteAckQueue': + """ + Create SQLiteAckQueue with production settings. + + Returns: + Configured SQLiteAckQueue instance + """ + return persistqueue.SQLiteAckQueue( + path=self.queue_path, + auto_commit=True, # Required for AckQueue - immediate persistence + multithreading=True, # Thread-safe operations + auto_resume=True # Recover unack jobs on crash + ) + + def get_queue(self) -> 'persistqueue.SQLiteAckQueue': + """ + Get the queue instance. 
+ + Returns: + SQLiteAckQueue for job operations + """ + return self._queue + + def shutdown(self): + """ + Clean shutdown of queue manager. + + Logs final statistics and closes connections. + """ + # Queue doesn't require explicit close, but log stats + print("Queue manager shutting down") + + # Note: persist-queue handles cleanup automatically + # No explicit close() needed for SQLiteAckQueue diff --git a/plugins/Stash2Plex/sync_queue/models.py b/plugins/Stash2Plex/sync_queue/models.py new file mode 100644 index 00000000..db3af1bc --- /dev/null +++ b/plugins/Stash2Plex/sync_queue/models.py @@ -0,0 +1,43 @@ +""" +Job data models for sync queue. + +Uses dict-compatible structures for safe serialization with persist-queue. +""" + +import time +from typing import TypedDict, Any + + +class SyncJob(TypedDict): + """ + Sync job data structure. + + Dict-compatible type for safe pickling by persist-queue. + All jobs are JSON-serializable to avoid pickle protocol issues. + """ + scene_id: int + update_type: str + data: dict[str, Any] + enqueued_at: float + job_key: str + + +def create_sync_job(scene_id: int, update_type: str, data: dict[str, Any]) -> SyncJob: + """ + Create a new sync job dict. + + Args: + scene_id: Stash scene ID + update_type: Type of update (e.g., "metadata", "image") + data: Metadata to sync to Plex + + Returns: + SyncJob dict ready for queue + """ + return SyncJob( + scene_id=scene_id, + update_type=update_type, + data=data, + enqueued_at=time.time(), + job_key=f"scene_{scene_id}" + ) diff --git a/plugins/Stash2Plex/sync_queue/operations.py b/plugins/Stash2Plex/sync_queue/operations.py new file mode 100644 index 00000000..b4b5a4b8 --- /dev/null +++ b/plugins/Stash2Plex/sync_queue/operations.py @@ -0,0 +1,244 @@ +""" +Queue operations for job lifecycle management. + +Stateless operations that work on queue instance passed in. +""" + +import json +import os +import sqlite3 +import time +from typing import Optional + +try: + import persistqueue +except ImportError: + persistqueue = None + + +def enqueue(queue: 'persistqueue.SQLiteAckQueue', scene_id: int, update_type: str, data: dict) -> dict: + """ + Enqueue a sync job. + + Args: + queue: SQLiteAckQueue instance + scene_id: Stash scene ID + update_type: Type of update (e.g., "metadata", "image") + data: Metadata to sync to Plex + + Returns: + The enqueued job dict + + Example: + >>> from persistqueue import SQLiteAckQueue + >>> queue = SQLiteAckQueue('/tmp/queue') + >>> job = enqueue(queue, scene_id=123, update_type='metadata', + ... data={'title': 'Example Scene'}) + >>> print(job['scene_id']) + 123 + """ + job = { + 'scene_id': scene_id, + 'update_type': update_type, + 'data': data, + 'enqueued_at': time.time(), + 'job_key': f"scene_{scene_id}" + } + + queue.put(job) + print(f"Enqueued sync job for scene {scene_id}") + + return job + + +def get_pending(queue: 'persistqueue.SQLiteAckQueue', timeout: float = 0) -> Optional[dict]: + """ + Get next pending job from queue. + + Args: + queue: SQLiteAckQueue instance + timeout: Seconds to wait for job (0 = non-blocking) + + Returns: + Job dict with 'pqid' field added by persist-queue, or None if timeout + """ + job = queue.get(timeout=timeout) + return job + + +def ack_job(queue: 'persistqueue.SQLiteAckQueue', job: dict): + """ + Acknowledge successful job completion. 
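+
+    Sketch of the intended job lifecycle (queue setup and sync logic omitted):
+
+        job = get_pending(queue, timeout=1)
+        if job is not None:
+            try:
+                ...                      # sync the job to Plex
+                ack_job(queue, job)      # success: remove from queue
+            except Exception:
+                nack_job(queue, job)     # transient failure: retry later
+                # or fail_job(queue, job) once retries are exhausted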
+ + Args: + queue: SQLiteAckQueue instance + job: Job dict (must have 'pqid' field from get_pending) + """ + queue.ack(job) + pqid = job.get('pqid', '?') + print(f"Job {pqid} completed") + + +def nack_job(queue: 'persistqueue.SQLiteAckQueue', job: dict): + """ + Return job to queue for retry. + + Args: + queue: SQLiteAckQueue instance + job: Job dict (must have 'pqid' field from get_pending) + """ + queue.nack(job) + pqid = job.get('pqid', '?') + print(f"Job {pqid} returned to queue for retry") + + +def fail_job(queue: 'persistqueue.SQLiteAckQueue', job: dict): + """ + Mark job as permanently failed. + + Args: + queue: SQLiteAckQueue instance + job: Job dict (must have 'pqid' field from get_pending) + """ + queue.ack_failed(job) + pqid = job.get('pqid', '?') + print(f"Job {pqid} marked as failed") + + +def get_stats(queue_path: str) -> dict: + """ + Get queue statistics by status. + + Queries SQLite database directly for status counts. + + Args: + queue_path: Path to queue directory (contains data.db) + + Returns: + Dict with status counts: { + 'pending': int, + 'in_progress': int, + 'completed': int, + 'failed': int + } + + Status codes from persist-queue AckStatus enum: + 0 = inited + 1 = ready (pending) + 2 = unack (in_progress) + 5 = acked (completed) + 9 = ack_failed (failed) + """ + db_path = os.path.join(queue_path, 'data.db') + + # Return zeros if database doesn't exist yet + if not os.path.exists(db_path): + return { + 'pending': 0, + 'in_progress': 0, + 'completed': 0, + 'failed': 0 + } + + conn = sqlite3.connect(db_path) + try: + # Find the ack_queue table (persist-queue uses ack_queue_default by default) + cursor = conn.execute( + "SELECT name FROM sqlite_master WHERE type='table' AND name LIKE 'ack_queue%'" + ) + table = cursor.fetchone() + if not table: + # Table doesn't exist yet (no jobs enqueued) + return { + 'pending': 0, + 'in_progress': 0, + 'completed': 0, + 'failed': 0 + } + + table_name = table[0] + + cursor = conn.execute(f''' + SELECT status, COUNT(*) as count + FROM {table_name} + GROUP BY status + ''') + + stats = { + 'pending': 0, + 'in_progress': 0, + 'completed': 0, + 'failed': 0 + } + + for row in cursor: + status_code = row[0] + count = row[1] + + # Map status codes to categories + # 0 and 1 are both "pending" (ready to process) + if status_code in (0, 1): + stats['pending'] += count + elif status_code == 2: + stats['in_progress'] += count + elif status_code == 5: + stats['completed'] += count + elif status_code == 9: + stats['failed'] += count + + return stats + + finally: + conn.close() + + +def _get_sync_timestamps_path(data_dir: str) -> str: + """Get path to sync timestamps JSON file.""" + return os.path.join(data_dir, 'sync_timestamps.json') + + +def load_sync_timestamps(data_dir: str) -> dict[int, float]: + """ + Load sync timestamps from JSON file. + + Args: + data_dir: Queue data directory (same as queue_path) + + Returns: + Dict mapping scene_id -> last_synced_at timestamp + """ + path = _get_sync_timestamps_path(data_dir) + if not os.path.exists(path): + return {} + + try: + with open(path, 'r') as f: + data = json.load(f) + # JSON keys are strings, convert back to int + return {int(k): v for k, v in data.items()} + except (json.JSONDecodeError, IOError): + return {} + + +def save_sync_timestamp(data_dir: str, scene_id: int, timestamp: float) -> None: + """ + Save sync timestamp for a scene. 
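+
+    Illustrative round trip (the data directory is a placeholder):
+
+        save_sync_timestamp("/data/Stash2Plex", scene_id=123, timestamp=time.time())
+        stamps = load_sync_timestamps("/data/Stash2Plex")
+        last_synced = stamps.get(123)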
+ + Args: + data_dir: Queue data directory + scene_id: Scene ID that was synced + timestamp: time.time() when sync completed + """ + path = _get_sync_timestamps_path(data_dir) + + # Load existing timestamps + timestamps = load_sync_timestamps(data_dir) + + # Update with new timestamp + timestamps[scene_id] = timestamp + + # Write back atomically (write to temp, rename) + temp_path = path + '.tmp' + with open(temp_path, 'w') as f: + json.dump(timestamps, f) + os.replace(temp_path, path) diff --git a/plugins/Stash2Plex/validation/__init__.py b/plugins/Stash2Plex/validation/__init__.py new file mode 100644 index 00000000..8842024d --- /dev/null +++ b/plugins/Stash2Plex/validation/__init__.py @@ -0,0 +1,21 @@ +""" +Validation module for Stash2Plex. + +Provides metadata validation, text sanitization, error classification, +and plugin configuration validation. +""" + +from validation.sanitizers import sanitize_for_plex +from validation.errors import classify_exception, classify_http_error +from validation.metadata import SyncMetadata, validate_metadata +from validation.config import Stash2PlexConfig, validate_config + +__all__ = [ + 'sanitize_for_plex', + 'classify_exception', + 'classify_http_error', + 'SyncMetadata', + 'validate_metadata', + 'Stash2PlexConfig', + 'validate_config', +] diff --git a/plugins/Stash2Plex/validation/config.py b/plugins/Stash2Plex/validation/config.py new file mode 100644 index 00000000..569a8ce2 --- /dev/null +++ b/plugins/Stash2Plex/validation/config.py @@ -0,0 +1,221 @@ +""" +Configuration validation for Stash2Plex. + +Provides pydantic v2 models for validating plugin configuration +with fail-fast behavior and sensible defaults. +""" + +from pydantic import BaseModel, Field, field_validator, ValidationError +from typing import Optional +import logging + +log = logging.getLogger('Stash2Plex.config') + + +class Stash2PlexConfig(BaseModel): + """ + Stash2Plex plugin configuration with validation. + + Required: + plex_url: Plex server URL (e.g., http://192.168.1.100:32400) + plex_token: Plex authentication token + + Optional tunables: + enabled: Master on/off switch (default: True) + max_retries: Retry attempts before DLQ (default: 5, range: 1-20) + poll_interval: Worker poll interval in seconds (default: 1.0, range: 0.1-60.0) + strict_mode: If True, reject invalid metadata; if False, sanitize and continue (default: False) + plex_connect_timeout: Connection timeout in seconds (default: 5.0, range: 1.0-30.0) + plex_read_timeout: Read timeout in seconds (default: 30.0, range: 5.0-120.0) + dlq_retention_days: Days to retain failed jobs in DLQ (default: 30, range: 1-365) + """ + + # Required fields + plex_url: str + plex_token: str + + # Optional tunables with defaults + enabled: bool = True + max_retries: int = Field(default=5, ge=1, le=20) + poll_interval: float = Field(default=1.0, ge=0.1, le=60.0) + strict_mode: bool = False + + # Plex connection timeouts (in seconds) + plex_connect_timeout: float = Field(default=5.0, ge=1.0, le=30.0) + plex_read_timeout: float = Field(default=30.0, ge=5.0, le=120.0) + + # DLQ settings + dlq_retention_days: int = Field(default=30, ge=1, le=365) + + # Plex library to sync (required - e.g., "Adult", "Movies") + plex_library: Optional[str] = Field( + default=None, + description="Name of Plex library section to sync. If not set, searches all libraries (slow)." + ) + + # Late update detection flags + strict_matching: bool = Field( + default=True, + description="Skip sync on low-confidence matches (safer). 
False = sync anyway with warning logged." + ) + preserve_plex_edits: bool = Field( + default=False, + description="Preserve manual Plex edits. True = only update empty fields, False = Stash always wins." + ) + + # ========================================================================= + # Field Sync Toggles (all default True = enabled) + # ========================================================================= + + sync_master: bool = Field( + default=True, + description="Master toggle to enable/disable all metadata field syncing" + ) + sync_studio: bool = Field( + default=True, + description="Sync studio name from Stash to Plex" + ) + sync_summary: bool = Field( + default=True, + description="Sync summary/details from Stash to Plex" + ) + sync_tagline: bool = Field( + default=True, + description="Sync tagline from Stash to Plex" + ) + sync_date: bool = Field( + default=True, + description="Sync release date from Stash to Plex" + ) + sync_performers: bool = Field( + default=True, + description="Sync performers as Plex actors" + ) + sync_tags: bool = Field( + default=True, + description="Sync tags as Plex genres" + ) + sync_poster: bool = Field( + default=True, + description="Sync poster image from Stash to Plex" + ) + sync_background: bool = Field( + default=True, + description="Sync background/fanart image from Stash to Plex" + ) + sync_collection: bool = Field( + default=True, + description="Add items to Plex collection based on studio name" + ) + + # Stash connection (for fetching images) + stash_url: Optional[str] = Field( + default=None, + description="Stash server URL (extracted from connection)" + ) + stash_api_key: Optional[str] = Field( + default=None, + description="Stash API key for authenticated image fetching" + ) + stash_session_cookie: Optional[str] = Field( + default=None, + description="Stash session cookie for authenticated image fetching" + ) + + @field_validator('plex_url', mode='after') + @classmethod + def validate_plex_url(cls, v: str) -> str: + """Validate plex_url is a valid HTTP/HTTPS URL.""" + if not v: + raise ValueError('plex_url is required') + if not v.startswith(('http://', 'https://')): + raise ValueError('plex_url must start with http:// or https://') + return v.rstrip('/') # Normalize: remove trailing slash + + @field_validator('plex_token', mode='after') + @classmethod + def validate_plex_token(cls, v: str) -> str: + """Validate plex_token is present and reasonable length.""" + if not v: + raise ValueError('plex_token is required') + if len(v) < 10: + raise ValueError('plex_token appears invalid (too short)') + return v + + @field_validator( + 'strict_matching', 'preserve_plex_edits', + 'sync_master', 'sync_studio', 'sync_summary', 'sync_tagline', + 'sync_date', 'sync_performers', 'sync_tags', 'sync_poster', + 'sync_background', 'sync_collection', + mode='before' + ) + @classmethod + def validate_booleans(cls, v): + """Ensure boolean fields are actual booleans, not truthy strings.""" + if isinstance(v, bool): + return v + if isinstance(v, str): + lower = v.lower() + if lower in ('true', '1', 'yes'): + return True + if lower in ('false', '0', 'no'): + return False + raise ValueError(f"Invalid boolean value: {v}") + raise ValueError(f"Expected boolean, got {type(v).__name__}") + + def log_config(self) -> None: + """Log configuration with masked token for security.""" + if len(self.plex_token) > 8: + masked = self.plex_token[:4] + '****' + self.plex_token[-4:] + else: + masked = '****' + log.info( + f"Stash2Plex config: url={self.plex_url}, 
token={masked}, " + f"max_retries={self.max_retries}, enabled={self.enabled}, " + f"strict_mode={self.strict_mode}, " + f"connect_timeout={self.plex_connect_timeout}s, " + f"read_timeout={self.plex_read_timeout}s, " + f"dlq_retention_days={self.dlq_retention_days}, " + f"strict_matching={self.strict_matching}, " + f"preserve_plex_edits={self.preserve_plex_edits}" + ) + # Log field sync toggle summary + toggles_off = [k.replace('sync_', '') for k in [ + 'sync_studio', 'sync_summary', 'sync_tagline', 'sync_date', + 'sync_performers', 'sync_tags', 'sync_poster', 'sync_background', 'sync_collection' + ] if not getattr(self, k, True)] + if not self.sync_master: + log.info("Field sync: MASTER OFF (all fields disabled)") + elif toggles_off: + log.info(f"Field sync: enabled except {toggles_off}") + else: + log.info("Field sync: all fields enabled") + + +def validate_config(config_dict: dict) -> tuple[Optional[Stash2PlexConfig], Optional[str]]: + """ + Validate configuration dictionary and return Stash2PlexConfig or error message. + + Args: + config_dict: Dictionary containing configuration values + + Returns: + Tuple of (Stash2PlexConfig, None) on success, + or (None, error_message) on validation failure + """ + try: + config = Stash2PlexConfig(**config_dict) + return (config, None) + except ValidationError as e: + # Extract user-friendly error messages + errors = [] + for error in e.errors(): + field = '.'.join(str(loc) for loc in error['loc']) + msg = error['msg'] + errors.append(f"{field}: {msg}") + error_message = '; '.join(errors) + return (None, error_message) + + +# Re-export ValidationError for external use +__all__ = ['Stash2PlexConfig', 'validate_config', 'ValidationError'] diff --git a/plugins/Stash2Plex/validation/errors.py b/plugins/Stash2Plex/validation/errors.py new file mode 100644 index 00000000..8cba3a97 --- /dev/null +++ b/plugins/Stash2Plex/validation/errors.py @@ -0,0 +1,201 @@ +""" +Centralized error classification for retry/DLQ routing. + +Provides consistent classification of errors to determine whether they should +be retried (transient) or moved to DLQ (permanent). This ensures all components +use the same logic for error handling decisions. + +Also provides partial sync result tracking for granular error handling where +non-critical field failures (performers, tags, poster) don't fail the entire job. +""" + +import logging +from dataclasses import dataclass, field +from typing import List, Type + +from worker.processor import TransientError, PermanentError + + +@dataclass +class FieldUpdateWarning: + """ + Warning for a non-critical field update that failed. + + These are logged but don't fail the entire sync job. + Examples: performer sync failed, tag sync failed, poster upload failed. + + Attributes: + field_name: The field that failed to update (e.g., "performers", "tags", "poster") + error_message: The exception message describing what went wrong + error_type: The exception class name (e.g., "PlexTemporaryError") + """ + field_name: str + error_message: str + error_type: str + + def __str__(self) -> str: + return f"{self.field_name}: {self.error_message}" + + +@dataclass +class PartialSyncResult: + """ + Result of a metadata sync that may have partial failures. + + Tracks which fields succeeded, which had warnings, and provides + a summary for logging. Used by _update_metadata() to return + granular status instead of all-or-nothing success/failure. 
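+
+    Illustrative usage:
+
+        result = PartialSyncResult()
+        result.add_success("title")
+        try:
+            ...                                  # update performers in Plex
+        except Exception as exc:
+            result.add_warning("performers", exc)
+        if result.has_warnings:
+            ...                                  # log result.warning_summary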
+ + Attributes: + success: Overall success status (True if critical fields OK) + warnings: List of non-critical field failures + fields_updated: List of fields that were successfully updated + """ + success: bool = True + warnings: List[FieldUpdateWarning] = field(default_factory=list) + fields_updated: List[str] = field(default_factory=list) + + def add_warning(self, field_name: str, error: Exception) -> None: + """ + Add a warning for a non-critical field failure. + + Args: + field_name: Name of the field that failed + error: The exception that caused the failure + """ + self.warnings.append(FieldUpdateWarning( + field_name=field_name, + error_message=str(error), + error_type=type(error).__name__ + )) + + def add_success(self, field_name: str) -> None: + """ + Record a successful field update. + + Args: + field_name: Name of the field that was updated successfully + """ + self.fields_updated.append(field_name) + + @property + def has_warnings(self) -> bool: + """Return True if there are any warnings.""" + return len(self.warnings) > 0 + + @property + def warning_summary(self) -> str: + """ + Human-readable summary of warnings. + + Returns: + Empty string if no warnings, otherwise formatted summary + like "2 warnings: performers: connection error; tags: timeout" + """ + if not self.warnings: + return "" + return f"{len(self.warnings)} warnings: " + "; ".join(str(w) for w in self.warnings) + + +# HTTP status codes that indicate transient (retry-able) errors +# 429: Rate limited - retry after backoff +# 5xx: Server errors - usually temporary +TRANSIENT_CODES = frozenset({429, 500, 502, 503, 504}) + +# HTTP status codes that indicate permanent (non-retry-able) errors +# 400: Bad request - data issue +# 401: Unauthorized - auth config issue +# 403: Forbidden - permission issue +# 404: Not found - item doesn't exist +# 405: Method not allowed - API misuse +# 410: Gone - item permanently removed +# 422: Unprocessable entity - validation failure +PERMANENT_CODES = frozenset({400, 401, 403, 404, 405, 410, 422}) + +# Module logger +logger = logging.getLogger(__name__) + + +def classify_http_error(status_code: int) -> Type[Exception]: + """ + Classify an HTTP status code as transient or permanent error. + + Args: + status_code: HTTP response status code + + Returns: + TransientError class for retry-able errors + PermanentError class for non-retry-able errors + """ + if status_code in TRANSIENT_CODES: + logger.debug(f"HTTP {status_code} classified as transient") + return TransientError + + if status_code in PERMANENT_CODES: + logger.debug(f"HTTP {status_code} classified as permanent") + return PermanentError + + # Default classification by status code range + if 400 <= status_code < 500: + # Unknown 4xx = permanent (client error, unlikely to change) + logger.debug(f"HTTP {status_code} (unknown 4xx) classified as permanent") + return PermanentError + + if status_code >= 500: + # Unknown 5xx = transient (server error, may recover) + logger.debug(f"HTTP {status_code} (unknown 5xx) classified as transient") + return TransientError + + # Unknown codes (1xx, 2xx, 3xx shouldn't reach here) = transient (safer) + logger.debug(f"HTTP {status_code} (unexpected) classified as transient") + return TransientError + + +def classify_exception(exc: Exception) -> Type[Exception]: + """ + Classify an exception as transient or permanent error. 
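+
+    Example (illustrative):
+        >>> classify_exception(ValueError("bad data")) is PermanentError
+        True
+        >>> classify_exception(ConnectionError("refused")) is TransientError
+        True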
+ + Handles various exception types: + - HTTP responses (from requests/httpx): Extract status code + - Network errors: Transient (ConnectionError, TimeoutError, OSError) + - Validation errors: Permanent (ValueError, TypeError, KeyError, AttributeError) + - Already classified: Return same type + - Unknown: Transient (safer, allows retry) + + Args: + exc: The exception to classify + + Returns: + TransientError class for retry-able errors + PermanentError class for non-retry-able errors + """ + # Check if already classified + if isinstance(exc, TransientError): + logger.debug(f"Exception already TransientError: {exc}") + return TransientError + + if isinstance(exc, PermanentError): + logger.debug(f"Exception already PermanentError: {exc}") + return PermanentError + + # Check for HTTP response (requests, httpx, urllib, etc.) + if hasattr(exc, 'response') and exc.response is not None: + response = exc.response + status_code = getattr(response, 'status_code', None) + if status_code is not None: + logger.debug(f"Exception has HTTP response with status {status_code}") + return classify_http_error(status_code) + + # Network errors are transient (connectivity, timeout) + if isinstance(exc, (ConnectionError, TimeoutError, OSError)): + logger.debug(f"Network error classified as transient: {type(exc).__name__}") + return TransientError + + # Validation/data errors are permanent (won't fix with retry) + if isinstance(exc, (ValueError, TypeError, KeyError, AttributeError)): + logger.debug(f"Validation error classified as permanent: {type(exc).__name__}") + return PermanentError + + # Unknown errors default to transient (safer, allows retry) + logger.debug(f"Unknown exception classified as transient: {type(exc).__name__}") + return TransientError diff --git a/plugins/Stash2Plex/validation/limits.py b/plugins/Stash2Plex/validation/limits.py new file mode 100644 index 00000000..59f2a81f --- /dev/null +++ b/plugins/Stash2Plex/validation/limits.py @@ -0,0 +1,33 @@ +""" +Plex API field length limits. + +Conservative defaults based on practical testing and common usage patterns. +Plex does not officially document these limits, so we use safe values that +work reliably without causing UI issues or API errors. +""" + +# Text field limits (characters) +MAX_TITLE_LENGTH = 255 # Scene/movie title +MAX_STUDIO_LENGTH = 255 # Studio name +MAX_SUMMARY_LENGTH = 10000 # Description/details +MAX_TAGLINE_LENGTH = 255 # Short tagline +MAX_PERFORMER_NAME_LENGTH = 255 # Individual performer name +MAX_TAG_NAME_LENGTH = 255 # Individual tag name + +# List field limits (count) +MAX_PERFORMERS = 50 # Maximum performers per item +MAX_TAGS = 50 # Maximum tags per item +MAX_COLLECTIONS = 20 # Maximum collections per item + +# Convenience dict for programmatic access +PLEX_LIMITS = { + 'title': MAX_TITLE_LENGTH, + 'studio': MAX_STUDIO_LENGTH, + 'summary': MAX_SUMMARY_LENGTH, + 'tagline': MAX_TAGLINE_LENGTH, + 'performer_name': MAX_PERFORMER_NAME_LENGTH, + 'tag_name': MAX_TAG_NAME_LENGTH, + 'performers_count': MAX_PERFORMERS, + 'tags_count': MAX_TAGS, + 'collections_count': MAX_COLLECTIONS, +} diff --git a/plugins/Stash2Plex/validation/metadata.py b/plugins/Stash2Plex/validation/metadata.py new file mode 100644 index 00000000..c9c6ac71 --- /dev/null +++ b/plugins/Stash2Plex/validation/metadata.py @@ -0,0 +1,151 @@ +""" +Pydantic metadata validation model for Stash2Plex. + +Provides SyncMetadata model with field validation and sanitization +to ensure clean data before enqueueing for Plex sync. 
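+
+A minimal validation sketch:
+
+    meta, err = validate_metadata({"scene_id": 123, "title": "Example Scene"})
+    if err is not None:
+        ...  # reject the payload or log the error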
+""" + +from pydantic import BaseModel, Field, field_validator, ValidationError +from typing import Optional, Any +import logging + +from validation.sanitizers import sanitize_for_plex + + +log = logging.getLogger('Stash2Plex.validation') + + +class SyncMetadata(BaseModel): + """ + Validated metadata structure for Plex sync jobs. + + Required fields: + scene_id: Positive integer identifying the scene in Stash + title: Non-empty string (1-255 chars) for the scene title + + Optional fields: + details: Description/summary (max 10000 chars) + date: Release date string + rating100: Rating on 0-100 scale + studio: Studio name (max 255 chars) + performers: List of performer names + tags: List of tag names + + All string fields are automatically sanitized via field_validator + to remove control characters and normalize text. + + Example: + >>> from validation.metadata import SyncMetadata + >>> meta = SyncMetadata( + ... scene_id=123, + ... title="Example Scene", + ... studio="Example Studio", + ... performers=["Actor One", "Actor Two"] + ... ) + >>> print(meta.title) + Example Scene + """ + + # Required fields + scene_id: int = Field(..., gt=0, description="Stash scene ID (positive integer)") + title: str = Field(..., min_length=1, max_length=255, description="Scene title") + + # Optional fields + details: Optional[str] = Field(default=None, max_length=10000, description="Scene description") + date: Optional[str] = Field(default=None, description="Release date") + rating100: Optional[int] = Field(default=None, ge=0, le=100, description="Rating 0-100") + studio: Optional[str] = Field(default=None, max_length=255, description="Studio name") + performers: Optional[list[str]] = Field(default=None, description="Performer names") + tags: Optional[list[str]] = Field(default=None, description="Tag names") + + @field_validator('title', mode='before') + @classmethod + def sanitize_title(cls, v: Any) -> str: + """Sanitize title field, ensuring non-empty string.""" + if v is None: + raise ValueError("title is required") + if not isinstance(v, str): + v = str(v) + original = v + sanitized = sanitize_for_plex(v, max_length=255) + if sanitized != original: + log.debug(f"Sanitized title: '{original[:50]}...' -> '{sanitized[:50]}...'") + if not sanitized: + raise ValueError("title cannot be empty after sanitization") + return sanitized + + @field_validator('details', mode='before') + @classmethod + def sanitize_details(cls, v: Any) -> Optional[str]: + """Sanitize details field.""" + if v is None: + return None + if not isinstance(v, str): + v = str(v) + original = v + sanitized = sanitize_for_plex(v, max_length=10000) + if sanitized != original: + log.debug(f"Sanitized details: '{original[:50]}...' -> '{sanitized[:50]}...'") + return sanitized if sanitized else None + + @field_validator('studio', mode='before') + @classmethod + def sanitize_studio(cls, v: Any) -> Optional[str]: + """Sanitize studio field.""" + if v is None: + return None + if not isinstance(v, str): + v = str(v) + original = v + sanitized = sanitize_for_plex(v, max_length=255) + if sanitized != original: + log.debug(f"Sanitized studio: '{original[:50]}...' 
-> '{sanitized[:50]}...'") + return sanitized if sanitized else None + + @field_validator('performers', 'tags', mode='before') + @classmethod + def sanitize_string_list(cls, v: Any) -> Optional[list[str]]: + """Sanitize list of string fields.""" + if v is None: + return None + if not isinstance(v, list): + return None + sanitized = [ + sanitize_for_plex(str(item), max_length=255) + for item in v + if item + ] + # Filter out empty strings after sanitization + return [s for s in sanitized if s] or None + + +def validate_metadata(data: dict) -> tuple[Optional[SyncMetadata], Optional[str]]: + """ + Validate metadata dictionary and return result. + + This helper allows callers to handle validation errors gracefully + without needing to catch exceptions. + + Args: + data: Dictionary containing metadata fields + + Returns: + Tuple of (SyncMetadata, None) on success + Tuple of (None, error_message) on validation failure + """ + try: + model = SyncMetadata(**data) + return (model, None) + except ValidationError as e: + # Extract readable error message + errors = e.errors() + if errors: + first_error = errors[0] + field = '.'.join(str(loc) for loc in first_error.get('loc', [])) + msg = first_error.get('msg', 'validation error') + return (None, f"{field}: {msg}") + return (None, str(e)) + + +# Re-export ValidationError for caller convenience +__all__ = ['SyncMetadata', 'validate_metadata', 'ValidationError'] diff --git a/plugins/Stash2Plex/validation/sanitizers.py b/plugins/Stash2Plex/validation/sanitizers.py new file mode 100644 index 00000000..7c32974d --- /dev/null +++ b/plugins/Stash2Plex/validation/sanitizers.py @@ -0,0 +1,131 @@ +""" +Text sanitization utilities for Plex API compatibility. + +Provides functions to clean and normalize text fields before sending to Plex, +ensuring control characters, smart quotes, and excessive whitespace don't cause +API errors or display issues. +""" + +import unicodedata +from typing import Optional +import logging + +from validation.limits import MAX_TITLE_LENGTH + + +# Mapping for smart quote conversion to ASCII equivalents +# Unicode left/right double quotes -> straight double quote +# Unicode left/right single quotes -> straight apostrophe +# En/em dashes -> hyphen-minus +# Ellipsis -> three dots +QUOTE_MAP = str.maketrans({ + '\u201c': '"', # Left double quotation mark + '\u201d': '"', # Right double quotation mark + '\u2018': "'", # Left single quotation mark + '\u2019': "'", # Right single quotation mark + '\u2013': '-', # En dash + '\u2014': '-', # Em dash + '\u2026': '...', # Horizontal ellipsis +}) + + +def strip_emojis(text: str) -> str: + """ + Remove emoji characters from text. + + Removes Unicode characters in the 'So' (Symbol, Other) category, + which includes most emoji characters. This can help prevent potential + display or encoding issues in Plex. + + Args: + text: The text to process + + Returns: + Text with emoji characters removed + + Example: + >>> strip_emojis("Hello World") + 'Hello World' + >>> strip_emojis("Test") + 'Test' + """ + if not text: + return '' + + # Remove characters in the 'So' (Symbol, Other) category + # This category contains most emoji characters + return ''.join( + char for char in text + if unicodedata.category(char) != 'So' + ) + + +def sanitize_for_plex( + text: str, + max_length: int = MAX_TITLE_LENGTH, + logger: Optional[logging.Logger] = None, + strip_emoji: bool = False +) -> str: + """ + Sanitize text for safe use with Plex API. + + Performs the following transformations: + 1. 
Returns empty string if text is None or empty + 2. Normalizes Unicode to NFC form + 3. Removes control characters (Cc) and format characters (Cf) + 4. Optionally removes emoji characters (if strip_emoji=True) + 5. Converts smart quotes and dashes to ASCII equivalents + 6. Collapses multiple whitespace to single spaces + 7. Truncates at max_length, preferring word boundaries + + Args: + text: The text to sanitize + max_length: Maximum length (0 = no limit). Default 255. + logger: Optional logger for debug output + strip_emoji: If True, remove emoji characters. Default False. + + Returns: + Sanitized text string + """ + # Handle None or empty + if not text: + return '' + + original = text + + # Normalize Unicode to NFC (composed form) + text = unicodedata.normalize('NFC', text) + + # Remove control characters (Cc) and format characters (Cf) + # These include null bytes, escape sequences, zero-width chars, etc. + text = ''.join( + char for char in text + if unicodedata.category(char) not in ('Cc', 'Cf') + ) + + # Optionally remove emoji characters (Symbol, Other category) + if strip_emoji: + text = strip_emojis(text) + + # Convert smart quotes and dashes to ASCII equivalents + text = text.translate(QUOTE_MAP) + + # Collapse whitespace (split on any whitespace, rejoin with single space) + text = ' '.join(text.split()) + + # Truncate if needed, preferring word boundary + if max_length > 0 and len(text) > max_length: + # Find last space within limit + truncated = text[:max_length] + last_space = truncated.rfind(' ') + + # Only use word boundary if it's reasonably close to max_length (>80%) + if last_space > max_length * 0.8: + text = truncated[:last_space] + else: + text = truncated + + if logger and text != original: + logger.debug(f"Sanitized text: {len(original)} -> {len(text)} chars") + + return text diff --git a/plugins/Stash2Plex/worker/__init__.py b/plugins/Stash2Plex/worker/__init__.py new file mode 100644 index 00000000..73751eaa --- /dev/null +++ b/plugins/Stash2Plex/worker/__init__.py @@ -0,0 +1,24 @@ +""" +Worker module for background job processing. + +Exports: + SyncWorker: Background worker with retry orchestration + CircuitBreaker: Circuit breaker for Plex outage protection + CircuitState: Circuit breaker state enum + calculate_delay: Exponential backoff delay calculator + get_retry_params: Get retry parameters for error type +""" + +from worker.processor import SyncWorker, TransientError, PermanentError +from worker.circuit_breaker import CircuitBreaker, CircuitState +from worker.backoff import calculate_delay, get_retry_params + +__all__ = [ + 'SyncWorker', + 'TransientError', + 'PermanentError', + 'CircuitBreaker', + 'CircuitState', + 'calculate_delay', + 'get_retry_params', +] diff --git a/plugins/Stash2Plex/worker/backoff.py b/plugins/Stash2Plex/worker/backoff.py new file mode 100644 index 00000000..02651f8b --- /dev/null +++ b/plugins/Stash2Plex/worker/backoff.py @@ -0,0 +1,86 @@ +""" +Exponential backoff with full jitter for retry delay calculation. + +Provides crash-safe delay calculation for retry orchestration. +Full jitter prevents thundering herd when multiple jobs retry +simultaneously after Plex outage. 
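+
+Worked example with the standard parameters returned by get_retry_params()
+for ordinary transient errors (base=5.0, cap=80.0): the delay window doubles
+on each retry and the actual delay is drawn uniformly from it, so the first
+retry draws from [0, 5s], the second from [0, 10s], the third from [0, 20s],
+the fourth from [0, 40s], and the fifth from [0, 80s] (the cap).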
+ +Functions: + calculate_delay: Calculate retry delay with full jitter + get_retry_params: Get backoff parameters based on error type +""" + +import random +from typing import Optional, Tuple + + +def calculate_delay( + retry_count: int, + base: float, + cap: float, + jitter_seed: Optional[int] = None +) -> float: + """ + Calculate retry delay using exponential backoff with full jitter. + + Full jitter formula: random.uniform(0, min(cap, base * 2^retry_count)) + + This distributes retries randomly within the delay window, + preventing thundering herd when multiple jobs retry after an outage. + + Args: + retry_count: Number of previous retry attempts (0 for first retry) + base: Base delay in seconds (e.g., 5.0) + cap: Maximum delay cap in seconds (e.g., 80.0) + jitter_seed: Optional seed for deterministic testing + + Returns: + Delay in seconds, in range [0, min(cap, base * 2^retry_count)] + + Example: + >>> from worker.backoff import calculate_delay + >>> delay = calculate_delay(retry_count=3, base=1.0, cap=60.0, + ... jitter_seed=42) + >>> 0 <= delay <= 8.0 # 2^3 = 8, with jitter + True + """ + # Create seeded random generator for deterministic testing + rng = random.Random(jitter_seed) + + # Calculate exponential delay: base * 2^retry_count + exponential_delay = base * (2 ** retry_count) + + # Apply cap + max_delay = min(cap, exponential_delay) + + # Full jitter: random value in [0, max_delay] + return rng.uniform(0, max_delay) + + +def get_retry_params(error: Exception) -> Tuple[float, float, int]: + """ + Get backoff parameters based on error type. + + PlexNotFound errors use longer delays and more retries because + Plex library scanning can take minutes to hours. + + Args: + error: The exception that triggered the retry + + Returns: + Tuple of (base_delay, max_delay, max_retries) + - PlexNotFound: (30.0, 600.0, 12) for ~2 hour window + - Other errors: (5.0, 80.0, 5) for standard backoff + """ + # Import lazily to avoid circular imports + from plex.exceptions import PlexNotFound + + if isinstance(error, PlexNotFound): + # Library scanning: longer delays, more retries + # 30s base -> 60 -> 120 -> 240 -> 480 -> capped at 600 (10 min) + # 12 retries gives ~2 hour total retry window + return (30.0, 600.0, 12) + else: + # Standard transient errors: normal backoff + # 5s base -> 10 -> 20 -> 40 -> 80 (capped) + return (5.0, 80.0, 5) diff --git a/plugins/Stash2Plex/worker/circuit_breaker.py b/plugins/Stash2Plex/worker/circuit_breaker.py new file mode 100644 index 00000000..b99c5c65 --- /dev/null +++ b/plugins/Stash2Plex/worker/circuit_breaker.py @@ -0,0 +1,140 @@ +""" +Circuit breaker pattern for resilient Plex API calls. + +Prevents retry exhaustion during Plex outages by pausing +job processing when consecutive failures occur. + +States: +- CLOSED: Normal operation, count failures +- OPEN: Block all requests until recovery timeout +- HALF_OPEN: Allow one test request to check recovery +""" + +import time +from enum import Enum + + +class CircuitState(Enum): + """Circuit breaker states.""" + CLOSED = "closed" + OPEN = "open" + HALF_OPEN = "half_open" + + +class CircuitBreaker: + """ + Circuit breaker state machine. + + Tracks consecutive failures and blocks execution when + a failure threshold is reached. Allows recovery testing + after a timeout period. 
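+
+    State transitions, as implemented below:
+        CLOSED    -> OPEN       after failure_threshold consecutive failures
+        OPEN      -> HALF_OPEN  once recovery_timeout seconds have elapsed
+        HALF_OPEN -> CLOSED     after success_threshold successes
+        HALF_OPEN -> OPEN       on any failure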
+ + Args: + failure_threshold: Consecutive failures before opening (default: 5) + recovery_timeout: Seconds before transitioning to HALF_OPEN (default: 60.0) + success_threshold: Successes in HALF_OPEN to close (default: 1) + + Usage: + breaker = CircuitBreaker() + + if breaker.can_execute(): + try: + result = call_plex_api() + breaker.record_success() + except Exception: + breaker.record_failure() + else: + # Circuit is open, skip execution + pass + """ + + def __init__( + self, + failure_threshold: int = 5, + recovery_timeout: float = 60.0, + success_threshold: int = 1 + ): + self._failure_threshold = failure_threshold + self._recovery_timeout = recovery_timeout + self._success_threshold = success_threshold + + # Internal state + self._state = CircuitState.CLOSED + self._failure_count = 0 + self._success_count = 0 + self._opened_at: float | None = None + + @property + def state(self) -> CircuitState: + """Current circuit state (may transition to HALF_OPEN if timeout elapsed).""" + if self._state == CircuitState.OPEN and self._opened_at is not None: + if time.time() - self._opened_at >= self._recovery_timeout: + self._state = CircuitState.HALF_OPEN + self._success_count = 0 + return self._state + + def can_execute(self) -> bool: + """ + Check if execution is allowed. + + Returns: + True if circuit is CLOSED or HALF_OPEN, False if OPEN + """ + current_state = self.state # Property call handles OPEN -> HALF_OPEN transition + return current_state != CircuitState.OPEN + + def record_success(self) -> None: + """ + Record a successful execution. + + In CLOSED state: resets failure count. + In HALF_OPEN state: increments success count, closes if threshold reached. + """ + if self._state == CircuitState.HALF_OPEN: + self._success_count += 1 + if self._success_count >= self._success_threshold: + self._close() + else: + # CLOSED state - just reset failure count + self._failure_count = 0 + + def record_failure(self) -> None: + """ + Record a failed execution. + + In CLOSED state: increments failure count, opens if threshold reached. + In HALF_OPEN state: immediately reopens the circuit. + """ + if self._state == CircuitState.HALF_OPEN: + # HALF_OPEN failure -> reopen immediately + self._open() + else: + # CLOSED state - count failures + self._failure_count += 1 + if self._failure_count >= self._failure_threshold: + self._open() + + def reset(self) -> None: + """ + Force reset to CLOSED state. + + Useful for testing or manual recovery. + """ + self._close() + + def _open(self) -> None: + """Transition to OPEN state.""" + self._state = CircuitState.OPEN + self._opened_at = time.time() + self._failure_count = 0 + self._success_count = 0 + + def _close(self) -> None: + """Transition to CLOSED state.""" + self._state = CircuitState.CLOSED + self._opened_at = None + self._failure_count = 0 + self._success_count = 0 + + +__all__ = ['CircuitBreaker', 'CircuitState'] diff --git a/plugins/Stash2Plex/worker/processor.py b/plugins/Stash2Plex/worker/processor.py new file mode 100644 index 00000000..5aa783f8 --- /dev/null +++ b/plugins/Stash2Plex/worker/processor.py @@ -0,0 +1,974 @@ +""" +Background worker for processing sync jobs. 
+ +Implements reliable job processing with acknowledgment workflow: +- Acknowledges successful jobs +- Retries transient failures with exponential backoff +- Moves permanently failed jobs to DLQ +- Circuit breaker pauses processing during Plex outages +""" + +import os +import sys +import time +import threading +import logging +from typing import Optional, TYPE_CHECKING + +from worker.stats import SyncStats + +# Lazy imports to avoid circular import with validation module +# These are imported inside _update_metadata() where they're used + + +# Stash plugin log levels +def log_trace(msg): print(f"\x01t\x02[Stash2Plex Worker] {msg}", file=sys.stderr) +def log_debug(msg): print(f"\x01d\x02[Stash2Plex Worker] {msg}", file=sys.stderr) +def log_info(msg): print(f"\x01i\x02[Stash2Plex Worker] {msg}", file=sys.stderr) +def log_warn(msg): print(f"\x01w\x02[Stash2Plex Worker] {msg}", file=sys.stderr) +def log_error(msg): print(f"\x01e\x02[Stash2Plex Worker] {msg}", file=sys.stderr) + +# Lazy imports to avoid module-level pollution in tests +# These functions are imported inside methods that use them +# to ensure imports are fresh and not polluted by test mocking + +logger = logging.getLogger('Stash2Plex.worker') + +if TYPE_CHECKING: + from validation.config import Stash2PlexConfig + from plex.client import PlexClient + from plex.cache import PlexCache, MatchCache + + +class TransientError(Exception): + """Retry-able errors (network, timeout, 5xx)""" + pass + + +class PermanentError(Exception): + """Non-retry-able errors (4xx except 429, validation)""" + pass + + +class SyncWorker: + """ + Background worker that processes sync jobs from the queue. + + Runs in a daemon thread and processes jobs with proper acknowledgment: + - Success: ack_job + - Transient failure: exponential backoff retry with metadata in job + - Permanent failure or max retries: fail_job + DLQ + - Circuit breaker: pauses processing after consecutive failures + """ + + def __init__( + self, + queue, + dlq: 'DeadLetterQueue', + config: 'Stash2PlexConfig', + data_dir: Optional[str] = None, + max_retries: int = 5, + ): + """ + Initialize sync worker. 
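+
+        A minimal start/stop sketch (queue, dlq, and config construction
+        omitted; names are illustrative):
+
+            worker = SyncWorker(queue, dlq, config, data_dir=data_dir)
+            worker.start()
+            # ... jobs are processed in the background daemon thread ...
+            worker.stop()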
+ + Args: + queue: SQLiteAckQueue instance + dlq: DeadLetterQueue for permanently failed jobs + config: Stash2PlexConfig with Plex URL, token, and timeouts + data_dir: Plugin data directory for sync timestamp updates + max_retries: Maximum retry attempts before moving to DLQ (default for standard errors) + """ + self.queue = queue + self.dlq = dlq + self.config = config + self.data_dir = data_dir + self.max_retries = max_retries + self.running = False + self.thread: Optional[threading.Thread] = None + self._plex_client: Optional['PlexClient'] = None + + # Initialize caches (lazy, created on first use) + self._library_cache: Optional['PlexCache'] = None + self._match_cache: Optional['MatchCache'] = None + + # Initialize stats tracking + self._stats = SyncStats() + if data_dir is not None: + stats_path = os.path.join(data_dir, 'stats.json') + self._stats = SyncStats.load_from_file(stats_path) + + # DLQ status logging interval + self._jobs_since_dlq_log = 0 + self._dlq_log_interval = 10 # Log DLQ status every 10 jobs + + # Circuit breaker for resilience during Plex outages + from worker.circuit_breaker import CircuitBreaker + self.circuit_breaker = CircuitBreaker( + failure_threshold=5, + recovery_timeout=60.0, + success_threshold=1 + ) + + def start(self): + """Start the background worker thread""" + if self.running: + log_trace("Already running") + return + + # Cleanup old DLQ entries + retention_days = getattr(self.config, 'dlq_retention_days', 30) + self.dlq.delete_older_than(days=retention_days) + + # Log DLQ status on startup + self._log_dlq_status() + + self.running = True + self.thread = threading.Thread(target=self._worker_loop, daemon=True) + self.thread.start() + log_info("Started") + + def _log_dlq_status(self): + """Log DLQ status if jobs present.""" + count = self.dlq.get_count() + if count > 0: + log_warn(f"DLQ contains {count} failed jobs requiring review") + recent = self.dlq.get_recent(limit=5) + for entry in recent: + log_debug( + f"DLQ #{entry['id']}: scene {entry['scene_id']} - " + f"{entry['error_type']}: {entry['error_message'][:80]}" + ) + + def _log_batch_summary(self): + """Log periodic summary of sync operations with JSON stats.""" + import json + + stats = self._stats + + # Human-readable summary line + log_info( + f"Sync summary: {stats.jobs_succeeded}/{stats.jobs_processed} succeeded " + f"({stats.success_rate:.1f}%), avg {stats.avg_processing_time*1000:.0f}ms, " + f"confidence: {stats.high_confidence_matches} high / {stats.low_confidence_matches} low" + ) + + # JSON batch summary for machine parsing + stats_dict = { + "processed": stats.jobs_processed, + "succeeded": stats.jobs_succeeded, + "failed": stats.jobs_failed, + "to_dlq": stats.jobs_to_dlq, + "success_rate": f"{stats.success_rate:.1f}%", + "avg_time_ms": int(stats.avg_processing_time * 1000), + "high_confidence": stats.high_confidence_matches, + "low_confidence": stats.low_confidence_matches, + "errors_by_type": stats.errors_by_type, + } + log_info(f"Stats: {json.dumps(stats_dict)}") + + # DLQ summary if items present (using get_error_summary method) + dlq_summary = self.dlq.get_error_summary() + if dlq_summary: + total = sum(dlq_summary.values()) + breakdown = ", ".join(f"{count} {err_type}" for err_type, count in dlq_summary.items()) + log_warn(f"DLQ contains {total} items: {breakdown}") + + def stop(self): + """Stop the background worker thread""" + if not self.running: + return + + log_trace("Stopping...") + self.running = False + + if self.thread: + self.thread.join(timeout=5) + if 
self.thread.is_alive(): + log_warn("Thread did not stop cleanly") + + log_trace("Stopped") + + def _prepare_for_retry(self, job: dict, error: Exception) -> dict: + """ + Add retry metadata to job before re-enqueueing. + + Stores retry_count, next_retry_at, and last_error_type in job dict + so retry state survives worker restart (crash-safe). + + Args: + job: Job dict to update + error: The exception that caused the retry + + Returns: + Updated job dict with retry metadata + """ + from worker.backoff import calculate_delay, get_retry_params + + base, cap, max_retries = get_retry_params(error) + retry_count = job.get('retry_count', 0) + 1 + delay = calculate_delay(retry_count - 1, base, cap) # -1 because we just incremented + + job['retry_count'] = retry_count + job['next_retry_at'] = time.time() + delay + job['last_error_type'] = type(error).__name__ + return job + + def _is_ready_for_retry(self, job: dict) -> bool: + """ + Check if job's backoff delay has elapsed. + + Args: + job: Job dict with optional next_retry_at field + + Returns: + True if job is ready for processing (no delay or delay elapsed) + """ + next_retry_at = job.get('next_retry_at', 0) + return time.time() >= next_retry_at + + def _get_max_retries_for_error(self, error: Exception) -> int: + """ + Get max retries based on error type. + + PlexNotFound errors get more retries (12) to allow for library scanning. + Other transient errors use the standard max_retries (5). + + Args: + error: The exception that triggered the retry + + Returns: + Maximum number of retries for this error type + """ + from worker.backoff import get_retry_params + _, _, max_retries = get_retry_params(error) + return max_retries + + def _requeue_with_metadata(self, job: dict): + """ + Re-enqueue job with updated retry metadata. + + persist-queue's nack() doesn't support modifying job data, + so we ack the current job and enqueue a fresh copy with + updated metadata. 
+ + Args: + job: Job dict with retry metadata already added + """ + # Lazy import to avoid module-level import pollution in tests + from sync_queue.operations import ack_job as _ack_job + + # Extract original job fields for re-enqueue + scene_id = job.get('scene_id') + update_type = job.get('update_type') + data = job.get('data', {}) + + # Create new job with all metadata preserved + new_job = { + 'scene_id': scene_id, + 'update_type': update_type, + 'data': data, + 'enqueued_at': job.get('enqueued_at', time.time()), + 'job_key': job.get('job_key', f"scene_{scene_id}"), + # Preserve retry metadata + 'retry_count': job.get('retry_count', 0), + 'next_retry_at': job.get('next_retry_at', 0), + 'last_error_type': job.get('last_error_type'), + } + + # Ack the old job (removes from queue) + _ack_job(self.queue, job) + # Enqueue fresh copy with metadata + self.queue.put(new_job) + + def _worker_loop(self): + """Main worker loop - runs in background thread""" + from worker.circuit_breaker import CircuitState + # Lazy imports to avoid module-level pollution in tests + from sync_queue.operations import get_pending, ack_job, nack_job, fail_job + + while self.running: + try: + # Check circuit breaker first - pause if Plex is down + if not self.circuit_breaker.can_execute(): + log_debug(f"Circuit OPEN, sleeping {self.config.poll_interval}s") + time.sleep(self.config.poll_interval) + continue + + # Get next pending job (10 second timeout) + item = get_pending(self.queue, timeout=10) + if item is None: + # Timeout, continue loop + continue + + # Check if backoff delay has elapsed + if not self._is_ready_for_retry(item): + # Put back in queue - not ready yet + nack_job(self.queue, item) + time.sleep(0.1) # Small delay to avoid tight loop + continue + + pqid = item.get('pqid') + scene_id = item.get('scene_id') + retry_count = item.get('retry_count', 0) + log_debug(f"Processing job {pqid} for scene {scene_id} (attempt {retry_count + 1})") + + _job_start = time.perf_counter() + try: + # Process the job and get match confidence + confidence = self._process_job(item) + _job_elapsed = time.perf_counter() - _job_start + + # Record success with stats + self._stats.record_success(_job_elapsed, confidence=confidence or 'high') + + # Success: acknowledge job and record with circuit breaker + ack_job(self.queue, item) + self.circuit_breaker.record_success() + log_info(f"Job {pqid} completed") + + # Periodic batch summary and cache status logging + self._jobs_since_dlq_log += 1 + if self._jobs_since_dlq_log >= self._dlq_log_interval: + self._log_batch_summary() + self._log_cache_stats() + self._jobs_since_dlq_log = 0 + + # Save stats periodically + if self.data_dir is not None: + stats_path = os.path.join(self.data_dir, 'stats.json') + self._stats.save_to_file(stats_path) + + except TransientError as e: + _job_elapsed = time.perf_counter() - _job_start + # Record failure with circuit breaker + self.circuit_breaker.record_failure() + if self.circuit_breaker.state == CircuitState.OPEN: + log_warn("Circuit breaker OPENED - pausing processing") + + # Prepare job for retry with backoff metadata + job = self._prepare_for_retry(item, e) + max_retries = self._get_max_retries_for_error(e) + job_retry_count = job.get('retry_count', 0) + + if job_retry_count >= max_retries: + log_warn(f"Job {pqid} exceeded max retries ({max_retries}), moving to DLQ") + fail_job(self.queue, item) + self.dlq.add(job, e, job_retry_count) + self._stats.record_failure(type(e).__name__, _job_elapsed, to_dlq=True) + else: + delay = 
job.get('next_retry_at', 0) - time.time() + log_debug(f"Job {pqid} failed (attempt {job_retry_count}/{max_retries}), retry in {delay:.1f}s: {e}") + self._requeue_with_metadata(job) + self._stats.record_failure(type(e).__name__, _job_elapsed, to_dlq=False) + + except PermanentError as e: + _job_elapsed = time.perf_counter() - _job_start + # Permanent error: move to DLQ immediately (doesn't count against circuit) + log_error(f"Job {pqid} permanent failure, moving to DLQ: {e}") + fail_job(self.queue, item) + self.dlq.add(item, e, item.get('retry_count', 0)) + self._stats.record_failure(type(e).__name__, _job_elapsed, to_dlq=True) + + except Exception as e: + _job_elapsed = time.perf_counter() - _job_start + # Unknown error: treat as transient with circuit breaker + self.circuit_breaker.record_failure() + log_warn(f"Job {pqid} unexpected error (treating as transient): {e}") + nack_job(self.queue, item) + self._stats.record_failure(type(e).__name__, _job_elapsed, to_dlq=False) + + except Exception as e: + # Worker loop error: log and continue + log_error(f"Worker loop error: {e}") + time.sleep(1) # Avoid tight loop on persistent errors + + def _fetch_stash_image(self, url: str) -> Optional[bytes]: + """ + Fetch image from Stash URL. + + Downloads the image bytes so we can upload directly to Plex, + avoiding issues with Plex not being able to reach Stash's internal URL. + + Args: + url: Stash image URL (screenshot, preview, etc.) + + Returns: + Image bytes or None if fetch failed + """ + import urllib.request + import urllib.error + + try: + # Build request with authentication from Stash connection + req = urllib.request.Request(url) + + # Add API key header if available + api_key = getattr(self.config, 'stash_api_key', None) + if api_key: + req.add_header('ApiKey', api_key) + + # Add session cookie if available + session_cookie = getattr(self.config, 'stash_session_cookie', None) + if session_cookie: + req.add_header('Cookie', session_cookie) + + with urllib.request.urlopen(req, timeout=30) as response: + return response.read() + except urllib.error.URLError as e: + log_warn(f" Failed to fetch image from Stash: {e}") + return None + except Exception as e: + log_warn(f" Image fetch error: {e}") + return None + + def _get_plex_client(self) -> 'PlexClient': + """ + Get PlexClient with lazy initialization. + + Creates PlexClient on first use to avoid connecting to Plex + until we actually need to process a job. + + Returns: + PlexClient instance configured with URL, token, and timeouts + """ + if self._plex_client is None: + from plex.client import PlexClient + self._plex_client = PlexClient( + url=self.config.plex_url, + token=self.config.plex_token, + connect_timeout=self.config.plex_connect_timeout, + read_timeout=self.config.plex_read_timeout, + ) + return self._plex_client + + def _get_caches(self) -> tuple[Optional['PlexCache'], Optional['MatchCache']]: + """ + Get or create cache instances (lazy initialization). + + Caches are only created when data_dir is set. When data_dir is None, + returns (None, None) and processing continues without caching. 
+ + Returns: + Tuple of (PlexCache or None, MatchCache or None) + """ + if self._library_cache is None and self.data_dir is not None: + from plex.cache import PlexCache, MatchCache + cache_dir = os.path.join(self.data_dir, 'cache') + self._library_cache = PlexCache(cache_dir) + self._match_cache = MatchCache(cache_dir) + log_debug(f"Initialized caches at {cache_dir}") + return self._library_cache, self._match_cache + + def _log_cache_stats(self): + """Log cache hit/miss statistics.""" + library_cache, match_cache = self._get_caches() + if library_cache is not None: + stats = library_cache.get_stats() + total = stats.get('hits', 0) + stats.get('misses', 0) + if total > 0: + hit_rate = stats['hits'] / total * 100 + log_debug(f"Library cache: {hit_rate:.1f}% hit rate ({stats['hits']} hits, {stats['misses']} misses)") + if match_cache is not None: + stats = match_cache.get_stats() + total = stats.get('hits', 0) + stats.get('misses', 0) + if total > 0: + hit_rate = stats['hits'] / total * 100 + log_info(f"Match cache: {hit_rate:.1f}% hit rate ({stats['hits']} hits, {stats['misses']} misses)") + + def _process_job(self, job: dict) -> Optional[str]: + """ + Process a sync job by updating Plex metadata. + + Finds the Plex item matching the job's file path and updates + its metadata based on the update_type. + + Args: + job: Job dict with scene_id, update_type, data + - scene_id: Stash scene ID + - update_type: Type of update (e.g., 'metadata') + - data: Dict containing 'path' and metadata fields + + Returns: + Match confidence level ('high' or 'low'), or None if job failed + + Raises: + TransientError: For retry-able errors (network, timeout) + PermanentError: For permanent failures (missing path, bad data) + """ + import time as _time + _start_time = _time.perf_counter() + + from plex.exceptions import PlexTemporaryError, PlexPermanentError, PlexNotFound, translate_plex_exception + from plex.matcher import find_plex_items_with_confidence, MatchConfidence + # Lazy imports to avoid module-level pollution in tests + from sync_queue.operations import save_sync_timestamp + from hooks.handlers import unmark_scene_pending + + scene_id = job.get('scene_id') + data = job.get('data', {}) + file_path = data.get('path') + + if not file_path: + raise PermanentError(f"Job {scene_id} missing file path") + + try: + client = self._get_plex_client() + + # Get library section(s) to search + if self.config.plex_library: + # Search only the configured library + try: + sections = [client.server.library.section(self.config.plex_library)] + log_trace(f"Searching library: {self.config.plex_library}") + except Exception as e: + raise PermanentError(f"Library '{self.config.plex_library}' not found: {e}") + else: + # Search all libraries (slow) + sections = client.server.library.sections() + log_info(f"Searching all {len(sections)} libraries (set plex_library to speed up)") + + # Get caches for optimized matching + library_cache, match_cache = self._get_caches() + + # Search library sections, collect ALL candidates + all_candidates = [] + for section in sections: + try: + confidence, item, candidates = find_plex_items_with_confidence( + section, + file_path, + library_cache=library_cache, + match_cache=match_cache, + ) + all_candidates.extend(candidates) + except PlexNotFound: + continue # No match in this section, try next + + # Deduplicate candidates (same item might be in multiple sections) + seen_keys = set() + unique_candidates = [] + for c in all_candidates: + if c.key not in seen_keys: + seen_keys.add(c.key) + 
unique_candidates.append(c) + + # Apply confidence scoring + confidence = None + if len(unique_candidates) == 0: + raise PlexNotFound(f"Could not find Plex item for path: {file_path}") + elif len(unique_candidates) == 1: + # HIGH confidence - single unique match + confidence = 'high' + plex_item = unique_candidates[0] + self._update_metadata(plex_item, data) + else: + # LOW confidence - multiple matches + confidence = 'low' + paths = [c.media[0].parts[0].file if c.media and c.media[0].parts else c.key for c in unique_candidates] + if self.config.strict_matching: + logger.warning( + f"[Stash2Plex] LOW CONFIDENCE SKIPPED: scene {scene_id}\n" + f" Stash path: {file_path}\n" + f" Plex candidates ({len(unique_candidates)}): {paths}" + ) + raise PermanentError(f"Low confidence match skipped (strict_matching=true)") + else: + plex_item = unique_candidates[0] + logger.warning( + f"[Stash2Plex] LOW CONFIDENCE SYNCED: scene {scene_id}\n" + f" Chosen: {paths[0]}\n" + f" Other candidates: {paths[1:]}" + ) + self._update_metadata(plex_item, data) + + # Update sync timestamp after successful sync + if self.data_dir is not None: + save_sync_timestamp(self.data_dir, scene_id, time.time()) + + # Remove from pending set (always, even on failure - will be re-added on retry) + unmark_scene_pending(scene_id) + + # Log job processing time + _elapsed = _time.perf_counter() - _start_time + if _elapsed >= 1.0: + log_info(f"_process_job took {_elapsed:.3f}s") + else: + log_trace(f"_process_job took {_elapsed:.3f}s") + + return confidence + + except (PlexTemporaryError, PlexPermanentError, PlexNotFound): + unmark_scene_pending(scene_id) # Allow re-enqueue on next hook + raise + except Exception as e: + unmark_scene_pending(scene_id) + raise translate_plex_exception(e) + + def _validate_edit_result(self, plex_item, expected_edits: dict) -> list: + """ + Validate that edit actually applied expected values. + + Returns list of fields that may not have updated correctly. + This catches silent failures where Plex accepts the request + but doesn't apply the value (e.g., field limit exceeded). 
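+
+        Example expected_edits dict (the shape built by _update_metadata below;
+        values are illustrative):
+
+            {'title.value': 'Example Scene', 'studio.value': 'Example Studio'}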
+ + Args: + plex_item: Plex item after reload + expected_edits: Dict of edits that were sent (field.value -> value) + + Returns: + List of issue descriptions for fields that may not have updated + """ + issues = [] + for field_key, expected_value in expected_edits.items(): + # Skip locked fields and non-value fields + if '.locked' in field_key or not expected_value: + continue + + # Parse field name from "field.value" format + field_name = field_key.replace('.value', '') + + # Map edit field names to actual plex_item attributes + field_mapping = { + 'title': 'title', + 'studio': 'studio', + 'summary': 'summary', + 'tagline': 'tagline', + 'originallyAvailableAt': 'originallyAvailableAt', + } + + attr_name = field_mapping.get(field_name) + if not attr_name: + continue + + actual_value = getattr(plex_item, attr_name, None) + + # Convert to string for comparison (handle None) + expected_str = str(expected_value) if expected_value else '' + actual_str = str(actual_value) if actual_value else '' + + # Check if value matches (with tolerance for truncation/sanitization) + # Compare first 50 chars to handle any server-side trimming + if expected_str and actual_str: + if expected_str[:50] != actual_str[:50]: + issues.append( + f"{field_name}: sent '{expected_str[:20]}...', " + f"got '{actual_str[:20]}...'" + ) + elif expected_str and not actual_str: + issues.append(f"{field_name}: sent value but field is empty") + + return issues + + def _update_metadata(self, plex_item, data: dict): + """ + Update Plex item metadata from sync job data with granular error handling. + + Implements LOCKED user decision: When Stash provides None/empty for an + optional field, the existing Plex value is CLEARED (not preserved). + When a field key is NOT in the data dict, the existing value is preserved. + + Non-critical field failures (performers, tags, poster, background, collection) + are logged as warnings but don't fail the overall sync. Critical field failures + (core metadata edit) propagate and fail the job. + + Uses plex_item.edit() to update metadata fields, then reloads + the item to confirm changes. + + Args: + plex_item: Plex Video item to update + data: Dict containing metadata fields (title, studio, details, etc.) 
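+
+        Example data dict (illustrative values; only keys that are present
+        are touched, per the LOCKED decision above):
+
+            {
+                'title': 'Example Scene',
+                'studio': 'Example Studio',
+                'details': 'Scene description',
+                'date': '2024-01-01',
+                'performers': ['Actor One', 'Actor Two'],
+                'tags': ['Tag One'],
+                'poster_url': '<stash screenshot URL>',
+            }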
+ + Returns: + PartialSyncResult tracking which fields succeeded and which had warnings + """ + # Lazy imports to avoid circular import with validation module + from validation.limits import ( + MAX_TITLE_LENGTH, + MAX_STUDIO_LENGTH, + MAX_SUMMARY_LENGTH, + MAX_TAGLINE_LENGTH, + MAX_PERFORMER_NAME_LENGTH, + MAX_TAG_NAME_LENGTH, + MAX_PERFORMERS, + MAX_TAGS, + ) + from validation.sanitizers import sanitize_for_plex + from validation.errors import PartialSyncResult + + result = PartialSyncResult() + edits = {} + + # ========================================================================= + # MASTER TOGGLE CHECK - if OFF, skip ALL field syncing + # ========================================================================= + if not getattr(self.config, 'sync_master', True): + log_debug("Master sync toggle is OFF - skipping all field syncs") + return result + + # LOCKED DECISION: Missing optional fields clear existing Plex values + # - If key exists AND value is None/empty -> CLEAR (set to '') + # - If key exists AND value is present -> sanitize and set + # - If key does NOT exist in data dict -> do nothing (preserve) + + # Handle title field (CRITICAL - failure propagates) + if 'title' in data: + title_value = data.get('title') + if title_value is None or title_value == '': + # LOCKED: Clear existing Plex value + edits['title.value'] = '' + log_debug("Clearing title (Stash value is empty)") + else: + sanitized = sanitize_for_plex(title_value, max_length=MAX_TITLE_LENGTH) + if not self.config.preserve_plex_edits or not plex_item.title: + edits['title.value'] = sanitized + + # Handle studio field - TOGGLE CHECK FIRST + if getattr(self.config, 'sync_studio', True): + if 'studio' in data: + studio_value = data.get('studio') + if studio_value is None or studio_value == '': + # LOCKED: Clear existing Plex value + edits['studio.value'] = '' + log_debug("Clearing studio (Stash value is empty)") + else: + sanitized = sanitize_for_plex(studio_value, max_length=MAX_STUDIO_LENGTH) + if not self.config.preserve_plex_edits or not plex_item.studio: + edits['studio.value'] = sanitized + + # Handle summary field (Stash 'details' -> Plex 'summary') - TOGGLE CHECK FIRST + if getattr(self.config, 'sync_summary', True): + # Check for both 'details' and 'summary' keys + has_summary_key = 'details' in data or 'summary' in data + if has_summary_key: + summary_value = data.get('details') or data.get('summary') + if summary_value is None or summary_value == '': + # LOCKED: Clear existing Plex value + edits['summary.value'] = '' + log_debug("Clearing summary (Stash value is empty)") + else: + sanitized = sanitize_for_plex(summary_value, max_length=MAX_SUMMARY_LENGTH) + if not self.config.preserve_plex_edits or not plex_item.summary: + edits['summary.value'] = sanitized + + # Handle tagline field - TOGGLE CHECK FIRST + if getattr(self.config, 'sync_tagline', True): + if 'tagline' in data: + tagline_value = data.get('tagline') + if tagline_value is None or tagline_value == '': + # LOCKED: Clear existing Plex value + edits['tagline.value'] = '' + log_debug("Clearing tagline (Stash value is empty)") + else: + sanitized = sanitize_for_plex(tagline_value, max_length=MAX_TAGLINE_LENGTH) + if not self.config.preserve_plex_edits or not getattr(plex_item, 'tagline', None): + edits['tagline.value'] = sanitized + + # Handle date field - TOGGLE CHECK FIRST + if getattr(self.config, 'sync_date', True): + if 'date' in data: + date_value = data.get('date') + if date_value is None or date_value == '': + # LOCKED: Clear existing Plex 
value + edits['originallyAvailableAt.value'] = '' + log_debug("Clearing date (Stash value is empty)") + else: + if not self.config.preserve_plex_edits or not getattr(plex_item, 'originallyAvailableAt', None): + edits['originallyAvailableAt.value'] = date_value + + # Apply core metadata edits (CRITICAL - failure propagates) + if edits: + log_debug(f"Updating fields: {list(edits.keys())}") + plex_item.edit(**edits) + plex_item.reload() + + # Validate that edits were actually applied + validation_issues = self._validate_edit_result(plex_item, edits) + if validation_issues: + log_debug(f"Edit validation issues (may be expected): {validation_issues}") + + mode = "preserved" if self.config.preserve_plex_edits else "overwrite" + log_info(f"Updated metadata ({mode} mode): {plex_item.title}") + result.add_success('metadata') + else: + log_trace(f"No metadata fields to update for: {plex_item.title}") + + # NON-CRITICAL: Sync performers as actors - TOGGLE CHECK FIRST + # LOCKED: If 'performers' key exists with empty/None, clear all actors + if getattr(self.config, 'sync_performers', True) and 'performers' in data: + performers = data.get('performers') + if performers is None or performers == []: + # LOCKED: Clear all existing performers + try: + plex_item.edit(**{'actor.locked': 1}) # Lock to clear + plex_item.reload() + log_debug("Clearing performers (Stash value is empty)") + result.add_success('performers') + except Exception as e: + log_warn(f" Failed to clear performers: {e}") + result.add_warning('performers', e) + elif performers: + try: + # Sanitize performer names and apply limits + sanitized_performers = [ + sanitize_for_plex(p, max_length=MAX_PERFORMER_NAME_LENGTH) + for p in performers + ] + + # Apply MAX_PERFORMERS limit + if len(sanitized_performers) > MAX_PERFORMERS: + log_warn(f"Truncating performers list from {len(sanitized_performers)} to {MAX_PERFORMERS}") + sanitized_performers = sanitized_performers[:MAX_PERFORMERS] + + # Get current actors + current_actors = [actor.tag for actor in getattr(plex_item, 'actors', [])] + new_performers = [p for p in sanitized_performers if p not in current_actors] + + if new_performers: + # Build actor edit params - each actor needs actor[N].tag.tag format + actor_edits = {} + all_actors = current_actors + new_performers + # Apply limit to combined list too + if len(all_actors) > MAX_PERFORMERS: + log_warn(f"Truncating combined actors list from {len(all_actors)} to {MAX_PERFORMERS}") + all_actors = all_actors[:MAX_PERFORMERS] + for i, actor_name in enumerate(all_actors): + actor_edits[f'actor[{i}].tag.tag'] = actor_name + + plex_item.edit(**actor_edits) + plex_item.reload() + log_info(f"Added {len(new_performers)} performers: {new_performers}") + else: + log_trace(f"Performers already in Plex: {sanitized_performers}") + result.add_success('performers') + except Exception as e: + log_warn(f" Failed to sync performers: {e}") + result.add_warning('performers', e) + + # NON-CRITICAL: Sync poster image - TOGGLE CHECK FIRST + # (download from Stash, save to temp file, upload to Plex) + if getattr(self.config, 'sync_poster', True) and data.get('poster_url'): + poster_url = data.get('poster_url') + try: + image_data = self._fetch_stash_image(poster_url) + if image_data: + import tempfile + import os + with tempfile.NamedTemporaryFile(suffix='.jpg', delete=False) as f: + f.write(image_data) + temp_path = f.name + try: + plex_item.uploadPoster(filepath=temp_path) + log_debug(f"Uploaded poster ({len(image_data)} bytes)") + result.add_success('poster') + 
finally: + os.unlink(temp_path) + else: + result.add_warning('poster', ValueError("No image data returned from Stash")) + except Exception as e: + log_warn(f" Failed to upload poster: {e}") + result.add_warning('poster', e) + + # NON-CRITICAL: Sync background/art image - TOGGLE CHECK FIRST + # (download from Stash, save to temp file, upload to Plex) + if getattr(self.config, 'sync_background', True) and data.get('background_url'): + background_url = data.get('background_url') + try: + image_data = self._fetch_stash_image(background_url) + if image_data: + import tempfile + import os + with tempfile.NamedTemporaryFile(suffix='.jpg', delete=False) as f: + f.write(image_data) + temp_path = f.name + try: + plex_item.uploadArt(filepath=temp_path) + log_debug(f"Uploaded background ({len(image_data)} bytes)") + result.add_success('background') + finally: + os.unlink(temp_path) + else: + result.add_warning('background', ValueError("No image data returned from Stash")) + except Exception as e: + log_warn(f" Failed to upload background: {e}") + result.add_warning('background', e) + + # NON-CRITICAL: Sync tags as genres - TOGGLE CHECK FIRST + # LOCKED: If 'tags' key exists with empty/None, clear all tags + if getattr(self.config, 'sync_tags', True) and 'tags' in data: + tags = data.get('tags') + if tags is None or tags == []: + # LOCKED: Clear all existing tags/genres + try: + plex_item.edit(**{'genre.locked': 1}) # Lock to clear + plex_item.reload() + log_debug("Clearing tags (Stash value is empty)") + result.add_success('tags') + except Exception as e: + log_warn(f" Failed to clear tags: {e}") + result.add_warning('tags', e) + elif tags: + try: + # Sanitize tag names and apply limits + sanitized_tags = [ + sanitize_for_plex(t, max_length=MAX_TAG_NAME_LENGTH) + for t in tags + ] + + # Apply MAX_TAGS limit + if len(sanitized_tags) > MAX_TAGS: + log_warn(f"Truncating tags list from {len(sanitized_tags)} to {MAX_TAGS}") + sanitized_tags = sanitized_tags[:MAX_TAGS] + + current_genres = [g.tag for g in getattr(plex_item, 'genres', [])] + new_tags = [t for t in sanitized_tags if t not in current_genres] + + if new_tags: + # Build genre edit params + genre_edits = {} + all_genres = current_genres + new_tags + # Apply limit to combined list too + if len(all_genres) > MAX_TAGS: + log_warn(f"Truncating combined tags list from {len(all_genres)} to {MAX_TAGS}") + all_genres = all_genres[:MAX_TAGS] + for i, genre_name in enumerate(all_genres): + genre_edits[f'genre[{i}].tag.tag'] = genre_name + + plex_item.edit(**genre_edits) + plex_item.reload() + log_info(f"Added {len(new_tags)} tags as genres: {new_tags}") + else: + log_trace(f"Tags already in Plex: {sanitized_tags}") + result.add_success('tags') + except Exception as e: + log_warn(f" Failed to sync tags: {e}") + result.add_warning('tags', e) + + # NON-CRITICAL: Add to studio collection - TOGGLE CHECK FIRST + if getattr(self.config, 'sync_collection', True) and data.get('studio'): + studio = data.get('studio') + try: + current_collections = [c.tag for c in getattr(plex_item, 'collections', [])] + if studio not in current_collections: + # Build collection edit params + collection_edits = {} + all_collections = current_collections + [studio] + for i, coll_name in enumerate(all_collections): + collection_edits[f'collection[{i}].tag.tag'] = coll_name + + plex_item.edit(**collection_edits) + plex_item.reload() + log_debug(f"Added to collection: {studio}") + else: + log_trace(f"Already in collection: {studio}") + result.add_success('collection') + except Exception 
as e: + log_warn(f" Failed to add to collection: {e}") + result.add_warning('collection', e) + + # Log aggregated warnings at end + if result.has_warnings: + log_warn(f"Partial sync for {plex_item.title}: {result.warning_summary}") + + return result diff --git a/plugins/Stash2Plex/worker/stats.py b/plugins/Stash2Plex/worker/stats.py new file mode 100644 index 00000000..c102e19b --- /dev/null +++ b/plugins/Stash2Plex/worker/stats.py @@ -0,0 +1,217 @@ +""" +Statistics tracking for sync operations. + +Provides SyncStats dataclass for tracking job processing metrics including +success/fail counts, timing data, match confidence tracking, and error +type aggregation. +""" + +import json +import os +import time +from dataclasses import dataclass, field +from typing import Dict + + +@dataclass +class SyncStats: + """ + Statistics for sync job processing. + + Tracks cumulative metrics across sync operations including job counts, + processing times, error types, and match confidence levels. + + Attributes: + jobs_processed: Total number of jobs processed + jobs_succeeded: Number of successfully completed jobs + jobs_failed: Number of failed jobs + jobs_to_dlq: Number of jobs moved to dead letter queue + total_processing_time: Sum of all processing times in seconds + session_start: Timestamp when stats tracking began + errors_by_type: Count of errors by type name (e.g., "PlexNotFound": 3) + high_confidence_matches: Count of high confidence Plex matches + low_confidence_matches: Count of low confidence Plex matches + + Example: + >>> stats = SyncStats() + >>> stats.record_success(0.5, confidence='high') + >>> stats.record_failure('PlexNotFound', 0.2, to_dlq=True) + >>> print(f"Success rate: {stats.success_rate:.1f}%") + Success rate: 50.0% + """ + + jobs_processed: int = 0 + jobs_succeeded: int = 0 + jobs_failed: int = 0 + jobs_to_dlq: int = 0 + total_processing_time: float = 0.0 + session_start: float = field(default_factory=time.time) + errors_by_type: Dict[str, int] = field(default_factory=dict) + high_confidence_matches: int = 0 + low_confidence_matches: int = 0 + + def record_success(self, processing_time: float, confidence: str = 'high') -> None: + """ + Record a successful job completion. + + Args: + processing_time: Time taken to process job in seconds + confidence: Match confidence level ('high' or 'low') + """ + self.jobs_processed += 1 + self.jobs_succeeded += 1 + self.total_processing_time += processing_time + + if confidence == 'high': + self.high_confidence_matches += 1 + else: + self.low_confidence_matches += 1 + + def record_failure( + self, + error_type: str, + processing_time: float, + to_dlq: bool = False + ) -> None: + """ + Record a job failure. + + Args: + error_type: Name of the exception type (e.g., 'PlexNotFound') + processing_time: Time taken before failure in seconds + to_dlq: Whether job was moved to dead letter queue + """ + self.jobs_processed += 1 + self.jobs_failed += 1 + self.total_processing_time += processing_time + + if to_dlq: + self.jobs_to_dlq += 1 + + # Track error type count + self.errors_by_type[error_type] = self.errors_by_type.get(error_type, 0) + 1 + + @property + def success_rate(self) -> float: + """ + Calculate success rate as a percentage. + + Returns: + Percentage of successful jobs (0.0 to 100.0), or 0.0 if no jobs processed + """ + if self.jobs_processed == 0: + return 0.0 + return (self.jobs_succeeded / self.jobs_processed) * 100.0 + + @property + def avg_processing_time(self) -> float: + """ + Calculate average processing time per job. 
+ + Returns: + Average time in seconds, or 0.0 if no jobs processed + """ + if self.jobs_processed == 0: + return 0.0 + return self.total_processing_time / self.jobs_processed + + def to_dict(self) -> dict: + """ + Convert stats to dictionary for JSON serialization. + + Returns: + Dict containing all stats fields + """ + return { + 'jobs_processed': self.jobs_processed, + 'jobs_succeeded': self.jobs_succeeded, + 'jobs_failed': self.jobs_failed, + 'jobs_to_dlq': self.jobs_to_dlq, + 'total_processing_time': self.total_processing_time, + 'session_start': self.session_start, + 'errors_by_type': dict(self.errors_by_type), + 'high_confidence_matches': self.high_confidence_matches, + 'low_confidence_matches': self.low_confidence_matches, + } + + def save_to_file(self, filepath: str) -> None: + """ + Save stats to JSON file with cumulative merge. + + If the file already exists, loads existing stats and merges + cumulative totals before saving. Creates parent directories + if needed. + + Args: + filepath: Path to JSON file for persistence + """ + # Load existing stats if file exists + existing = {} + if os.path.exists(filepath): + try: + with open(filepath, 'r') as f: + existing = json.load(f) + except (json.JSONDecodeError, IOError): + existing = {} + + # Merge cumulative totals + merged = { + 'jobs_processed': existing.get('jobs_processed', 0) + self.jobs_processed, + 'jobs_succeeded': existing.get('jobs_succeeded', 0) + self.jobs_succeeded, + 'jobs_failed': existing.get('jobs_failed', 0) + self.jobs_failed, + 'jobs_to_dlq': existing.get('jobs_to_dlq', 0) + self.jobs_to_dlq, + 'total_processing_time': existing.get('total_processing_time', 0.0) + self.total_processing_time, + 'session_start': existing.get('session_start', self.session_start), + 'high_confidence_matches': existing.get('high_confidence_matches', 0) + self.high_confidence_matches, + 'low_confidence_matches': existing.get('low_confidence_matches', 0) + self.low_confidence_matches, + } + + # Merge error type counts + merged_errors = dict(existing.get('errors_by_type', {})) + for error_type, count in self.errors_by_type.items(): + merged_errors[error_type] = merged_errors.get(error_type, 0) + count + merged['errors_by_type'] = merged_errors + + # Ensure parent directory exists + parent_dir = os.path.dirname(filepath) + if parent_dir: + os.makedirs(parent_dir, exist_ok=True) + + # Write merged stats + with open(filepath, 'w') as f: + json.dump(merged, f, indent=2) + + @classmethod + def load_from_file(cls, filepath: str) -> 'SyncStats': + """ + Load stats from JSON file. + + If the file doesn't exist or is invalid, returns a new empty + SyncStats instance. + + Args: + filepath: Path to JSON file to load + + Returns: + SyncStats instance with loaded data, or empty stats if file missing + """ + if not os.path.exists(filepath): + return cls() + + try: + with open(filepath, 'r') as f: + data = json.load(f) + except (json.JSONDecodeError, IOError): + return cls() + + return cls( + jobs_processed=data.get('jobs_processed', 0), + jobs_succeeded=data.get('jobs_succeeded', 0), + jobs_failed=data.get('jobs_failed', 0), + jobs_to_dlq=data.get('jobs_to_dlq', 0), + total_processing_time=data.get('total_processing_time', 0.0), + session_start=data.get('session_start', time.time()), + errors_by_type=dict(data.get('errors_by_type', {})), + high_confidence_matches=data.get('high_confidence_matches', 0), + low_confidence_matches=data.get('low_confidence_matches', 0), + )