From 37bea6184bdf4ede78eab69d0281390a6f2aa5d4 Mon Sep 17 00:00:00 2001 From: Joe Schoonover Date: Wed, 4 Feb 2026 14:58:44 -0500 Subject: [PATCH 01/25] Add Claude-powered Slack bot for data transfer requests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduces a Slack bot that uses Claude to interpret user requests and manage data transfers via xfer/Slurm. Features: - Natural language transfer requests via Slack mentions - Two-phase job submission (prepare → transfer array job) - Job tracking via Slurm comments (slack:channel/thread) - Progress reporting from state files - Backend validation with support channel escalation - Configurable defaults via environment variables Components: - src/xfer/slackbot/ - Bot implementation - app.py: Slack Bolt event handlers - claude_agent.py: Claude API with tool definitions - slurm_tools.py: Slurm/xfer operations - config.py: Configuration dataclasses - docs/slack-app-setup.md - Setup guide - examples/allowed_backends.yaml - Backend config example - tests/test_slackbot_dryrun.py - Dry-run tests Also adds --sbatch-extras to xfer run command for comment injection. 
Co-Authored-By: Claude Opus 4.5 --- docs/slack-app-setup.md | 231 ++++++++++++ examples/allowed_backends.yaml | 41 +++ pyproject.toml | 8 + src/xfer/cli.py | 4 + src/xfer/slackbot/__init__.py | 10 + src/xfer/slackbot/app.py | 246 +++++++++++++ src/xfer/slackbot/claude_agent.py | 399 ++++++++++++++++++++ src/xfer/slackbot/config.py | 119 ++++++ src/xfer/slackbot/slurm_tools.py | 588 ++++++++++++++++++++++++++++++ tests/test_slackbot_dryrun.py | 454 +++++++++++++++++++++++ uv.lock | 410 +++++++++++++++++++++ 11 files changed, 2510 insertions(+) create mode 100644 docs/slack-app-setup.md create mode 100644 examples/allowed_backends.yaml create mode 100644 src/xfer/slackbot/__init__.py create mode 100644 src/xfer/slackbot/app.py create mode 100644 src/xfer/slackbot/claude_agent.py create mode 100644 src/xfer/slackbot/config.py create mode 100644 src/xfer/slackbot/slurm_tools.py create mode 100644 tests/test_slackbot_dryrun.py diff --git a/docs/slack-app-setup.md b/docs/slack-app-setup.md new file mode 100644 index 0000000..53d807f --- /dev/null +++ b/docs/slack-app-setup.md @@ -0,0 +1,231 @@ +# Slack App Setup for xfer-slackbot + +This guide walks through creating and configuring a Slack app for the xfer data transfer bot. + +## 1. Create the Slack App + +1. Go to [api.slack.com/apps](https://api.slack.com/apps) +2. Click **Create New App** +3. Choose **From scratch** +4. Enter: + - **App Name:** `xfer-bot` (or your preferred name) + - **Workspace:** Select your workspace +5. Click **Create App** + +## 2. Enable Socket Mode + +Socket Mode allows the bot to receive events without exposing a public endpoint. + +1. In the left sidebar, go to **Socket Mode** +2. Toggle **Enable Socket Mode** to On +3. You'll be prompted to create an app-level token: + - **Token Name:** `xfer-socket` + - **Scopes:** `connections:write` (should be pre-selected) +4. Click **Generate** +5. **Copy the token** (starts with `xapp-`) — this is your `SLACK_APP_TOKEN` + +## 3. 
Configure OAuth Scopes + +1. In the left sidebar, go to **OAuth & Permissions** +2. Scroll to **Scopes** → **Bot Token Scopes** +3. Add these scopes: + +| Scope | Purpose | +|-------|---------| +| `app_mentions:read` | Receive @mentions | +| `channels:history` | Read messages in public channels | +| `channels:read` | View basic channel info | +| `chat:write` | Send messages | +| `groups:history` | Read messages in private channels (if needed) | +| `groups:read` | View basic private channel info (if needed) | +| `im:history` | Read DMs with the bot | +| `im:read` | View basic DM info | +| `im:write` | Start DMs with users | +| `users:read` | View basic user info | + +## 4. Subscribe to Events + +1. In the left sidebar, go to **Event Subscriptions** +2. Toggle **Enable Events** to On +3. Expand **Subscribe to bot events** +4. Add these events: + +| Event | Purpose | +|-------|---------| +| `app_mention` | When someone @mentions the bot | +| `message.channels` | Messages in public channels | +| `message.groups` | Messages in private channels (if needed) | +| `message.im` | Direct messages to the bot | + +5. Click **Save Changes** + +## 5. Configure App Home (Optional but Recommended) + +1. In the left sidebar, go to **App Home** +2. Under **Show Tabs**, enable: + - **Messages Tab** — allows DMs with the bot +3. Check **Allow users to send Slash commands and messages from the messages tab** + +## 6. Install the App + +1. In the left sidebar, go to **Install App** +2. Click **Install to Workspace** +3. Review the permissions and click **Allow** +4. **Copy the Bot User OAuth Token** (starts with `xoxb-`) — this is your `SLACK_BOT_TOKEN` + +## 7. Get Channel IDs + +You'll need channel IDs for configuration. To find them: + +**Option A: From Slack UI** +1. Right-click on a channel name +2. Click **View channel details** +3. 
Scroll to the bottom — the Channel ID is shown (e.g., `C07ABC123`) + +**Option B: Using the API** +```bash +curl -H "Authorization: Bearer xoxb-YOUR-TOKEN" \ + "https://slack.com/api/conversations.list" | jq '.channels[] | {name, id}' +``` + +## 8. Configure Environment Variables + +Create a `.env` file or set these environment variables: + +```bash +# Required +export SLACK_BOT_TOKEN="xoxb-your-bot-token" +export SLACK_APP_TOKEN="xapp-your-app-token" +export ANTHROPIC_API_KEY="sk-ant-your-api-key" + +# Optional - restrict to specific channels (comma-separated channel IDs) +export XFER_ALLOWED_CHANNELS="C07ABC123,C07DEF456" + +# Optional - support channel for backend requests +export XFER_SUPPORT_CHANNEL="C07SUPPORT" + +# Optional - customize paths and defaults +export XFER_RUNS_DIR="/scratch/xfer-runs" +export XFER_RCLONE_CONFIG="/home/svc/.config/rclone/rclone.conf" +export XFER_RCLONE_IMAGE="rclone/rclone:latest" +export XFER_SLURM_PARTITION="transfer" +export XFER_SLURM_QOS="data-transfer" +export XFER_ALLOWED_BACKENDS_FILE="/etc/xfer/allowed_backends.yaml" +``` + +## 9. Run the Bot + +```bash +# Install with slackbot dependencies +pip install 'xfer[slackbot]' + +# Or if developing locally +cd /path/to/xfer +pip install -e '.[slackbot]' + +# Run the bot +xfer-slackbot +``` + +You should see: +``` +2026-02-04 14:30:00 - xfer.slackbot.app - INFO - Starting xfer Slack bot... +2026-02-04 14:30:00 - xfer.slackbot.app - INFO - Runs directory: /home/user/xfer-runs +2026-02-04 14:30:00 - xfer.slackbot.app - INFO - Allowed channels: ['C07ABC123'] +``` + +## 10. Test the Bot + +1. Invite the bot to your channel: `/invite @xfer-bot` +2. Mention the bot: `@xfer-bot what backends are available?` +3. 
Try a transfer request: `@xfer-bot transfer data from s3src:bucket/data to s3dst:archive/data` + +## Running as a Service + +For production, run the bot as a systemd service: + +```ini +# /etc/systemd/system/xfer-slackbot.service +[Unit] +Description=xfer Slack Bot +After=network.target + +[Service] +Type=simple +User=xfer-svc +Group=xfer-svc +WorkingDirectory=/opt/xfer +EnvironmentFile=/etc/xfer/slackbot.env +ExecStart=/opt/xfer/venv/bin/xfer-slackbot +Restart=always +RestartSec=10 + +[Install] +WantedBy=multi-user.target +``` + +```bash +sudo systemctl daemon-reload +sudo systemctl enable xfer-slackbot +sudo systemctl start xfer-slackbot +sudo journalctl -u xfer-slackbot -f # View logs +``` + +## Testing + +### Dry-run tests (no Slack/Slurm required) + +Run the dry-run tests to validate the bot logic without connecting to Slack or submitting jobs: + +```bash +# Install with slackbot dependencies +pip install -e '.[slackbot]' + +# Run dry-run tests +python tests/test_slackbot_dryrun.py +``` + +This tests: +- Slack comment format and parsing +- Configuration defaults +- Backend validation (from YAML and rclone.conf) +- Prepare script generation +- sacct JSON parsing +- Transfer progress tracking +- Claude tool definitions +- Full flow simulation + +### Testing with Slack (no job submission) + +To test the Slack integration without submitting real jobs, you can mock the `sbatch` command: + +```bash +# Create a mock sbatch that just prints a fake job ID +mkdir -p ~/bin +cat > ~/bin/sbatch << 'EOF' +#!/bin/bash +echo "Submitted batch job 12345" +EOF +chmod +x ~/bin/sbatch + +# Add to PATH before running the bot +export PATH=~/bin:$PATH +xfer-slackbot +``` + +## Troubleshooting + +### Bot doesn't respond to mentions +- Check the bot is in the channel (`/invite @xfer-bot`) +- Verify `XFER_ALLOWED_CHANNELS` includes the channel ID (or is unset for all channels) +- Check logs for errors + +### "not_in_channel" errors +- The bot needs to be invited to channels before it 
can read/write messages + +### Socket mode connection issues +- Verify `SLACK_APP_TOKEN` starts with `xapp-` +- Check Socket Mode is enabled in the Slack app settings + +### Permission errors +- Review OAuth scopes — you may need to reinstall the app after adding scopes diff --git a/examples/allowed_backends.yaml b/examples/allowed_backends.yaml new file mode 100644 index 0000000..86dff97 --- /dev/null +++ b/examples/allowed_backends.yaml @@ -0,0 +1,41 @@ +# Allowed rclone backends for xfer-slackbot +# +# This file defines which rclone remotes users can transfer to/from. +# Set XFER_ALLOWED_BACKENDS_FILE to point to this file. +# +# If this file is not configured, the bot will parse rclone.conf +# and allow all configured remotes. + +allowed_backends: + # AWS S3 buckets + - s3src # Source S3 (e.g., research data intake) + - s3dst # Destination S3 (e.g., archive) + - s3scratch # Scratch/temporary storage + + # Google Cloud Storage + - gcs-research + - gcs-archive + + # Azure Blob Storage + - azure-hot + - azure-archive + + # On-prem / Other + - nfs-home # NFS mounted as rclone local + - ceph-rgw # Ceph RadosGW S3-compatible + +# Backends NOT in this list will be rejected. +# Users can request access via the bot, which posts to the support channel. 
+# +# Example rclone.conf entries for these backends: +# +# [s3src] +# type = s3 +# provider = AWS +# env_auth = true +# region = us-east-1 +# +# [gcs-research] +# type = google cloud storage +# project_number = 123456789 +# service_account_file = /etc/xfer/gcs-sa.json diff --git a/pyproject.toml b/pyproject.toml index 9c94356..cb3f2d0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,8 +9,16 @@ dependencies = [ "rich>=13.7.1", ] +[project.optional-dependencies] +slackbot = [ + "slack-bolt>=1.18.0", + "anthropic>=0.39.0", + "pyyaml>=6.0", +] + [project.scripts] xfer = "xfer.cli:main" +xfer-slackbot = "xfer.slackbot.app:main" [build-system] requires = ["hatchling>=1.25.0"] diff --git a/src/xfer/cli.py b/src/xfer/cli.py index 223c2ec..9890686 100644 --- a/src/xfer/cli.py +++ b/src/xfer/cli.py @@ -742,6 +742,9 @@ def run_all( cpus_per_task: int = typer.Option(4, min=1, help="cpus-per-task"), mem: str = typer.Option("8G", help="Slurm mem"), max_attempts: int = typer.Option(5, min=1, help="Worker retry attempts"), + sbatch_extras: str = typer.Option( + "", help="Extra SBATCH lines, e.g. '#SBATCH --account=foo\\n#SBATCH --qos=bar'" + ), submit: bool = typer.Option(False, help="If set, submit job after rendering"), pyxis_extra: str = typer.Option("", help="Extra pyxis flags"), ) -> None: @@ -781,6 +784,7 @@ def run_all( source_root=source, dest_root=dest, max_attempts=max_attempts, + sbatch_extras=sbatch_extras, pyxis_extra=pyxis_extra, ) if submit: diff --git a/src/xfer/slackbot/__init__.py b/src/xfer/slackbot/__init__.py new file mode 100644 index 0000000..a3b71f1 --- /dev/null +++ b/src/xfer/slackbot/__init__.py @@ -0,0 +1,10 @@ +""" +xfer slackbot - Claude-powered Slack bot for data transfer requests. + +This module provides a Slack bot that uses Claude to interpret user requests +and manage data transfers via xfer/Slurm. 
+""" + +from .app import create_app + +__all__ = ["create_app"] diff --git a/src/xfer/slackbot/app.py b/src/xfer/slackbot/app.py new file mode 100644 index 0000000..8164884 --- /dev/null +++ b/src/xfer/slackbot/app.py @@ -0,0 +1,246 @@ +""" +Slack Bolt application for the xfer data transfer bot. +""" + +from __future__ import annotations + +import logging +from typing import Any + +from slack_bolt import App +from slack_bolt.adapter.socket_mode import SocketModeHandler + +from .claude_agent import ClaudeAgent +from .config import BotConfig + +logger = logging.getLogger(__name__) + + +class ConversationStore: + """ + Simple in-memory store for conversation history per thread. + + In production, you might want to use Redis or similar for persistence. + """ + + def __init__(self, max_messages: int = 20): + self.max_messages = max_messages + self._store: dict[str, list[dict]] = {} + + def _key(self, channel: str, thread_ts: str) -> str: + return f"{channel}:{thread_ts}" + + def get(self, channel: str, thread_ts: str) -> list[dict]: + return self._store.get(self._key(channel, thread_ts), []) + + def append(self, channel: str, thread_ts: str, message: dict) -> None: + key = self._key(channel, thread_ts) + if key not in self._store: + self._store[key] = [] + self._store[key].append(message) + # Trim to max messages + if len(self._store[key]) > self.max_messages: + self._store[key] = self._store[key][-self.max_messages :] + + +def create_app(config: BotConfig | None = None) -> tuple[App, SocketModeHandler]: + """ + Create and configure the Slack Bolt app. + + Returns the app and socket mode handler. 
+ """ + if config is None: + config = BotConfig.from_env() + + app = App(token=config.slack_bot_token) + # Pass Slack client to agent for support channel notifications + agent = ClaudeAgent(config, slack_client=app.client) + conversations = ConversationStore() + + def is_allowed_channel(channel: str) -> bool: + """Check if the bot should respond in this channel.""" + if not config.allowed_channels: + return True # No restrictions + return channel in config.allowed_channels + + @app.event("app_mention") + def handle_mention(event: dict[str, Any], say: Any) -> None: + """Handle @mentions of the bot.""" + channel = event.get("channel", "") + thread_ts = event.get("thread_ts") or event.get("ts", "") + user = event.get("user", "") + text = event.get("text", "") + + if not is_allowed_channel(channel): + logger.info(f"Ignoring message in non-allowed channel: {channel}") + return + + # Remove the bot mention from the text + # Format is typically "<@U12345> message" + import re + + text = re.sub(r"<@[A-Z0-9]+>\s*", "", text).strip() + + if not text: + say( + text="Hi! I'm the data transfer bot. 
Tell me what you'd like to transfer and I'll help set it up.", + thread_ts=thread_ts, + ) + return + + logger.info(f"Processing request from {user} in {channel}: {text[:100]}...") + + # Get conversation history for context + history = conversations.get(channel, thread_ts) + + try: + response = agent.process_message( + user_message=text, + channel_id=channel, + thread_ts=thread_ts, + conversation_history=history.copy() if history else None, + ) + + # Store the exchange + conversations.append(channel, thread_ts, {"role": "user", "content": text}) + conversations.append( + channel, thread_ts, {"role": "assistant", "content": response} + ) + + say(text=response, thread_ts=thread_ts) + + except Exception as e: + logger.exception(f"Error processing message: {e}") + say( + text=f"Sorry, I encountered an error processing your request: {str(e)}", + thread_ts=thread_ts, + ) + + @app.event("message") + def handle_message(event: dict[str, Any], say: Any) -> None: + """ + Handle direct messages and thread replies. + + For thread replies, we respond if we've been mentioned in the thread before. + For DMs, we always respond. 
+ """ + # Ignore bot messages to prevent loops + if event.get("bot_id") or event.get("subtype") == "bot_message": + return + + channel = event.get("channel", "") + channel_type = event.get("channel_type", "") + thread_ts = event.get("thread_ts") or event.get("ts", "") + text = event.get("text", "") + user = event.get("user", "") + + # Handle DMs + if channel_type == "im": + if not text: + return + + logger.info(f"Processing DM from {user}: {text[:100]}...") + + history = conversations.get(channel, thread_ts) + + try: + response = agent.process_message( + user_message=text, + channel_id=channel, + thread_ts=thread_ts, + conversation_history=history.copy() if history else None, + ) + + conversations.append( + channel, thread_ts, {"role": "user", "content": text} + ) + conversations.append( + channel, thread_ts, {"role": "assistant", "content": response} + ) + + say(text=response, thread_ts=thread_ts) + + except Exception as e: + logger.exception(f"Error processing DM: {e}") + say( + text=f"Sorry, I encountered an error: {str(e)}", + thread_ts=thread_ts, + ) + + # For channel messages in threads where we've participated, continue responding + elif event.get("thread_ts"): + # Check if we have history in this thread (meaning we've been mentioned) + history = conversations.get(channel, thread_ts) + if history and is_allowed_channel(channel): + logger.info( + f"Continuing thread conversation with {user}: {text[:100]}..." 
+ ) + + try: + response = agent.process_message( + user_message=text, + channel_id=channel, + thread_ts=thread_ts, + conversation_history=history.copy(), + ) + + conversations.append( + channel, thread_ts, {"role": "user", "content": text} + ) + conversations.append( + channel, thread_ts, {"role": "assistant", "content": response} + ) + + say(text=response, thread_ts=thread_ts) + + except Exception as e: + logger.exception(f"Error in thread reply: {e}") + say( + text=f"Sorry, I encountered an error: {str(e)}", + thread_ts=thread_ts, + ) + + # Create socket mode handler + handler = SocketModeHandler(app, config.slack_app_token) + + return app, handler + + +def main() -> None: + """Run the bot.""" + import sys + + logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", + ) + + config = BotConfig.from_env() + + # Validate required config + if not config.slack_bot_token: + print( + "Error: SLACK_BOT_TOKEN environment variable is required", file=sys.stderr + ) + sys.exit(1) + if not config.slack_app_token: + print( + "Error: SLACK_APP_TOKEN environment variable is required", file=sys.stderr + ) + sys.exit(1) + if not config.anthropic_api_key: + print( + "Error: ANTHROPIC_API_KEY environment variable is required", file=sys.stderr + ) + sys.exit(1) + + logger.info("Starting xfer Slack bot...") + logger.info(f"Runs directory: {config.runs_base_dir}") + logger.info(f"Allowed channels: {config.allowed_channels or 'all'}") + + app, handler = create_app(config) + handler.start() + + +if __name__ == "__main__": + main() diff --git a/src/xfer/slackbot/claude_agent.py b/src/xfer/slackbot/claude_agent.py new file mode 100644 index 0000000..2c19c09 --- /dev/null +++ b/src/xfer/slackbot/claude_agent.py @@ -0,0 +1,399 @@ +""" +Claude agent with tool definitions for the xfer Slack bot. + +This module defines the tools Claude can use and handles the conversation flow. 
+""" + +from __future__ import annotations + +import json +from typing import Any + +import anthropic + +from .config import BotConfig +from .slurm_tools import ( + cancel_job, + get_allowed_backends, + get_job_status, + get_jobs_by_thread, + get_transfer_progress, + get_transfer_progress_by_job, + submit_transfer, +) + +# Tool definitions for Claude +TOOLS = [ + { + "name": "submit_transfer", + "description": """Submit a new data transfer job. Use this when the user wants to transfer data from one location to another. + +Before calling this tool: +1. Confirm you have both source and destination paths +2. Validate that the backends are allowed using list_backends first +3. Ask for clarification if the request is ambiguous + +The transfer runs in two phases: +1. A preparation job builds the file manifest and sets up the transfer +2. The transfer array job does the actual data movement + +Both jobs are tracked and the user can check status at any time. + +The source and destination should be in rclone format: "remote:bucket/path" (e.g., "s3src:mybucket/data")""", + "input_schema": { + "type": "object", + "properties": { + "source": { + "type": "string", + "description": "Source path in rclone format (remote:bucket/path)", + }, + "dest": { + "type": "string", + "description": "Destination path in rclone format (remote:bucket/path)", + }, + "num_shards": { + "type": "integer", + "description": "Number of shards to split the transfer into (default: 256)", + }, + "time_limit": { + "type": "string", + "description": "Slurm time limit in HH:MM:SS format (default: 24:00:00)", + }, + "job_name": { + "type": "string", + "description": "Custom name for the Slurm job", + }, + }, + "required": ["source", "dest"], + }, + }, + { + "name": "check_status", + "description": """Check the status of transfer jobs in this thread. Use this when the user asks about job status, progress, or wants to know if their transfer is complete. 
+ +This tool finds all jobs associated with the current Slack thread and returns their status.""", + "input_schema": { + "type": "object", + "properties": { + "job_id": { + "type": "string", + "description": "Specific job ID to check. If not provided, shows all jobs for this thread.", + }, + }, + "required": [], + }, + }, + { + "name": "list_backends", + "description": """List the available/allowed rclone backends for transfers. + +Use this to: +- Show users what storage systems they can transfer to/from +- Validate a backend before submitting a transfer +- Help users understand what options are available""", + "input_schema": { + "type": "object", + "properties": {}, + "required": [], + }, + }, + { + "name": "cancel_job", + "description": """Cancel a running transfer job. + +Only jobs that were submitted from this thread can be cancelled. +Use this when the user explicitly requests to cancel or stop a transfer.""", + "input_schema": { + "type": "object", + "properties": { + "job_id": { + "type": "string", + "description": "The Slurm job ID to cancel", + }, + }, + "required": ["job_id"], + }, + }, + { + "name": "get_transfer_details", + "description": """Get detailed progress information for a transfer job. + +This provides granular progress: how many file shards are complete, failed, or still pending. +Use this when users want detailed progress beyond just the job state.""", + "input_schema": { + "type": "object", + "properties": { + "job_id": { + "type": "string", + "description": "The Slurm job ID to get details for", + }, + }, + "required": ["job_id"], + }, + }, + { + "name": "request_backend_access", + "description": """Flag a request for access to a backend that is not currently allowed. + +Use this when a user needs to transfer to/from a backend that isn't in the allowed list. 
+This notifies the support team to review and potentially add the backend.""", + "input_schema": { + "type": "object", + "properties": { + "backend_name": { + "type": "string", + "description": "The name of the backend being requested", + }, + "justification": { + "type": "string", + "description": "Why the user needs access to this backend", + }, + }, + "required": ["backend_name"], + }, + }, +] + +SYSTEM_PROMPT = """You are a helpful data transfer assistant for an HPC cluster. You help researchers submit and monitor data transfer jobs via Slurm. + +Your capabilities: +- Submit new data transfer jobs (source -> destination) +- Check status of running/completed transfers +- List available storage backends +- Cancel jobs if requested +- Request access to new backends on behalf of users + +Guidelines: +1. Always validate that backends are allowed before submitting transfers +2. If a backend isn't allowed, offer to request access on the user's behalf +3. Provide clear, concise status updates +4. Ask for clarification if the source or destination is ambiguous +5. Be helpful but don't make assumptions about paths - ask if unsure +6. 
When reporting job status, include relevant details like progress and any errors + +Transfer path format: +- Paths should be in rclone format: "remote:bucket/path" +- Example: "s3src:research-data/experiment1" or "gcs:archive-bucket/backups" + +Keep responses brief and focused on the task at hand.""" + + +class ClaudeAgent: + """Agent that uses Claude to interpret requests and execute tools.""" + + def __init__(self, config: BotConfig, slack_client=None): + self.config = config + self.client = anthropic.Anthropic(api_key=config.anthropic_api_key) + self.slack_client = slack_client # For posting to support channel + + def execute_tool( + self, + tool_name: str, + tool_input: dict[str, Any], + channel_id: str, + thread_ts: str, + ) -> str: + """Execute a tool and return the result as a string.""" + if tool_name == "submit_transfer": + result = submit_transfer( + source=tool_input["source"], + dest=tool_input["dest"], + config=self.config, + channel_id=channel_id, + thread_ts=thread_ts, + num_shards=tool_input.get("num_shards"), + time_limit=tool_input.get("time_limit"), + job_name=tool_input.get("job_name"), + ) + return json.dumps(result.__dict__, default=str) + + elif tool_name == "check_status": + job_id = tool_input.get("job_id") + if job_id: + # Get job info with progress if available + progress = get_transfer_progress_by_job(job_id) + if progress: + return json.dumps(progress) + job = get_job_status(job_id) + if job: + return json.dumps(job.to_dict()) + return json.dumps({"error": f"Job {job_id} not found"}) + else: + # Get all jobs for this thread with progress info + jobs = get_jobs_by_thread(channel_id, thread_ts) + results = [] + for job in jobs: + if job.work_dir: + progress = get_transfer_progress_by_job(job.job_id) + if progress: + results.append(progress) + continue + results.append(job.to_dict()) + return json.dumps(results) + + elif tool_name == "list_backends": + backends = get_allowed_backends(self.config) + return json.dumps({"allowed_backends": 
backends}) + + elif tool_name == "cancel_job": + success, message = cancel_job( + tool_input["job_id"], + channel_id, + thread_ts, + ) + return json.dumps({"success": success, "message": message}) + + elif tool_name == "get_transfer_details": + job_id = tool_input["job_id"] + progress = get_transfer_progress_by_job(job_id) + if not progress: + # Try to get basic job info at least + job = get_job_status(job_id) + if job: + return json.dumps( + { + "job": job.to_dict(), + "note": "Could not find run directory for detailed progress", + } + ) + return json.dumps({"error": f"Job {job_id} not found"}) + + return json.dumps(progress) + + elif tool_name == "request_backend_access": + backend_name = tool_input["backend_name"] + justification = tool_input.get("justification", "No justification provided") + + # Post to support channel if configured + support_posted = False + if self.slack_client and self.config.support_channel: + try: + # Build a message linking back to the original thread + self.slack_client.chat_postMessage( + channel=self.config.support_channel, + text=f"Backend access request: {backend_name}", + blocks=[ + { + "type": "header", + "text": { + "type": "plain_text", + "text": "🔐 Backend Access Request", + }, + }, + { + "type": "section", + "fields": [ + { + "type": "mrkdwn", + "text": f"*Backend:*\n`{backend_name}`", + }, + { + "type": "mrkdwn", + "text": f"*Requested from:*\n<#{channel_id}>", + }, + ], + }, + { + "type": "section", + "text": { + "type": "mrkdwn", + "text": f"*Justification:*\n{justification}", + }, + }, + { + "type": "context", + "elements": [ + { + "type": "mrkdwn", + "text": f"Original thread: <#{channel_id}> (thread_ts `{thread_ts}`)", # must be non-empty: Slack rejects empty text objects + } + ], + }, + ], + ) + support_posted = True + except Exception as e: + # Log but don't fail the request + import logging + + logging.getLogger(__name__).error( + f"Failed to post to support channel: {e}" + ) + + return json.dumps( + { + "status": ( + "request_submitted" if support_posted else "request_logged" + ), + "backend": backend_name, + "justification": 
justification, + "support_notified": support_posted, + "message": ( + "Your request for backend access has been submitted to the support team." + if support_posted + else "Your request has been logged but could not notify support. Please contact them directly." + ), + } + ) + + else: + return json.dumps({"error": f"Unknown tool: {tool_name}"}) + + def process_message( + self, + user_message: str, + channel_id: str, + thread_ts: str, + conversation_history: list[dict] | None = None, + ) -> str: + """ + Process a user message and return Claude's response. + + Handles the full tool-use loop until Claude provides a final response. + """ + messages = list(conversation_history or []) # copy: never mutate the caller's history + messages.append({"role": "user", "content": user_message}) + + while True: + response = self.client.messages.create( + model=self.config.claude_model, + max_tokens=1024, + system=SYSTEM_PROMPT, + tools=TOOLS, + messages=messages, + ) + + # Check if we need to execute tools + if response.stop_reason == "tool_use": + # Add assistant's response to history + messages.append({"role": "assistant", "content": response.content}) + + # Execute each tool and collect results + tool_results = [] + for block in response.content: + if block.type == "tool_use": + result = self.execute_tool( + block.name, + block.input, + channel_id, + thread_ts, + ) + tool_results.append( + { + "type": "tool_result", + "tool_use_id": block.id, + "content": result, + } + ) + + # Add tool results and continue the loop + messages.append({"role": "user", "content": tool_results}) + + else: + # Extract text response + text_response = "" + for block in response.content: + if hasattr(block, "text"): + text_response += block.text + + return text_response diff --git a/src/xfer/slackbot/config.py b/src/xfer/slackbot/config.py new file mode 100644 index 0000000..3db3912 --- /dev/null +++ b/src/xfer/slackbot/config.py @@ -0,0 +1,119 @@ +""" +Configuration and defaults for the xfer Slack bot. 
+""" + +from __future__ import annotations + +import os +from dataclasses import dataclass, field +from pathlib import Path +from typing import Optional + + +@dataclass +class SlurmDefaults: + """Default Slurm job parameters.""" + + partition: str = "transfer" + time_limit: str = "24:00:00" + cpus_per_task: int = 4 + mem: str = "8G" + array_concurrency: int = 64 + num_shards: int = 256 + max_attempts: int = 5 + qos: Optional[str] = None # Set via cluster config if needed + + +@dataclass +class RcloneDefaults: + """Default rclone parameters.""" + + image: str = "rclone/rclone:latest" + config_path: Path = field( + default_factory=lambda: Path.home() / ".config/rclone/rclone.conf" + ) + container_conf_path: str = "/etc/rclone/rclone.conf" + flags: str = ( + "--transfers 32 --checkers 64 --fast-list --retries 10 --low-level-retries 20" + ) + + +@dataclass +class BotConfig: + """Configuration for the xfer Slack bot.""" + + # Slack + slack_bot_token: str = field( + default_factory=lambda: os.environ.get("SLACK_BOT_TOKEN", "") + ) + slack_app_token: str = field( + default_factory=lambda: os.environ.get("SLACK_APP_TOKEN", "") + ) + allowed_channels: list[str] = field(default_factory=list) # Empty = all channels + support_channel: Optional[str] = field( + default_factory=lambda: os.environ.get("XFER_SUPPORT_CHANNEL") + ) # Channel ID for backend requests, alerts, etc. 
+ + # Claude + anthropic_api_key: str = field( + default_factory=lambda: os.environ.get("ANTHROPIC_API_KEY", "") + ) + claude_model: str = "claude-sonnet-4-20250514" + + # Paths + runs_base_dir: Path = field(default_factory=lambda: Path.home() / "xfer-runs") + allowed_backends_file: Optional[Path] = ( + None # Path to YAML/JSON listing allowed backends + ) + + # Defaults + slurm: SlurmDefaults = field(default_factory=SlurmDefaults) + rclone: RcloneDefaults = field(default_factory=RcloneDefaults) + + @classmethod + def from_env(cls) -> "BotConfig": + """Load configuration from environment variables.""" + config = cls() + + # Override from env if set + if channels := os.environ.get("XFER_ALLOWED_CHANNELS"): + config.allowed_channels = [ + c.strip() for c in channels.split(",") if c.strip() + ] + + if runs_dir := os.environ.get("XFER_RUNS_DIR"): + config.runs_base_dir = Path(runs_dir) + + if backends_file := os.environ.get("XFER_ALLOWED_BACKENDS_FILE"): + config.allowed_backends_file = Path(backends_file) + + if partition := os.environ.get("XFER_SLURM_PARTITION"): + config.slurm.partition = partition + + if qos := os.environ.get("XFER_SLURM_QOS"): + config.slurm.qos = qos + + if rclone_image := os.environ.get("XFER_RCLONE_IMAGE"): + config.rclone.image = rclone_image + + if rclone_config := os.environ.get("XFER_RCLONE_CONFIG"): + config.rclone.config_path = Path(rclone_config) + + return config + + +def slack_comment(channel_id: str, thread_ts: str) -> str: + """Generate a Slurm job comment from Slack thread identifiers.""" + return f"slack:{channel_id}/{thread_ts}" + + +def parse_slack_comment(comment: str) -> tuple[str, str] | None: + """Parse a Slurm job comment back to (channel_id, thread_ts).""" + if not comment.startswith("slack:"): + return None + try: + rest = comment[6:] # Remove "slack:" + channel_id, thread_ts = rest.split("/", 1) + return channel_id, thread_ts + except ValueError: + return None diff --git a/src/xfer/slackbot/slurm_tools.py 
@dataclass
class JobInfo:
    """Snapshot of one Slurm job, optionally annotated with transfer progress."""

    job_id: str
    array_job_id: Optional[str]
    state: str
    name: str
    comment: str
    work_dir: Optional[str]  # From --chdir, lets us find run directory
    submit_time: Optional[str]
    start_time: Optional[str]
    end_time: Optional[str]
    partition: str
    # Progress info (from state files)
    total_tasks: Optional[int] = None
    completed_tasks: Optional[int] = None
    failed_tasks: Optional[int] = None

    def to_dict(self) -> dict:
        """Return the fields as a plain dict, keyed in declaration order."""
        exported = (
            "job_id",
            "array_job_id",
            "state",
            "name",
            "comment",
            "work_dir",
            "submit_time",
            "start_time",
            "end_time",
            "partition",
            "total_tasks",
            "completed_tasks",
            "failed_tasks",
        )
        return {key: getattr(self, key) for key in exported}
def get_allowed_backends(config: BotConfig) -> list[str]:
    """
    Get list of allowed rclone backends.

    If ``config.allowed_backends_file`` is set and exists, the list is read
    from its ``allowed_backends`` key (YAML for ``.yaml``/``.yml``, JSON
    otherwise). Otherwise the section names of ``config.rclone.config_path``
    are returned; a missing rclone.conf yields an empty list.

    The result is always a list of strings. An empty document, a non-mapping
    document, or a null ``allowed_backends`` entry yields ``[]``; a single
    scalar string is wrapped in a one-element list.

    Raises whatever the YAML/JSON parser raises for a malformed file.
    """
    allow_file = config.allowed_backends_file

    # If explicit allowed list exists, use it
    if allow_file and allow_file.exists():
        content = allow_file.read_text()
        if allow_file.suffix in (".yaml", ".yml"):
            import yaml

            data = yaml.safe_load(content)
        else:
            data = json.loads(content)

        # An empty YAML file or a JSON "null" parses to None, and a hand-edited
        # file may hold a bare list/scalar; neither has an "allowed_backends" key.
        if not isinstance(data, dict):
            return []

        entries = data.get("allowed_backends")
        if isinstance(entries, str):
            # Returning the bare string would make the caller's `name in allowed`
            # a substring test ("s3" in "s3src"), so wrap it instead.
            return [entries]
        if not isinstance(entries, (list, tuple)):
            return []
        return [str(entry) for entry in entries]

    # Otherwise, parse rclone.conf for section headers
    conf_path = config.rclone.config_path
    if not conf_path.exists():
        return []

    # rclone.conf uses INI format: [remote_name]
    return re.findall(r"^\[([^\]]+)\]", conf_path.read_text(), re.MULTILINE)
" + "Contact support if you need access to additional backends." + ) + + +def _write_prepare_script( + run_dir: Path, + source: str, + dest: str, + config: BotConfig, + num_shards: int, + array_concurrency: int, + time_limit: str, + job_name: str, + comment: str, +) -> Path: + """ + Write a batch script that prepares the transfer (manifest + shard + render). + + This script runs as a single Slurm job. When complete, it submits the + transfer array job. + """ + script_path = run_dir / "prepare.sh" + + # Build sbatch extras for the transfer job + sbatch_extras_lines = [f"#SBATCH --comment={shlex.quote(comment)}"] + sbatch_extras_lines.append(f"#SBATCH --chdir={run_dir}") + if config.slurm.qos: + sbatch_extras_lines.append(f"#SBATCH --qos={config.slurm.qos}") + sbatch_extras = "\\n".join(sbatch_extras_lines) + + script_content = f"""#!/usr/bin/env bash +#SBATCH --job-name={job_name}-prepare +#SBATCH --output={run_dir}/prepare-%j.out +#SBATCH --error={run_dir}/prepare-%j.err +#SBATCH --time=04:00:00 +#SBATCH --partition={config.slurm.partition} +#SBATCH --cpus-per-task=8 +#SBATCH --mem=16G +#SBATCH --comment={shlex.quote(comment)} +#SBATCH --chdir={run_dir} +{f"#SBATCH --qos={config.slurm.qos}" if config.slurm.qos else ""} + +set -euo pipefail + +echo "=== Starting manifest build at $(date -Is) ===" +echo "Source: {source}" +echo "Dest: {dest}" +echo "Run dir: {run_dir}" + +# Phase 1: Build manifest +xfer manifest build \\ + --source {shlex.quote(source)} \\ + --dest {shlex.quote(dest)} \\ + --out {run_dir}/manifest.jsonl \\ + --rclone-image {shlex.quote(config.rclone.image)} \\ + --rclone-config {shlex.quote(str(config.rclone.config_path))} + +echo "=== Manifest build complete at $(date -Is) ===" + +# Phase 2: Shard manifest +xfer manifest shard \\ + --in {run_dir}/manifest.jsonl \\ + --outdir {run_dir}/shards \\ + --num-shards {num_shards} + +echo "=== Sharding complete at $(date -Is) ===" + +# Phase 3: Render Slurm scripts +xfer slurm render \\ + --run-dir {run_dir} 
\\ + --num-shards {num_shards} \\ + --array-concurrency {array_concurrency} \\ + --job-name {shlex.quote(job_name)} \\ + --time-limit {time_limit} \\ + --partition {config.slurm.partition} \\ + --cpus-per-task {config.slurm.cpus_per_task} \\ + --mem {config.slurm.mem} \\ + --rclone-image {shlex.quote(config.rclone.image)} \\ + --rclone-config {shlex.quote(str(config.rclone.config_path))} \\ + --rclone-flags {shlex.quote(config.rclone.flags)} \\ + --max-attempts {config.slurm.max_attempts} \\ + --sbatch-extras {shlex.quote(sbatch_extras)} + +echo "=== Render complete at $(date -Is) ===" + +# Phase 4: Submit transfer array job +xfer slurm submit --run-dir {run_dir} + +echo "=== Transfer job submitted at $(date -Is) ===" +""" + + script_path.write_text(script_content) + script_path.chmod(0o755) + return script_path + + +def submit_transfer( + source: str, + dest: str, + config: BotConfig, + channel_id: str, + thread_ts: str, + *, + num_shards: Optional[int] = None, + array_concurrency: Optional[int] = None, + time_limit: Optional[str] = None, + job_name: Optional[str] = None, +) -> TransferResult: + """ + Submit a data transfer job via xfer. + + This submits a two-phase job: + 1. Prepare job: builds manifest, shards, renders scripts, submits transfer job + 2. Transfer job: the actual array job doing the data movement + + The prepare job runs first and submits the transfer job when complete. 
+ """ + # Validate backends + for backend, label in [(source, "source"), (dest, "destination")]: + valid, msg = validate_backend(backend, config) + if not valid: + return TransferResult( + success=False, + job_id=None, + run_dir=None, + message=f"Invalid {label}: {msg}", + ) + + # Generate run directory + timestamp = datetime.utcnow().strftime("%Y%m%d_%H%M%S") + run_name = f"slack_{channel_id}_{timestamp}" + run_dir = config.runs_base_dir / run_name + run_dir.mkdir(parents=True, exist_ok=True) + + # Resolve defaults + num_shards = num_shards or config.slurm.num_shards + array_concurrency = array_concurrency or config.slurm.array_concurrency + time_limit = time_limit or config.slurm.time_limit + job_name = job_name or "xfer-slack" + + # Build comment for tracking + comment = slack_comment(channel_id, thread_ts) + + # Write the prepare script + prepare_script = _write_prepare_script( + run_dir=run_dir, + source=source, + dest=dest, + config=config, + num_shards=num_shards, + array_concurrency=array_concurrency, + time_limit=time_limit, + job_name=job_name, + comment=comment, + ) + + # Submit the prepare job + try: + result = run_cmd(["sbatch", str(prepare_script)], check=True) + + # Parse job ID from output + job_id = None + for line in result.stdout.splitlines(): + if "Submitted batch job" in line: + parts = line.split() + job_id = parts[-1] + break + + # Save request metadata for later reference + request_meta = { + "source": source, + "dest": dest, + "channel_id": channel_id, + "thread_ts": thread_ts, + "prepare_job_id": job_id, + "num_shards": num_shards, + "submitted_at": datetime.utcnow().isoformat() + "Z", + } + (run_dir / "request.json").write_text(json.dumps(request_meta, indent=2)) + + return TransferResult( + success=True, + job_id=job_id, + run_dir=run_dir, + message=( + f"Transfer preparation job submitted. Job ID: {job_id}\n" + f"Run directory: {run_dir}\n" + "The manifest will be built and the transfer will start automatically." 
def _parse_job_from_sacct_json(job_data: dict) -> Optional[JobInfo]:
    """Parse a single job entry from sacct --json output.

    Returns None when the entry carries no job id. Missing, null, or
    unexpectedly shaped sub-objects are treated as absent instead of raising,
    so one odd record does not break a whole listing.
    """
    raw_job_id = job_data.get("job_id")
    if raw_job_id is None or raw_job_id == "":
        return None
    job_id = str(raw_job_id)

    # Handle array jobs. Slurm's JSON encodes optional numbers as
    # {"set": bool, "infinite": bool, "number": int}; a non-array job reports
    # task_id with set=false and number=0, which must not be read as task 0.
    # Entries without a "set" key are treated as set, for older layouts.
    array_job_id = None
    array_info = job_data.get("array")
    if not isinstance(array_info, dict):
        array_info = {}
    task_info = array_info.get("task_id")
    task_number = None
    if isinstance(task_info, dict) and task_info.get("set", True):
        task_number = task_info.get("number")
    if task_number is not None:
        # NOTE(review): array.job_id is taken to be the array's parent id and
        # the top-level job_id the task's own id - confirm against the
        # cluster's Slurm version. Falls back to job_id when absent or 0.
        parent_id = array_info.get("job_id")
        array_job_id = str(parent_id) if parent_id else job_id
        job_id = f"{array_job_id}_{task_number}"

    # Extract state - sacct JSON uses state.current (a list in recent versions)
    state_info = job_data.get("state")
    if isinstance(state_info, dict):
        state = state_info.get("current", ["UNKNOWN"])
    else:
        state = state_info or "UNKNOWN"
    if isinstance(state, list):
        state = state[0] if state else "UNKNOWN"

    # Extract times
    time_info = job_data.get("time")
    if not isinstance(time_info, dict):
        time_info = {}
    submit_time = time_info.get("submission")
    start_time = time_info.get("start")
    end_time = time_info.get("end")

    # Comment is normally {"job": "..."}; accept a plain string as well and
    # normalise null to "" so callers can run substring checks safely.
    comment_info = job_data.get("comment")
    if isinstance(comment_info, dict):
        comment = comment_info.get("job") or ""
    elif isinstance(comment_info, str):
        comment = comment_info
    else:
        comment = ""

    return JobInfo(
        job_id=job_id,
        array_job_id=array_job_id,
        state=state,
        name=job_data.get("name", ""),
        comment=comment,
        # Work directory from --chdir
        work_dir=job_data.get("working_directory"),
        # Falsy timestamps (0 or missing) are reported as None.
        submit_time=str(submit_time) if submit_time else None,
        start_time=str(start_time) if start_time else None,
        end_time=str(end_time) if end_time else None,
        partition=job_data.get("partition", ""),
    )
def get_transfer_progress(run_dir: Path) -> dict:
    """
    Get detailed progress for a transfer by examining state files.

    The phase is derived from which artifacts exist under ``run_dir``:

    - no ``manifest.jsonl``            -> ``"building_manifest"``
    - no ``shards/shards.meta.json``   -> ``"sharding"``
    - no ``state/`` directory          -> ``"waiting_to_start"``
    - otherwise ``"transferring"``, upgraded to ``"complete"`` when every
      shard has a ``.done`` file, or ``"failed"`` when at least one shard
      failed and nothing is pending or in progress.

    ``"complete"`` is only reported when the shard total is known (> 0), so
    an unreadable ``shards.meta.json`` cannot make an idle run look finished.
    ``pending`` is never negative.

    Returns:
        Dict with ``run_dir``, ``phase``, ``total_tasks``, ``completed``,
        ``failed``, ``pending``, ``in_progress``, ``total_bytes``,
        ``total_files``, plus ``source``/``dest``/``prepare_job_id`` when
        ``request.json`` is readable.
    """
    state_dir = run_dir / "state"
    shards_meta = run_dir / "shards" / "shards.meta.json"
    request_meta = run_dir / "request.json"
    manifest_file = run_dir / "manifest.jsonl"

    def load_mapping(path: Path) -> Optional[dict]:
        """Return the JSON object stored at ``path``, or None if it is not one."""
        try:
            loaded = json.loads(path.read_text())
        except json.JSONDecodeError:
            return None
        return loaded if isinstance(loaded, dict) else None

    result = {
        "run_dir": str(run_dir),
        "phase": "unknown",
        "total_tasks": 0,
        "completed": 0,
        "failed": 0,
        "pending": 0,
        "in_progress": 0,
        "total_bytes": 0,
        "total_files": 0,
    }

    # Load request metadata if available
    if request_meta.exists():
        meta = load_mapping(request_meta)
        if meta is not None:
            result["source"] = meta.get("source")
            result["dest"] = meta.get("dest")
            result["prepare_job_id"] = meta.get("prepare_job_id")

    # Determine phase based on what files exist
    if not manifest_file.exists():
        result["phase"] = "building_manifest"
        return result

    if not shards_meta.exists():
        result["phase"] = "sharding"
        return result

    # Get total from shards metadata (null values count as 0)
    meta = load_mapping(shards_meta)
    if meta is not None:
        result["total_tasks"] = meta.get("num_shards", 0) or 0
        result["total_bytes"] = meta.get("bytes_total", 0) or 0
        result["total_files"] = meta.get("num_records", 0) or 0

    if not state_dir.exists():
        result["phase"] = "waiting_to_start"
        result["pending"] = result["total_tasks"]
        return result

    result["phase"] = "transferring"

    # Count state files
    done_files = list(state_dir.glob("shard_*.done"))
    fail_files = list(state_dir.glob("shard_*.fail"))
    result["completed"] = len(done_files)
    result["failed"] = len(fail_files)

    # In progress = has attempt file but no done/fail.
    # Path.stem already drops the extension, leaving "shard_<id>".
    finished_ids = {f.stem.removeprefix("shard_") for f in done_files + fail_files}
    in_progress = sum(
        1
        for attempt in state_dir.glob("shard_*.attempt")
        if attempt.stem.removeprefix("shard_") not in finished_ids
    )
    result["in_progress"] = in_progress

    # Clamp at 0: with an unknown total the subtraction would go negative.
    result["pending"] = max(
        0,
        result["total_tasks"] - result["completed"] - result["failed"] - in_progress,
    )

    # Check if complete
    if result["total_tasks"] > 0 and result["completed"] == result["total_tasks"]:
        result["phase"] = "complete"
    elif result["failed"] > 0 and result["pending"] == 0 and in_progress == 0:
        result["phase"] = "failed"

    return result
def test_slack_comment_format():
    """Round-trip a Slack thread through the Slurm comment encoding."""
    print("\n=== Testing Slack comment format ===")

    thread = ("C07ABC123", "1234567890.123456")

    encoded = slack_comment(*thread)
    print(f"Generated comment: {encoded}")
    assert encoded == "slack:C07ABC123/1234567890.123456"

    decoded = parse_slack_comment(encoded)
    print(f"Parsed back: {decoded}")
    assert decoded == thread

    # Comments without the prefix, or without a "/" separator, are rejected
    for malformed in ("not-a-slack-comment", "slack:invalid"):
        assert parse_slack_comment(malformed) is None

    print("✓ Slack comment format tests passed")
def test_allowed_backends_from_rclone_conf():
    """Check that remotes are discovered from rclone.conf section headers."""
    print("\n=== Testing allowed backends from rclone.conf ===")

    conf_text = """[s3src]
type = s3
provider = AWS

[s3dst]
type = s3
provider = AWS

[gcs-research]
type = google cloud storage
"""

    # delete=False so the file survives the handle being closed; it is
    # removed explicitly in the finally block below.
    with tempfile.NamedTemporaryFile(mode="w", suffix=".conf", delete=False) as handle:
        handle.write(conf_text)
        conf_path = Path(handle.name)

    try:
        config = BotConfig()
        config.allowed_backends_file = None  # force the rclone.conf fallback
        config.rclone.config_path = conf_path

        backends = get_allowed_backends(config)
        print(f"Parsed backends from rclone.conf: {backends}")
        for remote in ("s3src", "s3dst", "gcs-research"):
            assert remote in backends

        print("✓ rclone.conf parsing tests passed")
    finally:
        conf_path.unlink()
comment="slack:C07ABC/1234567890.123456", + ) + + print(f"Script written to: {script_path}") + assert script_path.exists() + + content = script_path.read_text() + print("\n--- Script content (first 50 lines) ---") + for i, line in enumerate(content.splitlines()[:50]): + print(f" {i+1:3}: {line}") + + # Verify key elements + assert "#SBATCH --job-name=test-xfer-prepare" in content + assert "#SBATCH --comment=" in content + assert "#SBATCH --chdir=" in content + assert "#SBATCH --qos=data-transfer" in content + assert "xfer manifest build" in content + assert "xfer manifest shard" in content + assert "xfer slurm render" in content + assert "xfer slurm submit" in content + assert "--num-shards 128" in content + + print("\n✓ Prepare script generation tests passed") + + +def test_sacct_json_parsing(): + """Test parsing of sacct --json output.""" + print("\n=== Testing sacct JSON parsing ===") + + # Simulated sacct --json output for a job + job_data = { + "job_id": 12345, + "name": "xfer-slack", + "partition": "transfer", + "state": {"current": ["RUNNING"]}, + "time": { + "submission": 1706976000, + "start": 1706976060, + "end": 0, + }, + "working_directory": "/scratch/xfer-runs/slack_C07ABC_20260204", + "comment": {"job": "slack:C07ABC/1234567890.123456"}, + } + + job_info = _parse_job_from_sacct_json(job_data) + print(f"Parsed job: {job_info}") + + assert job_info.job_id == "12345" + assert job_info.state == "RUNNING" + assert job_info.name == "xfer-slack" + assert job_info.work_dir == "/scratch/xfer-runs/slack_C07ABC_20260204" + assert "slack:C07ABC" in job_info.comment + + # Test array job + array_job_data = { + "job_id": 12345, + "array": {"task_id": {"number": 42}}, + "name": "xfer-slack", + "partition": "transfer", + "state": {"current": ["COMPLETED"]}, + "time": {}, + "comment": {"job": ""}, + } + + array_job = _parse_job_from_sacct_json(array_job_data) + print(f"Parsed array job: {array_job}") + assert array_job.job_id == "12345_42" + assert 
def test_transfer_progress():
    """Check the counts and phase derived from a partly finished run directory."""
    print("\n=== Testing transfer progress calculation ===")

    with tempfile.TemporaryDirectory() as scratch:
        run_dir = Path(scratch)
        shards_dir = run_dir / "shards"
        state_dir = run_dir / "state"

        # Create directory structure
        for directory in (shards_dir, state_dir):
            directory.mkdir()

        # Write manifest (required before shards)
        (run_dir / "manifest.jsonl").write_text('{"path": "file1.txt"}\n')

        # Write shards metadata
        (shards_dir / "shards.meta.json").write_text(
            json.dumps(
                {
                    "num_shards": 10,
                    "bytes_total": 1073741824,  # 1 GB
                    "num_records": 1000,
                }
            )
        )

        # Write request metadata
        (run_dir / "request.json").write_text(
            json.dumps(
                {
                    "source": "s3src:bucket/data",
                    "dest": "s3dst:archive/data",
                    "prepare_job_id": "12345",
                }
            )
        )

        # Simulate some progress: 3 done, 2 attempted, 1 failed
        for shard in range(3):
            (state_dir / f"shard_{shard}.done").touch()
        for shard, attempt in ((3, "1"), (4, "2")):
            (state_dir / f"shard_{shard}.attempt").write_text(attempt)
        (state_dir / "shard_5.fail").write_text("1")

        progress = get_transfer_progress(run_dir)
        print(f"Progress: {json.dumps(progress, indent=2)}")

        expected = {
            "phase": "transferring",
            "total_tasks": 10,
            "completed": 3,
            "failed": 1,
            "in_progress": 2,  # shard_3 and shard_4 have attempts but no done/fail
            "pending": 4,  # 10 - 3 - 1 - 2
            "total_bytes": 1073741824,
            "source": "s3src:bucket/data",
        }
        for key, wanted in expected.items():
            assert progress[key] == wanted

        print("✓ Transfer progress tests passed")
xfer.slackbot.claude_agent import TOOLS + + tool_names = [t["name"] for t in TOOLS] + print(f"Available tools: {tool_names}") + + expected_tools = [ + "submit_transfer", + "check_status", + "list_backends", + "cancel_job", + "get_transfer_details", + "request_backend_access", + ] + + for tool in expected_tools: + assert tool in tool_names, f"Missing tool: {tool}" + + # Verify tool schemas + for tool in TOOLS: + assert "name" in tool + assert "description" in tool + assert "input_schema" in tool + print(f" ✓ {tool['name']}: {len(tool['description'])} chars, {len(tool['input_schema'].get('properties', {}))} params") + + print("✓ Claude agent tools tests passed") + + +def test_full_flow_simulation(): + """Simulate a full transfer request flow.""" + print("\n=== Simulating full transfer flow ===") + + with tempfile.TemporaryDirectory() as tmpdir: + # Setup config + config = BotConfig() + config.runs_base_dir = Path(tmpdir) + + # Create mock rclone.conf + rclone_conf = Path(tmpdir) / "rclone.conf" + rclone_conf.write_text("[s3src]\ntype=s3\n\n[s3dst]\ntype=s3\n") + config.rclone.config_path = rclone_conf + + print(f"Runs base dir: {config.runs_base_dir}") + print(f"Rclone config: {config.rclone.config_path}") + + # Simulate a transfer request + channel_id = "C07ABC123" + thread_ts = "1234567890.123456" + source = "s3src:research/dataset1" + dest = "s3dst:archive/dataset1" + + print(f"\nSimulating transfer request:") + print(f" Source: {source}") + print(f" Dest: {dest}") + print(f" Channel: {channel_id}") + print(f" Thread: {thread_ts}") + + # Validate backends + valid, msg = validate_backend(source, config) + print(f"\nSource validation: {valid} - {msg}") + assert valid + + valid, msg = validate_backend(dest, config) + print(f"Dest validation: {valid} - {msg}") + assert valid + + # Generate run directory name + from datetime import datetime + + timestamp = datetime.utcnow().strftime("%Y%m%d_%H%M%S") + run_name = f"slack_{channel_id}_{timestamp}" + run_dir = 
config.runs_base_dir / run_name + run_dir.mkdir(parents=True) + print(f"\nRun directory: {run_dir}") + + # Generate prepare script + comment = slack_comment(channel_id, thread_ts) + script_path = _write_prepare_script( + run_dir=run_dir, + source=source, + dest=dest, + config=config, + num_shards=config.slurm.num_shards, + array_concurrency=config.slurm.array_concurrency, + time_limit=config.slurm.time_limit, + job_name="xfer-slack", + comment=comment, + ) + print(f"Prepare script: {script_path}") + assert script_path.exists() + + # Write request metadata (as submit_transfer would) + request_meta = { + "source": source, + "dest": dest, + "channel_id": channel_id, + "thread_ts": thread_ts, + "prepare_job_id": "SIMULATED_12345", + "num_shards": config.slurm.num_shards, + "submitted_at": datetime.utcnow().isoformat() + "Z", + } + (run_dir / "request.json").write_text(json.dumps(request_meta, indent=2)) + print(f"Request metadata written") + + # Check initial progress (no manifest yet) + progress = get_transfer_progress(run_dir) + print(f"\nInitial progress: phase={progress['phase']}") + assert progress["phase"] == "building_manifest" + + # Simulate manifest complete + (run_dir / "manifest.jsonl").write_text('{"path": "file1.txt", "size": 1000}\n') + progress = get_transfer_progress(run_dir) + print(f"After manifest: phase={progress['phase']}") + assert progress["phase"] == "sharding" + + # Simulate sharding complete + (run_dir / "shards").mkdir() + (run_dir / "shards" / "shards.meta.json").write_text( + json.dumps({"num_shards": 4, "bytes_total": 4000, "num_records": 4}) + ) + progress = get_transfer_progress(run_dir) + print(f"After sharding: phase={progress['phase']}") + assert progress["phase"] == "waiting_to_start" + + # Simulate transfer in progress + (run_dir / "state").mkdir() + (run_dir / "state" / "shard_0.done").touch() + (run_dir / "state" / "shard_1.attempt").write_text("1") + progress = get_transfer_progress(run_dir) + print(f"During transfer: 
def main():
    """Run every dry-run test in order, framed by a banner."""
    rule = "=" * 60

    print(rule)
    print("xfer-slackbot Dry Run Tests")
    print(rule)

    # Each check raises AssertionError on failure, which stops the run.
    checks = (
        test_slack_comment_format,
        test_config_defaults,
        test_allowed_backends_from_yaml,
        test_allowed_backends_from_rclone_conf,
        test_prepare_script_generation,
        test_sacct_json_parsing,
        test_transfer_progress,
        test_claude_agent_tools,
        test_full_flow_simulation,
    )
    for check in checks:
        check()

    print("\n" + rule)
    print("All dry-run tests passed! ✓")
    print(rule)
= "exceptiongroup", marker = "python_full_version < '3.11'" }, + { name = "idna" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/96/f0/5eb65b2bb0d09ac6776f2eb54adee6abe8228ea05b20a5ad0e4945de8aac/anyio-4.12.1.tar.gz", hash = "sha256:41cfcc3a4c85d3f05c932da7c26d0201ac36f72abd4435ba90d0464a3ffed703", size = 228685, upload-time = "2026-01-06T11:45:21.246Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/38/0e/27be9fdef66e72d64c0cdc3cc2823101b80585f8119b5c112c2e8f5f7dab/anyio-4.12.1-py3-none-any.whl", hash = "sha256:d405828884fc140aa80a3c667b8beed277f1dfedec42ba031bd6ac3db606ab6c", size = 113592, upload-time = "2026-01-06T11:45:19.497Z" }, +] + [[package]] name = "black" version = "26.1.0" @@ -46,6 +88,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e4/3d/51bdb3ecbfadfaf825ec0c75e1de6077422b4afa2091c6c9ba34fbfc0c2d/black-26.1.0-py3-none-any.whl", hash = "sha256:1054e8e47ebd686e078c0bb0eaf31e6ce69c966058d122f2c0c950311f9f3ede", size = 204010, upload-time = "2026-01-18T04:50:09.978Z" }, ] +[[package]] +name = "certifi" +version = "2026.1.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e0/2d/a891ca51311197f6ad14a7ef42e2399f36cf2f9bd44752b3dc4eab60fdc5/certifi-2026.1.4.tar.gz", hash = "sha256:ac726dd470482006e014ad384921ed6438c457018f4b3d204aea4281258b2120", size = 154268, upload-time = "2026-01-04T02:42:41.825Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e6/ad/3cc14f097111b4de0040c83a525973216457bbeeb63739ef1ed275c1c021/certifi-2026.1.4-py3-none-any.whl", hash = "sha256:9943707519e4add1115f44c2bc244f782c0249876bf51b6599fee1ffbedd685c", size = 152900, upload-time = "2026-01-04T02:42:40.15Z" }, +] + [[package]] name = "cfgv" version = "3.5.0" @@ -85,6 +136,36 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/33/6b/e0547afaf41bf2c42e52430072fa5658766e3d65bd4b03a563d1b6336f57/distlib-0.4.0-py2.py3-none-any.whl", hash = "sha256:9659f7d87e46584a30b5780e43ac7a2143098441670ff0a49d5f9034c54a6c16", size = 469047, upload-time = "2025-07-17T16:51:58.613Z" }, ] +[[package]] +name = "distro" +version = "1.9.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/fc/f8/98eea607f65de6527f8a2e8885fc8015d3e6f5775df186e443e0964a11c3/distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed", size = 60722, upload-time = "2023-12-24T09:54:32.31Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2", size = 20277, upload-time = "2023-12-24T09:54:30.421Z" }, +] + +[[package]] +name = "docstring-parser" +version = "0.17.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b2/9d/c3b43da9515bd270df0f80548d9944e389870713cc1fe2b8fb35fe2bcefd/docstring_parser-0.17.0.tar.gz", hash = "sha256:583de4a309722b3315439bb31d64ba3eebada841f2e2cee23b99df001434c912", size = 27442, upload-time = "2025-07-21T07:35:01.868Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/55/e2/2537ebcff11c1ee1ff17d8d0b6f4db75873e3b0fb32c2d4a2ee31ecb310a/docstring_parser-0.17.0-py3-none-any.whl", hash = "sha256:cf2569abd23dce8099b300f9b4fa8191e9582dda731fd533daf54c4551658708", size = 36896, upload-time = "2025-07-21T07:35:00.684Z" }, +] + +[[package]] +name = "exceptiongroup" +version = "1.3.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/50/79/66800aadf48771f6b62f7eb014e352e5d06856655206165d775e675a02c9/exceptiongroup-1.3.1.tar.gz", hash = "sha256:8b412432c6055b0b7d14c310000ae93352ed6754f70fa8f7c34141f91c4e3219", size = 30371, upload-time = "2025-11-21T23:01:54.787Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8a/0e/97c33bf5009bdbac74fd2beace167cab3f978feb69cc36f1ef79360d6c4e/exceptiongroup-1.3.1-py3-none-any.whl", hash = "sha256:a7a39a3bd276781e98394987d3a5701d0c4edffb633bb7a5144577f82c773598", size = 16740, upload-time = "2025-11-21T23:01:53.443Z" }, +] + [[package]] name = "filelock" version = "3.20.3" @@ -94,6 +175,43 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b5/36/7fb70f04bf00bc646cd5bb45aa9eddb15e19437a28b8fb2b4a5249fac770/filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1", size = 16701, upload-time = "2026-01-09T17:55:04.334Z" }, ] +[[package]] +name = "h11" +version = "0.16.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/01/ee/02a2c011bdab74c6fb3c75474d40b3052059d95df7e73351460c8588d963/h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1", size = 101250, upload-time = "2025-04-24T03:35:25.427Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" }, +] + +[[package]] +name = "httpcore" +version = "1.0.9" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "h11" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/06/94/82699a10bca87a5556c9c59b5963f2d039dbd239f25bc2a63907a05a14cb/httpcore-1.0.9.tar.gz", hash = 
"sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8", size = 85484, upload-time = "2025-04-24T22:06:22.219Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55", size = 78784, upload-time = "2025-04-24T22:06:20.566Z" }, +] + +[[package]] +name = "httpx" +version = "0.28.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "certifi" }, + { name = "httpcore" }, + { name = "idna" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b1/df/48c586a5fe32a0f01324ee087459e112ebb7224f646c0b5023f5e79e9956/httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc", size = 141406, upload-time = "2024-12-06T15:37:23.222Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload-time = "2024-12-06T15:37:21.509Z" }, +] + [[package]] name = "identify" version = "2.6.16" @@ -103,6 +221,112 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b8/58/40fbbcefeda82364720eba5cf2270f98496bdfa19ea75b4cccae79c698e6/identify-2.6.16-py2.py3-none-any.whl", hash = "sha256:391ee4d77741d994189522896270b787aed8670389bfd60f326d677d64a6dfb0", size = 99202, upload-time = "2026-01-12T18:58:56.627Z" }, ] +[[package]] +name = "idna" +version = "3.11" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6f/6d/0703ccc57f3a7233505399edb88de3cbd678da106337b9fcde432b65ed60/idna-3.11.tar.gz", hash = "sha256:795dafcc9c04ed0c1fb032c2aa73654d8e8c5023a7df64a53f39190ada629902", size = 194582, upload-time = 
"2025-10-12T14:55:20.501Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008, upload-time = "2025-10-12T14:55:18.883Z" }, +] + +[[package]] +name = "jiter" +version = "0.13.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0d/5e/4ec91646aee381d01cdb9974e30882c9cd3b8c5d1079d6b5ff4af522439a/jiter-0.13.0.tar.gz", hash = "sha256:f2839f9c2c7e2dffc1bc5929a510e14ce0a946be9365fd1219e7ef342dae14f4", size = 164847, upload-time = "2026-02-02T12:37:56.441Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d0/5a/41da76c5ea07bec1b0472b6b2fdb1b651074d504b19374d7e130e0cdfb25/jiter-0.13.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:2ffc63785fd6c7977defe49b9824ae6ce2b2e2b77ce539bdaf006c26da06342e", size = 311164, upload-time = "2026-02-02T12:35:17.688Z" }, + { url = "https://files.pythonhosted.org/packages/40/cb/4a1bf994a3e869f0d39d10e11efb471b76d0ad70ecbfb591427a46c880c2/jiter-0.13.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4a638816427006c1e3f0013eb66d391d7a3acda99a7b0cf091eff4497ccea33a", size = 320296, upload-time = "2026-02-02T12:35:19.828Z" }, + { url = "https://files.pythonhosted.org/packages/09/82/acd71ca9b50ecebadc3979c541cd717cce2fe2bc86236f4fa597565d8f1a/jiter-0.13.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:19928b5d1ce0ff8c1ee1b9bdef3b5bfc19e8304f1b904e436caf30bc15dc6cf5", size = 352742, upload-time = "2026-02-02T12:35:21.258Z" }, + { url = "https://files.pythonhosted.org/packages/71/03/d1fc996f3aecfd42eb70922edecfb6dd26421c874503e241153ad41df94f/jiter-0.13.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:309549b778b949d731a2f0e1594a3f805716be704a73bf3ad9a807eed5eb5721", size = 363145, upload-time = 
"2026-02-02T12:35:24.653Z" }, + { url = "https://files.pythonhosted.org/packages/f1/61/a30492366378cc7a93088858f8991acd7d959759fe6138c12a4644e58e81/jiter-0.13.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bcdabaea26cb04e25df3103ce47f97466627999260290349a88c8136ecae0060", size = 487683, upload-time = "2026-02-02T12:35:26.162Z" }, + { url = "https://files.pythonhosted.org/packages/20/4e/4223cffa9dbbbc96ed821c5aeb6bca510848c72c02086d1ed3f1da3d58a7/jiter-0.13.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a3a377af27b236abbf665a69b2bdd680e3b5a0bd2af825cd3b81245279a7606c", size = 373579, upload-time = "2026-02-02T12:35:27.582Z" }, + { url = "https://files.pythonhosted.org/packages/fe/c9/b0489a01329ab07a83812d9ebcffe7820a38163c6d9e7da644f926ff877c/jiter-0.13.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fe49d3ff6db74321f144dff9addd4a5874d3105ac5ba7c5b77fac099cfae31ae", size = 362904, upload-time = "2026-02-02T12:35:28.925Z" }, + { url = "https://files.pythonhosted.org/packages/05/af/53e561352a44afcba9a9bc67ee1d320b05a370aed8df54eafe714c4e454d/jiter-0.13.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2113c17c9a67071b0f820733c0893ed1d467b5fcf4414068169e5c2cabddb1e2", size = 392380, upload-time = "2026-02-02T12:35:30.385Z" }, + { url = "https://files.pythonhosted.org/packages/76/2a/dd805c3afb8ed5b326c5ae49e725d1b1255b9754b1b77dbecdc621b20773/jiter-0.13.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:ab1185ca5c8b9491b55ebf6c1e8866b8f68258612899693e24a92c5fdb9455d5", size = 517939, upload-time = "2026-02-02T12:35:31.865Z" }, + { url = "https://files.pythonhosted.org/packages/20/2a/7b67d76f55b8fe14c937e7640389612f05f9a4145fc28ae128aaa5e62257/jiter-0.13.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:9621ca242547edc16400981ca3231e0c91c0c4c1ab8573a596cd9bb3575d5c2b", size = 551696, upload-time = "2026-02-02T12:35:33.306Z" }, + { url = 
"https://files.pythonhosted.org/packages/85/9c/57cdd64dac8f4c6ab8f994fe0eb04dc9fd1db102856a4458fcf8a99dfa62/jiter-0.13.0-cp310-cp310-win32.whl", hash = "sha256:a7637d92b1c9d7a771e8c56f445c7f84396d48f2e756e5978840ecba2fac0894", size = 204592, upload-time = "2026-02-02T12:35:34.58Z" }, + { url = "https://files.pythonhosted.org/packages/a7/38/f4f3ea5788b8a5bae7510a678cdc747eda0c45ffe534f9878ff37e7cf3b3/jiter-0.13.0-cp310-cp310-win_amd64.whl", hash = "sha256:c1b609e5cbd2f52bb74fb721515745b407df26d7b800458bd97cb3b972c29e7d", size = 206016, upload-time = "2026-02-02T12:35:36.435Z" }, + { url = "https://files.pythonhosted.org/packages/71/29/499f8c9eaa8a16751b1c0e45e6f5f1761d180da873d417996cc7bddc8eef/jiter-0.13.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:ea026e70a9a28ebbdddcbcf0f1323128a8db66898a06eaad3a4e62d2f554d096", size = 311157, upload-time = "2026-02-02T12:35:37.758Z" }, + { url = "https://files.pythonhosted.org/packages/50/f6/566364c777d2ab450b92100bea11333c64c38d32caf8dc378b48e5b20c46/jiter-0.13.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:66aa3e663840152d18cc8ff1e4faad3dd181373491b9cfdc6004b92198d67911", size = 319729, upload-time = "2026-02-02T12:35:39.246Z" }, + { url = "https://files.pythonhosted.org/packages/73/dd/560f13ec5e4f116d8ad2658781646cca91b617ae3b8758d4a5076b278f70/jiter-0.13.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c3524798e70655ff19aec58c7d05adb1f074fecff62da857ea9be2b908b6d701", size = 354766, upload-time = "2026-02-02T12:35:40.662Z" }, + { url = "https://files.pythonhosted.org/packages/7c/0d/061faffcfe94608cbc28a0d42a77a74222bdf5055ccdbe5fd2292b94f510/jiter-0.13.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ec7e287d7fbd02cb6e22f9a00dd9c9cd504c40a61f2c61e7e1f9690a82726b4c", size = 362587, upload-time = "2026-02-02T12:35:42.025Z" }, + { url = 
"https://files.pythonhosted.org/packages/92/c9/c66a7864982fd38a9773ec6e932e0398d1262677b8c60faecd02ffb67bf3/jiter-0.13.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:47455245307e4debf2ce6c6e65a717550a0244231240dcf3b8f7d64e4c2f22f4", size = 487537, upload-time = "2026-02-02T12:35:43.459Z" }, + { url = "https://files.pythonhosted.org/packages/6c/86/84eb4352cd3668f16d1a88929b5888a3fe0418ea8c1dfc2ad4e7bf6e069a/jiter-0.13.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ee9da221dca6e0429c2704c1b3655fe7b025204a71d4d9b73390c759d776d165", size = 373717, upload-time = "2026-02-02T12:35:44.928Z" }, + { url = "https://files.pythonhosted.org/packages/6e/09/9fe4c159358176f82d4390407a03f506a8659ed13ca3ac93a843402acecf/jiter-0.13.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:24ab43126d5e05f3d53a36a8e11eb2f23304c6c1117844aaaf9a0aa5e40b5018", size = 362683, upload-time = "2026-02-02T12:35:46.636Z" }, + { url = "https://files.pythonhosted.org/packages/c9/5e/85f3ab9caca0c1d0897937d378b4a515cae9e119730563572361ea0c48ae/jiter-0.13.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9da38b4fedde4fb528c740c2564628fbab737166a0e73d6d46cb4bb5463ff411", size = 392345, upload-time = "2026-02-02T12:35:48.088Z" }, + { url = "https://files.pythonhosted.org/packages/12/4c/05b8629ad546191939e6f0c2f17e29f542a398f4a52fb987bc70b6d1eb8b/jiter-0.13.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:0b34c519e17658ed88d5047999a93547f8889f3c1824120c26ad6be5f27b6cf5", size = 517775, upload-time = "2026-02-02T12:35:49.482Z" }, + { url = "https://files.pythonhosted.org/packages/4d/88/367ea2eb6bc582c7052e4baf5ddf57ebe5ab924a88e0e09830dfb585c02d/jiter-0.13.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d2a6394e6af690d462310a86b53c47ad75ac8c21dc79f120714ea449979cb1d3", size = 551325, upload-time = "2026-02-02T12:35:51.104Z" }, + { url = 
"https://files.pythonhosted.org/packages/f3/12/fa377ffb94a2f28c41afaed093e0d70cfe512035d5ecb0cad0ae4792d35e/jiter-0.13.0-cp311-cp311-win32.whl", hash = "sha256:0f0c065695f616a27c920a56ad0d4fc46415ef8b806bf8fc1cacf25002bd24e1", size = 204709, upload-time = "2026-02-02T12:35:52.467Z" }, + { url = "https://files.pythonhosted.org/packages/cb/16/8e8203ce92f844dfcd3d9d6a5a7322c77077248dbb12da52d23193a839cd/jiter-0.13.0-cp311-cp311-win_amd64.whl", hash = "sha256:0733312953b909688ae3c2d58d043aa040f9f1a6a75693defed7bc2cc4bf2654", size = 204560, upload-time = "2026-02-02T12:35:53.925Z" }, + { url = "https://files.pythonhosted.org/packages/44/26/97cc40663deb17b9e13c3a5cf29251788c271b18ee4d262c8f94798b8336/jiter-0.13.0-cp311-cp311-win_arm64.whl", hash = "sha256:5d9b34ad56761b3bf0fbe8f7e55468704107608512350962d3317ffd7a4382d5", size = 189608, upload-time = "2026-02-02T12:35:55.304Z" }, + { url = "https://files.pythonhosted.org/packages/2e/30/7687e4f87086829955013ca12a9233523349767f69653ebc27036313def9/jiter-0.13.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:0a2bd69fc1d902e89925fc34d1da51b2128019423d7b339a45d9e99c894e0663", size = 307958, upload-time = "2026-02-02T12:35:57.165Z" }, + { url = "https://files.pythonhosted.org/packages/c3/27/e57f9a783246ed95481e6749cc5002a8a767a73177a83c63ea71f0528b90/jiter-0.13.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f917a04240ef31898182f76a332f508f2cc4b57d2b4d7ad2dbfebbfe167eb505", size = 318597, upload-time = "2026-02-02T12:35:58.591Z" }, + { url = "https://files.pythonhosted.org/packages/cf/52/e5719a60ac5d4d7c5995461a94ad5ef962a37c8bf5b088390e6fad59b2ff/jiter-0.13.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c1e2b199f446d3e82246b4fd9236d7cb502dc2222b18698ba0d986d2fecc6152", size = 348821, upload-time = "2026-02-02T12:36:00.093Z" }, + { url = 
"https://files.pythonhosted.org/packages/61/db/c1efc32b8ba4c740ab3fc2d037d8753f67685f475e26b9d6536a4322bcdd/jiter-0.13.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:04670992b576fa65bd056dbac0c39fe8bd67681c380cb2b48efa885711d9d726", size = 364163, upload-time = "2026-02-02T12:36:01.937Z" }, + { url = "https://files.pythonhosted.org/packages/55/8a/fb75556236047c8806995671a18e4a0ad646ed255276f51a20f32dceaeec/jiter-0.13.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5a1aff1fbdb803a376d4d22a8f63f8e7ccbce0b4890c26cc7af9e501ab339ef0", size = 483709, upload-time = "2026-02-02T12:36:03.41Z" }, + { url = "https://files.pythonhosted.org/packages/7e/16/43512e6ee863875693a8e6f6d532e19d650779d6ba9a81593ae40a9088ff/jiter-0.13.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3b3fb8c2053acaef8580809ac1d1f7481a0a0bdc012fd7f5d8b18fb696a5a089", size = 370480, upload-time = "2026-02-02T12:36:04.791Z" }, + { url = "https://files.pythonhosted.org/packages/f8/4c/09b93e30e984a187bc8aaa3510e1ec8dcbdcd71ca05d2f56aac0492453aa/jiter-0.13.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bdaba7d87e66f26a2c45d8cbadcbfc4bf7884182317907baf39cfe9775bb4d93", size = 360735, upload-time = "2026-02-02T12:36:06.994Z" }, + { url = "https://files.pythonhosted.org/packages/1a/1b/46c5e349019874ec5dfa508c14c37e29864ea108d376ae26d90bee238cd7/jiter-0.13.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:7b88d649135aca526da172e48083da915ec086b54e8e73a425ba50999468cc08", size = 391814, upload-time = "2026-02-02T12:36:08.368Z" }, + { url = "https://files.pythonhosted.org/packages/15/9e/26184760e85baee7162ad37b7912797d2077718476bf91517641c92b3639/jiter-0.13.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:e404ea551d35438013c64b4f357b0474c7abf9f781c06d44fcaf7a14c69ff9e2", size = 513990, upload-time = "2026-02-02T12:36:09.993Z" }, + { url = 
"https://files.pythonhosted.org/packages/e9/34/2c9355247d6debad57a0a15e76ab1566ab799388042743656e566b3b7de1/jiter-0.13.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:1f4748aad1b4a93c8bdd70f604d0f748cdc0e8744c5547798acfa52f10e79228", size = 548021, upload-time = "2026-02-02T12:36:11.376Z" }, + { url = "https://files.pythonhosted.org/packages/ac/4a/9f2c23255d04a834398b9c2e0e665382116911dc4d06b795710503cdad25/jiter-0.13.0-cp312-cp312-win32.whl", hash = "sha256:0bf670e3b1445fc4d31612199f1744f67f889ee1bbae703c4b54dc097e5dd394", size = 203024, upload-time = "2026-02-02T12:36:12.682Z" }, + { url = "https://files.pythonhosted.org/packages/09/ee/f0ae675a957ae5a8f160be3e87acea6b11dc7b89f6b7ab057e77b2d2b13a/jiter-0.13.0-cp312-cp312-win_amd64.whl", hash = "sha256:15db60e121e11fe186c0b15236bd5d18381b9ddacdcf4e659feb96fc6c969c92", size = 205424, upload-time = "2026-02-02T12:36:13.93Z" }, + { url = "https://files.pythonhosted.org/packages/1b/02/ae611edf913d3cbf02c97cdb90374af2082c48d7190d74c1111dde08bcdd/jiter-0.13.0-cp312-cp312-win_arm64.whl", hash = "sha256:41f92313d17989102f3cb5dd533a02787cdb99454d494344b0361355da52fcb9", size = 186818, upload-time = "2026-02-02T12:36:15.308Z" }, + { url = "https://files.pythonhosted.org/packages/91/9c/7ee5a6ff4b9991e1a45263bfc46731634c4a2bde27dfda6c8251df2d958c/jiter-0.13.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:1f8a55b848cbabf97d861495cd65f1e5c590246fabca8b48e1747c4dfc8f85bf", size = 306897, upload-time = "2026-02-02T12:36:16.748Z" }, + { url = "https://files.pythonhosted.org/packages/7c/02/be5b870d1d2be5dd6a91bdfb90f248fbb7dcbd21338f092c6b89817c3dbf/jiter-0.13.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f556aa591c00f2c45eb1b89f68f52441a016034d18b65da60e2d2875bbbf344a", size = 317507, upload-time = "2026-02-02T12:36:18.351Z" }, + { url = 
"https://files.pythonhosted.org/packages/da/92/b25d2ec333615f5f284f3a4024f7ce68cfa0604c322c6808b2344c7f5d2b/jiter-0.13.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f7e1d61da332ec412350463891923f960c3073cf1aae93b538f0bb4c8cd46efb", size = 350560, upload-time = "2026-02-02T12:36:19.746Z" }, + { url = "https://files.pythonhosted.org/packages/be/ec/74dcb99fef0aca9fbe56b303bf79f6bd839010cb18ad41000bf6cc71eec0/jiter-0.13.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3097d665a27bc96fd9bbf7f86178037db139f319f785e4757ce7ccbf390db6c2", size = 363232, upload-time = "2026-02-02T12:36:21.243Z" }, + { url = "https://files.pythonhosted.org/packages/1b/37/f17375e0bb2f6a812d4dd92d7616e41917f740f3e71343627da9db2824ce/jiter-0.13.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9d01ecc3a8cbdb6f25a37bd500510550b64ddf9f7d64a107d92f3ccb25035d0f", size = 483727, upload-time = "2026-02-02T12:36:22.688Z" }, + { url = "https://files.pythonhosted.org/packages/77/d2/a71160a5ae1a1e66c1395b37ef77da67513b0adba73b993a27fbe47eb048/jiter-0.13.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ed9bbc30f5d60a3bdf63ae76beb3f9db280d7f195dfcfa61af792d6ce912d159", size = 370799, upload-time = "2026-02-02T12:36:24.106Z" }, + { url = "https://files.pythonhosted.org/packages/01/99/ed5e478ff0eb4e8aa5fd998f9d69603c9fd3f32de3bd16c2b1194f68361c/jiter-0.13.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:98fbafb6e88256f4454de33c1f40203d09fc33ed19162a68b3b257b29ca7f663", size = 359120, upload-time = "2026-02-02T12:36:25.519Z" }, + { url = "https://files.pythonhosted.org/packages/16/be/7ffd08203277a813f732ba897352797fa9493faf8dc7995b31f3d9cb9488/jiter-0.13.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5467696f6b827f1116556cb0db620440380434591e93ecee7fd14d1a491b6daa", size = 390664, upload-time = "2026-02-02T12:36:26.866Z" }, + { url = 
"https://files.pythonhosted.org/packages/d1/84/e0787856196d6d346264d6dcccb01f741e5f0bd014c1d9a2ebe149caf4f3/jiter-0.13.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:2d08c9475d48b92892583df9da592a0e2ac49bcd41fae1fec4f39ba6cf107820", size = 513543, upload-time = "2026-02-02T12:36:28.217Z" }, + { url = "https://files.pythonhosted.org/packages/65/50/ecbd258181c4313cf79bca6c88fb63207d04d5bf5e4f65174114d072aa55/jiter-0.13.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:aed40e099404721d7fcaf5b89bd3b4568a4666358bcac7b6b15c09fb6252ab68", size = 547262, upload-time = "2026-02-02T12:36:29.678Z" }, + { url = "https://files.pythonhosted.org/packages/27/da/68f38d12e7111d2016cd198161b36e1f042bd115c169255bcb7ec823a3bf/jiter-0.13.0-cp313-cp313-win32.whl", hash = "sha256:36ebfbcffafb146d0e6ffb3e74d51e03d9c35ce7c625c8066cdbfc7b953bdc72", size = 200630, upload-time = "2026-02-02T12:36:31.808Z" }, + { url = "https://files.pythonhosted.org/packages/25/65/3bd1a972c9a08ecd22eb3b08a95d1941ebe6938aea620c246cf426ae09c2/jiter-0.13.0-cp313-cp313-win_amd64.whl", hash = "sha256:8d76029f077379374cf0dbc78dbe45b38dec4a2eb78b08b5194ce836b2517afc", size = 202602, upload-time = "2026-02-02T12:36:33.679Z" }, + { url = "https://files.pythonhosted.org/packages/15/fe/13bd3678a311aa67686bb303654792c48206a112068f8b0b21426eb6851e/jiter-0.13.0-cp313-cp313-win_arm64.whl", hash = "sha256:bb7613e1a427cfcb6ea4544f9ac566b93d5bf67e0d48c787eca673ff9c9dff2b", size = 185939, upload-time = "2026-02-02T12:36:35.065Z" }, + { url = "https://files.pythonhosted.org/packages/49/19/a929ec002ad3228bc97ca01dbb14f7632fffdc84a95ec92ceaf4145688ae/jiter-0.13.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:fa476ab5dd49f3bf3a168e05f89358c75a17608dbabb080ef65f96b27c19ab10", size = 316616, upload-time = "2026-02-02T12:36:36.579Z" }, + { url = 
"https://files.pythonhosted.org/packages/52/56/d19a9a194afa37c1728831e5fb81b7722c3de18a3109e8f282bfc23e587a/jiter-0.13.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ade8cb6ff5632a62b7dbd4757d8c5573f7a2e9ae285d6b5b841707d8363205ef", size = 346850, upload-time = "2026-02-02T12:36:38.058Z" }, + { url = "https://files.pythonhosted.org/packages/36/4a/94e831c6bf287754a8a019cb966ed39ff8be6ab78cadecf08df3bb02d505/jiter-0.13.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9950290340acc1adaded363edd94baebcee7dabdfa8bee4790794cd5cfad2af6", size = 358551, upload-time = "2026-02-02T12:36:39.417Z" }, + { url = "https://files.pythonhosted.org/packages/a2/ec/a4c72c822695fa80e55d2b4142b73f0012035d9fcf90eccc56bc060db37c/jiter-0.13.0-cp313-cp313t-win_amd64.whl", hash = "sha256:2b4972c6df33731aac0742b64fd0d18e0a69bc7d6e03108ce7d40c85fd9e3e6d", size = 201950, upload-time = "2026-02-02T12:36:40.791Z" }, + { url = "https://files.pythonhosted.org/packages/b6/00/393553ec27b824fbc29047e9c7cd4a3951d7fbe4a76743f17e44034fa4e4/jiter-0.13.0-cp313-cp313t-win_arm64.whl", hash = "sha256:701a1e77d1e593c1b435315ff625fd071f0998c5f02792038a5ca98899261b7d", size = 185852, upload-time = "2026-02-02T12:36:42.077Z" }, + { url = "https://files.pythonhosted.org/packages/6e/f5/f1997e987211f6f9bd71b8083047b316208b4aca0b529bb5f8c96c89ef3e/jiter-0.13.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:cc5223ab19fe25e2f0bf2643204ad7318896fe3729bf12fde41b77bfc4fafff0", size = 308804, upload-time = "2026-02-02T12:36:43.496Z" }, + { url = "https://files.pythonhosted.org/packages/cd/8f/5482a7677731fd44881f0204981ce2d7175db271f82cba2085dd2212e095/jiter-0.13.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:9776ebe51713acf438fd9b4405fcd86893ae5d03487546dae7f34993217f8a91", size = 318787, upload-time = "2026-02-02T12:36:45.071Z" }, + { url = 
"https://files.pythonhosted.org/packages/f3/b9/7257ac59778f1cd025b26a23c5520a36a424f7f1b068f2442a5b499b7464/jiter-0.13.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:879e768938e7b49b5e90b7e3fecc0dbec01b8cb89595861fb39a8967c5220d09", size = 353880, upload-time = "2026-02-02T12:36:47.365Z" }, + { url = "https://files.pythonhosted.org/packages/c3/87/719eec4a3f0841dad99e3d3604ee4cba36af4419a76f3cb0b8e2e691ad67/jiter-0.13.0-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:682161a67adea11e3aae9038c06c8b4a9a71023228767477d683f69903ebc607", size = 366702, upload-time = "2026-02-02T12:36:48.871Z" }, + { url = "https://files.pythonhosted.org/packages/d2/65/415f0a75cf6921e43365a1bc227c565cb949caca8b7532776e430cbaa530/jiter-0.13.0-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a13b68cd1cd8cc9de8f244ebae18ccb3e4067ad205220ef324c39181e23bbf66", size = 486319, upload-time = "2026-02-02T12:36:53.006Z" }, + { url = "https://files.pythonhosted.org/packages/54/a2/9e12b48e82c6bbc6081fd81abf915e1443add1b13d8fc586e1d90bb02bb8/jiter-0.13.0-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:87ce0f14c6c08892b610686ae8be350bf368467b6acd5085a5b65441e2bf36d2", size = 372289, upload-time = "2026-02-02T12:36:54.593Z" }, + { url = "https://files.pythonhosted.org/packages/4e/c1/e4693f107a1789a239c759a432e9afc592366f04e901470c2af89cfd28e1/jiter-0.13.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0c365005b05505a90d1c47856420980d0237adf82f70c4aff7aebd3c1cc143ad", size = 360165, upload-time = "2026-02-02T12:36:56.112Z" }, + { url = "https://files.pythonhosted.org/packages/17/08/91b9ea976c1c758240614bd88442681a87672eebc3d9a6dde476874e706b/jiter-0.13.0-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1317fdffd16f5873e46ce27d0e0f7f4f90f0cdf1d86bf6abeaea9f63ca2c401d", size = 389634, upload-time = "2026-02-02T12:36:57.495Z" }, + { url = 
"https://files.pythonhosted.org/packages/18/23/58325ef99390d6d40427ed6005bf1ad54f2577866594bcf13ce55675f87d/jiter-0.13.0-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:c05b450d37ba0c9e21c77fef1f205f56bcee2330bddca68d344baebfc55ae0df", size = 514933, upload-time = "2026-02-02T12:36:58.909Z" }, + { url = "https://files.pythonhosted.org/packages/5b/25/69f1120c7c395fd276c3996bb8adefa9c6b84c12bb7111e5c6ccdcd8526d/jiter-0.13.0-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:775e10de3849d0631a97c603f996f518159272db00fdda0a780f81752255ee9d", size = 548842, upload-time = "2026-02-02T12:37:00.433Z" }, + { url = "https://files.pythonhosted.org/packages/18/05/981c9669d86850c5fbb0d9e62bba144787f9fba84546ba43d624ee27ef29/jiter-0.13.0-cp314-cp314-win32.whl", hash = "sha256:632bf7c1d28421c00dd8bbb8a3bac5663e1f57d5cd5ed962bce3c73bf62608e6", size = 202108, upload-time = "2026-02-02T12:37:01.718Z" }, + { url = "https://files.pythonhosted.org/packages/8d/96/cdcf54dd0b0341db7d25413229888a346c7130bd20820530905fdb65727b/jiter-0.13.0-cp314-cp314-win_amd64.whl", hash = "sha256:f22ef501c3f87ede88f23f9b11e608581c14f04db59b6a801f354397ae13739f", size = 204027, upload-time = "2026-02-02T12:37:03.075Z" }, + { url = "https://files.pythonhosted.org/packages/fb/f9/724bcaaab7a3cd727031fe4f6995cb86c4bd344909177c186699c8dec51a/jiter-0.13.0-cp314-cp314-win_arm64.whl", hash = "sha256:07b75fe09a4ee8e0c606200622e571e44943f47254f95e2436c8bdcaceb36d7d", size = 187199, upload-time = "2026-02-02T12:37:04.414Z" }, + { url = "https://files.pythonhosted.org/packages/62/92/1661d8b9fd6a3d7a2d89831db26fe3c1509a287d83ad7838831c7b7a5c7e/jiter-0.13.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:964538479359059a35fb400e769295d4b315ae61e4105396d355a12f7fef09f0", size = 318423, upload-time = "2026-02-02T12:37:05.806Z" }, + { url = 
"https://files.pythonhosted.org/packages/4f/3b/f77d342a54d4ebcd128e520fc58ec2f5b30a423b0fd26acdfc0c6fef8e26/jiter-0.13.0-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e104da1db1c0991b3eaed391ccd650ae8d947eab1480c733e5a3fb28d4313e40", size = 351438, upload-time = "2026-02-02T12:37:07.189Z" }, + { url = "https://files.pythonhosted.org/packages/76/b3/ba9a69f0e4209bd3331470c723c2f5509e6f0482e416b612431a5061ed71/jiter-0.13.0-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0e3a5f0cde8ff433b8e88e41aa40131455420fb3649a3c7abdda6145f8cb7202", size = 364774, upload-time = "2026-02-02T12:37:08.579Z" }, + { url = "https://files.pythonhosted.org/packages/b3/16/6cdb31fa342932602458dbb631bfbd47f601e03d2e4950740e0b2100b570/jiter-0.13.0-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:57aab48f40be1db920a582b30b116fe2435d184f77f0e4226f546794cedd9cf0", size = 487238, upload-time = "2026-02-02T12:37:10.066Z" }, + { url = "https://files.pythonhosted.org/packages/ed/b1/956cc7abaca8d95c13aa8d6c9b3f3797241c246cd6e792934cc4c8b250d2/jiter-0.13.0-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7772115877c53f62beeb8fd853cab692dbc04374ef623b30f997959a4c0e7e95", size = 372892, upload-time = "2026-02-02T12:37:11.656Z" }, + { url = "https://files.pythonhosted.org/packages/26/c4/97ecde8b1e74f67b8598c57c6fccf6df86ea7861ed29da84629cdbba76c4/jiter-0.13.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1211427574b17b633cfceba5040de8081e5abf114f7a7602f73d2e16f9fdaa59", size = 360309, upload-time = "2026-02-02T12:37:13.244Z" }, + { url = "https://files.pythonhosted.org/packages/4b/d7/eabe3cf46715854ccc80be2cd78dd4c36aedeb30751dbf85a1d08c14373c/jiter-0.13.0-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:7beae3a3d3b5212d3a55d2961db3c292e02e302feb43fce6a3f7a31b90ea6dfe", size = 389607, upload-time = "2026-02-02T12:37:14.881Z" }, + { url = 
"https://files.pythonhosted.org/packages/df/2d/03963fc0804e6109b82decfb9974eb92df3797fe7222428cae12f8ccaa0c/jiter-0.13.0-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:e5562a0f0e90a6223b704163ea28e831bd3a9faa3512a711f031611e6b06c939", size = 514986, upload-time = "2026-02-02T12:37:16.326Z" }, + { url = "https://files.pythonhosted.org/packages/f6/6c/8c83b45eb3eb1c1e18d841fe30b4b5bc5619d781267ca9bc03e005d8fd0a/jiter-0.13.0-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:6c26a424569a59140fb51160a56df13f438a2b0967365e987889186d5fc2f6f9", size = 548756, upload-time = "2026-02-02T12:37:17.736Z" }, + { url = "https://files.pythonhosted.org/packages/47/66/eea81dfff765ed66c68fd2ed8c96245109e13c896c2a5015c7839c92367e/jiter-0.13.0-cp314-cp314t-win32.whl", hash = "sha256:24dc96eca9f84da4131cdf87a95e6ce36765c3b156fc9ae33280873b1c32d5f6", size = 201196, upload-time = "2026-02-02T12:37:19.101Z" }, + { url = "https://files.pythonhosted.org/packages/ff/32/4ac9c7a76402f8f00d00842a7f6b83b284d0cf7c1e9d4227bc95aa6d17fa/jiter-0.13.0-cp314-cp314t-win_amd64.whl", hash = "sha256:0a8d76c7524087272c8ae913f5d9d608bd839154b62c4322ef65723d2e5bb0b8", size = 204215, upload-time = "2026-02-02T12:37:20.495Z" }, + { url = "https://files.pythonhosted.org/packages/f9/8e/7def204fea9f9be8b3c21a6f2dd6c020cf56c7d5ff753e0e23ed7f9ea57e/jiter-0.13.0-cp314-cp314t-win_arm64.whl", hash = "sha256:2c26cf47e2cad140fa23b6d58d435a7c0161f5c514284802f25e87fddfe11024", size = 187152, upload-time = "2026-02-02T12:37:22.124Z" }, + { url = "https://files.pythonhosted.org/packages/79/b3/3c29819a27178d0e461a8571fb63c6ae38be6dc36b78b3ec2876bbd6a910/jiter-0.13.0-graalpy311-graalpy242_311_native-macosx_10_12_x86_64.whl", hash = "sha256:b1cbfa133241d0e6bdab48dcdc2604e8ba81512f6bbd68ec3e8e1357dd3c316c", size = 307016, upload-time = "2026-02-02T12:37:42.755Z" }, + { url = 
"https://files.pythonhosted.org/packages/eb/ae/60993e4b07b1ac5ebe46da7aa99fdbb802eb986c38d26e3883ac0125c4e0/jiter-0.13.0-graalpy311-graalpy242_311_native-macosx_11_0_arm64.whl", hash = "sha256:db367d8be9fad6e8ebbac4a7578b7af562e506211036cba2c06c3b998603c3d2", size = 305024, upload-time = "2026-02-02T12:37:44.774Z" }, + { url = "https://files.pythonhosted.org/packages/77/fa/2227e590e9cf98803db2811f172b2d6460a21539ab73006f251c66f44b14/jiter-0.13.0-graalpy311-graalpy242_311_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:45f6f8efb2f3b0603092401dc2df79fa89ccbc027aaba4174d2d4133ed661434", size = 339337, upload-time = "2026-02-02T12:37:46.668Z" }, + { url = "https://files.pythonhosted.org/packages/2d/92/015173281f7eb96c0ef580c997da8ef50870d4f7f4c9e03c845a1d62ae04/jiter-0.13.0-graalpy311-graalpy242_311_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:597245258e6ad085d064780abfb23a284d418d3e61c57362d9449c6c7317ee2d", size = 346395, upload-time = "2026-02-02T12:37:48.09Z" }, + { url = "https://files.pythonhosted.org/packages/80/60/e50fa45dd7e2eae049f0ce964663849e897300433921198aef94b6ffa23a/jiter-0.13.0-graalpy312-graalpy250_312_native-macosx_10_12_x86_64.whl", hash = "sha256:3d744a6061afba08dd7ae375dcde870cffb14429b7477e10f67e9e6d68772a0a", size = 305169, upload-time = "2026-02-02T12:37:50.376Z" }, + { url = "https://files.pythonhosted.org/packages/d2/73/a009f41c5eed71c49bec53036c4b33555afcdee70682a18c6f66e396c039/jiter-0.13.0-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:ff732bd0a0e778f43d5009840f20b935e79087b4dc65bd36f1cd0f9b04b8ff7f", size = 303808, upload-time = "2026-02-02T12:37:52.092Z" }, + { url = "https://files.pythonhosted.org/packages/c4/10/528b439290763bff3d939268085d03382471b442f212dca4ff5f12802d43/jiter-0.13.0-graalpy312-graalpy250_312_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ab44b178f7981fcaea7e0a5df20e773c663d06ffda0198f1a524e91b2fde7e59", size = 337384, 
upload-time = "2026-02-02T12:37:53.582Z" }, + { url = "https://files.pythonhosted.org/packages/67/8a/a342b2f0251f3dac4ca17618265d93bf244a2a4d089126e81e4c1056ac50/jiter-0.13.0-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7bb00b6d26db67a05fe3e12c76edc75f32077fb51deed13822dc648fa373bc19", size = 343768, upload-time = "2026-02-02T12:37:55.055Z" }, +] + [[package]] name = "markdown-it-py" version = "4.0.0" @@ -185,6 +409,139 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5d/19/fd3ef348460c80af7bb4669ea7926651d1f95c23ff2df18b9d24bab4f3fa/pre_commit-4.5.1-py2.py3-none-any.whl", hash = "sha256:3b3afd891e97337708c1674210f8eba659b52a38ea5f822ff142d10786221f77", size = 226437, upload-time = "2025-12-16T21:14:32.409Z" }, ] +[[package]] +name = "pydantic" +version = "2.12.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "annotated-types" }, + { name = "pydantic-core" }, + { name = "typing-extensions" }, + { name = "typing-inspection" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/69/44/36f1a6e523abc58ae5f928898e4aca2e0ea509b5aa6f6f392a5d882be928/pydantic-2.12.5.tar.gz", hash = "sha256:4d351024c75c0f085a9febbb665ce8c0c6ec5d30e903bdb6394b7ede26aebb49", size = 821591, upload-time = "2025-11-26T15:11:46.471Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5a/87/b70ad306ebb6f9b585f114d0ac2137d792b48be34d732d60e597c2f8465a/pydantic-2.12.5-py3-none-any.whl", hash = "sha256:e561593fccf61e8a20fc46dfc2dfe075b8be7d0188df33f221ad1f0139180f9d", size = 463580, upload-time = "2025-11-26T15:11:44.605Z" }, +] + +[[package]] +name = "pydantic-core" +version = "2.41.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/71/70/23b021c950c2addd24ec408e9ab05d59b035b39d97cdc1130e1bce647bb6/pydantic_core-2.41.5.tar.gz", hash = 
"sha256:08daa51ea16ad373ffd5e7606252cc32f07bc72b28284b6bc9c6df804816476e", size = 460952, upload-time = "2025-11-04T13:43:49.098Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c6/90/32c9941e728d564b411d574d8ee0cf09b12ec978cb22b294995bae5549a5/pydantic_core-2.41.5-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:77b63866ca88d804225eaa4af3e664c5faf3568cea95360d21f4725ab6e07146", size = 2107298, upload-time = "2025-11-04T13:39:04.116Z" }, + { url = "https://files.pythonhosted.org/packages/fb/a8/61c96a77fe28993d9a6fb0f4127e05430a267b235a124545d79fea46dd65/pydantic_core-2.41.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:dfa8a0c812ac681395907e71e1274819dec685fec28273a28905df579ef137e2", size = 1901475, upload-time = "2025-11-04T13:39:06.055Z" }, + { url = "https://files.pythonhosted.org/packages/5d/b6/338abf60225acc18cdc08b4faef592d0310923d19a87fba1faf05af5346e/pydantic_core-2.41.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5921a4d3ca3aee735d9fd163808f5e8dd6c6972101e4adbda9a4667908849b97", size = 1918815, upload-time = "2025-11-04T13:39:10.41Z" }, + { url = "https://files.pythonhosted.org/packages/d1/1c/2ed0433e682983d8e8cba9c8d8ef274d4791ec6a6f24c58935b90e780e0a/pydantic_core-2.41.5-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e25c479382d26a2a41b7ebea1043564a937db462816ea07afa8a44c0866d52f9", size = 2065567, upload-time = "2025-11-04T13:39:12.244Z" }, + { url = "https://files.pythonhosted.org/packages/b3/24/cf84974ee7d6eae06b9e63289b7b8f6549d416b5c199ca2d7ce13bbcf619/pydantic_core-2.41.5-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f547144f2966e1e16ae626d8ce72b4cfa0caedc7fa28052001c94fb2fcaa1c52", size = 2230442, upload-time = "2025-11-04T13:39:13.962Z" }, + { url = 
"https://files.pythonhosted.org/packages/fd/21/4e287865504b3edc0136c89c9c09431be326168b1eb7841911cbc877a995/pydantic_core-2.41.5-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6f52298fbd394f9ed112d56f3d11aabd0d5bd27beb3084cc3d8ad069483b8941", size = 2350956, upload-time = "2025-11-04T13:39:15.889Z" }, + { url = "https://files.pythonhosted.org/packages/a8/76/7727ef2ffa4b62fcab916686a68a0426b9b790139720e1934e8ba797e238/pydantic_core-2.41.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:100baa204bb412b74fe285fb0f3a385256dad1d1879f0a5cb1499ed2e83d132a", size = 2068253, upload-time = "2025-11-04T13:39:17.403Z" }, + { url = "https://files.pythonhosted.org/packages/d5/8c/a4abfc79604bcb4c748e18975c44f94f756f08fb04218d5cb87eb0d3a63e/pydantic_core-2.41.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:05a2c8852530ad2812cb7914dc61a1125dc4e06252ee98e5638a12da6cc6fb6c", size = 2177050, upload-time = "2025-11-04T13:39:19.351Z" }, + { url = "https://files.pythonhosted.org/packages/67/b1/de2e9a9a79b480f9cb0b6e8b6ba4c50b18d4e89852426364c66aa82bb7b3/pydantic_core-2.41.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:29452c56df2ed968d18d7e21f4ab0ac55e71dc59524872f6fc57dcf4a3249ed2", size = 2147178, upload-time = "2025-11-04T13:39:21Z" }, + { url = "https://files.pythonhosted.org/packages/16/c1/dfb33f837a47b20417500efaa0378adc6635b3c79e8369ff7a03c494b4ac/pydantic_core-2.41.5-cp310-cp310-musllinux_1_1_armv7l.whl", hash = "sha256:d5160812ea7a8a2ffbe233d8da666880cad0cbaf5d4de74ae15c313213d62556", size = 2341833, upload-time = "2025-11-04T13:39:22.606Z" }, + { url = "https://files.pythonhosted.org/packages/47/36/00f398642a0f4b815a9a558c4f1dca1b4020a7d49562807d7bc9ff279a6c/pydantic_core-2.41.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:df3959765b553b9440adfd3c795617c352154e497a4eaf3752555cfb5da8fc49", size = 2321156, upload-time = "2025-11-04T13:39:25.843Z" }, + { url = 
"https://files.pythonhosted.org/packages/7e/70/cad3acd89fde2010807354d978725ae111ddf6d0ea46d1ea1775b5c1bd0c/pydantic_core-2.41.5-cp310-cp310-win32.whl", hash = "sha256:1f8d33a7f4d5a7889e60dc39856d76d09333d8a6ed0f5f1190635cbec70ec4ba", size = 1989378, upload-time = "2025-11-04T13:39:27.92Z" }, + { url = "https://files.pythonhosted.org/packages/76/92/d338652464c6c367e5608e4488201702cd1cbb0f33f7b6a85a60fe5f3720/pydantic_core-2.41.5-cp310-cp310-win_amd64.whl", hash = "sha256:62de39db01b8d593e45871af2af9e497295db8d73b085f6bfd0b18c83c70a8f9", size = 2013622, upload-time = "2025-11-04T13:39:29.848Z" }, + { url = "https://files.pythonhosted.org/packages/e8/72/74a989dd9f2084b3d9530b0915fdda64ac48831c30dbf7c72a41a5232db8/pydantic_core-2.41.5-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:a3a52f6156e73e7ccb0f8cced536adccb7042be67cb45f9562e12b319c119da6", size = 2105873, upload-time = "2025-11-04T13:39:31.373Z" }, + { url = "https://files.pythonhosted.org/packages/12/44/37e403fd9455708b3b942949e1d7febc02167662bf1a7da5b78ee1ea2842/pydantic_core-2.41.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7f3bf998340c6d4b0c9a2f02d6a400e51f123b59565d74dc60d252ce888c260b", size = 1899826, upload-time = "2025-11-04T13:39:32.897Z" }, + { url = "https://files.pythonhosted.org/packages/33/7f/1d5cab3ccf44c1935a359d51a8a2a9e1a654b744b5e7f80d41b88d501eec/pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:378bec5c66998815d224c9ca994f1e14c0c21cb95d2f52b6021cc0b2a58f2a5a", size = 1917869, upload-time = "2025-11-04T13:39:34.469Z" }, + { url = "https://files.pythonhosted.org/packages/6e/6a/30d94a9674a7fe4f4744052ed6c5e083424510be1e93da5bc47569d11810/pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e7b576130c69225432866fe2f4a469a85a54ade141d96fd396dffcf607b558f8", size = 2063890, upload-time = "2025-11-04T13:39:36.053Z" }, + { url = 
"https://files.pythonhosted.org/packages/50/be/76e5d46203fcb2750e542f32e6c371ffa9b8ad17364cf94bb0818dbfb50c/pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6cb58b9c66f7e4179a2d5e0f849c48eff5c1fca560994d6eb6543abf955a149e", size = 2229740, upload-time = "2025-11-04T13:39:37.753Z" }, + { url = "https://files.pythonhosted.org/packages/d3/ee/fed784df0144793489f87db310a6bbf8118d7b630ed07aa180d6067e653a/pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:88942d3a3dff3afc8288c21e565e476fc278902ae4d6d134f1eeda118cc830b1", size = 2350021, upload-time = "2025-11-04T13:39:40.94Z" }, + { url = "https://files.pythonhosted.org/packages/c8/be/8fed28dd0a180dca19e72c233cbf58efa36df055e5b9d90d64fd1740b828/pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f31d95a179f8d64d90f6831d71fa93290893a33148d890ba15de25642c5d075b", size = 2066378, upload-time = "2025-11-04T13:39:42.523Z" }, + { url = "https://files.pythonhosted.org/packages/b0/3b/698cf8ae1d536a010e05121b4958b1257f0b5522085e335360e53a6b1c8b/pydantic_core-2.41.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c1df3d34aced70add6f867a8cf413e299177e0c22660cc767218373d0779487b", size = 2175761, upload-time = "2025-11-04T13:39:44.553Z" }, + { url = "https://files.pythonhosted.org/packages/b8/ba/15d537423939553116dea94ce02f9c31be0fa9d0b806d427e0308ec17145/pydantic_core-2.41.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:4009935984bd36bd2c774e13f9a09563ce8de4abaa7226f5108262fa3e637284", size = 2146303, upload-time = "2025-11-04T13:39:46.238Z" }, + { url = "https://files.pythonhosted.org/packages/58/7f/0de669bf37d206723795f9c90c82966726a2ab06c336deba4735b55af431/pydantic_core-2.41.5-cp311-cp311-musllinux_1_1_armv7l.whl", hash = "sha256:34a64bc3441dc1213096a20fe27e8e128bd3ff89921706e83c0b1ac971276594", size = 2340355, upload-time = "2025-11-04T13:39:48.002Z" }, + { url = 
"https://files.pythonhosted.org/packages/e5/de/e7482c435b83d7e3c3ee5ee4451f6e8973cff0eb6007d2872ce6383f6398/pydantic_core-2.41.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:c9e19dd6e28fdcaa5a1de679aec4141f691023916427ef9bae8584f9c2fb3b0e", size = 2319875, upload-time = "2025-11-04T13:39:49.705Z" }, + { url = "https://files.pythonhosted.org/packages/fe/e6/8c9e81bb6dd7560e33b9053351c29f30c8194b72f2d6932888581f503482/pydantic_core-2.41.5-cp311-cp311-win32.whl", hash = "sha256:2c010c6ded393148374c0f6f0bf89d206bf3217f201faa0635dcd56bd1520f6b", size = 1987549, upload-time = "2025-11-04T13:39:51.842Z" }, + { url = "https://files.pythonhosted.org/packages/11/66/f14d1d978ea94d1bc21fc98fcf570f9542fe55bfcc40269d4e1a21c19bf7/pydantic_core-2.41.5-cp311-cp311-win_amd64.whl", hash = "sha256:76ee27c6e9c7f16f47db7a94157112a2f3a00e958bc626e2f4ee8bec5c328fbe", size = 2011305, upload-time = "2025-11-04T13:39:53.485Z" }, + { url = "https://files.pythonhosted.org/packages/56/d8/0e271434e8efd03186c5386671328154ee349ff0354d83c74f5caaf096ed/pydantic_core-2.41.5-cp311-cp311-win_arm64.whl", hash = "sha256:4bc36bbc0b7584de96561184ad7f012478987882ebf9f9c389b23f432ea3d90f", size = 1972902, upload-time = "2025-11-04T13:39:56.488Z" }, + { url = "https://files.pythonhosted.org/packages/5f/5d/5f6c63eebb5afee93bcaae4ce9a898f3373ca23df3ccaef086d0233a35a7/pydantic_core-2.41.5-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:f41a7489d32336dbf2199c8c0a215390a751c5b014c2c1c5366e817202e9cdf7", size = 2110990, upload-time = "2025-11-04T13:39:58.079Z" }, + { url = "https://files.pythonhosted.org/packages/aa/32/9c2e8ccb57c01111e0fd091f236c7b371c1bccea0fa85247ac55b1e2b6b6/pydantic_core-2.41.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:070259a8818988b9a84a449a2a7337c7f430a22acc0859c6b110aa7212a6d9c0", size = 1896003, upload-time = "2025-11-04T13:39:59.956Z" }, + { url = 
"https://files.pythonhosted.org/packages/68/b8/a01b53cb0e59139fbc9e4fda3e9724ede8de279097179be4ff31f1abb65a/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e96cea19e34778f8d59fe40775a7a574d95816eb150850a85a7a4c8f4b94ac69", size = 1919200, upload-time = "2025-11-04T13:40:02.241Z" }, + { url = "https://files.pythonhosted.org/packages/38/de/8c36b5198a29bdaade07b5985e80a233a5ac27137846f3bc2d3b40a47360/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ed2e99c456e3fadd05c991f8f437ef902e00eedf34320ba2b0842bd1c3ca3a75", size = 2052578, upload-time = "2025-11-04T13:40:04.401Z" }, + { url = "https://files.pythonhosted.org/packages/00/b5/0e8e4b5b081eac6cb3dbb7e60a65907549a1ce035a724368c330112adfdd/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:65840751b72fbfd82c3c640cff9284545342a4f1eb1586ad0636955b261b0b05", size = 2208504, upload-time = "2025-11-04T13:40:06.072Z" }, + { url = "https://files.pythonhosted.org/packages/77/56/87a61aad59c7c5b9dc8caad5a41a5545cba3810c3e828708b3d7404f6cef/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e536c98a7626a98feb2d3eaf75944ef6f3dbee447e1f841eae16f2f0a72d8ddc", size = 2335816, upload-time = "2025-11-04T13:40:07.835Z" }, + { url = "https://files.pythonhosted.org/packages/0d/76/941cc9f73529988688a665a5c0ecff1112b3d95ab48f81db5f7606f522d3/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eceb81a8d74f9267ef4081e246ffd6d129da5d87e37a77c9bde550cb04870c1c", size = 2075366, upload-time = "2025-11-04T13:40:09.804Z" }, + { url = "https://files.pythonhosted.org/packages/d3/43/ebef01f69baa07a482844faaa0a591bad1ef129253ffd0cdaa9d8a7f72d3/pydantic_core-2.41.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d38548150c39b74aeeb0ce8ee1d8e82696f4a4e16ddc6de7b1d8823f7de4b9b5", size = 2171698, 
upload-time = "2025-11-04T13:40:12.004Z" }, + { url = "https://files.pythonhosted.org/packages/b1/87/41f3202e4193e3bacfc2c065fab7706ebe81af46a83d3e27605029c1f5a6/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:c23e27686783f60290e36827f9c626e63154b82b116d7fe9adba1fda36da706c", size = 2132603, upload-time = "2025-11-04T13:40:13.868Z" }, + { url = "https://files.pythonhosted.org/packages/49/7d/4c00df99cb12070b6bccdef4a195255e6020a550d572768d92cc54dba91a/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:482c982f814460eabe1d3bb0adfdc583387bd4691ef00b90575ca0d2b6fe2294", size = 2329591, upload-time = "2025-11-04T13:40:15.672Z" }, + { url = "https://files.pythonhosted.org/packages/cc/6a/ebf4b1d65d458f3cda6a7335d141305dfa19bdc61140a884d165a8a1bbc7/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:bfea2a5f0b4d8d43adf9d7b8bf019fb46fdd10a2e5cde477fbcb9d1fa08c68e1", size = 2319068, upload-time = "2025-11-04T13:40:17.532Z" }, + { url = "https://files.pythonhosted.org/packages/49/3b/774f2b5cd4192d5ab75870ce4381fd89cf218af999515baf07e7206753f0/pydantic_core-2.41.5-cp312-cp312-win32.whl", hash = "sha256:b74557b16e390ec12dca509bce9264c3bbd128f8a2c376eaa68003d7f327276d", size = 1985908, upload-time = "2025-11-04T13:40:19.309Z" }, + { url = "https://files.pythonhosted.org/packages/86/45/00173a033c801cacf67c190fef088789394feaf88a98a7035b0e40d53dc9/pydantic_core-2.41.5-cp312-cp312-win_amd64.whl", hash = "sha256:1962293292865bca8e54702b08a4f26da73adc83dd1fcf26fbc875b35d81c815", size = 2020145, upload-time = "2025-11-04T13:40:21.548Z" }, + { url = "https://files.pythonhosted.org/packages/f9/22/91fbc821fa6d261b376a3f73809f907cec5ca6025642c463d3488aad22fb/pydantic_core-2.41.5-cp312-cp312-win_arm64.whl", hash = "sha256:1746d4a3d9a794cacae06a5eaaccb4b8643a131d45fbc9af23e353dc0a5ba5c3", size = 1976179, upload-time = "2025-11-04T13:40:23.393Z" }, + { url = 
"https://files.pythonhosted.org/packages/87/06/8806241ff1f70d9939f9af039c6c35f2360cf16e93c2ca76f184e76b1564/pydantic_core-2.41.5-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:941103c9be18ac8daf7b7adca8228f8ed6bb7a1849020f643b3a14d15b1924d9", size = 2120403, upload-time = "2025-11-04T13:40:25.248Z" }, + { url = "https://files.pythonhosted.org/packages/94/02/abfa0e0bda67faa65fef1c84971c7e45928e108fe24333c81f3bfe35d5f5/pydantic_core-2.41.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:112e305c3314f40c93998e567879e887a3160bb8689ef3d2c04b6cc62c33ac34", size = 1896206, upload-time = "2025-11-04T13:40:27.099Z" }, + { url = "https://files.pythonhosted.org/packages/15/df/a4c740c0943e93e6500f9eb23f4ca7ec9bf71b19e608ae5b579678c8d02f/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0cbaad15cb0c90aa221d43c00e77bb33c93e8d36e0bf74760cd00e732d10a6a0", size = 1919307, upload-time = "2025-11-04T13:40:29.806Z" }, + { url = "https://files.pythonhosted.org/packages/9a/e3/6324802931ae1d123528988e0e86587c2072ac2e5394b4bc2bc34b61ff6e/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:03ca43e12fab6023fc79d28ca6b39b05f794ad08ec2feccc59a339b02f2b3d33", size = 2063258, upload-time = "2025-11-04T13:40:33.544Z" }, + { url = "https://files.pythonhosted.org/packages/c9/d4/2230d7151d4957dd79c3044ea26346c148c98fbf0ee6ebd41056f2d62ab5/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dc799088c08fa04e43144b164feb0c13f9a0bc40503f8df3e9fde58a3c0c101e", size = 2214917, upload-time = "2025-11-04T13:40:35.479Z" }, + { url = "https://files.pythonhosted.org/packages/e6/9f/eaac5df17a3672fef0081b6c1bb0b82b33ee89aa5cec0d7b05f52fd4a1fa/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:97aeba56665b4c3235a0e52b2c2f5ae9cd071b8a8310ad27bddb3f7fb30e9aa2", size = 2332186, upload-time = "2025-11-04T13:40:37.436Z" }, + { 
url = "https://files.pythonhosted.org/packages/cf/4e/35a80cae583a37cf15604b44240e45c05e04e86f9cfd766623149297e971/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:406bf18d345822d6c21366031003612b9c77b3e29ffdb0f612367352aab7d586", size = 2073164, upload-time = "2025-11-04T13:40:40.289Z" }, + { url = "https://files.pythonhosted.org/packages/bf/e3/f6e262673c6140dd3305d144d032f7bd5f7497d3871c1428521f19f9efa2/pydantic_core-2.41.5-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b93590ae81f7010dbe380cdeab6f515902ebcbefe0b9327cc4804d74e93ae69d", size = 2179146, upload-time = "2025-11-04T13:40:42.809Z" }, + { url = "https://files.pythonhosted.org/packages/75/c7/20bd7fc05f0c6ea2056a4565c6f36f8968c0924f19b7d97bbfea55780e73/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:01a3d0ab748ee531f4ea6c3e48ad9dac84ddba4b0d82291f87248f2f9de8d740", size = 2137788, upload-time = "2025-11-04T13:40:44.752Z" }, + { url = "https://files.pythonhosted.org/packages/3a/8d/34318ef985c45196e004bc46c6eab2eda437e744c124ef0dbe1ff2c9d06b/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:6561e94ba9dacc9c61bce40e2d6bdc3bfaa0259d3ff36ace3b1e6901936d2e3e", size = 2340133, upload-time = "2025-11-04T13:40:46.66Z" }, + { url = "https://files.pythonhosted.org/packages/9c/59/013626bf8c78a5a5d9350d12e7697d3d4de951a75565496abd40ccd46bee/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:915c3d10f81bec3a74fbd4faebe8391013ba61e5a1a8d48c4455b923bdda7858", size = 2324852, upload-time = "2025-11-04T13:40:48.575Z" }, + { url = "https://files.pythonhosted.org/packages/1a/d9/c248c103856f807ef70c18a4f986693a46a8ffe1602e5d361485da502d20/pydantic_core-2.41.5-cp313-cp313-win32.whl", hash = "sha256:650ae77860b45cfa6e2cdafc42618ceafab3a2d9a3811fcfbd3bbf8ac3c40d36", size = 1994679, upload-time = "2025-11-04T13:40:50.619Z" }, + { url = 
"https://files.pythonhosted.org/packages/9e/8b/341991b158ddab181cff136acd2552c9f35bd30380422a639c0671e99a91/pydantic_core-2.41.5-cp313-cp313-win_amd64.whl", hash = "sha256:79ec52ec461e99e13791ec6508c722742ad745571f234ea6255bed38c6480f11", size = 2019766, upload-time = "2025-11-04T13:40:52.631Z" }, + { url = "https://files.pythonhosted.org/packages/73/7d/f2f9db34af103bea3e09735bb40b021788a5e834c81eedb541991badf8f5/pydantic_core-2.41.5-cp313-cp313-win_arm64.whl", hash = "sha256:3f84d5c1b4ab906093bdc1ff10484838aca54ef08de4afa9de0f5f14d69639cd", size = 1981005, upload-time = "2025-11-04T13:40:54.734Z" }, + { url = "https://files.pythonhosted.org/packages/ea/28/46b7c5c9635ae96ea0fbb779e271a38129df2550f763937659ee6c5dbc65/pydantic_core-2.41.5-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:3f37a19d7ebcdd20b96485056ba9e8b304e27d9904d233d7b1015db320e51f0a", size = 2119622, upload-time = "2025-11-04T13:40:56.68Z" }, + { url = "https://files.pythonhosted.org/packages/74/1a/145646e5687e8d9a1e8d09acb278c8535ebe9e972e1f162ed338a622f193/pydantic_core-2.41.5-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1d1d9764366c73f996edd17abb6d9d7649a7eb690006ab6adbda117717099b14", size = 1891725, upload-time = "2025-11-04T13:40:58.807Z" }, + { url = "https://files.pythonhosted.org/packages/23/04/e89c29e267b8060b40dca97bfc64a19b2a3cf99018167ea1677d96368273/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25e1c2af0fce638d5f1988b686f3b3ea8cd7de5f244ca147c777769e798a9cd1", size = 1915040, upload-time = "2025-11-04T13:41:00.853Z" }, + { url = "https://files.pythonhosted.org/packages/84/a3/15a82ac7bd97992a82257f777b3583d3e84bdb06ba6858f745daa2ec8a85/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:506d766a8727beef16b7adaeb8ee6217c64fc813646b424d0804d67c16eddb66", size = 2063691, upload-time = "2025-11-04T13:41:03.504Z" }, + { url = 
"https://files.pythonhosted.org/packages/74/9b/0046701313c6ef08c0c1cf0e028c67c770a4e1275ca73131563c5f2a310a/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4819fa52133c9aa3c387b3328f25c1facc356491e6135b459f1de698ff64d869", size = 2213897, upload-time = "2025-11-04T13:41:05.804Z" }, + { url = "https://files.pythonhosted.org/packages/8a/cd/6bac76ecd1b27e75a95ca3a9a559c643b3afcd2dd62086d4b7a32a18b169/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2b761d210c9ea91feda40d25b4efe82a1707da2ef62901466a42492c028553a2", size = 2333302, upload-time = "2025-11-04T13:41:07.809Z" }, + { url = "https://files.pythonhosted.org/packages/4c/d2/ef2074dc020dd6e109611a8be4449b98cd25e1b9b8a303c2f0fca2f2bcf7/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:22f0fb8c1c583a3b6f24df2470833b40207e907b90c928cc8d3594b76f874375", size = 2064877, upload-time = "2025-11-04T13:41:09.827Z" }, + { url = "https://files.pythonhosted.org/packages/18/66/e9db17a9a763d72f03de903883c057b2592c09509ccfe468187f2a2eef29/pydantic_core-2.41.5-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2782c870e99878c634505236d81e5443092fba820f0373997ff75f90f68cd553", size = 2180680, upload-time = "2025-11-04T13:41:12.379Z" }, + { url = "https://files.pythonhosted.org/packages/d3/9e/3ce66cebb929f3ced22be85d4c2399b8e85b622db77dad36b73c5387f8f8/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:0177272f88ab8312479336e1d777f6b124537d47f2123f89cb37e0accea97f90", size = 2138960, upload-time = "2025-11-04T13:41:14.627Z" }, + { url = "https://files.pythonhosted.org/packages/a6/62/205a998f4327d2079326b01abee48e502ea739d174f0a89295c481a2272e/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_armv7l.whl", hash = "sha256:63510af5e38f8955b8ee5687740d6ebf7c2a0886d15a6d65c32814613681bc07", size = 2339102, upload-time = "2025-11-04T13:41:16.868Z" }, + { url 
= "https://files.pythonhosted.org/packages/3c/0d/f05e79471e889d74d3d88f5bd20d0ed189ad94c2423d81ff8d0000aab4ff/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:e56ba91f47764cc14f1daacd723e3e82d1a89d783f0f5afe9c364b8bb491ccdb", size = 2326039, upload-time = "2025-11-04T13:41:18.934Z" }, + { url = "https://files.pythonhosted.org/packages/ec/e1/e08a6208bb100da7e0c4b288eed624a703f4d129bde2da475721a80cab32/pydantic_core-2.41.5-cp314-cp314-win32.whl", hash = "sha256:aec5cf2fd867b4ff45b9959f8b20ea3993fc93e63c7363fe6851424c8a7e7c23", size = 1995126, upload-time = "2025-11-04T13:41:21.418Z" }, + { url = "https://files.pythonhosted.org/packages/48/5d/56ba7b24e9557f99c9237e29f5c09913c81eeb2f3217e40e922353668092/pydantic_core-2.41.5-cp314-cp314-win_amd64.whl", hash = "sha256:8e7c86f27c585ef37c35e56a96363ab8de4e549a95512445b85c96d3e2f7c1bf", size = 2015489, upload-time = "2025-11-04T13:41:24.076Z" }, + { url = "https://files.pythonhosted.org/packages/4e/bb/f7a190991ec9e3e0ba22e4993d8755bbc4a32925c0b5b42775c03e8148f9/pydantic_core-2.41.5-cp314-cp314-win_arm64.whl", hash = "sha256:e672ba74fbc2dc8eea59fb6d4aed6845e6905fc2a8afe93175d94a83ba2a01a0", size = 1977288, upload-time = "2025-11-04T13:41:26.33Z" }, + { url = "https://files.pythonhosted.org/packages/92/ed/77542d0c51538e32e15afe7899d79efce4b81eee631d99850edc2f5e9349/pydantic_core-2.41.5-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:8566def80554c3faa0e65ac30ab0932b9e3a5cd7f8323764303d468e5c37595a", size = 2120255, upload-time = "2025-11-04T13:41:28.569Z" }, + { url = "https://files.pythonhosted.org/packages/bb/3d/6913dde84d5be21e284439676168b28d8bbba5600d838b9dca99de0fad71/pydantic_core-2.41.5-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:b80aa5095cd3109962a298ce14110ae16b8c1aece8b72f9dafe81cf597ad80b3", size = 1863760, upload-time = "2025-11-04T13:41:31.055Z" }, + { url = 
"https://files.pythonhosted.org/packages/5a/f0/e5e6b99d4191da102f2b0eb9687aaa7f5bea5d9964071a84effc3e40f997/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3006c3dd9ba34b0c094c544c6006cc79e87d8612999f1a5d43b769b89181f23c", size = 1878092, upload-time = "2025-11-04T13:41:33.21Z" }, + { url = "https://files.pythonhosted.org/packages/71/48/36fb760642d568925953bcc8116455513d6e34c4beaa37544118c36aba6d/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:72f6c8b11857a856bcfa48c86f5368439f74453563f951e473514579d44aa612", size = 2053385, upload-time = "2025-11-04T13:41:35.508Z" }, + { url = "https://files.pythonhosted.org/packages/20/25/92dc684dd8eb75a234bc1c764b4210cf2646479d54b47bf46061657292a8/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5cb1b2f9742240e4bb26b652a5aeb840aa4b417c7748b6f8387927bc6e45e40d", size = 2218832, upload-time = "2025-11-04T13:41:37.732Z" }, + { url = "https://files.pythonhosted.org/packages/e2/09/f53e0b05023d3e30357d82eb35835d0f6340ca344720a4599cd663dca599/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bd3d54f38609ff308209bd43acea66061494157703364ae40c951f83ba99a1a9", size = 2327585, upload-time = "2025-11-04T13:41:40Z" }, + { url = "https://files.pythonhosted.org/packages/aa/4e/2ae1aa85d6af35a39b236b1b1641de73f5a6ac4d5a7509f77b814885760c/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ff4321e56e879ee8d2a879501c8e469414d948f4aba74a2d4593184eb326660", size = 2041078, upload-time = "2025-11-04T13:41:42.323Z" }, + { url = "https://files.pythonhosted.org/packages/cd/13/2e215f17f0ef326fc72afe94776edb77525142c693767fc347ed6288728d/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d0d2568a8c11bf8225044aa94409e21da0cb09dcdafe9ecd10250b2baad531a9", size = 2173914, 
upload-time = "2025-11-04T13:41:45.221Z" }, + { url = "https://files.pythonhosted.org/packages/02/7a/f999a6dcbcd0e5660bc348a3991c8915ce6599f4f2c6ac22f01d7a10816c/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:a39455728aabd58ceabb03c90e12f71fd30fa69615760a075b9fec596456ccc3", size = 2129560, upload-time = "2025-11-04T13:41:47.474Z" }, + { url = "https://files.pythonhosted.org/packages/3a/b1/6c990ac65e3b4c079a4fb9f5b05f5b013afa0f4ed6780a3dd236d2cbdc64/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_armv7l.whl", hash = "sha256:239edca560d05757817c13dc17c50766136d21f7cd0fac50295499ae24f90fdf", size = 2329244, upload-time = "2025-11-04T13:41:49.992Z" }, + { url = "https://files.pythonhosted.org/packages/d9/02/3c562f3a51afd4d88fff8dffb1771b30cfdfd79befd9883ee094f5b6c0d8/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:2a5e06546e19f24c6a96a129142a75cee553cc018ffee48a460059b1185f4470", size = 2331955, upload-time = "2025-11-04T13:41:54.079Z" }, + { url = "https://files.pythonhosted.org/packages/5c/96/5fb7d8c3c17bc8c62fdb031c47d77a1af698f1d7a406b0f79aaa1338f9ad/pydantic_core-2.41.5-cp314-cp314t-win32.whl", hash = "sha256:b4ececa40ac28afa90871c2cc2b9ffd2ff0bf749380fbdf57d165fd23da353aa", size = 1988906, upload-time = "2025-11-04T13:41:56.606Z" }, + { url = "https://files.pythonhosted.org/packages/22/ed/182129d83032702912c2e2d8bbe33c036f342cc735737064668585dac28f/pydantic_core-2.41.5-cp314-cp314t-win_amd64.whl", hash = "sha256:80aa89cad80b32a912a65332f64a4450ed00966111b6615ca6816153d3585a8c", size = 1981607, upload-time = "2025-11-04T13:41:58.889Z" }, + { url = "https://files.pythonhosted.org/packages/9f/ed/068e41660b832bb0b1aa5b58011dea2a3fe0ba7861ff38c4d4904c1c1a99/pydantic_core-2.41.5-cp314-cp314t-win_arm64.whl", hash = "sha256:35b44f37a3199f771c3eaa53051bc8a70cd7b54f333531c59e29fd4db5d15008", size = 1974769, upload-time = "2025-11-04T13:42:01.186Z" }, + { url = 
"https://files.pythonhosted.org/packages/11/72/90fda5ee3b97e51c494938a4a44c3a35a9c96c19bba12372fb9c634d6f57/pydantic_core-2.41.5-graalpy311-graalpy242_311_native-macosx_10_12_x86_64.whl", hash = "sha256:b96d5f26b05d03cc60f11a7761a5ded1741da411e7fe0909e27a5e6a0cb7b034", size = 2115441, upload-time = "2025-11-04T13:42:39.557Z" }, + { url = "https://files.pythonhosted.org/packages/1f/53/8942f884fa33f50794f119012dc6a1a02ac43a56407adaac20463df8e98f/pydantic_core-2.41.5-graalpy311-graalpy242_311_native-macosx_11_0_arm64.whl", hash = "sha256:634e8609e89ceecea15e2d61bc9ac3718caaaa71963717bf3c8f38bfde64242c", size = 1930291, upload-time = "2025-11-04T13:42:42.169Z" }, + { url = "https://files.pythonhosted.org/packages/79/c8/ecb9ed9cd942bce09fc888ee960b52654fbdbede4ba6c2d6e0d3b1d8b49c/pydantic_core-2.41.5-graalpy311-graalpy242_311_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:93e8740d7503eb008aa2df04d3b9735f845d43ae845e6dcd2be0b55a2da43cd2", size = 1948632, upload-time = "2025-11-04T13:42:44.564Z" }, + { url = "https://files.pythonhosted.org/packages/2e/1b/687711069de7efa6af934e74f601e2a4307365e8fdc404703afc453eab26/pydantic_core-2.41.5-graalpy311-graalpy242_311_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f15489ba13d61f670dcc96772e733aad1a6f9c429cc27574c6cdaed82d0146ad", size = 2138905, upload-time = "2025-11-04T13:42:47.156Z" }, + { url = "https://files.pythonhosted.org/packages/09/32/59b0c7e63e277fa7911c2fc70ccfb45ce4b98991e7ef37110663437005af/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-macosx_10_12_x86_64.whl", hash = "sha256:7da7087d756b19037bc2c06edc6c170eeef3c3bafcb8f532ff17d64dc427adfd", size = 2110495, upload-time = "2025-11-04T13:42:49.689Z" }, + { url = "https://files.pythonhosted.org/packages/aa/81/05e400037eaf55ad400bcd318c05bb345b57e708887f07ddb2d20e3f0e98/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = 
"sha256:aabf5777b5c8ca26f7824cb4a120a740c9588ed58df9b2d196ce92fba42ff8dc", size = 1915388, upload-time = "2025-11-04T13:42:52.215Z" }, + { url = "https://files.pythonhosted.org/packages/6e/0d/e3549b2399f71d56476b77dbf3cf8937cec5cd70536bdc0e374a421d0599/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c007fe8a43d43b3969e8469004e9845944f1a80e6acd47c150856bb87f230c56", size = 1942879, upload-time = "2025-11-04T13:42:56.483Z" }, + { url = "https://files.pythonhosted.org/packages/f7/07/34573da085946b6a313d7c42f82f16e8920bfd730665de2d11c0c37a74b5/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:76d0819de158cd855d1cbb8fcafdf6f5cf1eb8e470abe056d5d161106e38062b", size = 2139017, upload-time = "2025-11-04T13:42:59.471Z" }, + { url = "https://files.pythonhosted.org/packages/e6/b0/1a2aa41e3b5a4ba11420aba2d091b2d17959c8d1519ece3627c371951e73/pydantic_core-2.41.5-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:b5819cd790dbf0c5eb9f82c73c16b39a65dd6dd4d1439dcdea7816ec9adddab8", size = 2103351, upload-time = "2025-11-04T13:43:02.058Z" }, + { url = "https://files.pythonhosted.org/packages/a4/ee/31b1f0020baaf6d091c87900ae05c6aeae101fa4e188e1613c80e4f1ea31/pydantic_core-2.41.5-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:5a4e67afbc95fa5c34cf27d9089bca7fcab4e51e57278d710320a70b956d1b9a", size = 1925363, upload-time = "2025-11-04T13:43:05.159Z" }, + { url = "https://files.pythonhosted.org/packages/e1/89/ab8e86208467e467a80deaca4e434adac37b10a9d134cd2f99b28a01e483/pydantic_core-2.41.5-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ece5c59f0ce7d001e017643d8d24da587ea1f74f6993467d85ae8a5ef9d4f42b", size = 2135615, upload-time = "2025-11-04T13:43:08.116Z" }, + { url = 
"https://files.pythonhosted.org/packages/99/0a/99a53d06dd0348b2008f2f30884b34719c323f16c3be4e6cc1203b74a91d/pydantic_core-2.41.5-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:16f80f7abe3351f8ea6858914ddc8c77e02578544a0ebc15b4c2e1a0e813b0b2", size = 2175369, upload-time = "2025-11-04T13:43:12.49Z" }, + { url = "https://files.pythonhosted.org/packages/6d/94/30ca3b73c6d485b9bb0bc66e611cff4a7138ff9736b7e66bcf0852151636/pydantic_core-2.41.5-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:33cb885e759a705b426baada1fe68cbb0a2e68e34c5d0d0289a364cf01709093", size = 2144218, upload-time = "2025-11-04T13:43:15.431Z" }, + { url = "https://files.pythonhosted.org/packages/87/57/31b4f8e12680b739a91f472b5671294236b82586889ef764b5fbc6669238/pydantic_core-2.41.5-pp310-pypy310_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:c8d8b4eb992936023be7dee581270af5c6e0697a8559895f527f5b7105ecd36a", size = 2329951, upload-time = "2025-11-04T13:43:18.062Z" }, + { url = "https://files.pythonhosted.org/packages/7d/73/3c2c8edef77b8f7310e6fb012dbc4b8551386ed575b9eb6fb2506e28a7eb/pydantic_core-2.41.5-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:242a206cd0318f95cd21bdacff3fcc3aab23e79bba5cac3db5a841c9ef9c6963", size = 2318428, upload-time = "2025-11-04T13:43:20.679Z" }, + { url = "https://files.pythonhosted.org/packages/2f/02/8559b1f26ee0d502c74f9cca5c0d2fd97e967e083e006bbbb4e97f3a043a/pydantic_core-2.41.5-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:d3a978c4f57a597908b7e697229d996d77a6d3c94901e9edee593adada95ce1a", size = 2147009, upload-time = "2025-11-04T13:43:23.286Z" }, + { url = "https://files.pythonhosted.org/packages/5f/9b/1b3f0e9f9305839d7e84912f9e8bfbd191ed1b1ef48083609f0dabde978c/pydantic_core-2.41.5-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:b2379fa7ed44ddecb5bfe4e48577d752db9fc10be00a6b7446e9663ba143de26", size = 2101980, upload-time = "2025-11-04T13:43:25.97Z" }, + { url = 
"https://files.pythonhosted.org/packages/a4/ed/d71fefcb4263df0da6a85b5d8a7508360f2f2e9b3bf5814be9c8bccdccc1/pydantic_core-2.41.5-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:266fb4cbf5e3cbd0b53669a6d1b039c45e3ce651fd5442eff4d07c2cc8d66808", size = 1923865, upload-time = "2025-11-04T13:43:28.763Z" }, + { url = "https://files.pythonhosted.org/packages/ce/3a/626b38db460d675f873e4444b4bb030453bbe7b4ba55df821d026a0493c4/pydantic_core-2.41.5-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:58133647260ea01e4d0500089a8c4f07bd7aa6ce109682b1426394988d8aaacc", size = 2134256, upload-time = "2025-11-04T13:43:31.71Z" }, + { url = "https://files.pythonhosted.org/packages/83/d9/8412d7f06f616bbc053d30cb4e5f76786af3221462ad5eee1f202021eb4e/pydantic_core-2.41.5-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:287dad91cfb551c363dc62899a80e9e14da1f0e2b6ebde82c806612ca2a13ef1", size = 2174762, upload-time = "2025-11-04T13:43:34.744Z" }, + { url = "https://files.pythonhosted.org/packages/55/4c/162d906b8e3ba3a99354e20faa1b49a85206c47de97a639510a0e673f5da/pydantic_core-2.41.5-pp311-pypy311_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:03b77d184b9eb40240ae9fd676ca364ce1085f203e1b1256f8ab9984dca80a84", size = 2143141, upload-time = "2025-11-04T13:43:37.701Z" }, + { url = "https://files.pythonhosted.org/packages/1f/f2/f11dd73284122713f5f89fc940f370d035fa8e1e078d446b3313955157fe/pydantic_core-2.41.5-pp311-pypy311_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:a668ce24de96165bb239160b3d854943128f4334822900534f2fe947930e5770", size = 2330317, upload-time = "2025-11-04T13:43:40.406Z" }, + { url = "https://files.pythonhosted.org/packages/88/9d/b06ca6acfe4abb296110fb1273a4d848a0bfb2ff65f3ee92127b3244e16b/pydantic_core-2.41.5-pp311-pypy311_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:f14f8f046c14563f8eb3f45f499cc658ab8d10072961e07225e507adb700e93f", size = 2316992, upload-time = "2025-11-04T13:43:43.602Z" }, + { url = 
"https://files.pythonhosted.org/packages/36/c7/cfc8e811f061c841d7990b0201912c3556bfeb99cdcb7ed24adc8d6f8704/pydantic_core-2.41.5-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:56121965f7a4dc965bff783d70b907ddf3d57f6eba29b6d2e5dabfaf07799c51", size = 2145302, upload-time = "2025-11-04T13:43:46.64Z" }, +] + [[package]] name = "pygments" version = "2.19.2" @@ -319,6 +676,36 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755, upload-time = "2023-10-24T04:13:38.866Z" }, ] +[[package]] +name = "slack-bolt" +version = "1.27.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "slack-sdk" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/4c/28/50ed0b86e48b48e6ddcc71de93b91c8ac14a55d1249e4bff0586494a2f90/slack_bolt-1.27.0.tar.gz", hash = "sha256:3db91d64e277e176a565c574ae82748aa8554f19e41a4fceadca4d65374ce1e0", size = 129101, upload-time = "2025-11-13T20:17:46.878Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/01/a8/1acb355759747ba4da5f45c1a33d641994b9e04b914908c9434f18bd97e8/slack_bolt-1.27.0-py2.py3-none-any.whl", hash = "sha256:c43c94bf34740f2adeb9b55566c83f1e73fed6ba2878bd346cdfd6fd8ad22360", size = 230428, upload-time = "2025-11-13T20:17:45.465Z" }, +] + +[[package]] +name = "slack-sdk" +version = "3.39.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b6/dd/645f3eb93fce38eadbb649e85684730b1fc3906c2674ca59bddc2ca2bd2e/slack_sdk-3.39.0.tar.gz", hash = "sha256:6a56be10dc155c436ff658c6b776e1c082e29eae6a771fccf8b0a235822bbcb1", size = 247207, upload-time = "2025-11-20T15:27:57.556Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/ef/1f/32bcf088e535c1870b1a1f2e3b916129c66fdfe565a793316317241d41e5/slack_sdk-3.39.0-py2.py3-none-any.whl", hash = "sha256:b1556b2f5b8b12b94e5ea3f56c4f2c7f04462e4e1013d325c5764ff118044fa8", size = 309850, upload-time = "2025-11-20T15:27:55.729Z" }, +] + +[[package]] +name = "sniffio" +version = "1.3.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/87/a6771e1546d97e7e041b6ae58d80074f81b7d5121207425c964ddf5cfdbd/sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc", size = 20372, upload-time = "2024-02-25T23:20:04.057Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235, upload-time = "2024-02-25T23:20:01.196Z" }, +] + [[package]] name = "tomli" version = "2.4.0" @@ -397,6 +784,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548", size = 44614, upload-time = "2025-08-25T13:49:24.86Z" }, ] +[[package]] +name = "typing-inspection" +version = "0.4.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/55/e3/70399cb7dd41c10ac53367ae42139cf4b1ca5f36bb3dc6c9d33acdb43655/typing_inspection-0.4.2.tar.gz", hash = "sha256:ba561c48a67c5958007083d386c3295464928b01faa735ab8547c5692e87f464", size = 75949, upload-time = "2025-10-01T02:14:41.687Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/dc/9b/47798a6c91d8bdb567fe2698fe81e0c6b7cb7ef4d13da4114b41d239f65d/typing_inspection-0.4.2-py3-none-any.whl", hash = "sha256:4ed1cacbdc298c220f1bd249ed5287caa16f34d44ef4e9c3d0cbad5b521545e7", size = 14611, upload-time = "2025-10-01T02:14:40.154Z" }, +] + [[package]] name = "virtualenv" version = "20.36.1" @@ -421,6 +820,13 @@ dependencies = [ { name = "typer" }, ] +[package.optional-dependencies] +slackbot = [ + { name = "anthropic" }, + { name = "pyyaml" }, + { name = "slack-bolt" }, +] + [package.dev-dependencies] dev = [ { name = "black" }, @@ -429,9 +835,13 @@ dev = [ [package.metadata] requires-dist = [ + { name = "anthropic", marker = "extra == 'slackbot'", specifier = ">=0.39.0" }, + { name = "pyyaml", marker = "extra == 'slackbot'", specifier = ">=6.0" }, { name = "rich", specifier = ">=13.7.1" }, + { name = "slack-bolt", marker = "extra == 'slackbot'", specifier = ">=1.18.0" }, { name = "typer", specifier = ">=0.12.3" }, ] +provides-extras = ["slackbot"] [package.metadata.requires-dev] dev = [ From 612e4767808b43fa1c2ea9894282dd6b72e656b4 Mon Sep 17 00:00:00 2001 From: Joe Schoonover Date: Wed, 4 Feb 2026 17:09:53 -0500 Subject: [PATCH 02/25] Add intelligent rclone flag selection and manifest stats - Add automatic file size analysis to select optimal rclone flags based on file distribution (small files, large files, or mixed workloads) - Add default --stats 600s --progress flags for ETA tracking in logs - Add user-specified rclone flags support (appended to intelligent defaults) - Add get_manifest_stats tool for previewing data before transfer - Add xfer manifest analyze CLI command - Update documentation with new capabilities Co-Authored-By: Claude Opus 4.5 --- README.md | 65 ++++++++-- docs/slack-app-setup.md | 38 ++++++ src/xfer/cli.py | 91 ++++++++++++++ src/xfer/est.py | 193 ++++++++++++++++++++++++++++ src/xfer/slackbot/claude_agent.py | 55 ++++++++ src/xfer/slackbot/config.py | 3 +- 
src/xfer/slackbot/slurm_tools.py | 201 +++++++++++++++++++++++++++++- 7 files changed, 634 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 654d766..8baefc6 100644 --- a/README.md +++ b/README.md @@ -150,7 +150,35 @@ run/ --- -### 2. Shard the manifest +### 2. Analyze file size distribution (optional) + +Analyze the manifest to see file size statistics and get suggested rclone flags: + +```bash +uv run xfer manifest analyze \ + --in run/manifest.jsonl \ + --out run/analysis.json +``` + +Output (`analysis.json`): +```json +{ + "status": "ok", + "total_files": 125000, + "total_bytes": 5368709120, + "total_bytes_human": "5.00 GiB", + "profile": "small_files", + "profile_explanation": "Optimized for small files (82% < 1MB, median 256 KiB)", + "suggested_flags": "--transfers 64 --checkers 128 --fast-list --retries 10 ...", + "histogram": [...] +} +``` + +The Slack bot runs this analysis automatically on every transfer to select optimal flags. + +--- + +### 3. Shard the manifest Splits the manifest into balanced shards (by total bytes): @@ -174,7 +202,7 @@ run/ --- -### 3. Render Slurm scripts +### 4. Render Slurm scripts Creates: @@ -199,7 +227,7 @@ uv run xfer slurm render \ --- -### 4. Submit the job +### 5. 
Submit the job ```bash uv run xfer slurm submit --run-dir run @@ -264,7 +292,21 @@ sbatch run/sbatch_array.sh ## Recommended rclone flags (starting points) -### High-throughput S3↔S3 +### Intelligent Auto-Selection (Slack Bot) + +When using the Slack bot, rclone flags are **automatically selected** based on file size distribution analysis: + +| Profile | Condition | Flags | +|---------|-----------|-------| +| **Small files** | >70% files < 1MB | `--transfers 64 --checkers 128 --fast-list` | +| **Large files** | >50% files > 100MB | `--transfers 16 --checkers 32 --buffer-size 256M` | +| **Mixed** | Default | `--transfers 32 --checkers 64 --fast-list` | + +All profiles include `--retries 10 --low-level-retries 20 --stats 600s --progress` for reliability and logging. + +### Manual Flag Selection (CLI) + +#### High-throughput S3↔S3 ```text --transfers 32 @@ -275,15 +317,23 @@ sbatch run/sbatch_array.sh --stats 30s ``` -### Small objects (metadata heavy) +#### Small objects (metadata heavy) ```text ---transfers 16 +--transfers 64 --checkers 128 --fast-list ``` -### Track progress +#### Large objects + +```text +--transfers 16 +--checkers 32 +--buffer-size 256M +``` + +#### Track progress (always recommended) ```text --progress --stats 600s @@ -296,6 +346,7 @@ sbatch run/sbatch_array.sh ``` run/ manifest.jsonl + analysis.json # File size analysis and suggested flags shards/ shard_000123.jsonl shards.meta.json diff --git a/docs/slack-app-setup.md b/docs/slack-app-setup.md index 53d807f..516ceb5 100644 --- a/docs/slack-app-setup.md +++ b/docs/slack-app-setup.md @@ -140,6 +140,44 @@ You should see: 2. Mention the bot: `@xfer-bot what backends are available?` 3. Try a transfer request: `@xfer-bot transfer data from s3src:bucket/data to s3dst:archive/data` +## Bot Capabilities + +### Check data size before transferring + +Ask the bot to scan a source and report statistics: + +``` +@xfer-bot how much data is in s3src:research/experiment1? 
+``` + +The bot will return: +- Total file count and size +- File size distribution histogram +- Suggested rclone flags based on file sizes +- Estimated transfer times + +### Custom rclone flags + +Specify custom flags that are appended to the intelligent defaults: + +``` +@xfer-bot transfer s3src:data to s3dst:archive with --bwlimit 100M --checksum +``` + +Common options: +- `--bwlimit 100M` — Limit bandwidth +- `--checksum` — Verify files with checksums +- `--dry-run` — Test without copying + +### Intelligent flag selection + +The bot automatically analyzes file sizes and selects optimal rclone flags: +- **Small files** (>70% < 1MB): Higher parallelism (`--transfers 64`) +- **Large files** (>50% > 100MB): Larger buffers (`--buffer-size 256M`) +- **Mixed**: Balanced defaults + +All transfers include `--stats 600s --progress` for ETA tracking. + ## Running as a Service For production, run the bot as a systemd service: diff --git a/src/xfer/cli.py b/src/xfer/cli.py index 9890686..9e90cde 100644 --- a/src/xfer/cli.py +++ b/src/xfer/cli.py @@ -398,6 +398,97 @@ def manifest_shard( eprint(f"Wrote {num_shards} shards to {outdir} (records={n}, bytes={total_bytes})") +@manifest_app.command("analyze") +def manifest_analyze( + infile: Path = typer.Option( + ..., + "--in", + exists=True, + dir_okay=False, + help="Input manifest JSONL", + resolve_path=True, + ), + out: Optional[Path] = typer.Option( + None, + help="Output analysis JSON file (if not specified, prints to stdout)", + resolve_path=True, + ), + base_flags: str = typer.Option( + "--retries 10 --low-level-retries 20 --stats 600s --progress", + help="Base rclone flags to always include", + ), +) -> None: + """ + Analyze manifest file sizes and suggest optimal rclone flags. 
+ + Examines the file size distribution and recommends transfer settings: + - Many small files: higher parallelism (--transfers 64 --checkers 128) + - Large files: fewer streams, larger buffers (--transfers 16 --buffer-size 256M) + - Mixed: balanced defaults (--transfers 32 --checkers 64) + """ + from .est import ( + compute_file_size_stats, + format_histogram_data, + human_bytes, + suggest_rclone_flags_from_sizes, + ) + + # Read manifest and extract sizes + sizes: List[int] = [] + for ln in infile.read_text(encoding="utf-8").splitlines(): + if not ln.strip(): + continue + try: + r = json.loads(ln) + size = int(r.get("size") or 0) + sizes.append(size) + except (json.JSONDecodeError, ValueError): + continue + + if not sizes: + result = { + "status": "error", + "error": "No files found in manifest", + "suggested_flags": base_flags, + } + else: + stats = compute_file_size_stats(sizes) + suggestion = suggest_rclone_flags_from_sizes(sizes) + histogram = format_histogram_data(sizes) + + # Combine base flags with profile-specific flags + combined_flags = f"{suggestion.flags} {base_flags}" + + result = { + "status": "ok", + "total_files": stats.total_files, + "total_bytes": stats.total_bytes, + "total_bytes_human": human_bytes(stats.total_bytes), + "file_size_stats": { + "min_size": stats.min_size, + "min_size_human": human_bytes(stats.min_size), + "max_size": stats.max_size, + "max_size_human": human_bytes(stats.max_size), + "median_size": stats.median_size, + "median_size_human": human_bytes(stats.median_size), + "small_files_pct": round(stats.small_files_pct, 1), + "medium_files_pct": round(stats.medium_files_pct, 1), + "large_files_pct": round(stats.large_files_pct, 1), + }, + "profile": suggestion.profile, + "profile_explanation": suggestion.explanation, + "suggested_flags": combined_flags, + "histogram": histogram, + } + + json_output = json.dumps(result, indent=2) + if out: + out.write_text(json_output + "\n", encoding="utf-8") + eprint(f"Wrote analysis to {out}") + 
else: + print(json_output) + + # ----------------------------- # Slurm render/submit # ----------------------------- diff --git a/src/xfer/est.py b/src/xfer/est.py index 0be3f9a..5d10e8c 100644 --- a/src/xfer/est.py +++ b/src/xfer/est.py @@ -449,6 +449,199 @@ def parse_bytes(s: str) -> int: print(f"| {label} | {c} | {pf:5.1f}% | {human_bytes(b)} | {pb:5.1f}% | {bar} |") +# ----------------------------- +# Analysis functions for programmatic use +# ----------------------------- +@dataclass +class FileSizeStats: + """Statistics about file sizes in a dataset.""" + + total_files: int + total_bytes: int + min_size: int + max_size: int + median_size: int + mean_size: float + p10_size: int # 10th percentile + p90_size: int # 90th percentile + small_files_pct: float # % of files < 1MB + medium_files_pct: float # % of files 1MB - 100MB + large_files_pct: float # % of files > 100MB + + +def compute_file_size_stats(sizes: List[int]) -> FileSizeStats: + """Compute statistics about file sizes.""" + if not sizes: + return FileSizeStats( + total_files=0, + total_bytes=0, + min_size=0, + max_size=0, + median_size=0, + mean_size=0.0, + p10_size=0, + p90_size=0, + small_files_pct=0.0, + medium_files_pct=0.0, + large_files_pct=0.0, + ) + + sorted_sizes = sorted(sizes) + n = len(sorted_sizes) + total = sum(sizes) + + # Percentile helper + def percentile(p: float) -> int: + idx = int(p * (n - 1)) + return sorted_sizes[idx] + + # Size thresholds + small_threshold = 1024 * 1024 # 1 MB + large_threshold = 100 * 1024 * 1024 # 100 MB + + small_count = sum(1 for s in sizes if s < small_threshold) + large_count = sum(1 for s in sizes if s > large_threshold) + medium_count = n - small_count - large_count + + return FileSizeStats( + total_files=n, + total_bytes=total, + min_size=sorted_sizes[0], + max_size=sorted_sizes[-1], + median_size=percentile(0.5), + mean_size=total / n, + p10_size=percentile(0.1), + p90_size=percentile(0.9), + small_files_pct=100.0 * small_count / n, + 
medium_files_pct=100.0 * medium_count / n, + large_files_pct=100.0 * large_count / n, + ) + + +@dataclass +class RcloneFlagsSuggestion: + """Suggested rclone flags based on file size distribution.""" + + profile: str # "small_files", "large_files", or "mixed" + flags: str + explanation: str + + +def suggest_rclone_flags_from_sizes(sizes: List[int]) -> RcloneFlagsSuggestion: + """ + Analyze file sizes and suggest optimal rclone flags. + + Profiles: + - small_files: Many small files (>70% < 1MB) - high parallelism + - large_files: Many large files (>50% > 100MB) - fewer streams, larger buffers + - mixed: Default balanced settings + """ + stats = compute_file_size_stats(sizes) + + if stats.total_files == 0: + return RcloneFlagsSuggestion( + profile="empty", + flags="--transfers 32 --checkers 64 --fast-list", + explanation="No files to analyze, using default settings", + ) + + # Small files profile: high parallelism for many small files + if stats.small_files_pct > 70 or stats.median_size < 1024 * 1024: + return RcloneFlagsSuggestion( + profile="small_files", + flags="--transfers 64 --checkers 128 --fast-list", + explanation=f"Optimized for small files ({stats.small_files_pct:.0f}% < 1MB, median {human_bytes(stats.median_size)})", + ) + + # Large files profile: fewer streams, larger buffers + if stats.large_files_pct > 50 or stats.median_size > 100 * 1024 * 1024: + return RcloneFlagsSuggestion( + profile="large_files", + flags="--transfers 16 --checkers 32 --buffer-size 256M", + explanation=f"Optimized for large files ({stats.large_files_pct:.0f}% > 100MB, median {human_bytes(stats.median_size)})", + ) + + # Mixed/default profile + return RcloneFlagsSuggestion( + profile="mixed", + flags="--transfers 32 --checkers 64 --fast-list", + explanation=f"Balanced settings for mixed file sizes (median {human_bytes(stats.median_size)})", + ) + + +def format_histogram_data( + sizes: List[int], +) -> List[Dict[str, Any]]: + """ + Return histogram as structured data (for JSON 
serialization). + + Returns list of dicts with: range_label, count, pct_files, bytes, pct_bytes + """ + if not sizes: + return [] + + obs_min = max(1, min(sizes)) + obs_max = max(sizes) + + # Use power-of-2 bins + min_b = 2 ** int(math.floor(math.log2(obs_min))) + max_b = 2 ** int(math.ceil(math.log2(obs_max))) + + edges = [float(x) for x in default_pow2_edges(min_b, max_b)] + counts, bytes_bin = histogram_counts(sizes, edges) + + total_files = len(sizes) + total_bytes = sum(sizes) + + result = [] + for i in range(len(edges) - 1): + lo = int(edges[i]) + hi = int(edges[i + 1]) + c = counts[i] + b = bytes_bin[i] + pf = (100.0 * c / total_files) if total_files else 0.0 + pb = (100.0 * b / total_bytes) if total_bytes else 0.0 + + result.append( + { + "range_min": lo, + "range_max": hi, + "range_label": f"{human_bytes(lo)} - {human_bytes(hi)}", + "file_count": c, + "pct_files": round(pf, 1), + "bytes": b, + "bytes_human": human_bytes(b), + "pct_bytes": round(pb, 1), + } + ) + + return result + + +def format_histogram_text(sizes: List[int], width: int = 30) -> str: + """Format histogram as text for display in Slack/terminal.""" + if not sizes: + return "No files to display" + + hist_data = format_histogram_data(sizes) + counts = [h["file_count"] for h in hist_data] + max_count = max(counts) if counts else 0 + + lines = ["File size distribution:"] + lines.append("") + + for h in hist_data: + if h["file_count"] == 0: + continue + bar_len = int((h["file_count"] / max_count) * width) if max_count > 0 else 0 + bar = "█" * bar_len + lines.append( + f" {h['range_label']:>20}: {h['file_count']:>8} files ({h['pct_files']:5.1f}%) {bar}" + ) + + return "\n".join(lines) + + # ----------------------------- # Main # ----------------------------- diff --git a/src/xfer/slackbot/claude_agent.py b/src/xfer/slackbot/claude_agent.py index 2c19c09..46917e4 100644 --- a/src/xfer/slackbot/claude_agent.py +++ b/src/xfer/slackbot/claude_agent.py @@ -17,6 +17,7 @@ get_allowed_backends, 
get_job_status, get_jobs_by_thread, + get_source_stats, get_transfer_progress, get_transfer_progress_by_job, submit_transfer, @@ -63,6 +64,10 @@ "type": "string", "description": "Custom name for the Slurm job", }, + "rclone_flags": { + "type": "string", + "description": "Additional rclone flags to append to defaults (e.g., '--bwlimit 100M --checksum')", + }, }, "required": ["source", "dest"], }, @@ -152,6 +157,28 @@ "required": ["backend_name"], }, }, + { + "name": "get_manifest_stats", + "description": """Scan a source path and return file statistics without starting a transfer. + +Use this to: +- Preview the data volume before committing to a transfer +- Get file count, total size, and size distribution +- See suggested rclone flags based on file size patterns +- Estimate transfer times + +This is helpful when users want to know "how much data is there?" or "how long will this take?" before starting.""", + "input_schema": { + "type": "object", + "properties": { + "source": { + "type": "string", + "description": "Source path in rclone format (remote:bucket/path)", + }, + }, + "required": ["source"], + }, + }, ] SYSTEM_PROMPT = """You are a helpful data transfer assistant for an HPC cluster. You help researchers submit and monitor data transfer jobs via Slurm. @@ -162,6 +189,7 @@ - List available storage backends - Cancel jobs if requested - Request access to new backends on behalf of users +- Scan source paths to get file statistics and transfer estimates Guidelines: 1. Always validate that backends are allowed before submitting transfers @@ -170,11 +198,17 @@ 4. Ask for clarification if the source or destination is ambiguous 5. Be helpful but don't make assumptions about paths - ask if unsure 6. When reporting job status, include relevant details like progress and any errors +7. 
If users want custom rclone flags (e.g., bandwidth limits, checksum verification), pass them via the rclone_flags parameter Transfer path format: - Paths should be in rclone format: "remote:bucket/path" - Example: "s3src:research-data/experiment1" or "gcs:archive-bucket/backups" +Custom rclone flags: +- Users can specify additional rclone flags like '--bwlimit 100M' or '--checksum' +- These are appended to the default flags, not replacing them +- Common options: --bwlimit (bandwidth limit), --checksum (verify with checksums), --dry-run (test without copying) + Keep responses brief and focused on the task at hand.""" @@ -204,6 +238,7 @@ def execute_tool( num_shards=tool_input.get("num_shards"), time_limit=tool_input.get("time_limit"), job_name=tool_input.get("job_name"), + rclone_flags=tool_input.get("rclone_flags"), ) return json.dumps(result.__dict__, default=str) @@ -336,6 +371,26 @@ def execute_tool( } ) + elif tool_name == "get_manifest_stats": + stats = get_source_stats( + source=tool_input["source"], + config=self.config, + ) + # Convert dataclass to dict for JSON serialization + result = { + "source": stats.source, + "total_files": stats.total_files, + "total_bytes": stats.total_bytes, + "total_bytes_human": stats.total_bytes_human, + "file_size_stats": stats.file_size_stats, + "suggested_flags": stats.suggested_flags, + "histogram": stats.histogram, + "histogram_text": stats.histogram_text, + } + if stats.error: + result["error"] = stats.error + return json.dumps(result) + else: return json.dumps({"error": f"Unknown tool: {tool_name}"}) diff --git a/src/xfer/slackbot/config.py b/src/xfer/slackbot/config.py index 3db3912..4409c67 100644 --- a/src/xfer/slackbot/config.py +++ b/src/xfer/slackbot/config.py @@ -34,7 +34,8 @@ class RcloneDefaults: ) container_conf_path: str = "/etc/rclone/rclone.conf" flags: str = ( - "--transfers 32 --checkers 64 --fast-list --retries 10 --low-level-retries 20" + "--transfers 32 --checkers 64 --fast-list --retries 10 
--low-level-retries 20 " + "--stats 600s --progress" ) diff --git a/src/xfer/slackbot/slurm_tools.py b/src/xfer/slackbot/slurm_tools.py index 3aae9b2..fd56620 100644 --- a/src/xfer/slackbot/slurm_tools.py +++ b/src/xfer/slackbot/slurm_tools.py @@ -17,6 +17,16 @@ from .config import BotConfig, slack_comment +from ..est import ( + compute_file_size_stats, + compute_totals_and_sizes, + extract_size_bytes, + format_histogram_data, + format_histogram_text, + human_bytes, + suggest_rclone_flags_from_sizes, +) + @dataclass class JobInfo: @@ -165,6 +175,7 @@ def _write_prepare_script( time_limit: str, job_name: str, comment: str, + rclone_flags: Optional[str] = None, ) -> Path: """ Write a batch script that prepares the transfer (manifest + shard + render). @@ -181,6 +192,9 @@ def _write_prepare_script( sbatch_extras_lines.append(f"#SBATCH --qos={config.slurm.qos}") sbatch_extras = "\\n".join(sbatch_extras_lines) + # User flags to append after analysis-suggested flags + user_rclone_flags = rclone_flags or "" + script_content = f"""#!/usr/bin/env bash #SBATCH --job-name={job_name}-prepare #SBATCH --output={run_dir}/prepare-%j.out @@ -210,7 +224,29 @@ def _write_prepare_script( echo "=== Manifest build complete at $(date -Is) ===" -# Phase 2: Shard manifest +# Phase 2: Analyze manifest to determine optimal rclone flags +echo "=== Analyzing file size distribution ===" +xfer manifest analyze \\ + --in {run_dir}/manifest.jsonl \\ + --out {run_dir}/analysis.json + +# Extract suggested flags from analysis +SUGGESTED_FLAGS=$(python3 -c "import json; print(json.load(open('{run_dir}/analysis.json'))['suggested_flags'])") +echo "Profile-based flags: $SUGGESTED_FLAGS" + +# Append any user-specified flags +USER_FLAGS={shlex.quote(user_rclone_flags)} +if [ -n "$USER_FLAGS" ]; then + RCLONE_FLAGS="$SUGGESTED_FLAGS $USER_FLAGS" + echo "User flags appended: $USER_FLAGS" +else + RCLONE_FLAGS="$SUGGESTED_FLAGS" +fi +echo "Final rclone flags: $RCLONE_FLAGS" + +echo "=== Analysis complete at 
$(date -Is) ===" + +# Phase 3: Shard manifest xfer manifest shard \\ --in {run_dir}/manifest.jsonl \\ --outdir {run_dir}/shards \\ @@ -218,7 +254,7 @@ def _write_prepare_script( echo "=== Sharding complete at $(date -Is) ===" -# Phase 3: Render Slurm scripts +# Phase 4: Render Slurm scripts xfer slurm render \\ --run-dir {run_dir} \\ --num-shards {num_shards} \\ @@ -230,13 +266,13 @@ def _write_prepare_script( --mem {config.slurm.mem} \\ --rclone-image {shlex.quote(config.rclone.image)} \\ --rclone-config {shlex.quote(str(config.rclone.config_path))} \\ - --rclone-flags {shlex.quote(config.rclone.flags)} \\ + --rclone-flags "$RCLONE_FLAGS" \\ --max-attempts {config.slurm.max_attempts} \\ --sbatch-extras {shlex.quote(sbatch_extras)} echo "=== Render complete at $(date -Is) ===" -# Phase 4: Submit transfer array job +# Phase 5: Submit transfer array job xfer slurm submit --run-dir {run_dir} echo "=== Transfer job submitted at $(date -Is) ===" @@ -258,6 +294,7 @@ def submit_transfer( array_concurrency: Optional[int] = None, time_limit: Optional[str] = None, job_name: Optional[str] = None, + rclone_flags: Optional[str] = None, ) -> TransferResult: """ Submit a data transfer job via xfer. @@ -305,6 +342,7 @@ def submit_transfer( time_limit=time_limit, job_name=job_name, comment=comment, + rclone_flags=rclone_flags, ) # Submit the prepare job @@ -586,3 +624,158 @@ def cancel_job(job_id: str, channel_id: str, thread_ts: str) -> tuple[bool, str] return True, f"Job {job_id} has been cancelled." 
except subprocess.CalledProcessError as e: return False, f"Failed to cancel job {job_id}: {e.stderr or str(e)}" + + +@dataclass +class SourceStats: + """Statistics about a source path.""" + + source: str + total_files: int + total_bytes: int + total_bytes_human: str + file_size_stats: dict + suggested_flags: dict + histogram: list + histogram_text: str + error: Optional[str] = None + + +def get_source_stats(source: str, config: BotConfig) -> SourceStats: + """ + Scan a source path and return file statistics without starting a transfer. + + Runs rclone lsjson via container and computes statistics. + """ + # Validate backend first + valid, msg = validate_backend(source, config) + if not valid: + return SourceStats( + source=source, + total_files=0, + total_bytes=0, + total_bytes_human="0 B", + file_size_stats={}, + suggested_flags={}, + histogram=[], + histogram_text="", + error=msg, + ) + + # Build rclone lsjson command + rclone_cmd = [ + "rclone", + "lsjson", + source, + "--recursive", + "--fast-list", + "--files-only", + "--config", + config.rclone.container_conf_path, + ] + + # Build srun command with container + mounts = f"{config.rclone.config_path}:{config.rclone.container_conf_path}:ro" + + srun_cmd = [ + "srun", + "-n", + "1", + "-c", + "8", + "--container-image", + config.rclone.image, + "--container-mounts", + mounts, + "--no-container-remap-root", + ] + rclone_cmd + + try: + result = run_cmd(srun_cmd, capture=True, check=True) + lsjson_output = result.stdout + except subprocess.CalledProcessError as e: + return SourceStats( + source=source, + total_files=0, + total_bytes=0, + total_bytes_human="0 B", + file_size_stats={}, + suggested_flags={}, + histogram=[], + histogram_text="", + error=f"Failed to list source: {e.stderr or str(e)}", + ) + + # Parse JSON output + try: + items = json.loads(lsjson_output) + if not isinstance(items, list): + raise ValueError("Expected JSON array from rclone lsjson") + except (json.JSONDecodeError, ValueError) as e: + return 
SourceStats( + source=source, + total_files=0, + total_bytes=0, + total_bytes_human="0 B", + file_size_stats={}, + suggested_flags={}, + histogram=[], + histogram_text="", + error=f"Failed to parse rclone output: {e}", + ) + + # Extract sizes + sizes = [] + for item in items: + size = extract_size_bytes(item) + if size is not None: + sizes.append(size) + + if not sizes: + return SourceStats( + source=source, + total_files=0, + total_bytes=0, + total_bytes_human="0 B", + file_size_stats={}, + suggested_flags={}, + histogram=[], + histogram_text="No files found", + error=None, + ) + + # Compute statistics + stats = compute_file_size_stats(sizes) + flags_suggestion = suggest_rclone_flags_from_sizes(sizes) + histogram = format_histogram_data(sizes) + histogram_text = format_histogram_text(sizes) + + return SourceStats( + source=source, + total_files=stats.total_files, + total_bytes=stats.total_bytes, + total_bytes_human=human_bytes(stats.total_bytes), + file_size_stats={ + "min_size": stats.min_size, + "min_size_human": human_bytes(stats.min_size), + "max_size": stats.max_size, + "max_size_human": human_bytes(stats.max_size), + "median_size": stats.median_size, + "median_size_human": human_bytes(stats.median_size), + "mean_size": int(stats.mean_size), + "mean_size_human": human_bytes(int(stats.mean_size)), + "p10_size": stats.p10_size, + "p90_size": stats.p90_size, + "small_files_pct": round(stats.small_files_pct, 1), + "medium_files_pct": round(stats.medium_files_pct, 1), + "large_files_pct": round(stats.large_files_pct, 1), + }, + suggested_flags={ + "profile": flags_suggestion.profile, + "flags": flags_suggestion.flags, + "explanation": flags_suggestion.explanation, + }, + histogram=histogram, + histogram_text=histogram_text, + error=None, + ) From 55446debefe3edf5aa5654b143e5e3ca88478071 Mon Sep 17 00:00:00 2001 From: Joe Schoonover Date: Wed, 4 Feb 2026 17:15:11 -0500 Subject: [PATCH 03/25] Fix markdown rendering in Slack responses - Update system prompt to 
instruct Claude to use Slack's mrkdwn format - Add markdown_to_slack() converter as safety net for any standard markdown that slips through - Apply converter to all bot responses before sending Slack mrkdwn differs from standard markdown: - Bold: *text* (not **text**) - Links: (not [text](url)) - No header support (converted to bold) Co-Authored-By: Claude Opus 4.5 --- src/xfer/slackbot/app.py | 32 ++++++++++++++++++++++++++++--- src/xfer/slackbot/claude_agent.py | 13 +++++++++++++ 2 files changed, 42 insertions(+), 3 deletions(-) diff --git a/src/xfer/slackbot/app.py b/src/xfer/slackbot/app.py index 8164884..5ef7dc1 100644 --- a/src/xfer/slackbot/app.py +++ b/src/xfer/slackbot/app.py @@ -5,6 +5,7 @@ from __future__ import annotations import logging +import re from typing import Any from slack_bolt import App @@ -16,6 +17,28 @@ logger = logging.getLogger(__name__) +def markdown_to_slack(text: str) -> str: + """ + Convert standard markdown to Slack's mrkdwn format. + + This is a safety net in case Claude outputs standard markdown + despite being instructed to use Slack format. + """ + # Convert **bold** to *bold* (must do before single asterisk handling) + text = re.sub(r"\*\*([^*]+)\*\*", r"*\1*", text) + + # Convert [text](url) to + text = re.sub(r"\[([^\]]+)\]\(([^)]+)\)", r"<\2|\1>", text) + + # Convert ~~strikethrough~~ to ~strikethrough~ + text = re.sub(r"~~([^~]+)~~", r"~\1~", text) + + # Remove markdown headers (## Header -> *Header*) + text = re.sub(r"^#{1,6}\s+(.+)$", r"*\1*", text, flags=re.MULTILINE) + + return text + + class ConversationStore: """ Simple in-memory store for conversation history per thread. 
@@ -107,7 +130,8 @@ def handle_mention(event: dict[str, Any], say: Any) -> None: channel, thread_ts, {"role": "assistant", "content": response} ) - say(text=response, thread_ts=thread_ts) + # Convert any markdown to Slack mrkdwn format + say(text=markdown_to_slack(response), thread_ts=thread_ts) except Exception as e: logger.exception(f"Error processing message: {e}") @@ -158,7 +182,8 @@ def handle_message(event: dict[str, Any], say: Any) -> None: channel, thread_ts, {"role": "assistant", "content": response} ) - say(text=response, thread_ts=thread_ts) + # Convert any markdown to Slack mrkdwn format + say(text=markdown_to_slack(response), thread_ts=thread_ts) except Exception as e: logger.exception(f"Error processing DM: {e}") @@ -191,7 +216,8 @@ def handle_message(event: dict[str, Any], say: Any) -> None: channel, thread_ts, {"role": "assistant", "content": response} ) - say(text=response, thread_ts=thread_ts) + # Convert any markdown to Slack mrkdwn format + say(text=markdown_to_slack(response), thread_ts=thread_ts) except Exception as e: logger.exception(f"Error in thread reply: {e}") diff --git a/src/xfer/slackbot/claude_agent.py b/src/xfer/slackbot/claude_agent.py index 46917e4..cd57ed0 100644 --- a/src/xfer/slackbot/claude_agent.py +++ b/src/xfer/slackbot/claude_agent.py @@ -209,6 +209,19 @@ - These are appended to the default flags, not replacing them - Common options: --bwlimit (bandwidth limit), --checksum (verify with checksums), --dry-run (test without copying) +Formatting (IMPORTANT - you are responding in Slack, not markdown): +- Use Slack's mrkdwn format, NOT standard markdown +- Bold: *text* (single asterisks, not double) +- Italic: _text_ (underscores) +- Strikethrough: ~text~ +- Code: `code` (backticks work the same) +- Code blocks: ```code``` (no language specifier) +- Links: (angle brackets with pipe) +- Lists: Use bullet points with "• " or simple dashes +- Do NOT use **double asterisks** for bold +- Do NOT use [text](url) for links +- Do NOT 
use markdown headers like ## Header + Keep responses brief and focused on the task at hand.""" From 1c01ddb1e9fecaa442237292fd5b9eb664a7f0c3 Mon Sep 17 00:00:00 2001 From: Joe Schoonover Date: Wed, 4 Feb 2026 18:34:08 -0500 Subject: [PATCH 04/25] Add check_path_exists tool and fix uv environment setup New features: - Add check_path_exists tool to verify bucket/path accessibility - If path doesn't exist, automatically notifies support channel - Detects common errors: NoSuchBucket, AccessDenied, NoSuchKey Bug fixes: - Add uv sync to prepare script to ensure environment is up to date - Use 'uv run xfer' instead of bare 'xfer' commands - Add XFER_INSTALL_DIR config option for xfer repo location Co-Authored-By: Claude Opus 4.5 --- src/xfer/slackbot/claude_agent.py | 98 +++++++++++++++++++++++++ src/xfer/slackbot/config.py | 6 ++ src/xfer/slackbot/slurm_tools.py | 117 ++++++++++++++++++++++++++++-- 3 files changed, 216 insertions(+), 5 deletions(-) diff --git a/src/xfer/slackbot/claude_agent.py b/src/xfer/slackbot/claude_agent.py index cd57ed0..fa8ce71 100644 --- a/src/xfer/slackbot/claude_agent.py +++ b/src/xfer/slackbot/claude_agent.py @@ -14,6 +14,7 @@ from .config import BotConfig from .slurm_tools import ( cancel_job, + check_path_exists, get_allowed_backends, get_job_status, get_jobs_by_thread, @@ -179,6 +180,29 @@ "required": ["source"], }, }, + { + "name": "check_path_exists", + "description": """Check if a bucket or path exists at a remote endpoint. + +Use this to verify that a source or destination path is accessible before starting a transfer. +If the path doesn't exist, this will notify the support team so they can help resolve the issue. 
+ +Common reasons for paths not existing: +- Bucket name is misspelled +- Bucket hasn't been created yet +- Credentials don't have access to the bucket +- Wrong endpoint/region configured""", + "input_schema": { + "type": "object", + "properties": { + "path": { + "type": "string", + "description": "Path to check in rclone format (remote:bucket/path)", + }, + }, + "required": ["path"], + }, + }, ] SYSTEM_PROMPT = """You are a helpful data transfer assistant for an HPC cluster. You help researchers submit and monitor data transfer jobs via Slurm. @@ -190,6 +214,7 @@ - Cancel jobs if requested - Request access to new backends on behalf of users - Scan source paths to get file statistics and transfer estimates +- Verify that buckets/paths exist before starting transfers Guidelines: 1. Always validate that backends are allowed before submitting transfers @@ -404,6 +429,79 @@ def execute_tool( result["error"] = stats.error return json.dumps(result) + elif tool_name == "check_path_exists": + path = tool_input["path"] + check_result = check_path_exists(path, self.config) + + # If path doesn't exist, notify support channel + support_notified = False + if not check_result.exists and self.slack_client and self.config.support_channel: + try: + self.slack_client.chat_postMessage( + channel=self.config.support_channel, + text=f"Path access issue: {path}", + blocks=[ + { + "type": "header", + "text": { + "type": "plain_text", + "text": "Path Access Issue", + }, + }, + { + "type": "section", + "fields": [ + { + "type": "mrkdwn", + "text": f"*Path:*\n`{path}`", + }, + { + "type": "mrkdwn", + "text": f"*Error:*\n{check_result.error or 'Unknown'}", + }, + ], + }, + { + "type": "section", + "text": { + "type": "mrkdwn", + "text": f"*Details:*\n```{check_result.details[:500] if check_result.details else 'No details'}```", + }, + }, + { + "type": "context", + "elements": [ + { + "type": "mrkdwn", + "text": f"", + } + ], + }, + ], + ) + support_notified = True + except Exception as e: + 
import logging + + logging.getLogger(__name__).error( + f"Failed to post to support channel: {e}" + ) + + return json.dumps( + { + "path": check_result.path, + "exists": check_result.exists, + "error": check_result.error, + "support_notified": support_notified, + "message": ( + f"Path '{path}' is accessible." + if check_result.exists + else f"Path '{path}' is not accessible: {check_result.error}. " + + ("Support team has been notified." if support_notified else "") + ), + } + ) + else: return json.dumps({"error": f"Unknown tool: {tool_name}"}) diff --git a/src/xfer/slackbot/config.py b/src/xfer/slackbot/config.py index 4409c67..7dce32d 100644 --- a/src/xfer/slackbot/config.py +++ b/src/xfer/slackbot/config.py @@ -66,6 +66,9 @@ class BotConfig: allowed_backends_file: Optional[Path] = ( None # Path to YAML/JSON listing allowed backends ) + xfer_install_dir: Optional[Path] = field( + default_factory=lambda: Path(__file__).parent.parent.parent.parent.resolve() + ) # Path to xfer git repo for uv sync # Defaults slurm: SlurmDefaults = field(default_factory=SlurmDefaults) @@ -100,6 +103,9 @@ def from_env(cls) -> "BotConfig": if rclone_config := os.environ.get("XFER_RCLONE_CONFIG"): config.rclone.config_path = Path(rclone_config) + if xfer_install_dir := os.environ.get("XFER_INSTALL_DIR"): + config.xfer_install_dir = Path(xfer_install_dir) + return config diff --git a/src/xfer/slackbot/slurm_tools.py b/src/xfer/slackbot/slurm_tools.py index fd56620..ffc4aa6 100644 --- a/src/xfer/slackbot/slurm_tools.py +++ b/src/xfer/slackbot/slurm_tools.py @@ -195,6 +195,9 @@ def _write_prepare_script( # User flags to append after analysis-suggested flags user_rclone_flags = rclone_flags or "" + # Get xfer install directory for uv + xfer_dir = config.xfer_install_dir or Path(__file__).parent.parent.parent.parent.resolve() + script_content = f"""#!/usr/bin/env bash #SBATCH --job-name={job_name}-prepare #SBATCH --output={run_dir}/prepare-%j.out @@ -209,13 +212,21 @@ def 
_write_prepare_script( set -euo pipefail +# Setup uv environment +XFER_DIR="{xfer_dir}" +echo "=== Setting up uv environment at $(date -Is) ===" +echo "XFER_DIR: $XFER_DIR" +cd "$XFER_DIR" +uv sync +echo "=== uv sync complete ===" + echo "=== Starting manifest build at $(date -Is) ===" echo "Source: {source}" echo "Dest: {dest}" echo "Run dir: {run_dir}" # Phase 1: Build manifest -xfer manifest build \\ +uv run xfer manifest build \\ --source {shlex.quote(source)} \\ --dest {shlex.quote(dest)} \\ --out {run_dir}/manifest.jsonl \\ @@ -226,7 +237,7 @@ def _write_prepare_script( # Phase 2: Analyze manifest to determine optimal rclone flags echo "=== Analyzing file size distribution ===" -xfer manifest analyze \\ +uv run xfer manifest analyze \\ --in {run_dir}/manifest.jsonl \\ --out {run_dir}/analysis.json @@ -247,7 +258,7 @@ def _write_prepare_script( echo "=== Analysis complete at $(date -Is) ===" # Phase 3: Shard manifest -xfer manifest shard \\ +uv run xfer manifest shard \\ --in {run_dir}/manifest.jsonl \\ --outdir {run_dir}/shards \\ --num-shards {num_shards} @@ -255,7 +266,7 @@ def _write_prepare_script( echo "=== Sharding complete at $(date -Is) ===" # Phase 4: Render Slurm scripts -xfer slurm render \\ +uv run xfer slurm render \\ --run-dir {run_dir} \\ --num-shards {num_shards} \\ --array-concurrency {array_concurrency} \\ @@ -273,7 +284,7 @@ def _write_prepare_script( echo "=== Render complete at $(date -Is) ===" # Phase 5: Submit transfer array job -xfer slurm submit --run-dir {run_dir} +uv run xfer slurm submit --run-dir {run_dir} echo "=== Transfer job submitted at $(date -Is) ===" """ @@ -779,3 +790,99 @@ def get_source_stats(source: str, config: BotConfig) -> SourceStats: histogram_text=histogram_text, error=None, ) + + +@dataclass +class PathCheckResult: + """Result of checking if a path exists.""" + + path: str + exists: bool + error: Optional[str] = None + details: Optional[str] = None + + +def check_path_exists(path: str, config: BotConfig) -> 
PathCheckResult: + """ + Check if a bucket/path exists at a remote endpoint. + + Uses rclone lsf with --max-depth 0 to check if the path is accessible. + """ + # Validate backend first + valid, msg = validate_backend(path, config) + if not valid: + return PathCheckResult( + path=path, + exists=False, + error=msg, + details="Backend not in allowed list", + ) + + # Build rclone lsf command to check if path exists + # Using lsf with --max-depth 0 and --dirs-only is a quick way to check + rclone_cmd = [ + "rclone", + "lsf", + path, + "--max-depth", + "0", + "--config", + config.rclone.container_conf_path, + ] + + # Build srun command with container + mounts = f"{config.rclone.config_path}:{config.rclone.container_conf_path}:ro" + + srun_cmd = [ + "srun", + "-n", + "1", + "-c", + "2", + "--container-image", + config.rclone.image, + "--container-mounts", + mounts, + "--no-container-remap-root", + ] + rclone_cmd + + try: + result = run_cmd(srun_cmd, capture=True, check=True) + # If command succeeded, path exists + return PathCheckResult( + path=path, + exists=True, + details="Path is accessible", + ) + except subprocess.CalledProcessError as e: + error_output = e.stderr or str(e) + + # Check for common error patterns + if "NoSuchBucket" in error_output or "bucket does not exist" in error_output.lower(): + return PathCheckResult( + path=path, + exists=False, + error="Bucket does not exist", + details=error_output, + ) + elif "AccessDenied" in error_output or "access denied" in error_output.lower(): + return PathCheckResult( + path=path, + exists=False, + error="Access denied - credentials may not have permission", + details=error_output, + ) + elif "NoSuchKey" in error_output or "not found" in error_output.lower(): + return PathCheckResult( + path=path, + exists=False, + error="Path does not exist within the bucket", + details=error_output, + ) + else: + return PathCheckResult( + path=path, + exists=False, + error=f"Failed to access path: {error_output[:200]}", + 
details=error_output, + ) From b8f433531388c2110201cab39bf392288ffcd6ce Mon Sep 17 00:00:00 2001 From: Joe Schoonover Date: Wed, 4 Feb 2026 18:36:18 -0500 Subject: [PATCH 05/25] Allow users to set lower array concurrency (max 64) - Add array_concurrency parameter to submit_transfer tool - Cap maximum at 64 to prevent overloading storage systems - Users can request lower values (e.g., 32, 16) for gentler transfers Co-Authored-By: Claude Opus 4.5 --- src/xfer/slackbot/claude_agent.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/xfer/slackbot/claude_agent.py b/src/xfer/slackbot/claude_agent.py index fa8ce71..4e4aadf 100644 --- a/src/xfer/slackbot/claude_agent.py +++ b/src/xfer/slackbot/claude_agent.py @@ -69,6 +69,10 @@ "type": "string", "description": "Additional rclone flags to append to defaults (e.g., '--bwlimit 100M --checksum')", }, + "array_concurrency": { + "type": "integer", + "description": "Maximum concurrent Slurm array tasks (default: 64, max: 64). Lower this to reduce load on storage systems.", + }, }, "required": ["source", "dest"], }, @@ -267,6 +271,13 @@ def execute_tool( ) -> str: """Execute a tool and return the result as a string.""" if tool_name == "submit_transfer": + # Cap array_concurrency at 64 (the default maximum) + array_concurrency = tool_input.get("array_concurrency") + if array_concurrency is not None: + array_concurrency = min(int(array_concurrency), 64) + if array_concurrency < 1: + array_concurrency = 1 + result = submit_transfer( source=tool_input["source"], dest=tool_input["dest"], @@ -274,6 +285,7 @@ def execute_tool( channel_id=channel_id, thread_ts=thread_ts, num_shards=tool_input.get("num_shards"), + array_concurrency=array_concurrency, time_limit=tool_input.get("time_limit"), job_name=tool_input.get("job_name"), rclone_flags=tool_input.get("rclone_flags"), From 82ef039e55ec6902e0ffeced165f1f92c59a4f57 Mon Sep 17 00:00:00 2001 From: Joe Schoonover Date: Wed, 4 Feb 2026 19:17:25 -0500 Subject: [PATCH 06/25] 
Improve error logging for manifest build failures Add detailed diagnostics when srun/rclone fails: - Log the full command that was executed - Capture all SLURM environment variables - Print SLURM memory vars to stderr for quick debugging - Include full stdout and stderr in error log This helps diagnose environment-related failures like SLURM_MEM_* variable conflicts. Co-Authored-By: Claude Opus 4.5 --- src/xfer/cli.py | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/src/xfer/cli.py b/src/xfer/cli.py index 9e90cde..fa04bf1 100644 --- a/src/xfer/cli.py +++ b/src/xfer/cli.py @@ -263,16 +263,41 @@ def manifest_build( # Write error details to xfer-err/ err_file = err_dir / f"manifest_build-{run_id}.log" with err_file.open("w", encoding="utf-8") as ef: - ef.write(f"Exception: {exc}\n") - import traceback + ef.write(f"Exception: {exc}\n\n") + + # Log the command that was run + ef.write("--- COMMAND ---\n") + ef.write(" ".join(shlex.quote(c) for c in srun_cmd) + "\n\n") + # Log relevant SLURM environment variables + ef.write("--- SLURM ENVIRONMENT ---\n") + slurm_vars = {k: v for k, v in os.environ.items() if k.startswith("SLURM")} + for k, v in sorted(slurm_vars.items()): + ef.write(f"{k}={v}\n") + ef.write("\n") + + import traceback + ef.write("--- TRACEBACK ---\n") ef.write(traceback.format_exc()) - # If subprocess.CalledProcessError, try to write stderr + + # If subprocess.CalledProcessError, write stdout and stderr if hasattr(exc, "stderr") and exc.stderr: with err_file.open("a", encoding="utf-8") as ef: ef.write("\n--- STDERR ---\n") ef.write(str(exc.stderr)) + if hasattr(exc, "stdout") and exc.stdout: + with err_file.open("a", encoding="utf-8") as ef: + ef.write("\n--- STDOUT ---\n") + ef.write(str(exc.stdout)) + + # Also print key info to stderr for visibility in job logs eprint(f"ERROR: srun/rclone failed, see {err_file}") + eprint(f"Command: {' '.join(shlex.quote(c) for c in srun_cmd)}") + if hasattr(exc, 
"stderr") and exc.stderr: + eprint(f"stderr: {exc.stderr}") + slurm_mem_vars = {k: v for k, v in os.environ.items() if "MEM" in k and k.startswith("SLURM")} + if slurm_mem_vars: + eprint(f"SLURM memory env vars: {slurm_mem_vars}") raise # Build JSONL From 7ef4a9bcaf5e1dca21ff1a1ce9fcde93cc947393 Mon Sep 17 00:00:00 2001 From: Joe Schoonover Date: Wed, 4 Feb 2026 19:26:14 -0500 Subject: [PATCH 07/25] Fix run_id format to be filename-safe Remove colons from timestamp format to avoid 'Invalid argument' errors when creating log files. Changes from ISO format (2026-02-05T00:21:57Z) to compact format (20260205T002157Z). Co-Authored-By: Claude Opus 4.5 --- src/xfer/cli.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/xfer/cli.py b/src/xfer/cli.py index fa04bf1..2424ae0 100644 --- a/src/xfer/cli.py +++ b/src/xfer/cli.py @@ -54,7 +54,8 @@ # Helpers # ----------------------------- def now_run_id() -> str: - ts = dt.datetime.utcnow().replace(microsecond=0).isoformat() + "Z" + # Use filename-safe format (no colons) + ts = dt.datetime.utcnow().strftime("%Y%m%dT%H%M%SZ") rnd = os.urandom(3).hex() return f"{ts}_{rnd}" From d3e7582fe88edb91d38b495f8a21c56299389548 Mon Sep 17 00:00:00 2001 From: Joe Schoonover Date: Wed, 4 Feb 2026 19:30:29 -0500 Subject: [PATCH 08/25] Prevent environment propagation when submitting prepare job Use sbatch --export=NONE to avoid inheriting SLURM_* environment variables from the bot's parent job. This fixes the SLURM_MEM_PER_CPU vs SLURM_MEM_PER_NODE conflict when the bot runs as a Slurm job. 
Co-Authored-By: Claude Opus 4.5 --- src/xfer/slackbot/slurm_tools.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/xfer/slackbot/slurm_tools.py b/src/xfer/slackbot/slurm_tools.py index ffc4aa6..555dc85 100644 --- a/src/xfer/slackbot/slurm_tools.py +++ b/src/xfer/slackbot/slurm_tools.py @@ -357,8 +357,9 @@ def submit_transfer( ) # Submit the prepare job + # Use --export=NONE to prevent inheriting SLURM_* env vars from the bot's job try: - result = run_cmd(["sbatch", str(prepare_script)], check=True) + result = run_cmd(["sbatch", "--export=NONE", str(prepare_script)], check=True) # Parse job ID from output job_id = None From 94214d09d4c66a940f259931b500750907890864 Mon Sep 17 00:00:00 2001 From: Joe Schoonover Date: Wed, 4 Feb 2026 19:32:34 -0500 Subject: [PATCH 09/25] Add --export=NONE to all sbatch calls Ensure clean environment for all submitted jobs: - submit.sh template (submits array job from prepare.sh) - slurm submit CLI command (direct submission) All batch scripts already define their required environment variables via export statements, so they are self-contained. Co-Authored-By: Claude Opus 4.5 --- src/xfer/cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/xfer/cli.py b/src/xfer/cli.py index 2424ae0..77040a9 100644 --- a/src/xfer/cli.py +++ b/src/xfer/cli.py @@ -634,7 +634,7 @@ def manifest_analyze( : "${RUN_DIR:?}" cd "${RUN_DIR}" -sbatch "${RUN_DIR}/sbatch_array.sh" +sbatch --export=NONE "${RUN_DIR}/sbatch_array.sh" """ SBATCH_ARRAY_SH = r"""#!/usr/bin/env bash @@ -818,7 +818,7 @@ def slurm_submit( sbatch_script = run_dir / "sbatch_array.sh" if not sbatch_script.exists(): raise typer.BadParameter(f"Missing {sbatch_script}. 
Run `slurm render` first.") - cp = run_cmd(["sbatch", str(sbatch_script)], capture=True, check=True) + cp = run_cmd(["sbatch", "--export=NONE", str(sbatch_script)], capture=True, check=True) print(cp.stdout.strip()) From 36546a5c9f9070efdd15656bf41bfe3f29cc6556 Mon Sep 17 00:00:00 2001 From: Joe Schoonover Date: Wed, 4 Feb 2026 19:38:09 -0500 Subject: [PATCH 10/25] Unset conflicting SLURM_MEM_* variables in prepare.sh Slurm sets both SLURM_MEM_PER_NODE (from --mem=16G) and SLURM_MEM_PER_CPU (from cluster DefMemPerCPU), causing srun to fail. Unset these at the start of the script before running any srun commands. Co-Authored-By: Claude Opus 4.5 --- src/xfer/slackbot/slurm_tools.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/xfer/slackbot/slurm_tools.py b/src/xfer/slackbot/slurm_tools.py index 555dc85..a445836 100644 --- a/src/xfer/slackbot/slurm_tools.py +++ b/src/xfer/slackbot/slurm_tools.py @@ -212,6 +212,10 @@ def _write_prepare_script( set -euo pipefail +# Unset conflicting Slurm memory variables to allow srun to work +# (cluster DefMemPerCPU conflicts with job --mem setting) +unset SLURM_MEM_PER_CPU SLURM_MEM_PER_GPU SLURM_MEM_PER_NODE + # Setup uv environment XFER_DIR="{xfer_dir}" echo "=== Setting up uv environment at $(date -Is) ===" From 091ff874dfb65e4390723b0e8381733e2b817b22 Mon Sep 17 00:00:00 2001 From: Joe Schoonover Date: Wed, 4 Feb 2026 20:29:32 -0500 Subject: [PATCH 11/25] Increase prepare job time limit to 2 days Large transfers may need more time for manifest building and sharding. 
Co-Authored-By: Claude Opus 4.5 --- src/xfer/slackbot/slurm_tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/xfer/slackbot/slurm_tools.py b/src/xfer/slackbot/slurm_tools.py index a445836..a07d205 100644 --- a/src/xfer/slackbot/slurm_tools.py +++ b/src/xfer/slackbot/slurm_tools.py @@ -202,7 +202,7 @@ def _write_prepare_script( #SBATCH --job-name={job_name}-prepare #SBATCH --output={run_dir}/prepare-%j.out #SBATCH --error={run_dir}/prepare-%j.err -#SBATCH --time=04:00:00 +#SBATCH --time=2-00:00:00 #SBATCH --partition={config.slurm.partition} #SBATCH --cpus-per-task=8 #SBATCH --mem=16G From 5d0928aaf5d0940f210e78ea0415e60c3098cd9b Mon Sep 17 00:00:00 2001 From: Joe Schoonover Date: Thu, 5 Feb 2026 09:43:50 -0500 Subject: [PATCH 12/25] Restore thread context from Slack API after bot restart The bot uses in-memory conversation history to track threads it has participated in. When the bot restarts, this history is lost and it stops responding to thread messages from other users asking for updates. This adds a fallback that fetches thread history via the Slack API when in-memory history is missing, reconstructing the conversation context so the Claude agent can still identify the relevant transfer job. Co-Authored-By: Claude Opus 4.5 --- src/xfer/slackbot/app.py | 122 +++++++++++++++++++++++++++++++-------- 1 file changed, 98 insertions(+), 24 deletions(-) diff --git a/src/xfer/slackbot/app.py b/src/xfer/slackbot/app.py index 5ef7dc1..e482fcd 100644 --- a/src/xfer/slackbot/app.py +++ b/src/xfer/slackbot/app.py @@ -80,6 +80,61 @@ def create_app(config: BotConfig | None = None) -> tuple[App, SocketModeHandler] agent = ClaudeAgent(config, slack_client=app.client) conversations = ConversationStore() + def fetch_thread_history(channel: str, thread_ts: str) -> list[dict] | None: + """ + Fetch conversation history from a Slack thread and reconstruct it. 
+ + Returns the conversation history if the bot has participated in the thread, + or None if the bot hasn't participated or there was an error. + + This is used as a fallback when in-memory conversation history is lost + (e.g., after bot restart). + """ + try: + # Get our bot's user ID + auth_result = app.client.auth_test() + bot_user_id = auth_result.get("user_id") + if not bot_user_id: + return None + + # Fetch thread replies + result = app.client.conversations_replies( + channel=channel, + ts=thread_ts, + limit=100, # Fetch up to 100 messages from the thread + ) + messages = result.get("messages", []) + + # Check if bot has participated + bot_participated = any(msg.get("user") == bot_user_id for msg in messages) + if not bot_participated: + return None + + # Reconstruct conversation history + history = [] + for msg in messages: + text = msg.get("text", "") + if not text: + continue + + if msg.get("user") == bot_user_id: + # Bot's message + history.append({"role": "assistant", "content": text}) + else: + # User message - strip bot mentions + text = re.sub(r"<@[A-Z0-9]+>\s*", "", text).strip() + if text: + history.append({"role": "user", "content": text}) + + logger.info( + f"Reconstructed {len(history)} messages from thread {channel}:{thread_ts}" + ) + return history + + except Exception as e: + logger.warning(f"Failed to fetch thread history: {e}") + return None + def is_allowed_channel(channel: str) -> bool: """Check if the bot should respond in this channel.""" if not config.allowed_channels: @@ -194,37 +249,56 @@ def handle_message(event: dict[str, Any], say: Any) -> None: # For channel messages in threads where we've participated, continue responding elif event.get("thread_ts"): + if not is_allowed_channel(channel): + return + # Check if we have history in this thread (meaning we've been mentioned) history = conversations.get(channel, thread_ts) - if history and is_allowed_channel(channel): - logger.info( - f"Continuing thread conversation with {user}: 
{text[:100]}..." - ) - try: - response = agent.process_message( - user_message=text, - channel_id=channel, - thread_ts=thread_ts, - conversation_history=history.copy(), + # If no in-memory history, try to reconstruct from Slack API + # This handles the case where the bot was restarted and lost memory + if not history: + fetched_history = fetch_thread_history(channel, thread_ts) + if fetched_history is not None: + logger.info( + f"Restored {len(fetched_history)} messages for thread {channel}:{thread_ts}" ) + # Restore to in-memory store for future messages in this session + for msg in fetched_history: + conversations.append(channel, thread_ts, msg) + history = fetched_history + else: + # Bot hasn't participated in this thread + return + + logger.info( + f"Continuing thread conversation with {user}: {text[:100]}..." + ) - conversations.append( - channel, thread_ts, {"role": "user", "content": text} - ) - conversations.append( - channel, thread_ts, {"role": "assistant", "content": response} - ) + try: + response = agent.process_message( + user_message=text, + channel_id=channel, + thread_ts=thread_ts, + conversation_history=history.copy() if history else None, + ) - # Convert any markdown to Slack mrkdwn format - say(text=markdown_to_slack(response), thread_ts=thread_ts) + conversations.append( + channel, thread_ts, {"role": "user", "content": text} + ) + conversations.append( + channel, thread_ts, {"role": "assistant", "content": response} + ) - except Exception as e: - logger.exception(f"Error in thread reply: {e}") - say( - text=f"Sorry, I encountered an error: {str(e)}", - thread_ts=thread_ts, - ) + # Convert any markdown to Slack mrkdwn format + say(text=markdown_to_slack(response), thread_ts=thread_ts) + + except Exception as e: + logger.exception(f"Error in thread reply: {e}") + say( + text=f"Sorry, I encountered an error: {str(e)}", + thread_ts=thread_ts, + ) # Create socket mode handler handler = SocketModeHandler(app, config.slack_app_token) From 
ef6687b5722ccd878e9a6bea904115474d7401d4 Mon Sep 17 00:00:00 2001 From: Joe Schoonover Date: Thu, 5 Feb 2026 09:51:24 -0500 Subject: [PATCH 13/25] Add read_job_logs tool to access job analysis and logs Adds a new tool that allows users to inspect running/completed jobs: - File size distribution histogram from analysis.json - Suggested rclone flags determined during manifest analysis - Tail of prepare job stdout/stderr logs - Extracted rclone commands that were run This helps users debug issues and understand what the transfer is doing without needing direct access to the cluster filesystem. Co-Authored-By: Claude Opus 4.5 --- src/xfer/slackbot/claude_agent.py | 63 +++++++++++++ src/xfer/slackbot/slurm_tools.py | 142 ++++++++++++++++++++++++++++++ 2 files changed, 205 insertions(+) diff --git a/src/xfer/slackbot/claude_agent.py b/src/xfer/slackbot/claude_agent.py index 4e4aadf..7a94019 100644 --- a/src/xfer/slackbot/claude_agent.py +++ b/src/xfer/slackbot/claude_agent.py @@ -16,6 +16,7 @@ cancel_job, check_path_exists, get_allowed_backends, + get_job_logs, get_job_status, get_jobs_by_thread, get_source_stats, @@ -207,6 +208,38 @@ "required": ["path"], }, }, + { + "name": "read_job_logs", + "description": """Read logs and analysis data for a transfer job. 
+ +Use this when users want to: +- See the file size distribution histogram for a transfer +- View rclone commands that were run during manifest generation +- Debug issues by examining log output +- See suggested rclone flags that were determined from file analysis + +This returns: +- analysis.json contents (file size histogram, suggested flags, statistics) +- Tail of the prepare job stdout log +- Tail of the prepare job stderr log (if any errors) +- List of rclone commands extracted from logs + +Only works for jobs that belong to the current thread.""", + "input_schema": { + "type": "object", + "properties": { + "job_id": { + "type": "string", + "description": "The Slurm job ID to get logs for", + }, + "tail_lines": { + "type": "integer", + "description": "Number of lines to return from log files (default: 50, max: 200)", + }, + }, + "required": ["job_id"], + }, + }, ] SYSTEM_PROMPT = """You are a helpful data transfer assistant for an HPC cluster. You help researchers submit and monitor data transfer jobs via Slurm. 
@@ -514,6 +547,36 @@ def execute_tool( } ) + elif tool_name == "read_job_logs": + job_id = tool_input["job_id"] + tail_lines = tool_input.get("tail_lines", 50) + # Cap tail_lines at 200 + tail_lines = min(max(1, tail_lines), 200) + + logs = get_job_logs( + job_id=job_id, + channel_id=channel_id, + thread_ts=thread_ts, + tail_lines=tail_lines, + ) + + result = { + "job_id": logs.job_id, + "run_dir": logs.run_dir, + "rclone_commands": logs.rclone_commands, + } + + if logs.error: + result["error"] = logs.error + if logs.analysis: + result["analysis"] = logs.analysis + if logs.log_tail: + result["log_tail"] = logs.log_tail + if logs.error_log_tail: + result["error_log_tail"] = logs.error_log_tail + + return json.dumps(result) + else: return json.dumps({"error": f"Unknown tool: {tool_name}"}) diff --git a/src/xfer/slackbot/slurm_tools.py b/src/xfer/slackbot/slurm_tools.py index a07d205..75a9536 100644 --- a/src/xfer/slackbot/slurm_tools.py +++ b/src/xfer/slackbot/slurm_tools.py @@ -891,3 +891,145 @@ def check_path_exists(path: str, config: BotConfig) -> PathCheckResult: error=f"Failed to access path: {error_output[:200]}", details=error_output, ) + + +@dataclass +class JobLogs: + """Logs and analysis data for a job.""" + + job_id: str + run_dir: Optional[str] + analysis: Optional[dict] # Contents of analysis.json + log_tail: Optional[str] # Last N lines of prepare job output + error_log_tail: Optional[str] # Last N lines of prepare job stderr + rclone_commands: list[str] # Extracted rclone commands from logs + error: Optional[str] = None + + +def get_job_logs( + job_id: str, + channel_id: str, + thread_ts: str, + tail_lines: int = 50, +) -> JobLogs: + """ + Get logs and analysis data for a job. + + Validates that the job belongs to the requesting thread. 
+ + Args: + job_id: The Slurm job ID (can be prepare job or array job) + channel_id: Slack channel ID for validation + thread_ts: Slack thread timestamp for validation + tail_lines: Number of lines to return from log files (default 50) + + Returns: + JobLogs with analysis data and log contents + """ + # Get job info + job_info = get_job_status(job_id) + if not job_info: + return JobLogs( + job_id=job_id, + run_dir=None, + analysis=None, + log_tail=None, + error_log_tail=None, + rclone_commands=[], + error=f"Job {job_id} not found", + ) + + # Validate job belongs to this thread + expected_comment = slack_comment(channel_id, thread_ts) + if expected_comment not in job_info.comment: + return JobLogs( + job_id=job_id, + run_dir=None, + analysis=None, + log_tail=None, + error_log_tail=None, + rclone_commands=[], + error=f"Job {job_id} does not belong to this thread", + ) + + if not job_info.work_dir: + return JobLogs( + job_id=job_id, + run_dir=None, + analysis=None, + log_tail=None, + error_log_tail=None, + rclone_commands=[], + error="Job has no working directory", + ) + + run_dir = Path(job_info.work_dir) + if not run_dir.exists(): + return JobLogs( + job_id=job_id, + run_dir=str(run_dir), + analysis=None, + log_tail=None, + error_log_tail=None, + rclone_commands=[], + error=f"Run directory does not exist: {run_dir}", + ) + + # Read analysis.json if it exists + analysis = None + analysis_file = run_dir / "analysis.json" + if analysis_file.exists(): + try: + analysis = json.loads(analysis_file.read_text()) + except (json.JSONDecodeError, IOError) as e: + analysis = {"error": f"Failed to read analysis.json: {e}"} + + # Find and read log files + # The prepare job ID might be different from the array job ID + # Look for any prepare-*.out files + log_tail = None + error_log_tail = None + rclone_commands = [] + + # Try to find prepare job logs + log_files = list(run_dir.glob("prepare-*.out")) + if log_files: + # Use the most recent one + log_file = sorted(log_files, 
key=lambda p: p.stat().st_mtime)[-1] + try: + lines = log_file.read_text().splitlines() + log_tail = "\n".join(lines[-tail_lines:]) if lines else "" + + # Extract rclone commands from log + for line in lines: + # Look for lines that contain rclone commands + if "rclone" in line.lower() and any( + cmd in line for cmd in ["copy", "sync", "lsf", "lsjson", "ls"] + ): + # Clean up the line + line = line.strip() + if line and not line.startswith("#"): + rclone_commands.append(line) + except IOError as e: + log_tail = f"Failed to read log file: {e}" + + # Try to find error logs + err_files = list(run_dir.glob("prepare-*.err")) + if err_files: + err_file = sorted(err_files, key=lambda p: p.stat().st_mtime)[-1] + try: + lines = err_file.read_text().splitlines() + # Only include if there's actual content + if lines: + error_log_tail = "\n".join(lines[-tail_lines:]) + except IOError: + pass + + return JobLogs( + job_id=job_id, + run_dir=str(run_dir), + analysis=analysis, + log_tail=log_tail, + error_log_tail=error_log_tail, + rclone_commands=rclone_commands, + ) From b9e76d73e657867f609723dbbbe24baa176faba7 Mon Sep 17 00:00:00 2001 From: Joe Schoonover Date: Thu, 5 Feb 2026 10:11:23 -0500 Subject: [PATCH 14/25] Update Slack bot documentation with new features - Add table of all available bot tools - Document read_job_logs capability for viewing histograms and logs - Document thread-based conversation behavior - Explain restart resilience with automatic context recovery - Add troubleshooting entry for thread message issues Co-Authored-By: Claude Opus 4.5 --- docs/slack-app-setup.md | 47 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/docs/slack-app-setup.md b/docs/slack-app-setup.md index 516ceb5..06bfafb 100644 --- a/docs/slack-app-setup.md +++ b/docs/slack-app-setup.md @@ -142,6 +142,22 @@ You should see: ## Bot Capabilities +### Available Tools + +The bot has access to the following tools: + +| Tool | Description | 
+|------|-------------| +| `submit_transfer` | Submit a new data transfer job | +| `check_status` | Check status of transfer jobs in the thread | +| `list_backends` | List available/allowed rclone backends | +| `cancel_job` | Cancel a running transfer job | +| `get_transfer_details` | Get detailed shard-level progress | +| `get_manifest_stats` | Scan a source path for file statistics | +| `check_path_exists` | Verify a bucket/path is accessible | +| `request_backend_access` | Request access to a new backend | +| `read_job_logs` | Read job logs and analysis data | + ### Check data size before transferring Ask the bot to scan a source and report statistics: @@ -156,6 +172,22 @@ The bot will return: - Suggested rclone flags based on file sizes - Estimated transfer times +### View job logs and analysis + +For running or completed jobs, ask the bot to show logs and analysis data: + +``` +@xfer-bot show me the file size histogram for job 12345 +@xfer-bot what rclone commands were run for this transfer? +@xfer-bot show me the logs for job 12345 +``` + +The bot can return: +- File size distribution histogram from manifest analysis +- Suggested rclone flags that were determined during setup +- Tail of prepare job stdout/stderr logs +- Extracted rclone commands that were run + ### Custom rclone flags Specify custom flags that are appended to the intelligent defaults: @@ -178,6 +210,15 @@ The bot automatically analyzes file sizes and selects optimal rclone flags: All transfers include `--stats 600s --progress` for ETA tracking. +### Thread-based conversations + +The bot tracks conversations by Slack thread. Key behaviors: + +- **Initial contact**: Mention the bot with `@xfer-bot` to start a conversation +- **Follow-ups**: Once the bot has responded in a thread, any user can ask follow-up questions without mentioning the bot +- **Persistent context**: The bot associates Slurm jobs with the thread they were submitted from, so asking "what's the status?" 
in a thread will show jobs from that thread +- **Restart resilience**: If the bot restarts, it automatically recovers thread context from Slack's API, so ongoing conversations continue working + ## Running as a Service For production, run the bot as a systemd service: @@ -258,6 +299,11 @@ xfer-slackbot - Verify `XFER_ALLOWED_CHANNELS` includes the channel ID (or is unset for all channels) - Check logs for errors +### Bot doesn't respond to thread messages +- The bot only responds to threads where it has previously participated +- Someone must first mention the bot with `@xfer-bot` to start the conversation +- After a bot restart, the bot will automatically recover context by checking Slack's thread history + ### "not_in_channel" errors - The bot needs to be invited to channels before it can read/write messages @@ -267,3 +313,4 @@ xfer-slackbot ### Permission errors - Review OAuth scopes — you may need to reinstall the app after adding scopes +- The `channels:history` scope is required for the bot to recover thread context after restarts From cb8c3c46716689bbecd1b9660c5694b0b73935a7 Mon Sep 17 00:00:00 2001 From: Joe Schoonover Date: Thu, 5 Feb 2026 11:29:16 -0500 Subject: [PATCH 15/25] Add list_buckets tool to enumerate buckets at endpoints Allows users to ask the bot what buckets are available at a given backend, making it easier to discover data sources before transfers. 
Co-Authored-By: Claude Opus 4.5 --- src/xfer/slackbot/claude_agent.py | 34 ++++++++++ src/xfer/slackbot/slurm_tools.py | 100 ++++++++++++++++++++++++++++++ 2 files changed, 134 insertions(+) diff --git a/src/xfer/slackbot/claude_agent.py b/src/xfer/slackbot/claude_agent.py index 7a94019..e0f357f 100644 --- a/src/xfer/slackbot/claude_agent.py +++ b/src/xfer/slackbot/claude_agent.py @@ -22,6 +22,7 @@ get_source_stats, get_transfer_progress, get_transfer_progress_by_job, + list_buckets, submit_transfer, ) @@ -108,6 +109,28 @@ "required": [], }, }, + { + "name": "list_buckets", + "description": """List buckets available at a specific backend/endpoint. + +Use this when users want to: +- See what buckets exist at a storage endpoint +- Browse available data sources before starting a transfer +- Discover bucket names they can use in transfer paths + +This runs 'rclone lsd' to list top-level directories (buckets) at the remote. +Note: The backend must be in the allowed list, and credentials must have ListBuckets permission.""", + "input_schema": { + "type": "object", + "properties": { + "backend": { + "type": "string", + "description": "The backend name to list buckets for (e.g., 's3src', 'gcs')", + }, + }, + "required": ["backend"], + }, + }, { "name": "cancel_job", "description": """Cancel a running transfer job. 
@@ -248,6 +271,7 @@ - Submit new data transfer jobs (source -> destination) - Check status of running/completed transfers - List available storage backends +- List buckets available at each backend - Cancel jobs if requested - Request access to new backends on behalf of users - Scan source paths to get file statistics and transfer estimates @@ -353,6 +377,16 @@ def execute_tool( backends = get_allowed_backends(self.config) return json.dumps({"allowed_backends": backends}) + elif tool_name == "list_buckets": + backend = tool_input["backend"] + result = list_buckets(backend, self.config) + return json.dumps({ + "backend": result.backend, + "buckets": result.buckets, + "bucket_count": len(result.buckets), + "error": result.error, + }) + elif tool_name == "cancel_job": success, message = cancel_job( tool_input["job_id"], diff --git a/src/xfer/slackbot/slurm_tools.py b/src/xfer/slackbot/slurm_tools.py index 75a9536..b0d6864 100644 --- a/src/xfer/slackbot/slurm_tools.py +++ b/src/xfer/slackbot/slurm_tools.py @@ -807,6 +807,15 @@ class PathCheckResult: details: Optional[str] = None +@dataclass +class BucketListResult: + """Result of listing buckets at an endpoint.""" + + backend: str + buckets: list[str] + error: Optional[str] = None + + def check_path_exists(path: str, config: BotConfig) -> PathCheckResult: """ Check if a bucket/path exists at a remote endpoint. @@ -893,6 +902,97 @@ def check_path_exists(path: str, config: BotConfig) -> PathCheckResult: ) +def list_buckets(backend: str, config: BotConfig) -> BucketListResult: + """ + List buckets/top-level directories at a remote endpoint. + + Uses rclone lsd to list directories at the root of the remote. + + Args: + backend: The backend name (e.g., "s3src" or "gcs"). + Can include a trailing colon (e.g., "s3src:"). + + Returns: + BucketListResult with list of bucket names or an error. 
+ """ + # Normalize backend name - remove trailing colon if present + backend = backend.rstrip(":") + + # Validate backend first + valid, msg = validate_backend(backend, config) + if not valid: + return BucketListResult( + backend=backend, + buckets=[], + error=msg, + ) + + # Build rclone lsd command to list buckets + # lsd lists directories, at root level these are buckets + rclone_cmd = [ + "rclone", + "lsd", + f"{backend}:", + "--config", + config.rclone.container_conf_path, + ] + + # Build srun command with container + mounts = f"{config.rclone.config_path}:{config.rclone.container_conf_path}:ro" + + srun_cmd = [ + "srun", + "-n", + "1", + "-c", + "2", + "--container-image", + config.rclone.image, + "--container-mounts", + mounts, + "--no-container-remap-root", + ] + rclone_cmd + + try: + result = run_cmd(srun_cmd, capture=True, check=True) + # Parse rclone lsd output - format is: + # " -1 2024-01-15 10:30:00 -1 bucket-name" + # We want the last column (bucket name) + buckets = [] + for line in result.stdout.strip().splitlines(): + parts = line.split() + if parts: + # Bucket name is the last field + buckets.append(parts[-1]) + + return BucketListResult( + backend=backend, + buckets=sorted(buckets), + ) + except subprocess.CalledProcessError as e: + error_output = e.stderr or str(e) + + # Check for common error patterns + if "AccessDenied" in error_output or "access denied" in error_output.lower(): + return BucketListResult( + backend=backend, + buckets=[], + error="Access denied - credentials may not have ListBuckets permission", + ) + elif "InvalidAccessKeyId" in error_output: + return BucketListResult( + backend=backend, + buckets=[], + error="Invalid access key - check credentials configuration", + ) + else: + return BucketListResult( + backend=backend, + buckets=[], + error=f"Failed to list buckets: {error_output[:200]}", + ) + + @dataclass class JobLogs: """Logs and analysis data for a job.""" From ce76a1d3ab98b6feb6a302036de1ae9a4e0cca77 Mon Sep 17 
00:00:00 2001 From: Joe Schoonover Date: Thu, 5 Feb 2026 11:30:04 -0500 Subject: [PATCH 16/25] Document list_buckets tool in Slack bot setup guide Co-Authored-By: Claude Opus 4.5 --- docs/slack-app-setup.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/docs/slack-app-setup.md b/docs/slack-app-setup.md index 06bfafb..52cadb7 100644 --- a/docs/slack-app-setup.md +++ b/docs/slack-app-setup.md @@ -151,6 +151,7 @@ The bot has access to the following tools: | `submit_transfer` | Submit a new data transfer job | | `check_status` | Check status of transfer jobs in the thread | | `list_backends` | List available/allowed rclone backends | +| `list_buckets` | List buckets available at a specific backend | | `cancel_job` | Cancel a running transfer job | | `get_transfer_details` | Get detailed shard-level progress | | `get_manifest_stats` | Scan a source path for file statistics | @@ -158,6 +159,17 @@ The bot has access to the following tools: | `request_backend_access` | Request access to a new backend | | `read_job_logs` | Read job logs and analysis data | +### Discover available buckets + +Ask the bot what buckets exist at an endpoint: + +``` +@xfer-bot what buckets are available on s3src? +@xfer-bot list buckets at gcs +``` + +The bot will return a list of bucket names that you can use in transfer paths. + ### Check data size before transferring Ask the bot to scan a source and report statistics: From 460a690b1ae97aafcf7f91e34a5b03c53a59a445 Mon Sep 17 00:00:00 2001 From: Joe Schoonover Date: Sat, 7 Feb 2026 08:13:55 -0500 Subject: [PATCH 17/25] Verify source path exists before submitting transfer job Adds an early check using check_path_exists before creating the run directory or submitting to Slurm, so users get immediate feedback when the source path is invalid. 
Co-Authored-By: Claude Opus 4.6 --- src/xfer/slackbot/slurm_tools.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/xfer/slackbot/slurm_tools.py b/src/xfer/slackbot/slurm_tools.py index b0d6864..f99443b 100644 --- a/src/xfer/slackbot/slurm_tools.py +++ b/src/xfer/slackbot/slurm_tools.py @@ -331,6 +331,17 @@ def submit_transfer( message=f"Invalid {label}: {msg}", ) + # Verify source path exists before creating run directory and submitting + source_check = check_path_exists(source, config) + if not source_check.exists: + error_detail = source_check.error or "Path not found" + return TransferResult( + success=False, + job_id=None, + run_dir=None, + message=f"Source path not found: {source}. {error_detail}", + ) + # Generate run directory timestamp = datetime.utcnow().strftime("%Y%m%d_%H%M%S") run_name = f"slack_{channel_id}_{timestamp}" From c7187b6718e3cdf666479218959aca7617049ec6 Mon Sep 17 00:00:00 2001 From: Joe Schoonover Date: Sat, 7 Feb 2026 08:16:28 -0500 Subject: [PATCH 18/25] Reduce thread history limits to mitigate Slack rate limits Co-Authored-By: Claude Opus 4.6 --- src/xfer/slackbot/app.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/xfer/slackbot/app.py b/src/xfer/slackbot/app.py index e482fcd..96888ab 100644 --- a/src/xfer/slackbot/app.py +++ b/src/xfer/slackbot/app.py @@ -46,7 +46,7 @@ class ConversationStore: In production, you might want to use Redis or similar for persistence. 
""" - def __init__(self, max_messages: int = 20): + def __init__(self, max_messages: int = 10): self.max_messages = max_messages self._store: dict[str, list[dict]] = {} @@ -101,7 +101,7 @@ def fetch_thread_history(channel: str, thread_ts: str) -> list[dict] | None: result = app.client.conversations_replies( channel=channel, ts=thread_ts, - limit=100, # Fetch up to 100 messages from the thread + limit=20, # Limit to recent messages to avoid rate limits ) messages = result.get("messages", []) From b92f07166aa774771a4f2ff92b8b39160a82a994 Mon Sep 17 00:00:00 2001 From: Joe Schoonover Date: Sat, 7 Feb 2026 08:25:59 -0500 Subject: [PATCH 19/25] Read shard logs and improve log diagnostics in read_job_logs tool The read_job_logs tool previously only returned prepare job logs, missing the actual transfer errors in shard logs. Now reads failed shard state files and their attempt logs, and includes recent shard logs for non-failed jobs. Also clarifies tool response keys (prepare_stdout, prepare_stderr) and provides explicit notes when analysis or logs are missing so the agent can explain the situation to users. 
Co-Authored-By: Claude Opus 4.6 --- src/xfer/slackbot/claude_agent.py | 34 +++++++++++--- src/xfer/slackbot/slurm_tools.py | 78 +++++++++++++++++++++++++++++++ 2 files changed, 106 insertions(+), 6 deletions(-) diff --git a/src/xfer/slackbot/claude_agent.py b/src/xfer/slackbot/claude_agent.py index e0f357f..615e44c 100644 --- a/src/xfer/slackbot/claude_agent.py +++ b/src/xfer/slackbot/claude_agent.py @@ -240,12 +240,17 @@ - View rclone commands that were run during manifest generation - Debug issues by examining log output - See suggested rclone flags that were determined from file analysis +- Investigate why a transfer failed or is having problems +- See errors from individual shard transfer tasks This returns: -- analysis.json contents (file size histogram, suggested flags, statistics) -- Tail of the prepare job stdout log -- Tail of the prepare job stderr log (if any errors) -- List of rclone commands extracted from logs +- analysis.json contents (file size distribution, suggested rclone flags, file count/size statistics). If missing, it means the prepare job didn't reach the analysis step. +- prepare_stdout: Slurm stdout from the prepare job (prepare-*.out). This shows manifest build progress, sharding, script rendering, and the transfer array job submission. If the prepare job failed, the error will be here. +- prepare_stderr: Slurm stderr from the prepare job (prepare-*.err). Contains warnings and errors from the prepare phase. +- rclone_commands: rclone commands extracted from the prepare logs. +- shard_logs: For failed transfer shards, returns exit codes and log tails showing the actual rclone errors. For non-failed jobs, returns the most recent shard logs. + +IMPORTANT: Always use this tool when investigating transfer issues. Check BOTH the prepare logs (for manifest/setup failures) AND the shard logs (for transfer failures). Present the relevant error details to the user. 
Only works for jobs that belong to the current thread.""", "input_schema": { @@ -285,6 +290,7 @@ 5. Be helpful but don't make assumptions about paths - ask if unsure 6. When reporting job status, include relevant details like progress and any errors 7. If users want custom rclone flags (e.g., bandwidth limits, checksum verification), pass them via the rclone_flags parameter +8. When a user reports a problem or asks you to investigate an issue with a transfer, ALWAYS use read_job_logs to examine the actual log files. The shard logs contain the real error messages from rclone and the transfer process. Do not guess at causes without reading the logs first. Transfer path format: - Paths should be in rclone format: "remote:bucket/path" @@ -604,10 +610,26 @@ def execute_tool( result["error"] = logs.error if logs.analysis: result["analysis"] = logs.analysis + else: + result["analysis"] = None + result["analysis_note"] = ( + "No analysis.json found. The prepare job may not have " + "reached the manifest analysis step yet, or it failed " + "before completing. Check the prepare logs below." + ) if logs.log_tail: - result["log_tail"] = logs.log_tail + result["prepare_stdout"] = logs.log_tail + else: + result["prepare_stdout"] = None + result["prepare_stdout_note"] = ( + "No prepare-*.out log files found in the run directory." 
+ ) if logs.error_log_tail: - result["error_log_tail"] = logs.error_log_tail + result["prepare_stderr"] = logs.error_log_tail + else: + result["prepare_stderr"] = None + if logs.shard_logs: + result["shard_logs"] = logs.shard_logs return json.dumps(result) diff --git a/src/xfer/slackbot/slurm_tools.py b/src/xfer/slackbot/slurm_tools.py index f99443b..432c100 100644 --- a/src/xfer/slackbot/slurm_tools.py +++ b/src/xfer/slackbot/slurm_tools.py @@ -1014,6 +1014,7 @@ class JobLogs: log_tail: Optional[str] # Last N lines of prepare job output error_log_tail: Optional[str] # Last N lines of prepare job stderr rclone_commands: list[str] # Extracted rclone commands from logs + shard_logs: list[dict] = field(default_factory=list) # Failed/recent shard logs error: Optional[str] = None @@ -1136,6 +1137,82 @@ def get_job_logs( except IOError: pass + # Read shard transfer logs (especially for failed shards) + shard_logs = [] + state_dir = run_dir / "state" + logs_dir = run_dir / "logs" + + if state_dir.exists() and logs_dir.exists(): + # Identify failed shards from state files + fail_files = sorted(state_dir.glob("shard_*.fail")) + failed_shard_ids = set() + for ff in fail_files: + shard_id = ff.stem.replace("shard_", "") + failed_shard_ids.add(shard_id) + try: + exit_code = ff.read_text().strip() + except IOError: + exit_code = "unknown" + + # Find the most recent attempt log for this shard + shard_log_files = sorted( + logs_dir.glob(f"shard_{shard_id}_attempt_*.log"), + key=lambda p: p.stat().st_mtime, + ) + log_content = None + if shard_log_files: + try: + lines = shard_log_files[-1].read_text().splitlines() + log_content = "\n".join(lines[-tail_lines:]) if lines else "" + except IOError: + log_content = f"Failed to read {shard_log_files[-1].name}" + + shard_logs.append( + { + "shard_id": shard_id, + "status": "failed", + "exit_code": exit_code, + "log_file": str(shard_log_files[-1]) if shard_log_files else None, + "log_tail": log_content, + } + ) + + # If no failed shards, 
include the most recent shard logs + # (useful for investigating in-progress or completed transfers) + if not fail_files: + all_shard_logs = sorted( + logs_dir.glob("shard_*_attempt_*.log"), + key=lambda p: p.stat().st_mtime, + ) + # Take the most recent logs (up to 5) + for log_file in all_shard_logs[-5:]: + try: + # Extract shard ID from filename + name = log_file.stem # e.g. shard_000001_attempt_1 + parts = name.split("_") + shard_id = parts[1] if len(parts) >= 2 else "unknown" + + lines = log_file.read_text().splitlines() + log_content = "\n".join(lines[-tail_lines:]) if lines else "" + + # Check if this shard is done + done_file = state_dir / f"shard_{shard_id}.done" + status = "completed" if done_file.exists() else "in_progress" + + shard_logs.append( + { + "shard_id": shard_id, + "status": status, + "log_file": str(log_file), + "log_tail": log_content, + } + ) + except IOError: + pass + + # Cap total shard logs to avoid overwhelming the response + shard_logs = shard_logs[:20] + return JobLogs( job_id=job_id, run_dir=str(run_dir), @@ -1143,4 +1220,5 @@ def get_job_logs( log_tail=log_tail, error_log_tail=error_log_tail, rclone_commands=rclone_commands, + shard_logs=shard_logs, ) From 9da7243cd88c9ffe74bc97a9f8796b371b6b16e7 Mon Sep 17 00:00:00 2001 From: Joe Schoonover Date: Sat, 7 Feb 2026 08:40:36 -0500 Subject: [PATCH 20/25] Add lightweight Haiku triage to filter thread messages Thread replies now go through a fast triage call before responding, so the bot stays silent when users are talking to each other rather than addressing the bot. @mentions and DMs bypass triage entirely. Skipped messages are still stored for conversation context. 
Co-Authored-By: Claude Opus 4.6 --- src/xfer/slackbot/app.py | 10 ++++ src/xfer/slackbot/claude_agent.py | 74 ++++++++++++++++++++++++++++ src/xfer/slackbot/config.py | 4 ++ tests/test_slackbot_dryrun.py | 80 +++++++++++++++++++++++++++++++ 4 files changed, 168 insertions(+) diff --git a/src/xfer/slackbot/app.py b/src/xfer/slackbot/app.py index 96888ab..944d643 100644 --- a/src/xfer/slackbot/app.py +++ b/src/xfer/slackbot/app.py @@ -271,6 +271,16 @@ def handle_message(event: dict[str, Any], say: Any) -> None: # Bot hasn't participated in this thread return + # Triage: check if message is directed at the bot + if not agent.should_respond_in_thread( + user_message=text, + conversation_history=history.copy() if history else None, + ): + logger.info(f"Triage: skipping message from {user} in {channel}:{thread_ts}") + # Still store the message so future triage/responses have full context + conversations.append(channel, thread_ts, {"role": "user", "content": text}) + return + logger.info( f"Continuing thread conversation with {user}: {text[:100]}..." ) diff --git a/src/xfer/slackbot/claude_agent.py b/src/xfer/slackbot/claude_agent.py index 615e44c..dc992a3 100644 --- a/src/xfer/slackbot/claude_agent.py +++ b/src/xfer/slackbot/claude_agent.py @@ -7,6 +7,7 @@ from __future__ import annotations import json +import logging from typing import Any import anthropic @@ -316,6 +317,26 @@ Keep responses brief and focused on the task at hand.""" +TRIAGE_SYSTEM_PROMPT = """You are classifying whether a message in a Slack thread is directed at the data transfer bot. + +The bot helps users submit and monitor data transfer jobs on an HPC cluster. 
+ +Respond = true when: +- The message asks about transfers, jobs, or their status +- The message is a follow-up to something the bot said +- The message contains instructions or requests for the bot +- The user asks a question the bot can help with + +Respond = false when: +- Users are talking to each other, not the bot +- The message is a side conversation unrelated to the bot +- Users are thanking or addressing each other +- The message is general chatter not directed at the bot + +Output ONLY valid JSON: {"respond": true, "reason": "..."}""" + +logger = logging.getLogger(__name__) + class ClaudeAgent: """Agent that uses Claude to interpret requests and execute tools.""" @@ -325,6 +346,59 @@ def __init__(self, config: BotConfig, slack_client=None): self.client = anthropic.Anthropic(api_key=config.anthropic_api_key) self.slack_client = slack_client # For posting to support channel + def should_respond_in_thread( + self, + user_message: str, + conversation_history: list[dict] | None = None, + ) -> bool: + """Decide if a thread message is directed at the bot using a lightweight triage call. + + Returns True if the bot should respond, False otherwise. + Fails open (returns True) on any error. + """ + try: + # Build context from recent history + messages: list[dict] = [] + if conversation_history: + for msg in conversation_history[-5:]: + content = msg.get("content", "") + if len(content) > 200: + content = content[:200] + "..." 
+ messages.append({"role": msg["role"], "content": content}) + messages.append({"role": "user", "content": user_message}) + + response = self.client.messages.create( + model=self.config.triage_model, + max_tokens=100, + system=TRIAGE_SYSTEM_PROMPT, + messages=messages, + ) + + raw = response.content[0].text.strip() + + # Try JSON parse first, fall back to string search + try: + result = json.loads(raw) + should_respond = result.get("respond", True) + except (json.JSONDecodeError, AttributeError): + should_respond = '"respond": false' not in raw.lower() + + reason = "" + try: + result = json.loads(raw) + reason = result.get("reason", "") + except Exception: + pass + + logger.info( + f"Triage decision: respond={should_respond}, reason={reason!r}, raw={raw!r}" + ) + return should_respond + + except Exception: + logger.exception("Triage call failed, defaulting to respond") + return True + def execute_tool( self, tool_name: str, diff --git a/src/xfer/slackbot/config.py b/src/xfer/slackbot/config.py index 7dce32d..b43512c 100644 --- a/src/xfer/slackbot/config.py +++ b/src/xfer/slackbot/config.py @@ -60,6 +60,7 @@ class BotConfig: default_factory=lambda: os.environ.get("ANTHROPIC_API_KEY", "") ) claude_model: str = "claude-sonnet-4-20250514" + triage_model: str = "claude-haiku-4-20250414" # Paths runs_base_dir: Path = field(default_factory=lambda: Path.home() / "xfer-runs") @@ -106,6 +107,9 @@ def from_env(cls) -> "BotConfig": if xfer_install_dir := os.environ.get("XFER_INSTALL_DIR"): config.xfer_install_dir = Path(xfer_install_dir) + if triage_model := os.environ.get("XFER_TRIAGE_MODEL"): + config.triage_model = triage_model + return config diff --git a/tests/test_slackbot_dryrun.py b/tests/test_slackbot_dryrun.py index 1a8a9bf..9b06c3c 100644 --- a/tests/test_slackbot_dryrun.py +++ b/tests/test_slackbot_dryrun.py @@ -312,6 +312,83 @@ def test_claude_agent_tools(): print("✓ Claude agent tools tests passed") +def test_triage_respond(): + """Test triage returns True when 
Haiku says respond=true.""" + print("\n=== Testing triage: respond ===") + + from xfer.slackbot.claude_agent import ClaudeAgent + + config = BotConfig() + config.anthropic_api_key = "test-key" + + with patch("anthropic.Anthropic") as MockClient: + mock_response = MagicMock() + mock_response.content = [MagicMock(text='{"respond": true, "reason": "asking about transfer status"}')] + MockClient.return_value.messages.create.return_value = mock_response + + agent = ClaudeAgent(config) + result = agent.should_respond_in_thread( + user_message="What's the status of my transfer?", + conversation_history=[{"role": "assistant", "content": "Your transfer has been submitted."}], + ) + + assert result is True + + # Verify triage model was used + call_kwargs = MockClient.return_value.messages.create.call_args + assert call_kwargs.kwargs["model"] == config.triage_model + + print("✓ Triage respond test passed") + + +def test_triage_skip(): + """Test triage returns False when Haiku says respond=false.""" + print("\n=== Testing triage: skip ===") + + from xfer.slackbot.claude_agent import ClaudeAgent + + config = BotConfig() + config.anthropic_api_key = "test-key" + + with patch("anthropic.Anthropic") as MockClient: + mock_response = MagicMock() + mock_response.content = [MagicMock(text='{"respond": false, "reason": "users talking to each other"}')] + MockClient.return_value.messages.create.return_value = mock_response + + agent = ClaudeAgent(config) + result = agent.should_respond_in_thread( + user_message="hey @alice, did you get my email?", + conversation_history=[{"role": "assistant", "content": "Transfer submitted."}], + ) + + assert result is False + + print("✓ Triage skip test passed") + + +def test_triage_error_defaults_to_respond(): + """Test triage fails open (returns True) on error.""" + print("\n=== Testing triage: error defaults to respond ===") + + from xfer.slackbot.claude_agent import ClaudeAgent + + config = BotConfig() + config.anthropic_api_key = "test-key" + + 
with patch("anthropic.Anthropic") as MockClient: + MockClient.return_value.messages.create.side_effect = Exception("API error") + + agent = ClaudeAgent(config) + result = agent.should_respond_in_thread( + user_message="check my transfer", + conversation_history=None, + ) + + assert result is True + + print("✓ Triage error defaults to respond test passed") + + def test_full_flow_simulation(): """Simulate a full transfer request flow.""" print("\n=== Simulating full transfer flow ===") @@ -443,6 +520,9 @@ def main(): test_sacct_json_parsing() test_transfer_progress() test_claude_agent_tools() + test_triage_respond() + test_triage_skip() + test_triage_error_defaults_to_respond() test_full_flow_simulation() print("\n" + "=" * 60) From ef742dbd011085e45bbda0d388f5aeb0a8d4f092 Mon Sep 17 00:00:00 2001 From: Joe Schoonover Date: Sat, 7 Feb 2026 08:47:12 -0500 Subject: [PATCH 21/25] Add per-user job ownership for cancel authorization Record the Slack user ID in request.json when submitting transfer jobs and enforce ownership checks in cancel_job so only the submitting user can cancel their own jobs. Legacy jobs without a submitted_by field remain cancellable by anyone for backwards compatibility. 
Co-Authored-By: Claude Opus 4.6 --- src/xfer/slackbot/app.py | 3 + src/xfer/slackbot/claude_agent.py | 5 ++ src/xfer/slackbot/slurm_tools.py | 21 +++++- tests/test_slackbot_dryrun.py | 109 ++++++++++++++++++++++++++++++ 4 files changed, 136 insertions(+), 2 deletions(-) diff --git a/src/xfer/slackbot/app.py b/src/xfer/slackbot/app.py index 944d643..d83841d 100644 --- a/src/xfer/slackbot/app.py +++ b/src/xfer/slackbot/app.py @@ -177,6 +177,7 @@ def handle_mention(event: dict[str, Any], say: Any) -> None: channel_id=channel, thread_ts=thread_ts, conversation_history=history.copy() if history else None, + user_id=user, ) # Store the exchange @@ -228,6 +229,7 @@ def handle_message(event: dict[str, Any], say: Any) -> None: channel_id=channel, thread_ts=thread_ts, conversation_history=history.copy() if history else None, + user_id=user, ) conversations.append( @@ -291,6 +293,7 @@ def handle_message(event: dict[str, Any], say: Any) -> None: channel_id=channel, thread_ts=thread_ts, conversation_history=history.copy() if history else None, + user_id=user, ) conversations.append( diff --git a/src/xfer/slackbot/claude_agent.py b/src/xfer/slackbot/claude_agent.py index dc992a3..ca388dd 100644 --- a/src/xfer/slackbot/claude_agent.py +++ b/src/xfer/slackbot/claude_agent.py @@ -405,6 +405,7 @@ def execute_tool( tool_input: dict[str, Any], channel_id: str, thread_ts: str, + user_id: str = "", ) -> str: """Execute a tool and return the result as a string.""" if tool_name == "submit_transfer": @@ -426,6 +427,7 @@ def execute_tool( time_limit=tool_input.get("time_limit"), job_name=tool_input.get("job_name"), rclone_flags=tool_input.get("rclone_flags"), + user_id=user_id, ) return json.dumps(result.__dict__, default=str) @@ -472,6 +474,7 @@ def execute_tool( tool_input["job_id"], channel_id, thread_ts, + user_id=user_id, ) return json.dumps({"success": success, "message": message}) @@ -716,6 +719,7 @@ def process_message( channel_id: str, thread_ts: str, conversation_history: 
list[dict] | None = None, + user_id: str = "", ) -> str: """ Process a user message and return Claude's response. @@ -748,6 +752,7 @@ def process_message( block.input, channel_id, thread_ts, + user_id=user_id, ) tool_results.append( { diff --git a/src/xfer/slackbot/slurm_tools.py b/src/xfer/slackbot/slurm_tools.py index 432c100..cc620cf 100644 --- a/src/xfer/slackbot/slurm_tools.py +++ b/src/xfer/slackbot/slurm_tools.py @@ -310,6 +310,7 @@ def submit_transfer( time_limit: Optional[str] = None, job_name: Optional[str] = None, rclone_flags: Optional[str] = None, + user_id: str = "", ) -> TransferResult: """ Submit a data transfer job via xfer. @@ -393,6 +394,7 @@ def submit_transfer( "prepare_job_id": job_id, "num_shards": num_shards, "submitted_at": datetime.utcnow().isoformat() + "Z", + "submitted_by": user_id, } (run_dir / "request.json").write_text(json.dumps(request_meta, indent=2)) @@ -630,11 +632,14 @@ def get_transfer_progress_by_job(job_id: str) -> Optional[dict]: return progress -def cancel_job(job_id: str, channel_id: str, thread_ts: str) -> tuple[bool, str]: +def cancel_job( + job_id: str, channel_id: str, thread_ts: str, *, user_id: str = "" +) -> tuple[bool, str]: """ Cancel a Slurm job. - Validates that the job belongs to the requesting thread via comment. + Validates that the job belongs to the requesting thread via comment, + and that the requesting user is the one who submitted the job. """ # First, verify the job belongs to this thread job_info = get_job_status(job_id) @@ -645,6 +650,18 @@ def cancel_job(job_id: str, channel_id: str, thread_ts: str) -> tuple[bool, str] if expected_comment not in job_info.comment: return False, f"Job {job_id} does not belong to this thread. Cannot cancel." 
+ # Check user ownership via request.json in the job's work_dir + if user_id and job_info.work_dir: + request_file = Path(job_info.work_dir) / "request.json" + if request_file.exists(): + try: + meta = json.loads(request_file.read_text()) + submitted_by = meta.get("submitted_by", "") + if submitted_by and submitted_by != user_id: + return False, "Only the user who submitted this job can cancel it." + except (json.JSONDecodeError, IOError): + pass # Allow cancel if request.json is unreadable + # Cancel the job try: run_cmd(["scancel", job_id], check=True) diff --git a/tests/test_slackbot_dryrun.py b/tests/test_slackbot_dryrun.py index 9b06c3c..3f8b160 100644 --- a/tests/test_slackbot_dryrun.py +++ b/tests/test_slackbot_dryrun.py @@ -25,9 +25,11 @@ slack_comment, ) from xfer.slackbot.slurm_tools import ( + JobInfo, TransferResult, _parse_job_from_sacct_json, _write_prepare_script, + cancel_job, get_allowed_backends, get_transfer_progress, validate_backend, @@ -506,6 +508,110 @@ def test_full_flow_simulation(): print("\n✓ Full flow simulation passed") +def test_cancel_job_by_submitter(): + """Test that the user who submitted a job can cancel it.""" + print("\n=== Testing cancel job by submitter ===") + + channel = "C07ABC123" + thread_ts = "1234567890.123456" + comment = slack_comment(channel, thread_ts) + + with tempfile.TemporaryDirectory() as tmpdir: + run_dir = Path(tmpdir) + request_meta = {"submitted_by": "U_ALICE"} + (run_dir / "request.json").write_text(json.dumps(request_meta)) + + job_info = JobInfo( + job_id="123", + array_job_id=None, + state="RUNNING", + name="xfer-slack", + comment=comment, + work_dir=str(run_dir), + submit_time=None, + start_time=None, + end_time=None, + partition="transfer", + ) + + with patch("xfer.slackbot.slurm_tools.get_job_status", return_value=job_info), \ + patch("xfer.slackbot.slurm_tools.run_cmd"): + success, message = cancel_job("123", channel, thread_ts, user_id="U_ALICE") + assert success, f"Expected success, got: 
{message}" + assert "cancelled" in message + + print("✓ Cancel job by submitter test passed") + + +def test_cancel_job_by_other_user(): + """Test that a different user cannot cancel someone else's job.""" + print("\n=== Testing cancel job by other user ===") + + channel = "C07ABC123" + thread_ts = "1234567890.123456" + comment = slack_comment(channel, thread_ts) + + with tempfile.TemporaryDirectory() as tmpdir: + run_dir = Path(tmpdir) + request_meta = {"submitted_by": "U_ALICE"} + (run_dir / "request.json").write_text(json.dumps(request_meta)) + + job_info = JobInfo( + job_id="123", + array_job_id=None, + state="RUNNING", + name="xfer-slack", + comment=comment, + work_dir=str(run_dir), + submit_time=None, + start_time=None, + end_time=None, + partition="transfer", + ) + + with patch("xfer.slackbot.slurm_tools.get_job_status", return_value=job_info): + success, message = cancel_job("123", channel, thread_ts, user_id="U_BOB") + assert not success, f"Expected failure, got success: {message}" + assert "Only the user who submitted" in message + + print("✓ Cancel job by other user test passed") + + +def test_cancel_job_legacy_no_submitter(): + """Test that jobs without submitted_by field can be cancelled by anyone (backwards compat).""" + print("\n=== Testing cancel job legacy (no submitter) ===") + + channel = "C07ABC123" + thread_ts = "1234567890.123456" + comment = slack_comment(channel, thread_ts) + + with tempfile.TemporaryDirectory() as tmpdir: + run_dir = Path(tmpdir) + request_meta = {"source": "s3src:bucket/data", "dest": "s3dst:archive/data"} + (run_dir / "request.json").write_text(json.dumps(request_meta)) + + job_info = JobInfo( + job_id="123", + array_job_id=None, + state="RUNNING", + name="xfer-slack", + comment=comment, + work_dir=str(run_dir), + submit_time=None, + start_time=None, + end_time=None, + partition="transfer", + ) + + with patch("xfer.slackbot.slurm_tools.get_job_status", return_value=job_info), \ + patch("xfer.slackbot.slurm_tools.run_cmd"): 
+ success, message = cancel_job("123", channel, thread_ts, user_id="U_ANYONE") + assert success, f"Expected success for legacy job, got: {message}" + assert "cancelled" in message + + print("✓ Cancel job legacy (no submitter) test passed") + + def main(): """Run all dry-run tests.""" print("=" * 60) @@ -524,6 +630,9 @@ def main(): test_triage_skip() test_triage_error_defaults_to_respond() test_full_flow_simulation() + test_cancel_job_by_submitter() + test_cancel_job_by_other_user() + test_cancel_job_legacy_no_submitter() print("\n" + "=" * 60) print("All dry-run tests passed! ✓") From 1fc797662064a6b12af4cea72de2e2ad30c5ba87 Mon Sep 17 00:00:00 2001 From: Joe Schoonover Date: Sat, 7 Feb 2026 09:51:25 -0500 Subject: [PATCH 22/25] Parallelize manifest build and add progress tracking Split manifest generation into up to 4 concurrent rclone lsjson workers to reduce listing time for large datasets. Add `manifest combine` CLI command, prepare job phase tracking (progress.json), manifest build progress reporting, and bump prepare resources (250G mem, 4-day limit). 
Co-Authored-By: Claude Opus 4.6 --- CHANGELOG.md | 39 ++++++++ src/xfer/cli.py | 134 ++++++++++++++++++++++++++- src/xfer/slackbot/claude_agent.py | 5 +- src/xfer/slackbot/slurm_tools.py | 144 ++++++++++++++++++++++++++++-- tests/test_slackbot_dryrun.py | 4 +- 5 files changed, 316 insertions(+), 10 deletions(-) create mode 100644 CHANGELOG.md diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..55edbac --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,39 @@ +# Changelog + +## Unreleased (feature/slack-claude) + +### Parallel Manifest Build + +- Parallelize manifest generation into up to 4 concurrent `rclone lsjson` workers, reducing listing time for large datasets with many top-level subdirectories (`slurm_tools.py`) +- Add `manifest combine` CLI command to merge parallel lsjson part files (with `.prefix` sidecars) into a unified `manifest.jsonl` (`cli.py`) +- Bump prepare job memory from 16 GB to 250 GB to accommodate large listings (`slurm_tools.py`) +- Add `--max-backlog=1000000` to `rclone lsjson` calls to prevent the walker from stalling on large buckets (`cli.py`, `slurm_tools.py`) +- Report manifest build progress (files listed, bytes listed) via `manifest.jsonl.progress` sidecar file (`cli.py`) +- Track prepare job phases (`listing_source`, `combining_manifest`, `analyzing`, `sharding`, `rendering`, `submitting`) in `progress.json` (`slurm_tools.py`) + +### Claude-Powered Slack Bot + +- Add Claude-powered Slack bot for interactive data transfer requests via Slack threads +- Add intelligent rclone flag selection based on file size distribution analysis +- Add `check_path_exists` tool to validate source paths before submitting jobs +- Add `list_buckets` tool to enumerate buckets at remote endpoints +- Add `read_job_logs` tool to access job analysis data, prepare logs, and shard transfer logs +- Add lightweight Haiku triage to filter thread messages and skip unrelated chatter +- Add per-user job ownership so only the submitting user can 
cancel their jobs +- Restore thread context from Slack API after bot restarts +- Report manifest listing progress (files_listed, bytes_listed) in job status during `building_manifest` phase + +### Slurm Robustness + +- Increase prepare job time limit to 4 days for very large datasets +- Unset conflicting `SLURM_MEM_*` environment variables in prepare.sh +- Add `--export=NONE` to all `sbatch` calls to prevent environment leakage +- Allow users to set lower array concurrency (max 64) +- Verify source path exists before creating run directory and submitting jobs +- Reduce thread history limits to mitigate Slack rate limits + +### Bug Fixes + +- Fix `run_id` format to be filename-safe (no colons) +- Fix markdown rendering in Slack responses +- Improve error logging for manifest build failures (write to `xfer-err/` with full context) diff --git a/src/xfer/cli.py b/src/xfer/cli.py index 77040a9..361ff0c 100644 --- a/src/xfer/cli.py +++ b/src/xfer/cli.py @@ -246,6 +246,7 @@ def manifest_build( rclone_cmd += shlex.split(extra_lsjson_flags) rclone_cmd.append("--files-only") + rclone_cmd.append("--max-backlog=1000000") srun_cmd = ["srun", "-n", "1", "-c", "8", "--no-container-remap-root"] srun_cmd += pyxis_container_args( @@ -301,9 +302,22 @@ def manifest_build( eprint(f"SLURM memory env vars: {slurm_mem_vars}") raise - # Build JSONL + # Build JSONL with progress reporting n = 0 bytes_total = 0 + progress_file = out.parent / "manifest.jsonl.progress" + last_progress_n = 0 + PROGRESS_INTERVAL = 10_000 # update progress file every 10k files + + def _write_progress() -> None: + """Write current listing progress to a sidecar file.""" + try: + progress_file.write_text( + json.dumps({"files_listed": n, "bytes_listed": bytes_total}) + ) + except OSError: + pass + with out.open("w", encoding="utf-8") as f: for item in parse_lsjson_items(cp.stdout): # Skip directories @@ -345,6 +359,13 @@ def manifest_build( f.write(json.dumps(rec, separators=(",", ":")) + "\n") n += 1 + if n - 
last_progress_n >= PROGRESS_INTERVAL: + _write_progress() + eprint(f" manifest progress: {n:,} files, {bytes_total:,} bytes") + last_progress_n = n + + # Final progress update and cleanup + _write_progress() eprint(f"Wrote {n} items, {bytes_total} bytes -> {out}") @@ -515,6 +536,117 @@ def manifest_analyze( print(json_output) +@manifest_app.command("combine") +def manifest_combine( + source: str = typer.Option( + ..., help="rclone source root, e.g. s3src:bucket/prefix" + ), + dest: str = typer.Option(..., help="rclone dest root, e.g. s3dst:bucket/prefix"), + parts_dir: Path = typer.Option( + ..., exists=True, help="Directory containing lsjson-*.json part files", resolve_path=True + ), + out: Path = typer.Option(..., help="Output manifest JSONL path", resolve_path=True), + run_id: Optional[str] = typer.Option( + None, help="Run identifier; default is generated" + ), +) -> None: + """ + Combine multiple lsjson part files into a unified manifest.jsonl. + + Reads lsjson-*.json files from --parts-dir, adjusts paths using .prefix + sidecar files, and writes a single manifest JSONL. 
+ """ + run_id = run_id or now_run_id() + mkdirp(out.parent) + + # Glob part files + part_files = sorted(parts_dir.glob("lsjson-*.json")) + if not part_files: + eprint(f"No lsjson-*.json files found in {parts_dir}") + raise typer.Exit(code=2) + + n = 0 + bytes_total = 0 + last_progress_n = 0 + PROGRESS_INTERVAL = 10_000 + progress_file = out.parent / "manifest.jsonl.progress" + + def _write_progress() -> None: + try: + progress_file.write_text( + json.dumps({"files_listed": n, "bytes_listed": bytes_total}) + ) + except OSError: + pass + + with out.open("w", encoding="utf-8") as f: + for part_file in part_files: + # Determine prefix from sidecar file + prefix_file = part_file.with_suffix(".prefix") + prefix = "" + if prefix_file.exists(): + prefix = prefix_file.read_text(encoding="utf-8").strip() + + # Read the JSON array + try: + items = json.loads(part_file.read_text(encoding="utf-8")) + if not isinstance(items, list): + eprint(f"WARNING: {part_file} is not a JSON array, skipping") + continue + except (json.JSONDecodeError, OSError) as e: + eprint(f"WARNING: Failed to read {part_file}: {e}, skipping") + continue + + for item in items: + if not isinstance(item, dict): + continue + if item.get("IsDir") is True: + continue + + rel_path = item.get("Path") + if not rel_path or not isinstance(rel_path, str): + continue + + # Adjust path with prefix + if prefix: + rel_path = prefix + "/" + rel_path + + size = int(item.get("Size") or 0) + bytes_total += size + + mtime = item.get("ModTime") + hashes = item.get("Hashes") if isinstance(item.get("Hashes"), dict) else {} + etag = item.get("ETag") or item.get("etag") + storage_class = item.get("StorageClass") + meta = item.get("Metadata") if isinstance(item.get("Metadata"), dict) else {} + + rec = { + "schema": SCHEMA, + "run_id": run_id, + "source_root": source, + "dest_root": dest, + "source": source.rstrip("/") + "/" + rel_path, + "dest": stable_dest_for_source(source, dest, rel_path), + "path": rel_path, + "size": size, + 
"mtime": mtime, + "hashes": hashes, + "etag": etag, + "storage_class": storage_class, + "meta": meta, + } + f.write(json.dumps(rec, separators=(",", ":")) + "\n") + n += 1 + + if n - last_progress_n >= PROGRESS_INTERVAL: + _write_progress() + eprint(f" manifest progress: {n:,} files, {bytes_total:,} bytes") + last_progress_n = n + + _write_progress() + eprint(f"Combined {len(part_files)} parts -> {n} items, {bytes_total} bytes -> {out}") + + # ----------------------------- # Slurm render/submit # ----------------------------- diff --git a/src/xfer/slackbot/claude_agent.py b/src/xfer/slackbot/claude_agent.py index ca388dd..71b26c8 100644 --- a/src/xfer/slackbot/claude_agent.py +++ b/src/xfer/slackbot/claude_agent.py @@ -84,7 +84,9 @@ "name": "check_status", "description": """Check the status of transfer jobs in this thread. Use this when the user asks about job status, progress, or wants to know if their transfer is complete. -This tool finds all jobs associated with the current Slack thread and returns their status.""", +This tool finds all jobs associated with the current Slack thread and returns their status. + +When the phase is "building_manifest", the prepare job is listing files at the source. This can take up to several days for large datasets and is normal. The response may include files_listed and bytes_listed if the JSONL writing phase has started, or prepare_phase/prepare_detail for finer-grained progress. Only flag a concern if the job has been in this phase for more than 48 hours with no observable progress.""", "input_schema": { "type": "object", "properties": { @@ -292,6 +294,7 @@ 6. When reporting job status, include relevant details like progress and any errors 7. If users want custom rclone flags (e.g., bandwidth limits, checksum verification), pass them via the rclone_flags parameter 8. When a user reports a problem or asks you to investigate an issue with a transfer, ALWAYS use read_job_logs to examine the actual log files. 
The shard logs contain the real error messages from rclone and the transfer process. Do not guess at causes without reading the logs first. +9. The preparation phase (manifest building) can take a long time for large datasets — up to several days is normal. The prepare job has a 4-day time limit. Only consider it a potential problem if the prepare job has been running for more than 48 hours with no progress. If the status includes files_listed or bytes_listed, report those numbers to the user so they can see listing is progressing. If those numbers are absent, the rclone listing is still running (it returns results all at once when complete). Transfer path format: - Paths should be in rclone format: "remote:bucket/path" diff --git a/src/xfer/slackbot/slurm_tools.py b/src/xfer/slackbot/slurm_tools.py index cc620cf..60a4e08 100644 --- a/src/xfer/slackbot/slurm_tools.py +++ b/src/xfer/slackbot/slurm_tools.py @@ -202,10 +202,10 @@ def _write_prepare_script( #SBATCH --job-name={job_name}-prepare #SBATCH --output={run_dir}/prepare-%j.out #SBATCH --error={run_dir}/prepare-%j.err -#SBATCH --time=2-00:00:00 +#SBATCH --time=4-00:00:00 #SBATCH --partition={config.slurm.partition} #SBATCH --cpus-per-task=8 -#SBATCH --mem=16G +#SBATCH --mem=250G #SBATCH --comment={shlex.quote(comment)} #SBATCH --chdir={run_dir} {f"#SBATCH --qos={config.slurm.qos}" if config.slurm.qos else ""} @@ -216,6 +216,22 @@ def _write_prepare_script( # (cluster DefMemPerCPU conflicts with job --mem setting) unset SLURM_MEM_PER_CPU SLURM_MEM_PER_GPU SLURM_MEM_PER_NODE +# Helper to update progress.json with current phase and timestamp +update_progress() {{ + local phase="$1" + local detail="${{2:-}}" + python3 -c " +import json, datetime +p = {{'phase': '$phase', 'updated_at': datetime.datetime.now().isoformat(), 'detail': '$detail'}} +try: + old = json.load(open('{run_dir}/progress.json')) + p['started_at'] = old.get('started_at', p['updated_at']) +except Exception: + p['started_at'] = p['updated_at'] 
+json.dump(p, open('{run_dir}/progress.json', 'w')) +" +}} + # Setup uv environment XFER_DIR="{xfer_dir}" echo "=== Setting up uv environment at $(date -Is) ===" @@ -229,18 +245,108 @@ def _write_prepare_script( echo "Dest: {dest}" echo "Run dir: {run_dir}" -# Phase 1: Build manifest -uv run xfer manifest build \\ +# Phase 1a: List top-level directories at source +update_progress "listing_source" "Listing top-level directories at source" +echo "=== Phase 1a: listing top-level directories ===" +srun -n 1 -c 8 \\ + --container-image {shlex.quote(config.rclone.image)} \\ + --container-mounts "{config.rclone.config_path}:{config.rclone.container_conf_path}:ro" \\ + --no-container-remap-root \\ + rclone lsjson {shlex.quote(source)} \\ + --dirs-only --fast-list --max-backlog=1000000 \\ + --config {config.rclone.container_conf_path} \\ + > {run_dir}/top-dirs.json +echo "=== Phase 1a complete at $(date -Is) ===" + +# Phase 1b: Create task assignments from directory listing +echo "=== Phase 1b: creating task assignments ===" +mkdir -p {run_dir}/lsjson-parts +python3 -c " +import json, os, math +with open('{run_dir}/top-dirs.json') as f: + entries = json.load(f) +dirs = [e['Path'] for e in entries if e.get('IsDir')] +num_dirs = len(dirs) +num_tasks = min(max(num_dirs, 1), 4) +os.makedirs('{run_dir}/lsjson-parts', exist_ok=True) +# Round-robin assign directories to tasks +for proc_id in range(num_tasks): + assigned = [dirs[i] for i in range(proc_id, num_dirs, num_tasks)] + with open(f'{run_dir}/lsjson-parts/dirs-{{proc_id}}.txt', 'w') as fh: + for d in assigned: + fh.write(d + '\\n') +manifest_tasks = {{'num_tasks': num_tasks, 'num_dirs': num_dirs}} +with open('{run_dir}/lsjson-parts/manifest-tasks.json', 'w') as fh: + json.dump(manifest_tasks, fh) +print(f'Assigned {{num_dirs}} directories to {{num_tasks}} tasks') +" +NUM_TASKS=$(python3 -c "import json; print(json.load(open('{run_dir}/lsjson-parts/manifest-tasks.json'))['num_tasks'])") +echo "NUM_TASKS=$NUM_TASKS" +echo 
"=== Phase 1b complete at $(date -Is) ===" + +# Phase 1c: Write manifest worker script +echo "=== Phase 1c: writing manifest-worker.sh ===" +cat > {run_dir}/manifest-worker.sh << 'WORKER_EOF' +#!/usr/bin/env bash +set -euo pipefail +PROC_ID="${{SLURM_PROCID}}" +echo "Worker ${{PROC_ID}} started on $(hostname) at $(date -Is)" + +# Task 0 lists root-level files (non-recursive) +if [ "${{PROC_ID}}" -eq 0 ]; then + echo "Listing root-level files..." + rclone lsjson {shlex.quote(source)} \\ + --files-only --fast-list --max-backlog=1000000 \\ + --config {config.rclone.container_conf_path} \\ + > {run_dir}/lsjson-parts/lsjson-root-files.json || true + echo "Root-level files listing complete" +fi + +# All tasks: process assigned subdirectories +DIRS_FILE="{run_dir}/lsjson-parts/dirs-${{PROC_ID}}.txt" +if [ -f "${{DIRS_FILE}}" ]; then + IDX=0 + while IFS= read -r SUBDIR; do + [ -z "${{SUBDIR}}" ] && continue + echo "Listing subdir: ${{SUBDIR}} (idx=${{IDX}})" + rclone lsjson {shlex.quote(source)}/"${{SUBDIR}}" \\ + --recursive --files-only --fast-list --max-backlog=1000000 \\ + --config {config.rclone.container_conf_path} \\ + > {run_dir}/lsjson-parts/lsjson-${{PROC_ID}}-${{IDX}}.json + echo "${{SUBDIR}}" > {run_dir}/lsjson-parts/lsjson-${{PROC_ID}}-${{IDX}}.prefix + IDX=$((IDX + 1)) + done < "${{DIRS_FILE}}" +fi + +echo "Worker ${{PROC_ID}} finished at $(date -Is)" +WORKER_EOF +chmod +x {run_dir}/manifest-worker.sh +echo "=== Phase 1c complete at $(date -Is) ===" + +# Phase 1d: Run parallel manifest workers +echo "=== Phase 1d: running $NUM_TASKS parallel manifest workers ===" +update_progress "listing_source" "Running $NUM_TASKS parallel rclone lsjson workers" +srun -n $NUM_TASKS -c 2 \\ + --container-image {shlex.quote(config.rclone.image)} \\ + --container-mounts "{config.rclone.config_path}:{config.rclone.container_conf_path}:ro,{run_dir}:{run_dir}" \\ + --no-container-remap-root \\ + bash {run_dir}/manifest-worker.sh +echo "=== Phase 1d complete at $(date -Is) ===" + 
+# Phase 1e: Combine manifest parts +echo "=== Phase 1e: combining manifest parts ===" +update_progress "combining_manifest" "Combining parallel listing results into manifest" +uv run xfer manifest combine \\ --source {shlex.quote(source)} \\ --dest {shlex.quote(dest)} \\ - --out {run_dir}/manifest.jsonl \\ - --rclone-image {shlex.quote(config.rclone.image)} \\ - --rclone-config {shlex.quote(str(config.rclone.config_path))} + --parts-dir {run_dir}/lsjson-parts \\ + --out {run_dir}/manifest.jsonl echo "=== Manifest build complete at $(date -Is) ===" # Phase 2: Analyze manifest to determine optimal rclone flags echo "=== Analyzing file size distribution ===" +update_progress "analyzing" "Analyzing file size distribution" uv run xfer manifest analyze \\ --in {run_dir}/manifest.jsonl \\ --out {run_dir}/analysis.json @@ -262,6 +368,7 @@ def _write_prepare_script( echo "=== Analysis complete at $(date -Is) ===" # Phase 3: Shard manifest +update_progress "sharding" "Splitting manifest into {num_shards} shards" uv run xfer manifest shard \\ --in {run_dir}/manifest.jsonl \\ --outdir {run_dir}/shards \\ @@ -270,6 +377,7 @@ def _write_prepare_script( echo "=== Sharding complete at $(date -Is) ===" # Phase 4: Render Slurm scripts +update_progress "rendering" "Generating Slurm transfer scripts" uv run xfer slurm render \\ --run-dir {run_dir} \\ --num-shards {num_shards} \\ @@ -288,6 +396,7 @@ def _write_prepare_script( echo "=== Render complete at $(date -Is) ===" # Phase 5: Submit transfer array job +update_progress "submitting" "Submitting transfer array job" uv run xfer slurm submit --run-dir {run_dir} echo "=== Transfer job submitted at $(date -Is) ===" @@ -558,9 +667,30 @@ def get_transfer_progress(run_dir: Path) -> dict: except json.JSONDecodeError: pass + # Read progress.json for detailed phase info and timing + progress_file = run_dir / "progress.json" + if progress_file.exists(): + try: + progress_data = json.loads(progress_file.read_text()) + result["prepare_phase"] = 
progress_data.get("phase") + result["prepare_detail"] = progress_data.get("detail") + result["prepare_started_at"] = progress_data.get("started_at") + result["prepare_updated_at"] = progress_data.get("updated_at") + except json.JSONDecodeError: + pass + # Determine phase based on what files exist if not manifest_file.exists(): result["phase"] = "building_manifest" + # Include file count from partial manifest if available + partial = run_dir / "manifest.jsonl.progress" + if partial.exists(): + try: + pdata = json.loads(partial.read_text()) + result["files_listed"] = pdata.get("files_listed", 0) + result["bytes_listed"] = pdata.get("bytes_listed", 0) + except (json.JSONDecodeError, OSError): + pass return result if not shards_meta.exists(): diff --git a/tests/test_slackbot_dryrun.py b/tests/test_slackbot_dryrun.py index 3f8b160..baed4db 100644 --- a/tests/test_slackbot_dryrun.py +++ b/tests/test_slackbot_dryrun.py @@ -174,7 +174,9 @@ def test_prepare_script_generation(): assert "#SBATCH --comment=" in content assert "#SBATCH --chdir=" in content assert "#SBATCH --qos=data-transfer" in content - assert "xfer manifest build" in content + assert "xfer manifest combine" in content + assert "--mem=250G" in content + assert "manifest-worker.sh" in content assert "xfer manifest shard" in content assert "xfer slurm render" in content assert "xfer slurm submit" in content From 9813bd85bdcc12db0a7ce75ee7d3f397093f5fd8 Mon Sep 17 00:00:00 2001 From: Joe Schoonover Date: Sat, 7 Feb 2026 10:18:32 -0500 Subject: [PATCH 23/25] Fix duplicate bot responses, add S3 rclone flags, and increase transfer job resources MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix triage model ID (claude-haiku-4-20250414 → claude-haiku-4-5-20251001) that was causing 404 errors and fail-open duplicate responses. Deduplicate @mention messages in the thread handler since handle_mention already processes them. 
Append S3-specific upload flags when destination is an S3 endpoint. Increase transfer job defaults to 4-day wall clock, 16 CPUs, and 100G memory. Co-Authored-By: Claude Opus 4.6 --- src/xfer/slackbot/app.py | 7 ++++++- src/xfer/slackbot/config.py | 8 ++++---- src/xfer/slackbot/slurm_tools.py | 21 +++++++++++++++++++++ 3 files changed, 31 insertions(+), 5 deletions(-) diff --git a/src/xfer/slackbot/app.py b/src/xfer/slackbot/app.py index d83841d..e4a98e9 100644 --- a/src/xfer/slackbot/app.py +++ b/src/xfer/slackbot/app.py @@ -197,7 +197,7 @@ def handle_mention(event: dict[str, Any], say: Any) -> None: ) @app.event("message") - def handle_message(event: dict[str, Any], say: Any) -> None: + def handle_message(event: dict[str, Any], say: Any, context: dict[str, Any]) -> None: """ Handle direct messages and thread replies. @@ -254,6 +254,11 @@ def handle_message(event: dict[str, Any], say: Any) -> None: if not is_allowed_channel(channel): return + # Skip @mentions — handle_mention already processes those + bot_user_id = context.get("bot_user_id", "") + if bot_user_id and f"<@{bot_user_id}>" in text: + return + # Check if we have history in this thread (meaning we've been mentioned) history = conversations.get(channel, thread_ts) diff --git a/src/xfer/slackbot/config.py b/src/xfer/slackbot/config.py index b43512c..0f2eec5 100644 --- a/src/xfer/slackbot/config.py +++ b/src/xfer/slackbot/config.py @@ -15,9 +15,9 @@ class SlurmDefaults: """Default Slurm job parameters.""" partition: str = "transfer" - time_limit: str = "24:00:00" - cpus_per_task: int = 4 - mem: str = "8G" + time_limit: str = "4-00:00:00" + cpus_per_task: int = 16 + mem: str = "100G" array_concurrency: int = 64 num_shards: int = 256 max_attempts: int = 5 @@ -60,7 +60,7 @@ class BotConfig: default_factory=lambda: os.environ.get("ANTHROPIC_API_KEY", "") ) claude_model: str = "claude-sonnet-4-20250514" - triage_model: str = "claude-haiku-4-20250414" + triage_model: str = "claude-haiku-4-5-20251001" # Paths 
runs_base_dir: Path = field(default_factory=lambda: Path.home() / "xfer-runs") diff --git a/src/xfer/slackbot/slurm_tools.py b/src/xfer/slackbot/slurm_tools.py index 60a4e08..33a39f2 100644 --- a/src/xfer/slackbot/slurm_tools.py +++ b/src/xfer/slackbot/slurm_tools.py @@ -195,6 +195,9 @@ def _write_prepare_script( # User flags to append after analysis-suggested flags user_rclone_flags = rclone_flags or "" + # Extract remote name from dest for S3 detection + dest_remote = dest.split(":")[0] + # Get xfer install directory for uv xfer_dir = config.xfer_install_dir or Path(__file__).parent.parent.parent.parent.resolve() @@ -363,6 +366,24 @@ def _write_prepare_script( else RCLONE_FLAGS="$SUGGESTED_FLAGS" fi + +# Append S3-specific flags when destination is an S3 endpoint +DEST_TYPE=$(python3 -c " +import configparser +remote = '{dest_remote}' +if remote == 's3': + print('s3') +else: + c = configparser.ConfigParser() + c.read('{config.rclone.config_path}') + print(c.get(remote, 'type', fallback='')) +") +if [ "$DEST_TYPE" = "s3" ]; then + S3_FLAGS="--s3-upload-concurrency 8 --buffer-size=32M --multi-thread-stream=8 --multi-thread-cutoff=64M --s3-upload-cutoff 0 --s3-chunk-size 32M" + RCLONE_FLAGS="$RCLONE_FLAGS $S3_FLAGS" + echo "S3 destination detected, appended: $S3_FLAGS" +fi + echo "Final rclone flags: $RCLONE_FLAGS" echo "=== Analysis complete at $(date -Is) ===" From d994cd3efd5f36229b7ab94ca2bcbf0cfc421c7c Mon Sep 17 00:00:00 2001 From: Joe Schoonover Date: Sat, 7 Feb 2026 10:33:18 -0500 Subject: [PATCH 24/25] Fix sbatch_extras literal \n not converted to newlines in rendered scripts The slurm render command received sbatch_extras with literal \n separators but never converted them to actual newlines. This put all #SBATCH directives on one line, causing Slurm to misparse the --comment value. Transfer array jobs ended up with wrong/missing comments, so the bot's thread ownership check failed with "does not belong to this thread". 
Co-Authored-By: Claude Opus 4.6 --- src/xfer/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/xfer/cli.py b/src/xfer/cli.py index 361ff0c..8dbdbb3 100644 --- a/src/xfer/cli.py +++ b/src/xfer/cli.py @@ -885,7 +885,7 @@ def shell_words(s: str) -> str: else s ) - extras = sbatch_extras.rstrip() + extras = sbatch_extras.replace("\\n", "\n").rstrip() extras = extras if extras else "" sbatch_text = SBATCH_ARRAY_SH.format( From 4da504252dc8e699f84b9bb005d5a1269cdd0e48 Mon Sep 17 00:00:00 2001 From: Joe Schoonover Date: Sun, 8 Feb 2026 07:26:37 -0500 Subject: [PATCH 25/25] Add Slurm job state-based phase detection for transfer status Derive transfer phase from sacct job states instead of relying purely on file existence in the run directory. Correlates prepare and transfer jobs by name convention ({name}-prepare / {name}) and enriches with file-based shard progress when available. Handles edge cases like shards finishing before the Slurm job exits, and completed jobs with partial shard failures. Co-Authored-By: Claude Opus 4.6 --- src/xfer/slackbot/claude_agent.py | 28 +++--- src/xfer/slackbot/slurm_tools.py | 153 ++++++++++++++++++++++++++++++ 2 files changed, 169 insertions(+), 12 deletions(-) diff --git a/src/xfer/slackbot/claude_agent.py b/src/xfer/slackbot/claude_agent.py index 71b26c8..35042bb 100644 --- a/src/xfer/slackbot/claude_agent.py +++ b/src/xfer/slackbot/claude_agent.py @@ -23,6 +23,7 @@ get_source_stats, get_transfer_progress, get_transfer_progress_by_job, + get_transfer_status_by_thread, list_buckets, submit_transfer, ) @@ -84,9 +85,20 @@ "name": "check_status", "description": """Check the status of transfer jobs in this thread. Use this when the user asks about job status, progress, or wants to know if their transfer is complete. -This tool finds all jobs associated with the current Slack thread and returns their status. +This tool finds all jobs associated with the current Slack thread and returns their status. 
It correlates prepare and transfer jobs by name and derives a unified phase from Slurm job states, enriched with file-based shard progress when available. -When the phase is "building_manifest", the prepare job is listing files at the source. This can take up to several days for large datasets and is normal. The response may include files_listed and bytes_listed if the JSONL writing phase has started, or prepare_phase/prepare_detail for finer-grained progress. Only flag a concern if the job has been in this phase for more than 48 hours with no observable progress.""", +Phases (derived from Slurm job states): +- "pending" — prepare job is queued +- "preparing" — prepare job is running (manifest build, sharding, etc.) +- "prepare_failed" — prepare job FAILED/CANCELLED/TIMEOUT +- "prepare_complete" — prepare finished but no transfer job found (anomaly) +- "waiting_to_start" — prepare done, transfer job is queued +- "transferring" — transfer job is running +- "complete" — transfer finished successfully +- "complete_with_failures" — transfer job completed but some shards failed +- "failed" — transfer job FAILED/CANCELLED/TIMEOUT + +When the phase is "preparing", the prepare job may be listing files at the source. This can take up to several days for large datasets and is normal. The prepare job has a 4-day time limit. Only flag a concern if the prepare job has been running for more than 48 hours with no progress. 
The progress field may include files_listed, bytes_listed, or prepare_phase/prepare_detail for finer-grained tracking.""", "input_schema": { "type": "object", "properties": { @@ -446,16 +458,8 @@ def execute_tool( return json.dumps(job.to_dict()) return json.dumps({"error": f"Job {job_id} not found"}) else: - # Get all jobs for this thread with progress info - jobs = get_jobs_by_thread(channel_id, thread_ts) - results = [] - for job in jobs: - if job.work_dir: - progress = get_transfer_progress_by_job(job.job_id) - if progress: - results.append(progress) - continue - results.append(job.to_dict()) + # Get grouped status for all transfers in this thread + results = get_transfer_status_by_thread(channel_id, thread_ts) return json.dumps(results) elif tool_name == "list_backends": diff --git a/src/xfer/slackbot/slurm_tools.py b/src/xfer/slackbot/slurm_tools.py index 33a39f2..a2501ab 100644 --- a/src/xfer/slackbot/slurm_tools.py +++ b/src/xfer/slackbot/slurm_tools.py @@ -632,6 +632,159 @@ def get_jobs_by_thread(channel_id: str, thread_ts: str) -> list[JobInfo]: return [j for j in all_jobs if comment_pattern in (j.comment or "")] +# Slurm states that indicate a terminal failure +_FAILED_STATES = {"FAILED", "CANCELLED", "TIMEOUT", "NODE_FAIL", "OUT_OF_MEMORY"} + + +def _derive_phase( + prepare_job: Optional[JobInfo], + transfer_job: Optional[JobInfo], +) -> tuple[str, str]: + """Derive transfer phase from Slurm job states. + + Returns (phase, detail) tuple. 
+ """ + if not prepare_job: + return ("unknown", "No prepare job found") + + prep_state = prepare_job.state + + if prep_state == "PENDING": + return ("pending", "Prepare job is queued") + if prep_state == "RUNNING": + return ("preparing", "Prepare job is running") + if prep_state in _FAILED_STATES: + return ("prepare_failed", f"Prepare job {prep_state}") + + # Prepare is COMPLETED + if prep_state == "COMPLETED": + if not transfer_job: + return ("prepare_complete", "Prepare finished but no transfer job found") + + xfer_state = transfer_job.state + + if xfer_state == "PENDING": + return ("waiting_to_start", "Transfer job is queued") + if xfer_state == "RUNNING": + return ("transferring", "Transfer job is running") + if xfer_state == "COMPLETED": + return ("complete", "Transfer job completed") + if xfer_state in _FAILED_STATES: + return ("failed", f"Transfer job {xfer_state}") + + return ("transferring", f"Transfer job state: {xfer_state}") + + return ("unknown", f"Prepare job state: {prep_state}") + + +def _group_jobs_by_transfer( + jobs: list[JobInfo], +) -> list[tuple[Optional[JobInfo], Optional[JobInfo], str]]: + """Group jobs into (prepare_job, transfer_job, base_name) tuples. + + Matches by naming convention: prepare jobs end with ``-prepare``, + transfer jobs use the base name directly. When multiple jobs share + the same base name, the most recently submitted one wins. 
+ """ + # Bucket jobs by base name + prepare_by_base: dict[str, list[JobInfo]] = {} + transfer_by_base: dict[str, list[JobInfo]] = {} + + for job in jobs: + name = job.name or "" + if name.endswith("-prepare"): + base = name[: -len("-prepare")] + prepare_by_base.setdefault(base, []).append(job) + else: + base = name + transfer_by_base.setdefault(base, []).append(job) + + # Pick the most recent job per base name (highest submit_time) + def _newest(job_list: list[JobInfo]) -> JobInfo: + return max(job_list, key=lambda j: j.submit_time or "") + + all_bases = set(prepare_by_base) | set(transfer_by_base) + groups = [] + for base in sorted(all_bases): + prep = _newest(prepare_by_base[base]) if base in prepare_by_base else None + xfer = _newest(transfer_by_base[base]) if base in transfer_by_base else None + groups.append((prep, xfer, base)) + + return groups + + +def get_transfer_status_by_thread( + channel_id: str, thread_ts: str +) -> list[dict]: + """Get transfer status for all jobs in a Slack thread. + + Combines Slurm job state (via sacct) with file-based progress for a + unified view. Returns a list of dicts, one per transfer group. 
+ """ + jobs = get_jobs_by_thread(channel_id, thread_ts) + if not jobs: + return [] + + groups = _group_jobs_by_transfer(jobs) + results = [] + + for prep_job, xfer_job, base_name in groups: + phase, detail = _derive_phase(prep_job, xfer_job) + + entry: dict = { + "base_name": base_name, + "phase": phase, + "detail": detail, + } + + # Attach job metadata + if prep_job: + entry["prepare_job"] = { + "job_id": prep_job.job_id, + "state": prep_job.state, + "submit_time": prep_job.submit_time, + } + if xfer_job: + entry["transfer_job"] = { + "job_id": xfer_job.job_id, + "state": xfer_job.state, + "submit_time": xfer_job.submit_time, + } + + # Enrich with file-based progress when a work_dir is available + work_dir = None + if prep_job and prep_job.work_dir: + work_dir = Path(prep_job.work_dir) + elif xfer_job and xfer_job.work_dir: + work_dir = Path(xfer_job.work_dir) + + if work_dir and work_dir.exists(): + file_progress = get_transfer_progress(work_dir) + entry["progress"] = file_progress + + file_phase = file_progress.get("phase", "unknown") + + # Let file-based signals refine the Slurm-derived phase + if phase == "transferring": + if file_phase == "complete": + # Shards finished before job exited + entry["phase"] = "complete" + elif ( + file_phase == "failed" + and file_progress.get("pending", 1) == 0 + and file_progress.get("in_progress", 1) == 0 + ): + entry["phase"] = "failed" + elif phase == "complete": + # Slurm says done — check for partial failures + if file_progress.get("failed", 0) > 0: + entry["phase"] = "complete_with_failures" + + results.append(entry) + + return results + + def get_job_status(job_id: str) -> Optional[JobInfo]: """Get detailed status for a specific job ID.""" cmd = ["sacct", "--json", "-j", job_id, "-X"]