diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 4e9cd336..466ddb34 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,23 +1,24 @@ // For format details, see https://aka.ms/devcontainer.json. For config options, see the // README at: https://github.com/devcontainers/templates/tree/main/src/python { - "name": "Python 3", + "name": "Data Formulator Dev", // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile - "image": "mcr.microsoft.com/devcontainers/python:1-3.12-bullseye", + "image": "mcr.microsoft.com/devcontainers/python:1-3.11-bullseye", // Features to add to the dev container. More info: https://containers.dev/features. - "features": { - "ghcr.io/devcontainers/features/node:1": { - "version": "18" - }, - "ghcr.io/devcontainers/features/azure-cli:1": {} - }, + "features": { + "ghcr.io/devcontainers/features/node:1": { + "version": "18" + }, + "ghcr.io/devcontainers/features/azure-cli:1": {}, + "ghcr.io/astral-sh/uv:1": {} + }, // Use 'forwardPorts' to make a list of ports inside the container available locally. - // "forwardPorts": [], + "forwardPorts": [5000, 5173], // Use 'postCreateCommand' to run commands after the container is created. - "postCreateCommand": "cd /workspaces/data-formulator && npm install && npm run build && python3 -m venv /workspaces/data-formulator/venv && . /workspaces/data-formulator/venv/bin/activate && pip install -e /workspaces/data-formulator --verbose && data_formulator" + "postCreateCommand": "cd /workspaces/data-formulator && npm install && npm run build && uv sync && uv run data_formulator" // Configure tool-specific properties. // "customizations": {}, diff --git a/.env.template b/.env.template index 2405af77..0d9d9a46 100644 --- a/.env.template +++ b/.env.template @@ -3,6 +3,4 @@ # python -m data_formulator -p 5000 --exec-python-in-subprocess true --disable-display-keys true DISABLE_DISPLAY_KEYS=false # if true, the display keys will not be shown in the frontend -EXEC_PYTHON_IN_SUBPROCESS=false # if true, the python code will be executed in a subprocess to avoid crashing the main app, but it will increase the time of response - -LOCAL_DB_DIR= # the directory to store the local database, if not provided, the app will use the temp directory \ No newline at end of file +EXEC_PYTHON_IN_SUBPROCESS=false # if true, the python code will be executed in a subprocess to avoid crashing the main app, but it will increase the time of response \ No newline at end of file diff --git a/.gitignore b/.gitignore index f3420acd..6b8ca6c3 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ *env +.venv/ *api-keys.env **/*.ipynb_checkpoints/ .DS_Store diff --git a/.python-version b/.python-version new file mode 100644 index 00000000..2c073331 --- /dev/null +++ b/.python-version @@ -0,0 +1 @@ +3.11 diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md index 6467a874..3a483e25 100644 --- a/DEVELOPMENT.md +++ b/DEVELOPMENT.md @@ -2,16 +2,34 @@ How to set up your local machine. ## Prerequisites -* Python > 3.11 +* Python >= 3.11 * Node.js * Yarn +* [uv](https://docs.astral.sh/uv/) (recommended) or pip ## Backend (Python) +### Option 1: With uv (recommended) + +uv is faster and provides reproducible builds via lockfile. 
+ +```bash +uv sync # Creates .venv and installs all dependencies +uv run data_formulator # Run app (opens browser automatically) +uv run data_formulator --dev # Run backend only (for frontend development) +``` + +**Which command to use:** +- **End users / testing the full app**: `uv run data_formulator` - starts server and opens browser to http://localhost:5000 +- **Frontend development**: `uv run data_formulator --dev` - starts backend server only, then run `yarn start` separately for the Vite dev server on http://localhost:5173 + +### Option 2: With pip (fallback) + - **Create a Virtual Environment** ```bash python -m venv venv - .\venv\Scripts\activate + source venv/bin/activate # Unix + # or .\venv\Scripts\activate # Windows ``` - **Install Dependencies** @@ -29,7 +47,6 @@ How to set up your local machine. - configure settings as needed: - DISABLE_DISPLAY_KEYS: if true, API keys will not be shown in the frontend - EXEC_PYTHON_IN_SUBPROCESS: if true, Python code runs in a subprocess (safer but slower), you may consider setting it true when you are hosting Data Formulator for others - - LOCAL_DB_DIR: directory to store the local database (uses temp directory if not set) - External database settings (when USE_EXTERNAL_DB=true): - DB_NAME: name to refer to this database connection - DB_TYPE: mysql or postgresql (currently only these two are supported) @@ -41,14 +58,16 @@ How to set up your local machine. - **Run the app** - - **Windows** - ```bash - .\local_server.bat - ``` - - - **Unix-based** ```bash + # Unix ./local_server.sh + + # Windows + .\local_server.bat + + # Or directly + data_formulator # Opens browser automatically + data_formulator --dev # Backend only (for frontend development) ``` ## Frontend (TypeScript) @@ -61,7 +80,12 @@ How to set up your local machine. - **Development mode** - Run the front-end in development mode using, allowing real-time edits and previews: + First, start the backend server (in a separate terminal): + ```bash + uv run data_formulator --dev # or ./local_server.sh + ``` + + Then, run the frontend in development mode with hot reloading: ```bash yarn start ``` @@ -81,6 +105,10 @@ How to set up your local machine. Then, build python package: ```bash + # With uv + uv build + + # Or with pip pip install build python -m build ``` @@ -112,23 +140,23 @@ How to set up your local machine. When deploying Data Formulator to production, please be aware of the following security considerations: -### Database Storage Security +### Database and Data Storage Security -1. **Local DuckDB Files**: When database functionality is enabled (default), Data Formulator stores DuckDB database files locally on the server. These files contain user data and are stored in the system's temporary directory or a configured `LOCAL_DB_DIR`. +1. **Workspace and table data**: Table data is stored in per-identity workspaces (e.g. parquet files). DuckDB is used only in-memory per request when needed (e.g. for SQL mode); no persistent DuckDB database files are created by the app. -2. **Session Management**: - - When database is **enabled**: Session IDs are stored in Flask sessions (cookies) and linked to local DuckDB files - - When database is **disabled**: No persistent storage is used, and no cookies are set. Session IDs are generated per request for API consistency +2. 
**Identity Management**: + - Each user's data is isolated by a namespaced identity key (e.g., `user:alice@example.com` or `browser:550e8400-...`) + - Anonymous users get a browser-based UUID stored in localStorage + - Authenticated users get their verified user ID from the auth provider -3. **Data Persistence**: User data processed through Data Formulator may be temporarily stored in these local DuckDB files, which could be a security risk in multi-tenant environments. +3. **Data persistence**: User data may be written to workspace storage (e.g. parquet) on the server. In multi-tenant deployments, ensure workspace directories are isolated and access-controlled. ### Recommended Security Measures For production deployment, consider: -1. **Use `--disable-database` flag** for stateless deployments where no data persistence is needed +1. **Use `--disable-database` flag** to disable table-connector routes when you do not need external or uploaded table support 2. **Implement proper authentication, authorization, and other security measures** as needed for your specific use case, for example: - - Store DuckDB file in a database - User authentication (OAuth, JWT tokens, etc.) - Role-based access control - API rate limiting @@ -142,5 +170,90 @@ For production deployment, consider: python -m data_formulator.app --disable-database ``` +## Authentication Architecture + +Data Formulator supports a **hybrid identity system** that supports both anonymous and authenticated users. + +### Identity Flow Overview + +``` +┌─────────────────────────────────────────────────────────────────────┐ +│ Frontend Request │ +├─────────────────────────────────────────────────────────────────────┤ +│ Headers: │ +│ X-Identity-Id: "browser:550e8400-..." (namespace sent by client) │ +│ Authorization: Bearer (if custom auth implemented) │ +│ (Azure also adds X-MS-CLIENT-PRINCIPAL-ID automatically) │ +└─────────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────────┐ +│ Backend Identity Resolution │ +│ (auth.py: get_identity_id) │ +├─────────────────────────────────────────────────────────────────────┤ +│ Priority 1: Azure X-MS-CLIENT-PRINCIPAL-ID → "user:" │ +│ Priority 2: JWT Bearer token (if implemented) → "user:" │ +│ Priority 3: X-Identity-Id header → ALWAYS "browser:" │ +│ (client-provided namespace is IGNORED for security) │ +└─────────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────────┐ +│ Storage Isolation │ +├─────────────────────────────────────────────────────────────────────┤ +│ "user:alice@example.com" → alice's DuckDB file (ONLY via auth) │ +│ "browser:550e8400-..." → anonymous user's DuckDB file │ +└─────────────────────────────────────────────────────────────────────┘ +``` + +### Security Model + +**Critical Security Rule:** The backend NEVER trusts the namespace prefix from the client-provided `X-Identity-Id` header. Even if a client sends `X-Identity-Id: "user:alice@..."`, the backend strips the prefix and forces `browser:alice@...`. Only verified authentication (Azure headers or JWT) can result in a `user:` prefixed identity. 
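+For reference, the resolution order described above could look roughly like the sketch below. This is illustrative only (the function name and details here are assumptions); the actual logic lives in `auth.py` (`get_identity_id`).
+
+```python
+from flask import request
+
+def resolve_identity_id() -> str:
+    # Priority 1: Azure EasyAuth injects a verified principal id header.
+    azure_principal = request.headers.get("X-MS-CLIENT-PRINCIPAL-ID")
+    if azure_principal:
+        return f"user:{azure_principal}"
+
+    # Priority 2: a verified JWT bearer token (if custom auth is implemented)
+    # could also yield a "user:" identity at this point.
+
+    # Priority 3: client-provided header. The namespace prefix is stripped and
+    # "browser:" is forced, so a spoofed "user:alice@..." can never resolve to
+    # the real "user:alice@..." storage key.
+    raw = request.headers.get("X-Identity-Id", "")
+    bare_id = raw.split(":", 1)[-1] if raw else "anonymous"
+    return f"browser:{bare_id}"
+```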
+ +The key security principle is **namespaced isolation with forced prefixing**: + +| Scenario | X-Identity-Id Sent | Backend Resolution | Storage Key | +|----------|-------------------|-------------------|-------------| +| Anonymous user | `browser:550e8400-...` | Strips prefix, forces `browser:` | `browser:550e8400-...` | +| Azure logged-in user | `browser:550e8400-...` | Uses Azure header (priority 1) | `user:alice@...` | +| Attacker spoofing | `user:alice@...` (forged) | No valid auth, strips & forces `browser:` | `browser:alice@...` | + +**Why this is secure:** An attacker sending `X-Identity-Id: user:alice@...` gets `browser:alice@...` as their storage key, which is completely separate from the real `user:alice@...` that only authenticated Alice can access. + +### Implementing Custom Authentication + +To add JWT-based authentication: + +1. **Backend** (`tables_routes.py`): Uncomment and configure the JWT verification code in `get_identity_id()` +2. **Frontend** (`utils.tsx`): Implement `getAuthToken()` to retrieve the JWT from your auth context +3. **Add JWT secret** to Flask config: `current_app.config['JWT_SECRET']` + +### Azure App Service Authentication + +When deployed to Azure with EasyAuth enabled: +- Azure automatically adds `X-MS-CLIENT-PRINCIPAL-ID` header to authenticated requests +- The backend reads this header first (highest priority) +- No frontend changes needed - Azure handles the auth flow + +### Frontend Identity Management + +The frontend (`src/app/identity.ts`) manages identity as follows: + +```typescript +// Identity is always initialized with browser ID +identity: { type: 'browser', id: getBrowserId() } + +// If user logs in (e.g., via Azure), it's updated to: +identity: { type: 'user', id: userInfo.userId } + +// All API requests send namespaced identity: +// X-Identity-Id: "browser:550e8400-..." or "user:alice@..." +``` + +This ensures: +1. **Anonymous users**: Work immediately with localStorage-based browser ID +2. **Logged-in users**: Get their verified user ID from the auth provider +3. **Cross-tab consistency**: Browser ID is shared via localStorage across all tabs + ## Usage See the [Usage section on the README.md page](README.md#usage). diff --git a/README.md b/README.md index 5b4f9e84..db7ff79a 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@

Try Online Demo   - Install Locally + Install Locally

@@ -32,6 +32,9 @@ https://github.com/user-attachments/assets/8ca57b68-4d7a-42cb-bcce-43f8b1681ce2 ## News 🔥🔥🔥 +[01-31-2025] **uv support** — Faster installation with uv +- 🚀 **Install with uv**: Data Formulator now supports installation via [uv](https://docs.astral.sh/uv/), the ultra-fast Python package manager. Get started in seconds with `uvx data_formulator` or `uv pip install data_formulator`. + [01-25-2025] **Data Formulator 0.6** — Real-time insights from live data - ⚡ **Connect to live data**: Connect to URLs and databases with automatic refresh intervals. Visualizations update automatically as your data changes to provide you live insights. [Demo: track international space station position speed live](https://github.com/microsoft/data-formulator/releases/tag/0.6) - 🎨 **UI Updates**: Unified UI for data loading; direct drag-and-drop fields from the data table to update visualization designs. @@ -127,9 +130,30 @@ Data Formulator enables analysts to iteratively explore and visualize data. Star Play with Data Formulator with one of the following options: -- **Option 1: Install via Python PIP** +- **Option 1: Install via uv (recommended)** + + [uv](https://docs.astral.sh/uv/) is an extremely fast Python package manager. If you have uv installed, you can run Data Formulator directly without any setup: + + ```bash + # Run data formulator directly (no install needed) + uvx data_formulator + ``` + + Or install it in a project/virtual environment: + + ```bash + # Install data_formulator + uv pip install data_formulator + + # Run data formulator + python -m data_formulator + ``` + + Data Formulator will be automatically opened in the browser at [http://localhost:5000](http://localhost:5000). + +- **Option 2: Install via pip** - Use Python PIP for an easy setup experience, running locally (recommend: install it in a virtual environment). + Use pip for installation (recommend: install it in a virtual environment). ```bash # install data_formulator @@ -143,13 +167,13 @@ Play with Data Formulator with one of the following options: *you can specify the port number (e.g., 8080) by `python -m data_formulator --port 8080` if the default port is occupied.* -- **Option 2: Codespaces (5 minutes)** +- **Option 3: Codespaces (5 minutes)** You can also run Data Formulator in Codespaces; we have everything pre-configured. For more details, see [CODESPACES.md](CODESPACES.md). [![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://codespaces.new/microsoft/data-formulator?quickstart=1) -- **Option 3: Working in the developer mode** +- **Option 4: Working in the developer mode** You can build Data Formulator locally if you prefer full control over your development environment and develop your own version on top. For detailed instructions, refer to [DEVELOPMENT.md](DEVELOPMENT.md). 
diff --git a/local_server.bat b/local_server.bat index b585d712..36026cf9 100644 --- a/local_server.bat +++ b/local_server.bat @@ -7,4 +7,11 @@ :: set https_proxy=http://127.0.0.1:7890 set FLASK_RUN_PORT=5000 -python -m py-src.data_formulator.app --port %FLASK_RUN_PORT% --dev + +:: Use uv if available, otherwise fall back to python +where uv >nul 2>nul +if %ERRORLEVEL% EQU 0 ( + uv run data_formulator --port %FLASK_RUN_PORT% --dev +) else ( + python -m data_formulator.app --port %FLASK_RUN_PORT% --dev +) diff --git a/local_server.sh b/local_server.sh index 0df7db89..fbba1e3b 100644 --- a/local_server.sh +++ b/local_server.sh @@ -5,6 +5,11 @@ # export http_proxy=http://127.0.0.1:7890 # export https_proxy=http://127.0.0.1:7890 -#env FLASK_APP=py-src/data_formulator/app.py FLASK_RUN_PORT=5000 FLASK_RUN_HOST=0.0.0.0 flask run export FLASK_RUN_PORT=5000 -python -m py-src.data_formulator.app --port ${FLASK_RUN_PORT} --dev \ No newline at end of file + +# Use uv if available, otherwise fall back to python +if command -v uv &> /dev/null; then + uv run data_formulator --port ${FLASK_RUN_PORT} --dev +else + python -m data_formulator.app --port ${FLASK_RUN_PORT} --dev +fi \ No newline at end of file diff --git a/public/screenshot-stock-price-live.webp b/public/screenshot-stock-price-live.webp new file mode 100644 index 00000000..b0ebe71b Binary files /dev/null and b/public/screenshot-stock-price-live.webp differ diff --git a/py-src/data_formulator/__init__.py b/py-src/data_formulator/__init__.py index 2f2fd61f..ee0d133d 100644 --- a/py-src/data_formulator/__init__.py +++ b/py-src/data_formulator/__init__.py @@ -3,7 +3,7 @@ def run_app(): """Launch the Data Formulator Flask application.""" - # Import app only when actually running to avoid side effects + # Import app only when actually running to avoid heavy imports at package load from data_formulator.app import run_app as _run_app return _run_app() diff --git a/py-src/data_formulator/agent_routes.py b/py-src/data_formulator/agent_routes.py index 3de374bb..5cc65246 100644 --- a/py-src/data_formulator/agent_routes.py +++ b/py-src/data_formulator/agent_routes.py @@ -12,22 +12,28 @@ mimetypes.add_type('application/javascript', '.mjs') import flask -from flask import request, session, jsonify, Blueprint, current_app, Response, stream_with_context +from flask import request, jsonify, Blueprint, current_app, Response, stream_with_context import logging import json import html import pandas as pd +import duckdb from data_formulator.agents.agent_concept_derive import ConceptDeriveAgent from data_formulator.agents.agent_py_concept_derive import PyConceptDeriveAgent from data_formulator.agents.agent_py_data_transform import PythonDataTransformationAgent -from data_formulator.agents.agent_sql_data_transform import SQLDataTransformationAgent +from data_formulator.agents.agent_sql_data_transform import ( + SQLDataTransformationAgent, + create_duckdb_conn_with_parquet_views, +) from data_formulator.agents.agent_py_data_rec import PythonDataRecAgent from data_formulator.agents.agent_sql_data_rec import SQLDataRecAgent from data_formulator.agents.agent_sort_data import SortDataAgent +from data_formulator.auth import get_identity_id +from data_formulator.datalake.workspace import get_workspace, WorkspaceWithTempData from data_formulator.agents.agent_data_load import DataLoadAgent from data_formulator.agents.agent_data_clean import DataCleanAgent from data_formulator.agents.agent_data_clean_stream import DataCleanAgentStream @@ -36,12 +42,26 @@ from 
data_formulator.agents.agent_report_gen import ReportGenAgent from data_formulator.agents.client_utils import Client -from data_formulator.db_manager import db_manager from data_formulator.workflows.exploration_flow import run_exploration_flow_streaming # Get logger for this module (logging config done in app.py) logger = logging.getLogger(__name__) + +def get_temp_tables(workspace, input_tables: list[dict]) -> list[dict]: + """ + Determine which input tables are temp tables (not persisted in the workspace datalake). + + Args: + workspace: The user's workspace instance + input_tables: List of table dicts with 'name' and 'rows' keys + + Returns: + List of table dicts that don't exist in the workspace (temp tables) + """ + existing_tables = set(workspace.list_tables()) + return [table for table in input_tables if table.get('name') not in existing_tables] + agent_bp = Blueprint('agent', __name__, url_prefix='/api/agent') def get_client(model_config): @@ -175,23 +195,33 @@ def process_data_on_load_request(): logger.info("# process data query: ") content = request.get_json() token = content["token"] + input_data = content["input_data"] client = get_client(content['model']) logger.info(f" model: {content['model']}") + conn = None try: - conn = db_manager.get_connection(session['session_id']) - except Exception as e: - conn = None + if input_data.get("virtual"): + identity_id = get_identity_id() + workspace = get_workspace(identity_id) + input_tables = [{"name": input_data["name"]}] + conn = create_duckdb_conn_with_parquet_views(workspace, input_tables) + else: + conn = duckdb.connect(":memory:") - agent = DataLoadAgent(client=client, conn=conn) - - candidates = agent.run(content["input_data"]) - - candidates = [c['content'] for c in candidates if c['status'] == 'ok'] + agent = DataLoadAgent(client=client, conn=conn) + candidates = agent.run(content["input_data"]) + candidates = [c['content'] for c in candidates if c['status'] == 'ok'] - response = flask.jsonify({ "status": "ok", "token": token, "result": candidates }) + response = flask.jsonify({ "status": "ok", "token": token, "result": candidates }) + except Exception as e: + logger.exception(e) + response = flask.jsonify({ "token": token, "status": "error", "result": [] }) + finally: + if conn is not None: + conn.close() else: response = flask.jsonify({ "token": -1, "status": "error", "result": [] }) @@ -371,7 +401,6 @@ def derive_data(): chart_encodings = content.get("chart_encodings", {}) instruction = content["extra_prompt"] - language = content.get("language", "python") # whether to use sql or python, default to python max_repair_attempts = content["max_repair_attempts"] if "max_repair_attempts" in content else 1 agent_coding_rules = content.get("agent_coding_rules", "") @@ -395,33 +424,32 @@ def derive_data(): if chart_encodings == {}: mode = "recommendation" - conn = db_manager.get_connection(session['session_id']) if language == "sql" else None + identity_id = get_identity_id() + workspace = get_workspace(identity_id) + temp_data = get_temp_tables(workspace, input_tables) - if mode == "recommendation": - # now it's in recommendation mode - agent = SQLDataRecAgent(client=client, conn=conn, agent_coding_rules=agent_coding_rules) if language == "sql" else PythonDataRecAgent(client=client, exec_python_in_subprocess=current_app.config['CLI_ARGS']['exec_python_in_subprocess'], agent_coding_rules=agent_coding_rules) - results = agent.run(input_tables, instruction, n=1, prev_messages=prev_messages) - else: - agent = 
SQLDataTransformationAgent(client=client, conn=conn, agent_coding_rules=agent_coding_rules) if language == "sql" else PythonDataTransformationAgent(client=client, exec_python_in_subprocess=current_app.config['CLI_ARGS']['exec_python_in_subprocess'], agent_coding_rules=agent_coding_rules) - results = agent.run(input_tables, instruction, chart_type, chart_encodings, prev_messages) + with WorkspaceWithTempData(workspace, temp_data) as workspace: + if mode == "recommendation": + agent = SQLDataRecAgent(client=client, workspace=workspace, agent_coding_rules=agent_coding_rules) + results = agent.run(input_tables, instruction, n=1, prev_messages=prev_messages) + else: + agent = SQLDataTransformationAgent(client=client, workspace=workspace, agent_coding_rules=agent_coding_rules) + results = agent.run(input_tables, instruction, chart_type, chart_encodings, prev_messages) - repair_attempts = 0 - while results[0]['status'] == 'error' and repair_attempts < max_repair_attempts: # try up to n times - error_message = results[0]['content'] - new_instruction = f"We run into the following problem executing the code, please fix it:\n\n{error_message}\n\nPlease think step by step, reflect why the error happens and fix the code so that no more errors would occur." + repair_attempts = 0 + while results[0]['status'] == 'error' and repair_attempts < max_repair_attempts: + error_message = results[0]['content'] + new_instruction = f"We run into the following problem executing the code, please fix it:\n\n{error_message}\n\nPlease think step by step, reflect why the error happens and fix the code so that no more errors would occur." - prev_dialog = results[0]['dialog'] + prev_dialog = results[0]['dialog'] - if mode == "transform": - results = agent.followup(input_tables, prev_dialog, [], chart_type, chart_encodings, new_instruction, n=1) - if mode == "recommendation": - results = agent.followup(input_tables, prev_dialog, [], new_instruction, n=1) + if mode == "transform": + results = agent.followup(input_tables, prev_dialog, [], chart_type, chart_encodings, new_instruction, n=1) + if mode == "recommendation": + results = agent.followup(input_tables, prev_dialog, [], new_instruction, n=1) + + repair_attempts += 1 - repair_attempts += 1 - - if conn: - conn.close() - response = flask.jsonify({ "token": token, "status": "ok", "results": results }) else: response = flask.jsonify({ "token": "", "status": "error", "results": [] }) @@ -442,7 +470,6 @@ def generate(): # each table is a dict with {"name": xxx, "rows": [...]} input_tables = content["input_tables"] initial_plan = content["initial_plan"] # The exploration question - language = content.get("language", "python") # whether to use sql or python, default to python max_iterations = content.get("max_iterations", 3) # Number of exploration iterations max_repair_attempts = content.get("max_repair_attempts", 1) agent_exploration_rules = content.get("agent_exploration_rules", "") @@ -465,7 +492,8 @@ def generate(): "api_version": content['model'].get('api_version', '') } - session_id = session.get('session_id') if language == "sql" else None + # Get identity for workspace (used for both SQL and Python with WorkspaceWithTempData) + identity_id = get_identity_id() exec_python_in_subprocess = current_app.config['CLI_ARGS']['exec_python_in_subprocess'] try: @@ -473,8 +501,7 @@ def generate(): model_config=model_config, input_tables=input_tables, initial_plan=initial_plan, - language=language, - session_id=session_id, + session_id=identity_id, 
exec_python_in_subprocess=exec_python_in_subprocess, max_iterations=max_iterations, max_repair_attempts=max_repair_attempts, @@ -550,8 +577,6 @@ def refine_data(): latest_data_sample = content["latest_data_sample"] max_repair_attempts = content.get("max_repair_attempts", 1) agent_coding_rules = content.get("agent_coding_rules", "") - - language = content.get("language", "python") # whether to use sql or python, default to python logger.info("== input tables ===>") for table in input_tables: @@ -563,23 +588,22 @@ def refine_data(): logger.info(chart_encodings) logger.info(new_instruction) - conn = db_manager.get_connection(session['session_id']) if language == "sql" else None + identity_id = get_identity_id() + workspace = get_workspace(identity_id) + temp_data = get_temp_tables(workspace, input_tables) - # always resort to the data transform agent - agent = SQLDataTransformationAgent(client=client, conn=conn, agent_coding_rules=agent_coding_rules) if language == "sql" else PythonDataTransformationAgent(client=client, exec_python_in_subprocess=current_app.config['CLI_ARGS']['exec_python_in_subprocess'], agent_coding_rules=agent_coding_rules) - results = agent.followup(input_tables, dialog, latest_data_sample, chart_type, chart_encodings, new_instruction, n=1) + with WorkspaceWithTempData(workspace, temp_data) as workspace: + agent = SQLDataTransformationAgent(client=client, workspace=workspace, agent_coding_rules=agent_coding_rules) + results = agent.followup(input_tables, dialog, latest_data_sample, chart_type, chart_encodings, new_instruction, n=1) - repair_attempts = 0 - while results[0]['status'] == 'error' and repair_attempts < max_repair_attempts: # only try once - error_message = results[0]['content'] - new_instruction = f"We run into the following problem executing the code, please fix it:\n\n{error_message}\n\nPlease think step by step, reflect why the error happens and fix the code so that no more errors would occur." - prev_dialog = results[0]['dialog'] + repair_attempts = 0 + while results[0]['status'] == 'error' and repair_attempts < max_repair_attempts: + error_message = results[0]['content'] + new_instruction = f"We run into the following problem executing the code, please fix it:\n\n{error_message}\n\nPlease think step by step, reflect why the error happens and fix the code so that no more errors would occur." 
+ prev_dialog = results[0]['dialog'] - results = agent.followup(input_tables, prev_dialog, [], chart_type, chart_encodings, new_instruction, n=1) - repair_attempts += 1 - - if conn: - conn.close() + results = agent.followup(input_tables, prev_dialog, [], chart_type, chart_encodings, new_instruction, n=1) + repair_attempts += 1 response = flask.jsonify({ "token": token, "status": "ok", "results": results}) else: @@ -624,34 +648,29 @@ def generate(): client = get_client(content['model']) - language = content.get("language", "python") - if language == "sql": - db_conn = db_manager.get_connection(session['session_id']) - else: - db_conn = None + input_tables = content.get("input_tables", []) + identity_id = get_identity_id() + workspace = get_workspace(identity_id) + temp_data = get_temp_tables(workspace, input_tables) if input_tables else None agent_exploration_rules = content.get("agent_exploration_rules", "") - agent = InteractiveExploreAgent(client=client, agent_exploration_rules=agent_exploration_rules, db_conn=db_conn) - - # Get input tables from the request - input_tables = content.get("input_tables", []) - - # Get exploration thread if provided (for context from previous explorations) mode = content.get("mode", "interactive") start_question = content.get("start_question", None) exploration_thread = content.get("exploration_thread", None) current_chart = content.get("current_chart", None) current_data_sample = content.get("current_data_sample", None) - try: - for chunk in agent.run(input_tables, start_question, exploration_thread, current_data_sample, current_chart, mode): - yield chunk - except Exception as e: - logger.error(e) - error_data = { - "content": "unable to process recommendation questions request" - } - yield 'error: ' + json.dumps(error_data) + '\n' + with WorkspaceWithTempData(workspace, temp_data) as workspace: + agent = InteractiveExploreAgent(client=client, workspace=workspace, agent_exploration_rules=agent_exploration_rules) + try: + for chunk in agent.run(input_tables, start_question, exploration_thread, current_data_sample, current_chart, mode): + yield chunk + except Exception as e: + logger.error(e) + error_data = { + "content": "unable to process recommendation questions request" + } + yield 'error: ' + json.dumps(error_data) + '\n' else: error_data = { "content": "Invalid request format" @@ -675,28 +694,24 @@ def generate(): client = get_client(content['model']) - language = content.get("language", "python") - if language == "sql": - db_conn = db_manager.get_connection(session['session_id']) - else: - db_conn = None - - agent = ReportGenAgent(client=client, conn=db_conn) - - # Get input tables and charts from the request input_tables = content.get("input_tables", []) charts = content.get("charts", []) style = content.get("style", "blog post") - - try: - for chunk in agent.stream(input_tables, charts, style): - yield chunk - except Exception as e: - logger.error(e) - error_data = { - "content": "unable to process report generation request" - } - yield 'error: ' + json.dumps(error_data) + '\n' + identity_id = get_identity_id() + workspace = get_workspace(identity_id) + temp_data = get_temp_tables(workspace, input_tables) if input_tables else None + + with WorkspaceWithTempData(workspace, temp_data) as workspace: + agent = ReportGenAgent(client=client, workspace=workspace) + try: + for chunk in agent.stream(input_tables, charts, style): + yield chunk + except Exception as e: + logger.error(e) + error_data = { + "content": "unable to process report generation 
request" + } + yield 'error: ' + json.dumps(error_data) + '\n' else: error_data = { "content": "Invalid request format" @@ -726,7 +741,7 @@ def refresh_derived_data(): - message: error message if failed """ try: - from data_formulator.py_sandbox import run_transform_in_sandbox2020 + from data_formulator.sandbox.py_sandbox import run_transform_in_sandbox2020 from flask import current_app data = request.get_json() diff --git a/py-src/data_formulator/agents/agent_interactive_explore.py b/py-src/data_formulator/agents/agent_interactive_explore.py index c9151db9..4e5e6761 100644 --- a/py-src/data_formulator/agents/agent_interactive_explore.py +++ b/py-src/data_formulator/agents/agent_interactive_explore.py @@ -6,7 +6,7 @@ import pandas as pd from data_formulator.agents.agent_utils import extract_json_objects, generate_data_summary -from data_formulator.agents.agent_sql_data_transform import generate_sql_data_summary +from data_formulator.agents.agent_sql_data_transform import generate_sql_data_summary, create_duckdb_conn_with_parquet_views logger = logging.getLogger(__name__) @@ -115,17 +115,18 @@ class InteractiveExploreAgent(object): - def __init__(self, client, agent_exploration_rules="", db_conn=None): + def __init__(self, client, workspace, agent_exploration_rules=""): self.client = client self.agent_exploration_rules = agent_exploration_rules - self.db_conn = db_conn + self.workspace = workspace # when set (SQL/datalake mode), use parquet tables for summary def get_data_summary(self, input_tables, table_name_prefix="Table"): - if self.db_conn: - data_summary = generate_sql_data_summary(self.db_conn, input_tables, table_name_prefix=table_name_prefix) - else: - data_summary = generate_data_summary(input_tables, include_data_samples=False, table_name_prefix=table_name_prefix) - return data_summary + + # Datalake mode: create temporary DuckDB conn with parquet views, then get summary + with create_duckdb_conn_with_parquet_views(self.workspace, input_tables) as conn: + data_summary = generate_sql_data_summary(conn, input_tables, table_name_prefix=table_name_prefix) + return data_summary + def run(self, input_tables, start_question=None, exploration_thread=None, current_data_sample=None, current_chart=None, mode='interactive'): diff --git a/py-src/data_formulator/agents/agent_py_concept_derive.py b/py-src/data_formulator/agents/agent_py_concept_derive.py index 9d2ba055..dc6a9887 100644 --- a/py-src/data_formulator/agents/agent_py_concept_derive.py +++ b/py-src/data_formulator/agents/agent_py_concept_derive.py @@ -5,7 +5,7 @@ import json from data_formulator.agents.agent_utils import generate_data_summary, extract_code_from_gpt_response -import data_formulator.py_sandbox as py_sandbox +import data_formulator.sandbox.py_sandbox as py_sandbox import traceback diff --git a/py-src/data_formulator/agents/agent_py_data_rec.py b/py-src/data_formulator/agents/agent_py_data_rec.py index c61cde63..d56a27a5 100644 --- a/py-src/data_formulator/agents/agent_py_data_rec.py +++ b/py-src/data_formulator/agents/agent_py_data_rec.py @@ -5,7 +5,7 @@ import pandas as pd from data_formulator.agents.agent_utils import extract_json_objects, generate_data_summary, extract_code_from_gpt_response -import data_formulator.py_sandbox as py_sandbox +import data_formulator.sandbox.py_sandbox as py_sandbox import traceback import logging diff --git a/py-src/data_formulator/agents/agent_py_data_transform.py b/py-src/data_formulator/agents/agent_py_data_transform.py index a227d106..58d1f94f 100644 --- 
a/py-src/data_formulator/agents/agent_py_data_transform.py +++ b/py-src/data_formulator/agents/agent_py_data_transform.py @@ -4,7 +4,7 @@ import json from data_formulator.agents.agent_utils import extract_json_objects, generate_data_summary, extract_code_from_gpt_response -import data_formulator.py_sandbox as py_sandbox +import data_formulator.sandbox.py_sandbox as py_sandbox import pandas as pd import logging diff --git a/py-src/data_formulator/agents/agent_report_gen.py b/py-src/data_formulator/agents/agent_report_gen.py index f1dac083..c8f8375f 100644 --- a/py-src/data_formulator/agents/agent_report_gen.py +++ b/py-src/data_formulator/agents/agent_report_gen.py @@ -4,7 +4,7 @@ import json from data_formulator.agents.agent_utils import extract_json_objects, generate_data_summary -from data_formulator.agents.agent_sql_data_transform import generate_sql_data_summary +from data_formulator.agents.agent_sql_data_transform import generate_sql_data_summary, create_duckdb_conn_with_parquet_views import logging @@ -53,16 +53,14 @@ class ReportGenAgent(object): - def __init__(self, client, conn): + def __init__(self, client, workspace): self.client = client - self.conn = conn + self.workspace = workspace def get_data_summary(self, input_tables): - if self.conn: - data_summary = generate_sql_data_summary(self.conn, input_tables) - else: - data_summary = generate_data_summary(input_tables) - return data_summary + with create_duckdb_conn_with_parquet_views(self.workspace, input_tables) as conn: + data_summary = generate_sql_data_summary(conn, input_tables) + return data_summary def stream(self, input_tables, charts=[], style="blog post"): """derive a new concept based on the raw input data diff --git a/py-src/data_formulator/agents/agent_sql_data_rec.py b/py-src/data_formulator/agents/agent_sql_data_rec.py index bc48c053..f62ffe86 100644 --- a/py-src/data_formulator/agents/agent_sql_data_rec.py +++ b/py-src/data_formulator/agents/agent_sql_data_rec.py @@ -4,7 +4,7 @@ import json from data_formulator.agents.agent_utils import extract_json_objects, extract_code_from_gpt_response -from data_formulator.agents.agent_sql_data_transform import generate_sql_data_summary +from data_formulator.agents.agent_sql_data_transform import generate_sql_data_summary, create_duckdb_conn_with_parquet_views import random import string @@ -13,6 +13,8 @@ import duckdb import pandas as pd +from data_formulator.datalake.parquet_manager import write_parquet, sanitize_table_name as parquet_sanitize_table_name + import logging logger = logging.getLogger(__name__) @@ -217,9 +219,10 @@ class SQLDataRecAgent(object): - def __init__(self, client, conn, system_prompt=None, agent_coding_rules=""): + def __init__(self, client, workspace, system_prompt=None, agent_coding_rules=""): self.client = client - self.conn = conn + self.workspace = workspace + self.conn = None # set per request, closed after use # Incorporate agent coding rules into system prompt if provided if system_prompt is not None: @@ -259,17 +262,26 @@ def process_gpt_response(self, input_tables, messages, response): try: random_suffix = ''.join(random.choices(string.ascii_lowercase, k=4)) - table_name = f"view_{random_suffix}" + view_name = f"view_{random_suffix}" - create_query = f"CREATE VIEW IF NOT EXISTS {table_name} AS {code_str}" + create_query = f"CREATE VIEW IF NOT EXISTS {view_name} AS {code_str}" self.conn.execute(create_query) self.conn.commit() - # Check how many rows are in the table - row_count = self.conn.execute(f"SELECT COUNT(*) FROM 
{table_name}").fetchone()[0] + # Check how many rows are in the result + row_count = self.conn.execute(f"SELECT COUNT(*) FROM {view_name}").fetchone()[0] + + # Fetch result: full for datalake write, limited for response payload + if row_count > 5000: + query_output = self.conn.execute(f"SELECT * FROM {view_name} LIMIT 5000").fetch_df() + full_df = self.conn.execute(f"SELECT * FROM {view_name}").fetch_df() + else: + full_df = self.conn.execute(f"SELECT * FROM {view_name}").fetch_df() + query_output = full_df - # Only limit to 5000 if there are more rows - query_output = self.conn.execute(f"SELECT * FROM {table_name} LIMIT 5000").fetch_df() + # Write full result to datalake as parquet (parquet-to-parquet) + output_table_name = parquet_sanitize_table_name(f"derived_{random_suffix}") + write_parquet(self.workspace, full_df, output_table_name) result = { "status": "ok", @@ -277,7 +289,7 @@ def process_gpt_response(self, input_tables, messages, response): "content": { 'rows': json.loads(query_output.to_json(orient='records')), 'virtual': { - 'table_name': table_name, + 'table_name': output_table_name, 'row_count': row_count } }, @@ -323,24 +335,30 @@ def process_gpt_response(self, input_tables, messages, response): def run(self, input_tables, description, n=1, prev_messages: list[dict] = []): - data_summary = generate_sql_data_summary(self.conn, input_tables) + self.conn = create_duckdb_conn_with_parquet_views(self.workspace, input_tables) + try: + data_summary = generate_sql_data_summary(self.conn, input_tables) - user_query = f"[CONTEXT]\n\n{data_summary}\n\n[GOAL]\n\n{description}" - if len(prev_messages) > 0: - user_query = f"The user wants a new recommendation based off the following updated context and goal:\n\n[CONTEXT]\n\n{data_summary}\n\n[GOAL]\n\n{description}" + user_query = f"[CONTEXT]\n\n{data_summary}\n\n[GOAL]\n\n{description}" + if len(prev_messages) > 0: + user_query = f"The user wants a new recommendation based off the following updated context and goal:\n\n[CONTEXT]\n\n{data_summary}\n\n[GOAL]\n\n{description}" - logger.info(user_query) + logger.info(user_query) - # Filter out system messages from prev_messages - filtered_prev_messages = [msg for msg in prev_messages if msg.get("role") != "system"] + # Filter out system messages from prev_messages + filtered_prev_messages = [msg for msg in prev_messages if msg.get("role") != "system"] - messages = [{"role":"system", "content": self.system_prompt}, - *filtered_prev_messages, - {"role":"user","content": user_query}] - - response = self.client.get_completion(messages = messages) - - return self.process_gpt_response(input_tables, messages, response) + messages = [{"role":"system", "content": self.system_prompt}, + *filtered_prev_messages, + {"role":"user","content": user_query}] + + response = self.client.get_completion(messages = messages) + + return self.process_gpt_response(input_tables, messages, response) + finally: + if self.conn: + self.conn.close() + self.conn = None def followup(self, input_tables, dialog, latest_data_sample, new_instruction: str, n=1): @@ -348,16 +366,21 @@ def followup(self, input_tables, dialog, latest_data_sample, new_instruction: st latest_data_sample: the latest data sample that the user is working on, it's a json object that contains the data sample of the current table new_instruction: the new instruction that the user wants to add to the latest data sample """ + self.conn = create_duckdb_conn_with_parquet_views(self.workspace, input_tables) + try: + logger.info(f"GOAL: \n\n{new_instruction}") - 
logger.info(f"GOAL: \n\n{new_instruction}") - - # get the current table name - sample_data_str = pd.DataFrame(latest_data_sample).head(10).to_string() + '\n......' + # get the current table name + sample_data_str = pd.DataFrame(latest_data_sample).head(10).to_string() + '\n......' - messages = [*dialog, - {"role":"user", - "content": f"This is the result from the latest sql query:\n\n{sample_data_str}\n\nUpdate the sql query above based on the following instruction:\n\n{new_instruction}"}] + messages = [*dialog, + {"role":"user", + "content": f"This is the result from the latest sql query:\n\n{sample_data_str}\n\nUpdate the sql query above based on the following instruction:\n\n{new_instruction}"}] - response = self.client.get_completion(messages = messages) + response = self.client.get_completion(messages = messages) - return self.process_gpt_response(input_tables, messages, response) \ No newline at end of file + return self.process_gpt_response(input_tables, messages, response) + finally: + if self.conn: + self.conn.close() + self.conn = None \ No newline at end of file diff --git a/py-src/data_formulator/agents/agent_sql_data_transform.py b/py-src/data_formulator/agents/agent_sql_data_transform.py index 89f158ee..f052f5b7 100644 --- a/py-src/data_formulator/agents/agent_sql_data_transform.py +++ b/py-src/data_formulator/agents/agent_sql_data_transform.py @@ -8,6 +8,8 @@ from data_formulator.agents.agent_utils import extract_json_objects, extract_code_from_gpt_response import pandas as pd +from data_formulator.datalake.parquet_manager import write_parquet, sanitize_table_name as parquet_sanitize_table_name + import logging import re # Replace/update the logger configuration @@ -169,11 +171,31 @@ def sanitize_table_name(table_name: str) -> str: sanitized_name = re.sub(r'[^a-zA-Z0-9_\.$]', '', sanitized_name) return sanitized_name + +def create_duckdb_conn_with_parquet_views(workspace, input_tables: list[dict]): + """ + Create an in-memory DuckDB connection with a view for each parquet table in the workspace. + Input tables are expected to be parquet-backed tables in the datalake (parquet-to-parquet). 
+ """ + import duckdb + from data_formulator.datalake.parquet_manager import get_parquet_path + + conn = duckdb.connect(":memory:") + for table in input_tables: + name = table["name"] + view_name = sanitize_table_name(name) + path = get_parquet_path(workspace, name) + path_escaped = str(path).replace("\\", "\\\\").replace("'", "''") + conn.execute(f'CREATE VIEW "{view_name}" AS SELECT * FROM read_parquet(\'{path_escaped}\')') + return conn + + class SQLDataTransformationAgent(object): - def __init__(self, client, conn, system_prompt=None, agent_coding_rules=""): + def __init__(self, client, workspace, system_prompt=None, agent_coding_rules=""): self.client = client - self.conn = conn # duckdb connection + self.workspace = workspace + self.conn = None # set per request, closed after use # Incorporate agent coding rules into system prompt if provided if system_prompt is not None: @@ -214,22 +236,28 @@ def process_gpt_sql_response(self, response, messages): query_str = query_blocks[-1] try: - # Generate unique table name directly with timestamp and random suffix + # Generate unique view name for this execution, then write result to datalake as parquet random_suffix = ''.join(random.choices(string.ascii_lowercase, k=4)) - table_name = f"view_{random_suffix}" + view_name = f"view_{random_suffix}" - create_query = f"CREATE VIEW IF NOT EXISTS {table_name} AS {query_str}" + create_query = f"CREATE VIEW IF NOT EXISTS {view_name} AS {query_str}" self.conn.execute(create_query) self.conn.commit() - # Check how many rows are in the table - row_count = self.conn.execute(f"SELECT COUNT(*) FROM {table_name}").fetchone()[0] + # Check how many rows are in the result + row_count = self.conn.execute(f"SELECT COUNT(*) FROM {view_name}").fetchone()[0] - # Only limit to 5000 if there are more rows + # Fetch result: full for datalake write, limited for response payload if row_count > 5000: - query_output = self.conn.execute(f"SELECT * FROM {table_name} LIMIT 5000").fetch_df() + query_output = self.conn.execute(f"SELECT * FROM {view_name} LIMIT 5000").fetch_df() + full_df = self.conn.execute(f"SELECT * FROM {view_name}").fetch_df() else: - query_output = self.conn.execute(f"SELECT * FROM {table_name}").fetch_df() + full_df = self.conn.execute(f"SELECT * FROM {view_name}").fetch_df() + query_output = full_df + + # Write full result to datalake as parquet (parquet-to-parquet) + output_table_name = parquet_sanitize_table_name(f"derived_{random_suffix}") + write_parquet(self.workspace, full_df, output_table_name) result = { "status": "ok", @@ -237,7 +265,7 @@ def process_gpt_sql_response(self, response, messages): "content": { 'rows': json.loads(query_output.to_json(orient='records')), 'virtual': { - 'table_name': table_name, + 'table_name': output_table_name, 'row_count': row_count } }, @@ -270,61 +298,43 @@ def process_gpt_sql_response(self, response, messages): def run(self, input_tables, description, chart_type: str, chart_encodings: dict, prev_messages: list[dict] = [], n=1): """Args: - input_tables: list[dict], each dict contains 'name' and 'rows' + input_tables: list[dict], each dict contains 'name' (table name in datalake); tables are parquet. 
description: str, the description of the data transformation chart_type: str, the chart type for visualization chart_encodings: dict, the chart encodings mapping visualization channels to fields prev_messages: list[dict], the previous messages n: int, the number of candidates """ + self.conn = create_duckdb_conn_with_parquet_views(self.workspace, input_tables) + try: + data_summary = generate_sql_data_summary(self.conn, input_tables) - for table in input_tables: - table_name = sanitize_table_name(table['name']) - - # Check if table exists in the connection - try: - self.conn.execute(f"DESCRIBE {table_name}") - except Exception: - # Table doesn't exist, create it from the dataframe - df = pd.DataFrame(table['rows']) - - # Register the dataframe as a temporary view - self.conn.register(f'df_temp', df) - # Create a permanent table from the temporary view - self.conn.execute(f"CREATE TABLE {table_name} AS SELECT * FROM df_temp") - # Drop the temporary view - self.conn.execute(f"DROP VIEW df_temp") - - r = self.conn.execute(f"SELECT * FROM {table_name} LIMIT 10").fetch_df() - print(r) - # Log the creation of the table - logger.info(f"Created table {table_name} from dataframe") - - - data_summary = generate_sql_data_summary(self.conn, input_tables) + goal = { + "instruction": description, + "chart_type": chart_type, + "chart_encodings": chart_encodings, + } - goal = { - "instruction": description, - "chart_type": chart_type, - "chart_encodings": chart_encodings, - } + user_query = f"[CONTEXT]\n\n{data_summary}\n\n[GOAL]\n\n{json.dumps(goal, indent=4)}" + if len(prev_messages) > 0: + user_query = f"The user wants a new transformation based off the following updated context and goal:\n\n[CONTEXT]\n\n{data_summary}\n\n[GOAL]\n\n{description}" - user_query = f"[CONTEXT]\n\n{data_summary}\n\n[GOAL]\n\n{json.dumps(goal, indent=4)}" - if len(prev_messages) > 0: - user_query = f"The user wants a new transformation based off the following updated context and goal:\n\n[CONTEXT]\n\n{data_summary}\n\n[GOAL]\n\n{description}" + logger.info(user_query) - logger.info(user_query) + # Filter out system messages from prev_messages + filtered_prev_messages = [msg for msg in prev_messages if msg.get("role") != "system"] - # Filter out system messages from prev_messages - filtered_prev_messages = [msg for msg in prev_messages if msg.get("role") != "system"] - - messages = [{"role":"system", "content": self.system_prompt}, - *filtered_prev_messages, - {"role":"user","content": user_query}] - - response = self.client.get_completion(messages = messages) + messages = [{"role":"system", "content": self.system_prompt}, + *filtered_prev_messages, + {"role":"user","content": user_query}] + + response = self.client.get_completion(messages = messages) - return self.process_gpt_sql_response(response, messages) + return self.process_gpt_sql_response(response, messages) + finally: + if self.conn: + self.conn.close() + self.conn = None def followup(self, input_tables, dialog, latest_data_sample, chart_type: str, chart_encodings: dict, new_instruction: str, n=1): @@ -335,28 +345,31 @@ def followup(self, input_tables, dialog, latest_data_sample, chart_type: str, ch chart_encodings: the chart encodings that the user wants to use new_instruction: the new instruction that the user wants to add to the latest data sample """ + self.conn = create_duckdb_conn_with_parquet_views(self.workspace, input_tables) + try: + goal = { + "followup_instruction": new_instruction, + "chart_type": chart_type, + "chart_encodings": chart_encodings + } - 
goal = { - "followup_instruction": new_instruction, - "chart_type": chart_type, - "chart_encodings": chart_encodings - } - - logger.info(f"GOAL: \n\n{goal}") - - #logger.info(dialog) + logger.info(f"GOAL: \n\n{goal}") - updated_dialog = [{"role":"system", "content": self.system_prompt}, *dialog[1:]] + updated_dialog = [{"role":"system", "content": self.system_prompt}, *dialog[1:]] - # get the current table name - sample_data_str = pd.DataFrame(latest_data_sample).head(10).to_string() + '\n......' + # get the current table name + sample_data_str = pd.DataFrame(latest_data_sample).head(10).to_string() + '\n......' - messages = [*updated_dialog, {"role":"user", - "content": f"This is the result from the latest sql query:\n\n{sample_data_str}\n\nUpdate the sql query above based on the following instruction:\n\n{json.dumps(goal, indent=4)}"}] + messages = [*updated_dialog, {"role":"user", + "content": f"This is the result from the latest sql query:\n\n{sample_data_str}\n\nUpdate the sql query above based on the following instruction:\n\n{json.dumps(goal, indent=4)}"}] - response = self.client.get_completion(messages = messages) + response = self.client.get_completion(messages = messages) - return self.process_gpt_sql_response(response, messages) + return self.process_gpt_sql_response(response, messages) + finally: + if self.conn: + self.conn.close() + self.conn = None def generate_sql_data_summary(conn, input_tables: list[dict], diff --git a/py-src/data_formulator/agents/client_utils.py b/py-src/data_formulator/agents/client_utils.py index 4ccce9d0..43cf0ee3 100644 --- a/py-src/data_formulator/agents/client_utils.py +++ b/py-src/data_formulator/agents/client_utils.py @@ -1,27 +1,7 @@ import litellm import openai from azure.identity import DefaultAzureCredential, get_bearer_token_provider -from typing import Dict, Optional, Union -class OpenAIClientAdapter(object): - """ - Wrapper around OpenAI or AzureOpenAI client that provides the same interface as Client. - """ - def __init__(self, openai_client: Union[openai.OpenAI, openai.AzureOpenAI], model: str): - self._openai_client = openai_client - self.model = model - self.params = {} - - def get_completion(self, messages): - """ - Returns a completion using the wrapped OpenAI client. - """ - completion_params = { - "model": self.model, - "messages": messages, - } - - return self._openai_client.chat.completions.create(**completion_params) class Client(object): """ @@ -69,7 +49,7 @@ def __init__(self, endpoint, model, api_key=None, api_base=None, api_version=No self.model = f"ollama/{model}" @classmethod - def from_config(cls, model_config: Dict[str, str]): + def from_config(cls, model_config: dict[str, str]): """ Create a client instance from model configuration. @@ -132,7 +112,7 @@ def get_completion(self, messages, stream=False): ) - def get_response(self, messages: list[dict], tools: Optional[list] = None): + def get_response(self, messages: list[dict], tools: list | None = None): """ Returns a response using OpenAI's Response API approach. 
""" diff --git a/py-src/data_formulator/agents/web_utils.py b/py-src/data_formulator/agents/web_utils.py index 1fd3aaea..a04f6f48 100644 --- a/py-src/data_formulator/agents/web_utils.py +++ b/py-src/data_formulator/agents/web_utils.py @@ -3,7 +3,6 @@ import requests from bs4 import BeautifulSoup -from typing import Optional, Union import logging from urllib.parse import urlparse import tempfile @@ -111,7 +110,7 @@ def _validate_url_for_ssrf(url: str) -> str: return url -def download_html_content(url: str, timeout: int = 30, headers: Optional[dict] = None) -> str: +def download_html_content(url: str, timeout: int = 30, headers: dict | None = None) -> str: """ Download HTML content from a given URL with SSRF protection. @@ -254,7 +253,7 @@ def html_to_text(html_content: str, remove_scripts: bool = True, remove_styles: # Fallback: return the raw content if parsing fails return html_content -def get_html_title(html_content: str) -> Optional[str]: +def get_html_title(html_content: str) -> str | None: """ Extract the title from HTML content. @@ -276,7 +275,7 @@ def get_html_title(html_content: str) -> Optional[str]: return None -def get_html_meta_description(html_content: str) -> Optional[str]: +def get_html_meta_description(html_content: str) -> str | None: """ Extract the meta description from HTML content. diff --git a/py-src/data_formulator/app.py b/py-src/data_formulator/app.py index a767d277..eb1d45df 100644 --- a/py-src/data_formulator/app.py +++ b/py-src/data_formulator/app.py @@ -2,22 +2,19 @@ # Licensed under the MIT License. import argparse -import random import sys import os import mimetypes -from functools import lru_cache mimetypes.add_type('application/javascript', '.js') mimetypes.add_type('application/javascript', '.mjs') import flask -from flask import Flask, request, send_from_directory, session +from flask import Flask, request, send_from_directory from flask import stream_with_context, Response import webbrowser import threading import numpy as np -import datetime import time import logging @@ -28,28 +25,14 @@ from dotenv import load_dotenv import secrets import base64 -APP_ROOT = Path(Path(__file__).parent).absolute() - -import os - -# blueprints -from data_formulator.tables_routes import tables_bp -from data_formulator.agent_routes import agent_bp -from data_formulator.demo_stream_routes import demo_stream_bp, limiter as demo_stream_limiter -from data_formulator.db_manager import db_manager -from data_formulator.example_datasets_config import EXAMPLE_DATASETS -import queue -from typing import Dict, Any +APP_ROOT = Path(Path(__file__).parent).absolute() +# Create Flask app (lightweight, no heavy imports yet) app = Flask(__name__, static_url_path='', static_folder=os.path.join(APP_ROOT, "dist")) -app.secret_key = secrets.token_hex(16) # Generate a random secret key for sessions +app.secret_key = secrets.token_hex(16) app.json.sort_keys = False -# Initialize rate limiter for demo stream routes that call external APIs -# The limiter is defined in demo_stream_routes.py to avoid circular imports -demo_stream_limiter.init_app(app) - class CustomJSONEncoder(json.JSONEncoder): def default(self, obj): if isinstance(obj, np.int64): @@ -65,7 +48,7 @@ def default(self, obj): load_dotenv(os.path.join(APP_ROOT, 'api-keys.env')) load_dotenv(os.path.join(APP_ROOT, '.env')) -# Add this line to store args at app level +# Default config from env (can be overridden by CLI args) app.config['CLI_ARGS'] = { 'exec_python_in_subprocess': os.environ.get('EXEC_PYTHON_IN_SUBPROCESS', 'false').lower() 
== 'true', 'disable_display_keys': os.environ.get('DISABLE_DISPLAY_KEYS', 'false').lower() == 'true', @@ -74,19 +57,11 @@ def default(self, obj): 'project_front_page': os.environ.get('PROJECT_FRONT_PAGE', 'false').lower() == 'true' } -# register blueprints -# Only register tables blueprint if database is not disabled -if not app.config['CLI_ARGS']['disable_database']: - app.register_blueprint(tables_bp) -app.register_blueprint(agent_bp) -app.register_blueprint(demo_stream_bp) - # Get logger for this module (logging config moved to run_app function) logger = logging.getLogger(__name__) def configure_logging(): """Configure logging for the Flask application.""" - # Configure root logger for general application logging logging.basicConfig( level=logging.ERROR, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', @@ -98,14 +73,38 @@ def configure_logging(): logging.getLogger('litellm').setLevel(logging.WARNING) logging.getLogger('openai').setLevel(logging.WARNING) - # Configure Flask app logger to use the same settings app.logger.handlers = [] for handler in logging.getLogger().handlers: app.logger.addHandler(handler) +def _register_blueprints(disable_database: bool): + """ + Import and register blueprints. This is where heavy imports happen. + Called from run_app() with progress feedback. + """ + # Import tables routes (imports database connectors) + print(" Loading data connectors...", flush=True) + from data_formulator.tables_routes import tables_bp + + # Import agent routes (imports AI/ML libraries: litellm, sklearn, etc.) + print(" Loading AI agents...", flush=True) + from data_formulator.agent_routes import agent_bp + + # Import demo stream routes + from data_formulator.demo_stream_routes import demo_stream_bp, limiter as demo_stream_limiter + demo_stream_limiter.init_app(app) + + # Register blueprints + if not disable_database: + app.register_blueprint(tables_bp) + app.register_blueprint(agent_bp) + app.register_blueprint(demo_stream_bp) + + @app.route('/api/example-datasets') def get_sample_datasets(): + from data_formulator.example_datasets_config import EXAMPLE_DATASETS return flask.jsonify(EXAMPLE_DATASETS) @@ -116,116 +115,21 @@ def index_alt(path): @app.errorhandler(404) def page_not_found(e): - # your processing here logger.info(app.static_folder) - return send_from_directory(app.static_folder, "index.html") #'Hello 404!' 
#send_from_directory(app.static_folder, "index.html") - -###### test functions ###### - -@app.route('/api/hello') -def hello(): - values = [ - {"a": "A", "b": 28}, {"a": "B", "b": 55}, {"a": "C", "b": 43}, - {"a": "D", "b": 91}, {"a": "E", "b": 81}, {"a": "F", "b": 53}, - {"a": "G", "b": 19}, {"a": "H", "b": 87}, {"a": "I", "b": 52} - ] - spec = { - "$schema": "https://vega.github.io/schema/vega-lite/v5.json", - "description": "A simple bar chart with embedded data.", - "data": { "values": values }, - "mark": "bar", - "encoding": { - "x": {"field": "a", "type": "nominal", "axis": {"labelAngle": 0}}, - "y": {"field": "b", "type": "quantitative"} - } - } - return json.dumps(spec) - -@app.route('/api/hello-stream') -def streamed_response(): - def generate(): - values = [ - {"a": "A", "b": 28}, {"a": "B", "b": 55}, {"a": "C", "b": 43}, - {"a": "D", "b": 91}, {"a": "E", "b": 81}, {"a": "F", "b": 53}, - {"a": "G", "b": 19}, {"a": "H", "b": 87}, {"a": "I", "b": 52} - ] - spec = { - "$schema": "https://vega.github.io/schema/vega-lite/v5.json", - "description": "A simple bar chart with embedded data.", - "data": { "values": [] }, - "mark": "bar", - "encoding": { - "x": {"field": "a", "type": "nominal", "axis": {"labelAngle": 0}}, - "y": {"field": "b", "type": "quantitative"} - } - } - for i in range(3): - time.sleep(3) - spec["data"]["values"] = values[i:] - yield json.dumps(spec) - return Response(stream_with_context(generate())) + return send_from_directory(app.static_folder, "index.html") -@app.route('/api/get-session-id', methods=['GET', 'POST']) -def get_session_id(): - """Endpoint to get or confirm a session ID from the client""" - # if it is a POST request, we expect a session_id in the body - # if it is a GET request, we do not expect a session_id in the query params - - current_session_id = None - if request.is_json: - content = request.get_json() - current_session_id = content.get("session_id", None) - - # Check if database is disabled - database_disabled = app.config['CLI_ARGS']['disable_database'] - - if database_disabled: - # When database is disabled, don't use Flask sessions (cookies) - # Just return the provided session_id or generate a new one - if current_session_id is None: - current_session_id = secrets.token_hex(16) - logger.info(f"Generated session ID for disabled database: {current_session_id}") - else: - logger.info(f"Using provided session ID for disabled database: {current_session_id}") - - return flask.jsonify({ - "status": "ok", - "session_id": current_session_id - }) - else: - # When database is enabled, use Flask sessions (cookies) as before - if current_session_id is None: - if 'session_id' not in session: - session['session_id'] = secrets.token_hex(16) - session.permanent = True - logger.info(f"Created new session: {session['session_id']}") - else: - # override the session_id - session['session_id'] = current_session_id - session.permanent = True - - return flask.jsonify({ - "status": "ok", - "session_id": session['session_id'] - }) @app.route('/api/app-config', methods=['GET']) def get_app_config(): """Provide frontend configuration settings from CLI arguments""" args = app.config['CLI_ARGS'] - # When database is disabled, don't try to access session - session_id = None - if not args['disable_database']: - session_id = session.get('session_id', None) - config = { "EXEC_PYTHON_IN_SUBPROCESS": args['exec_python_in_subprocess'], "DISABLE_DISPLAY_KEYS": args['disable_display_keys'], "DISABLE_DATABASE": args['disable_database'], "DISABLE_FILE_UPLOAD": 
args['disable_file_upload'], "PROJECT_FRONT_PAGE": args['project_front_page'], - "SESSION_ID": session_id } return flask.jsonify(config) @@ -238,7 +142,6 @@ def database_disabled_fallback(path): "message": "Database functionality is disabled. Use --disable-database=false to enable table operations." }), 503 else: - # If database is not disabled but we're hitting this route, it means the tables blueprint wasn't registered return flask.jsonify({ "status": "error", "message": "Table routes are not available" @@ -264,12 +167,12 @@ def parse_args() -> argparse.Namespace: def run_app(): - # Configure logging only when actually running the app - configure_logging() + print("Starting Data Formulator...", flush=True) + configure_logging() args = parse_args() - # Add this line to make args available to routes - # override the args from the env file + + # Override config from CLI args app.config['CLI_ARGS'] = { 'exec_python_in_subprocess': args.exec_python_in_subprocess, 'disable_display_keys': args.disable_display_keys, @@ -278,18 +181,17 @@ def run_app(): 'project_front_page': args.project_front_page } - # Update database manager state - db_manager._disabled = args.disable_database + # Register blueprints (this is where heavy imports happen) + _register_blueprints(args.disable_database) + url = "http://localhost:{0}".format(args.port) + print(f"Ready! Open {url} in your browser.", flush=True) + if not args.dev: - url = "http://localhost:{0}".format(args.port) - threading.Timer(2, lambda: webbrowser.open(url, new=2)).start() + threading.Timer(1.5, lambda: webbrowser.open(url, new=2)).start() - # Enable debug mode and auto-reload in development mode debug_mode = args.dev app.run(host='0.0.0.0', port=args.port, debug=debug_mode, use_reloader=debug_mode) if __name__ == '__main__': - #app.run(debug=True, host='127.0.0.1', port=5000) - #use 0.0.0.0 for public run_app() diff --git a/py-src/data_formulator/auth.py b/py-src/data_formulator/auth.py new file mode 100644 index 00000000..8b28ea56 --- /dev/null +++ b/py-src/data_formulator/auth.py @@ -0,0 +1,88 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +""" +Authentication and identity management for Data Formulator. + +This module provides a hybrid identity system that supports both anonymous +browser-based users and authenticated users (via Azure App Service or JWT). + +Security Model: +- Anonymous users: Browser UUID from X-Identity-Id header (prefixed with "browser:") +- Authenticated users: Verified identity from Azure headers or JWT (prefixed with "user:") +- Namespacing ensures authenticated user data cannot be accessed by spoofing headers +""" + +import logging +from flask import request, current_app + +logger = logging.getLogger(__name__) + + +def get_identity_id() -> str: + """ + Get identity ID with proper security priority: + + 1. Verified user from Azure App Service auth headers (trusted, set by Azure) + 2. Verified user from JWT bearer token (trusted, cryptographically verified) + 3. Browser ID from X-Identity-Id header (untrusted, for anonymous users only) + + The key insight: for anonymous users, we trust X-Identity-Id because there's + no security risk (who cares if someone "steals" a random UUID?). For authenticated + users, we MUST extract identity from verified sources, not client-provided headers. + + Identity is namespaced as "user:" or "browser:" to ensure authenticated + user data is never accessible via anonymous browser identity spoofing. 
+ + Returns: + str: The namespaced identity ID string (e.g., "user:alice@..." or "browser:550e8400-...") + + Raises: + ValueError: If no identity could be determined + """ + + # Priority 1: Azure App Service Authentication (EasyAuth) + # When deployed to Azure with authentication enabled, Azure injects these headers. + # These are SET BY AZURE (not the client) after verifying the user's identity. + azure_principal_id = request.headers.get('X-MS-CLIENT-PRINCIPAL-ID') + if azure_principal_id: + logger.debug(f"Using Azure principal ID: {azure_principal_id[:8]}...") + return f"user:{azure_principal_id}" + + # Priority 2: JWT Bearer Token (for custom auth implementations) + # If you implement your own auth, verify the JWT here and extract user ID. + # Example (uncomment and configure when implementing JWT auth): + # + # auth_header = request.headers.get('Authorization', '') + # if auth_header.startswith('Bearer '): + # token = auth_header[7:] + # try: + # import jwt + # payload = jwt.decode(token, current_app.config['JWT_SECRET'], algorithms=['HS256']) + # user_id = payload.get('sub') or payload.get('user_id') + # if user_id: + # logger.debug(f"Using JWT user ID: {user_id[:8]}...") + # return f"user:{user_id}" + # except Exception as e: + # logger.warning(f"Invalid JWT token: {e}") + # # Fall through to browser identity + + # Priority 3: Anonymous browser identity (UNTRUSTED - from client header) + # SECURITY: We NEVER trust the namespace prefix from X-Identity-Id header. + # Even if client sends "user:alice@...", we force "browser:" prefix. + # Only verified auth (Azure headers, JWT) can result in "user:" prefix. + client_identity = request.headers.get('X-Identity-Id') + if client_identity: + # Extract the ID part, ignoring any client-provided prefix + # e.g., "browser:550e8400-..." → "550e8400-..." + # e.g., "user:alice@..." → "alice@..." (but forced to browser: namespace) + if ':' in client_identity: + # Strip the prefix - we don't trust client-provided namespaces + identity_value = client_identity.split(':', 1)[1] + else: + identity_value = client_identity + + # Always use browser: prefix for client-provided identities + return f"browser:{identity_value}" + + raise ValueError("X-Identity-Id header is required. Please refresh the page.") \ No newline at end of file diff --git a/py-src/data_formulator/data_loader/README.md b/py-src/data_formulator/data_loader/README.md index 660e59b3..70079ec9 100644 --- a/py-src/data_formulator/data_loader/README.md +++ b/py-src/data_formulator/data_loader/README.md @@ -1,43 +1,51 @@ ## Data Loader Module -This module provides a framework for loading data from various external sources into DuckDB. It follows an abstract base class pattern to ensure consistent implementation across different data sources. +This module provides a framework for loading data from various external sources into the **workspace** (parquet files). It follows an abstract base class pattern so all loaders behave consistently. + +### Design + +- **Storage**: Ingested data is written as **parquet** in the workspace. DuckDB is **not** used for storage; it is only the computation engine elsewhere in the application. +- **Data flow**: **External source → PyArrow Table → Parquet (workspace)**. +- **Format**: Loaders use **PyArrow** as the standard in-memory format for speed and interoperability. Database loaders (PostgreSQL, MySQL, MSSQL) use **connectorx** for Arrow-native reads where applicable. 
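
The last two bullets in isolation (a minimal sketch using plain PyArrow; the file name is illustrative, and inside the app this path goes through the loader base class and workspace helpers rather than direct file writes):

```python
import pyarrow as pa
import pyarrow.parquet as pq

# Stand-in for data fetched from an external source, e.g. what a database
# loader gets back from connectorx's read_sql(..., return_type="arrow").
arrow_table = pa.table({"city": ["Seattle", "Tokyo"], "temp_c": [11.5, 18.2]})

# The workspace stores each ingested table as a parquet file.
pq.write_table(arrow_table, "weather.parquet")

# Any Arrow-aware engine (e.g. DuckDB used in-memory for SQL mode) can read
# the parquet back for computation.
print(pq.read_table("weather.parquet").num_rows)  # -> 2
```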
### Building a New Data Loader -The abstract class `ExternalDataLoader` defines the data loader interface. Each concrete implementation (e.g., `KustoDataLoader`, `MySQLDataLoader`) handles specific data source connections and data ingestion. +The abstract class `ExternalDataLoader` defines the interface. Each concrete implementation (e.g., `MySQLDataLoader`, `S3DataLoader`) handles one data source. -To create a new data loader: +To add a new data loader: -1. Create a new class that inherits from `ExternalDataLoader` -2. Implement the required abstract methods: - - `list_params()`: Define required connection parameters - - `__init__()`: Initialize connection to data source - - `list_tables()`: List available tables/views - - `ingest_data()`: Load data from source - - `view_query_sample()`: Preview query results - - `ingest_data_from_query()`: Load data from custom query -3. Register the new class into `__init__.py` so that the front-end can automatically discover the new data loader. +1. Create a class that inherits from `ExternalDataLoader`. +2. Implement the required pieces: + - **`list_params()`** (static): Connection parameters (names, types, defaults, descriptions). + - **`auth_instructions()`** (static): Short instructions for obtaining credentials/setup. + - **`__init__(self, params)`**: Validate params and establish or verify connection to the source. No `duck_db_conn`; storage is workspace-only. + - **`fetch_data_as_arrow(source_table, size=..., sort_columns=..., sort_order=...)`**: Fetch data from the source and return a `pyarrow.Table`. Only `source_table` (table/collection/file identifier) is supported; raw query strings are not accepted for security and dialect consistency. + - **`list_tables(table_filter=None)`**: Return a list of `{"name": ..., "metadata": {...}}` for tables/files the user can select. Metadata typically includes `row_count`, `columns`, and `sample_rows`. +3. Register the new class in the package `__init__.py` so the front-end can discover it. -The UI automatically provide the query completion option to help user generate queries for the given data loader (from NL or partial queries). +The base class provides **`ingest_to_workspace(workspace, ...)`**, which calls `fetch_data_as_arrow()` and writes the result to the workspace as parquet. You do not implement ingest logic in the loader. + +The UI uses the same loaders for connection setup, table listing, and ingestion into the workspace. 
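
To make these steps concrete, below is a minimal sketch of a hypothetical loader for a folder of local CSV files. The class, parameter, and file names are illustrative (no such loader exists in the codebase); it only shows the shape of the interface described above.

```python
import os
from typing import Any

import pyarrow as pa
import pyarrow.csv as pa_csv

from data_formulator.data_loader.external_data_loader import ExternalDataLoader


class CsvFolderDataLoader(ExternalDataLoader):
    """Hypothetical loader that treats each .csv file in a folder as a table."""

    @staticmethod
    def list_params() -> list[dict[str, Any]]:
        return [{"name": "folder", "type": "string", "required": True, "default": "",
                 "description": "Local folder containing .csv files"}]

    @staticmethod
    def auth_instructions() -> str:
        return "No credentials needed; the folder must be readable by the server process."

    def __init__(self, params: dict[str, Any]):
        self.params = params
        self.folder = params.get("folder", "")
        if not os.path.isdir(self.folder):
            raise ValueError(f"Folder not found: {self.folder}")

    def list_tables(self, table_filter: str | None = None) -> list[dict[str, Any]]:
        results = []
        for name in sorted(os.listdir(self.folder)):
            if not name.lower().endswith(".csv"):
                continue
            if table_filter and table_filter.lower() not in name.lower():
                continue
            # Reading the whole file is fine for small local CSVs; real loaders
            # usually rely on source metadata or sampling instead.
            table = pa_csv.read_csv(os.path.join(self.folder, name))
            results.append({
                "name": name,
                "metadata": {
                    "row_count": table.num_rows,
                    "columns": [
                        {"name": c, "type": str(t)}
                        for c, t in zip(table.column_names, table.schema.types)
                    ],
                    "sample_rows": table.slice(0, 10).to_pylist(),
                },
            })
        return results

    def fetch_data_as_arrow(
        self,
        source_table: str,
        size: int = 1000000,
        sort_columns: list[str] | None = None,
        sort_order: str = 'asc'
    ) -> pa.Table:
        if not source_table:
            raise ValueError("source_table must be provided")
        table = pa_csv.read_csv(os.path.join(self.folder, source_table))
        if sort_columns:
            order = "descending" if sort_order == "desc" else "ascending"
            table = table.sort_by([(col, order) for col in sort_columns])
        return table.slice(0, size)
```

Ingestion then goes through the base class, e.g. `loader.ingest_to_workspace(workspace, table_name="sales", source_table="sales.csv")` (the table names here are placeholders), which calls `fetch_data_as_arrow()` and writes the parquet file; the loader itself contains no ingest or storage logic.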
### Example Implementations -- `AthenaDataLoader`: AWS Athena integration (SQL queries on S3 data lakes) -- `BigQueryDataLoader`: Google BigQuery integration -- `KustoDataLoader`: Azure Data Explorer (Kusto) integration -- `MySQLDataLoader`: MySQL database integration -- `PostgreSQLDataLoader`: PostgreSQL database integration -- `MSSQLDataLoader`: Microsoft SQL Server integration -- `S3DataLoader`: Amazon S3 file integration (CSV, Parquet, JSON) -- `AzureBlobDataLoader`: Azure Blob Storage integration -- `MongoDBDataLoader`: MongoDB integration +- **`AthenaDataLoader`**: AWS Athena (SQL on S3 data lakes) +- **`BigQueryDataLoader`**: Google BigQuery +- **`KustoDataLoader`**: Azure Data Explorer (Kusto) +- **`MySQLDataLoader`**: MySQL (connectorx) +- **`PostgreSQLDataLoader`**: PostgreSQL (connectorx) +- **`MSSQLDataLoader`**: Microsoft SQL Server (connectorx) +- **`S3DataLoader`**: Amazon S3 files (CSV, Parquet, JSON) via PyArrow S3 filesystem +- **`AzureBlobDataLoader`**: Azure Blob Storage via PyArrow +- **`MongoDBDataLoader`**: MongoDB ### Testing -Ensure your implementation: -- Handles connection errors gracefully -- Properly sanitizes table names -- Respects size limits for data ingestion -- Returns consistent metadata format +When implementing or changing a loader: + +- Handle connection and read errors clearly (e.g., raise `ValueError` with a clear message). +- Sanitize or validate table/object names where appropriate. +- Respect the `size` limit (and optional sort) in `fetch_data_as_arrow`. +- Return the same metadata shape from `list_tables()` (e.g., `row_count`, `columns`, `sample_rows`) so the UI behaves consistently. -Launch the front-end and test the data loader. \ No newline at end of file +Test via the front-end: configure the loader, list tables, and run an ingest into the workspace; then confirm parquet appears in the workspace and DuckDB (or other engines) can read it for computation. 
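
If you want to double-check the ingested parquet outside the app, a quick sketch (the workspace path shown is illustrative):

```python
import duckdb

# In-memory connection, mirroring how the app uses DuckDB per request.
con = duckdb.connect()
print(con.execute(
    "SELECT COUNT(*) FROM read_parquet('path/to/workspace/my_table.parquet')"
).fetchone())
```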
diff --git a/py-src/data_formulator/data_loader/__init__.py b/py-src/data_formulator/data_loader/__init__.py index f61a6851..898c50f5 100644 --- a/py-src/data_formulator/data_loader/__init__.py +++ b/py-src/data_formulator/data_loader/__init__.py @@ -21,4 +21,15 @@ "athena": AthenaDataLoader } -__all__ = ["ExternalDataLoader", "MySQLDataLoader", "MSSQLDataLoader", "KustoDataLoader", "S3DataLoader", "AzureBlobDataLoader", "PostgreSQLDataLoader", "MongoDBDataLoader", "BigQueryDataLoader", "AthenaDataLoader", "DATA_LOADERS"] \ No newline at end of file +__all__ = [ + "ExternalDataLoader", + "MySQLDataLoader", + "MSSQLDataLoader", + "KustoDataLoader", + "S3DataLoader", + "AzureBlobDataLoader", + "PostgreSQLDataLoader", + "MongoDBDataLoader", + "BigQueryDataLoader", + "AthenaDataLoader", + "DATA_LOADERS"] \ No newline at end of file diff --git a/py-src/data_formulator/data_loader/athena_data_loader.py b/py-src/data_formulator/data_loader/athena_data_loader.py index 8ba617fe..1d546513 100644 --- a/py-src/data_formulator/data_loader/athena_data_loader.py +++ b/py-src/data_formulator/data_loader/athena_data_loader.py @@ -1,19 +1,14 @@ -import json import logging import re import time -import duckdb +import pyarrow as pa +import pyarrow.csv as pa_csv +import boto3 +import botocore.exceptions +from pyarrow import fs as pa_fs from data_formulator.data_loader.external_data_loader import ExternalDataLoader, sanitize_table_name -from typing import Dict, Any, List, Optional -from data_formulator.security import validate_sql_query - -try: - import boto3 - import botocore.exceptions - BOTO3_AVAILABLE = True -except ImportError: - BOTO3_AVAILABLE = False +from typing import Any log = logging.getLogger(__name__) @@ -54,22 +49,16 @@ def _validate_s3_url(url: str) -> None: raise ValueError(f"Invalid S3 URL format: '{url}'. Expected format: 's3://bucket/path'") -def _escape_sql_string(value: Optional[str]) -> str: - """Escape single quotes in SQL string values.""" - if value is None: - return "" - return value.replace("'", "''") - - class AthenaDataLoader(ExternalDataLoader): """AWS Athena data loader implementation. - Executes SQL queries on Athena and loads results from S3 into DuckDB. - The output bucket is automatically fetched from the workgroup configuration. + Executes SQL queries on Athena and reads results from S3 via PyArrow. + Output location is taken from the workgroup configuration or the output_location param. + Use ingest_to_workspace() to store results as parquet in the workspace. """ @staticmethod - def list_params() -> List[Dict[str, Any]]: + def list_params() -> list[dict[str, Any]]: params_list = [ {"name": "aws_profile", "type": "string", "required": False, "default": "", "description": "AWS profile name from ~/.aws/credentials (if set, access key and secret are not required)"}, {"name": "aws_access_key_id", "type": "string", "required": False, "default": "", "description": "AWS access key ID (not required if using aws_profile)"}, @@ -160,15 +149,8 @@ def auth_instructions() -> str: **Security:** Never share secret keys, rotate regularly, use least privilege permissions. """ - def __init__(self, params: Dict[str, Any], duck_db_conn: duckdb.DuckDBPyConnection): - if not BOTO3_AVAILABLE: - raise ImportError( - "boto3 is required for Athena connections. 
" - "Install with: pip install boto3" - ) - + def __init__(self, params: dict[str, Any]): self.params = params - self.duck_db_conn = duck_db_conn # Extract parameters self.aws_profile = params.get("aws_profile", "") @@ -219,7 +201,7 @@ def __init__(self, params: Dict[str, Any], duck_db_conn: duckdb.DuckDBPyConnecti session = boto3.Session(profile_name=self.aws_profile, region_name=self.region_name) self.athena_client = session.client('athena') - # Get credentials from profile for DuckDB S3 access + # Get credentials from profile for PyArrow S3 access credentials = session.get_credentials() if credentials is None: raise ValueError( @@ -290,16 +272,14 @@ def __init__(self, params: Dict[str, Any], duck_db_conn: duckdb.DuckDBPyConnecti # Get output location: prefer user-provided, then try workgroup self.output_location = self._get_output_location() - # Install and load the httpfs extension for S3 access - self.duck_db_conn.install_extension("httpfs") - self.duck_db_conn.load_extension("httpfs") - - # Set AWS credentials for DuckDB - self.duck_db_conn.execute(f"SET s3_region='{self.region_name}'") - self.duck_db_conn.execute(f"SET s3_access_key_id='{self.aws_access_key_id}'") - self.duck_db_conn.execute(f"SET s3_secret_access_key='{self.aws_secret_access_key}'") - if self.aws_session_token: - self.duck_db_conn.execute(f"SET s3_session_token='{self.aws_session_token}'") + # Setup PyArrow S3 filesystem for reading results + self.s3_fs = pa_fs.S3FileSystem( + access_key=self.aws_access_key_id, + secret_key=self.aws_secret_access_key, + session_token=self.aws_session_token if self.aws_session_token else None, + region=self.region_name + ) + log.info("Initialized PyArrow S3 filesystem for Athena results") def _get_output_location(self) -> str: """Get the output location for query results. @@ -398,7 +378,56 @@ def _execute_query(self, query: str) -> str: wait_time = min(2 ** (elapsed // 10), 10) time.sleep(wait_time) - def list_tables(self, table_filter: str = None) -> List[Dict[str, Any]]: + def fetch_data_as_arrow( + self, + source_table: str, + size: int = 1000000, + sort_columns: list[str] | None = None, + sort_order: str = 'asc' + ) -> pa.Table: + """ + Fetch data from Athena as a PyArrow Table. + + Executes the query on Athena and reads the CSV results from S3 + using PyArrow's S3 filesystem. 
+ """ + if not source_table: + raise ValueError("source_table must be provided") + + _validate_athena_table_name(source_table) + base_query = f"SELECT * FROM {source_table}" + + # Add ORDER BY if sort columns specified + order_by_clause = "" + if sort_columns and len(sort_columns) > 0: + for col in sort_columns: + _validate_column_name(col) + order_direction = "DESC" if sort_order == 'desc' else "ASC" + sanitized_cols = [f'"{col}" {order_direction}' for col in sort_columns] + order_by_clause = f" ORDER BY {', '.join(sanitized_cols)}" + + query = f"{base_query}{order_by_clause} LIMIT {size}" + + log.info(f"Executing Athena query: {query[:200]}...") + + # Execute query and get result location + result_location = self._execute_query(query) + _validate_s3_url(result_location) + + log.info(f"Reading Athena results from: {result_location}") + + # Parse S3 URL: s3://bucket/key -> bucket/key + s3_path = result_location[5:] if result_location.startswith("s3://") else result_location + + # Athena outputs CSV files + with self.s3_fs.open_input_file(s3_path) as f: + arrow_table = pa_csv.read_csv(f) + + log.info(f"Fetched {arrow_table.num_rows} rows from Athena [Arrow-native]") + + return arrow_table + + def list_tables(self, table_filter: str | None = None) -> list[dict[str, Any]]: """List tables from Athena catalog (Glue Data Catalog).""" results = [] @@ -468,92 +497,3 @@ def list_tables(self, table_filter: str = None) -> List[Dict[str, Any]]: log.info(f"Returning {len(results)} tables") return results - - def ingest_data(self, table_name: str, name_as: str = None, size: int = 1000000, sort_columns: List[str] = None, sort_order: str = 'asc'): - """Ingest data from an Athena table by executing a SELECT query.""" - # Validate table name to prevent SQL injection - _validate_athena_table_name(table_name) - - if name_as is None: - # Extract table name from "database.table" format - name_as = table_name.split('.')[-1] - - name_as = sanitize_table_name(name_as) - - # Validate and build ORDER BY clause if sort_columns are specified - order_by_clause = "" - if sort_columns and len(sort_columns) > 0: - # Validate each column name - for col in sort_columns: - _validate_column_name(col) - order_direction = "DESC" if sort_order == 'desc' else "ASC" - sanitized_cols = [f'"{col}" {order_direction}' for col in sort_columns] - order_by_clause = f"ORDER BY {', '.join(sanitized_cols)}" - - # Validate size is a positive integer - if not isinstance(size, int) or size <= 0: - raise ValueError(f"Size must be a positive integer, got: {size}") - - # Build and execute the query - query = f"SELECT * FROM {table_name} {order_by_clause} LIMIT {size}" - log.info(f"Executing Athena query for table '{name_as}': {query}") - - result_location = self._execute_query(query) - - # Validate the result location is a proper S3 URL - _validate_s3_url(result_location) - - # Load results from S3 into DuckDB - log.info(f"Loading query results from {result_location}") - self.duck_db_conn.execute(f""" - CREATE OR REPLACE TABLE main.{name_as} AS - SELECT * FROM read_csv_auto('{_escape_sql_string(result_location)}') - """) - - log.info(f"Successfully ingested data into table '{name_as}'") - - def view_query_sample(self, query: str) -> List[Dict[str, Any]]: - """Execute query and return sample results.""" - result, error_message = validate_sql_query(query) - if not result: - raise ValueError(error_message) - - # Add LIMIT if not present to avoid large result sets - query_upper = query.upper() - if "LIMIT" not in query_upper: - query = 
f"{query.rstrip().rstrip(';')} LIMIT 10" - - # Execute query on Athena - result_location = self._execute_query(query) - - # Validate the result location is a proper S3 URL - _validate_s3_url(result_location) - - # Load results from S3 - df = self.duck_db_conn.execute(f"SELECT * FROM read_csv_auto('{_escape_sql_string(result_location)}')").df() - - return json.loads(df.head(10).to_json(orient="records")) - - def ingest_data_from_query(self, query: str, name_as: str): - """Execute Athena query and ingest results into DuckDB.""" - result, error_message = validate_sql_query(query) - if not result: - raise ValueError(error_message) - - name_as = sanitize_table_name(name_as) - - # Execute query on Athena - log.info(f"Executing Athena query for table '{name_as}'") - result_location = self._execute_query(query) - - # Validate the result location is a proper S3 URL - _validate_s3_url(result_location) - - # Load results from S3 into DuckDB - log.info(f"Loading query results from {result_location}") - self.duck_db_conn.execute(f""" - CREATE OR REPLACE TABLE main.{name_as} AS - SELECT * FROM read_csv_auto('{_escape_sql_string(result_location)}') - """) - - log.info(f"Successfully ingested data into table '{name_as}'") diff --git a/py-src/data_formulator/data_loader/azure_blob_data_loader.py b/py-src/data_formulator/data_loader/azure_blob_data_loader.py index 1206f4e0..3c8bdf2d 100644 --- a/py-src/data_formulator/data_loader/azure_blob_data_loader.py +++ b/py-src/data_formulator/data_loader/azure_blob_data_loader.py @@ -1,23 +1,22 @@ import json +import logging import pandas as pd -import duckdb -import os +import pyarrow as pa +import pyarrow.parquet as pq +import pyarrow.csv as pa_csv +from azure.storage.blob import BlobServiceClient +from azure.identity import DefaultAzureCredential +from pyarrow import fs as pa_fs from data_formulator.data_loader.external_data_loader import ExternalDataLoader, sanitize_table_name -from typing import Dict, Any, List -from data_formulator.security import validate_sql_query +from typing import Any -try: - from azure.storage.blob import BlobServiceClient, ContainerClient - from azure.identity import DefaultAzureCredential, AzureCliCredential, ManagedIdentityCredential, EnvironmentCredential, ChainedTokenCredential - AZURE_BLOB_AVAILABLE = True -except ImportError: - AZURE_BLOB_AVAILABLE = False +logger = logging.getLogger(__name__) class AzureBlobDataLoader(ExternalDataLoader): @staticmethod - def list_params() -> List[Dict[str, Any]]: + def list_params() -> list[dict[str, Any]]: params_list = [ {"name": "account_name", "type": "string", "required": True, "default": "", "description": "Azure storage account name"}, {"name": "container_name", "type": "string", "required": True, "default": "", "description": "Azure blob container name"}, @@ -65,16 +64,9 @@ def auth_instructions() -> str: - JSON files (.json, .jsonl) """ - def __init__(self, params: Dict[str, Any], duck_db_conn: duckdb.DuckDBPyConnection): - if not AZURE_BLOB_AVAILABLE: - raise ImportError( - "Azure storage libraries are required for Azure Blob connections. 
" - "Install with: pip install azure-storage-blob azure-identity" - ) - + def __init__(self, params: dict[str, Any]): self.params = params - self.duck_db_conn = duck_db_conn - + # Extract parameters self.account_name = params.get("account_name", "") self.container_name = params.get("container_name", "") @@ -84,56 +76,93 @@ def __init__(self, params: Dict[str, Any], duck_db_conn: duckdb.DuckDBPyConnecti self.sas_token = params.get("sas_token", "") self.endpoint = params.get("endpoint", "blob.core.windows.net") - # Install and load the azure extension - self.duck_db_conn.install_extension("azure") - self.duck_db_conn.load_extension("azure") + # Setup PyArrow Azure filesystem + if self.account_key: + self.azure_fs = pa_fs.AzureFileSystem( + account_name=self.account_name, + account_key=self.account_key + ) + elif self.connection_string: + self.azure_fs = pa_fs.AzureFileSystem.from_connection_string(self.connection_string) + else: + # Use default credential chain + self.azure_fs = pa_fs.AzureFileSystem(account_name=self.account_name) - # Set up Azure authentication using secrets (preferred method) - self._setup_azure_authentication() + logger.info(f"Initialized PyArrow Azure filesystem for account: {self.account_name}") - def _setup_azure_authentication(self): - """Set up Azure authentication using DuckDB secrets.""" - if self.connection_string: - # Use connection string authentication - self.duck_db_conn.execute(f""" - CREATE OR REPLACE SECRET azure_secret ( - TYPE AZURE, - CONNECTION_STRING '{self.connection_string}' - ) - """) - elif self.account_key: - # Use account key authentication - self.duck_db_conn.execute(f""" - CREATE OR REPLACE SECRET azure_secret ( - TYPE AZURE, - ACCOUNT_NAME '{self.account_name}', - ACCOUNT_KEY '{self.account_key}' - ) - """) - elif self.sas_token: - # Use SAS token authentication - self.duck_db_conn.execute(f""" - CREATE OR REPLACE SECRET azure_secret ( - TYPE AZURE, - ACCOUNT_NAME '{self.account_name}', - SAS_TOKEN '{self.sas_token}' - ) - """) + def _azure_path(self, azure_url: str) -> str: + """Convert Azure URL to path for PyArrow (container/blob).""" + if azure_url.startswith("az://"): + parts = azure_url[5:].split("/", 1) + return parts[1] if len(parts) > 1 else azure_url + return f"{self.container_name}/{azure_url}" + + def _read_sample(self, azure_url: str, limit: int) -> pd.DataFrame: + """Read sample rows from an Azure blob using PyArrow. 
Returns a pandas DataFrame.""" + azure_path = self._azure_path(azure_url) + if azure_url.lower().endswith('.parquet'): + table = pq.read_table(azure_path, filesystem=self.azure_fs) + elif azure_url.lower().endswith('.csv'): + with self.azure_fs.open_input_file(azure_path) as f: + table = pa_csv.read_csv(f) + elif azure_url.lower().endswith('.json') or azure_url.lower().endswith('.jsonl'): + import pyarrow.json as pa_json + with self.azure_fs.open_input_file(azure_path) as f: + table = pa_json.read_json(f) else: - # Use credential chain authentication (default) - self.duck_db_conn.execute(f""" - CREATE OR REPLACE SECRET azure_secret ( - TYPE AZURE, - PROVIDER credential_chain, - ACCOUNT_NAME '{self.account_name}', - CHAIN '{self.credential_chain}' - ) - """) + raise ValueError(f"Unsupported file type: {azure_url}") + if table.num_rows > limit: + table = table.slice(0, limit) + return table.to_pandas() - def list_tables(self, table_filter: str = None) -> List[Dict[str, Any]]: - # Use Azure SDK to list blobs in the container - from azure.storage.blob import BlobServiceClient + def fetch_data_as_arrow( + self, + source_table: str, + size: int = 1000000, + sort_columns: list[str] | None = None, + sort_order: str = 'asc' + ) -> pa.Table: + """ + Fetch data from Azure Blob as a PyArrow Table. + + For files (parquet, csv), reads directly using PyArrow's Azure filesystem. + """ + if not source_table: + raise ValueError("source_table (Azure blob URL) must be provided") + azure_url = source_table + azure_path = self._azure_path(azure_url) + + logger.info("Reading Azure blob via PyArrow: %s", azure_url) + + if azure_url.lower().endswith('.parquet'): + arrow_table = pq.read_table(azure_path, filesystem=self.azure_fs) + elif azure_url.lower().endswith('.csv'): + with self.azure_fs.open_input_file(azure_path) as f: + arrow_table = pa_csv.read_csv(f) + elif azure_url.lower().endswith('.json') or azure_url.lower().endswith('.jsonl'): + import pyarrow.json as pa_json + with self.azure_fs.open_input_file(azure_path) as f: + arrow_table = pa_json.read_json(f) + else: + raise ValueError(f"Unsupported file type: {azure_url}") + + # Apply sorting if specified + if sort_columns and len(sort_columns) > 0: + df = arrow_table.to_pandas() + ascending = sort_order != 'desc' + df = df.sort_values(by=sort_columns, ascending=ascending) + arrow_table = pa.Table.from_pandas(df, preserve_index=False) + + # Apply size limit + if arrow_table.num_rows > size: + arrow_table = arrow_table.slice(0, size) + + logger.info(f"Fetched {arrow_table.num_rows} rows from Azure Blob [Arrow-native]") + + return arrow_table + + def list_tables(self, table_filter: str | None = None) -> list[dict[str, Any]]: # Create blob service client based on authentication method if self.connection_string: blob_service_client = BlobServiceClient.from_connection_string(self.connection_string) @@ -177,228 +206,95 @@ def list_tables(self, table_filter: str = None) -> List[Dict[str, Any]]: azure_url = f"az://{self.account_name}.{self.endpoint}/{self.container_name}/{blob_name}" try: - # Choose the appropriate read function based on file extension - if azure_url.lower().endswith('.parquet'): - sample_df = self.duck_db_conn.execute(f"SELECT * FROM read_parquet('{azure_url}') LIMIT 10").df() - elif azure_url.lower().endswith('.json') or azure_url.lower().endswith('.jsonl'): - sample_df = self.duck_db_conn.execute(f"SELECT * FROM read_json_auto('{azure_url}') LIMIT 10").df() - elif azure_url.lower().endswith('.csv'): - sample_df = 
self.duck_db_conn.execute(f"SELECT * FROM read_csv_auto('{azure_url}') LIMIT 10").df() - - # Get column information + sample_df = self._read_sample(azure_url, 10) + columns = [{ 'name': col, 'type': str(sample_df[col].dtype) } for col in sample_df.columns] - - # Get sample data + sample_rows = json.loads(sample_df.to_json(orient="records")) - - # Estimate row count row_count = self._estimate_row_count(azure_url, blob) - + table_metadata = { "row_count": row_count, "columns": columns, "sample_rows": sample_rows } - + results.append({ "name": azure_url, "metadata": table_metadata }) except Exception as e: - # Skip files that can't be read - print(f"Error reading {azure_url}: {e}") + logger.warning("Error reading %s: %s", azure_url, e) continue return results def _is_supported_file(self, blob_name: str) -> bool: - """Check if the file type is supported by DuckDB.""" + """Check if the file type is supported (PyArrow can read it).""" supported_extensions = ['.csv', '.parquet', '.json', '.jsonl'] return any(blob_name.lower().endswith(ext) for ext in supported_extensions) - + def _estimate_row_count(self, azure_url: str, blob_properties=None) -> int: - """Estimate the number of rows in a file using intelligent strategies.""" + """Estimate the number of rows in a file.""" try: file_extension = azure_url.lower().split('.')[-1] - - # For parquet files, use metadata to get exact count efficiently + if file_extension == 'parquet': try: - # Use DuckDB's parquet_file_metadata to get exact row count without full scan - metadata = self.duck_db_conn.execute( - f"SELECT num_rows FROM parquet_file_metadata('{azure_url}')" - ).fetchone() - if metadata and metadata[0] is not None: - return metadata[0] - except Exception as parquet_error: - print(f"Failed to get parquet metadata for {azure_url}: {parquet_error}") - # Fall back to counting (expensive but accurate) - try: - count = self.duck_db_conn.execute(f"SELECT COUNT(*) FROM read_parquet('{azure_url}')").fetchone()[0] - return count - except Exception: - pass - - # For CSV, JSON, and JSONL files, use intelligent sampling - elif file_extension in ['csv', 'json', 'jsonl']: + azure_path = self._azure_path(azure_url) + pf = pq.ParquetFile(azure_path, filesystem=self.azure_fs) + return pf.metadata.num_rows + except Exception as e: + logger.debug("Failed to get parquet row count for %s: %s", azure_url, e) + return 0 + + if file_extension in ['csv', 'json', 'jsonl']: return self._estimate_rows_by_sampling(azure_url, blob_properties, file_extension) - + return 0 - except Exception as e: - print(f"Error estimating row count for {azure_url}: {e}") + logger.warning("Error estimating row count for %s: %s", azure_url, e) return 0 def _estimate_rows_by_sampling(self, azure_url: str, blob_properties, file_extension: str) -> int: - """Estimate row count for text-based files using sampling and file size.""" + """Estimate row count for text-based files using PyArrow sampling.""" try: - # Get file size from blob properties if available file_size_bytes = None if blob_properties and hasattr(blob_properties, 'size'): file_size_bytes = blob_properties.size - - # If no file size available, try a different approach + if file_size_bytes is None: - # Sample first 10,000 rows and extrapolate if needed return self._estimate_by_row_sampling(azure_url, file_extension) - - # Sample approach: read first N rows and estimate based on size - sample_size = min(10000, file_size_bytes // 100) # Adaptive sample size - sample_size = max(1000, sample_size) # At least 1000 rows - + + sample_size = 
min(10000, max(1000, file_size_bytes // 100)) try: - if file_extension == 'csv': - sample_df = self.duck_db_conn.execute( - f"SELECT * FROM read_csv_auto('{azure_url}') LIMIT {sample_size}" - ).df() - elif file_extension in ['json', 'jsonl']: - sample_df = self.duck_db_conn.execute( - f"SELECT * FROM read_json_auto('{azure_url}') LIMIT {sample_size}" - ).df() - else: - return 0 - + sample_df = self._read_sample(azure_url, sample_size) sample_rows = len(sample_df) if sample_rows == 0: return 0 - - # If we got fewer rows than requested, that's probably all there is if sample_rows < sample_size: return sample_rows - - # Estimate bytes per row from sample - # For CSV: assume average line length based on file size - if file_extension == 'csv': - # Rough estimate: file_size / (sample_rows * estimated_line_overhead) - # CSV overhead includes delimiters, quotes, newlines - estimated_bytes_per_row = file_size_bytes / sample_rows * (sample_size / file_size_bytes) - estimated_total_rows = int(file_size_bytes / max(estimated_bytes_per_row, 50)) # Min 50 bytes per row - else: - # For JSON: more complex structure, use conservative estimate - # Assume JSON overhead is higher - estimated_bytes_per_row = file_size_bytes / sample_rows * (sample_size / file_size_bytes) - estimated_total_rows = int(file_size_bytes / max(estimated_bytes_per_row, 100)) # Min 100 bytes per row - - # Apply reasonable bounds - estimated_total_rows = max(sample_rows, estimated_total_rows) # At least as many as we sampled - estimated_total_rows = min(estimated_total_rows, file_size_bytes // 10) # Max based on very small rows - + + min_bytes_per_row = 50 if file_extension == 'csv' else 100 + estimated_total_rows = int(file_size_bytes / max(file_size_bytes / sample_rows, min_bytes_per_row)) + estimated_total_rows = max(sample_rows, min(estimated_total_rows, file_size_bytes // 10)) return estimated_total_rows - except Exception as e: - print(f"Error in size-based estimation for {azure_url}: {e}") + logger.debug("Size-based estimation failed for %s: %s", azure_url, e) return self._estimate_by_row_sampling(azure_url, file_extension) - except Exception as e: - print(f"Error in sampling estimation for {azure_url}: {e}") + logger.warning("Error in sampling estimation for %s: %s", azure_url, e) return 0 def _estimate_by_row_sampling(self, azure_url: str, file_extension: str) -> int: - """Fallback method: sample rows without file size info.""" + """Estimate row count by reading a capped sample with PyArrow.""" try: - # Try to read a reasonable sample and see if we get less than requested - # This indicates we've read the whole file test_limit = 50000 - - if file_extension == 'csv': - sample_df = self.duck_db_conn.execute( - f"SELECT * FROM read_csv_auto('{azure_url}') LIMIT {test_limit}" - ).df() - elif file_extension in ['json', 'jsonl']: - sample_df = self.duck_db_conn.execute( - f"SELECT * FROM read_json_auto('{azure_url}') LIMIT {test_limit}" - ).df() - else: - return 0 - - sample_rows = len(sample_df) - - # If we got fewer rows than the limit, that's likely the total - if sample_rows < test_limit: - return sample_rows - - # Otherwise, we can't estimate accurately without more information - # Return the sample size as a lower bound - return sample_rows - + sample_df = self._read_sample(azure_url, test_limit) + return len(sample_df) except Exception as e: - print(f"Error in row sampling for {azure_url}: {e}") - return 0 - - def ingest_data(self, table_name: str, name_as: str = None, size: int = 1000000, sort_columns: List[str] = None, 
sort_order: str = 'asc'): - if name_as is None: - name_as = table_name.split('/')[-1].split('.')[0] - - name_as = sanitize_table_name(name_as) - - # Build ORDER BY clause if sort_columns are specified - order_by_clause = "" - if sort_columns and len(sort_columns) > 0: - order_direction = "DESC" if sort_order == 'desc' else "ASC" - sanitized_cols = [f'"{col}" {order_direction}' for col in sort_columns] - order_by_clause = f"ORDER BY {', '.join(sanitized_cols)}" - - # Determine file type and use appropriate DuckDB function - if table_name.lower().endswith('.csv'): - self.duck_db_conn.execute(f""" - CREATE OR REPLACE TABLE main.{name_as} AS - SELECT * FROM read_csv_auto('{table_name}') - {order_by_clause} - LIMIT {size} - """) - elif table_name.lower().endswith('.parquet'): - self.duck_db_conn.execute(f""" - CREATE OR REPLACE TABLE main.{name_as} AS - SELECT * FROM read_parquet('{table_name}') - {order_by_clause} - LIMIT {size} - """) - elif table_name.lower().endswith('.json') or table_name.lower().endswith('.jsonl'): - self.duck_db_conn.execute(f""" - CREATE OR REPLACE TABLE main.{name_as} AS - SELECT * FROM read_json_auto('{table_name}') - {order_by_clause} - LIMIT {size} - """) - else: - raise ValueError(f"Unsupported file type: {table_name}") - - def view_query_sample(self, query: str) -> List[Dict[str, Any]]: - result, error_message = validate_sql_query(query) - if not result: - raise ValueError(error_message) - - return json.loads(self.duck_db_conn.execute(query).df().head(10).to_json(orient="records")) - - def ingest_data_from_query(self, query: str, name_as: str): - # Execute the query and get results as a DataFrame - result, error_message = validate_sql_query(query) - if not result: - raise ValueError(error_message) - - df = self.duck_db_conn.execute(query).df() - # Use the base class's method to ingest the DataFrame - self.ingest_df_to_duckdb(df, sanitize_table_name(name_as)) \ No newline at end of file + logger.debug("Row sampling failed for %s: %s", azure_url, e) + return 0 \ No newline at end of file diff --git a/py-src/data_formulator/data_loader/bigquery_data_loader.py b/py-src/data_formulator/data_loader/bigquery_data_loader.py index cd3c1cf6..e9c6807b 100644 --- a/py-src/data_formulator/data_loader/bigquery_data_loader.py +++ b/py-src/data_formulator/data_loader/bigquery_data_loader.py @@ -1,19 +1,12 @@ -import json import logging import re -from typing import Dict, Any, List, Optional -import pandas as pd -import duckdb +from typing import Any +import pyarrow as pa from data_formulator.data_loader.external_data_loader import ExternalDataLoader, sanitize_table_name -from data_formulator.security import validate_sql_query -try: - from google.cloud import bigquery - from google.oauth2 import service_account - BIGQUERY_AVAILABLE = True -except ImportError: - BIGQUERY_AVAILABLE = False +from google.cloud import bigquery +from google.oauth2 import service_account log = logging.getLogger(__name__) @@ -21,7 +14,7 @@ class BigQueryDataLoader(ExternalDataLoader): """BigQuery data loader implementation""" @staticmethod - def list_params() -> List[Dict[str, Any]]: + def list_params() -> list[dict[str, Any]]: return [ {"name": "project_id", "type": "text", "required": True, "description": "Google Cloud Project ID", "default": ""}, {"name": "dataset_id", "type": "text", "required": False, "description": "Dataset ID(s) - leave empty for all, or specify one (e.g., 'billing') or multiple separated by commas (e.g., 'billing,enterprise_collected,ga_api')", "default": ""}, @@ -68,17 +61,10 @@ 
def auth_instructions() -> str: - Execute custom SQL queries """ - def __init__(self, params: Dict[str, Any], duck_db_conn: duckdb.DuckDBPyConnection): - if not BIGQUERY_AVAILABLE: - raise ImportError( - "google-cloud-bigquery is required for BigQuery connections. " - "Install with: pip install google-cloud-bigquery google-auth" - ) - + def __init__(self, params: dict[str, Any]): self.params = params - self.duck_db_conn = duck_db_conn self.project_id = params.get("project_id") - self.dataset_ids = [d.strip() for d in params.get("dataset_id", "").split(",") if d.strip()] # Support multiple datasets + self.dataset_ids = [d.strip() for d in params.get("dataset_id", "").split(",") if d.strip()] self.location = params.get("location", "US") # Initialize BigQuery client @@ -95,8 +81,10 @@ def __init__(self, params: Dict[str, Any], duck_db_conn: duckdb.DuckDBPyConnecti project=self.project_id, location=self.location ) + + log.info(f"Successfully connected to BigQuery project: {self.project_id}") - def list_tables(self, table_filter: str = None) -> List[Dict[str, Any]]: + def list_tables(self, table_filter: str | None = None) -> list[dict[str, Any]]: """List tables from BigQuery datasets""" results = [] @@ -170,163 +158,78 @@ def list_tables(self, table_filter: str = None) -> List[Dict[str, Any]]: log.info(f"Returning {len(results)} tables") return results - def _convert_bigquery_dtypes(self, df: pd.DataFrame) -> pd.DataFrame: - """Convert BigQuery-specific dtypes to standard pandas dtypes""" - - def safe_convert(x): - try: - if x is None or pd.isna(x): - return None - if isinstance(x, (dict, list)): - return json.dumps(x, default=str) - if hasattr(x, "__dict__"): - return json.dumps(x.__dict__, default=str) - s = str(x) - if "[object Object]" in s: - return json.dumps(x, default=str) - return s - except Exception: - return str(x) if x is not None else None - - for col in df.columns: - # Convert db_dtypes.DateDtype to standard datetime - if hasattr(df[col].dtype, "name") and "dbdate" in str(df[col].dtype).lower(): - df[col] = pd.to_datetime(df[col]) - # Convert other db_dtypes if needed - elif str(df[col].dtype).startswith("db_dtypes"): - try: - df[col] = df[col].astype(str) - except Exception as e: - logging.error(f"Failed to convert column '{col}' to string: {e}") - # Handle nested objects/JSON columns - elif df[col].dtype == "object": - df[col] = df[col].apply(safe_convert) - - return df - - def ingest_data(self, table_name: str, name_as: Optional[str] = None, size: int = 1000000, sort_columns: List[str] = None, sort_order: str = 'asc'): - """Ingest data from BigQuery table into DuckDB with stable, de-duplicated column aliases.""" - if name_as is None: - name_as = table_name.split('.')[-1] - - name_as = sanitize_table_name(name_as) - - - table_ref = self.client.get_table(table_name) - - select_parts: list[str] = [] - used_aliases: dict[str, str] = {} # alias -> field_path - - def build_alias(field_path: str) -> str: - """ - Build a human-readable, globally unique alias from a BigQuery field path. 
- - Examples: - 'geo.country' -> 'geo_country' - 'device.category' -> 'device_category' - 'event_params.value' -> 'event_params_value' - """ - # path "a.b.c" -> "a_b_c" - alias = field_path.replace('.', '_') - - # remove weird characters - alias = re.sub(r'[^0-9a-zA-Z_]', '_', alias) - alias = re.sub(r'_+', '_', alias).strip('_') or "col" - - # must start with letter or underscore - if not alias[0].isalpha() and alias[0] != '_': - alias = f"_{alias}" - - base_alias = alias - counter = 1 - while alias in used_aliases: - # same alias from another path – suffix and log once - alias = f"{base_alias}_{counter}" - counter += 1 - - used_aliases[alias] = field_path - return alias - - def add_field(field_path: str): - alias = build_alias(field_path) - select_parts.append(f"`{table_name}`.{field_path} AS `{alias}`") - - def process_field(field, parent_path: str = ""): - """ - Recursively process fields, flattening non-repeated RECORDs. - """ - current_path = f"{parent_path}.{field.name}" if parent_path else field.name - - # Flatten STRUCT / RECORD that is not REPEATED - if field.field_type == "RECORD" and field.mode != "REPEATED": - for subfield in field.fields: - process_field(subfield, current_path) - else: - # Regular field or REPEATED RECORD/array – select as a single column - add_field(current_path) - - # Process all top-level fields - for field in table_ref.schema: - process_field(field) - - if not select_parts: - raise ValueError(f"No fields found for table {table_name}") - - # Build ORDER BY clause if sort_columns are specified - order_by_clause = "" - if sort_columns and len(sort_columns) > 0: - # Use backticks for BigQuery column quoting - order_direction = "DESC" if sort_order == 'desc' else "ASC" - sanitized_cols = [f'`{col}` {order_direction}' for col in sort_columns] - order_by_clause = f"ORDER BY {', '.join(sanitized_cols)}" - - query = f"SELECT {', '.join(select_parts)} FROM `{table_name}` {order_by_clause} LIMIT {size}" - - df = self.client.query(query).to_dataframe() - - # Safety net: drop exact duplicate names if something slipped through - if df.columns.duplicated().any(): - dupes = df.columns[df.columns.duplicated()].tolist() - log.warning(f"Duplicate column names detected in DataFrame, dropping later ones: {dupes}") - df = df.loc[:, ~df.columns.duplicated()] - - - # Convert BigQuery-specific dtypes - df = self._convert_bigquery_dtypes(df) - - self.ingest_df_to_duckdb(df, name_as) - - def view_query_sample(self, query: str) -> List[Dict[str, Any]]: - """Execute query and return sample results as a list of dictionaries""" - result, error_message = validate_sql_query(query) - if not result: - raise ValueError(error_message) + def fetch_data_as_arrow( + self, + source_table: str, + size: int = 1000000, + sort_columns: list[str] | None = None, + sort_order: str = 'asc' + ) -> pa.Table: + """ + Fetch data from BigQuery as a PyArrow Table using native Arrow support. - # Add LIMIT if not present - if "LIMIT" not in query.upper(): - query += " LIMIT 10" + BigQuery's Python client provides .to_arrow() for efficient Arrow-native + data transfer, avoiding pandas conversion overhead. 
+ """ + if not source_table: + raise ValueError("source_table must be provided") - df = self.client.query(query).to_dataframe() - return json.loads(df.to_json(orient="records")) - - def ingest_data_from_query(self, query: str, name_as: str) -> pd.DataFrame: - """Execute custom query and ingest results into DuckDB""" - name_as = sanitize_table_name(name_as) + # Get table schema to handle nested fields + table_ref = self.client.get_table(source_table) + select_parts = self._build_select_parts(table_ref, source_table) + base_query = f"SELECT {', '.join(select_parts)} FROM `{source_table}`" - result, error_message = validate_sql_query(query) - if not result: - raise ValueError(error_message) + # Add ORDER BY if sort columns specified + order_by_clause = "" + if sort_columns and len(sort_columns) > 0: + order_direction = "DESC" if sort_order == 'desc' else "ASC" + sanitized_cols = [f'`{col}` {order_direction}' for col in sort_columns] + order_by_clause = f" ORDER BY {', '.join(sanitized_cols)}" - # Execute query and get DataFrame - df = self.client.query(query).to_dataframe() - - # Drop duplicate columns - df = df.loc[:, ~df.columns.duplicated()] - - # Convert BigQuery-specific dtypes - df = self._convert_bigquery_dtypes(df) - - # Use base class method to ingest DataFrame - self.ingest_df_to_duckdb(df, name_as) + query = f"{base_query}{order_by_clause} LIMIT {size}" + + log.info(f"Executing BigQuery query: {query[:200]}...") + + # Execute query and get Arrow table directly (no pandas conversion) + query_job = self.client.query(query) + arrow_table = query_job.to_arrow() - return df + log.info(f"Fetched {arrow_table.num_rows} rows from BigQuery [Arrow-native]") + + return arrow_table + + def _build_select_parts(self, table_ref, table_name: str) -> list[str]: + """Build SELECT parts handling nested BigQuery fields.""" + select_parts: list[str] = [] + used_aliases: dict[str, str] = {} + + def build_alias(field_path: str) -> str: + alias = field_path.replace('.', '_') + alias = re.sub(r'[^0-9a-zA-Z_]', '_', alias) + alias = re.sub(r'_+', '_', alias).strip('_') or "col" + if not alias[0].isalpha() and alias[0] != '_': + alias = f"_{alias}" + base_alias = alias + counter = 1 + while alias in used_aliases: + alias = f"{base_alias}_{counter}" + counter += 1 + used_aliases[alias] = field_path + return alias + + def add_field(field_path: str): + alias = build_alias(field_path) + select_parts.append(f"`{table_name}`.{field_path} AS `{alias}`") + + def process_field(field, parent_path: str = ""): + current_path = f"{parent_path}.{field.name}" if parent_path else field.name + if field.field_type == "RECORD" and field.mode != "REPEATED": + for subfield in field.fields: + process_field(subfield, current_path) + else: + add_field(current_path) + + for field in table_ref.schema: + process_field(field) + + return select_parts if select_parts else ["*"] diff --git a/py-src/data_formulator/data_loader/external_data_loader.py b/py-src/data_formulator/data_loader/external_data_loader.py index 41060d87..2c9057f8 100644 --- a/py-src/data_formulator/data_loader/external_data_loader.py +++ b/py-src/data_formulator/data_loader/external_data_loader.py @@ -1,11 +1,19 @@ from abc import ABC, abstractmethod -from typing import Dict, Any, List +from typing import Any, TYPE_CHECKING import pandas as pd -import json -import duckdb -import random -import string +import pyarrow as pa import re +import logging + +if TYPE_CHECKING: + from data_formulator.datalake.workspace import Workspace + from data_formulator.datalake.metadata 
import TableMetadata + +logger = logging.getLogger(__name__) + +# Sensitive parameter names that should be excluded from stored metadata +SENSITIVE_PARAMS = {'password', 'api_key', 'secret', 'token', 'access_key', 'secret_key'} + def sanitize_table_name(name_as: str) -> str: if not name_as: @@ -42,74 +50,182 @@ def sanitize_table_name(name_as: str) -> str: return sanitized class ExternalDataLoader(ABC): + """ + Abstract base class for external data loaders. - def ingest_df_to_duckdb(self, df: pd.DataFrame, table_name: str): - # Log DataFrame info before ingestion - import logging - logger = logging.getLogger(__name__) - logger.info(f"Ingesting DataFrame to DuckDB table '{table_name}'") - logger.info(f"DataFrame shape: {df.shape}") - logger.info(f"DataFrame dtypes: {dict(df.dtypes)}") - - # Log sample of datetime columns - for col in df.columns: - if pd.api.types.is_datetime64_any_dtype(df[col]): - sample_values = df[col].dropna().head(3) - logger.info(f"Datetime column '{col}' sample values: {list(sample_values)}") + Data loaders fetch data from external sources (databases, cloud storage, etc.) + and store data as parquet files in the workspace. DuckDB is not used for storage; + it is only the computation engine elsewhere in the application. + + Ingest flow: External Source → PyArrow Table → Parquet (workspace). - # Create or replace table (replaces existing table with same name) - random_suffix = ''.join(random.choices(string.ascii_letters + string.digits, k=6)) - self.duck_db_conn.register(f'df_temp_{random_suffix}', df) + - `fetch_data_as_arrow()`: each loader must implement; fetches data as PyArrow Table. + - `ingest_to_workspace()`: fetches via Arrow and writes parquet to the given workspace. + """ + + def get_safe_params(self) -> dict[str, Any]: + """ + Get connection parameters with sensitive values removed. - # Log table schema after registration - try: - schema_info = self.duck_db_conn.execute(f"DESCRIBE df_temp_{random_suffix}").fetchall() - logger.info(f"DuckDB table schema: {schema_info}") - except Exception as e: - logger.warning(f"Could not get schema info: {e}") + Returns: + Dictionary of parameters safe to store in metadata + """ + if not hasattr(self, 'params'): + return {} + + return { + k: v for k, v in self.params.items() + if k.lower() not in SENSITIVE_PARAMS + } + + @abstractmethod + def fetch_data_as_arrow( + self, + source_table: str, + size: int = 1000000, + sort_columns: list[str] | None = None, + sort_order: str = 'asc' + ) -> pa.Table: + """ + Fetch data from the external source as a PyArrow Table. - self.duck_db_conn.execute(f"CREATE OR REPLACE TABLE {table_name} AS SELECT * FROM df_temp_{random_suffix}") - self.duck_db_conn.execute(f"DROP VIEW df_temp_{random_suffix}") # Drop the temporary view after creating the table + This is the primary method for data fetching. Each loader must implement + this method to fetch data directly as Arrow format for optimal performance. + Only source_table is supported (no raw query strings) to avoid security + and dialect diversity issues across loaders. 
- logger.info(f"Successfully created/replaced DuckDB table '{table_name}'") + Args: + source_table: Full table name (or table identifier) to fetch from + size: Maximum number of rows to fetch + sort_columns: Columns to sort by before limiting + sort_order: Sort direction ('asc' or 'desc') + + Returns: + PyArrow Table with the fetched data + + Raises: + ValueError: If source_table is not provided + NotImplementedError: If the loader doesn't support this method yet + """ + pass + def fetch_data_as_dataframe( + self, + source_table: str, + size: int = 1000000, + sort_columns: list[str] | None = None, + sort_order: str = 'asc' + ) -> pd.DataFrame: + """ + Fetch data from the external source as a pandas DataFrame. + + This method converts the Arrow table to pandas. For better performance, + prefer using `fetch_data_as_arrow()` directly when possible. + + Args: + source_table: Full table name to fetch from + size: Maximum number of rows to fetch + sort_columns: Columns to sort by before limiting + sort_order: Sort direction ('asc' or 'desc') + + Returns: + pandas DataFrame with the fetched data + """ + arrow_table = self.fetch_data_as_arrow( + source_table=source_table, + size=size, + sort_columns=sort_columns, + sort_order=sort_order, + ) + return arrow_table.to_pandas() + def ingest_to_workspace( + self, + workspace: "Workspace", + table_name: str, + source_table: str, + size: int = 1000000, + sort_columns: list[str] | None = None, + sort_order: str = 'asc' + ) -> "TableMetadata": + """ + Fetch data from external source and store as parquet in workspace. + + Uses PyArrow for efficient data transfer: External Source → Arrow → Parquet. + This avoids pandas conversion overhead entirely. + + Args: + workspace: The workspace to store data in + table_name: Name for the table in the workspace + source_table: Full table name to fetch from + size: Maximum number of rows to fetch + sort_columns: Columns to sort by before limiting + sort_order: Sort direction ('asc' or 'desc') + + Returns: + TableMetadata for the created parquet file + """ + # Import here to avoid circular imports + from data_formulator.datalake.parquet_manager import write_parquet_from_arrow + + # Fetch data as Arrow table (efficient, no pandas conversion) + arrow_table = self.fetch_data_as_arrow( + source_table=source_table, + size=size, + sort_columns=sort_columns, + sort_order=sort_order, + ) + + # Prepare loader metadata + loader_metadata = { + "loader_type": self.__class__.__name__, + "loader_params": self.get_safe_params(), + "source_table": source_table, + } + + # Write Arrow table directly to parquet (no pandas conversion) + table_metadata = write_parquet_from_arrow( + workspace=workspace, + table=arrow_table, + table_name=table_name, + loader_metadata=loader_metadata, + ) + + logger.info( + f"Ingested {arrow_table.num_rows} rows from {self.__class__.__name__} " + f"to workspace as {table_name}.parquet" + ) + + return table_metadata + @staticmethod @abstractmethod - def list_params() -> List[Dict[str, Any]]: + def list_params() -> list[dict[str, Any]]: + """Return list of parameters needed to configure this data loader.""" pass @staticmethod @abstractmethod - def auth_instructions() -> str: pass - - @abstractmethod - def __init__(self, params: Dict[str, Any], duck_db_conn: duckdb.DuckDBPyConnection): + def auth_instructions() -> str: + """Return human-readable authentication instructions.""" pass @abstractmethod - def list_tables(self, table_filter: str = None) -> List[Dict[str, Any]]: - # should include: table_name, 
column_names, column_types, sample_data - pass + def __init__(self, params: dict[str, Any]): + """ + Initialize the data loader. - @abstractmethod - def ingest_data(self, table_name: str, name_as: str = None, size: int = 1000000, sort_columns: List[str] = None, sort_order: str = 'asc'): - """Ingest data from a table into DuckDB. - Args: - table_name: The source table name - name_as: Optional name for the destination table - size: Maximum number of rows to import (row limit) - sort_columns: Optional list of columns to sort by before applying the limit - sort_order: Sort direction, 'asc' for ascending or 'desc' for descending + params: Configuration parameters for the loader (e.g. host, credentials). """ pass @abstractmethod - def view_query_sample(self, query: str) -> List[Dict[str, Any]]: - pass + def list_tables(self, table_filter: str | None = None) -> list[dict[str, Any]]: + """ + List available tables (or files) from the data source. - @abstractmethod - def ingest_data_from_query(self, query: str, name_as: str): + Returns: + List of dicts with: name (table/file identifier), metadata (row_count, columns, sample_rows). + """ pass - diff --git a/py-src/data_formulator/data_loader/kusto_data_loader.py b/py-src/data_formulator/data_loader/kusto_data_loader.py index 6ed6d602..ae893f68 100644 --- a/py-src/data_formulator/data_loader/kusto_data_loader.py +++ b/py-src/data_formulator/data_loader/kusto_data_loader.py @@ -1,29 +1,20 @@ +import json import logging -import sys -from typing import Dict, Any, List +from typing import Any import pandas as pd -import json -import duckdb -import random -import string -from datetime import datetime +import pyarrow as pa from data_formulator.data_loader.external_data_loader import ExternalDataLoader, sanitize_table_name -try: - from azure.kusto.data import KustoClient, KustoConnectionStringBuilder - from azure.kusto.data.helpers import dataframe_from_result_table - KUSTO_AVAILABLE = True -except ImportError: - KUSTO_AVAILABLE = False +from azure.kusto.data import KustoClient, KustoConnectionStringBuilder +from azure.kusto.data.helpers import dataframe_from_result_table -# Get logger for this module (logging config done in app.py) logger = logging.getLogger(__name__) class KustoDataLoader(ExternalDataLoader): @staticmethod - def list_params() -> bool: + def list_params() -> list[dict[str, Any]]: params_list = [ {"name": "kusto_cluster", "type": "string", "required": True, "description": ""}, {"name": "kusto_database", "type": "string", "required": True, "description": ""}, @@ -60,35 +51,30 @@ def auth_instructions() -> str: - kusto_database: Name of the database you want to access """ - def __init__(self, params: Dict[str, Any], duck_db_conn: duckdb.DuckDBPyConnection): - if not KUSTO_AVAILABLE: - raise ImportError( - "azure-kusto-data is required for Kusto/Azure Data Explorer connections. " - "Install with: pip install azure-kusto-data" - ) - + def __init__(self, params: dict[str, Any]): + self.params = params self.kusto_cluster = params.get("kusto_cluster", None) self.kusto_database = params.get("kusto_database", None) - + self.client_id = params.get("client_id", None) self.client_secret = params.get("client_secret", None) self.tenant_id = params.get("tenant_id", None) try: if self.client_id and self.client_secret and self.tenant_id: - # This function provides an interface to Kusto. It uses AAD application key authentication. 
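# The branch below builds an AAD application (service principal) client when
# client_id, client_secret and tenant_id are all provided; otherwise the else
# branch falls back to Azure CLI credentials (requires a prior `az login`).
# Example parameter shapes, with placeholder values:
#   {"kusto_cluster": "<cluster-uri>", "kusto_database": "<db>",
#    "client_id": "<app-id>", "client_secret": "<app-secret>", "tenant_id": "<tenant-id>"}
#   {"kusto_cluster": "<cluster-uri>", "kusto_database": "<db>"}  # Azure CLI auth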
self.client = KustoClient(KustoConnectionStringBuilder.with_aad_application_key_authentication( self.kusto_cluster, self.client_id, self.client_secret, self.tenant_id)) else: - # This function provides an interface to Kusto. It uses Azure CLI auth, but you can also use other auth types. cluster_url = KustoConnectionStringBuilder.with_az_cli_authentication(self.kusto_cluster) logger.info(f"Connecting to Kusto cluster: {self.kusto_cluster}") self.client = KustoClient(cluster_url) - logger.info("Using Azure CLI authentication for Kusto client. Ensure you have run `az login` in your terminal.") + logger.info("Using Azure CLI authentication for Kusto client.") except Exception as e: logger.error(f"Error creating Kusto client: {e}") - raise Exception(f"Error creating Kusto client: {e}, please authenticate with Azure CLI when starting the app.") - self.duck_db_conn = duck_db_conn + raise RuntimeError( + f"Error creating Kusto client: {e}. " + "Please authenticate with Azure CLI (az login) when starting the app." + ) from e def _convert_kusto_datetime_columns(self, df: pd.DataFrame) -> pd.DataFrame: """Convert Kusto datetime columns to proper pandas datetime format""" @@ -156,7 +142,52 @@ def query(self, kql: str) -> pd.DataFrame: return df - def list_tables(self, table_filter: str = None) -> List[Dict[str, Any]]: + def fetch_data_as_arrow( + self, + source_table: str, + size: int = 1000000, + sort_columns: list[str] | None = None, + sort_order: str = 'asc' + ) -> pa.Table: + """ + Fetch data from Kusto/Azure Data Explorer as a PyArrow Table. + + Kusto SDK returns pandas, so we convert to Arrow format. + + Args: + source_table: Kusto table name + size: Maximum number of rows to fetch + sort_columns: Columns to sort by + sort_order: Sort direction + """ + if not source_table: + raise ValueError("source_table must be provided") + + base_query = f"['{source_table}']" + + # Add sort if specified (KQL syntax) + sort_clause = "" + if sort_columns and len(sort_columns) > 0: + order_direction = "desc" if sort_order == 'desc' else "asc" + sort_cols_with_order = [f"{col} {order_direction}" for col in sort_columns] + sort_clause = f" | sort by {', '.join(sort_cols_with_order)}" + + # Add take limit + kql_query = f"{base_query}{sort_clause} | take {size}" + + logger.info(f"Executing Kusto query: {kql_query[:200]}...") + + # Execute query + df = self.query(kql_query) + + # Convert to Arrow + arrow_table = pa.Table.from_pandas(df, preserve_index=False) + + logger.info(f"Fetched {arrow_table.num_rows} rows from Kusto") + + return arrow_table + + def list_tables(self, table_filter: str | None = None) -> list[dict[str, Any]]: query = ".show tables" tables_df = self.query(query) @@ -195,71 +226,4 @@ def list_tables(self, table_filter: str = None) -> List[Dict[str, Any]]: "metadata": table_metadata }) - return tables - - def ingest_data(self, table_name: str, name_as: str = None, size: int = 5000000, sort_columns: List[str] = None, sort_order: str = 'asc') -> pd.DataFrame: - if name_as is None: - name_as = table_name - - # Build sort clause for Kusto (KQL syntax) - sort_clause = "" - if sort_columns and len(sort_columns) > 0: - # Kusto uses | sort by col1 asc/desc syntax - order_direction = "desc" if sort_order == 'desc' else "asc" - sort_cols_with_order = [f"{col} {order_direction}" for col in sort_columns] - sort_clause = f" | sort by {', '.join(sort_cols_with_order)}" - - # Create a subquery that applies random ordering once with a fixed seed - total_rows_ingested = 0 - first_chunk = True - chunk_size = 100000 - 
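# Sketch for illustration only: how the Arrow-native path above is intended to be
# used in place of the chunked ingestion removed below. Cluster, database, table,
# and column names are placeholders; running it needs a reachable cluster and a
# prior `az login` (or AAD app credentials).
def _example_kusto_fetch():
    loader = KustoDataLoader({"kusto_cluster": "<cluster-uri>", "kusto_database": "<database>"})
    events = loader.fetch_data_as_arrow("<table>", size=1000,
                                        sort_columns=["<timestamp-column>"], sort_order="desc")
    return events  # pyarrow.Table; ingest_to_workspace() would write it to parquet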
- size_estimate_query = f"['{table_name}'] | take {10000} | summarize Total=sum(estimate_data_size(*))" - size_estimate_result = self.query(size_estimate_query) - size_estimate = size_estimate_result['Total'].values[0] - print(f"size_estimate: {size_estimate}") - - chunk_size = min(64 * 1024 * 1024 / size_estimate * 0.9 * 10000, 5000000) - print(f"estimated_chunk_size: {chunk_size}") - - while total_rows_ingested < size: - try: - # Apply sort if specified, then apply row numbering for pagination - query = f"['{table_name}']{sort_clause} | serialize | extend rn=row_number() | where rn >= {total_rows_ingested} and rn < {total_rows_ingested + chunk_size} | project-away rn" - chunk_df = self.query(query) - except Exception as e: - chunk_size = int(chunk_size * 0.8) - continue - - print(f"total_rows_ingested: {total_rows_ingested}") - print(chunk_df.head()) - - # Stop if no more data - if chunk_df.empty: - break - - # Sanitize the table name for SQL compatibility - name_as = sanitize_table_name(name_as) - - # For first chunk, create new table; for subsequent chunks, append - if first_chunk: - self.ingest_df_to_duckdb(chunk_df, name_as) - first_chunk = False - else: - # Append to existing table - random_suffix = ''.join(random.choices(string.ascii_letters + string.digits, k=6)) - self.duck_db_conn.register(f'df_temp_{random_suffix}', chunk_df) - self.duck_db_conn.execute(f"INSERT INTO {name_as} SELECT * FROM df_temp_{random_suffix}") - self.duck_db_conn.execute(f"DROP VIEW df_temp_{random_suffix}") - - total_rows_ingested += len(chunk_df) - - def view_query_sample(self, query: str) -> List[Dict[str, Any]]: - df = self.query(query).head(10) - return json.loads(df.to_json(orient="records", date_format='iso')) - - def ingest_data_from_query(self, query: str, name_as: str) -> pd.DataFrame: - # Sanitize the table name for SQL compatibility - name_as = sanitize_table_name(name_as) - df = self.query(query) - self.ingest_df_to_duckdb(df, name_as) \ No newline at end of file + return tables \ No newline at end of file diff --git a/py-src/data_formulator/data_loader/mongodb_data_loader.py b/py-src/data_formulator/data_loader/mongodb_data_loader.py index 6b460354..cf8d84e6 100644 --- a/py-src/data_formulator/data_loader/mongodb_data_loader.py +++ b/py-src/data_formulator/data_loader/mongodb_data_loader.py @@ -1,23 +1,22 @@ import json -import string -import random as rand +import logging +from datetime import datetime import pandas as pd -import duckdb +import pyarrow as pa import pymongo from bson import ObjectId -from datetime import datetime from data_formulator.data_loader.external_data_loader import ExternalDataLoader, sanitize_table_name +from typing import Any -from data_formulator.security import validate_sql_query -from typing import Dict, Any, Optional, List +logger = logging.getLogger(__name__) class MongoDBDataLoader(ExternalDataLoader): @staticmethod - def list_params() -> bool: + def list_params() -> list[dict[str, Any]]: params_list = [ {"name": "host", "type": "string", "required": True, "default": "localhost", "description": ""}, {"name": "port", "type": "int", "required": False, "default": 27017, "description": "MongoDB server port (default 27017)"}, @@ -56,48 +55,46 @@ def auth_instructions() -> str: - Test connection: `mongosh --host [host] --port [port]` """ - def __init__(self, params: Dict[str, Any], duck_db_conn: duckdb.DuckDBPyConnection): + def __init__(self, params: dict[str, Any]): self.params = params - self.duck_db_conn = duck_db_conn - + + self.host = self.params.get("host", 
"localhost") + self.port = int(self.params.get("port", 27017)) + self.username = self.params.get("username", "") + self.password = self.params.get("password", "") + self.database_name = self.params.get("database", "") + self.collection_name = self.params.get("collection", "") + auth_source = self.params.get("authSource", "") or self.database_name + try: - # Create MongoDB client - host = self.params.get("host", "localhost") - port = int(self.params.get("port", 27017)) - username = self.params.get("username", "") - password = self.params.get("password", "") - database = self.params.get("database", "") - collection = self.params.get("collection", "") - auth_source = self.params.get("authSource", "") or database # Default to target database - - if username and password: - # Use authSource to specify which database contains user credentials + if self.username and self.password: self.mongo_client = pymongo.MongoClient( - host=host, - port=port, - username=username, - password=password, + host=self.host, + port=self.port, + username=self.username, + password=self.password, authSource=auth_source ) else: - self.mongo_client = pymongo.MongoClient(host=host, port=port) - - self.db = self.mongo_client[database] - self.database_name = database - - self.collection = self.db[collection] if collection else None - + self.mongo_client = pymongo.MongoClient(host=self.host, port=self.port) + + self.db = self.mongo_client[self.database_name] + self.collection = self.db[self.collection_name] if self.collection_name else None + + logger.info(f"Successfully connected to MongoDB: {self.host}:{self.port}/{self.database_name}") + except Exception as e: - raise Exception(f"Failed to connect to MongoDB: {e}") + logger.error(f"Failed to connect to MongoDB: {e}") + raise RuntimeError(f"Failed to connect to MongoDB: {e}") from e def close(self): - """Close the MongoDB connection""" + """Close the MongoDB connection.""" if hasattr(self, 'mongo_client') and self.mongo_client is not None: try: self.mongo_client.close() self.mongo_client = None except Exception as e: - print(f"Warning: Failed to close MongoDB connection: {e}") + logger.warning(f"Failed to close MongoDB connection: {e}") def __enter__(self): """Context manager entry""" @@ -113,7 +110,7 @@ def __del__(self): self.close() @staticmethod - def _flatten_document(doc: Dict[str, Any], parent_key: str = '', sep: str = '_') -> Dict[str, Any]: + def _flatten_document(doc: dict[str, Any], parent_key: str = '', sep: str = '_') -> dict[str, Any]: """ Use recursion to flatten nested MongoDB documents """ @@ -139,7 +136,7 @@ def _flatten_document(doc: Dict[str, Any], parent_key: str = '', sep: str = '_') return dict(items) @staticmethod - def _convert_special_types(doc: Dict[str, Any]) -> Dict[str, Any]: + def _convert_special_types(doc: dict[str, Any]) -> dict[str, Any]: """ Convert MongoDB special types (ObjectId, datetime, etc.) 
to serializable types """ @@ -165,7 +162,7 @@ def _convert_special_types(doc: Dict[str, Any]) -> Dict[str, Any]: result[key] = value return result - def _process_documents(self, documents: List[Dict[str, Any]]) -> pd.DataFrame: + def _process_documents(self, documents: list[dict[str, Any]]) -> pd.DataFrame: """ Process MongoDB documents list, flatten and convert to DataFrame """ @@ -180,8 +177,64 @@ def _process_documents(self, documents: List[Dict[str, Any]]) -> pd.DataFrame: df = pd.DataFrame(processed_docs) return df + + def fetch_data_as_arrow( + self, + source_table: str, + size: int = 1000000, + sort_columns: list[str] | None = None, + sort_order: str = 'asc' + ) -> pa.Table: + """ + Fetch data from MongoDB as a PyArrow Table. + + MongoDB doesn't have native Arrow support, so we fetch documents, + process them, and convert to Arrow format. + + Args: + source_table: Collection name to fetch from + size: Maximum number of documents to fetch + sort_columns: Columns to sort by + sort_order: Sort direction ('asc' or 'desc') + """ + if not source_table: + raise ValueError("source_table (collection name) must be provided") + + # Get collection + collection_name = source_table + # Handle full table names like "database.collection" + if '.' in collection_name: + parts = collection_name.split('.') + collection_name = parts[-1] + + collection = self.db[collection_name] + + logger.info(f"Fetching from MongoDB collection: {collection_name}") + + # Build cursor with optional sorting + data_cursor = collection.find() + if sort_columns and len(sort_columns) > 0: + sort_direction = -1 if sort_order == 'desc' else 1 + sort_spec = [(col, sort_direction) for col in sort_columns] + data_cursor = data_cursor.sort(sort_spec) + data_cursor = data_cursor.limit(size) + + # Fetch and process documents + data_list = list(data_cursor) + if not data_list: + logger.warning(f"No data found in MongoDB collection '{collection_name}'") + return pa.table({}) + + df = self._process_documents(data_list) + + # Convert to Arrow + arrow_table = pa.Table.from_pandas(df, preserve_index=False) + + logger.info(f"Fetched {arrow_table.num_rows} rows from MongoDB collection '{collection_name}'") - def list_tables(self, table_filter: str = None): + return arrow_table + + def list_tables(self, table_filter: str | None = None) -> list[dict[str, Any]]: """ List all collections """ @@ -236,192 +289,7 @@ def list_tables(self, table_filter: str = None): "metadata": table_metadata }) except Exception as e: + logger.debug(f"Error listing collection {collection_name}: {e}") continue - - return results - - def ingest_data(self, table_name: str, name_as: Optional[str] = None, size: int = 100000, sort_columns: List[str] = None, sort_order: str = 'asc'): - """ - Import MongoDB collection data into DuckDB - """ - # Extract collection name from full table name - parts = table_name.split('.') - if len(parts) >= 3: - collection_name = parts[-1] - else: - collection_name = table_name - - if name_as is None: - name_as = collection_name - - # Get and process data from MongoDB (limit rows) - collection = self.db[collection_name] - - # Build cursor with optional sorting - data_cursor = collection.find() - if sort_columns and len(sort_columns) > 0: - # MongoDB sort format: 1 for ascending, -1 for descending - sort_direction = -1 if sort_order == 'desc' else 1 - sort_spec = [(col, sort_direction) for col in sort_columns] - data_cursor = data_cursor.sort(sort_spec) - data_cursor = data_cursor.limit(size) - - data_list = list(data_cursor) - if not 
data_list: - raise Exception(f"No data found in MongoDB collection '{collection_name}'.") - df = self._process_documents(data_list) - - name_as = sanitize_table_name(name_as) - - self._load_dataframe_to_duckdb(df, name_as, size) - return - - - def view_query_sample(self, query: str) -> List[Dict[str, Any]]: - - self._existed_collections_in_duckdb() - self._difference_collections() - self._preload_all_collections(self.collection.name if self.collection else "") - - result, error_message = validate_sql_query(query) - if not result: - print(error_message) - raise ValueError(error_message) - - result_query = json.loads(self.duck_db_conn.execute(query).df().head(10).to_json(orient="records")) - - self._drop_all_loaded_tables() - - for collection_name, df in self.existed_collections.items(): - self._load_dataframe_to_duckdb(df, collection_name) - - return result_query - - def ingest_data_from_query(self, query: str, name_as: str) -> pd.DataFrame: - """ - Create a new table from query results - """ - result, error_message = validate_sql_query(query) - if not result: - raise ValueError(error_message) - - name_as = sanitize_table_name(name_as) - - self._existed_collections_in_duckdb() - self._difference_collections() - self._preload_all_collections(self.collection.name if self.collection else "") - - query_result_df = self.duck_db_conn.execute(query).df() - - self._drop_all_loaded_tables() - - for collection_name, existing_df in self.existed_collections.items(): - self._load_dataframe_to_duckdb(existing_df, collection_name) - - self._load_dataframe_to_duckdb(query_result_df, name_as) - - return query_result_df - - @staticmethod - def _quote_identifier(name: str) -> str: - """ - Safely quote a SQL identifier to prevent SQL injection. - Double quotes are escaped by doubling them. 
- """ - # Escape any double quotes in the identifier by doubling them - escaped = name.replace('"', '""') - return f'"{escaped}"' - - def _existed_collections_in_duckdb(self): - """ - Return the names and contents of tables already loaded into DuckDB - """ - self.existed_collections = {} - duckdb_tables = self.duck_db_conn.execute("SHOW TABLES").df() - for _, row in duckdb_tables.iterrows(): - collection_name = row['name'] - quoted_name = self._quote_identifier(collection_name) - df = self.duck_db_conn.execute(f"SELECT * FROM {quoted_name}").df() - self.existed_collections[collection_name] = df - - - def _difference_collections(self): - """ - Return the difference between all collections and loaded collections - """ - self.diff_collections = [] - all_collections = set(self.db.list_collection_names()) - loaded_collections = set(self.existed_collections) - diff_collections = all_collections - loaded_collections - self.diff_collections = list(diff_collections) - print(f'Difference collections: {self.diff_collections}') - - def _drop_all_loaded_tables(self): - """ - Drop all tables loaded into DuckDB - """ - for table_name in self.loaded_tables.values(): - try: - quoted_name = self._quote_identifier(table_name) - self.duck_db_conn.execute(f"DROP TABLE IF EXISTS main.{quoted_name}") - print(f"Dropped loaded table: {table_name}") - except Exception as e: - print(f"Warning: Failed to drop table '{table_name}': {e}") - - def _preload_all_collections(self, specified_collection: str = "", size: int = 100000): - """ - Preload all MongoDB collections into DuckDB memory - """ - # Get the list of collections to load - if specified_collection: - collection_names = [specified_collection] - else: - collection_names = self.db.list_collection_names() - - # Record loaded tables - self.loaded_tables = {} - - for collection_name in collection_names: - try: - collection = self.db[collection_name] - - # Get data - data_cursor = collection.find().limit(size) - data_list = list(data_cursor) - - if not data_list: - print(f"Skipping empty collection: {collection_name}") - continue - - df = self._process_documents(data_list) - - # Generate table name - table_name = sanitize_table_name(collection_name) - - # Load into DuckDB - self._load_dataframe_to_duckdb(df, table_name) - - # Record mapping - self.loaded_tables[collection_name] = table_name - print(f"Preloaded collection '{collection_name}' as table '{table_name}' ({len(data_list)} rows)") - - except Exception as e: - print(f"Warning: Failed to preload collection '{collection_name}': {e}") - - def _load_dataframe_to_duckdb(self, df: pd.DataFrame, table_name: str, size: int = 1000000): - """ - Load DataFrame into DuckDB - """ - # Create table using a temporary view - random_suffix = ''.join(rand.choices(string.ascii_letters + string.digits, k=6)) - temp_view_name = f'df_temp_{random_suffix}' - self.duck_db_conn.register(temp_view_name, df) - # Use CREATE OR REPLACE to directly replace existing table - # Quote identifiers to prevent SQL injection - quoted_table_name = self._quote_identifier(table_name) - quoted_temp_view = self._quote_identifier(temp_view_name) - # Ensure size is an integer to prevent injection via size parameter - safe_size = int(size) - self.duck_db_conn.execute(f"CREATE OR REPLACE TABLE main.{quoted_table_name} AS SELECT * FROM {quoted_temp_view} LIMIT {safe_size}") - self.duck_db_conn.execute(f"DROP VIEW {quoted_temp_view}") \ No newline at end of file + return results \ No newline at end of file diff --git 
a/py-src/data_formulator/data_loader/mssql_data_loader.py b/py-src/data_formulator/data_loader/mssql_data_loader.py index 1f18a794..c73e93a8 100644 --- a/py-src/data_formulator/data_loader/mssql_data_loader.py +++ b/py-src/data_formulator/data_loader/mssql_data_loader.py @@ -1,25 +1,19 @@ import json import logging -from typing import Dict, Any, Optional, List +from typing import Any -import duckdb import pandas as pd - -try: - import pyodbc - PYODBC_AVAILABLE = True -except ImportError: - PYODBC_AVAILABLE = False +import pyarrow as pa +import connectorx as cx from data_formulator.data_loader.external_data_loader import ExternalDataLoader, sanitize_table_name -from data_formulator.security import validate_sql_query log = logging.getLogger(__name__) class MSSQLDataLoader(ExternalDataLoader): @staticmethod - def list_params() -> bool: + def list_params() -> list[dict[str, Any]]: params_list = [ { "name": "server", @@ -93,14 +87,14 @@ def auth_instructions() -> str: SQL Server Connection Instructions: 1. Prerequisites: - - Install pyodbc dependencies: + - Install connectorx: pip install connectorx (used for fast Arrow-native data access) + - Install ODBC stack for connectorx: * macOS: brew install unixodbc * Linux: sudo apt-get install unixodbc-dev (Ubuntu/Debian) or sudo yum install unixODBC-devel (CentOS/RHEL) - * Windows: Usually included with pyodbc installation - - Install pyodbc: pip install pyodbc + * Windows: Usually included with ODBC driver installation - Install Microsoft ODBC Driver for SQL Server: * Windows: Usually pre-installed with SQL Server - * macOS: Download from Microsoft's official site or use: brew tap microsoft/mssql-release && brew install msodbcsql17 + * macOS: brew tap microsoft/mssql-release && brew install msodbcsql17 * Linux: Install via package manager (msodbcsql17 or msodbcsql18) 2. Local SQL Server Setup: @@ -128,120 +122,99 @@ def auth_instructions() -> str: - Custom port: server='localhost,1434' (note the comma, not colon) 6. Common Issues & Troubleshooting: - - If pyodbc import fails: Install unixodbc first (macOS/Linux) + - If connectorx fails: Install unixodbc first (macOS/Linux) - Ensure SQL Server service is running - Check SQL Server Browser service for named instances - Verify TCP/IP protocol is enabled in SQL Server Configuration Manager - Check Windows Firewall settings for SQL Server port - - Test connection: `sqlcmd -S server -d database -U username -P password` + - Test connection: sqlcmd -S server -d database -U username -P password - For named instances, ensure SQL Server Browser service is running - - Check ODBC drivers: `odbcinst -q -d` (on Unix/Linux) + - Check ODBC drivers: odbcinst -q -d (on Unix/Linux) 7. Driver Installation: - - macOS: `brew install msodbcsql17` or download from Microsoft - - Ubuntu/Debian: `sudo apt-get install msodbcsql17` - - CentOS/RHEL: `sudo yum install msodbcsql17` + - macOS: brew install msodbcsql17 or download from Microsoft + - Ubuntu/Debian: sudo apt-get install msodbcsql17 + - CentOS/RHEL: sudo yum install msodbcsql17 - Windows: Install SQL Server or download ODBC driver separately """ - def __init__(self, params: Dict[str, Any], duck_db_conn: duckdb.DuckDBPyConnection): - log.info("Initializing MSSQL DataLoader with parameters: %s", params) - - if not PYODBC_AVAILABLE: - raise ImportError( - "pyodbc is required for MSSQL connections. 
" - "Install with: pip install pyodbc\n" - "Note for macOS: You may also need to run 'brew install unixodbc' first.\n" - "For other platforms, see: https://github.com/mkleehammer/pyodbc/wiki" - ) + def __init__(self, params: dict[str, Any]): + log.info(f"Initializing MSSQL DataLoader with parameters: {params}") self.params = params - self.duck_db_conn = duck_db_conn - - # Build connection string for pyodbc - self.connection_string = self._build_connection_string() - log.info("SQL Server connection string built") - - # Test the connection - self._test_connection() - - def _build_connection_string(self) -> str: - """Build ODBC connection string from parameters""" - conn_parts = [] - - # Driver - driver = self.params.get("driver", "ODBC Driver 17 for SQL Server") - conn_parts.append(f"DRIVER={{{driver}}}") - - # Server (handle different server formats) - server = self.params.get("server", "localhost") - port = self.params.get("port", "1433") - - # Handle different server formats - if "\\" in server: - # Named instance format: server\instance - conn_parts.append(f"SERVER={server}") - elif "," in server: - # Port already specified in server: server,port - conn_parts.append(f"SERVER={server}") - else: - # Standard format: add port if not default - if port and port != "1433": - conn_parts.append(f"SERVER={server},{port}") - else: - conn_parts.append(f"SERVER={server}") - - # Database - database = self.params.get("database", "master") - conn_parts.append(f"DATABASE={database}") - - # Authentication - user = self.params.get("user", "").strip() - password = self.params.get("password", "").strip() - - if user: - conn_parts.append(f"UID={user}") - conn_parts.append(f"PWD={password}") - else: - # Use Windows Authentication - conn_parts.append("Trusted_Connection=yes") - # Connection settings - encrypt = self.params.get("encrypt", "yes") - trust_cert = self.params.get("trust_server_certificate", "no") - timeout = self.params.get("connection_timeout", "30") - - conn_parts.append(f"Encrypt={encrypt}") - conn_parts.append(f"TrustServerCertificate={trust_cert}") - conn_parts.append(f"Connection Timeout={timeout}") - - return ";".join(conn_parts) + self.server = params.get("server", "localhost") + self.database = params.get("database", "master") + self.user = params.get("user", "").strip() + self.password = params.get("password", "").strip() + self.port = params.get("port", "1433") + + # Build connection URL for connectorx: mssql://user:password@host:port/database + # - Use explicit empty password (user:@host) when user is set but password is blank. + # - Use 127.0.0.1 when server is localhost to force IPv4 TCP and avoid IPv6 ::1 connection issues. + server_for_url = "127.0.0.1" if (self.server or "").strip().lower() == "localhost" else self.server + if self.user: + self.connection_url = f"mssql://{self.user}:{self.password}@{server_for_url}:{self.port}/{self.database}?TrustServerCertificate=true" + else: + self.connection_url = f"mssql://{server_for_url}:{self.port}/{self.database}?TrustServerCertificate=true&IntegratedSecurity=true" - def _test_connection(self): - """Test the SQL Server connection""" try: - with pyodbc.connect(self.connection_string, timeout=10) as conn: - cursor = conn.cursor() - cursor.execute("SELECT @@VERSION") - version = cursor.fetchone()[0] - log.info(f"SQL Server connection successful. 
Version: {version[:50]}...") + cx.read_sql(self.connection_url, "SELECT 1", return_type="arrow") + log.info(f"Successfully connected to SQL Server: {self.server}/{self.database}") except Exception as e: - log.error(f"SQL Server connection test failed: {e}") - raise ConnectionError(f"Failed to connect to SQL Server: {e}") + log.error(f"Failed to connect to SQL Server: {e}") + raise ValueError(f"Failed to connect to SQL Server '{self.server}': {e}") from e - def _execute_query(self, query: str) -> pd.DataFrame: - """Execute a query and return results as DataFrame""" + def _execute_query(self, query: str) -> pa.Table: + """Execute a query and return results as a PyArrow Table (via connectorx).""" try: - with pyodbc.connect(self.connection_string) as conn: - return pd.read_sql(query, conn) + return cx.read_sql(self.connection_url, query, return_type="arrow") except Exception as e: log.error(f"Failed to execute query: {e}") raise - def list_tables(self): - """List all tables from SQL Server database""" + def fetch_data_as_arrow( + self, + source_table: str, + size: int = 1000000, + sort_columns: list[str] | None = None, + sort_order: str = 'asc' + ) -> pa.Table: + """ + Fetch data from SQL Server as a PyArrow Table using connectorx. + """ + if not source_table: + raise ValueError("source_table must be provided") + + # Parse table name + if "." in source_table: + schema, table = source_table.split(".", 1) + else: + schema = "dbo" + table = source_table + + base_query = f"SELECT * FROM [{schema}].[{table}]" + + # Add ORDER BY if sort columns specified + order_by_clause = "" + if sort_columns and len(sort_columns) > 0: + order_direction = "DESC" if sort_order == 'desc' else "ASC" + sanitized_cols = [f'[{col}] {order_direction}' for col in sort_columns] + order_by_clause = f" ORDER BY {', '.join(sanitized_cols)}" + + # SQL Server uses TOP instead of LIMIT + query = f"SELECT TOP {size} * FROM ({base_query}{order_by_clause}) AS limited" + + log.info(f"Executing SQL Server query: {query[:200]}...") + + arrow_table = cx.read_sql(self.connection_url, query, return_type="arrow") + log.info(f"Fetched {arrow_table.num_rows} rows from SQL Server [Arrow-native]") + + return arrow_table + + def list_tables(self, table_filter: str | None = None) -> list[dict[str, Any]]: + """List all tables from SQL Server database.""" try: - # Query SQL Server system tables to get table information tables_query = """ SELECT TABLE_SCHEMA, @@ -253,7 +226,7 @@ def list_tables(self): ORDER BY TABLE_SCHEMA, TABLE_NAME """ - tables_df = self._execute_query(tables_query) + tables_df = self._execute_query(tables_query).to_pandas() results = [] for _, row in tables_df.iterrows(): @@ -262,6 +235,9 @@ def list_tables(self): table_type = row.get("TABLE_TYPE", "BASE TABLE") full_table_name = f"{schema}.{table_name}" + if table_filter and table_filter.lower() not in full_table_name.lower(): + continue + try: # Get column information columns_query = f""" @@ -277,7 +253,7 @@ def list_tables(self): WHERE TABLE_SCHEMA = '{schema}' AND TABLE_NAME = '{table_name}' ORDER BY ORDINAL_POSITION """ - columns_df = self._execute_query(columns_query) + columns_df = self._execute_query(columns_query).to_pandas() columns = [] for _, col_row in columns_df.iterrows(): @@ -320,7 +296,7 @@ def list_tables(self): # Get sample data (first 10 rows) sample_query = f"SELECT TOP 10 * FROM [{schema}].[{table_name}]" - sample_df = self._execute_query(sample_query) + sample_df = self._execute_query(sample_query).to_pandas() # Handle NaN values in sample data for JSON 
serialization try: @@ -339,7 +315,7 @@ def list_tables(self): # Get row count count_query = f"SELECT COUNT(*) as row_count FROM [{schema}].[{table_name}]" - count_df = self._execute_query(count_query) + count_df = self._execute_query(count_query).to_pandas() # Handle NaN values in row count raw_count = count_df.iloc[0]["row_count"] @@ -386,80 +362,3 @@ def list_tables(self): results = [] return results - - def ingest_data(self, table_name: str, name_as: Optional[str] = None, size: int = 1000000, sort_columns: List[str] = None, sort_order: str = 'asc'): - """Ingest data from SQL Server table into DuckDB""" - # Parse table name (assuming format: schema.table) - if "." in table_name: - schema, table = table_name.split(".", 1) - else: - schema = "dbo" # Default schema - table = table_name - - if name_as is None: - name_as = table - - name_as = sanitize_table_name(name_as) - - try: - # Build ORDER BY clause if sort_columns are specified - order_by_clause = "" - if sort_columns and len(sort_columns) > 0: - # Use square brackets for SQL Server column quoting - order_direction = "DESC" if sort_order == 'desc' else "ASC" - sanitized_cols = [f'[{col}] {order_direction}' for col in sort_columns] - order_by_clause = f"ORDER BY {', '.join(sanitized_cols)}" - - # Query data from SQL Server with limit - query = f"SELECT TOP {size} * FROM [{schema}].[{table}] {order_by_clause}" - df = self._execute_query(query) - - # Use the base class method to ingest DataFrame into DuckDB - self.ingest_df_to_duckdb(df, name_as) - log.info(f"Successfully ingested {len(df)} rows from {schema}.{table} to {name_as}") - except Exception as e: - log.error(f"Failed to ingest data from {table_name}: {e}") - raise - - def view_query_sample(self, query: str) -> List[Dict[str, Any]]: - """Execute a custom query and return sample results""" - try: - # Add TOP 10 if not already present for SELECT queries - modified_query = query.strip() - if ( - modified_query.upper().startswith("SELECT") - and not modified_query.upper().startswith("SELECT TOP") - and "TOP " not in modified_query.upper()[:50] - ): # Check first 50 chars - modified_query = modified_query.replace("SELECT", "SELECT TOP 10", 1) - - result, error_message = validate_sql_query(modified_query) - if not result: - raise ValueError(error_message) - - df = self._execute_query(modified_query) - - # Handle NaN values for JSON serialization - df_clean = df.fillna(value=None) - return json.loads( - df_clean.head(10).to_json(orient="records", date_format="iso", default_handler=str) - ) - except Exception as e: - log.error(f"Failed to execute query sample: {e}") - raise - - def ingest_data_from_query(self, query: str, name_as: str) -> pd.DataFrame: - """Execute a custom query and ingest results into DuckDB""" - try: - result, error_message = validate_sql_query(query) - if not result: - raise ValueError(error_message) - - df = self._execute_query(query) - # Use the base class's method to ingest the DataFrame - self.ingest_df_to_duckdb(df, sanitize_table_name(name_as)) - log.info(f"Successfully ingested {len(df)} rows from custom query to {name_as}") - return df - except Exception as e: - log.error(f"Failed to execute and ingest custom query: {e}") - raise diff --git a/py-src/data_formulator/data_loader/mysql_data_loader.py b/py-src/data_formulator/data_loader/mysql_data_loader.py index 0430a57a..f10180c9 100644 --- a/py-src/data_formulator/data_loader/mysql_data_loader.py +++ b/py-src/data_formulator/data_loader/mysql_data_loader.py @@ -1,19 +1,12 @@ import json import logging +from 
typing import Any import pandas as pd -import duckdb +import pyarrow as pa +import connectorx as cx -from data_formulator.data_loader.external_data_loader import ExternalDataLoader, sanitize_table_name - -from data_formulator.security import validate_sql_query -from typing import Dict, Any, Optional, List - -try: - import pymysql - PYMYSQL_AVAILABLE = True -except ImportError: - PYMYSQL_AVAILABLE = False +from data_formulator.data_loader.external_data_loader import ExternalDataLoader logger = logging.getLogger(__name__) @@ -21,7 +14,7 @@ class MySQLDataLoader(ExternalDataLoader): @staticmethod - def list_params() -> List[Dict[str, Any]]: + def list_params() -> list[dict[str, Any]]: params_list = [ {"name": "user", "type": "string", "required": True, "default": "root", "description": ""}, {"name": "password", "type": "string", "required": False, "default": "", "description": "leave blank for no password"}, @@ -58,254 +51,159 @@ def auth_instructions() -> str: - Test connection: `mysql -u [username] -p -h [host] -P [port] [database]` """ - def __init__(self, params: Dict[str, Any], duck_db_conn: duckdb.DuckDBPyConnection): - if not PYMYSQL_AVAILABLE: - raise ImportError( - "pymysql is required for MySQL connections. " - "Install with: pip install pymysql" - ) - + def __init__(self, params: dict[str, Any]): self.params = params - self.duck_db_conn = duck_db_conn - - # Get params as-is from frontend - host = self.params.get('host', '') - user = self.params.get('user', '') - password = self.params.get('password', '') - database = self.params.get('database', '') - - # Validate required params - if not host: + + self.host = self.params.get("host", "") + self.user = self.params.get("user", "") + self.password = self.params.get("password", "") + self.database = self.params.get("database", "") + + if not self.host: raise ValueError("MySQL host is required") - if not user: + if not self.user: raise ValueError("MySQL user is required") - if not database: + if not self.database: raise ValueError("MySQL database is required") - - # Handle port (only field with sensible default) - port = self.params.get('port', '') + + port = self.params.get("port", "") if isinstance(port, str): - port = int(port) if port else 3306 + self.port = int(port) if port else 3306 elif not port: - port = 3306 + self.port = 3306 + else: + self.port = int(port) - try: - self.mysql_conn = pymysql.connect( - host=host, - user=user, - password=password, - database=database, - port=port, - cursorclass=pymysql.cursors.DictCursor, - charset='utf8mb4' - ) - self.database = database - logger.info(f"Successfully connected to MySQL database: {self.database}") - except Exception as e: - logger.error(f"Failed to connect to MySQL: {e}") - raise - - def _execute_query(self, query: str, params: tuple = None) -> pd.DataFrame: - """Execute a query using native MySQL connection and return a DataFrame. + # Build connection URL for connectorx + # Format: mysql://user:password@host:port/database + # - Use explicit empty password (user:@host) so the URL parser sees user vs password correctly. + # - Use 127.0.0.1 when host is localhost to force IPv4 TCP and avoid IPv6 ::1 connection issues. + host_for_url = "127.0.0.1" if (self.host or "").strip().lower() == "localhost" else self.host + if self.password: + self.connection_url = f"mysql://{self.user}:{self.password}@{host_for_url}:{self.port}/{self.database}" + else: + self.connection_url = f"mysql://{self.user}:@{host_for_url}:{self.port}/{self.database}" - Args: - query: SQL query string. 
Use %s for parameterized queries. - params: Optional tuple of parameters for parameterized queries. - """ - try: - with self.mysql_conn.cursor() as cursor: - cursor.execute(query, params) - rows = cursor.fetchall() - if rows: - return pd.DataFrame(rows) - else: - # Return empty DataFrame with column names - return pd.DataFrame() - except Exception as e: - logger.error(f"Error executing MySQL query: {e}") - # Try to reconnect if connection was lost - self._reconnect_if_needed() - raise - - def _reconnect_if_needed(self): - """Attempt to reconnect to MySQL if the connection was lost.""" + self._sanitized_url = f"mysql://{self.user}:***@{self.host}:{self.port}/{self.database}" + + # Test connection try: - self.mysql_conn.ping(reconnect=True) + cx.read_sql(self.connection_url, "SELECT 1", return_type="arrow") except Exception as e: - logger.warning(f"Reconnection attempt failed: {e}") - # Try to create a new connection using stored params - host = self.params.get('host', '') - user = self.params.get('user', '') - password = self.params.get('password', '') - - port = self.params.get('port', '') - if isinstance(port, str): - port = int(port) if port else 3306 - elif not port: - port = 3306 - - self.mysql_conn = pymysql.connect( - host=host, - user=user, - password=password, - database=self.database, - port=port, - cursorclass=pymysql.cursors.DictCursor, - charset='utf8mb4' - ) - - def list_tables(self, table_filter: str = None) -> List[Dict[str, Any]]: - # Get list of tables from the connected database - # Filter by the specific database we're connected to for better performance - tables_query = """ - SELECT TABLE_SCHEMA, TABLE_NAME - FROM information_schema.tables - WHERE TABLE_SCHEMA = %s - AND TABLE_TYPE = 'BASE TABLE' + logger.error(f"Failed to connect to MySQL (mysql://{self.user}:***@{self.host}:{self.port}/{self.database}): {e}") + raise ValueError(f"Failed to connect to MySQL database '{self.database}' on host '{self.host}': {e}") from e + logger.info(f"Successfully connected to MySQL: mysql://{self.user}:***@{self.host}:{self.port}/{self.database}") + + def fetch_data_as_arrow( + self, + source_table: str, + size: int = 1000000, + sort_columns: list[str] | None = None, + sort_order: str = 'asc' + ) -> pa.Table: """ - tables_df = self._execute_query(tables_query, (self.database,)) + Fetch data from MySQL as a PyArrow Table using connectorx. - if tables_df.empty: - return [] - - results = [] + connectorx provides extremely fast Arrow-native database access. 
+ """ + if not source_table: + raise ValueError("source_table must be provided") - for _, row in tables_df.iterrows(): - schema = row['TABLE_SCHEMA'] - table_name = row['TABLE_NAME'] - - # Apply table filter if provided - if table_filter and table_filter.lower() not in table_name.lower(): - continue - - full_table_name = f"{schema}.{table_name}" - - try: - # Get column information from MySQL - columns_query = ( - "SELECT COLUMN_NAME, DATA_TYPE " - "FROM information_schema.columns " - "WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s " - "ORDER BY ORDINAL_POSITION" - ) - columns_df = self._execute_query(columns_query, (schema, table_name)) - columns = [{ - 'name': col_row['COLUMN_NAME'], - 'type': col_row['DATA_TYPE'] - } for _, col_row in columns_df.iterrows()] - - # Get sample data - sample_query = "SELECT * FROM `{}`.`{}` LIMIT 10".format(schema, table_name) - sample_df = self._execute_query(sample_query) - sample_rows = json.loads(sample_df.to_json(orient="records", date_format='iso')) - - # Get row count - count_query = "SELECT COUNT(*) as cnt FROM `{}`.`{}`".format(schema, table_name) - count_df = self._execute_query(count_query) - row_count = int(count_df['cnt'].iloc[0]) if not count_df.empty else 0 - - table_metadata = { - "row_count": row_count, - "columns": columns, - "sample_rows": sample_rows - } - - results.append({ - "name": full_table_name, - "metadata": table_metadata - }) - except Exception as e: - logger.warning(f"Error processing table {full_table_name}: {e}") - continue - - return results - - def ingest_data(self, table_name: str, name_as: Optional[str] = None, size: int = 1000000, sort_columns: List[str] = None, sort_order: str = 'asc'): - """Fetch data from MySQL and ingest into DuckDB.""" - if name_as is None: - name_as = table_name.split('.')[-1] - - name_as = sanitize_table_name(name_as) - - # Validate and sanitize table name components - sanitized_size = None - try: - sanitized_size = int(size) - if sanitized_size <= 0: - raise ValueError("Size must be a positive integer.") - except Exception: - raise ValueError("Size parameter must be a positive integer.") - - # Build ORDER BY clause if sort_columns are specified + # Handle table names + if '.' in source_table: + base_query = f"SELECT * FROM {source_table}" + else: + base_query = f"SELECT * FROM `{source_table}`" + + # Add ORDER BY if sort columns specified order_by_clause = "" if sort_columns and len(sort_columns) > 0: - # Use backticks for MySQL column quoting order_direction = "DESC" if sort_order == 'desc' else "ASC" sanitized_cols = [f'`{col}` {order_direction}' for col in sort_columns] - order_by_clause = f"ORDER BY {', '.join(sanitized_cols)}" - - if '.' 
in table_name: - parts = table_name.split('.') - schema = sanitize_table_name(parts[0]) - tbl = sanitize_table_name(parts[1]) - query = f"SELECT * FROM `{schema}`.`{tbl}` {order_by_clause} LIMIT {sanitized_size}" - else: - sanitized_table_name = sanitize_table_name(table_name) - query = f"SELECT * FROM `{sanitized_table_name}` {order_by_clause} LIMIT {sanitized_size}" - - # Fetch data from MySQL - df = self._execute_query(query) + order_by_clause = f" ORDER BY {', '.join(sanitized_cols)}" - if df.empty: - logger.warning(f"No data fetched from table {table_name}") - return + query = f"{base_query}{order_by_clause} LIMIT {size}" - # Ingest into DuckDB using the base class method - self.ingest_df_to_duckdb(df, name_as) - logger.info(f"Successfully ingested {len(df)} rows from {table_name} into DuckDB table {name_as}") - - def view_query_sample(self, query: str) -> List[Dict[str, Any]]: - result, error_message = validate_sql_query(query) - if not result: - raise ValueError(error_message) + logger.info(f"Executing MySQL query via connectorx: {query[:200]}...") - # Execute query via native MySQL connection - df = self._execute_query(query) - return json.loads(df.head(10).to_json(orient="records", date_format='iso')) - - def ingest_data_from_query(self, query: str, name_as: str) -> pd.DataFrame: - """Execute custom query and ingest results into DuckDB.""" - result, error_message = validate_sql_query(query) - if not result: - raise ValueError(error_message) + arrow_table = cx.read_sql(self.connection_url, query, return_type="arrow") - # Execute query via native MySQL connection - df = self._execute_query(query) + logger.info(f"Fetched {arrow_table.num_rows} rows from MySQL [Arrow-native]") - # Ingest into DuckDB using the base class method - self.ingest_df_to_duckdb(df, sanitize_table_name(name_as)) - return df - - def close(self): - """Explicitly close the MySQL connection.""" - if hasattr(self, 'mysql_conn') and self.mysql_conn: - try: - self.mysql_conn.close() - except Exception as e: - logger.warning(f"Error closing MySQL connection: {e}") - - def __enter__(self): - """Support context manager entry.""" - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - """Support context manager exit and cleanup.""" - self.close() - - def __del__(self): - """Clean up MySQL connection when the loader is destroyed.""" + return arrow_table + + def list_tables(self, table_filter: str | None = None) -> list[dict[str, Any]]: + """List available tables from MySQL database.""" + return self._list_tables_connectorx(table_filter) + + def _list_tables_connectorx(self, table_filter: str | None = None) -> list[dict[str, Any]]: + """List tables using connectorx.""" try: - self.close() - except Exception: - # Ignore errors during destruction to prevent exceptions in garbage collection - pass \ No newline at end of file + tables_query = f""" + SELECT TABLE_SCHEMA, TABLE_NAME + FROM information_schema.tables + WHERE TABLE_SCHEMA = '{self.database}' + AND TABLE_TYPE = 'BASE TABLE' + """ + tables_arrow = cx.read_sql(self.connection_url, tables_query, return_type="arrow") + tables_df = tables_arrow.to_pandas() + + if tables_df.empty: + return [] + + results = [] + + for _, row in tables_df.iterrows(): + schema = row['TABLE_SCHEMA'] + table_name = row['TABLE_NAME'] + + if table_filter and table_filter.lower() not in table_name.lower(): + continue + + full_table_name = f"{schema}.{table_name}" + + try: + # Get column information + columns_query = f""" + SELECT COLUMN_NAME, DATA_TYPE + FROM information_schema.columns 
+ WHERE TABLE_SCHEMA = '{schema}' AND TABLE_NAME = '{table_name}' + ORDER BY ORDINAL_POSITION + """ + columns_arrow = cx.read_sql(self.connection_url, columns_query, return_type="arrow") + columns_df = columns_arrow.to_pandas() + columns = [{ + 'name': col_row['COLUMN_NAME'], + 'type': col_row['DATA_TYPE'] + } for _, col_row in columns_df.iterrows()] + + # Get sample data + sample_query = f"SELECT * FROM `{schema}`.`{table_name}` LIMIT 10" + sample_arrow = cx.read_sql(self.connection_url, sample_query, return_type="arrow") + sample_df = sample_arrow.to_pandas() + sample_rows = json.loads(sample_df.to_json(orient="records", date_format='iso')) + + # Get row count + count_query = f"SELECT COUNT(*) as cnt FROM `{schema}`.`{table_name}`" + count_arrow = cx.read_sql(self.connection_url, count_query, return_type="arrow") + row_count = int(count_arrow.to_pandas()['cnt'].iloc[0]) + + table_metadata = { + "row_count": row_count, + "columns": columns, + "sample_rows": sample_rows + } + + results.append({ + "name": full_table_name, + "metadata": table_metadata + }) + except Exception as e: + logger.warning(f"Error processing table {full_table_name}: {e}") + continue + + return results + + except Exception as e: + logger.error(f"Error listing tables: {e}") + return [] \ No newline at end of file diff --git a/py-src/data_formulator/data_loader/postgresql_data_loader.py b/py-src/data_formulator/data_loader/postgresql_data_loader.py index b327a737..779aaf84 100644 --- a/py-src/data_formulator/data_loader/postgresql_data_loader.py +++ b/py-src/data_formulator/data_loader/postgresql_data_loader.py @@ -1,171 +1,183 @@ -import json - -import pandas as pd -import duckdb - -from data_formulator.data_loader.external_data_loader import ExternalDataLoader, sanitize_table_name - -from typing import Dict, Any, List, Optional -from data_formulator.security import validate_sql_query - -class PostgreSQLDataLoader(ExternalDataLoader): - - @staticmethod - def list_params() -> List[Dict[str, Any]]: - params_list = [ - {"name": "user", "type": "string", "required": True, "default": "postgres", "description": "PostgreSQL username"}, - {"name": "password", "type": "string", "required": False, "default": "", "description": "leave blank for no password"}, - {"name": "host", "type": "string", "required": True, "default": "localhost", "description": "PostgreSQL host"}, - {"name": "port", "type": "string", "required": False, "default": "5432", "description": "PostgreSQL port"}, - {"name": "database", "type": "string", "required": True, "default": "postgres", "description": "PostgreSQL database name"} - ] - return params_list - - @staticmethod - def auth_instructions() -> str: - return "Provide your PostgreSQL connection details. The user must have SELECT permissions on the tables you want to access." 
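# Sketch for illustration only: the connectorx pattern shared by the MySQL loader
# above and the rewritten PostgreSQL loader added further down in this diff.
# URL, credentials, and table name are placeholders.
import connectorx as cx

def _example_connectorx_fetch():
    url = "postgresql://user:password@127.0.0.1:5432/exampledb"
    cx.read_sql(url, "SELECT 1", return_type="arrow")  # cheap connection check
    # Returns a pyarrow.Table directly, with no pandas round-trip.
    return cx.read_sql(url, "SELECT * FROM public.events LIMIT 1000", return_type="arrow")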
- - def __init__(self, params: Dict[str, Any], duck_db_conn: duckdb.DuckDBPyConnection): - self.params = params - self.duck_db_conn = duck_db_conn - - # Get params as-is from frontend - host = self.params.get('host', '') - port = self.params.get('port', '') or '5432' # Only port has a sensible default - user = self.params.get('user', '') - database = self.params.get('database', '') - password = self.params.get('password', '') - - # Validate required params - if not host: - raise ValueError("PostgreSQL host is required") - if not user: - raise ValueError("PostgreSQL user is required") - if not database: - raise ValueError("PostgreSQL database is required") - - # Create a sanitized version for logging (excludes password) - sanitized_attach_string = f"host={host} port={port} user={user} dbname={database}" - - try: - # Install and load the Postgres extension - self.duck_db_conn.install_extension("postgres") - self.duck_db_conn.load_extension("postgres") - - # Prepare the connection string for Postgres - # Note: attach_string contains sensitive credentials - do not log it - password_part = f" password={password}" if password else "" - attach_string = f"host={host} port={port} user={user}{password_part} dbname={database}" - - # Detach existing postgres connection if it exists - try: - self.duck_db_conn.execute("DETACH mypostgresdb;") - except: - pass # Ignore if connection doesn't exist - - # Register Postgres connection - self.duck_db_conn.execute(f"ATTACH '{attach_string}' AS mypostgresdb (TYPE postgres);") - print(f"Successfully connected to PostgreSQL database: {database}") - - except Exception as e: - # Log error with sanitized connection string to avoid exposing password - error_type = type(e).__name__ - print(f"Failed to connect to PostgreSQL ({sanitized_attach_string}): {error_type}") - raise ValueError(f"Failed to connect to PostgreSQL database '{database}' on host '{host}': {error_type}") - - def list_tables(self): - try: - # Query tables through DuckDB's attached PostgreSQL connection - tables_df = self.duck_db_conn.execute(""" - SELECT table_schema as schemaname, table_name as tablename - FROM mypostgresdb.information_schema.tables - WHERE table_schema NOT IN ('information_schema', 'pg_catalog', 'pg_toast') - AND table_schema NOT LIKE '%_intern%' - AND table_schema NOT LIKE '%timescaledb%' - AND table_name NOT LIKE '%/%' - AND table_type = 'BASE TABLE' - ORDER BY table_schema, table_name - """).fetch_df() - - print(f"Found tables: {tables_df}") - - results = [] - - for schema, table_name in tables_df.values: - full_table_name = f"mypostgresdb.{schema}.{table_name}" - - try: - # Get column information using DuckDB's DESCRIBE - columns_df = self.duck_db_conn.execute(f"DESCRIBE {full_table_name}").df() - columns = [{ - 'name': row['column_name'], - 'type': row['column_type'] - } for _, row in columns_df.iterrows()] - - # Get sample data - sample_df = self.duck_db_conn.execute(f"SELECT * FROM {full_table_name} LIMIT 10").df() - sample_rows = json.loads(sample_df.to_json(orient="records")) - - # Get row count - row_count = self.duck_db_conn.execute(f"SELECT COUNT(*) FROM {full_table_name}").fetchone()[0] - - table_metadata = { - "row_count": row_count, - "columns": columns, - "sample_rows": sample_rows - } - - results.append({ - "name": full_table_name, - "metadata": table_metadata - }) - - except Exception as e: - print(f"Error processing table {full_table_name}: {e}") - continue - - return results - - except Exception as e: - print(f"Error listing tables: {e}") - return [] - - def 
ingest_data(self, table_name: str, name_as: Optional[str] = None, size: int = 1000000, sort_columns: List[str] = None, sort_order: str = 'asc'): - # Create table in the main DuckDB database from Postgres data - if name_as is None: - name_as = table_name.split('.')[-1] - - name_as = sanitize_table_name(name_as) - - # Build ORDER BY clause if sort_columns are specified - order_by_clause = "" - if sort_columns and len(sort_columns) > 0: - # Sanitize column names to prevent SQL injection - order_direction = "DESC" if sort_order == 'desc' else "ASC" - sanitized_cols = [f'"{col}" {order_direction}' for col in sort_columns] - order_by_clause = f"ORDER BY {', '.join(sanitized_cols)}" - - self.duck_db_conn.execute(f""" - CREATE OR REPLACE TABLE main.{name_as} AS - SELECT * FROM {table_name} - {order_by_clause} - LIMIT {size} - """) - - def view_query_sample(self, query: str) -> List[Dict[str, Any]]: - result, error_message = validate_sql_query(query) - if not result: - raise ValueError(error_message) - - return json.loads(self.duck_db_conn.execute(query).df().head(10).to_json(orient="records")) - - def ingest_data_from_query(self, query: str, name_as: str) -> pd.DataFrame: - # Execute the query and get results as a DataFrame - result, error_message = validate_sql_query(query) - if not result: - raise ValueError(error_message) - - df = self.duck_db_conn.execute(query).df() - # Use the base class's method to ingest the DataFrame - self.ingest_df_to_duckdb(df, sanitize_table_name(name_as)) - return df +import json +import logging +from typing import Any + +import pandas as pd +import pyarrow as pa +import connectorx as cx + +from data_formulator.data_loader.external_data_loader import ExternalDataLoader + +logger = logging.getLogger(__name__) + + +class PostgreSQLDataLoader(ExternalDataLoader): + + @staticmethod + def list_params() -> list[dict[str, Any]]: + params_list = [ + {"name": "user", "type": "string", "required": True, "default": "postgres", "description": "PostgreSQL username"}, + {"name": "password", "type": "string", "required": False, "default": "", "description": "leave blank for no password"}, + {"name": "host", "type": "string", "required": True, "default": "localhost", "description": "PostgreSQL host"}, + {"name": "port", "type": "string", "required": False, "default": "5432", "description": "PostgreSQL port"}, + {"name": "database", "type": "string", "required": True, "default": "postgres", "description": "PostgreSQL database name"} + ] + return params_list + + @staticmethod + def auth_instructions() -> str: + return "Provide your PostgreSQL connection details. The user must have SELECT permissions on the tables you want to access. Uses connectorx for fast Arrow-native data access." + + def __init__(self, params: dict[str, Any]): + self.params = params + + self.host = self.params.get("host", "") + self.port = self.params.get("port", "") or "5432" + self.user = self.params.get("user", "") + self.database = self.params.get("database", "") + self.password = self.params.get("password", "") + + if not self.host: + raise ValueError("PostgreSQL host is required") + if not self.user: + raise ValueError("PostgreSQL user is required") + if not self.database: + raise ValueError("PostgreSQL database is required") + + # Build connection URL for connectorx: postgresql://user:password@host:port/database + # - Use explicit empty password (user:@host) so the URL parser sees user vs password correctly. + # - Use 127.0.0.1 when host is localhost to force IPv4 TCP and avoid IPv6 ::1 connection issues. 
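+        # Illustrative example (using the defaults from list_params): user "postgres" with no
+        # password and database "postgres" resolves to postgresql://postgres:@127.0.0.1:5432/postgres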
+ host_for_url = "127.0.0.1" if (self.host or "").strip().lower() == "localhost" else self.host + if self.password: + self.connection_url = f"postgresql://{self.user}:{self.password}@{host_for_url}:{self.port}/{self.database}" + else: + self.connection_url = f"postgresql://{self.user}:@{host_for_url}:{self.port}/{self.database}" + + try: + cx.read_sql(self.connection_url, "SELECT 1", return_type="arrow") + except Exception as e: + logger.error(f"Failed to connect to PostgreSQL (postgresql://{self.user}:***@{self.host}:{self.port}/{self.database}): {e}") + raise ValueError(f"Failed to connect to PostgreSQL database '{self.database}' on host '{self.host}': {e}") from e + logger.info(f"Successfully connected to PostgreSQL: postgresql://{self.user}:***@{self.host}:{self.port}/{self.database}") + + def fetch_data_as_arrow( + self, + source_table: str, + size: int = 1000000, + sort_columns: list[str] | None = None, + sort_order: str = 'asc' + ) -> pa.Table: + """ + Fetch data from PostgreSQL as a PyArrow Table using connectorx. + + connectorx provides extremely fast Arrow-native database access, + typically 2-10x faster than pandas-based approaches. + """ + if not source_table: + raise ValueError("source_table must be provided") + + # Handle table names like "mypostgresdb.schema.table" -> "schema.table" + table_ref = source_table + if source_table.startswith("mypostgresdb."): + table_ref = source_table[len("mypostgresdb."):] + base_query = f"SELECT * FROM {table_ref}" + + # Add ORDER BY if sort columns specified + order_by_clause = "" + if sort_columns and len(sort_columns) > 0: + order_direction = "DESC" if sort_order == 'desc' else "ASC" + sanitized_cols = [f'"{col}" {order_direction}' for col in sort_columns] + order_by_clause = f" ORDER BY {', '.join(sanitized_cols)}" + + # Build full query with limit + query = f"{base_query}{order_by_clause} LIMIT {size}" + + logger.info(f"Executing PostgreSQL query via connectorx: {query[:200]}...") + + # Execute with connectorx - returns Arrow table directly + arrow_table = cx.read_sql(self.connection_url, query, return_type="arrow") + + logger.info(f"Fetched {arrow_table.num_rows} rows from PostgreSQL [Arrow-native]") + + return arrow_table + + def list_tables(self, table_filter: str | None = None) -> list[dict[str, Any]]: + """List available tables from PostgreSQL.""" + return self._list_tables_connectorx(table_filter) + + def _list_tables_connectorx(self, table_filter: str | None = None) -> list[dict[str, Any]]: + """List tables using connectorx.""" + try: + # Query tables from information_schema + query = """ + SELECT table_schema as schemaname, table_name as tablename + FROM information_schema.tables + WHERE table_schema NOT IN ('information_schema', 'pg_catalog', 'pg_toast') + AND table_schema NOT LIKE '%_intern%' + AND table_schema NOT LIKE '%timescaledb%' + AND table_name NOT LIKE '%/%' + AND table_type = 'BASE TABLE' + ORDER BY table_schema, table_name + """ + tables_arrow = cx.read_sql(self.connection_url, query, return_type="arrow") + tables_df = tables_arrow.to_pandas() + + logger.info(f"Found {len(tables_df)} tables") + + results = [] + + for _, row in tables_df.iterrows(): + schema = row['schemaname'] + table_name = row['tablename'] + full_table_name = f"{schema}.{table_name}" + + # Apply filter if provided + if table_filter and table_filter.lower() not in full_table_name.lower(): + continue + + try: + # Get column information + columns_query = f""" + SELECT column_name, data_type + FROM information_schema.columns + WHERE table_schema = 
'{schema}' AND table_name = '{table_name}' + ORDER BY ordinal_position + """ + columns_arrow = cx.read_sql(self.connection_url, columns_query, return_type="arrow") + columns_df = columns_arrow.to_pandas() + columns = [{ + 'name': col_row['column_name'], + 'type': col_row['data_type'] + } for _, col_row in columns_df.iterrows()] + + # Get sample data + sample_query = f'SELECT * FROM "{schema}"."{table_name}" LIMIT 10' + sample_arrow = cx.read_sql(self.connection_url, sample_query, return_type="arrow") + sample_df = sample_arrow.to_pandas() + sample_rows = json.loads(sample_df.to_json(orient="records")) + + # Get row count + count_query = f'SELECT COUNT(*) as cnt FROM "{schema}"."{table_name}"' + count_arrow = cx.read_sql(self.connection_url, count_query, return_type="arrow") + row_count = count_arrow.to_pandas()['cnt'].iloc[0] + + table_metadata = { + "row_count": int(row_count), + "columns": columns, + "sample_rows": sample_rows + } + + results.append({ + "name": full_table_name, + "metadata": table_metadata + }) + + except Exception as e: + logger.warning(f"Error processing table {full_table_name}: {e}") + continue + + return results + + except Exception as e: + logger.error(f"Error listing tables: {e}") + return [] diff --git a/py-src/data_formulator/data_loader/s3_data_loader.py b/py-src/data_formulator/data_loader/s3_data_loader.py index d92b7c41..7e703be1 100644 --- a/py-src/data_formulator/data_loader/s3_data_loader.py +++ b/py-src/data_formulator/data_loader/s3_data_loader.py @@ -1,22 +1,23 @@ import json +import logging +from typing import Any + +import boto3 import pandas as pd -import duckdb -import os +import pyarrow as pa +import pyarrow.csv as pa_csv +import pyarrow.parquet as pq +from pyarrow import fs as pa_fs + +from data_formulator.data_loader.external_data_loader import ExternalDataLoader -from data_formulator.data_loader.external_data_loader import ExternalDataLoader, sanitize_table_name -from typing import Dict, Any, List -from data_formulator.security import validate_sql_query +logger = logging.getLogger(__name__) -try: - import boto3 - BOTO3_AVAILABLE = True -except ImportError: - BOTO3_AVAILABLE = False class S3DataLoader(ExternalDataLoader): @staticmethod - def list_params() -> List[Dict[str, Any]]: + def list_params() -> list[dict[str, Any]]: params_list = [ {"name": "aws_access_key_id", "type": "string", "required": True, "default": "", "description": "AWS access key ID"}, {"name": "aws_secret_access_key", "type": "string", "required": True, "default": "", "description": "AWS secret access key"}, @@ -63,38 +64,78 @@ def auth_instructions() -> str: **Security:** Never share secret keys, rotate regularly, use least privilege permissions. """ - def __init__(self, params: Dict[str, Any], duck_db_conn: duckdb.DuckDBPyConnection): - if not BOTO3_AVAILABLE: - raise ImportError( - "boto3 is required for S3 connections. 
" - "Install with: pip install boto3" - ) - + def __init__(self, params: dict[str, Any]): self.params = params - self.duck_db_conn = duck_db_conn - - # Extract parameters + self.aws_access_key_id = params.get("aws_access_key_id", "") self.aws_secret_access_key = params.get("aws_secret_access_key", "") self.aws_session_token = params.get("aws_session_token", "") self.region_name = params.get("region_name", "us-east-1") self.bucket = params.get("bucket", "") + + self.s3_fs = pa_fs.S3FileSystem( + access_key=self.aws_access_key_id, + secret_key=self.aws_secret_access_key, + session_token=self.aws_session_token if self.aws_session_token else None, + region=self.region_name, + ) + logger.info(f"Initialized PyArrow S3 filesystem for bucket: {self.bucket}") + + def fetch_data_as_arrow( + self, + source_table: str, + size: int = 1000000, + sort_columns: list[str] | None = None, + sort_order: str = 'asc' + ) -> pa.Table: + """ + Fetch data from S3 as a PyArrow Table using PyArrow's native S3 filesystem. + + For files (parquet, csv), reads directly using PyArrow. + """ + if not source_table: + raise ValueError("source_table (S3 URL) must be provided") + + s3_url = source_table + + # Parse S3 URL: s3://bucket/key -> bucket/key for PyArrow + if s3_url.startswith("s3://"): + s3_path = s3_url[5:] # Remove "s3://" + else: + s3_path = f"{self.bucket}/{s3_url}" + + logger.info(f"Reading S3 file via PyArrow: {s3_url}") + + # Read based on file extension + if s3_url.lower().endswith('.parquet'): + arrow_table = pq.read_table(s3_path, filesystem=self.s3_fs) + elif s3_url.lower().endswith('.csv'): + with self.s3_fs.open_input_file(s3_path) as f: + arrow_table = pa_csv.read_csv(f) + elif s3_url.lower().endswith('.json') or s3_url.lower().endswith('.jsonl'): + import pyarrow.json as pa_json + with self.s3_fs.open_input_file(s3_path) as f: + arrow_table = pa_json.read_json(f) + else: + raise ValueError(f"Unsupported file type: {s3_url}") + + # Apply sorting if specified + if sort_columns and len(sort_columns) > 0: + df = arrow_table.to_pandas() + ascending = sort_order != 'desc' + df = df.sort_values(by=sort_columns, ascending=ascending) + arrow_table = pa.Table.from_pandas(df, preserve_index=False) - # Install and load the httpfs extension for S3 access - self.duck_db_conn.install_extension("httpfs") - self.duck_db_conn.load_extension("httpfs") + # Apply size limit + if arrow_table.num_rows > size: + arrow_table = arrow_table.slice(0, size) - # Set AWS credentials for DuckDB - self.duck_db_conn.execute(f"SET s3_region='{self.region_name}'") - self.duck_db_conn.execute(f"SET s3_access_key_id='{self.aws_access_key_id}'") - self.duck_db_conn.execute(f"SET s3_secret_access_key='{self.aws_secret_access_key}'") - if self.aws_session_token: # Add this block - self.duck_db_conn.execute(f"SET s3_session_token='{self.aws_session_token}'") - - def list_tables(self, table_filter: str = None) -> List[Dict[str, Any]]: - # Use boto3 to list objects in the bucket - import boto3 + logger.info(f"Fetched {arrow_table.num_rows} rows from S3 [Arrow-native]") + return arrow_table + + def list_tables(self, table_filter: str | None = None) -> list[dict[str, Any]]: + """List available files from S3 bucket.""" s3_client = boto3.client( 's3', aws_access_key_id=self.aws_access_key_id, @@ -103,7 +144,6 @@ def list_tables(self, table_filter: str = None) -> List[Dict[str, Any]]: region_name=self.region_name ) - # List objects in the bucket response = s3_client.list_objects_v2(Bucket=self.bucket) results = [] @@ -112,36 +152,24 @@ def 
list_tables(self, table_filter: str = None) -> List[Dict[str, Any]]: for obj in response['Contents']: key = obj['Key'] - # Skip directories and non-data files if key.endswith('/') or not self._is_supported_file(key): continue - # Apply table filter if provided if table_filter and table_filter.lower() not in key.lower(): continue - # Create S3 URL s3_url = f"s3://{self.bucket}/{key}" try: - # Choose the appropriate read function based on file extension - if s3_url.lower().endswith('.parquet'): - sample_df = self.duck_db_conn.execute(f"SELECT * FROM read_parquet('{s3_url}') LIMIT 10").df() - elif s3_url.lower().endswith('.json') or s3_url.lower().endswith('.jsonl'): - sample_df = self.duck_db_conn.execute(f"SELECT * FROM read_json_auto('{s3_url}') LIMIT 10").df() - elif s3_url.lower().endswith('.csv'): # Default to CSV for other formats - sample_df = self.duck_db_conn.execute(f"SELECT * FROM read_csv_auto('{s3_url}') LIMIT 10").df() + sample_table = self._read_sample_arrow(s3_url, 10) + sample_df = sample_table.to_pandas() - # Get column information columns = [{ 'name': col, 'type': str(sample_df[col].dtype) } for col in sample_df.columns] - # Get sample data sample_rows = json.loads(sample_df.to_json(orient="records")) - - # Estimate row count (this is approximate for CSV files) row_count = self._estimate_row_count(s3_url) table_metadata = { @@ -155,83 +183,45 @@ def list_tables(self, table_filter: str = None) -> List[Dict[str, Any]]: "metadata": table_metadata }) except Exception as e: - # Skip files that can't be read - print(f"Error reading {s3_url}: {e}") + logger.warning(f"Error reading {s3_url}: {e}") continue return results + def _read_sample_arrow(self, s3_url: str, limit: int) -> pa.Table: + """Read sample data using PyArrow S3 filesystem.""" + s3_path = s3_url[5:] if s3_url.startswith("s3://") else s3_url + + if s3_url.lower().endswith('.parquet'): + table = pq.read_table(s3_path, filesystem=self.s3_fs) + elif s3_url.lower().endswith('.csv'): + with self.s3_fs.open_input_file(s3_path) as f: + table = pa_csv.read_csv(f) + elif s3_url.lower().endswith('.json') or s3_url.lower().endswith('.jsonl'): + import pyarrow.json as pa_json + with self.s3_fs.open_input_file(s3_path) as f: + table = pa_json.read_json(f) + else: + raise ValueError(f"Unsupported file type: {s3_url}") + + return table.slice(0, limit) if table.num_rows > limit else table + def _is_supported_file(self, key: str) -> bool: - """Check if the file type is supported by DuckDB.""" - supported_extensions = ['.csv', '.parquet', '.json', '.jsonl'] + """Check if the file type is supported (CSV, Parquet, JSON).""" + supported_extensions = [".csv", ".parquet", ".json", ".jsonl"] return any(key.lower().endswith(ext) for ext in supported_extensions) def _estimate_row_count(self, s3_url: str) -> int: """Estimate the number of rows in a file.""" try: - # For parquet files, we can get the exact count + # For parquet files, use PyArrow metadata for exact count if s3_url.lower().endswith('.parquet'): - count = self.duck_db_conn.execute(f"SELECT COUNT(*) FROM read_parquet('{s3_url}')").fetchone()[0] - return count + s3_path = s3_url[5:] if s3_url.startswith("s3://") else s3_url + parquet_file = pq.ParquetFile(s3_path, filesystem=self.s3_fs) + return parquet_file.metadata.num_rows - # For CSV, JSON, and JSONL files, we'll skip row count - if s3_url.lower().endswith('.csv') or s3_url.lower().endswith('.json') or s3_url.lower().endswith('.jsonl'): - return 0 - except Exception as e: - print(f"Error estimating row count for {s3_url}: {e}") 
+ # For CSV, JSON, and JSONL files, skip row count for efficiency return 0 - - def ingest_data(self, table_name: str, name_as: str = None, size: int = 1000000, sort_columns: List[str] = None, sort_order: str = 'asc'): - if name_as is None: - name_as = table_name.split('/')[-1].split('.')[0] - - name_as = sanitize_table_name(name_as) - - # Build ORDER BY clause if sort_columns are specified - order_by_clause = "" - if sort_columns and len(sort_columns) > 0: - order_direction = "DESC" if sort_order == 'desc' else "ASC" - sanitized_cols = [f'"{col}" {order_direction}' for col in sort_columns] - order_by_clause = f"ORDER BY {', '.join(sanitized_cols)}" - - # Determine file type and use appropriate DuckDB function - if table_name.lower().endswith('.csv'): - self.duck_db_conn.execute(f""" - CREATE OR REPLACE TABLE main.{name_as} AS - SELECT * FROM read_csv_auto('{table_name}') - {order_by_clause} - LIMIT {size} - """) - elif table_name.lower().endswith('.parquet'): - self.duck_db_conn.execute(f""" - CREATE OR REPLACE TABLE main.{name_as} AS - SELECT * FROM read_parquet('{table_name}') - {order_by_clause} - LIMIT {size} - """) - elif table_name.lower().endswith('.json') or table_name.lower().endswith('.jsonl'): - self.duck_db_conn.execute(f""" - CREATE OR REPLACE TABLE main.{name_as} AS - SELECT * FROM read_json_auto('{table_name}') - {order_by_clause} - LIMIT {size} - """) - else: - raise ValueError(f"Unsupported file type: {table_name}") - - def view_query_sample(self, query: str) -> List[Dict[str, Any]]: - result, error_message = validate_sql_query(query) - if not result: - raise ValueError(error_message) - - return json.loads(self.duck_db_conn.execute(query).df().head(10).to_json(orient="records")) - - def ingest_data_from_query(self, query: str, name_as: str): - # Execute the query and get results as a DataFrame - result, error_message = validate_sql_query(query) - if not result: - raise ValueError(error_message) - - df = self.duck_db_conn.execute(query).df() - # Use the base class's method to ingest the DataFrame - self.ingest_df_to_duckdb(df, sanitize_table_name(name_as)) \ No newline at end of file + except Exception as e: + logger.warning(f"Error estimating row count for {s3_url}: {e}") + return 0 \ No newline at end of file diff --git a/py-src/data_formulator/datalake/__init__.py b/py-src/data_formulator/datalake/__init__.py new file mode 100644 index 00000000..c65216ea --- /dev/null +++ b/py-src/data_formulator/datalake/__init__.py @@ -0,0 +1,106 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +""" +Data Lake module for Data Formulator. 
+ +This module provides a unified data management layer that: +- Manages user workspaces with identity-based directories +- Stores user-uploaded files as-is (CSV, Excel, TXT, HTML, JSON, PDF) +- Stores data from external loaders as parquet via pyarrow +- Tracks all data sources in a workspace.yaml metadata file + +Example usage: + + from data_formulator.datalake import Workspace, save_uploaded_file, write_parquet + + # Get or create a workspace for a user + workspace = Workspace("user:123") + + # Save an uploaded file + with open("sales.csv", "rb") as f: + metadata = save_uploaded_file(workspace, f.read(), "sales.csv") + + # Write a DataFrame as parquet (typically from data loaders) + import pandas as pd + df = pd.DataFrame({"id": [1, 2, 3], "name": ["a", "b", "c"]}) + metadata = write_parquet(workspace, df, "customers") + + # List tables in workspace + tables = workspace.list_tables() + + # Read parquet back + df = read_parquet(workspace, "customers") +""" + +# Workspace management +from data_formulator.datalake.workspace import ( + Workspace, + WorkspaceWithTempData, + get_workspace, + get_default_workspace_root, + DATALAKE_ROOT_ENV, +) + +# Metadata types and operations +from data_formulator.datalake.metadata import ( + TableMetadata, + ColumnInfo, + WorkspaceMetadata, + load_metadata, + save_metadata, + metadata_exists, + METADATA_VERSION, + METADATA_FILENAME, +) + +# File operations (for user uploads) +from data_formulator.datalake.file_manager import ( + save_uploaded_file, + save_uploaded_file_from_path, + is_supported_file, + get_file_type, + get_file_info, + SUPPORTED_EXTENSIONS, +) + +# Parquet operations (for data loaders) +from data_formulator.datalake.parquet_manager import ( + write_parquet, + read_parquet, + get_parquet_schema, + refresh_parquet, + DEFAULT_COMPRESSION, +) + +__all__ = [ + # Workspace + "Workspace", + "WorkspaceWithTempData", + "get_workspace", + "get_default_workspace_root", + "DATALAKE_ROOT_ENV", + # Metadata + "TableMetadata", + "ColumnInfo", + "WorkspaceMetadata", + "load_metadata", + "save_metadata", + "metadata_exists", + "METADATA_VERSION", + "METADATA_FILENAME", + # File manager + "save_uploaded_file", + "save_uploaded_file_from_path", + "get_supported_extensions", + "is_supported_file", + "get_file_type", + "get_file_info", + "SUPPORTED_EXTENSIONS", + # Parquet manager + "write_parquet", + "read_parquet", + "get_parquet_schema", + "refresh_parquet", + "DEFAULT_COMPRESSION", +] diff --git a/py-src/data_formulator/datalake/file_manager.py b/py-src/data_formulator/datalake/file_manager.py new file mode 100644 index 00000000..bab8200d --- /dev/null +++ b/py-src/data_formulator/datalake/file_manager.py @@ -0,0 +1,320 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +""" +File manager for user-uploaded files in the Data Lake. + +This module handles storing user-uploaded files (CSV, Excel, TXT, HTML, JSON, PDF) +as-is in the workspace without conversion. 
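+
+Example (illustrative, mirroring the datalake package docstring):
+
+    workspace = Workspace("user:123")
+    with open("sales.csv", "rb") as f:
+        metadata = save_uploaded_file(workspace, f.read(), "sales.csv")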
+""" + +import hashlib +import logging +import os +from datetime import datetime, timezone +from pathlib import Path +from typing import BinaryIO, Union + +from data_formulator.datalake.metadata import TableMetadata, make_json_safe +from data_formulator.datalake.workspace import Workspace + +logger = logging.getLogger(__name__) + +# Supported file extensions for upload +SUPPORTED_EXTENSIONS = { + '.csv': 'csv', + '.xlsx': 'excel', + '.xls': 'excel', + '.txt': 'txt', + '.html': 'html', + '.htm': 'html', + '.json': 'json', + '.pdf': 'pdf', +} + + +def is_supported_file(filename: str) -> bool: + ext = Path(filename).suffix.lower() + return ext in SUPPORTED_EXTENSIONS + + +def get_file_type(filename: str) -> str | None: + """ + Get the file type based on extension. + + Args: + filename: Name of the file + + Returns: + File type string (e.g., 'csv', 'excel') or None if unsupported + """ + ext = Path(filename).suffix.lower() + return SUPPORTED_EXTENSIONS.get(ext) + + +def compute_file_hash(content: bytes) -> str: + """ + Compute MD5 hash of file content. + + Args: + content: File content as bytes + + Returns: + MD5 hash as hex string + """ + return hashlib.md5(content).hexdigest() + + +def sanitize_table_name(name: str) -> str: + """ + Sanitize a string to be a valid table name. + + Args: + name: Original name + + Returns: + Sanitized name suitable for use as a table identifier + """ + # Remove extension if present + name = Path(name).stem + + # Replace invalid characters with underscores + sanitized = [] + for char in name: + if char.isalnum() or char == '_': + sanitized.append(char) + else: + sanitized.append('_') + + result = ''.join(sanitized) + + # Ensure it starts with a letter or underscore + if result and not (result[0].isalpha() or result[0] == '_'): + result = '_' + result + + # Ensure it's not empty + if not result: + result = '_unnamed' + + return result.lower() + + +def generate_unique_filename( + workspace: Workspace, + desired_filename: str, +) -> str: + """ + Generate a unique filename if the desired one already exists. + + Args: + workspace: The workspace to check + desired_filename: The desired filename + + Returns: + A unique filename (may be the original if it doesn't exist) + """ + if not workspace.file_exists(desired_filename): + return desired_filename + + # Split filename and extension + path = Path(desired_filename) + stem = path.stem + suffix = path.suffix + + # Try adding numbers until we find a unique name + counter = 1 + while True: + new_filename = f"{stem}_{counter}{suffix}" + if not workspace.file_exists(new_filename): + return new_filename + counter += 1 + if counter > 1000: # Safety limit + raise ValueError(f"Could not generate unique filename for {desired_filename}") + + +def save_uploaded_file( + workspace: Workspace, + file_content: Union[bytes, BinaryIO], + filename: str, + table_name: str | None = None, + overwrite: bool = False, +) -> TableMetadata: + """ + Save an uploaded file to the workspace. + + The file is stored as-is without conversion. Metadata is added to track + the file in the workspace. + + Args: + workspace: The workspace to save to + file_content: File content as bytes or file-like object + filename: Original filename (used for extension detection) + table_name: Name to use for the table. If None, derived from filename. + overwrite: If True, overwrite existing file. If False, generate unique name. 
+ + Returns: + TableMetadata for the saved file + + Raises: + ValueError: If file type is not supported + """ + # Validate file type + file_type = get_file_type(filename) + if file_type is None: + raise ValueError( + f"Unsupported file type: {filename}. " + f"Supported extensions: {', '.join(SUPPORTED_EXTENSIONS.keys())}" + ) + + # Read content if it's a file-like object + if hasattr(file_content, 'read'): + content = file_content.read() + else: + content = file_content + + # Best-effort preview sample rows for supported structured formats. + # (Never fail the upload if parsing fails.) + sample_rows = None + try: + import pandas as pd + from io import BytesIO + + sample_limit = 50 + bio = BytesIO(content) + if file_type == "csv": + df_sample = pd.read_csv(bio, nrows=sample_limit) + elif file_type == "excel": + df_sample = pd.read_excel(bio, nrows=sample_limit) + elif file_type == "json": + # pd.read_json reads entire input; cap after load. + df_sample = pd.read_json(bio).head(sample_limit) + else: + df_sample = None + + if df_sample is not None: + # Replace NaN/NaT with None for JSON/YAML friendliness + df_sample = df_sample.astype(object).where(pd.notnull(df_sample), None) + sample_rows = make_json_safe(df_sample.to_dict(orient="records")) + except Exception: + sample_rows = None + + # Determine the actual filename to use + if overwrite: + actual_filename = filename + else: + actual_filename = generate_unique_filename(workspace, filename) + + # Determine table name + if table_name is None: + table_name = sanitize_table_name(actual_filename) + + # Ensure table name is unique in metadata + metadata = workspace.get_metadata() + if table_name in metadata.tables and not overwrite: + # Generate unique table name + base_name = table_name + counter = 1 + while table_name in metadata.tables: + table_name = f"{base_name}_{counter}" + counter += 1 + + # Write the file + file_path = workspace.get_file_path(actual_filename) + with open(file_path, 'wb') as f: + f.write(content) + + # Compute hash and size + content_hash = compute_file_hash(content) + file_size = len(content) + + # Create metadata + table_metadata = TableMetadata( + name=table_name, + source_type="upload", + filename=actual_filename, + file_type=file_type, + created_at=datetime.now(timezone.utc), + content_hash=content_hash, + file_size=file_size, + sample_rows=sample_rows, + ) + + # Save metadata + workspace.add_table_metadata(table_metadata) + + logger.info( + f"Saved uploaded file {actual_filename} as table {table_name} " + f"({file_size} bytes, hash={content_hash[:8]}...)" + ) + + return table_metadata + + +def save_uploaded_file_from_path( + workspace: Workspace, + source_path: Union[str, Path], + table_name: str | None = None, + overwrite: bool = False, +) -> TableMetadata: + """ + Save a file from a local path to the workspace. + + Args: + workspace: The workspace to save to + source_path: Path to the source file + table_name: Name to use for the table. If None, derived from filename. + overwrite: If True, overwrite existing file. 
+ + Returns: + TableMetadata for the saved file + """ + source_path = Path(source_path) + + if not source_path.exists(): + raise FileNotFoundError(f"Source file not found: {source_path}") + + with open(source_path, 'rb') as f: + content = f.read() + + return save_uploaded_file( + workspace=workspace, + file_content=content, + filename=source_path.name, + table_name=table_name, + overwrite=overwrite, + ) + + +def get_file_info(workspace: Workspace, table_name: str) -> dict | None: + """ + Get information about an uploaded file. + + Args: + workspace: The workspace + table_name: Name of the table + + Returns: + Dictionary with file information or None if not found + """ + table_meta = workspace.get_table_metadata(table_name) + if table_meta is None: + return None + + file_path = workspace.get_file_path(table_meta.filename) + + result = { + "table_name": table_name, + "filename": table_meta.filename, + "file_type": table_meta.file_type, + "file_size": table_meta.file_size, + "content_hash": table_meta.content_hash, + "created_at": table_meta.created_at.isoformat(), + "exists": file_path.exists(), + } + + if file_path.exists(): + stat = file_path.stat() + result["current_size"] = stat.st_size + result["modified_at"] = datetime.fromtimestamp(stat.st_mtime).isoformat() + + return result diff --git a/py-src/data_formulator/datalake/metadata.py b/py-src/data_formulator/datalake/metadata.py new file mode 100644 index 00000000..3221f90d --- /dev/null +++ b/py-src/data_formulator/datalake/metadata.py @@ -0,0 +1,299 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +""" +Metadata management for the Data Lake workspace. + +This module defines the schema and operations for workspace.yaml, +which tracks all data sources (uploaded files and data loader ingests). +""" + +from dataclasses import dataclass, field, asdict +from datetime import datetime, date, timezone +from decimal import Decimal +from pathlib import Path +from typing import Literal, Any +import yaml +import logging + +logger = logging.getLogger(__name__) + +METADATA_VERSION = "1.1" +METADATA_FILENAME = "workspace.yaml" + + +def make_json_safe(value: Any) -> Any: + """ + Convert a value (possibly containing numpy/pandas/pyarrow scalars) into + a JSON/YAML-safe primitive structure. + """ + if value is None or isinstance(value, (bool, int, float, str)): + return value + + if isinstance(value, (datetime, date)): + return value.isoformat() + + if isinstance(value, Decimal): + return str(value) + + if isinstance(value, Path): + return str(value) + + if isinstance(value, dict): + return {str(k): make_json_safe(v) for k, v in value.items()} + + if isinstance(value, (list, tuple)): + return [make_json_safe(v) for v in value] + + # numpy scalars, pandas scalars, etc. 
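+    # Anything exposing .item() (e.g. numpy.int64, numpy.float64) is unwrapped to a
+    # plain Python scalar; values that still cannot be converted fall back to str().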
+ item = getattr(value, "item", None) + if callable(item): + try: + return make_json_safe(item()) + except Exception: + pass + + return str(value) + + +@dataclass +class ColumnInfo: + """Information about a single column in a table.""" + name: str + dtype: str + + def to_dict(self) -> dict: + return {"name": self.name, "dtype": self.dtype} + + @classmethod + def from_dict(cls, data: dict) -> "ColumnInfo": + return cls(name=data["name"], dtype=data["dtype"]) + + +@dataclass +class TableMetadata: + """Metadata for a single table/file in the workspace.""" + name: str + source_type: Literal["upload", "data_loader"] + filename: str + file_type: str + created_at: datetime + content_hash: str | None = None + file_size: int | None = None + # For data_loader sources: + loader_type: str | None = None + loader_params: dict | None = None + source_table: str | None = None + source_query: str | None = None + last_synced: datetime | None = None + row_count: int | None = None + columns: list[ColumnInfo] | None = None + # Small set of representative rows for previewing (list of records). + sample_rows: list[dict[str, Any]] | None = None + + def to_dict(self) -> dict: + """Convert to dictionary for YAML serialization.""" + result = { + "source_type": self.source_type, + "filename": self.filename, + "file_type": self.file_type, + "created_at": self.created_at.isoformat(), + } + + if self.content_hash is not None: + result["content_hash"] = self.content_hash + if self.file_size is not None: + result["file_size"] = self.file_size + if self.loader_type is not None: + result["loader_type"] = self.loader_type + if self.loader_params is not None: + result["loader_params"] = self.loader_params + if self.source_table is not None: + result["source_table"] = self.source_table + if self.source_query is not None: + result["source_query"] = self.source_query + if self.last_synced is not None: + result["last_synced"] = self.last_synced.isoformat() + if self.row_count is not None: + result["row_count"] = self.row_count + if self.columns is not None: + result["columns"] = [col.to_dict() for col in self.columns] + if self.sample_rows is not None: + result["sample_rows"] = make_json_safe(self.sample_rows) + + return result + + @classmethod + def from_dict(cls, name: str, data: dict) -> "TableMetadata": + """Create from dictionary (YAML deserialization).""" + columns = None + if "columns" in data and data["columns"] is not None: + columns = [ColumnInfo.from_dict(col) for col in data["columns"]] + + created_at = data["created_at"] + if isinstance(created_at, str): + created_at = datetime.fromisoformat(created_at) + + last_synced = data.get("last_synced") + if isinstance(last_synced, str): + last_synced = datetime.fromisoformat(last_synced) + + return cls( + name=name, + source_type=data["source_type"], + filename=data["filename"], + file_type=data["file_type"], + created_at=created_at, + content_hash=data.get("content_hash"), + file_size=data.get("file_size"), + loader_type=data.get("loader_type"), + loader_params=data.get("loader_params"), + source_table=data.get("source_table"), + source_query=data.get("source_query"), + last_synced=last_synced, + row_count=data.get("row_count"), + columns=columns, + sample_rows=data.get("sample_rows"), + ) + + +@dataclass +class WorkspaceMetadata: + """Metadata for the entire workspace.""" + version: str + created_at: datetime + updated_at: datetime + tables: dict[str, TableMetadata] = field(default_factory=dict) + + def add_table(self, table: TableMetadata) -> None: + """Add or update a 
table in the metadata.""" + self.tables[table.name] = table + self.updated_at = datetime.now(timezone.utc) + + def remove_table(self, name: str) -> bool: + """Remove a table from the metadata. Returns True if removed.""" + if name in self.tables: + del self.tables[name] + self.updated_at = datetime.now(timezone.utc) + return True + return False + + def get_table(self, name: str) -> TableMetadata | None: + """Get metadata for a specific table.""" + return self.tables.get(name) + + def list_tables(self) -> list[str]: + """List all table names.""" + return list(self.tables.keys()) + + def to_dict(self) -> dict: + """Convert to dictionary for YAML serialization.""" + return { + "version": self.version, + "created_at": self.created_at.isoformat(), + "updated_at": self.updated_at.isoformat(), + "tables": { + name: table.to_dict() + for name, table in self.tables.items() + }, + } + + @classmethod + def from_dict(cls, data: dict) -> "WorkspaceMetadata": + """Create from dictionary (YAML deserialization).""" + created_at = data["created_at"] + if isinstance(created_at, str): + created_at = datetime.fromisoformat(created_at) + + updated_at = data["updated_at"] + if isinstance(updated_at, str): + updated_at = datetime.fromisoformat(updated_at) + + tables = {} + tables_data = data.get("tables", {}) + if tables_data: + for name, table_data in tables_data.items(): + tables[name] = TableMetadata.from_dict(name, table_data) + + return cls( + version=data["version"], + created_at=created_at, + updated_at=updated_at, + tables=tables, + ) + + @classmethod + def create_new(cls) -> "WorkspaceMetadata": + """Create a new empty workspace metadata.""" + now = datetime.now(timezone.utc) + return cls( + version=METADATA_VERSION, + created_at=now, + updated_at=now, + tables={}, + ) + + +def load_metadata(workspace_path: Path) -> WorkspaceMetadata: + """ + Load workspace metadata from YAML file. + + Args: + workspace_path: Path to the workspace directory + + Returns: + WorkspaceMetadata object + + Raises: + FileNotFoundError: If metadata file doesn't exist + ValueError: If metadata file is invalid + """ + metadata_file = workspace_path / METADATA_FILENAME + + if not metadata_file.exists(): + raise FileNotFoundError(f"Metadata file not found: {metadata_file}") + + try: + with open(metadata_file, "r", encoding="utf-8") as f: + data = yaml.safe_load(f) + + if data is None: + raise ValueError("Empty metadata file") + + return WorkspaceMetadata.from_dict(data) + + except yaml.YAMLError as e: + raise ValueError(f"Invalid YAML in metadata file: {e}") + + +def save_metadata(workspace_path: Path, metadata: WorkspaceMetadata) -> None: + """ + Save workspace metadata to YAML file. 
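+    The workspace directory is created if it does not exist, and the metadata's
+    updated_at timestamp is refreshed before writing.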
+ + Args: + workspace_path: Path to the workspace directory + metadata: WorkspaceMetadata object to save + """ + metadata_file = workspace_path / METADATA_FILENAME + + # Update the updated_at timestamp + metadata.updated_at = datetime.now(timezone.utc) + + # Ensure directory exists + workspace_path.mkdir(parents=True, exist_ok=True) + + with open(metadata_file, "w", encoding="utf-8") as f: + yaml.dump( + metadata.to_dict(), + f, + default_flow_style=False, + allow_unicode=True, + sort_keys=False, + ) + + logger.debug(f"Saved metadata to {metadata_file}") + + +def metadata_exists(workspace_path: Path) -> bool: + """Check if workspace metadata file exists.""" + return (workspace_path / METADATA_FILENAME).exists() diff --git a/py-src/data_formulator/datalake/parquet_manager.py b/py-src/data_formulator/datalake/parquet_manager.py new file mode 100644 index 00000000..e26035f1 --- /dev/null +++ b/py-src/data_formulator/datalake/parquet_manager.py @@ -0,0 +1,621 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +""" +Parquet manager for the Data Lake. + +This module handles writing DataFrames to parquet files using pyarrow. +Used primarily for data ingested from external data loaders. +""" + +import hashlib +import logging +from datetime import datetime, timezone +from pathlib import Path +from typing import Any, Optional + +import pandas as pd +import pyarrow as pa +import pyarrow.parquet as pq + +from data_formulator.datalake.metadata import TableMetadata, ColumnInfo, make_json_safe +from data_formulator.datalake.workspace import Workspace + +logger = logging.getLogger(__name__) + +# Default compression for parquet files +DEFAULT_COMPRESSION = "snappy" +# Default number of rows to persist in metadata for preview +DEFAULT_METADATA_SAMPLE_ROWS = 50 + + +def get_sample_rows_from_arrow(table: pa.Table, limit: int = DEFAULT_METADATA_SAMPLE_ROWS) -> list[dict[str, Any]]: + """ + Get a small sample of rows from an Arrow table as JSON/YAML-safe records. + """ + if table.num_rows <= 0 or limit <= 0: + return [] + sample = table.slice(0, min(limit, table.num_rows)) + # Arrow -> python list[dict] + return make_json_safe(sample.to_pylist()) + + +def get_arrow_column_info(table: pa.Table) -> list[ColumnInfo]: + """ + Extract column information from a PyArrow Table. + + Args: + table: PyArrow Table to analyze + + Returns: + List of ColumnInfo objects + """ + columns = [] + for field in table.schema: + columns.append(ColumnInfo(name=field.name, dtype=str(field.type))) + return columns + + +def compute_arrow_table_hash(table: pa.Table, sample_rows: int = 100) -> str: + """ + Compute a hash representing the Arrow Table content. + + Uses row count, column names, and sampled rows for efficiency. 
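+    The result is an MD5 digest intended only for change detection (see
+    refresh_parquet_from_arrow), not for integrity or security purposes.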
+ + Args: + table: Arrow Table to hash + sample_rows: Number of rows to sample for hashing + + Returns: + MD5 hash as hex string + """ + hash_parts = [ + f"rows:{table.num_rows}", + f"cols:{','.join(table.column_names)}", + ] + + if table.num_rows > 0: + # Sample rows for hashing + if table.num_rows <= sample_rows: + sample = table + else: + # Take first, last, and random middle rows + n = sample_rows // 3 + indices = ( + list(range(n)) + # first n + list(range(table.num_rows // 4, table.num_rows // 4 + n)) + # middle n + list(range(table.num_rows - n, table.num_rows)) # last n + ) + sample = table.take(indices) + + # Convert sample to string for hashing + hash_parts.append(f"data:{sample.to_string()}") + + content = '|'.join(hash_parts) + return hashlib.md5(content.encode()).hexdigest() + + +def write_parquet_from_arrow( + workspace: Workspace, + table: pa.Table, + table_name: str, + compression: str = DEFAULT_COMPRESSION, + loader_metadata: Optional[dict[str, Any]] = None, +) -> TableMetadata: + """ + Write a PyArrow Table directly to parquet in the workspace. + + This is the preferred method for writing data as it avoids pandas conversion + overhead entirely. Data flows directly: Source → Arrow → Parquet. + + Args: + workspace: The workspace to write to + table: PyArrow Table to write + table_name: Name for the table + compression: Parquet compression algorithm (default: snappy) + loader_metadata: Optional metadata from data loader + + Returns: + TableMetadata for the written file + """ + # Sanitize table name + safe_name = sanitize_table_name(table_name) + filename = f"{safe_name}.parquet" + + # Check if table already exists + metadata = workspace.get_metadata() + if safe_name in metadata.tables: + # Overwrite existing - delete old file first + old_meta = metadata.tables[safe_name] + old_file = workspace.get_file_path(old_meta.filename) + if old_file.exists(): + old_file.unlink() + logger.debug(f"Deleted old parquet file: {old_file}") + + # Write parquet file using pyarrow (direct, no pandas) + file_path = workspace.get_file_path(filename) + + pq.write_table( + table, + file_path, + compression=compression, + ) + + # Get file size + file_size = file_path.stat().st_size + + # Compute content hash from Arrow table + content_hash = compute_arrow_table_hash(table) + + # Get column info from Arrow schema + columns = get_arrow_column_info(table) + + # Get sample rows for preview + sample_rows = get_sample_rows_from_arrow(table) + + # Create metadata + now = datetime.now(timezone.utc) + table_metadata = TableMetadata( + name=safe_name, + source_type="data_loader", + filename=filename, + file_type="parquet", + created_at=now, + content_hash=content_hash, + file_size=file_size, + row_count=table.num_rows, + columns=columns, + sample_rows=sample_rows, + last_synced=now, + ) + + # Add loader metadata if provided + if loader_metadata: + table_metadata.loader_type = loader_metadata.get('loader_type') + table_metadata.loader_params = loader_metadata.get('loader_params') + table_metadata.source_table = loader_metadata.get('source_table') + table_metadata.source_query = loader_metadata.get('source_query') + + # Save metadata + workspace.add_table_metadata(table_metadata) + + logger.info( + f"Wrote parquet file {filename} with {table.num_rows} rows, " + f"{table.num_columns} columns ({file_size} bytes) [Arrow-native]" + ) + + return table_metadata + + +def sanitize_table_name(name: str) -> str: + """ + Sanitize a string to be a valid table/file name. 
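+    For example, "My Table-2024" becomes "my_table_2024".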
+ + Args: + name: Original name + + Returns: + Sanitized name + """ + # Replace invalid characters with underscores + sanitized = [] + for char in name: + if char.isalnum() or char == '_': + sanitized.append(char) + else: + sanitized.append('_') + + result = ''.join(sanitized) + + # Ensure it starts with a letter or underscore + if result and not (result[0].isalpha() or result[0] == '_'): + result = '_' + result + + # Ensure it's not empty + if not result: + result = '_unnamed' + + return result.lower() + + +def get_unique_table_name(workspace: Workspace, base_name: str) -> str: + """ + Return a table name that does not clash with existing tables in the workspace. + + If the sanitized base_name is free, it is returned. Otherwise tries + base_1, base_2, ... until an unused name is found. + + Args: + workspace: The workspace to check for existing table names + base_name: Desired base name (will be sanitized) + + Returns: + A table name that is not yet in the workspace + """ + safe_base = sanitize_table_name(base_name) + existing = set(workspace.list_tables()) + candidate = safe_base + suffix = 0 + while candidate in existing: + suffix += 1 + candidate = f"{safe_base}_{suffix}" + return candidate + + +def compute_dataframe_hash(df: pd.DataFrame, sample_rows: int = 100) -> str: + """ + Compute a hash representing the DataFrame content. + + Uses row count, column names, and sampled rows for efficiency. + + Args: + df: DataFrame to hash + sample_rows: Number of rows to sample for hashing + + Returns: + MD5 hash as hex string + """ + hash_parts = [ + f"rows:{len(df)}", + f"cols:{','.join(df.columns.tolist())}", + ] + + if len(df) > 0: + # Sample rows for hashing + if len(df) <= sample_rows: + sample = df + else: + # Take first, last, and random middle rows + n = sample_rows // 3 + first = df.head(n) + last = df.tail(n) + middle = df.iloc[len(df)//4:len(df)*3//4].sample(min(n, len(df)//2), random_state=42) + sample = pd.concat([first, middle, last]) + + # Convert sample to string for hashing + hash_parts.append(f"data:{sample.to_string()}") + + content = '|'.join(hash_parts) + return hashlib.md5(content.encode()).hexdigest() + + +def get_column_info(df: pd.DataFrame) -> list[ColumnInfo]: + """ + Extract column information from a DataFrame. + + Args: + df: DataFrame to analyze + + Returns: + List of ColumnInfo objects + """ + columns = [] + for col_name in df.columns: + dtype = str(df[col_name].dtype) + columns.append(ColumnInfo(name=str(col_name), dtype=dtype)) + return columns + + +def write_parquet( + workspace: Workspace, + df: pd.DataFrame, + table_name: str, + compression: str = DEFAULT_COMPRESSION, + loader_metadata: Optional[dict[str, Any]] = None, +) -> TableMetadata: + """ + Write a DataFrame to parquet in the workspace. + + Args: + workspace: The workspace to write to + df: DataFrame to write + table_name: Name for the table + compression: Parquet compression algorithm (default: snappy) + loader_metadata: Optional metadata from data loader (loader_type, loader_params, etc.) 
+ + Returns: + TableMetadata for the written file + """ + # Sanitize table name + safe_name = sanitize_table_name(table_name) + filename = f"{safe_name}.parquet" + + # Check if table already exists + metadata = workspace.get_metadata() + if safe_name in metadata.tables: + # Overwrite existing - delete old file first + old_meta = metadata.tables[safe_name] + old_file = workspace.get_file_path(old_meta.filename) + if old_file.exists(): + old_file.unlink() + logger.debug(f"Deleted old parquet file: {old_file}") + + # Write parquet file using pyarrow + file_path = workspace.get_file_path(filename) + + # Convert DataFrame to PyArrow Table + table = pa.Table.from_pandas(df) + + # Get sample rows for preview (before writing) + sample_rows = get_sample_rows_from_arrow(table) + + # Write to parquet + pq.write_table( + table, + file_path, + compression=compression, + ) + + # Get file size + file_size = file_path.stat().st_size + + # Compute content hash + content_hash = compute_dataframe_hash(df) + + # Get column info + columns = get_column_info(df) + + # Create metadata + now = datetime.now(timezone.utc) + table_metadata = TableMetadata( + name=safe_name, + source_type="data_loader", + filename=filename, + file_type="parquet", + created_at=now, + content_hash=content_hash, + file_size=file_size, + row_count=len(df), + columns=columns, + sample_rows=sample_rows, + last_synced=now, + ) + + # Add loader metadata if provided + if loader_metadata: + table_metadata.loader_type = loader_metadata.get('loader_type') + table_metadata.loader_params = loader_metadata.get('loader_params') + table_metadata.source_table = loader_metadata.get('source_table') + table_metadata.source_query = loader_metadata.get('source_query') + + # Save metadata + workspace.add_table_metadata(table_metadata) + + logger.info( + f"Wrote parquet file {filename} with {len(df)} rows, " + f"{len(df.columns)} columns ({file_size} bytes)" + ) + + return table_metadata + + +def read_parquet_as_arrow(workspace: Workspace, table_name: str) -> pa.Table: + """ + Read a parquet file from the workspace as a PyArrow Table. + + This is the preferred method for reading as it avoids pandas conversion. + + Args: + workspace: The workspace to read from + table_name: Name of the table + + Returns: + PyArrow Table with the data + + Raises: + FileNotFoundError: If the parquet file doesn't exist + ValueError: If the table is not a parquet file + """ + # Get table metadata + table_meta = workspace.get_table_metadata(table_name) + if table_meta is None: + raise FileNotFoundError(f"Table not found: {table_name}") + + if table_meta.file_type != "parquet": + raise ValueError( + f"Table {table_name} is not a parquet file " + f"(file_type={table_meta.file_type})" + ) + + file_path = workspace.get_file_path(table_meta.filename) + if not file_path.exists(): + raise FileNotFoundError(f"Parquet file not found: {file_path}") + + # Read parquet file as Arrow table + table = pq.read_table(file_path) + + logger.debug(f"Read parquet file {table_meta.filename}: {table.num_rows} rows [Arrow-native]") + + return table + + +def read_parquet(workspace: Workspace, table_name: str) -> pd.DataFrame: + """ + Read a parquet file from the workspace as a pandas DataFrame. + + For better performance, consider using `read_parquet_as_arrow()` instead. 
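+    (Internally this reads via Arrow and then converts the table to pandas.)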
+ + Args: + workspace: The workspace to read from + table_name: Name of the table + + Returns: + DataFrame with the data + + Raises: + FileNotFoundError: If the parquet file doesn't exist + ValueError: If the table is not a parquet file + """ + table = read_parquet_as_arrow(workspace, table_name) + return table.to_pandas() + + +def get_parquet_schema(workspace: Workspace, table_name: str) -> dict: + """ + Get the schema of a parquet file without reading all data. + + Args: + workspace: The workspace + table_name: Name of the table + + Returns: + Dictionary with schema information + + Raises: + FileNotFoundError: If the table doesn't exist + """ + table_meta = workspace.get_table_metadata(table_name) + if table_meta is None: + raise FileNotFoundError(f"Table not found: {table_name}") + + if table_meta.file_type != "parquet": + raise ValueError(f"Table {table_name} is not a parquet file") + + file_path = workspace.get_file_path(table_meta.filename) + if not file_path.exists(): + raise FileNotFoundError(f"Parquet file not found: {file_path}") + + # Read schema only + parquet_file = pq.ParquetFile(file_path) + schema = parquet_file.schema_arrow + + return { + "table_name": table_name, + "filename": table_meta.filename, + "num_rows": parquet_file.metadata.num_rows, + "num_columns": len(schema), + "columns": [ + { + "name": field.name, + "type": str(field.type), + "nullable": field.nullable, + } + for field in schema + ], + "created_at": table_meta.created_at.isoformat(), + "last_synced": table_meta.last_synced.isoformat() if table_meta.last_synced else None, + } + + +def get_parquet_path(workspace: Workspace, table_name: str) -> Path: + """ + Return the filesystem path of the parquet file for a table. + + Args: + workspace: The workspace + table_name: Name of the table + + Returns: + Resolved Path to the parquet file + + Raises: + FileNotFoundError: If the table doesn't exist + ValueError: If the table is not a parquet file + """ + table_meta = workspace.get_table_metadata(table_name) + if table_meta is None: + table_meta = workspace.get_table_metadata(sanitize_table_name(table_name)) + if table_meta is None: + raise FileNotFoundError(f"Table not found: {table_name}") + if table_meta.file_type != "parquet": + raise ValueError(f"Table {table_name} is not a parquet file") + path = workspace.get_file_path(table_meta.filename) + if not path.exists(): + raise FileNotFoundError(f"Parquet file not found: {path}") + return path.resolve() + + +def refresh_parquet_from_arrow( + workspace: Workspace, + table_name: str, + table: pa.Table, + compression: str = DEFAULT_COMPRESSION, +) -> tuple[TableMetadata, bool]: + """ + Refresh a parquet file with new data from a PyArrow Table. + + This is the preferred method as it avoids pandas conversion. + Compares content hash to determine if data actually changed. 
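+    If the hash is unchanged, the existing parquet file is left untouched and only
+    the last_synced timestamp is updated.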
+ + Args: + workspace: The workspace + table_name: Name of the table to refresh + table: New PyArrow Table + compression: Parquet compression algorithm + + Returns: + Tuple of (new TableMetadata, bool indicating if data changed) + + Raises: + FileNotFoundError: If the table doesn't exist + """ + # Get existing metadata + old_meta = workspace.get_table_metadata(table_name) + if old_meta is None: + raise FileNotFoundError(f"Table not found: {table_name}") + + # Compute new hash from Arrow table + new_hash = compute_arrow_table_hash(table) + + # Check if data changed + data_changed = old_meta.content_hash != new_hash + + if not data_changed: + # Update last_synced timestamp only + old_meta.last_synced = datetime.now(timezone.utc) + workspace.add_table_metadata(old_meta) + logger.info(f"Table {table_name} unchanged (hash: {new_hash[:8]}...)") + return old_meta, False + + # Data changed - rewrite the file + # Preserve loader metadata from old entry + loader_metadata = { + 'loader_type': old_meta.loader_type, + 'loader_params': old_meta.loader_params, + 'source_table': old_meta.source_table, + 'source_query': old_meta.source_query, + } + + new_meta = write_parquet_from_arrow( + workspace=workspace, + table=table, + table_name=table_name, + compression=compression, + loader_metadata=loader_metadata, + ) + + logger.info( + f"Refreshed table {table_name}: " + f"{old_meta.row_count} -> {new_meta.row_count} rows [Arrow-native]" + ) + + return new_meta, True + + +def refresh_parquet( + workspace: Workspace, + table_name: str, + df: pd.DataFrame, + compression: str = DEFAULT_COMPRESSION, +) -> tuple[TableMetadata, bool]: + """ + Refresh a parquet file with new data from a pandas DataFrame. + + For better performance, consider using `refresh_parquet_from_arrow()` instead. + Compares content hash to determine if data actually changed. + + Args: + workspace: The workspace + table_name: Name of the table to refresh + df: New DataFrame + compression: Parquet compression algorithm + + Returns: + Tuple of (new TableMetadata, bool indicating if data changed) + + Raises: + FileNotFoundError: If the table doesn't exist + """ + # Convert DataFrame to Arrow table + table = pa.Table.from_pandas(df) + return refresh_parquet_from_arrow(workspace, table_name, table, compression) diff --git a/py-src/data_formulator/datalake/workspace.py b/py-src/data_formulator/datalake/workspace.py new file mode 100644 index 00000000..1e32f29c --- /dev/null +++ b/py-src/data_formulator/datalake/workspace.py @@ -0,0 +1,249 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +""" +Workspace management for the Data Lake. + +Each user has a workspace directory identified by their identity_id. +The workspace contains all their data files (uploaded and ingested) +plus a workspace.yaml metadata file. +""" + +import os +import shutil +import tempfile +import logging +from pathlib import Path +from typing import Any, Optional + +from data_formulator.datalake.metadata import ( + WorkspaceMetadata, + TableMetadata, + load_metadata, + save_metadata, + metadata_exists, +) + +logger = logging.getLogger(__name__) + +# Environment variable for configuring workspace root +DATALAKE_ROOT_ENV = "DATALAKE_ROOT" + +# Default subdirectory name under temp for workspaces +DEFAULT_WORKSPACE_SUBDIR = "data_formulator_workspaces" + + +def get_default_workspace_root() -> Path: + """ + Get the default workspace root directory. + + Uses DATALAKE_ROOT env variable if set, otherwise uses system temp directory. 
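+    For example, with DATALAKE_ROOT unset this typically resolves to
+    /tmp/data_formulator_workspaces on Linux.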
+ """ + env_root = os.getenv(DATALAKE_ROOT_ENV) + if env_root: + return Path(env_root) + return Path(tempfile.gettempdir()) / DEFAULT_WORKSPACE_SUBDIR + + +class Workspace: + """ + Manages a user's workspace directory in the Data Lake. + + The workspace contains: + - workspace.yaml: Metadata file tracking all data sources + - Data files: User uploaded files (CSV, Excel, etc.) and parquet files from data loaders + + All files are stored in a single flat directory per user. + """ + + def __init__(self, identity_id: str, root_dir: Optional[str | Path] = None): + """ + Initialize a workspace for a user. + + Args: + identity_id: Unique identifier for the user (e.g., "user:123" or "browser:abc") + root_dir: Root directory for all workspaces. If None, uses default. + """ + if not identity_id: + raise ValueError("identity_id cannot be empty") + + # Sanitize identity_id for filesystem safety + self._identity_id = identity_id + self._safe_id = self._sanitize_identity_id(identity_id) + + # Determine root directory + if root_dir is None: + self._root = get_default_workspace_root() + else: + self._root = Path(root_dir) + + # Workspace path is root / sanitized_identity_id + self._path = self._root / self._safe_id + + # Ensure workspace directory exists + self._path.mkdir(parents=True, exist_ok=True) + + # Initialize metadata if it doesn't exist + if not metadata_exists(self._path): + self._init_metadata() + + logger.debug(f"Initialized workspace at {self._path}") + + @staticmethod + def _sanitize_identity_id(identity_id: str) -> str: + """ + Sanitize identity_id for use as a directory name. + + Replaces potentially problematic characters with underscores. + """ + # Replace colons, slashes, and other special characters + safe_chars = [] + for char in identity_id: + if char.isalnum() or char in ('_', '-'): + safe_chars.append(char) + else: + safe_chars.append('_') + return ''.join(safe_chars) + + def _init_metadata(self) -> None: + """Initialize a new workspace with empty metadata.""" + metadata = WorkspaceMetadata.create_new() + save_metadata(self._path, metadata) + logger.info(f"Initialized new workspace metadata at {self._path}") + + def get_file_path(self, filename: str) -> Path: + """ + Get the full path for a file in the workspace. + + Args: + filename: Name of the file + + Returns: + Full path to the file + """ + # Prevent directory traversal attacks + safe_filename = Path(filename).name + return self._path / safe_filename + + def file_exists(self, filename: str) -> bool: + """ + Check if a file exists in the workspace. + + Args: + filename: Name of the file + + Returns: + True if file exists, False otherwise + """ + return self.get_file_path(filename).exists() + + + def delete_table(self, table_name: str) -> bool: + """ + Delete a table by name (removes both file and metadata). 
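+
+        Illustrative call (the table name is hypothetical)::
+
+            ws = get_workspace("browser:abc")
+            removed = ws.delete_table("old_upload")   # False if the table doesn't exist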
+ + Args: + table_name: Name of the table to delete + + Returns: + True if table was deleted, False if it didn't exist + """ + metadata = self.get_metadata() + table = metadata.get_table(table_name) + + if table is None: + return False + + # Delete the file + file_path = self.get_file_path(table.filename) + if file_path.exists(): + file_path.unlink() + + # Remove from metadata + metadata.remove_table(table_name) + self.save_metadata(metadata) + + logger.info(f"Deleted table {table_name} from workspace {self._safe_id}") + return True + + def get_metadata(self) -> WorkspaceMetadata: + return load_metadata(self._path) + + def save_metadata(self, metadata: WorkspaceMetadata) -> None: + save_metadata(self._path, metadata) + + def add_table_metadata(self, table: TableMetadata) -> None: + metadata = self.get_metadata() + metadata.add_table(table) + self.save_metadata(metadata) + + def get_table_metadata(self, table_name: str) -> Optional[TableMetadata]: + metadata = self.get_metadata() + return metadata.get_table(table_name) + + def list_tables(self) -> list[str]: + metadata = self.get_metadata() + return metadata.list_tables() + + def cleanup(self) -> None: + """ Remove the entire workspace directory. """ + if self._path.exists(): + shutil.rmtree(self._path) + logger.info(f"Cleaned up workspace {self._safe_id}") + + + def __repr__(self) -> str: + return f"Workspace(identity_id={self._identity_id!r}, path={self._path!r})" + + +def get_workspace(identity_id: str, root_dir: Optional[str | Path] = None) -> Workspace: + """ + Get or create a workspace for a user. + + This is a convenience function that creates a Workspace instance. + + Args: + identity_id: Unique identifier for the user + root_dir: Optional root directory for workspaces + + Returns: + Workspace instance + """ + return Workspace(identity_id, root_dir) + + +class WorkspaceWithTempData: + """ + Context manager that temporarily adds temp data (list of {name, rows}) to a workspace + as parquet tables, yields the same workspace, and removes those tables on exit. + + Use when the client sends in-memory data (e.g. language == "python"): wrap the + workspace so temp tables are visible for the block and then cleaned up. 
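+
+    Illustrative usage sketch (the identity and inline data are hypothetical)::
+
+        ws = get_workspace("browser:abc")
+        temp_data = [{"name": "inline_rows", "rows": [{"x": 1}, {"x": 2}]}]
+        with WorkspaceWithTempData(ws, temp_data) as scoped_ws:
+            print(scoped_ws.list_tables())   # the temp table is visible here
+        # on exit the temp table has been deleted again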
+ """ + + def __init__(self, workspace: Workspace, temp_data: Optional[list[dict[str, Any]]] = None): + self._workspace = workspace + self._temp_data = temp_data if temp_data else None + self._added_table_names: list[str] = [] + + def __enter__(self) -> Workspace: + if not self._temp_data: + return self._workspace + import pandas as pd + from data_formulator.datalake.parquet_manager import write_parquet, get_unique_table_name + + for item in self._temp_data: + base_name = item.get("name", "table") + name = get_unique_table_name(self._workspace, base_name) + rows = item.get("rows", []) + df = pd.DataFrame(rows) if rows else pd.DataFrame() + meta = write_parquet(self._workspace, df, name) + self._added_table_names.append(meta.name) + logger.debug(f"Added temp table {meta.name} to workspace for duration of context") + return self._workspace + + def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None: + for name in self._added_table_names: + self._workspace.delete_table(name) + logger.debug(f"Removed temp table {name} from workspace") + self._added_table_names.clear() \ No newline at end of file diff --git a/py-src/data_formulator/db_manager.py b/py-src/data_formulator/db_manager.py deleted file mode 100644 index 66bf2c8c..00000000 --- a/py-src/data_formulator/db_manager.py +++ /dev/null @@ -1,65 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import duckdb -import pandas as pd -from typing import Dict -import tempfile -import os -from contextlib import contextmanager -from dotenv import load_dotenv -import logging - -logger = logging.getLogger(__name__) - -class DuckDBManager: - def __init__(self, local_db_dir: str, disabled: bool = False): - # Store session db file paths - self._db_files: Dict[str, str] = {} - self._local_db_dir: str = local_db_dir - self._disabled: bool = disabled - - def is_disabled(self) -> bool: - """Check if the database manager is disabled""" - return self._disabled - - @contextmanager - def connection(self, session_id: str): - """Get a DuckDB connection as a context manager that will be closed when exiting the context""" - conn = None - try: - conn = self.get_connection(session_id) - yield conn - finally: - if conn: - conn.close() - - def get_connection(self, session_id: str) -> duckdb.DuckDBPyConnection: - """Internal method to get or create a DuckDB connection for a session""" - if self._disabled: - return duckdb.connect(database=":memory:") - - # Get or create the db file path for this session - if session_id not in self._db_files or self._db_files[session_id] is None: - db_dir = self._local_db_dir if self._local_db_dir else tempfile.gettempdir() - if not os.path.exists(db_dir): - db_dir = tempfile.gettempdir() - db_file = os.path.join(db_dir, f"df_{session_id}.duckdb") - logger.debug(f"=== Creating new db file: {db_file}") - self._db_files[session_id] = db_file - else: - logger.debug(f"=== Using existing db file: {self._db_files[session_id]}") - db_file = self._db_files[session_id] - - # Create a fresh connection to the database file - conn = duckdb.connect(database=db_file) - - return conn - -env = load_dotenv() - -# Initialize the DB manager -db_manager = DuckDBManager( - local_db_dir=os.getenv('LOCAL_DB_DIR'), - disabled=os.getenv('DISABLE_DATABASE', 'false').lower() == 'true' -) \ No newline at end of file diff --git a/py-src/data_formulator/demo_stream_routes.py b/py-src/data_formulator/demo_stream_routes.py index fd1229a4..42bb96e5 100644 --- a/py-src/data_formulator/demo_stream_routes.py +++ 
b/py-src/data_formulator/demo_stream_routes.py @@ -30,7 +30,7 @@ import math from datetime import datetime, timedelta from flask import Blueprint, Response, request, jsonify -from typing import List, Dict, Any, Optional +from typing import Any from collections import deque import threading @@ -107,9 +107,9 @@ def make_csv_response(rows: list, filename: str = "data.csv") -> Response: # Thread-safe storage for ISS position history _iss_track_lock = threading.Lock() _iss_track_history: deque = deque(maxlen=10000) # Keep last 10000 positions (~20000 min at 5s intervals) -_iss_last_fetch: Optional[datetime] = None +_iss_last_fetch: datetime | None = None -def _fetch_iss_position() -> Optional[Dict[str, Any]]: +def _fetch_iss_position() -> dict[str, Any] | None: """Fetch current ISS position from API""" try: response = requests.get("http://api.open-notify.org/iss-now.json", timeout=10) @@ -1074,7 +1074,7 @@ def get_yfinance_financials(): # Thread-safe storage for sales transaction history _sales_lock = threading.Lock() _sales_history: deque = deque(maxlen=1000) # Keep last 1000 transactions -_sales_last_update: Optional[datetime] = None +_sales_last_update: datetime | None = None # Products with realistic pricing and popularity _SALES_PRODUCTS = [ @@ -1097,7 +1097,7 @@ def get_yfinance_financials(): _SALES_CHANNEL_WEIGHTS = [0.40, 0.35, 0.15, 0.10] -def _generate_sale_transaction(timestamp: datetime) -> Dict[str, Any]: +def _generate_sale_transaction(timestamp: datetime) -> dict[str, Any]: """Generate a single sale transaction""" product = random.choices(_SALES_PRODUCTS, weights=[p["popularity"] for p in _SALES_PRODUCTS])[0] region = random.choices(_SALES_REGIONS, weights=_SALES_REGION_WEIGHTS)[0] diff --git a/py-src/data_formulator/sandbox/__init__.py b/py-src/data_formulator/sandbox/__init__.py new file mode 100644 index 00000000..26dacade --- /dev/null +++ b/py-src/data_formulator/sandbox/__init__.py @@ -0,0 +1,9 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +from .py_sandbox import ( + run_in_subprocess, + run_in_main_process, + run_transform_in_sandbox2020, + run_derive_concept, +) diff --git a/py-src/data_formulator/py_sandbox.py b/py-src/data_formulator/sandbox/py_sandbox.py similarity index 99% rename from py-src/data_formulator/py_sandbox.py rename to py-src/data_formulator/sandbox/py_sandbox.py index 0bd01228..feb9f010 100644 --- a/py-src/data_formulator/py_sandbox.py +++ b/py-src/data_formulator/sandbox/py_sandbox.py @@ -168,4 +168,4 @@ def run_derive_concept(code, output_field_name, table_rows, exec_python_in_subpr result_df[output_field_name] = result['allowed_objects']['new_column'] return { 'status': 'ok', 'content': result_df } else: - return { 'status': 'error', 'content': result['error_message'] } \ No newline at end of file + return { 'status': 'error', 'content': result['error_message'] } diff --git a/py-src/data_formulator/security/__init__.py b/py-src/data_formulator/security/__init__.py deleted file mode 100644 index a536b8d0..00000000 --- a/py-src/data_formulator/security/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -from .query_validator import validate_sql_query - -__all__ = [ 'validate_sql_query'] \ No newline at end of file diff --git a/py-src/data_formulator/security/query_validator.py b/py-src/data_formulator/security/query_validator.py deleted file mode 100644 index 8aa03db9..00000000 --- a/py-src/data_formulator/security/query_validator.py +++ /dev/null @@ -1,166 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import re -import logging -from typing import Tuple, Dict, Any - -logger = logging.getLogger(__name__) - - -class QueryValidationError(Exception): - """Custom exception for query validation failures""" - pass - - -def normalize_query(query: str) -> str: - """ - Normalize query for case-insensitive matching - """ - query_normalized = re.sub(r'--.*$', '', query, flags=re.MULTILINE) # Single line comments - query_normalized = re.sub(r'/\*.*?\*/', '', query_normalized, flags=re.DOTALL) # Multi-line comments - return query_normalized.strip().lower() - -def validate_sql_query(query: str) -> Tuple[bool, str]: - """ - Simple regex-based SQL query validation for dangerous operations. - - Args: - query: SQL query string to validate - - Returns: - Tuple of (is_valid, error_message) - """ - try: - # Normalize query for case-insensitive matching - query_normalized = normalize_query(query) - - # Remove SQL comments - - - # Define dangerous patterns as regex patterns - dangerous_patterns = { - # File read operations - 'file_read_operations': [ - r'\bread_csv_auto\b', r'\bread_csv\b', r'\bread_json\b', r'\bread_parquet\b', - r'\bread_ndjson\b', r'\bread_delim\b', r'\bread_fwf\b', r'\bread_excel\b', - r'\bread_sql\b', r'\bread_table\b', r'\bread_html\b', r'\bread_xml\b', - r'\bread_feather\b', r'\bread_hdf\b', r'\bread_stata\b', r'\bread_sas\b', - r'\bread_spss\b', r'\bread_rdata\b', r'\bread_rds\b' - ], - - # File write operations - 'file_write_operations': [ - r'\bwrite_csv\b', r'\bwrite_json\b', r'\bwrite_parquet\b', r'\bwrite_excel\b', - r'\bwrite_sql\b', r'\bwrite_table\b', r'\bwrite_html\b', r'\bwrite_xml\b', - r'\bwrite_feather\b', r'\bwrite_hdf\b', r'\bwrite_stata\b', r'\bwrite_sas\b', - r'\bwrite_spss\b', r'\bwrite_rdata\b', r'\bwrite_rds\b' - ], - - # File system operations - 'file_system_operations': [ - r'\bglob\b', r'\bcopy\b', r'\bmove\b', r'\brename\b', r'\bdelete\b', - r'\bremove\b', r'\bunlink\b', r'\bmkdir\b', r'\bmakedirs\b', r'\brmdir\b', - r'\bremovedirs\b', r'\bchmod\b', r'\bchown\b', r'\bsymlink\b', r'\blink\b', - r'\btouch\b', r'\btruncate\b', r'\bwrite\b', r'\bappend\b' - ], - - # System operations - 'system_operations': [ - r'\bsystem\b', r'\bexec\b', r'\beval\b', r'\bcompile\b', r'\bexecfile\b', - r'\binput\b', r'\bos\.system\b', r'\bos\.popen\b', r'\bos\.spawn\b', - r'\bos\.fork\b', r'\bos\.kill\b', r'\bsubprocess\b', r'\bsubprocess\.call\b', - r'\bsubprocess\.run\b', r'\bsubprocess\.popen\b', r'\bsubprocess\.check_call\b', - r'\bsubprocess\.check_output\b' - ], - - # Network operations - 'network_operations': [ - r'\burllib\b', r'\brequests\b', r'\bhttp://\b', r'\bhttps://\b', r'\bftp://\b', - r'\bsmtp\b', r'\bpop3\b', r'\bsocket\b', r'\btelnet\b', r'\bssh\b', r'\bscp\b', - r'\bwget\b', r'\bcurl\b' - ], - - # Shell operations - 'shell_operations': [ - r'\bshell\b', r'\bcmd\b', r'\bbash\b', r'\bsh\b', r'\bpowershell\b', - r'\bcmd\.exe\b', r'\bcommand\b', r'\bexecute\b', r'\brun\b', r'\bcall\b', - r'\binvoke\b' - ], - - # DuckDB dangerous operations - 'duckdb_dangerous_operations': [ - r'\binstall\b', r'\bload\b', r'\bunload\b', 
r'\bexport\b', r'\bimport\b', - r'\bcopy_to\b' - ], - - # SQL injection patterns - 'sql_injection_patterns': [ - r';\s*--', # Comment after semicolon - r';\s*/\*', # Block comment after semicolon - r'\bunion\s+all\s+select\b', # UNION ALL SELECT - r'\bunion\s+select\b', # UNION SELECT - r'\bxp_cmdshell\b', # SQL Server command shell - r'\bsp_executesql\b', # SQL Server dynamic SQL - ], - - # Dangerous SQL keywords - 'dangerous_sql_keywords': [ - r'\binsert\b', r'\bupdate\b', r'\bdelete\b', r'\bdrop\b', r'\bcreate\b', - r'\balter\b', r'\btruncate\b', r'\bgrant\b', r'\brevoke\b', r'\bexecute\b', - r'\bexec\b', r'\bcall\b', r'\bbegin\b', r'\bcommit\b', r'\brollback\b' - ], - - # File path patterns - 'file_path_patterns': [ - r'file://', r'file:///', r'c:\\', r'd:\\', r'e:\\', - r'/etc/', r'/var/', r'/tmp/', r'/home/', r'/root/', - r'/usr/', r'/bin/', r'/sbin/', r'http://', r'https://', - r'ftp://', r'sftp://', r'ssh://' - ] - } - - # Check each category of dangerous patterns - for category, patterns in dangerous_patterns.items(): - for pattern in patterns: - if re.search(pattern, query_normalized, re.IGNORECASE): - return False, f"Dangerous {category.replace('_', ' ')} detected: {pattern}" - - # Check for file paths in string literals - string_literals = re.findall(r"'([^']*)'", query_normalized) + re.findall(r'"([^"]*)"', query_normalized) - for literal in string_literals: - for pattern in dangerous_patterns['file_path_patterns']: - if re.search(pattern, literal, re.IGNORECASE): - return False, f"Dangerous file path detected in string literal: {literal}" - - return True, "Query validation passed" - - except Exception as e: - logger.error(f"Error during query validation: {e}") - return False, f"Query validation error: {str(e)}" - - -def validate_sql_query_strict(query: str) -> Tuple[bool, str]: - """ - Strict validation that only allows SELECT queries and basic operations. 
- - Args: - query: SQL query string to validate - - Returns: - Tuple of (is_valid, error_message) - """ - try: - # Normalize query - query_normalized = normalize_query(query) - - # Check if it's a SELECT query - if not query_normalized.startswith('select'): - return False, "Only SELECT queries are allowed" - - # Perform regular validation - return validate_sql_query(query) - - except Exception as e: - return False, f"Strict validation error: {str(e)}" - diff --git a/py-src/data_formulator/tables_routes.py b/py-src/data_formulator/tables_routes.py index 80bdeba9..574a2504 100644 --- a/py-src/data_formulator/tables_routes.py +++ b/py-src/data_formulator/tables_routes.py @@ -9,660 +9,583 @@ mimetypes.add_type('application/javascript', '.mjs') import json import traceback -from flask import request, send_from_directory, session, jsonify, Blueprint +from flask import request, jsonify, Blueprint import pandas as pd -import random -import string from pathlib import Path -import uuid -from data_formulator.db_manager import db_manager from data_formulator.data_loader import DATA_LOADERS +from data_formulator.auth import get_identity_id +from data_formulator.datalake.workspace import get_workspace +from data_formulator.datalake.parquet_manager import ( + read_parquet, + write_parquet, + get_parquet_schema, + get_parquet_path, + sanitize_table_name as parquet_sanitize_table_name, +) +from data_formulator.datalake.file_manager import save_uploaded_file, is_supported_file +from data_formulator.datalake.metadata import TableMetadata as DatalakeTableMetadata import re -from typing import Tuple # Get logger for this module (logging config done in app.py) logger = logging.getLogger(__name__) import os -import tempfile + +import duckdb tables_bp = Blueprint('tables', __name__, url_prefix='/api/tables') + +def _get_workspace(): + """Get workspace for the current identity.""" + return get_workspace(get_identity_id()) + + +def _is_parquet_table(workspace, table_name: str) -> bool: + """Return True if the table is stored as parquet (so we can use DuckDB for computation).""" + meta = workspace.get_table_metadata(table_name) + return meta is not None and meta.file_type == "parquet" + + +def _run_parquet_sql(workspace, table_name: str, sql: str) -> pd.DataFrame: + """ + Run a DuckDB SQL query that references the parquet file as a table. + The SQL must use the alias 't' for the parquet table, e.g.: + SELECT * FROM read_parquet(...) AS t LIMIT 10 + We inject the read_parquet(path) part so the path is never user-controlled in raw form. + """ + path = get_parquet_path(workspace, table_name) + path_escaped = str(path).replace("\\", "\\\\").replace("'", "''") + # SQL is expected to contain exactly one placeholder: {parquet} + if "{parquet}" not in sql: + raise ValueError("SQL must contain {parquet} placeholder") + full_sql = sql.format(parquet=f"read_parquet('{path_escaped}')") + conn = duckdb.connect(":memory:") + try: + return conn.execute(full_sql).fetchdf() + finally: + conn.close() + + +def _load_table_df(workspace, table_name: str) -> pd.DataFrame: + """ + Load a table from the workspace as a pandas DataFrame. + Supports parquet (via parquet_manager) and uploaded CSV/Excel/JSON (via file path). 
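+
+    Illustrative call pattern, as used by the routes below (the table name is hypothetical)::
+
+        ws = _get_workspace()
+        if _is_parquet_table(ws, "sales"):
+            df = _run_parquet_sql(ws, "sales", "SELECT * FROM {parquet} AS t LIMIT 10")
+        else:
+            df = _load_table_df(ws, "sales")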
+ """ + meta = workspace.get_table_metadata(table_name) + if meta is None: + raise FileNotFoundError(f"Table not found: {table_name}") + if meta.file_type == "parquet": + return read_parquet(workspace, table_name) + file_path = workspace.get_file_path(meta.filename) + if not file_path.exists(): + raise FileNotFoundError(f"File not found: {file_path}") + if meta.file_type == "csv": + return pd.read_csv(file_path) + if meta.file_type == "excel": + return pd.read_excel(file_path) + if meta.file_type == "json": + return pd.read_json(file_path) + raise ValueError(f"Unsupported file type for table {table_name}: {meta.file_type}") + + +def _quote_duckdb(col: str) -> str: + """Quote identifier for DuckDB (double quotes, escape internal quotes).""" + return '"' + str(col).replace('"', '""') + '"' + + +def _build_parquet_sample_sql( + columns: list[str], + aggregate_fields_and_functions: list, + select_fields: list, + method: str, + order_by_fields: list, + sample_size: int, +) -> tuple[str, str]: + """ + Build DuckDB SQL for sampling (and optional aggregation) over parquet. + Returns (main_sql, count_sql) where each contains {parquet} placeholder. + """ + valid_agg = [(f, fn) for (f, fn) in aggregate_fields_and_functions if f is None or f in columns] + valid_select = [f for f in select_fields if f in columns] + valid_order = [f for f in order_by_fields if f in columns] + + if valid_agg: + select_parts = [] + for field, function in valid_agg: + fn = function.lower() + if field is None and fn == "count": + select_parts.append("COUNT(*) AS _count") + elif field in columns: + q = _quote_duckdb(field) + if fn == "count": + select_parts.append(f"COUNT({q}) AS _count") + elif fn in ("avg", "average", "mean"): + select_parts.append(f"AVG({q}) AS {_quote_duckdb(field + '_' + function)}") + elif fn == "sum": + select_parts.append(f"SUM({q}) AS {_quote_duckdb(field + '_sum')}") + elif fn == "min": + select_parts.append(f"MIN({q}) AS {_quote_duckdb(field + '_min')}") + elif fn == "max": + select_parts.append(f"MAX({q}) AS {_quote_duckdb(field + '_max')}") + for f in valid_select: + select_parts.append(f"t.{_quote_duckdb(f)}") + group_cols = valid_select + group_by = f" GROUP BY {', '.join('t.' 
+ _quote_duckdb(c) for c in group_cols)}" if group_cols else "" + inner = f"SELECT {', '.join(select_parts)} FROM {{parquet}} AS t{group_by}" + count_sql = f"SELECT COUNT(*) FROM ({inner}) AS sub" + if method == "random": + order_by = " ORDER BY RANDOM()" + elif method == "head" and valid_order: + order_by = " ORDER BY " + ", ".join(f"sub.{_quote_duckdb(c)} ASC" for c in valid_order) + elif method == "bottom" and valid_order: + order_by = " ORDER BY " + ", ".join(f"sub.{_quote_duckdb(c)} DESC" for c in valid_order) + else: + order_by = "" + main_sql = f"SELECT * FROM ({inner}) AS sub{order_by} LIMIT {sample_size}" + return main_sql, count_sql + + count_sql = "SELECT COUNT(*) FROM {parquet} AS t" + if method == "random": + order_by = " ORDER BY RANDOM()" + elif method == "head" and valid_order: + order_by = " ORDER BY " + ", ".join(f"t.{_quote_duckdb(c)} ASC" for c in valid_order) + elif method == "bottom" and valid_order: + order_by = " ORDER BY " + ", ".join(f"t.{_quote_duckdb(c)} DESC" for c in valid_order) + else: + order_by = "" + if valid_select: + select_list = ", ".join(f"t.{_quote_duckdb(c)}" for c in valid_select) + main_sql = f"SELECT {select_list} FROM {{parquet}} AS t{order_by} LIMIT {sample_size}" + else: + main_sql = f"SELECT * FROM {{parquet}} AS t{order_by} LIMIT {sample_size}" + return main_sql, count_sql + + +def _table_metadata_to_source_metadata(meta: DatalakeTableMetadata) -> dict | None: + """Convert workspace TableMetadata to API source_metadata dict (for refresh).""" + if meta.loader_type is None and meta.loader_params is None: + return None + return { + "table_name": meta.name, + "data_loader_type": meta.loader_type or "", + "data_loader_params": meta.loader_params or {}, + "source_table_name": meta.source_table, + "source_query": meta.source_query, + "last_refreshed": meta.last_synced.isoformat() if meta.last_synced else None, + "content_hash": meta.content_hash, + } + + @tables_bp.route('/list-tables', methods=['GET']) def list_tables(): - """List all tables in the current session""" + """List all tables in the current workspace (datalake).""" try: + workspace = _get_workspace() result = [] - with db_manager.connection(session['session_id']) as db: - table_metadata_list = db.execute(""" - SELECT database_name, schema_name, table_name, schema_name==current_schema() as is_current_schema, 'table' as object_type - FROM duckdb_tables() - WHERE internal=False AND database_name == current_database() - UNION ALL - SELECT database_name, schema_name, view_name as table_name, schema_name==current_schema() as is_current_schema, 'view' as object_type - FROM duckdb_views() - WHERE view_name NOT LIKE 'duckdb_%' AND view_name NOT LIKE 'sqlite_%' AND view_name NOT LIKE 'pragma_%' AND database_name == current_database() - """).fetchall() - - - for table_metadata in table_metadata_list: - [database_name, schema_name, table_name, is_current_schema, object_type] = table_metadata - table_name = table_name if is_current_schema else '.'.join([database_name, schema_name, table_name]) - - # Skip system databases and internal metadata tables - if database_name in ['system', 'temp']: - continue - if table_name.startswith('_df_'): # Internal Data Formulator metadata tables + for table_name in workspace.list_tables(): + try: + meta = workspace.get_table_metadata(table_name) + if meta is None: continue - - try: - # Get column information - columns = db.execute(f"DESCRIBE {table_name}").fetchall() - # Get row count - row_count = db.execute(f"SELECT COUNT(*) FROM {table_name}").fetchone()[0] - 
sample_rows = db.execute(f"SELECT * FROM {table_name} LIMIT 1000").fetchdf() if row_count > 0 else pd.DataFrame() - - # Check if this is a view or a table + columns = [{"name": c.name, "type": c.dtype} for c in (meta.columns or [])] + if not columns and meta.file_type == "parquet": try: - # Get both view existence and source in one query - view_info = db.execute(f"SELECT view_name, sql FROM duckdb_views() WHERE view_name = '{table_name}'").fetchone() - view_source = view_info[1] if view_info else None - except Exception as e: - # If the query fails, assume it's a regular table - view_source = None - - # Get source metadata if available (for refreshable tables) - source_metadata = None + schema_info = get_parquet_schema(workspace, table_name) + columns = [{"name": c["name"], "type": c["type"]} for c in schema_info.get("columns", [])] + except Exception: + pass + row_count = meta.row_count + if row_count is None and meta.file_type == "parquet": try: - source_metadata = get_table_metadata(db, table_name) + schema_info = get_parquet_schema(workspace, table_name) + row_count = schema_info.get("num_rows", 0) or 0 except Exception: - pass # Metadata table may not exist yet - - result.append({ - "name": table_name, - "columns": [{"name": col[0], "type": col[1]} for col in columns], - "row_count": row_count, - "sample_rows": json.loads(sample_rows.to_json(orient='records', date_format='iso')), - "view_source": view_source, - "source_metadata": source_metadata - }) - - except Exception as e: - logger.error(f"Error getting table metadata for {table_name}: {str(e)}") - continue - - return jsonify({ - "status": "success", - "tables": result - }) + row_count = 0 + if row_count is None: + row_count = 0 + sample_rows = meta.sample_rows or [] + if not sample_rows and row_count > 0 and meta.file_type == "parquet": + try: + df = _run_parquet_sql(workspace, table_name, "SELECT * FROM {parquet} AS t LIMIT 1000") + sample_rows = json.loads(df.to_json(orient='records', date_format='iso')) + except Exception: + pass + elif not sample_rows and row_count > 0: + try: + df = _load_table_df(workspace, table_name) + sample_rows = json.loads(df.head(1000).to_json(orient='records', date_format='iso')) + except Exception: + pass + source_metadata = _table_metadata_to_source_metadata(meta) + result.append({ + "name": table_name, + "columns": columns, + "row_count": row_count, + "sample_rows": sample_rows, + "view_source": None, + "source_metadata": source_metadata, + }) + except Exception as e: + logger.error(f"Error getting table metadata for {table_name}: {str(e)}") + continue + return jsonify({"status": "success", "tables": result}) except Exception as e: safe_msg, status_code = sanitize_db_error_message(e) - return jsonify({ - "status": "error", - "message": safe_msg - }), status_code + return jsonify({"status": "error", "message": safe_msg}), status_code -def assemble_query(aggregate_fields_and_functions, group_fields, columns, table_name): +def _apply_aggregation_and_sample( + df: pd.DataFrame, + aggregate_fields_and_functions: list, + select_fields: list, + method: str, + order_by_fields: list, + sample_size: int, +) -> tuple[pd.DataFrame, int]: """ - Assembles a SELECT query string based on binning, aggregation, and grouping specifications. 
- - Args: - bin_fields (list): Fields to be binned into ranges - aggregate_fields_and_functions (list): List of tuples (field, function) for aggregation - group_fields (list): Fields to group by - columns (list): All available column names - - Returns: - str: The assembled SELECT query projection part + Apply aggregation (optional), then sample with ordering. + Returns (sampled_df, total_row_count_after_aggregation). """ - select_parts = [] - output_column_names = [] - - # Handle aggregate fields and functions - for field, function in aggregate_fields_and_functions: - if field is None: - # Handle count(*) case - if function.lower() == 'count': - select_parts.append('COUNT(*) as _count') - output_column_names.append('_count') - elif field in columns: - if function.lower() == 'count': - alias = f'_count' - select_parts.append(f'COUNT(*) as "{alias}"') - output_column_names.append(alias) - else: - # Sanitize function name and create alias - if function in ["avg", "average", "mean"]: - aggregate_function = "AVG" - else: - aggregate_function = function.upper() - - alias = f'{field}_{function}' - select_parts.append(f'{aggregate_function}("{field}") as "{alias}"') - output_column_names.append(alias) - - # Handle group fields - for field in group_fields: - if field in columns: - select_parts.append(f'"{field}"') - output_column_names.append(field) - # If no fields are specified, select all columns - if not select_parts: - select_parts = ["*"] - output_column_names = columns - - from_clause = f"FROM {table_name}" - group_by_clause = f"GROUP BY {', '.join(group_fields)}" if len(group_fields) > 0 and len(aggregate_fields_and_functions) > 0 else "" - - query = f"SELECT {', '.join(select_parts)} {from_clause} {group_by_clause}" - return query, output_column_names + columns = list(df.columns) + valid_agg = [ + (f, fn) for (f, fn) in aggregate_fields_and_functions + if f is None or f in columns + ] + valid_select = [f for f in select_fields if f in columns] + valid_order = [f for f in order_by_fields if f in columns] + + if valid_agg: + group_cols = valid_select + agg_spec = {} + for field, function in valid_agg: + fn = function.lower() + if field is None and fn == "count": + agg_spec["_count"] = ("__size__", "size") + elif field in columns: + if fn == "count": + agg_spec["_count"] = (field, "count") + elif fn in ("avg", "average", "mean"): + agg_spec[f"{field}_{function}"] = (field, "mean") + elif fn == "sum": + agg_spec[f"{field}_sum"] = (field, "sum") + elif fn == "min": + agg_spec[f"{field}_min"] = (field, "min") + elif fn == "max": + agg_spec[f"{field}_max"] = (field, "max") + if "_count" in agg_spec and agg_spec["_count"] == ("__size__", "size"): + df = df.assign(__size__=1) + agg_spec["_count"] = ("__size__", "count") + if group_cols: + df_agg = df.groupby(group_cols, dropna=False).agg(**{k: (c, f) for k, (c, f) in agg_spec.items()}).reset_index() + else: + df_agg = pd.DataFrame([{k: df[c].agg(f) for k, (c, f) in agg_spec.items()}]) + total_row_count = len(df_agg) + work = df_agg + else: + total_row_count = len(df) + work = df[valid_select].copy() if valid_select else df.copy() + + if method == "random": + work = work.sample(n=min(sample_size, len(work)), random_state=None) + elif method == "head": + work = work.sort_values(by=valid_order, ascending=True).head(sample_size) if valid_order else work.head(sample_size) + elif method == "bottom": + work = work.sort_values(by=valid_order, ascending=False).head(sample_size) if valid_order else work.tail(sample_size).iloc[::-1].reset_index(drop=True) + 
else: + work = work.head(sample_size) + return work, total_row_count + @tables_bp.route('/sample-table', methods=['POST']) def sample_table(): - """Sample a table""" + """Sample a table from the workspace. Uses DuckDB for parquet (no full load).""" try: data = request.get_json() table_id = data.get('table') sample_size = data.get('size', 1000) - aggregate_fields_and_functions = data.get('aggregate_fields_and_functions', []) # each element is a tuple (field, function) - select_fields = data.get('select_fields', []) # if empty, we want to include all fields - method = data.get('method', 'random') # one of 'random', 'head', 'bottom' + aggregate_fields_and_functions = data.get('aggregate_fields_and_functions', []) + select_fields = data.get('select_fields', []) + method = data.get('method', 'random') order_by_fields = data.get('order_by_fields', []) - - total_row_count = 0 - # Validate field names against table columns to prevent SQL injection - with db_manager.connection(session['session_id']) as db: - # Get valid column names - columns = [col[0] for col in db.execute(f"DESCRIBE {table_id}").fetchall()] - - - # Filter order_by_fields to only include valid column names - valid_order_by_fields = [field for field in order_by_fields if field in columns] - valid_aggregate_fields_and_functions = [ - field_and_function for field_and_function in aggregate_fields_and_functions - if field_and_function[0] is None or field_and_function[0] in columns - ] - valid_select_fields = [field for field in select_fields if field in columns] - - query, output_column_names = assemble_query(valid_aggregate_fields_and_functions, valid_select_fields, columns, table_id) - - - # Modify the original query to include the count: - count_query = f"SELECT *, COUNT(*) OVER () as total_count FROM ({query}) as subq LIMIT 1" - result = db.execute(count_query).fetchone() - total_row_count = result[-1] if result else 0 - - - # Add ordering and limit to the main query - if method == 'random': - query += f" ORDER BY RANDOM() LIMIT {sample_size}" - elif method == 'head': - if valid_order_by_fields: - # Build ORDER BY clause with validated fields - order_by_clause = ", ".join([f'"{field}"' for field in valid_order_by_fields]) - query += f" ORDER BY {order_by_clause} LIMIT {sample_size}" - else: - query += f" LIMIT {sample_size}" - elif method == 'bottom': - if valid_order_by_fields: - # Build ORDER BY clause with validated fields in descending order - order_by_clause = ", ".join([f'"{field}" DESC' for field in valid_order_by_fields]) - query += f" ORDER BY {order_by_clause} LIMIT {sample_size}" - else: - query += f" ORDER BY ROWID DESC LIMIT {sample_size}" - - - result = db.execute(query).fetchdf() - - + workspace = _get_workspace() + if _is_parquet_table(workspace, table_id): + schema_info = get_parquet_schema(workspace, table_id) + columns = [c["name"] for c in schema_info.get("columns", [])] + main_sql, count_sql = _build_parquet_sample_sql( + columns, + aggregate_fields_and_functions, + select_fields, + method, + order_by_fields, + sample_size, + ) + total_row_count = int(_run_parquet_sql(workspace, table_id, count_sql).iloc[0, 0]) + result_df = _run_parquet_sql(workspace, table_id, main_sql) + else: + df = _load_table_df(workspace, table_id) + result_df, total_row_count = _apply_aggregation_and_sample( + df, + aggregate_fields_and_functions, + select_fields, + method, + order_by_fields, + sample_size, + ) + rows_json = json.loads(result_df.to_json(orient='records', date_format='iso')) return jsonify({ "status": "success", - 
"rows": json.loads(result.to_json(orient='records', date_format='iso')), - "total_row_count": total_row_count + "rows": rows_json, + "total_row_count": total_row_count, }) except Exception as e: logger.error(f"Error sampling table: {str(e)}") safe_msg, status_code = sanitize_db_error_message(e) - return jsonify({ - "status": "error", - "message": safe_msg - }), status_code + return jsonify({"status": "error", "message": safe_msg}), status_code @tables_bp.route('/get-table', methods=['GET']) def get_table_data(): - """Get data from a specific table""" + """Get data from a specific table in the workspace. Uses DuckDB for parquet (LIMIT/OFFSET only).""" try: - with db_manager.connection(session['session_id']) as db: + table_name = request.args.get('table_name') + page = int(request.args.get('page', 1)) + page_size = int(request.args.get('page_size', 100)) + offset = (page - 1) * page_size - table_name = request.args.get('table_name') - # Get pagination parameters - page = int(request.args.get('page', 1)) - page_size = int(request.args.get('page_size', 100)) - offset = (page - 1) * page_size - - if not table_name: - return jsonify({ - "status": "error", - "message": "Table name is required" - }), 400 - - # Get total count - total_rows = db.execute(f"SELECT COUNT(*) FROM {table_name}").fetchone()[0] - - # Get paginated data - result = db.execute( - f"SELECT * FROM {table_name} LIMIT {page_size} OFFSET {offset}" - ).fetchall() - - # Get column names - columns = [col[0] for col in db.execute(f"DESCRIBE {table_name}").fetchall()] - - # Convert to list of dictionaries - rows = [dict(zip(columns, row)) for row in result] - - return jsonify({ - "status": "success", - "table_name": table_name, - "columns": columns, - "rows": rows, - "total_rows": total_rows, - "page": page, - "page_size": page_size - }) - + if not table_name: + return jsonify({"status": "error", "message": "Table name is required"}), 400 + + workspace = _get_workspace() + if _is_parquet_table(workspace, table_name): + count_df = _run_parquet_sql(workspace, table_name, "SELECT COUNT(*) FROM {parquet} AS t") + total_rows = int(count_df.iloc[0, 0]) + page_df = _run_parquet_sql( + workspace, + table_name, + f"SELECT * FROM {{parquet}} AS t LIMIT {page_size} OFFSET {offset}", + ) + columns = list(page_df.columns) + rows = json.loads(page_df.to_json(orient='records', date_format='iso')) + else: + df = _load_table_df(workspace, table_name) + total_rows = len(df) + columns = list(df.columns) + page_df = df.iloc[offset : offset + page_size] + rows = json.loads(page_df.to_json(orient='records', date_format='iso')) + + return jsonify({ + "status": "success", + "table_name": table_name, + "columns": columns, + "rows": rows, + "total_rows": total_rows, + "page": page, + "page_size": page_size, + }) except Exception as e: logger.error(f"Error getting table data: {str(e)}") safe_msg, status_code = sanitize_db_error_message(e) - return jsonify({ - "status": "error", - "message": safe_msg - }), status_code + return jsonify({"status": "error", "message": safe_msg}), status_code @tables_bp.route('/create-table', methods=['POST']) def create_table(): - """Create a new table from uploaded data""" + """Create a new table from uploaded file or raw data in the workspace.""" try: if 'file' not in request.files and 'raw_data' not in request.form: return jsonify({"status": "error", "message": "No file or raw data provided"}), 400 - + table_name = request.form.get('table_name') if not table_name: return jsonify({"status": "error", "message": "No table name 
provided"}), 400 - - df = None + + workspace = _get_workspace() + base_name = parquet_sanitize_table_name(table_name) + sanitized_table_name = base_name + counter = 1 + while sanitized_table_name in workspace.list_tables(): + sanitized_table_name = f"{base_name}_{counter}" + counter += 1 + if 'file' in request.files: file = request.files['file'] - # Read file based on extension - if file.filename.endswith('.csv'): - df = pd.read_csv(file) - elif file.filename.endswith(('.xlsx', '.xls')): - df = pd.read_excel(file) - elif file.filename.endswith('.json'): - df = pd.read_json(file) - else: + if not file.filename or not is_supported_file(file.filename): return jsonify({"status": "error", "message": "Unsupported file format"}), 400 + meta = save_uploaded_file( + workspace, + file.stream, + file.filename, + table_name=sanitized_table_name, + overwrite=False, + ) + sanitized_table_name = meta.name + row_count = meta.row_count + columns = [c.name for c in (meta.columns or [])] + if row_count is None or not columns: + df = _load_table_df(workspace, sanitized_table_name) + row_count = len(df) + columns = list(df.columns) else: raw_data = request.form.get('raw_data') try: df = pd.DataFrame(json.loads(raw_data)) except Exception as e: - return jsonify({"status": "error", "message": f"Invalid JSON data: {str(e)}, it must be in the format of a list of dictionaries"}), 400 - - if df is None: - return jsonify({"status": "error", "message": "No data provided"}), 400 + return jsonify({"status": "error", "message": f"Invalid JSON data: {str(e)}, it must be a list of dictionaries"}), 400 + write_parquet(workspace, df, sanitized_table_name) + row_count = len(df) + columns = list(df.columns) - sanitized_table_name = sanitize_table_name(table_name) - - with db_manager.connection(session['session_id']) as db: - # Check if table exists and generate unique name if needed - base_name = sanitized_table_name - counter = 1 - while True: - # Check if table exists - exists = db.execute(f"SELECT COUNT(*) FROM duckdb_tables() WHERE table_name = '{sanitized_table_name}'").fetchone()[0] > 0 - if not exists: - break - # If exists, append counter to base name - sanitized_table_name = f"{base_name}_{counter}" - counter += 1 - - # Create table - db.register('df_temp', df) - db.execute(f"CREATE TABLE {sanitized_table_name} AS SELECT * FROM df_temp") - db.execute("DROP VIEW df_temp") # Drop the temporary view after creating the table - - return jsonify({ - "status": "success", - "table_name": sanitized_table_name, - "row_count": len(df), - "columns": list(df.columns), - "original_name": base_name, # Include the original name in response - "is_renamed": base_name != sanitized_table_name # Flag indicating if name was changed - }) - + return jsonify({ + "status": "success", + "table_name": sanitized_table_name, + "row_count": row_count, + "columns": columns, + "original_name": base_name, + "is_renamed": base_name != sanitized_table_name, + }) except Exception as e: logger.error(f"Error creating table: {str(e)}") safe_msg, status_code = sanitize_db_error_message(e) - return jsonify({ - "status": "error", - "message": safe_msg - }), status_code + return jsonify({"status": "error", "message": safe_msg}), status_code @tables_bp.route('/delete-table', methods=['POST']) def drop_table(): - """Drop a table or view""" + """Drop a table from the workspace.""" try: data = request.get_json() table_name = data.get('table_name') - if not table_name: return jsonify({"status": "error", "message": "No table name provided"}), 400 - - with 
db_manager.connection(session['session_id']) as db: - # First check if it exists as a view - view_exists = db.execute(f"SELECT view_name FROM duckdb_views() WHERE view_name = '{table_name}'").fetchone() is not None - if view_exists: - db.execute(f"DROP VIEW IF EXISTS {table_name}") - - # Then check if it exists as a table - table_exists = db.execute(f"SELECT table_name FROM duckdb_tables() WHERE table_name = '{table_name}'").fetchone() is not None - if table_exists: - db.execute(f"DROP TABLE IF EXISTS {table_name}") - - if not view_exists and not table_exists: - return jsonify({ - "status": "error", - "message": f"Table/view '{table_name}' does not exist" - }), 404 - - return jsonify({ - "status": "success", - "message": f"Table/view {table_name} dropped" - }) - + + workspace = _get_workspace() + if not workspace.delete_table(table_name): + return jsonify({"status": "error", "message": f"Table '{table_name}' does not exist"}), 404 + return jsonify({"status": "success", "message": f"Table {table_name} dropped"}) except Exception as e: logger.error(f"Error dropping table: {str(e)}") safe_msg, status_code = sanitize_db_error_message(e) - return jsonify({ - "status": "error", - "message": safe_msg - }), status_code + return jsonify({"status": "error", "message": safe_msg}), status_code @tables_bp.route('/upload-db-file', methods=['POST']) def upload_db_file(): - """Upload a db file""" - try: - if 'file' not in request.files: - return jsonify({"status": "error", "message": "No file provided"}), 400 - - file = request.files['file'] - if not file.filename.endswith('.db'): - return jsonify({"status": "error", "message": "Invalid file format. Only .db files are supported"}), 400 - - # Get the session ID - if 'session_id' not in session: - return jsonify({"status": "error", "message": "No session ID found"}), 400 - - session_id = session['session_id'] - - # Create temp directory if it doesn't exist - temp_dir = os.path.join(tempfile.gettempdir()) - os.makedirs(temp_dir, exist_ok=True) - - # Save the file temporarily to verify it - temp_db_path = os.path.join(temp_dir, f"temp_{session_id}.db") - file.save(temp_db_path) - - # Verify if it's a valid DuckDB file - try: - import duckdb - # Try to connect to the database - conn = duckdb.connect(temp_db_path, read_only=True) - # Try a simple query to verify it's a valid database - conn.execute("SELECT 1").fetchall() - conn.close() - - # If we get here, the file is valid - move it to final location - db_file_path = os.path.join(temp_dir, f"df_{session_id}.db") - os.replace(temp_db_path, db_file_path) - - # Update the db_manager's file mapping - db_manager._db_files[session_id] = db_file_path - - except Exception as db_error: - # Clean up temp file - logger.error(f"Error uploading db file: {str(db_error)}") - if os.path.exists(temp_db_path): - os.remove(temp_db_path) - return jsonify({ - "status": "error", - "message": f"Invalid DuckDB database file." - }), 400 - - return jsonify({ - "status": "success", - "message": "Database file uploaded successfully", - "session_id": session_id - }) - - except Exception as e: - logger.error(f"Error uploading db file: {str(e)}") - safe_msg, status_code = sanitize_db_error_message(e) - return jsonify({ - "status": "error", - "message": safe_msg - }), status_code + """No longer used: storage is workspace/datalake, not DuckDB. Kept for API compatibility.""" + return jsonify({ + "status": "error", + "message": "Database file upload is no longer supported. 
Data is stored in the workspace; use create-table with a file or data loaders to add data.", + }), 410 @tables_bp.route('/download-db-file', methods=['GET']) def download_db_file(): - """Download the db file for a session""" - try: - # Check if session exists - if 'session_id' not in session: - return jsonify({ - "status": "error", - "message": "No session ID found" - }), 400 - - session_id = session['session_id'] - - # Get the database file path from db_manager - if session_id not in db_manager._db_files: - return jsonify({ - "status": "error", - "message": "No database file found for this session" - }), 404 - - db_file_path = db_manager._db_files[session_id] - - # Check if file exists - if not os.path.exists(db_file_path): - return jsonify({ - "status": "error", - "message": "Database file not found" - }), 404 - - # Generate a filename for download - download_name = f"data_formulator_{session_id}.db" - - # Return the file as an attachment - return send_from_directory( - os.path.dirname(db_file_path), - os.path.basename(db_file_path), - as_attachment=True, - download_name=download_name, - mimetype='application/x-sqlite3' - ) - - except Exception as e: - logger.error(f"Error downloading db file: {str(e)}") - safe_msg, status_code = sanitize_db_error_message(e) - return jsonify({ - "status": "error", - "message": safe_msg - }), status_code + """No longer used: storage is workspace/datalake. Kept for API compatibility.""" + return jsonify({ + "status": "error", + "message": "Database file download is no longer supported. Data lives in the workspace.", + }), 410 @tables_bp.route('/reset-db-file', methods=['POST']) def reset_db_file(): - """Reset the db file for a session""" + """Reset the workspace for the current session (removes all tables and files).""" try: - if 'session_id' not in session: - return jsonify({ - "status": "error", - "message": "No session ID found" - }), 400 - - session_id = session['session_id'] - - logger.info(f"session_id: {session_id}") - - # First check if there's a reference in db_manager - if session_id in db_manager._db_files: - db_file_path = db_manager._db_files[session_id] - - # Remove the file if it exists - if db_file_path and os.path.exists(db_file_path): - os.remove(db_file_path) - - # Clear the reference - db_manager._db_files[session_id] = None - - # Also check for any temporary files - temp_db_path = os.path.join(tempfile.gettempdir(), f"temp_{session_id}.db") - if os.path.exists(temp_db_path): - os.remove(temp_db_path) - - # Check for the main db file - main_db_path = os.path.join(tempfile.gettempdir(), f"df_{session_id}.db") - if os.path.exists(main_db_path): - os.remove(main_db_path) - - return jsonify({ - "status": "success", - "message": "Database file reset successfully" - }) - + workspace = _get_workspace() + workspace.cleanup() + return jsonify({"status": "success", "message": "Workspace reset successfully"}) except Exception as e: - logger.error(f"Error resetting db file: {str(e)}") + logger.error(f"Error resetting workspace: {str(e)}") safe_msg, status_code = sanitize_db_error_message(e) - return jsonify({ - "status": "error", - "message": safe_msg - }), status_code + return jsonify({"status": "error", "message": safe_msg}), status_code + +def _is_numeric_duckdb_type(col_type: str) -> bool: + """Return True if DuckDB/parquet type is numeric for min/max/avg.""" + t = (col_type or "").upper() + return any( + t.startswith(k) for k in ("INT", "BIGINT", "SMALLINT", "TINYINT", "DOUBLE", "FLOAT", "REAL", "DECIMAL", "NUMERIC") + ) + -# Example of a more 
complex query endpoint @tables_bp.route('/analyze', methods=['POST']) def analyze_table(): - """Get basic statistics about a table""" + """Get basic statistics about a table in the workspace. Uses DuckDB for parquet (no full load).""" try: data = request.get_json() table_name = data.get('table_name') - if not table_name: return jsonify({"status": "error", "message": "No table name provided"}), 400 - - with db_manager.connection(session['session_id']) as db: - - # Get column information - columns = db.execute(f"DESCRIBE {table_name}").fetchall() - + + workspace = _get_workspace() + if _is_parquet_table(workspace, table_name): + schema_info = get_parquet_schema(workspace, table_name) + col_infos = schema_info.get("columns", []) stats = [] - for col in columns: - col_name = col[0] - col_type = col[1] - - # Properly quote column names to avoid SQL keywords issues - quoted_col_name = f'"{col_name}"' - - # Basic stats query - stats_query = f""" - SELECT - COUNT(*) as count, - COUNT(DISTINCT {quoted_col_name}) as unique_count, - COUNT(*) - COUNT({quoted_col_name}) as null_count - FROM {table_name} - """ - - # Add numeric stats if applicable - if col_type in ['INTEGER', 'DOUBLE', 'DECIMAL']: - stats_query = f""" - SELECT - COUNT(*) as count, - COUNT(DISTINCT {quoted_col_name}) as unique_count, - COUNT(*) - COUNT({quoted_col_name}) as null_count, - MIN({quoted_col_name}) as min_value, - MAX({quoted_col_name}) as max_value, - AVG({quoted_col_name}) as avg_value - FROM {table_name} - """ - - col_stats = db.execute(stats_query).fetchone() - - # Create a dictionary with appropriate keys based on column type - if col_type in ['INTEGER', 'DOUBLE', 'DECIMAL']: - stats_dict = dict(zip( - ["count", "unique_count", "null_count", "min", "max", "avg"], - col_stats - )) + for col_info in col_infos: + col_name = col_info["name"] + col_type = col_info.get("type", "") + q = _quote_duckdb(col_name) + if _is_numeric_duckdb_type(col_type): + sql = ( + f"SELECT COUNT(*) AS count, COUNT(DISTINCT t.{q}) AS unique_count, " + f"COUNT(*) - COUNT(t.{q}) AS null_count, " + f"MIN(t.{q}) AS min_val, MAX(t.{q}) AS max_val, AVG(t.{q}) AS avg_val " + f"FROM {{parquet}} AS t" + ) + df = _run_parquet_sql(workspace, table_name, sql) + row = df.iloc[0] + stats_dict = { + "count": int(row["count"]), + "unique_count": int(row["unique_count"]), + "null_count": int(row["null_count"]), + "min": float(row["min_val"]) if row["min_val"] is not None else None, + "max": float(row["max_val"]) if row["max_val"] is not None else None, + "avg": float(row["avg_val"]) if row["avg_val"] is not None else None, + } else: - stats_dict = dict(zip( - ["count", "unique_count", "null_count"], - col_stats - )) - - stats.append({ - "column": col_name, - "type": col_type, - "statistics": stats_dict - }) - - return jsonify({ - "status": "success", - "table_name": table_name, - "statistics": stats - }) - + sql = ( + f"SELECT COUNT(*) AS count, COUNT(DISTINCT t.{q}) AS unique_count, " + f"COUNT(*) - COUNT(t.{q}) AS null_count FROM {{parquet}} AS t" + ) + df = _run_parquet_sql(workspace, table_name, sql) + row = df.iloc[0] + stats_dict = { + "count": int(row["count"]), + "unique_count": int(row["unique_count"]), + "null_count": int(row["null_count"]), + } + stats.append({"column": col_name, "type": col_type, "statistics": stats_dict}) + else: + df = _load_table_df(workspace, table_name) + stats = [] + for col_name in df.columns: + s = df[col_name] + col_type = str(s.dtype) + stats_dict = { + "count": int(s.count()), + "unique_count": int(s.nunique()), + 
"null_count": int(s.isna().sum()), + } + if pd.api.types.is_numeric_dtype(s): + stats_dict["min"] = float(s.min()) if s.notna().any() else None + stats_dict["max"] = float(s.max()) if s.notna().any() else None + stats_dict["avg"] = float(s.mean()) if s.notna().any() else None + stats.append({"column": col_name, "type": col_type, "statistics": stats_dict}) + + return jsonify({"status": "success", "table_name": table_name, "statistics": stats}) except Exception as e: logger.error(f"Error analyzing table: {str(e)}") safe_msg, status_code = sanitize_db_error_message(e) - return jsonify({ - "status": "error", - "message": safe_msg - }), status_code + return jsonify({"status": "error", "message": safe_msg}), status_code + def sanitize_table_name(table_name: str) -> str: - """ - Sanitize a table name to be a valid DuckDB table name. - """ - # Sanitize table name: - # 1. Convert to lowercase - # 2. Replace hyphens with underscores - # 3. Replace spaces with underscores - # 4. Remove any other special characters - sanitized_table_name = table_name.lower() - sanitized_table_name = sanitized_table_name.replace('-', '_') - sanitized_table_name = sanitized_table_name.replace(' ', '_') - sanitized_table_name = ''.join(c for c in sanitized_table_name if c.isalnum() or c == '_') - - # Ensure table name starts with a letter - if not sanitized_table_name or not sanitized_table_name[0].isalpha(): - sanitized_table_name = 'table_' + sanitized_table_name - - # Verify we have a valid table name after sanitization - if not sanitized_table_name: - return f'table_{uuid.uuid4()}' - return sanitized_table_name + """Sanitize a table name for use in the workspace (delegate to parquet_manager).""" + return parquet_sanitize_table_name(table_name) -def sanitize_db_error_message(error: Exception) -> Tuple[str, int]: +def sanitize_db_error_message(error: Exception) -> tuple[str, int]: """ Sanitize error messages before sending to client. Returns a tuple of (sanitized_message, status_code) @@ -687,7 +610,7 @@ def sanitize_db_error_message(error: Exception) -> Tuple[str, int]: # Data loader errors r"Entity ID": (error_msg, 500), - r"session_id": ("session_id not found, please refresh the page", 500), + r"identity": ("Identity not found, please refresh the page", 500), } # Check if error matches any safe pattern @@ -727,246 +650,70 @@ def data_loader_list_data_loaders(): @tables_bp.route('/data-loader/list-tables', methods=['POST']) def data_loader_list_tables(): - """List tables from a data loader""" - + """List tables from a data loader (no workspace needed).""" try: data = request.get_json() data_loader_type = data.get('data_loader_type') data_loader_params = data.get('data_loader_params') - table_filter = data.get('table_filter', None) # New filter parameter + table_filter = data.get('table_filter', None) if data_loader_type not in DATA_LOADERS: return jsonify({"status": "error", "message": f"Invalid data loader type. 
Must be one of: {', '.join(DATA_LOADERS.keys())}"}), 400 - with db_manager.connection(session['session_id']) as duck_db_conn: - data_loader = DATA_LOADERS[data_loader_type](data_loader_params, duck_db_conn) - - # Pass table_filter to list_tables if the data loader supports it - if hasattr(data_loader, 'list_tables') and 'table_filter' in data_loader.list_tables.__code__.co_varnames: - tables = data_loader.list_tables(table_filter=table_filter) - else: - tables = data_loader.list_tables() - - return jsonify({ - "status": "success", - "tables": tables - }) + data_loader = DATA_LOADERS[data_loader_type](data_loader_params) + if hasattr(data_loader, 'list_tables') and 'table_filter' in data_loader.list_tables.__code__.co_varnames: + tables = data_loader.list_tables(table_filter=table_filter) + else: + tables = data_loader.list_tables() + return jsonify({"status": "success", "tables": tables}) except Exception as e: logger.error(f"Error listing tables from data loader: {str(e)}") safe_msg, status_code = sanitize_db_error_message(e) - return jsonify({ - "status": "error", - "message": safe_msg - }), status_code - - -def ensure_table_metadata_table(db_conn): - """ - Ensure the _df_table_source_metadata table exists for storing table source information. - This stores connection info so backend can refresh tables - frontend manages timing/toggle. - """ - db_conn.execute(""" - CREATE TABLE IF NOT EXISTS _df_table_source_metadata ( - table_name VARCHAR PRIMARY KEY, - data_loader_type VARCHAR, - data_loader_params JSON, - source_table_name VARCHAR, - source_query VARCHAR, - last_refreshed TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - content_hash VARCHAR - ) - """) - - # Add content_hash column if it doesn't exist (for existing databases) - try: - db_conn.execute(""" - ALTER TABLE _df_table_source_metadata ADD COLUMN content_hash VARCHAR - """) - except Exception: - # Column already exists - pass - - -def compute_table_content_hash(db_conn, table_name: str) -> str: - """ - Compute a content hash for a table using DuckDB's built-in hash function. 
- Uses a sampling strategy for efficiency with large tables: - - Row count - - Column names - - First 50 rows, last 50 rows, and 50 sampled rows from middle - """ - import hashlib - - # Get row count - row_count = db_conn.execute(f"SELECT COUNT(*) FROM {table_name}").fetchone()[0] - - # Get column names - columns = db_conn.execute(f"DESCRIBE {table_name}").fetchall() - column_names = [col[0] for col in columns] - - # Build hash components - hash_parts = [ - f"count:{row_count}", - f"cols:{','.join(column_names)}" - ] - - if row_count > 0: - # Sample rows for hashing - # First 50 rows - first_rows = db_conn.execute(f""" - SELECT * FROM {table_name} LIMIT 50 - """).fetchall() - - # Last 50 rows (using row number) - last_rows = db_conn.execute(f""" - SELECT * FROM ( - SELECT *, ROW_NUMBER() OVER () as _rn FROM {table_name} - ) WHERE _rn > {max(0, row_count - 50)} - """).fetchall() - - # Middle sample (every Nth row to get ~50 rows) - if row_count > 100: - step = max(1, (row_count - 100) // 50) - middle_rows = db_conn.execute(f""" - SELECT * FROM ( - SELECT *, ROW_NUMBER() OVER () as _rn FROM {table_name} - ) WHERE _rn > 50 AND _rn <= {row_count - 50} AND (_rn - 50) % {step} = 0 - LIMIT 50 - """).fetchall() - else: - middle_rows = [] - - # Convert rows to strings for hashing - all_sample_rows = first_rows + middle_rows + last_rows - row_strs = [str(row) for row in all_sample_rows] - hash_parts.append(f"rows:{';'.join(row_strs)}") - - # Compute hash - content_str = '|'.join(hash_parts) - return hashlib.md5(content_str.encode()).hexdigest() - - -def save_table_metadata(db_conn, table_name: str, data_loader_type: str, data_loader_params: dict, - source_table_name: str = None, source_query: str = None, content_hash: str = None): - """Save or update table source metadata""" - ensure_table_metadata_table(db_conn) - - # Remove sensitive fields from params before storing - safe_params = {k: v for k, v in data_loader_params.items() if k not in ['password', 'api_key', 'secret']} - - # Compute content hash if not provided - if content_hash is None: - try: - content_hash = compute_table_content_hash(db_conn, table_name) - except Exception as e: - logger.warning(f"Failed to compute content hash for {table_name}: {e}") - content_hash = None - - db_conn.execute(""" - INSERT OR REPLACE INTO _df_table_source_metadata - (table_name, data_loader_type, data_loader_params, source_table_name, source_query, last_refreshed, content_hash) - VALUES (?, ?, ?, ?, ?, CURRENT_TIMESTAMP, ?) - """, [table_name, data_loader_type, json.dumps(safe_params), source_table_name, source_query, content_hash]) - - -def get_table_metadata(db_conn, table_name: str) -> dict: - """Get metadata for a specific table (connection info for refresh)""" - ensure_table_metadata_table(db_conn) - - result = db_conn.execute(""" - SELECT table_name, data_loader_type, data_loader_params, source_table_name, source_query, last_refreshed, content_hash - FROM _df_table_source_metadata - WHERE table_name = ? 
- """, [table_name]).fetchone() - - if result: - return { - "table_name": result[0], - "data_loader_type": result[1], - "data_loader_params": json.loads(result[2]) if result[2] else {}, - "source_table_name": result[3], - "source_query": result[4], - "last_refreshed": str(result[5]) if result[5] else None, - "content_hash": result[6] - } - return None - - -def get_all_table_metadata(db_conn) -> list: - """Get metadata for all tables""" - ensure_table_metadata_table(db_conn) - - results = db_conn.execute(""" - SELECT table_name, data_loader_type, data_loader_params, source_table_name, source_query, last_refreshed, content_hash - FROM _df_table_source_metadata - """).fetchall() - - return [{ - "table_name": r[0], - "data_loader_type": r[1], - "data_loader_params": json.loads(r[2]) if r[2] else {}, - "source_table_name": r[3], - "source_query": r[4], - "last_refreshed": str(r[5]) if r[5] else None, - "content_hash": r[6] - } for r in results] + return jsonify({"status": "error", "message": safe_msg}), status_code @tables_bp.route('/data-loader/ingest-data', methods=['POST']) def data_loader_ingest_data(): - """Ingest data from a data loader""" - + """Ingest data from a data loader into the workspace as parquet.""" try: data = request.get_json() data_loader_type = data.get('data_loader_type') data_loader_params = data.get('data_loader_params') table_name = data.get('table_name') - import_options = data.get('import_options', {}) - - # Extract import options - row_limit = import_options.get('row_limit', 1000000) if import_options else 1000000 - sort_columns = import_options.get('sort_columns', None) if import_options else None - sort_order = import_options.get('sort_order', 'asc') if import_options else 'asc' + import_options = data.get('import_options', {}) or {} + row_limit = import_options.get('row_limit', 1000000) + sort_columns = import_options.get('sort_columns') + sort_order = import_options.get('sort_order', 'asc') if data_loader_type not in DATA_LOADERS: return jsonify({"status": "error", "message": f"Invalid data loader type. Must be one of: {', '.join(DATA_LOADERS.keys())}"}), 400 - with db_manager.connection(session['session_id']) as duck_db_conn: - data_loader = DATA_LOADERS[data_loader_type](data_loader_params, duck_db_conn) - data_loader.ingest_data(table_name, size=row_limit, sort_columns=sort_columns, sort_order=sort_order) - - # Get the actual table name that was created (may be sanitized) - sanitized_name = table_name.split('.')[-1] # Base name - - # Store metadata for refresh capability (include import options for future refresh) - save_table_metadata( - duck_db_conn, - sanitized_name, - data_loader_type, - data_loader_params, - source_table_name=table_name - ) - - return jsonify({ - "status": "success", - "message": "Successfully ingested data from data loader", - "table_name": sanitized_name - }) - + workspace = _get_workspace() + data_loader = DATA_LOADERS[data_loader_type](data_loader_params) + safe_name = parquet_sanitize_table_name(table_name.split('.')[-1] if '.' 
in table_name else table_name) + meta = data_loader.ingest_to_workspace( + workspace, + safe_name, + source_table=table_name, + size=row_limit, + sort_columns=sort_columns, + sort_order=sort_order, + ) + return jsonify({ + "status": "success", + "message": "Successfully ingested data from data loader", + "table_name": meta.name, + }) except Exception as e: logger.error(f"Error ingesting data from data loader: {str(e)}") safe_msg, status_code = sanitize_db_error_message(e) - return jsonify({ - "status": "error", - "message": safe_msg - }), status_code - + return jsonify({"status": "error", "message": safe_msg}), status_code + @tables_bp.route('/data-loader/view-query-sample', methods=['POST']) def data_loader_view_query_sample(): - """View a sample of data from a query""" - + """View a sample of data from a query (fetches from external source, no workspace).""" try: data = request.get_json() data_loader_type = data.get('data_loader_type') @@ -975,203 +722,117 @@ def data_loader_view_query_sample(): if data_loader_type not in DATA_LOADERS: return jsonify({"status": "error", "message": f"Invalid data loader type. Must be one of: {', '.join(DATA_LOADERS.keys())}"}), 400 - - with db_manager.connection(session['session_id']) as duck_db_conn: - data_loader = DATA_LOADERS[data_loader_type](data_loader_params, duck_db_conn) - sample = data_loader.view_query_sample(query) + data_loader = DATA_LOADERS[data_loader_type](data_loader_params) + if hasattr(data_loader, 'view_query_sample') and callable(getattr(data_loader, 'view_query_sample')): + sample = data_loader.view_query_sample(query) + else: return jsonify({ - "status": "success", - "sample": sample, - "message": "Successfully retrieved query sample" - }) + "status": "error", + "message": "Query sample is only supported for loaders that implement view_query_sample. Use a source table to fetch data.", + }), 400 + return jsonify({"status": "success", "sample": sample, "message": "Successfully retrieved query sample"}) except Exception as e: logger.error(f"Error viewing query sample: {str(e)}") safe_msg, status_code = sanitize_db_error_message(e) - return jsonify({ - "status": "error", - "sample": [], - "message": safe_msg - }), status_code - + return jsonify({"status": "error", "sample": [], "message": safe_msg}), status_code + @tables_bp.route('/data-loader/ingest-data-from-query', methods=['POST']) def data_loader_ingest_data_from_query(): - """Ingest data from a data loader""" - - try: - data = request.get_json() - data_loader_type = data.get('data_loader_type') - data_loader_params = data.get('data_loader_params') - query = data.get('query') - name_as = data.get('name_as') - - if data_loader_type not in DATA_LOADERS: - return jsonify({"status": "error", "message": f"Invalid data loader type. 
Must be one of: {', '.join(DATA_LOADERS.keys())}"}), 400 - - with db_manager.connection(session['session_id']) as duck_db_conn: - data_loader = DATA_LOADERS[data_loader_type](data_loader_params, duck_db_conn) - data_loader.ingest_data_from_query(query, name_as) - - # Store metadata for refresh capability - save_table_metadata( - duck_db_conn, - name_as, - data_loader_type, - data_loader_params, - source_query=query - ) - - return jsonify({ - "status": "success", - "message": "Successfully ingested data from data loader", - "table_name": name_as - }) - - except Exception as e: - logger.error(f"Error ingesting data from data loader: {str(e)}") - safe_msg, status_code = sanitize_db_error_message(e) - return jsonify({ - "status": "error", - "message": safe_msg - }), status_code + """Ingest data from a query into the workspace as parquet.""" + return jsonify({ + "status": "error", + "message": "Ingestion from custom query is not supported. Please select a source table to ingest.", + }), 400 @tables_bp.route('/data-loader/refresh-table', methods=['POST']) def data_loader_refresh_table(): - """ - Refresh a table by re-importing data from its original source. - Requires the table to have been imported via a data loader with stored metadata. - Returns content_hash and data_changed flag so frontend can skip resampling if data unchanged. - """ + """Refresh a table by re-fetching from its source and updating parquet in the workspace.""" try: data = request.get_json() table_name = data.get('table_name') - # Allow passing updated connection params (e.g., for password that wasn't stored) updated_params = data.get('data_loader_params', {}) if not table_name: return jsonify({"status": "error", "message": "table_name is required"}), 400 - with db_manager.connection(session['session_id']) as duck_db_conn: - # Get stored metadata - metadata = get_table_metadata(duck_db_conn, table_name) - - if not metadata: - return jsonify({ - "status": "error", - "message": f"No source metadata found for table '{table_name}'. Cannot refresh." - }), 400 - - # Get old content hash before refresh - old_content_hash = metadata.get('content_hash') - - data_loader_type = metadata['data_loader_type'] - data_loader_params = {**metadata['data_loader_params'], **updated_params} - - if data_loader_type not in DATA_LOADERS: - return jsonify({ - "status": "error", - "message": f"Unknown data loader type: {data_loader_type}" - }), 400 - - # Create data loader and refresh - data_loader = DATA_LOADERS[data_loader_type](data_loader_params, duck_db_conn) - - if metadata['source_query']: - # Refresh from query - data_loader.ingest_data_from_query(metadata['source_query'], table_name) - elif metadata['source_table_name']: - # Refresh from table - data_loader.ingest_data(metadata['source_table_name'], name_as=table_name) - else: - return jsonify({ - "status": "error", - "message": "No source table or query found in metadata" - }), 400 - - # Compute new content hash after refresh - new_content_hash = compute_table_content_hash(duck_db_conn, table_name) - data_changed = old_content_hash != new_content_hash - - # Update last_refreshed timestamp and content_hash - duck_db_conn.execute(""" - UPDATE _df_table_source_metadata - SET last_refreshed = CURRENT_TIMESTAMP, content_hash = ? - WHERE table_name = ? 
- """, [new_content_hash, table_name]) - - # Get updated row count - row_count = duck_db_conn.execute(f"SELECT COUNT(*) FROM {table_name}").fetchone()[0] + workspace = _get_workspace() + meta = workspace.get_table_metadata(table_name) + if meta is None: + return jsonify({"status": "error", "message": f"No table '{table_name}' found. Cannot refresh."}), 400 + if not meta.loader_type: + return jsonify({"status": "error", "message": f"No source metadata for table '{table_name}'. Cannot refresh."}), 400 + + old_content_hash = meta.content_hash + data_loader_type = meta.loader_type + data_loader_params = {**(meta.loader_params or {}), **updated_params} + if data_loader_type not in DATA_LOADERS: + return jsonify({"status": "error", "message": f"Unknown data loader type: {data_loader_type}"}), 400 + + data_loader = DATA_LOADERS[data_loader_type](data_loader_params) + if meta.source_table: + arrow_table = data_loader.fetch_data_as_arrow(source_table=meta.source_table) + else: return jsonify({ - "status": "success", - "message": f"Successfully refreshed table '{table_name}'", - "row_count": row_count, - "content_hash": new_content_hash, - "data_changed": data_changed - }) + "status": "error", + "message": "Refresh is not supported for tables ingested from a query. Only table-based sources can be refreshed.", + }), 400 + from data_formulator.datalake.parquet_manager import refresh_parquet_from_arrow + new_meta, data_changed = refresh_parquet_from_arrow(workspace, table_name, arrow_table) + return jsonify({ + "status": "success", + "message": f"Successfully refreshed table '{table_name}'", + "row_count": new_meta.row_count, + "content_hash": new_meta.content_hash, + "data_changed": data_changed, + }) except Exception as e: logger.error(f"Error refreshing table: {str(e)}") logger.error(traceback.format_exc()) safe_msg, status_code = sanitize_db_error_message(e) - return jsonify({ - "status": "error", - "message": safe_msg - }), status_code + return jsonify({"status": "error", "message": safe_msg}), status_code @tables_bp.route('/data-loader/get-table-metadata', methods=['POST']) def data_loader_get_table_metadata(): - """Get source metadata for a specific table""" + """Get source metadata for a specific table from workspace.""" try: data = request.get_json() table_name = data.get('table_name') - if not table_name: return jsonify({"status": "error", "message": "table_name is required"}), 400 - with db_manager.connection(session['session_id']) as duck_db_conn: - metadata = get_table_metadata(duck_db_conn, table_name) - - if metadata: - return jsonify({ - "status": "success", - "metadata": metadata - }) - else: - return jsonify({ - "status": "success", - "metadata": None, - "message": f"No metadata found for table '{table_name}'" - }) - + workspace = _get_workspace() + meta = workspace.get_table_metadata(table_name) + metadata = _table_metadata_to_source_metadata(meta) if meta else None + return jsonify({ + "status": "success", + "metadata": metadata, + "message": f"No metadata found for table '{table_name}'" if metadata is None else None, + }) except Exception as e: logger.error(f"Error getting table metadata: {str(e)}") safe_msg, status_code = sanitize_db_error_message(e) - return jsonify({ - "status": "error", - "message": safe_msg - }), status_code + return jsonify({"status": "error", "message": safe_msg}), status_code @tables_bp.route('/data-loader/list-table-metadata', methods=['GET']) def data_loader_list_table_metadata(): - """Get source metadata for all tables""" + """Get source metadata for all 
tables in the workspace.""" try: - with db_manager.connection(session['session_id']) as duck_db_conn: - metadata_list = get_all_table_metadata(duck_db_conn) - - return jsonify({ - "status": "success", - "metadata": metadata_list - }) - + workspace = _get_workspace() + metadata_list = [] + for name in workspace.list_tables(): + meta = workspace.get_table_metadata(name) + m = _table_metadata_to_source_metadata(meta) if meta else None + if m: + metadata_list.append(m) + return jsonify({"status": "success", "metadata": metadata_list}) except Exception as e: logger.error(f"Error listing table metadata: {str(e)}") safe_msg, status_code = sanitize_db_error_message(e) - return jsonify({ - "status": "error", - "message": safe_msg - }), status_code \ No newline at end of file + return jsonify({"status": "error", "message": safe_msg}), status_code \ No newline at end of file diff --git a/py-src/data_formulator/workflows/create_vl_plots.py b/py-src/data_formulator/workflows/create_vl_plots.py index 41776fec..74b38006 100644 --- a/py-src/data_formulator/workflows/create_vl_plots.py +++ b/py-src/data_formulator/workflows/create_vl_plots.py @@ -1,6 +1,6 @@ import pandas as pd import numpy as np -from typing import Dict, List, Any, Optional +from typing import Any import vl_convert as vlc import base64 @@ -68,7 +68,7 @@ def detect_field_type(series: pd.Series) -> str: ] -def get_chart_template(chart_type: str) -> Optional[Dict]: +def get_chart_template(chart_type: str) -> dict | None: """ Find a chart template by chart type name. """ @@ -77,7 +77,7 @@ def get_chart_template(chart_type: str) -> Optional[Dict]: return template return None -def create_chart_spec(df: pd.DataFrame, fields: List[str], chart_type: str) -> Dict[str, Dict[str, str]]: +def create_chart_spec(df: pd.DataFrame, fields: list[str], chart_type: str) -> dict[str, dict[str, str]]: """ Assign fields to appropriate visualization channels based on their data types and chart type. """ @@ -85,7 +85,7 @@ def create_chart_spec(df: pd.DataFrame, fields: List[str], chart_type: str) -> D return assemble_vegailte_chart(df, chart_type, encodings) -def fields_to_encodings(df, chart_type: str, fields: List[str]) -> Dict[str, Dict[str, str]]: +def fields_to_encodings(df, chart_type: str, fields: list[str]) -> dict[str, dict[str, str]]: """ Assign fields to appropriate visualization channels based on their data types and chart type. @@ -389,9 +389,9 @@ def assign_faceting_channels(): def assemble_vegailte_chart( df: pd.DataFrame, chart_type: str, - encodings: Dict[str, Dict[str, str]], + encodings: dict[str, dict[str, str]], max_nominal_values: int = 68 -) -> Dict: +) -> dict: """ Assemble a Vega-Lite chart specification from a dataframe, chart type, and encodings. @@ -574,7 +574,7 @@ def _get_top_values(df: pd.DataFrame, field_name: str, unique_values: list, return unique_values[:max_values] -def vl_spec_to_png(spec: Dict, output_path: str = None, scale: float = 1.0) -> bytes: +def vl_spec_to_png(spec: dict, output_path: str | None = None, scale: float = 1.0) -> bytes: """ Convert a Vega-Lite specification to a PNG image. @@ -600,7 +600,7 @@ def vl_spec_to_png(spec: Dict, output_path: str = None, scale: float = 1.0) -> b return png_data -def spec_to_base64(spec: Dict, scale: float = 1.0) -> str: +def spec_to_base64(spec: dict, scale: float = 1.0) -> str: """ Convert a Vega-Lite specification to a base64 encoded PNG string. 
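Not part of the patch: a minimal usage sketch for the `spec_to_base64` / `vl_spec_to_png` helpers whose signatures are modernized in the hunks above. It assumes vl_convert exposes `vegalite_to_png(spec, scale=...)` (the library create_vl_plots.py already imports as `vlc`); the tiny spec and its field names are placeholders.

```python
# Illustrative sketch only, not part of this diff.
# Rendering a Vega-Lite dict to a base64 PNG, mirroring spec_to_base64's contract.
import base64
import vl_convert as vlc  # same library create_vl_plots.py imports

spec: dict = {  # placeholder spec; field names are made up for this example
    "data": {"values": [{"x": "a", "y": 3}, {"x": "b", "y": 7}]},
    "mark": "bar",
    "encoding": {
        "x": {"field": "x", "type": "nominal"},
        "y": {"field": "y", "type": "quantitative"},
    },
}

png_bytes: bytes = vlc.vegalite_to_png(spec, scale=2.0)  # assumed vl_convert API
b64_png: str = base64.b64encode(png_bytes).decode("ascii")
```

The `dict` / `str | None` annotations follow the built-in-generics style this hunk adopts, which matches the `requires-python >= 3.11` bump in pyproject.toml.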
diff --git a/py-src/data_formulator/workflows/exploration_flow.py b/py-src/data_formulator/workflows/exploration_flow.py index dc241a8b..5feda343 100644 --- a/py-src/data_formulator/workflows/exploration_flow.py +++ b/py-src/data_formulator/workflows/exploration_flow.py @@ -1,26 +1,23 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. -import json import logging -from this import d import pandas as pd -from typing import Dict, List, Any, Optional, Tuple, Generator +from typing import Any, Generator from data_formulator.agents.agent_exploration import ExplorationAgent -from data_formulator.agents.agent_py_data_rec import PythonDataRecAgent from data_formulator.agents.agent_sql_data_rec import SQLDataRecAgent +from data_formulator.agents.agent_sql_data_transform import create_duckdb_conn_with_parquet_views from data_formulator.agents.client_utils import Client -from data_formulator.db_manager import db_manager +from data_formulator.datalake.workspace import get_workspace, WorkspaceWithTempData from data_formulator.workflows.create_vl_plots import assemble_vegailte_chart, spec_to_base64, detect_field_type -from data_formulator.agents.agent_utils import extract_json_objects logger = logging.getLogger(__name__) def create_chart_spec_from_data( - transformed_data: Dict[str, Any], + transformed_data: dict[str, Any], chart_type: str, - chart_encodings: Dict[str, str] + chart_encodings: dict[str, str] ) -> str: """ Create a chart from transformed data using Vega-Lite. @@ -59,17 +56,16 @@ def create_chart_spec_from_data( return None def run_exploration_flow_streaming( - model_config: Dict[str, str], - input_tables: List[Dict[str, Any]], - initial_plan: List[str], - language: str = "python", - session_id: Optional[str] = None, + model_config: dict[str, str], + input_tables: list[dict[str, Any]], + initial_plan: list[str], + session_id: str | None = None, exec_python_in_subprocess: bool = False, max_iterations: int = 5, max_repair_attempts: int = 1, agent_exploration_rules: str = "", agent_coding_rules: str = "" -) -> Generator[Dict[str, Any], None, None]: +) -> Generator[dict[str, Any], None, None]: """ Run the complete exploration flow from high-level question to final insights as a streaming generator. 
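Not part of the patch: a sketch of how a caller might drive the reworked `run_exploration_flow_streaming` generator (the `language` parameter is gone, and `session_id` now selects the workspace). The `model_config` values, table rows, and session id below are placeholders; the event keys come from the yields in the hunks that follow.

```python
# Illustrative sketch only, not part of this diff. All values are placeholders.
from data_formulator.workflows.exploration_flow import run_exploration_flow_streaming

model_config = {"endpoint": "openai", "model": "gpt-4o", "api_key": "<key>"}
input_tables = [{"name": "sales", "rows": [{"region": "US", "amount": 120}]}]

for event in run_exploration_flow_streaming(
    model_config=model_config,
    input_tables=input_tables,
    initial_plan=["How do sales vary by region?"],
    session_id="demo-session",   # used to look up the per-identity workspace
    max_iterations=3,
):
    # Every yield carries iteration / type / content / status / error_message.
    if event["status"] == "error":
        print(f"[{event['type']}] error: {event['error_message']}")
        break
    if event["type"] == "completion":
        print(event["content"].get("message", "done"))
        break
    print(f"[{event['type']}] iteration {event['iteration']}")
```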
@@ -77,7 +73,6 @@ def run_exploration_flow_streaming( model_config: Dictionary with endpoint, model, api_key, api_base, api_version input_tables: List of input table dictionaries with 'name' 'rows' and 'attached_metadata' plan: List of steps to continue exploring - language: "python" or "sql" for data transformation session_id: Database session ID for SQL connections exec_python_in_subprocess: Whether to execute Python in subprocess max_iterations: Maximum number of exploration iterations @@ -102,232 +97,224 @@ def run_exploration_flow_streaming( # Initialize client and agents client = Client.from_config(model_config) - if language == "sql": - if session_id: - db_conn = db_manager.get_connection(session_id) - else: - yield { - "iteration": iteration, - "type": "data_transformation", - "content": {}, - "status": "error", - "error_message": "Session ID required for SQL transformations" - } - return - else: - db_conn = None - - # This is the exploration agent that revises the exploration plan - exploration_agent = ExplorationAgent(client, db_conn=db_conn, agent_exploration_rules=agent_exploration_rules) - - # rec agent for data transformation - if language == "sql": - rec_agent = SQLDataRecAgent(client=client, conn=db_conn, agent_coding_rules=agent_coding_rules) - else: - rec_agent = PythonDataRecAgent( - client=client, - exec_python_in_subprocess=exec_python_in_subprocess, - agent_coding_rules=agent_coding_rules - ) + if not session_id: + yield { + "iteration": iteration, + "type": "data_transformation", + "content": {}, + "status": "error", + "error_message": "Session ID required for exploration" + } + return - completed_steps = [] - current_question = initial_plan[0] if len(initial_plan) > 0 else "Let's explore something interesting." - current_plan = initial_plan[1:] + workspace = get_workspace(session_id) - # Collect exploration plans at each step - exploration_plan_list = [] - - # Track initial plan if provided - if len(initial_plan) > 1: - exploration_plan_list.append({ - "ref_tables": [{"name": table['name'], "rows": table['rows'][:5] if 'rows' in table else []} for table in input_tables], - "plan": initial_plan[1:] - }) + # Determine temp tables by checking which input tables don't exist in the workspace + existing_tables = set(workspace.list_tables()) + temp_data = [table for table in input_tables if table.get('name') not in existing_tables] + + with WorkspaceWithTempData(workspace, temp_data) as workspace: + db_conn = create_duckdb_conn_with_parquet_views(workspace, input_tables) + exploration_agent = ExplorationAgent(client, db_conn=db_conn, agent_exploration_rules=agent_exploration_rules) + rec_agent = SQLDataRecAgent(client=client, workspace=workspace, agent_coding_rules=agent_coding_rules) - # Main exploration loop - while iteration < max_iterations + 1: - iteration += 1 + completed_steps = [] + current_question = initial_plan[0] if len(initial_plan) > 0 else "Let's explore something interesting." 
+ current_plan = initial_plan[1:] - # Step 1: Use rec agent to transform data based on current question - logger.info(f"Iteration {iteration}: Using rec agent for question: {current_question}") + # Collect exploration plans at each step + exploration_plan_list = [] - attempt = 0 - if previous_transformation_dialog: + # Track initial plan if provided + if len(initial_plan) > 1: + exploration_plan_list.append({ + "ref_tables": [{"name": table['name'], "rows": table['rows'][:5] if 'rows' in table else []} for table in input_tables], + "plan": initial_plan[1:] + }) - if isinstance(previous_transformation_data, dict) and 'rows' in previous_transformation_data: - latest_data_sample = previous_transformation_data['rows'] - else: - latest_data_sample = [] # Use empty list as fallback + # Main exploration loop + while iteration < max_iterations + 1: + iteration += 1 - transformation_results = rec_agent.followup( - input_tables=input_tables, - new_instruction=current_question, - latest_data_sample=latest_data_sample, - dialog=previous_transformation_dialog - ) - else: - transformation_results = rec_agent.run( - input_tables=input_tables, - description=current_question - ) + # Step 1: Use rec agent to transform data based on current question + logger.info(f"Iteration {iteration}: Using rec agent for question: {current_question}") + + attempt = 0 + if previous_transformation_dialog: + + if isinstance(previous_transformation_data, dict) and 'rows' in previous_transformation_data: + latest_data_sample = previous_transformation_data['rows'] + else: + latest_data_sample = [] # Use empty list as fallback + + transformation_results = rec_agent.followup( + input_tables=input_tables, + new_instruction=current_question, + latest_data_sample=latest_data_sample, + dialog=previous_transformation_dialog + ) + else: + transformation_results = rec_agent.run( + input_tables=input_tables, + description=current_question + ) - # give one attempt to fix potential errors - while (not transformation_results or transformation_results[0]['status'] != 'ok'): + # give one attempt to fix potential errors + while (not transformation_results or transformation_results[0]['status'] != 'ok'): - if attempt >= max_repair_attempts or not transformation_results: + if attempt >= max_repair_attempts or not transformation_results: + yield { + "iteration": iteration, + "type": "data_transformation", + "content": {"question": current_question}, + "status": "error", + "error_message": "data transformation failed" + } + break + + attempt += 1 + error_msg = transformation_results[0]['content'] + dialog = transformation_results[0]['dialog'] + + new_instruction = f"We run into the following problem executing the code, please fix it:\n\n{error_msg}\n\nPlease think step by step, reflect why the error happens and fix the code so that no more errors would occur." 
+ transformation_results = rec_agent.followup( + input_tables=input_tables, + new_instruction=new_instruction, + latest_data_sample=[], + dialog=dialog + ) + + # if the transformation results is not ok, yield an error and break + if transformation_results[0]['status'] != 'ok': yield { "iteration": iteration, "type": "data_transformation", - "content": {"question": current_question}, + "content": {}, "status": "error", - "error_message": "data transformation failed" + "error_message": transformation_results[0]['content'] } break - attempt += 1 - error_msg = transformation_results[0]['content'] - dialog = transformation_results[0]['dialog'] - - new_instruction = f"We run into the following problem executing the code, please fix it:\n\n{error_msg}\n\nPlease think step by step, reflect why the error happens and fix the code so that no more errors would occur." - transformation_results = rec_agent.followup( - input_tables=input_tables, - new_instruction=new_instruction, - latest_data_sample=[], - dialog=dialog - ) + # Extract transformation result + transform_result = transformation_results[0] + transformed_data = transform_result['content'] + refined_goal = transform_result.get('refined_goal', {}) + code = transform_result.get('code', '') + previous_transformation_dialog = transform_result.get('dialog', []) + previous_transformation_data = transformed_data - # if the transformation results is not ok, yield an error and break - if transformation_results[0]['status'] != 'ok': yield { "iteration": iteration, "type": "data_transformation", - "content": {}, - "status": "error", - "error_message": transformation_results[0]['content'] + "content": { + "question": current_question, + "result": transform_result + }, + "status": "success", + "error_message": "" } - break + + # Step 2: Create visualization to help generate followup question + chart_type = refined_goal.get('chart_type', 'bar') + chart_encodings = refined_goal.get('chart_encodings', {}) + + chart_spec = create_chart_spec_from_data( + transformed_data, + chart_type, + chart_encodings + ) + current_visualization = spec_to_base64(chart_spec) if chart_spec else None + + # Store this step for exploration analysis + step_data = { + 'question': current_question, + 'code': code, + 'data': {"rows": transformed_data['rows'], "name": transformed_data['virtual']['table_name'] if 'virtual' in transformed_data else None }, + 'visualization': current_visualization + } + completed_steps.append(step_data) - # Extract transformation result - transform_result = transformation_results[0] - transformed_data = transform_result['content'] - refined_goal = transform_result.get('refined_goal', {}) - code = transform_result.get('code', '') - previous_transformation_dialog = transform_result.get('dialog', []) - previous_transformation_data = transformed_data + # Step 3: Use exploration agent to analyze results and decide next step + logger.info(f"Iteration {iteration}: Using exploration agent to decide next step") + + followup_results = exploration_agent.suggest_followup( + input_tables=input_tables, + completed_steps=completed_steps, + next_steps=current_plan + ) - yield { - "iteration": iteration, - "type": "data_transformation", - "content": { - "question": current_question, - "result": transform_result - }, - "status": "success", - "error_message": "" - } - - # Step 2: Create visualization to help generate followup question - chart_type = refined_goal.get('chart_type', 'bar') - chart_encodings = refined_goal.get('chart_encodings', {}) - - chart_spec = 
create_chart_spec_from_data( - transformed_data, - chart_type, - chart_encodings - ) - current_visualization = spec_to_base64(chart_spec) if chart_spec else None - - # Store this step for exploration analysis - step_data = { - 'question': current_question, - 'code': code, - 'data': {"rows": transformed_data['rows'], "name": transformed_data['virtual']['table_name'] if 'virtual' in transformed_data else None }, - 'visualization': current_visualization - } - completed_steps.append(step_data) + if not followup_results or followup_results[0]['status'] != 'ok': + error_msg = followup_results[0]['content'] if followup_results else "Follow-up planning failed" + yield { + "iteration": iteration, + "type": "planning", + "content": {}, + "status": "error", + "error_message": error_msg + } + break + + # Extract follow-up decision + followup_plan = followup_results[0]['content'] + + # Check if exploration agent decides to present findings + if followup_plan.get('status') in ['present', 'warning']: + yield { + "iteration": iteration, + "type": "completion", + "content": { + "message": followup_plan.get('summary', ''), + "total_steps": len(completed_steps), + "exploration_plan_list": exploration_plan_list + }, + "status": "success" if followup_plan.get('status') == 'present' else "warning", + "error_message": "" + } + break - # Step 3: Use exploration agent to analyze results and decide next step - logger.info(f"Iteration {iteration}: Using exploration agent to decide next step") - - followup_results = exploration_agent.suggest_followup( - input_tables=input_tables, - completed_steps=completed_steps, - next_steps=current_plan - ) + current_plan = followup_plan.get('next_steps', []) + current_question = current_plan.pop(0) + + # Collect updated plan from exploration agent + # Get table from last completed step (this is the table used for generating the new plan) + if completed_steps: + last_step_data = completed_steps[-1]['data'] + last_step_table = [{ + "name": last_step_data.get('name'), + "rows": last_step_data.get('rows', [])[:5] + }] + else: + last_step_table = [{"name": table['name'], "rows": table['rows'][:5] if 'rows' in table else []} for table in input_tables] + + exploration_plan_list.append({ + "ref_tables": last_step_table, + "plan": current_plan.copy() + }) - if not followup_results or followup_results[0]['status'] != 'ok': - error_msg = followup_results[0]['content'] if followup_results else "Follow-up planning failed" yield { "iteration": iteration, "type": "planning", - "content": {}, - "status": "error", - "error_message": error_msg + "content": { + "message": current_question, + "exploration_steps_count": len(completed_steps) + }, + "status": "success", + "error_message": "" } - break - # Extract follow-up decision - followup_plan = followup_results[0]['content'] + # Clean up connection + db_conn.close() - # Check if exploration agent decides to present findings - if followup_plan.get('status') in ['present', 'warning']: + # If we hit max iterations without presenting + if iteration >= max_iterations: yield { "iteration": iteration, "type": "completion", "content": { - "message": followup_plan.get('summary', ''), "total_steps": len(completed_steps), + "reason": "Reached maximum iterations", "exploration_plan_list": exploration_plan_list }, - "status": "success" if followup_plan.get('status') == 'present' else "warning", - "error_message": "" - } - break - - current_plan = followup_plan.get('next_steps', []) - current_question = current_plan.pop(0) - - # Collect updated plan from 
exploration agent - # Get table from last completed step (this is the table used for generating the new plan) - if completed_steps: - last_step_data = completed_steps[-1]['data'] - last_step_table = [{ - "name": last_step_data.get('name'), - "rows": last_step_data.get('rows', [])[:5] - }] - else: - last_step_table = [{"name": table['name'], "rows": table['rows'][:5] if 'rows' in table else []} for table in input_tables] - - exploration_plan_list.append({ - "ref_tables": last_step_table, - "plan": current_plan.copy() - }) - - yield { - "iteration": iteration, - "type": "planning", - "content": { - "message": current_question, - "exploration_steps_count": len(completed_steps) - }, - "status": "success", - "error_message": "" - } - - # Clean up connection if used - if db_conn: - db_conn.close() - - # If we hit max iterations without presenting - if iteration >= max_iterations: - yield { - "iteration": iteration, - "type": "completion", - "content": { - "total_steps": len(completed_steps), - "reason": "Reached maximum iterations", - "exploration_plan_list": exploration_plan_list - }, - "status": "success", - "error_message": "Reached maximum iterations" - } \ No newline at end of file + "status": "success", + "error_message": "Reached maximum iterations" + } \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 49ed802a..34599461 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta" name = "data_formulator" version = "0.6" -requires-python = ">=3.9" +requires-python = ">=3.11" authors = [ {name = "Chenglong Wang", email = "chenglong.wang@microsoft.com"}, {name = "Dan Marshall", email = "danmar@microsoft.com"}, @@ -20,23 +20,22 @@ classifiers = [ "Programming Language :: Python" ] -dependencies = [ - "jupyter", - "pandas", - "flask", - "flask-cors", +dependencies = [ + "jupyter", + "pandas", + "flask", + "flask-cors", "flask-limiter", - "openai", - "python-dotenv", + "openai", + "python-dotenv", "vega_datasets", "litellm", "duckdb", - "numpy", - "vl-convert-python", + "numpy", + "vl-convert-python", "backoff", "beautifulsoup4", "scikit-learn", - "azure-identity", "azure-kusto-data", "azure-keyvault-secrets", @@ -48,7 +47,9 @@ dependencies = [ "pymysql", "pyodbc", "pymongo", - "yfinance" + "yfinance", + "connectorx>=0.4.5", + "pyarrow>=23.0.0", ] [project.urls] @@ -62,3 +63,8 @@ include-package-data = true [project.scripts] data_formulator = "data_formulator:run_app" + +[tool.uv] +dev-dependencies = [ + "build", +] diff --git a/requirements.txt b/requirements.txt index 0fe4db15..07f4b553 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,33 +1,3021 @@ -# Core dependencies (always required) -jupyter -pandas -numpy -flask -flask-cors -flask-limiter -openai -python-dotenv -vega_datasets -litellm -duckdb -vl-convert-python -backoff -beautifulsoup4 -scikit-learn -yfinance # for demo stream routes - -# External data loaders (Azure, BigQuery, AWS S3, MySQL, MSSQL) -azure-identity -azure-kusto-data -azure-keyvault-secrets -azure-storage-blob -google-cloud-bigquery -google-auth -db-dtypes -boto3 -pymysql -pyodbc -pymongo - -# Install data_formulator itself in editable mode --e . \ No newline at end of file +# This file was autogenerated by uv via the following command: +# uv export --format requirements-txt --output-file requirements.txt +-e . 
+aiohappyeyeballs==2.6.1 \ + --hash=sha256:c3f9d0113123803ccadfdf3f0faa505bc78e6a72d1cc4806cbd719826e943558 \ + --hash=sha256:f349ba8f4b75cb25c99c5c2d84e997e485204d2902a9597802b0371f09331fb8 + # via aiohttp +aiohttp==3.13.3 \ + --hash=sha256:01ad2529d4b5035578f5081606a465f3b814c542882804e2e8cda61adf5c71bf \ + --hash=sha256:042e9e0bcb5fba81886c8b4fbb9a09d6b8a00245fd8d88e4d989c1f96c74164c \ + --hash=sha256:05861afbbec40650d8a07ea324367cb93e9e8cc7762e04dd4405df99fa65159c \ + --hash=sha256:084911a532763e9d3dd95adf78a78f4096cd5f58cdc18e6fdbc1b58417a45423 \ + --hash=sha256:0add0900ff220d1d5c5ebbf99ed88b0c1bbf87aa7e4262300ed1376a6b13414f \ + --hash=sha256:10b47b7ba335d2e9b1239fa571131a87e2d8ec96b333e68b2a305e7a98b0bae2 \ + --hash=sha256:1449ceddcdbcf2e0446957863af03ebaaa03f94c090f945411b61269e2cb5daf \ + --hash=sha256:1cb93e166e6c28716c8c6aeb5f99dfb6d5ccf482d29fe9bf9a794110e6d0ab64 \ + --hash=sha256:2712039939ec963c237286113c68dbad80a82a4281543f3abf766d9d73228998 \ + --hash=sha256:27234ef6d85c914f9efeb77ff616dbf4ad2380be0cda40b4db086ffc7ddd1b7d \ + --hash=sha256:28e027cf2f6b641693a09f631759b4d9ce9165099d2b5d92af9bd4e197690eea \ + --hash=sha256:2b8d8ddba8f95ba17582226f80e2de99c7a7948e66490ef8d947e272a93e9463 \ + --hash=sha256:2be0e9ccf23e8a94f6f0650ce06042cefc6ac703d0d7ab6c7a917289f2539ad4 \ + --hash=sha256:2e41b18a58da1e474a057b3d35248d8320029f61d70a37629535b16a0c8f3767 \ + --hash=sha256:2eb752b102b12a76ca02dff751a801f028b4ffbbc478840b473597fc91a9ed43 \ + --hash=sha256:2fc82186fadc4a8316768d61f3722c230e2c1dcab4200d52d2ebdf2482e47592 \ + --hash=sha256:2fff83cfc93f18f215896e3a190e8e5cb413ce01553901aca925176e7568963a \ + --hash=sha256:34749271508078b261c4abb1767d42b8d0c0cc9449c73a4df494777dc55f0687 \ + --hash=sha256:34bac00a67a812570d4a460447e1e9e06fae622946955f939051e7cc895cfab8 \ + --hash=sha256:37239e9f9a7ea9ac5bf6b92b0260b01f8a22281996da609206a84df860bc1261 \ + --hash=sha256:3b61b7169ababd7802f9568ed96142616a9118dd2be0d1866e920e77ec8fa92a \ + --hash=sha256:3d9908a48eb7416dc1f4524e69f1d32e5d90e3981e4e37eb0aa1cd18f9cfa2a4 \ + --hash=sha256:3dd4dce1c718e38081c8f35f323209d4c1df7d4db4bab1b5c88a6b4d12b74587 \ + --hash=sha256:4021b51936308aeea0367b8f006dc999ca02bc118a0cc78c303f50a2ff6afb91 \ + --hash=sha256:425c126c0dc43861e22cb1c14ba4c8e45d09516d0a3ae0a3f7494b79f5f233a3 \ + --hash=sha256:44531a36aa2264a1860089ffd4dce7baf875ee5a6079d5fb42e261c704ef7344 \ + --hash=sha256:48e377758516d262bde50c2584fc6c578af272559c409eecbdd2bae1601184d6 \ + --hash=sha256:49a03727c1bba9a97d3e93c9f93ca03a57300f484b6e935463099841261195d3 \ + --hash=sha256:568f416a4072fbfae453dcf9a99194bbb8bdeab718e08ee13dfa2ba0e4bebf29 \ + --hash=sha256:5b179331a481cb5529fca8b432d8d3c7001cb217513c94cd72d668d1248688a3 \ + --hash=sha256:5b6073099fb654e0a068ae678b10feff95c5cae95bbfcbfa7af669d361a8aa6b \ + --hash=sha256:5d2d94f1f5fcbe40838ac51a6ab5704a6f9ea42e72ceda48de5e6b898521da51 \ + --hash=sha256:5dff64413671b0d3e7d5918ea490bdccb97a4ad29b3f311ed423200b2203e01c \ + --hash=sha256:5e1d8c8b8f1d91cd08d8f4a3c2b067bfca6ec043d3ff36de0f3a715feeedf926 \ + --hash=sha256:5f8ca7f2bb6ba8348a3614c7918cc4bb73268c5ac2a207576b7afea19d3d9f64 \ + --hash=sha256:642f752c3eb117b105acbd87e2c143de710987e09860d674e068c4c2c441034f \ + --hash=sha256:65d2ccb7eabee90ce0503c17716fc77226be026dcc3e65cce859a30db715025b \ + --hash=sha256:693781c45a4033d31d4187d2436f5ac701e7bbfe5df40d917736108c1cc7436e \ + --hash=sha256:694976222c711d1d00ba131904beb60534f93966562f64440d0c9d41b8cdb440 \ + --hash=sha256:697753042d57f4bf7122cab985bf15d0cef23c770864580f5af4f52023a56bd6 \ + 
--hash=sha256:6de499a1a44e7de70735d0b39f67c8f25eb3d91eb3103be99ca0fa882cdd987d \ + --hash=sha256:6fc0e2337d1a4c3e6acafda6a78a39d4c14caea625124817420abceed36e2415 \ + --hash=sha256:7a4a94eb787e606d0a09404b9c38c113d3b099d508021faa615d70a0131907ce \ + --hash=sha256:7b5e8fe4de30df199155baaf64f2fcd604f4c678ed20910db8e2c66dc4b11603 \ + --hash=sha256:7bfdc049127717581866fa4708791220970ce291c23e28ccf3922c700740fdc0 \ + --hash=sha256:7f9120f7093c2a32d9647abcaf21e6ad275b4fbec5b55969f978b1a97c7c86bf \ + --hash=sha256:8057c98e0c8472d8846b9c79f56766bcc57e3e8ac7bfd510482332366c56c591 \ + --hash=sha256:80dd4c21b0f6237676449c6baaa1039abae86b91636b6c91a7f8e61c87f89540 \ + --hash=sha256:82611aeec80eb144416956ec85b6ca45a64d76429c1ed46ae1b5f86c6e0c9a26 \ + --hash=sha256:8542f41a62bcc58fc7f11cf7c90e0ec324ce44950003feb70640fc2a9092c32a \ + --hash=sha256:87797e645d9d8e222e04160ee32aa06bc5c163e8499f24db719e7852ec23093a \ + --hash=sha256:87b9aab6d6ed88235aa2970294f496ff1a1f9adcd724d800e9b952395a80ffd9 \ + --hash=sha256:90455115e5da1c3c51ab619ac57f877da8fd6d73c05aacd125c5ae9819582aba \ + --hash=sha256:90751b8eed69435bac9ff4e3d2f6b3af1f57e37ecb0fbeee59c0174c9e2d41df \ + --hash=sha256:96d604498a7c782cb15a51c406acaea70d8c027ee6b90c569baa6e7b93073679 \ + --hash=sha256:9ae8dd55c8e6c4257eae3a20fd2c8f41edaea5992ed67156642493b8daf3cecc \ + --hash=sha256:9af5e68ee47d6534d36791bbe9b646d2a7c7deb6fc24d7943628edfbb3581f29 \ + --hash=sha256:9bf9f7a65e7aa20dd764151fb3d616c81088f91f8df39c3893a536e279b4b984 \ + --hash=sha256:9d4c940f02f49483b18b079d1c27ab948721852b281f8b015c058100e9421dd1 \ + --hash=sha256:a19884d2ee70b06d9204b2727a7b9f983d0c684c650254679e716b0b77920632 \ + --hash=sha256:a1e53262fd202e4b40b70c3aff944a8155059beedc8a89bba9dc1f9ef06a1b56 \ + --hash=sha256:a2212ad43c0833a873d0fb3c63fa1bacedd4cf6af2fee62bf4b739ceec3ab239 \ + --hash=sha256:a45530014d7a1e09f4a55f4f43097ba0fd155089372e105e4bff4ca76cb1b168 \ + --hash=sha256:a949eee43d3782f2daae4f4a2819b2cb9b0c5d3b7f7a927067cc84dafdbb9f88 \ + --hash=sha256:add1da70de90a2569c5e15249ff76a631ccacfe198375eead4aadf3b8dc849dc \ + --hash=sha256:b04be762396457bef43f3597c991e192ee7da460a4953d7e647ee4b1c28e7046 \ + --hash=sha256:b0d95340658b9d2f11d9697f59b3814a9d3bb4b7a7c20b131df4bcef464037c0 \ + --hash=sha256:b1a6102b4d3ebc07dad44fbf07b45bb600300f15b552ddf1851b5390202ea2e3 \ + --hash=sha256:b556c85915d8efaed322bf1bdae9486aa0f3f764195a0fb6ee962e5c71ef5ce1 \ + --hash=sha256:b903a4dfee7d347e2d87697d0713be59e0b87925be030c9178c5faa58ea58d5c \ + --hash=sha256:b928f30fe49574253644b1ca44b1b8adbd903aa0da4b9054a6c20fc7f4092a25 \ + --hash=sha256:bb4f7475e359992b580559e008c598091c45b5088f28614e855e42d39c2f1033 \ + --hash=sha256:bbe7d4cecacb439e2e2a8a1a7b935c25b812af7a5fd26503a66dadf428e79ec1 \ + --hash=sha256:c014c7ea7fb775dd015b2d3137378b7be0249a448a1612268b5a90c2d81de04d \ + --hash=sha256:c19b90316ad3b24c69cd78d5c9b4f3aa4497643685901185b65166293d36a00f \ + --hash=sha256:c685f2d80bb67ca8c3837823ad76196b3694b0159d232206d1e461d3d434666f \ + --hash=sha256:d32764c6c9aafb7fb55366a224756387cd50bfa720f32b88e0e6fa45b27dcf29 \ + --hash=sha256:d60ac9663f44168038586cab2157e122e46bdef09e9368b37f2d82d354c23f72 \ + --hash=sha256:e3531d63d3bdfa7e3ac5e9b27b2dd7ec9df3206a98e0b3445fa906f233264c57 \ + --hash=sha256:e636b3c5f61da31a92bf0d91da83e58fdfa96f178ba682f11d24f31944cdd28c \ + --hash=sha256:ea37047c6b367fd4bd632bff8077449b8fa034b69e812a18e0132a00fae6e808 \ + --hash=sha256:f33ed1a2bf1997a36661874b017f5c4b760f41266341af36febaf271d179f6d7 \ + 
--hash=sha256:f76c1e3fe7d7c8afad7ed193f89a292e1999608170dcc9751a7462a87dfd5bc0 \ + --hash=sha256:f9444f105664c4ce47a2a7171a2418bce5b7bae45fb610f4e2c36045d85911d3 \ + --hash=sha256:fc290605db2a917f6e81b0e1e0796469871f5af381ce15c604a3c5c7e51cb730 \ + --hash=sha256:fc353029f176fd2b3ec6cfc71be166aba1936fe5d73dd1992ce289ca6647a9aa + # via litellm +aiosignal==1.4.0 \ + --hash=sha256:053243f8b92b990551949e63930a839ff0cf0b0ebbe0597b0f3fb19e1a0fe82e \ + --hash=sha256:f47eecd9468083c2029cc99945502cb7708b082c232f9aca65da147157b251c7 + # via aiohttp +annotated-types==0.7.0 \ + --hash=sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53 \ + --hash=sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89 + # via pydantic +anyio==4.12.1 \ + --hash=sha256:41cfcc3a4c85d3f05c932da7c26d0201ac36f72abd4435ba90d0464a3ffed703 \ + --hash=sha256:d405828884fc140aa80a3c667b8beed277f1dfedec42ba031bd6ac3db606ab6c + # via + # httpx + # jupyter-server + # openai +appnope==0.1.4 ; sys_platform == 'darwin' \ + --hash=sha256:1de3860566df9caf38f01f86f65e0e13e379af54f9e4bee1e66b48f2efffd1ee \ + --hash=sha256:502575ee11cd7a28c0205f379b525beefebab9d161b7c964670864014ed7213c + # via ipykernel +argon2-cffi==25.1.0 \ + --hash=sha256:694ae5cc8a42f4c4e2bf2ca0e64e51e23a040c6a517a85074683d3959e1346c1 \ + --hash=sha256:fdc8b074db390fccb6eb4a3604ae7231f219aa669a2652e0f20e16ba513d5741 + # via jupyter-server +argon2-cffi-bindings==25.1.0 \ + --hash=sha256:1db89609c06afa1a214a69a462ea741cf735b29a57530478c06eb81dd403de99 \ + --hash=sha256:1e021e87faa76ae0d413b619fe2b65ab9a037f24c60a1e6cc43457ae20de6dc6 \ + --hash=sha256:2630b6240b495dfab90aebe159ff784d08ea999aa4b0d17efa734055a07d2f44 \ + --hash=sha256:3c6702abc36bf3ccba3f802b799505def420a1b7039862014a65db3205967f5a \ + --hash=sha256:3d3f05610594151994ca9ccb3c771115bdb4daef161976a266f0dd8aa9996b8f \ + --hash=sha256:473bcb5f82924b1becbb637b63303ec8d10e84c8d241119419897a26116515d2 \ + --hash=sha256:7aef0c91e2c0fbca6fc68e7555aa60ef7008a739cbe045541e438373bc54d2b0 \ + --hash=sha256:84a461d4d84ae1295871329b346a97f68eade8c53b6ed9a7ca2d7467f3c8ff6f \ + --hash=sha256:87c33a52407e4c41f3b70a9c2d3f6056d88b10dad7695be708c5021673f55623 \ + --hash=sha256:8b8efee945193e667a396cbc7b4fb7d357297d6234d30a489905d96caabde56b \ + --hash=sha256:a1c70058c6ab1e352304ac7e3b52554daadacd8d453c1752e547c76e9c99ac44 \ + --hash=sha256:a98cd7d17e9f7ce244c0803cad3c23a7d379c301ba618a5fa76a67d116618b98 \ + --hash=sha256:aecba1723ae35330a008418a91ea6cfcedf6d31e5fbaa056a166462ff066d500 \ + --hash=sha256:b0fdbcf513833809c882823f98dc2f931cf659d9a1429616ac3adebb49f5db94 \ + --hash=sha256:b55aec3565b65f56455eebc9b9f34130440404f27fe21c3b375bf1ea4d8fbae6 \ + --hash=sha256:b957f3e6ea4d55d820e40ff76f450952807013d361a65d7f28acc0acbf29229d \ + --hash=sha256:ba92837e4a9aa6a508c8d2d7883ed5a8f6c308c89a4790e1e447a220deb79a85 \ + --hash=sha256:c4f9665de60b1b0e99bcd6be4f17d90339698ce954cfd8d9cf4f91c995165a92 \ + --hash=sha256:c87b72589133f0346a1cb8d5ecca4b933e3c9b64656c9d175270a000e73b288d \ + --hash=sha256:d3e924cfc503018a714f94a49a149fdc0b644eaead5d1f089330399134fa028a \ + --hash=sha256:e2fd3bfbff3c5d74fef31a722f729bf93500910db650c925c2d6ef879a7e51cb + # via argon2-cffi +arrow==1.4.0 \ + --hash=sha256:749f0769958ebdc79c173ff0b0670d59051a535fa26e8eba02953dc19eb43205 \ + --hash=sha256:ed0cc050e98001b8779e84d461b0098c4ac597e88704a655582b21d116e526d7 + # via isoduration +asttokens==3.0.1 \ + --hash=sha256:15a3ebc0f43c2d0a50eeafea25e19046c68398e487b9f1f5b517f7c0f40f976a \ + 
--hash=sha256:71a4ee5de0bde6a31d64f6b13f2293ac190344478f081c3d1bccfcf5eacb0cb7 + # via stack-data +async-lru==2.1.0 \ + --hash=sha256:9eeb2fecd3fe42cc8a787fc32ead53a3a7158cc43d039c3c55ab3e4e5b2a80ed \ + --hash=sha256:fa12dcf99a42ac1280bc16c634bbaf06883809790f6304d85cdab3f666f33a7e + # via jupyterlab +attrs==25.4.0 \ + --hash=sha256:16d5969b87f0859ef33a48b35d55ac1be6e42ae49d5e853b597db70c35c57e11 \ + --hash=sha256:adcf7e2a1fb3b36ac48d97835bb6d8ade15b8dcce26aba8bf1d14847b57a3373 + # via + # aiohttp + # jsonschema + # referencing +azure-core==1.38.0 \ + --hash=sha256:8194d2682245a3e4e3151a667c686464c3786fed7918b394d035bdcd61bb5993 \ + --hash=sha256:ab0c9b2cd71fecb1842d52c965c95285d3cfb38902f6766e4a471f1cd8905335 + # via + # azure-identity + # azure-keyvault-secrets + # azure-kusto-data + # azure-storage-blob +azure-identity==1.25.1 \ + --hash=sha256:87ca8328883de6036443e1c37b40e8dc8fb74898240f61071e09d2e369361456 \ + --hash=sha256:e9edd720af03dff020223cd269fa3a61e8f345ea75443858273bcb44844ab651 + # via + # azure-kusto-data + # data-formulator +azure-keyvault-secrets==4.10.0 \ + --hash=sha256:666fa42892f9cee749563e551a90f060435ab878977c95265173a8246d546a36 \ + --hash=sha256:9dbde256077a4ee1a847646671580692e3f9bea36bcfc189c3cf2b9a94eb38b9 + # via data-formulator +azure-kusto-data==6.0.1 \ + --hash=sha256:1d5e04d273376330b58d6d11b055aeadda748cd1ecee1117fdc1b8329e76cb75 \ + --hash=sha256:8d4e7adbe122ea08d5f0053ec37f294171ff734e8d77f36983a29694485bc347 + # via data-formulator +azure-storage-blob==12.28.0 \ + --hash=sha256:00fb1db28bf6a7b7ecaa48e3b1d5c83bfadacc5a678b77826081304bd87d6461 \ + --hash=sha256:e7d98ea108258d29aa0efbfd591b2e2075fa1722a2fae8699f0b3c9de11eff41 + # via data-formulator +babel==2.18.0 \ + --hash=sha256:b80b99a14bd085fcacfa15c9165f651fbb3406e66cc603abf11c5750937c992d \ + --hash=sha256:e2b422b277c2b9a9630c1d7903c2a00d0830c409c59ac8cae9081c92f1aeba35 + # via jupyterlab-server +backoff==2.2.1 \ + --hash=sha256:03f829f5bb1923180821643f8753b0502c3b682293992485b0eef2807afa5cba \ + --hash=sha256:63579f9a0628e06278f7e47b7d7d5b6ce20dc65c5e96a6f3ca99a6adca0396e8 + # via data-formulator +beautifulsoup4==4.14.3 \ + --hash=sha256:0918bfe44902e6ad8d57732ba310582e98da931428d231a5ecb9e7c703a735bb \ + --hash=sha256:6292b1c5186d356bba669ef9f7f051757099565ad9ada5dd630bd9de5fa7fb86 + # via + # data-formulator + # nbconvert + # yfinance +bleach==6.3.0 \ + --hash=sha256:6f3b91b1c0a02bb9a78b5a454c92506aa0fdf197e1d5e114d2e00c6f64306d22 \ + --hash=sha256:fe10ec77c93ddf3d13a73b035abaac7a9f5e436513864ccdad516693213c65d6 + # via nbconvert +blinker==1.9.0 \ + --hash=sha256:b4ce2265a7abece45e7cc896e98dbebe6cead56bcf805a3d23136d145f5445bf \ + --hash=sha256:ba0efaa9080b619ff2f3459d1d500c57bddea4a6b424b60a91141db6fd2f08bc + # via flask +boto3==1.42.39 \ + --hash=sha256:d03f82363314759eff7f84a27b9e6428125f89d8119e4588e8c2c1d79892c956 \ + --hash=sha256:d9d6ce11df309707b490d2f5f785b761cfddfd6d1f665385b78c9d8ed097184b + # via data-formulator +botocore==1.42.39 \ + --hash=sha256:0f00355050821e91a5fe6d932f7bf220f337249b752899e3e4cf6ed54326249e \ + --hash=sha256:9e0d0fed9226449cc26fcf2bbffc0392ac698dd8378e8395ce54f3ec13f81d58 + # via + # boto3 + # s3transfer +build==1.4.0 \ + --hash=sha256:6a07c1b8eb6f2b311b96fcbdbce5dab5fe637ffda0fd83c9cac622e927501596 \ + --hash=sha256:f1b91b925aa322be454f8330c6fb48b465da993d1e7e7e6fa35027ec49f3c936 +certifi==2026.1.4 \ + --hash=sha256:9943707519e4add1115f44c2bc244f782c0249876bf51b6599fee1ffbedd685c \ + --hash=sha256:ac726dd470482006e014ad384921ed6438c457018f4b3d204aea4281258b2120 
+ # via + # curl-cffi + # httpcore + # httpx + # requests +cffi==2.0.0 \ + --hash=sha256:00bdf7acc5f795150faa6957054fbbca2439db2f775ce831222b66f192f03beb \ + --hash=sha256:07b271772c100085dd28b74fa0cd81c8fb1a3ba18b21e03d7c27f3436a10606b \ + --hash=sha256:087067fa8953339c723661eda6b54bc98c5625757ea62e95eb4898ad5e776e9f \ + --hash=sha256:0a1527a803f0a659de1af2e1fd700213caba79377e27e4693648c2923da066f9 \ + --hash=sha256:12873ca6cb9b0f0d3a0da705d6086fe911591737a59f28b7936bdfed27c0d47c \ + --hash=sha256:19f705ada2530c1167abacb171925dd886168931e0a7b78f5bffcae5c6b5be75 \ + --hash=sha256:1e3a615586f05fc4065a8b22b8152f0c1b00cdbc60596d187c2a74f9e3036e4e \ + --hash=sha256:1fc9ea04857caf665289b7a75923f2c6ed559b8298a1b8c49e59f7dd95c8481e \ + --hash=sha256:203a48d1fb583fc7d78a4c6655692963b860a417c0528492a6bc21f1aaefab25 \ + --hash=sha256:21d1152871b019407d8ac3985f6775c079416c282e431a4da6afe7aefd2bccbe \ + --hash=sha256:24b6f81f1983e6df8db3adc38562c83f7d4a0c36162885ec7f7b77c7dcbec97b \ + --hash=sha256:256f80b80ca3853f90c21b23ee78cd008713787b1b1e93eae9f3d6a7134abd91 \ + --hash=sha256:28a3a209b96630bca57cce802da70c266eb08c6e97e5afd61a75611ee6c64592 \ + --hash=sha256:2c8f814d84194c9ea681642fd164267891702542f028a15fc97d4674b6206187 \ + --hash=sha256:2de9a304e27f7596cd03d16f1b7c72219bd944e99cc52b84d0145aefb07cbd3c \ + --hash=sha256:38100abb9d1b1435bc4cc340bb4489635dc2f0da7456590877030c9b3d40b0c1 \ + --hash=sha256:3925dd22fa2b7699ed2617149842d2e6adde22b262fcbfada50e3d195e4b3a94 \ + --hash=sha256:3e17ed538242334bf70832644a32a7aae3d83b57567f9fd60a26257e992b79ba \ + --hash=sha256:44d1b5909021139fe36001ae048dbdde8214afa20200eda0f64c068cac5d5529 \ + --hash=sha256:45d5e886156860dc35862657e1494b9bae8dfa63bf56796f2fb56e1679fc0bca \ + --hash=sha256:4671d9dd5ec934cb9a73e7ee9676f9362aba54f7f34910956b84d727b0d73fb6 \ + --hash=sha256:5fed36fccc0612a53f1d4d9a816b50a36702c28a2aa880cb8a122b3466638743 \ + --hash=sha256:66f011380d0e49ed280c789fbd08ff0d40968ee7b665575489afa95c98196ab5 \ + --hash=sha256:6824f87845e3396029f3820c206e459ccc91760e8fa24422f8b0c3d1731cbec5 \ + --hash=sha256:6c6c373cfc5c83a975506110d17457138c8c63016b563cc9ed6e056a82f13ce4 \ + --hash=sha256:6d02d6655b0e54f54c4ef0b94eb6be0607b70853c45ce98bd278dc7de718be5d \ + --hash=sha256:6d50360be4546678fc1b79ffe7a66265e28667840010348dd69a314145807a1b \ + --hash=sha256:730cacb21e1bdff3ce90babf007d0a0917cc3e6492f336c2f0134101e0944f93 \ + --hash=sha256:737fe7d37e1a1bffe70bd5754ea763a62a066dc5913ca57e957824b72a85e205 \ + --hash=sha256:74a03b9698e198d47562765773b4a8309919089150a0bb17d829ad7b44b60d27 \ + --hash=sha256:7553fb2090d71822f02c629afe6042c299edf91ba1bf94951165613553984512 \ + --hash=sha256:7a66c7204d8869299919db4d5069a82f1561581af12b11b3c9f48c584eb8743d \ + --hash=sha256:7cc09976e8b56f8cebd752f7113ad07752461f48a58cbba644139015ac24954c \ + --hash=sha256:81afed14892743bbe14dacb9e36d9e0e504cd204e0b165062c488942b9718037 \ + --hash=sha256:8941aaadaf67246224cee8c3803777eed332a19d909b47e29c9842ef1e79ac26 \ + --hash=sha256:8eca2a813c1cb7ad4fb74d368c2ffbbb4789d377ee5bb8df98373c2cc0dee76c \ + --hash=sha256:92b68146a71df78564e4ef48af17551a5ddd142e5190cdf2c5624d0c3ff5b2e8 \ + --hash=sha256:94698a9c5f91f9d138526b48fe26a199609544591f859c870d477351dc7b2414 \ + --hash=sha256:9a67fc9e8eb39039280526379fb3a70023d77caec1852002b4da7e8b270c4dd9 \ + --hash=sha256:9de40a7b0323d889cf8d23d1ef214f565ab154443c42737dfe52ff82cf857664 \ + --hash=sha256:a05d0c237b3349096d3981b727493e22147f934b20f6f125a3eba8f994bec4a9 \ + 
--hash=sha256:afb8db5439b81cf9c9d0c80404b60c3cc9c3add93e114dcae767f1477cb53775 \ + --hash=sha256:b1e74d11748e7e98e2f426ab176d4ed720a64412b6a15054378afdb71e0f37dc \ + --hash=sha256:b21e08af67b8a103c71a250401c78d5e0893beff75e28c53c98f4de42f774062 \ + --hash=sha256:b4c854ef3adc177950a8dfc81a86f5115d2abd545751a304c5bcf2c2c7283cfe \ + --hash=sha256:baf5215e0ab74c16e2dd324e8ec067ef59e41125d3eade2b863d294fd5035c92 \ + --hash=sha256:c649e3a33450ec82378822b3dad03cc228b8f5963c0c12fc3b1e0ab940f768a5 \ + --hash=sha256:c654de545946e0db659b3400168c9ad31b5d29593291482c43e3564effbcee13 \ + --hash=sha256:c6638687455baf640e37344fe26d37c404db8b80d037c3d29f58fe8d1c3b194d \ + --hash=sha256:c8d3b5532fc71b7a77c09192b4a5a200ea992702734a2e9279a37f2478236f26 \ + --hash=sha256:d48a880098c96020b02d5a1f7d9251308510ce8858940e6fa99ece33f610838b \ + --hash=sha256:d68b6cef7827e8641e8ef16f4494edda8b36104d79773a334beaa1e3521430f6 \ + --hash=sha256:d9b29c1f0ae438d5ee9acb31cadee00a58c46cc9c0b2f9038c6b0b3470877a8c \ + --hash=sha256:d9b97165e8aed9272a6bb17c01e3cc5871a594a446ebedc996e2397a1c1ea8ef \ + --hash=sha256:da68248800ad6320861f129cd9c1bf96ca849a2771a59e0344e88681905916f5 \ + --hash=sha256:da902562c3e9c550df360bfa53c035b2f241fed6d9aef119048073680ace4a18 \ + --hash=sha256:dbd5c7a25a7cb98f5ca55d258b103a2054f859a46ae11aaf23134f9cc0d356ad \ + --hash=sha256:dd4f05f54a52fb558f1ba9f528228066954fee3ebe629fc1660d874d040ae5a3 \ + --hash=sha256:f93fd8e5c8c0a4aa1f424d6173f14a892044054871c771f8566e4008eaa359d2 \ + --hash=sha256:fc33c5141b55ed366cfaad382df24fe7dcbc686de5be719b207bb248e3053dc5 + # via + # argon2-cffi-bindings + # cryptography + # curl-cffi + # pyzmq +charset-normalizer==3.4.4 \ + --hash=sha256:0a98e6759f854bd25a58a73fa88833fba3b7c491169f86ce1180c948ab3fd394 \ + --hash=sha256:0d3d8f15c07f86e9ff82319b3d9ef6f4bf907608f53fe9d92b28ea9ae3d1fd89 \ + --hash=sha256:11d694519d7f29d6cd09f6ac70028dba10f92f6cdd059096db198c283794ac86 \ + --hash=sha256:21d142cc6c0ec30d2efee5068ca36c128a30b0f2c53c1c07bd78cb6bc1d3be5f \ + --hash=sha256:2437418e20515acec67d86e12bf70056a33abdacb5cb1655042f6538d6b085a8 \ + --hash=sha256:277e970e750505ed74c832b4bf75dac7476262ee2a013f5574dd49075879e161 \ + --hash=sha256:2b7d8f6c26245217bd2ad053761201e9f9680f8ce52f0fcd8d0755aeae5b2152 \ + --hash=sha256:3162d5d8ce1bb98dd51af660f2121c55d0fa541b46dff7bb9b9f86ea1d87de72 \ + --hash=sha256:31fd66405eaf47bb62e8cd575dc621c56c668f27d46a61d975a249930dd5e2a4 \ + --hash=sha256:362d61fd13843997c1c446760ef36f240cf81d3ebf74ac62652aebaf7838561e \ + --hash=sha256:376bec83a63b8021bb5c8ea75e21c4ccb86e7e45ca4eb81146091b56599b80c3 \ + --hash=sha256:47cc91b2f4dd2833fddaedd2893006b0106129d4b94fdb6af1f4ce5a9965577c \ + --hash=sha256:542d2cee80be6f80247095cc36c418f7bddd14f4a6de45af91dfad36d817bba2 \ + --hash=sha256:554af85e960429cf30784dd47447d5125aaa3b99a6f0683589dbd27e2f45da44 \ + --hash=sha256:5833d2c39d8896e4e19b689ffc198f08ea58116bee26dea51e362ecc7cd3ed26 \ + --hash=sha256:5ae497466c7901d54b639cf42d5b8c1b6a4fead55215500d2f486d34db48d016 \ + --hash=sha256:5bd2293095d766545ec1a8f612559f6b40abc0eb18bb2f5d1171872d34036ede \ + --hash=sha256:5bfbb1b9acf3334612667b61bd3002196fe2a1eb4dd74d247e0f2a4d50ec9bbf \ + --hash=sha256:5dbe56a36425d26d6cfb40ce79c314a2e4dd6211d51d6d2191c00bed34f354cc \ + --hash=sha256:5f819d5fe9234f9f82d75bdfa9aef3a3d72c4d24a6e57aeaebba32a704553aa0 \ + --hash=sha256:65e2befcd84bc6f37095f5961e68a6f077bf44946771354a28ad434c2cce0ae1 \ + --hash=sha256:6b39f987ae8ccdf0d2642338faf2abb1862340facc796048b604ef14919e55ed \ + 
--hash=sha256:6e1fcf0720908f200cd21aa4e6750a48ff6ce4afe7ff5a79a90d5ed8a08296f8 \ + --hash=sha256:74018750915ee7ad843a774364e13a3db91682f26142baddf775342c3f5b1133 \ + --hash=sha256:74664978bb272435107de04e36db5a9735e78232b85b77d45cfb38f758efd33e \ + --hash=sha256:74bb723680f9f7a6234dcf67aea57e708ec1fbdf5699fb91dfd6f511b0a320ef \ + --hash=sha256:752944c7ffbfdd10c074dc58ec2d5a8a4cd9493b314d367c14d24c17684ddd14 \ + --hash=sha256:780236ac706e66881f3b7f2f32dfe90507a09e67d1d454c762cf642e6e1586e0 \ + --hash=sha256:799a7a5e4fb2d5898c60b640fd4981d6a25f1c11790935a44ce38c54e985f828 \ + --hash=sha256:7a32c560861a02ff789ad905a2fe94e3f840803362c84fecf1851cb4cf3dc37f \ + --hash=sha256:81d5eb2a312700f4ecaa977a8235b634ce853200e828fbadf3a9c50bab278328 \ + --hash=sha256:82004af6c302b5d3ab2cfc4cc5f29db16123b1a8417f2e25f9066f91d4411090 \ + --hash=sha256:840c25fb618a231545cbab0564a799f101b63b9901f2569faecd6b222ac72381 \ + --hash=sha256:8a6562c3700cce886c5be75ade4a5db4214fda19fede41d9792d100288d8f94c \ + --hash=sha256:8af65f14dc14a79b924524b1e7fffe304517b2bff5a58bf64f30b98bbc5079eb \ + --hash=sha256:8ef3c867360f88ac904fd3f5e1f902f13307af9052646963ee08ff4f131adafc \ + --hash=sha256:94537985111c35f28720e43603b8e7b43a6ecfb2ce1d3058bbe955b73404e21a \ + --hash=sha256:99ae2cffebb06e6c22bdc25801d7b30f503cc87dbd283479e7b606f70aff57ec \ + --hash=sha256:9a26f18905b8dd5d685d6d07b0cdf98a79f3c7a918906af7cc143ea2e164c8bc \ + --hash=sha256:9b35f4c90079ff2e2edc5b26c0c77925e5d2d255c42c74fdb70fb49b172726ac \ + --hash=sha256:9f7fcd74d410a36883701fafa2482a6af2ff5ba96b9a620e9e0721e28ead5569 \ + --hash=sha256:a59cb51917aa591b1c4e6a43c132f0cdc3c76dbad6155df4e28ee626cc77a0a3 \ + --hash=sha256:a79cfe37875f822425b89a82333404539ae63dbdddf97f84dcbc3d339aae9525 \ + --hash=sha256:a8a8b89589086a25749f471e6a900d3f662d1d3b6e2e59dcecf787b1cc3a1894 \ + --hash=sha256:ac1c4a689edcc530fc9d9aa11f5774b9e2f33f9a0c6a57864e90908f5208d30a \ + --hash=sha256:af2d8c67d8e573d6de5bc30cdb27e9b95e49115cd9baad5ddbd1a6207aaa82a9 \ + --hash=sha256:b435cba5f4f750aa6c0a0d92c541fb79f69a387c91e61f1795227e4ed9cece14 \ + --hash=sha256:b5b290ccc2a263e8d185130284f8501e3e36c5e02750fc6b6bdeb2e9e96f1e25 \ + --hash=sha256:bc7637e2f80d8530ee4a78e878bce464f70087ce73cf7c1caf142416923b98f1 \ + --hash=sha256:c0463276121fdee9c49b98908b3a89c39be45d86d1dbaa22957e38f6321d4ce3 \ + --hash=sha256:c8ae8a0f02f57a6e61203a31428fa1d677cbe50c93622b4149d5c0f319c1d19e \ + --hash=sha256:ca5862d5b3928c4940729dacc329aa9102900382fea192fc5e52eb69d6093815 \ + --hash=sha256:cb6254dc36b47a990e59e1068afacdcd02958bdcce30bb50cc1700a8b9d624a6 \ + --hash=sha256:d055ec1e26e441f6187acf818b73564e6e6282709e9bcb5b63f5b23068356a15 \ + --hash=sha256:d1f13550535ad8cff21b8d757a3257963e951d96e20ec82ab44bc64aeb62a191 \ + --hash=sha256:d9c7f57c3d666a53421049053eaacdd14bbd0a528e2186fcb2e672effd053bb0 \ + --hash=sha256:d9e45d7faa48ee908174d8fe84854479ef838fc6a705c9315372eacbc2f02897 \ + --hash=sha256:da3326d9e65ef63a817ecbcc0df6e94463713b754fe293eaa03da99befb9a5bd \ + --hash=sha256:de00632ca48df9daf77a2c65a484531649261ec9f25489917f09e455cb09ddb2 \ + --hash=sha256:e1f185f86a6f3403aa2420e815904c67b2f9ebc443f045edd0de921108345794 \ + --hash=sha256:ebf3e58c7ec8a8bed6d66a75d7fb37b55e5015b03ceae72a8e7c74495551e224 \ + --hash=sha256:ecaae4149d99b1c9e7b88bb03e3221956f68fd6d50be2ef061b2381b61d20838 \ + --hash=sha256:eecbc200c7fd5ddb9a7f16c7decb07b566c29fa2161a16cf67b8d068bd21690a \ + --hash=sha256:f1e34719c6ed0b92f418c7c780480b26b5d9c50349e9a9af7d76bf757530350d \ + 
--hash=sha256:f8bf04158c6b607d747e93949aa60618b61312fe647a6369f88ce2ff16043490 \ + --hash=sha256:f9d332f8c2a2fcbffe1378594431458ddbef721c1769d78e2cbc06280d8155f9 + # via requests +click==8.3.1 \ + --hash=sha256:12ff4785d337a1bb490bb7e9c2b1ee5da3112e94a8622f26a6c77f5d2fc6842a \ + --hash=sha256:981153a64e25f12d547d3426c367a4857371575ee7ad18df2a6183ab0545b2a6 + # via + # flask + # litellm + # typer-slim +colorama==0.4.6 ; os_name == 'nt' or sys_platform == 'win32' \ + --hash=sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44 \ + --hash=sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6 + # via + # build + # click + # ipython + # tqdm +comm==0.2.3 \ + --hash=sha256:2dc8048c10962d55d7ad693be1e7045d891b7ce8d999c97963a5e3e99c055971 \ + --hash=sha256:c615d91d75f7f04f095b30d1c1711babd43bdc6419c1be9886a85f2f4e489417 + # via + # ipykernel + # ipywidgets +connectorx==0.4.5 \ + --hash=sha256:0737254429e22e5012e1fe6a849112da38abb9b56743b3b8c8a1f902e5270e75 \ + --hash=sha256:0ea5feccc2fb3471fa72c1d920bb4ed17ba1b18aedb89dee5ee6009138e35260 \ + --hash=sha256:2073970532a8e6e2a8a2c0b163497eb8e58216e28fdab6693fcd7e58bfc47bfc \ + --hash=sha256:234af0b6ab4a12b64e3818ebea1eb98cc8b47650280fb40924b43e2f1611acb4 \ + --hash=sha256:25efda2317f40e6536582c3dd4f57a8a31c7e5969d708a674272c05591e6f5a2 \ + --hash=sha256:27539e03408705f318572b163c419572a114fdc9baf4d1e6cd746bb87f573cf2 \ + --hash=sha256:31a65ff4ec8fde7ea7aa2812f2b21e7a512a3216b1b22ca1b02d3975b0bf1e75 \ + --hash=sha256:3863bc71677d6314b60cb1e1489a650114d37d8d9f58f2df038cae4a82d2ffc5 \ + --hash=sha256:38ad8a032fddf25c36c6911d857fbe54220fe28439f02a4beb273b29bdef1eb8 \ + --hash=sha256:3ddfe372065b974365bff3b383e39c29cad468c0e7556543dd23753446c441ed \ + --hash=sha256:3fa0811081c84befde6d3aa661ecb17b95be9e3851e20009fd27d0e1b925ceb9 \ + --hash=sha256:3fd7788294417cbbb3811f8942e4fe3b4c190b80627a3c706ceae6c321824bcf \ + --hash=sha256:50c20558beff2719be34ff325213526c1700c3a20743e9e0ba592774ebc9cc92 \ + --hash=sha256:ab1d62a26350055c5e901daa4d6dddb75b11addb923797158c809dffc4f0ac9e \ + --hash=sha256:c68cc9c6bff737d3c9fb8735b27ecc8474238ef640abb701ee0ab213c6c95f8c \ + --hash=sha256:cc01ca122f649e62707f49f7220ba1ae67961b260e2dcff9e8647ea9915a01cf \ + --hash=sha256:e605b5eca75fe63117e5fb93f94e940ede0513340671631da35bdb5a035f8163 \ + --hash=sha256:f139bbfa34840b89d0a5ec760026a9268c18c63fb739568ecbc77660d3e4fc1f \ + --hash=sha256:f5d4754069644a712bd3105345e4f7c680420c5bb1d1264070cda058c7f07fb3 \ + --hash=sha256:ff2f4236a0fc14cd724b03df1f11c03b714442f4381575465f7d0f4f91135766 + # via data-formulator +cryptography==46.0.4 \ + --hash=sha256:01df4f50f314fbe7009f54046e908d1754f19d0c6d3070df1e6268c5a4af09fa \ + --hash=sha256:0563655cb3c6d05fb2afe693340bc050c30f9f34e15763361cf08e94749401fc \ + --hash=sha256:078e5f06bd2fa5aea5a324f2a09f914b1484f1d0c2a4d6a8a28c74e72f65f2da \ + --hash=sha256:0a9ad24359fee86f131836a9ac3bffc9329e956624a2d379b613f8f8abaf5255 \ + --hash=sha256:2067461c80271f422ee7bdbe79b9b4be54a5162e90345f86a23445a0cf3fd8a2 \ + --hash=sha256:281526e865ed4166009e235afadf3a4c4cba6056f99336a99efba65336fd5485 \ + --hash=sha256:2d08bc22efd73e8854b0b7caff402d735b354862f1145d7be3b9c0f740fef6a0 \ + --hash=sha256:3c268a3490df22270955966ba236d6bc4a8f9b6e4ffddb78aac535f1a5ea471d \ + --hash=sha256:3d425eacbc9aceafd2cb429e42f4e5d5633c6f873f5e567077043ef1b9bbf616 \ + --hash=sha256:44cc0675b27cadb71bdbb96099cca1fa051cd11d2ade09e5cd3a2edb929ed947 \ + --hash=sha256:47bcd19517e6389132f76e2d5303ded6cf3f78903da2158a671be8de024f4cd0 \ + 
--hash=sha256:485e2b65d25ec0d901bca7bcae0f53b00133bf3173916d8e421f6fddde103908 \ + --hash=sha256:5aa3e463596b0087b3da0dbe2b2487e9fc261d25da85754e30e3b40637d61f81 \ + --hash=sha256:5f14fba5bf6f4390d7ff8f086c566454bff0411f6d8aa7af79c88b6f9267aecc \ + --hash=sha256:62217ba44bf81b30abaeda1488686a04a702a261e26f87db51ff61d9d3510abd \ + --hash=sha256:6225d3ebe26a55dbc8ead5ad1265c0403552a63336499564675b29eb3184c09b \ + --hash=sha256:6bb5157bf6a350e5b28aee23beb2d84ae6f5be390b2f8ee7ea179cda077e1019 \ + --hash=sha256:728fedc529efc1439eb6107b677f7f7558adab4553ef8669f0d02d42d7b959a7 \ + --hash=sha256:766330cce7416c92b5e90c3bb71b1b79521760cdcfc3a6a1a182d4c9fab23d2b \ + --hash=sha256:812815182f6a0c1d49a37893a303b44eaac827d7f0d582cecfc81b6427f22973 \ + --hash=sha256:829c2b12bbc5428ab02d6b7f7e9bbfd53e33efd6672d21341f2177470171ad8b \ + --hash=sha256:82a62483daf20b8134f6e92898da70d04d0ef9a75829d732ea1018678185f4f5 \ + --hash=sha256:8a15fb869670efa8f83cbffbc8753c1abf236883225aed74cd179b720ac9ec80 \ + --hash=sha256:8bf75b0259e87fa70bddc0b8b4078b76e7fd512fd9afae6c1193bcf440a4dbef \ + --hash=sha256:91627ebf691d1ea3976a031b61fb7bac1ccd745afa03602275dda443e11c8de0 \ + --hash=sha256:93d8291da8d71024379ab2cb0b5c57915300155ad42e07f76bea6ad838d7e59b \ + --hash=sha256:9b34d8ba84454641a6bf4d6762d15847ecbd85c1316c0a7984e6e4e9f748ec2e \ + --hash=sha256:9b4d17bc7bd7cdd98e3af40b441feaea4c68225e2eb2341026c84511ad246c0c \ + --hash=sha256:9c2da296c8d3415b93e6053f5a728649a87a48ce084a9aaf51d6e46c87c7f2d2 \ + --hash=sha256:a05177ff6296644ef2876fce50518dffb5bcdf903c85250974fc8bc85d54c0af \ + --hash=sha256:a90e43e3ef65e6dcf969dfe3bb40cbf5aef0d523dff95bfa24256be172a845f4 \ + --hash=sha256:a9556ba711f7c23f77b151d5798f3ac44a13455cc68db7697a1096e6d0563cab \ + --hash=sha256:b1de0ebf7587f28f9190b9cb526e901bf448c9e6a99655d2b07fff60e8212a82 \ + --hash=sha256:be8c01a7d5a55f9a47d1888162b76c8f49d62b234d88f0ff91a9fbebe32ffbc3 \ + --hash=sha256:bfd019f60f8abc2ed1b9be4ddc21cfef059c841d86d710bb69909a688cbb8f59 \ + --hash=sha256:c236a44acfb610e70f6b3e1c3ca20ff24459659231ef2f8c48e879e2d32b73da \ + --hash=sha256:c411f16275b0dea722d76544a61d6421e2cc829ad76eec79280dbdc9ddf50061 \ + --hash=sha256:c92010b58a51196a5f41c3795190203ac52edfd5dc3ff99149b4659eba9d2085 \ + --hash=sha256:d5a45ddc256f492ce42a4e35879c5e5528c09cd9ad12420828c972951d8e016b \ + --hash=sha256:daa392191f626d50f1b136c9b4cf08af69ca8279d110ea24f5c2700054d2e263 \ + --hash=sha256:dc1272e25ef673efe72f2096e92ae39dea1a1a450dd44918b15351f72c5a168e \ + --hash=sha256:dce1e4f068f03008da7fa51cc7abc6ddc5e5de3e3d1550334eaf8393982a5829 \ + --hash=sha256:dd5aba870a2c40f87a3af043e0dee7d9eb02d4aff88a797b48f2b43eff8c3ab4 \ + --hash=sha256:de0f5f4ec8711ebc555f54735d4c673fc34b65c44283895f1a08c2b49d2fd99c \ + --hash=sha256:df4a817fa7138dd0c96c8c8c20f04b8aaa1fac3bbf610913dcad8ea82e1bfd3f \ + --hash=sha256:e07ea39c5b048e085f15923511d8121e4a9dc45cee4e3b970ca4f0d338f23095 \ + --hash=sha256:eeeb2e33d8dbcccc34d64651f00a98cb41b2dc69cef866771a5717e6734dfa32 \ + --hash=sha256:fa0900b9ef9c49728887d1576fd8d9e7e3ea872fa9b25ef9b64888adc434e976 \ + --hash=sha256:fdc3daab53b212472f1524d070735b2f0c214239df131903bae1d598016fa822 + # via + # azure-identity + # azure-storage-blob + # google-auth + # msal + # pyjwt +curl-cffi==0.13.0 \ + --hash=sha256:28911b526e8cd4aa0e5e38401bfe6887e8093907272f1f67ca22e6beb2933a51 \ + --hash=sha256:434cadbe8df2f08b2fc2c16dff2779fb40b984af99c06aa700af898e185bb9db \ + --hash=sha256:59afa877a9ae09efa04646a7d068eeea48915a95d9add0a29854e7781679fcd7 \ + 
--hash=sha256:62ecd90a382bd5023750e3606e0aa7cb1a3a8ba41c14270b8e5e149ebf72c5ca \ + --hash=sha256:66a6b75ce971de9af64f1b6812e275f60b88880577bac47ef1fa19694fa21cd3 \ + --hash=sha256:6d433ffcb455ab01dd0d7bde47109083aa38b59863aa183d29c668ae4c96bf8e \ + --hash=sha256:8eb4083371bbb94e9470d782de235fb5268bf43520de020c9e5e6be8f395443f \ + --hash=sha256:b4e0de45ab3b7a835c72bd53640c2347415111b43421b5c7a1a0b18deae2e541 \ + --hash=sha256:d06ed389e45a7ca97b17c275dbedd3d6524560270e675c720e93a2018a766076 \ + --hash=sha256:d438a3b45244e874794bc4081dc1e356d2bb926dcc7021e5a8fef2e2105ef1d8 + # via yfinance +db-dtypes==1.5.0 \ + --hash=sha256:abdbb2e4eb965800ed6f98af0c5c1cafff9063ace09114be2d26a7f046be2c8a \ + --hash=sha256:ad9e94243f53e104bc77dbf9ae44b580d83a770d3694483aba59c9767966daa5 + # via data-formulator +debugpy==1.8.20 \ + --hash=sha256:077a7447589ee9bc1ff0cdf443566d0ecf540ac8aa7333b775ebcb8ce9f4ecad \ + --hash=sha256:1f7650546e0eded1902d0f6af28f787fa1f1dbdbc97ddabaf1cd963a405930cb \ + --hash=sha256:352036a99dd35053b37b7803f748efc456076f929c6a895556932eaf2d23b07f \ + --hash=sha256:4057ac68f892064e5f98209ab582abfee3b543fb55d2e87610ddc133a954d390 \ + --hash=sha256:4ae3135e2089905a916909ef31922b2d733d756f66d87345b3e5e52b7a55f13d \ + --hash=sha256:55bc8701714969f1ab89a6d5f2f3d40c36f91b2cbe2f65d98bf8196f6a6a2c33 \ + --hash=sha256:5be9bed9ae3be00665a06acaa48f8329d2b9632f15fd09f6a9a8c8d9907e54d7 \ + --hash=sha256:5dff4bb27027821fdfcc9e8f87309a28988231165147c31730128b1c983e282a \ + --hash=sha256:773e839380cf459caf73cc533ea45ec2737a5cc184cf1b3b796cd4fd98504fec \ + --hash=sha256:7de0b7dfeedc504421032afba845ae2a7bcc32ddfb07dae2c3ca5442f821c344 \ + --hash=sha256:84562982dd7cf5ebebfdea667ca20a064e096099997b175fe204e86817f64eaf \ + --hash=sha256:88f47850a4284b88bd2bfee1f26132147d5d504e4e86c22485dfa44b97e19b4b \ + --hash=sha256:9c74df62fc064cd5e5eaca1353a3ef5a5d50da5eb8058fcef63106f7bebe6173 \ + --hash=sha256:a1a8f851e7cf171330679ef6997e9c579ef6dd33c9098458bd9986a0f4ca52e3 \ + --hash=sha256:a98eec61135465b062846112e5ecf2eebb855305acc1dfbae43b72903b8ab5be \ + --hash=sha256:da11dea6447b2cadbf8ce2bec59ecea87cc18d2c574980f643f2d2dfe4862393 \ + --hash=sha256:eada6042ad88fa1571b74bd5402ee8b86eded7a8f7b827849761700aff171f1b \ + --hash=sha256:eb506e45943cab2efb7c6eafdd65b842f3ae779f020c82221f55aca9de135ed7 + # via ipykernel +decorator==5.2.1 \ + --hash=sha256:65f266143752f734b0a7cc83c46f4618af75b8c5911b00ccb61d0ac9b6da0360 \ + --hash=sha256:d316bb415a2d9e2d2b3abcc4084c6502fc09240e292cd76a76afc106a1c8e04a + # via ipython +defusedxml==0.7.1 \ + --hash=sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69 \ + --hash=sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61 + # via nbconvert +deprecated==1.3.1 \ + --hash=sha256:597bfef186b6f60181535a29fbe44865ce137a5079f295b479886c82729d5f3f \ + --hash=sha256:b1b50e0ff0c1fddaa5708a2c6b0a6588bb09b892825ab2b214ac9ea9d92a5223 + # via limits +distro==1.9.0 \ + --hash=sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed \ + --hash=sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2 + # via openai +dnspython==2.8.0 \ + --hash=sha256:01d9bbc4a2d76bf0db7c1f729812ded6d912bd318d3b1cf81d30c0f845dbf3af \ + --hash=sha256:181d3c6996452cb1189c4046c61599b84a5a86e099562ffde77d26984ff26d0f + # via pymongo +duckdb==1.4.4 \ + --hash=sha256:0509b39ea7af8cff0198a99d206dca753c62844adab54e545984c2e2c1381616 \ + --hash=sha256:0d636ceda422e7babd5e2f7275f6a0d1a3405e6a01873f00d38b72118d30c10b \ + 
--hash=sha256:1af6e76fe8bd24875dc56dd8e38300d64dc708cd2e772f67b9fbc635cc3066a3 \ + --hash=sha256:1f8d55843cc940e36261689054f7dfb6ce35b1f5b0953b0d355b6adb654b0d52 \ + --hash=sha256:25874f8b1355e96178079e37312c3ba6d61a2354f51319dae860cf21335c3a20 \ + --hash=sha256:337f8b24e89bc2e12dadcfe87b4eb1c00fd920f68ab07bc9b70960d6523b8bc3 \ + --hash=sha256:452c5b5d6c349dc5d1154eb2062ee547296fcbd0c20e9df1ed00b5e1809089da \ + --hash=sha256:47dd4162da6a2be59a0aef640eb08d6360df1cf83c317dcc127836daaf3b7f7c \ + --hash=sha256:4c25d5b0febda02b7944e94fdae95aecf952797afc8cb920f677b46a7c251955 \ + --hash=sha256:50f2eb173c573811b44aba51176da7a4e5c487113982be6a6a1c37337ec5fa57 \ + --hash=sha256:53cd6423136ab44383ec9955aefe7599b3fb3dd1fe006161e6396d8167e0e0d4 \ + --hash=sha256:5536eb952a8aa6ae56469362e344d4e6403cc945a80bc8c5c2ebdd85d85eb64b \ + --hash=sha256:59c8d76016dde854beab844935b1ec31de358d4053e792988108e995b18c08e7 \ + --hash=sha256:5ba684f498d4e924c7e8f30dd157da8da34c8479746c5011b6c0e037e9c60ad2 \ + --hash=sha256:6703dd1bb650025b3771552333d305d62ddd7ff182de121483d4e042ea6e2e00 \ + --hash=sha256:6792ca647216bd5c4ff16396e4591cfa9b4a72e5ad7cdd312cec6d67e8431a7c \ + --hash=sha256:6cb357cfa3403910e79e2eb46c8e445bb1ee2fd62e9e9588c6b999df4256abc1 \ + --hash=sha256:6fb1225a9ea5877421481d59a6c556a9532c32c16c7ae6ca8d127e2b878c9389 \ + --hash=sha256:7df7351328ffb812a4a289732f500d621e7de9942a3a2c9b6d4afcf4c0e72526 \ + --hash=sha256:8bba52fd2acb67668a4615ee17ee51814124223de836d9e2fdcbc4c9021b3d3c \ + --hash=sha256:8e5c2d8a0452df55e092959c0bfc8ab8897ac3ea0f754cb3b0ab3e165cd79aff \ + --hash=sha256:b297eff642503fd435a9de5a9cb7db4eccb6f61d61a55b30d2636023f149855f \ + --hash=sha256:bf138201f56e5d6fc276a25138341b3523e2f84733613fc43f02c54465619a95 \ + --hash=sha256:c65d15c440c31e06baaebfd2c06d71ce877e132779d309f1edf0a85d23c07e92 \ + --hash=sha256:d0440f59e0cd9936a9ebfcf7a13312eda480c79214ffed3878d75947fc3b7d6d \ + --hash=sha256:d525de5f282b03aa8be6db86b1abffdceae5f1055113a03d5b50cd2fb8cf2ef8 \ + --hash=sha256:ddcfd9c6ff234da603a1edd5fd8ae6107f4d042f74951b65f91bc5e2643856b3 \ + --hash=sha256:f28a18cc790217e5b347bb91b2cab27aafc557c58d3d8382e04b4fe55d0c3f66 \ + --hash=sha256:fb94de6d023de9d79b7edc1ae07ee1d0b4f5fa8a9dcec799650b5befdf7aafec + # via data-formulator +executing==2.2.1 \ + --hash=sha256:3632cc370565f6648cc328b32435bd120a1e4ebb20c77e3fdde9a13cd1e533c4 \ + --hash=sha256:760643d3452b4d777d295bb167ccc74c64a81df23fb5e08eff250c425a4b2017 + # via stack-data +fastjsonschema==2.21.2 \ + --hash=sha256:1c797122d0a86c5cace2e54bf4e819c36223b552017172f32c5c024a6b77e463 \ + --hash=sha256:b1eb43748041c880796cd077f1a07c3d94e93ae84bba5ed36800a33554ae05de + # via nbformat +fastuuid==0.14.0 \ + --hash=sha256:05a8dde1f395e0c9b4be515b7a521403d1e8349443e7641761af07c7ad1624b1 \ + --hash=sha256:09098762aad4f8da3a888eb9ae01c84430c907a297b97166b8abc07b640f2995 \ + --hash=sha256:09378a05020e3e4883dfdab438926f31fea15fd17604908f3d39cbeb22a0b4dc \ + --hash=sha256:0c9ec605ace243b6dbe3bd27ebdd5d33b00d8d1d3f580b39fdd15cd96fd71796 \ + --hash=sha256:0df14e92e7ad3276327631c9e7cec09e32572ce82089c55cb1bb8df71cf394ed \ + --hash=sha256:12ac85024637586a5b69645e7ed986f7535106ed3013640a393a03e461740cb7 \ + --hash=sha256:1383fff584fa249b16329a059c68ad45d030d5a4b70fb7c73a08d98fd53bcdab \ + --hash=sha256:178947fc2f995b38497a74172adee64fdeb8b7ec18f2a5934d037641ba265d26 \ + --hash=sha256:1bf539a7a95f35b419f9ad105d5a8a35036df35fdafae48fb2fd2e5f318f0d75 \ + --hash=sha256:1ca61b592120cf314cfd66e662a5b54a578c5a15b26305e1b8b618a6f22df714 \ + 
--hash=sha256:1e3cc56742f76cd25ecb98e4b82a25f978ccffba02e4bdce8aba857b6d85d87b \ + --hash=sha256:1e690d48f923c253f28151b3a6b4e335f2b06bf669c68a02665bc150b7839e94 \ + --hash=sha256:2b29e23c97e77c3a9514d70ce343571e469098ac7f5a269320a0f0b3e193ab36 \ + --hash=sha256:2fb3c0d7fef6674bbeacdd6dbd386924a7b60b26de849266d1ff6602937675c8 \ + --hash=sha256:33e678459cf4addaedd9936bbb038e35b3f6b2061330fd8f2f6a1d80414c0f87 \ + --hash=sha256:3acdf655684cc09e60fb7e4cf524e8f42ea760031945aa8086c7eae2eeeabeb8 \ + --hash=sha256:73946cb950c8caf65127d4e9a325e2b6be0442a224fd51ba3b6ac44e1912ce34 \ + --hash=sha256:77a09cb7427e7af74c594e409f7731a0cf887221de2f698e1ca0ebf0f3139021 \ + --hash=sha256:77e94728324b63660ebf8adb27055e92d2e4611645bf12ed9d88d30486471d0a \ + --hash=sha256:808527f2407f58a76c916d6aa15d58692a4a019fdf8d4c32ac7ff303b7d7af09 \ + --hash=sha256:9579618be6280700ae36ac42c3efd157049fe4dd40ca49b021280481c78c3176 \ + --hash=sha256:9a133bf9cc78fdbd1179cb58a59ad0100aa32d8675508150f3658814aeefeaa4 \ + --hash=sha256:9bd57289daf7b153bfa3e8013446aa144ce5e8c825e9e366d455155ede5ea2dc \ + --hash=sha256:a0809f8cc5731c066c909047f9a314d5f536c871a7a22e815cc4967c110ac9ad \ + --hash=sha256:a6f46790d59ab38c6aa0e35c681c0484b50dc0acf9e2679c005d61e019313c24 \ + --hash=sha256:a8a0dfea3972200f72d4c7df02c8ac70bad1bb4c58d7e0ec1e6f341679073a7f \ + --hash=sha256:aa75b6657ec129d0abded3bec745e6f7ab642e6dba3a5272a68247e85f5f316f \ + --hash=sha256:ab32f74bd56565b186f036e33129da77db8be09178cd2f5206a5d4035fb2a23f \ + --hash=sha256:ab3f5d36e4393e628a4df337c2c039069344db5f4b9d2a3c9cea48284f1dd741 \ + --hash=sha256:ac60fc860cdf3c3f327374db87ab8e064c86566ca8c49d2e30df15eda1b0c2d5 \ + --hash=sha256:b852a870a61cfc26c884af205d502881a2e59cc07076b60ab4a951cc0c94d1ad \ + --hash=sha256:b9a0ca4f03b7e0b01425281ffd44e99d360e15c895f1907ca105854ed85e2057 \ + --hash=sha256:bbb0c4b15d66b435d2538f3827f05e44e2baafcc003dd7d8472dc67807ab8fd8 \ + --hash=sha256:c0a94245afae4d7af8c43b3159d5e3934c53f47140be0be624b96acd672ceb73 \ + --hash=sha256:c7502d6f54cd08024c3ea9b3514e2d6f190feb2f46e6dbcd3747882264bb5f7b \ + --hash=sha256:caa1f14d2102cb8d353096bc6ef6c13b2c81f347e6ab9d6fbd48b9dea41c153d \ + --hash=sha256:cb9a030f609194b679e1660f7e32733b7a0f332d519c5d5a6a0a580991290022 \ + --hash=sha256:cd5a7f648d4365b41dbf0e38fe8da4884e57bed4e77c83598e076ac0c93995e7 \ + --hash=sha256:d23ef06f9e67163be38cece704170486715b177f6baae338110983f99a72c070 \ + --hash=sha256:d9e4332dc4ba054434a9594cbfaf7823b57993d7d8e7267831c3e059857cf397 \ + --hash=sha256:df61342889d0f5e7a32f7284e55ef95103f2110fee433c2ae7c2c0956d76ac8a \ + --hash=sha256:e150eab56c95dc9e3fefc234a0eedb342fac433dacc273cd4d150a5b0871e1fa \ + --hash=sha256:e23fc6a83f112de4be0cc1990e5b127c27663ae43f866353166f87df58e73d06 \ + --hash=sha256:ec27778c6ca3393ef662e2762dba8af13f4ec1aaa32d08d77f71f2a70ae9feb8 \ + --hash=sha256:f54d5b36c56a2d5e1a31e73b950b28a0d83eb0c37b91d10408875a5a29494bad + # via litellm +filelock==3.20.3 \ + --hash=sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1 \ + --hash=sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1 + # via huggingface-hub +flask==3.1.2 \ + --hash=sha256:bf656c15c80190ed628ad08cdfd3aaa35beb087855e2f494910aa3774cc4fd87 \ + --hash=sha256:ca1d8112ec8a6158cc29ea4858963350011b5c846a414cdb7a954aa9e967d03c + # via + # data-formulator + # flask-cors + # flask-limiter +flask-cors==6.0.2 \ + --hash=sha256:6e118f3698249ae33e429760db98ce032a8bf9913638d085ca0f4c5534ad2423 \ + --hash=sha256:e57544d415dfd7da89a9564e1e3a9e515042df76e12130641ca6f3f2f03b699a + # 
via data-formulator +flask-limiter==4.1.1 \ + --hash=sha256:ca11608fc7eec43dcea606964ca07c3bd4ec1ae89043a0f67f717899a4f48106 \ + --hash=sha256:e1ae13e06e6b3e39a4902e7d240b901586b25932c2add7bd5f5eeb4bdc11111b + # via data-formulator +fqdn==1.5.1 \ + --hash=sha256:105ed3677e767fb5ca086a0c1f4bb66ebc3c100be518f0e0d755d9eae164d89f \ + --hash=sha256:3a179af3761e4df6eb2e026ff9e1a3033d3587bf980a0b1b2e1e5d08d7358014 + # via jsonschema +frozendict==2.4.7 \ + --hash=sha256:972af65924ea25cf5b4d9326d549e69a9a4918d8a76a9d3a7cd174d98b237550 \ + --hash=sha256:e478fb2a1391a56c8a6e10cc97c4a9002b410ecd1ac28c18d780661762e271bd + # via yfinance +frozenlist==1.8.0 \ + --hash=sha256:0325024fe97f94c41c08872db482cf8ac4800d80e79222c6b0b7b162d5b13686 \ + --hash=sha256:032efa2674356903cd0261c4317a561a6850f3ac864a63fc1583147fb05a79b0 \ + --hash=sha256:03ae967b4e297f58f8c774c7eabcce57fe3c2434817d4385c50661845a058121 \ + --hash=sha256:06be8f67f39c8b1dc671f5d83aaefd3358ae5cdcf8314552c57e7ed3e6475bdd \ + --hash=sha256:073f8bf8becba60aa931eb3bc420b217bb7d5b8f4750e6f8b3be7f3da85d38b7 \ + --hash=sha256:07cdca25a91a4386d2e76ad992916a85038a9b97561bf7a3fd12d5d9ce31870c \ + --hash=sha256:09474e9831bc2b2199fad6da3c14c7b0fbdd377cce9d3d77131be28906cb7d84 \ + --hash=sha256:0c18a16eab41e82c295618a77502e17b195883241c563b00f0aa5106fc4eaa0d \ + --hash=sha256:0f96534f8bfebc1a394209427d0f8a63d343c9779cda6fc25e8e121b5fd8555b \ + --hash=sha256:102e6314ca4da683dca92e3b1355490fed5f313b768500084fbe6371fddfdb79 \ + --hash=sha256:11847b53d722050808926e785df837353bd4d75f1d494377e59b23594d834967 \ + --hash=sha256:119fb2a1bd47307e899c2fac7f28e85b9a543864df47aa7ec9d3c1b4545f096f \ + --hash=sha256:154e55ec0655291b5dd1b8731c637ecdb50975a2ae70c606d100750a540082f7 \ + --hash=sha256:168c0969a329b416119507ba30b9ea13688fafffac1b7822802537569a1cb0ef \ + --hash=sha256:17c883ab0ab67200b5f964d2b9ed6b00971917d5d8a92df149dc2c9779208ee9 \ + --hash=sha256:1a7fa382a4a223773ed64242dbe1c9c326ec09457e6b8428efb4118c685c3dfd \ + --hash=sha256:21900c48ae04d13d416f0e1e0c4d81f7931f73a9dfa0b7a8746fb2fe7dd970ed \ + --hash=sha256:229bf37d2e4acdaf808fd3f06e854a4a7a3661e871b10dc1f8f1896a3b05f18b \ + --hash=sha256:2552f44204b744fba866e573be4c1f9048d6a324dfe14475103fd51613eb1d1f \ + --hash=sha256:27c6e8077956cf73eadd514be8fb04d77fc946a7fe9f7fe167648b0b9085cc25 \ + --hash=sha256:28bd570e8e189d7f7b001966435f9dac6718324b5be2990ac496cf1ea9ddb7fe \ + --hash=sha256:294e487f9ec720bd8ffcebc99d575f7eff3568a08a253d1ee1a0378754b74143 \ + --hash=sha256:29548f9b5b5e3460ce7378144c3010363d8035cea44bc0bf02d57f5a685e084e \ + --hash=sha256:2c5dcbbc55383e5883246d11fd179782a9d07a986c40f49abe89ddf865913930 \ + --hash=sha256:2dc43a022e555de94c3b68a4ef0b11c4f747d12c024a520c7101709a2144fb37 \ + --hash=sha256:2f05983daecab868a31e1da44462873306d3cbfd76d1f0b5b69c473d21dbb128 \ + --hash=sha256:33139dc858c580ea50e7e60a1b0ea003efa1fd42e6ec7fdbad78fff65fad2fd2 \ + --hash=sha256:33f48f51a446114bc5d251fb2954ab0164d5be02ad3382abcbfe07e2531d650f \ + --hash=sha256:34187385b08f866104f0c0617404c8eb08165ab1272e884abc89c112e9c00746 \ + --hash=sha256:342c97bf697ac5480c0a7ec73cd700ecfa5a8a40ac923bd035484616efecc2df \ + --hash=sha256:3462dd9475af2025c31cc61be6652dfa25cbfb56cbbf52f4ccfe029f38decaf8 \ + --hash=sha256:39ecbc32f1390387d2aa4f5a995e465e9e2f79ba3adcac92d68e3e0afae6657c \ + --hash=sha256:3e0761f4d1a44f1d1a47996511752cf3dcec5bbdd9cc2b4fe595caf97754b7a0 \ + --hash=sha256:3ede829ed8d842f6cd48fc7081d7a41001a56f1f38603f9d49bf3020d59a31ad \ + 
--hash=sha256:3ef2d026f16a2b1866e1d86fc4e1291e1ed8a387b2c333809419a2f8b3a77b82 \ + --hash=sha256:405e8fe955c2280ce66428b3ca55e12b3c4e9c336fb2103a4937e891c69a4a29 \ + --hash=sha256:4314debad13beb564b708b4a496020e5306c7333fa9a3ab90374169a20ffab30 \ + --hash=sha256:433403ae80709741ce34038da08511d4a77062aa924baf411ef73d1146e74faf \ + --hash=sha256:44389d135b3ff43ba8cc89ff7f51f5a0bb6b63d829c8300f79a2fe4fe61bcc62 \ + --hash=sha256:494a5952b1c597ba44e0e78113a7266e656b9794eec897b19ead706bd7074383 \ + --hash=sha256:4970ece02dbc8c3a92fcc5228e36a3e933a01a999f7094ff7c23fbd2beeaa67c \ + --hash=sha256:4e0c11f2cc6717e0a741f84a527c52616140741cd812a50422f83dc31749fb52 \ + --hash=sha256:50066c3997d0091c411a66e710f4e11752251e6d2d73d70d8d5d4c76442a199d \ + --hash=sha256:517279f58009d0b1f2e7c1b130b377a349405da3f7621ed6bfae50b10adf20c1 \ + --hash=sha256:54b2077180eb7f83dd52c40b2750d0a9f175e06a42e3213ce047219de902717a \ + --hash=sha256:5500ef82073f599ac84d888e3a8c1f77ac831183244bfd7f11eaa0289fb30714 \ + --hash=sha256:581ef5194c48035a7de2aefc72ac6539823bb71508189e5de01d60c9dcd5fa65 \ + --hash=sha256:5c1c8e78426e59b3f8005e9b19f6ff46e5845895adbde20ece9218319eca6506 \ + --hash=sha256:5d63a068f978fc69421fb0e6eb91a9603187527c86b7cd3f534a5b77a592b888 \ + --hash=sha256:6da155091429aeba16851ecb10a9104a108bcd32f6c1642867eadaee401c1c41 \ + --hash=sha256:74c51543498289c0c43656701be6b077f4b265868fa7f8a8859c197006efb608 \ + --hash=sha256:776f352e8329135506a1d6bf16ac3f87bc25b28e765949282dcc627af36123aa \ + --hash=sha256:778a11b15673f6f1df23d9586f83c4846c471a8af693a22e066508b77d201ec8 \ + --hash=sha256:78f7b9e5d6f2fdb88cdde9440dc147259b62b9d3b019924def9f6478be254ac1 \ + --hash=sha256:8009897cdef112072f93a0efdce29cd819e717fd2f649ee3016efd3cd885a7ed \ + --hash=sha256:8585e3bb2cdea02fc88ffa245069c36555557ad3609e83be0ec71f54fd4abb52 \ + --hash=sha256:878be833caa6a3821caf85eb39c5ba92d28e85df26d57afb06b35b2efd937231 \ + --hash=sha256:8b7b94a067d1c504ee0b16def57ad5738701e4ba10cec90529f13fa03c833496 \ + --hash=sha256:8d92f1a84bb12d9e56f818b3a746f3efba93c1b63c8387a73dde655e1e42282a \ + --hash=sha256:908bd3f6439f2fef9e85031b59fd4f1297af54415fb60e4254a95f75b3cab3f3 \ + --hash=sha256:92db2bf818d5cc8d9c1f1fc56b897662e24ea5adb36ad1f1d82875bd64e03c24 \ + --hash=sha256:957e7c38f250991e48a9a73e6423db1bb9dd14e722a10f6b8bb8e16a0f55f695 \ + --hash=sha256:96153e77a591c8adc2ee805756c61f59fef4cf4073a9275ee86fe8cba41241f7 \ + --hash=sha256:96f423a119f4777a4a056b66ce11527366a8bb92f54e541ade21f2374433f6d4 \ + --hash=sha256:97260ff46b207a82a7567b581ab4190bd4dfa09f4db8a8b49d1a958f6aa4940e \ + --hash=sha256:974b28cf63cc99dfb2188d8d222bc6843656188164848c4f679e63dae4b0708e \ + --hash=sha256:ac913f8403b36a2c8610bbfd25b8013488533e71e62b4b4adce9c86c8cea905b \ + --hash=sha256:b2a095d45c5d46e5e79ba1e5b9cb787f541a8dee0433836cea4b96a2c439dcd8 \ + --hash=sha256:b3210649ee28062ea6099cfda39e147fa1bc039583c8ee4481cb7811e2448c51 \ + --hash=sha256:b4dec9482a65c54a5044486847b8a66bf10c9cb4926d42927ec4e8fd5db7fed8 \ + --hash=sha256:b6db2185db9be0a04fecf2f241c70b63b1a242e2805be291855078f2b404dd6b \ + --hash=sha256:bac9c42ba2ac65ddc115d930c78d24ab8d4f465fd3fc473cdedfccadb9429806 \ + --hash=sha256:bf0a7e10b077bf5fb9380ad3ae8ce20ef919a6ad93b4552896419ac7e1d8e042 \ + --hash=sha256:c4c800524c9cd9bac5166cd6f55285957fcfc907db323e193f2afcd4d9abd69b \ + --hash=sha256:c8d1634419f39ea6f5c427ea2f90ca85126b54b50837f31497f3bf38266e853d \ + --hash=sha256:c9a63152fe95756b85f31186bddf42e4c02c6321207fd6601a1c89ebac4fe567 \ + 
--hash=sha256:cb89a7f2de3602cfed448095bab3f178399646ab7c61454315089787df07733a \ + --hash=sha256:cba69cb73723c3f329622e34bdbf5ce1f80c21c290ff04256cff1cd3c2036ed2 \ + --hash=sha256:cee686f1f4cadeb2136007ddedd0aaf928ab95216e7691c63e50a8ec066336d0 \ + --hash=sha256:cf253e0e1c3ceb4aaff6df637ce033ff6535fb8c70a764a8f46aafd3d6ab798e \ + --hash=sha256:d1eaff1d00c7751b7c6662e9c5ba6eb2c17a2306ba5e2a37f24ddf3cc953402b \ + --hash=sha256:d3bb933317c52d7ea5004a1c442eef86f426886fba134ef8cf4226ea6ee1821d \ + --hash=sha256:d4d3214a0f8394edfa3e303136d0575eece0745ff2b47bd2cb2e66dd92d4351a \ + --hash=sha256:d6a5df73acd3399d893dafc71663ad22534b5aa4f94e8a2fabfe856c3c1b6a52 \ + --hash=sha256:db1e72ede2d0d7ccb213f218df6a078a9c09a7de257c2fe8fcef16d5925230b1 \ + --hash=sha256:e25ac20a2ef37e91c1b39938b591457666a0fa835c7783c3a8f33ea42870db94 \ + --hash=sha256:eaa352d7047a31d87dafcacbabe89df0aa506abb5b1b85a2fb91bc3faa02d822 \ + --hash=sha256:eab8145831a0d56ec9c4139b6c3e594c7a83c2c8be25d5bcf2d86136a532287a \ + --hash=sha256:ec3cc8c5d4084591b4237c0a272cc4f50a5b03396a47d9caaf76f5d7b38a4f11 \ + --hash=sha256:edee74874ce20a373d62dc28b0b18b93f645633c2943fd90ee9d898550770581 \ + --hash=sha256:eefdba20de0d938cec6a89bd4d70f346a03108a19b9df4248d3cf0d88f1b0f51 \ + --hash=sha256:f21f00a91358803399890ab167098c131ec2ddd5f8f5fd5fe9c9f2c6fcd91e40 \ + --hash=sha256:f4be2e3d8bc8aabd566f8d5b8ba7ecc09249d74ba3c9ed52e54dc23a293f0b92 \ + --hash=sha256:f6292f1de555ffcc675941d65fffffb0a5bcd992905015f85d0592201793e0e5 \ + --hash=sha256:f833670942247a14eafbb675458b4e61c82e002a148f49e68257b79296e865c4 \ + --hash=sha256:fa47e444b8ba08fffd1c18e8cdb9a75db1b6a27f17507522834ad13ed5922b93 \ + --hash=sha256:fb30f9626572a76dfe4293c7194a09fb1fe93ba94c7d4f720dfae3b646b45027 \ + --hash=sha256:fe3c58d2f5db5fbd18c2987cba06d51b0529f52bc3a6cdc33d3f4eab725104bd + # via + # aiohttp + # aiosignal +fsspec==2026.1.0 \ + --hash=sha256:cb76aa913c2285a3b49bdd5fc55b1d7c708d7208126b60f2eb8194fe1b4cbdcc \ + --hash=sha256:e987cb0496a0d81bba3a9d1cee62922fb395e7d4c3b575e57f547953334fe07b + # via huggingface-hub +google-api-core==2.29.0 \ + --hash=sha256:84181be0f8e6b04006df75ddfe728f24489f0af57c96a529ff7cf45bc28797f7 \ + --hash=sha256:d30bc60980daa36e314b5d5a3e5958b0200cb44ca8fa1be2b614e932b75a3ea9 + # via + # google-cloud-bigquery + # google-cloud-core +google-auth==2.48.0 \ + --hash=sha256:2e2a537873d449434252a9632c28bfc268b0adb1e53f9fb62afc5333a975903f \ + --hash=sha256:4f7e706b0cd3208a3d940a19a822c37a476ddba5450156c3e6624a71f7c841ce + # via + # data-formulator + # google-api-core + # google-cloud-bigquery + # google-cloud-core +google-cloud-bigquery==3.40.0 \ + --hash=sha256:0469bcf9e3dad3cab65b67cce98180c8c0aacf3253d47f0f8e976f299b49b5ab \ + --hash=sha256:b3ccb11caf0029f15b29569518f667553fe08f6f1459b959020c83fbbd8f2e68 + # via data-formulator +google-cloud-core==2.5.0 \ + --hash=sha256:67d977b41ae6c7211ee830c7912e41003ea8194bff15ae7d72fd6f51e57acabc \ + --hash=sha256:7c1b7ef5c92311717bd05301aa1a91ffbc565673d3b0b4163a52d8413a186963 + # via google-cloud-bigquery +google-crc32c==1.8.0 \ + --hash=sha256:014a7e68d623e9a4222d663931febc3033c5c7c9730785727de2a81f87d5bab8 \ + --hash=sha256:14f87e04d613dfa218d6135e81b78272c3b904e2a7053b841481b38a7d901411 \ + --hash=sha256:17446feb05abddc187e5441a45971b8394ea4c1b6efd88ab0af393fd9e0a156a \ + --hash=sha256:19b40d637a54cb71e0829179f6cb41835f0fbd9e8eb60552152a8b52c36cbe15 \ + --hash=sha256:2a3dc3318507de089c5384cc74d54318401410f82aa65b2d9cdde9d297aca7cb \ + 
--hash=sha256:3b9776774b24ba76831609ffbabce8cdf6fa2bd5e9df37b594221c7e333a81fa \ + --hash=sha256:3cc0c8912038065eafa603b238abf252e204accab2a704c63b9e14837a854962 \ + --hash=sha256:3ebb04528e83b2634857f43f9bb8ef5b2bbe7f10f140daeb01b58f972d04736b \ + --hash=sha256:450dc98429d3e33ed2926fc99ee81001928d63460f8538f21a5d6060912a8e27 \ + --hash=sha256:4b8286b659c1335172e39563ab0a768b8015e88e08329fa5321f774275fc3113 \ + --hash=sha256:57a50a9035b75643996fbf224d6661e386c7162d1dfdab9bc4ca790947d1007f \ + --hash=sha256:71734788a88f551fbd6a97be9668a0020698e07b2bf5b3aa26a36c10cdfb27b2 \ + --hash=sha256:86cfc00fe45a0ac7359e5214a1704e51a99e757d0272554874f419f79838c5f7 \ + --hash=sha256:87fa445064e7db928226b2e6f0d5304ab4cd0339e664a4e9a25029f384d9bb93 \ + --hash=sha256:89c17d53d75562edfff86679244830599ee0a48efc216200691de8b02ab6b2b8 \ + --hash=sha256:8b3f68782f3cbd1bce027e48768293072813469af6a61a86f6bb4977a4380f21 \ + --hash=sha256:a428e25fb7691024de47fecfbff7ff957214da51eddded0da0ae0e0f03a2cf79 \ + --hash=sha256:b0d1a7afc6e8e4635564ba8aa5c0548e3173e41b6384d7711a9123165f582de2 \ + --hash=sha256:cb5c869c2923d56cb0c8e6bcdd73c009c36ae39b652dbe46a05eb4ef0ad01454 \ + --hash=sha256:d511b3153e7011a27ab6ee6bb3a5404a55b994dc1a7322c0b87b29606d9790e2 \ + --hash=sha256:e6584b12cb06796d285d09e33f63309a09368b9d806a551d8036a4207ea43697 \ + --hash=sha256:f4b51844ef67d6cf2e9425983274da75f18b1597bb2c998e1c0a0e8d46f8f651 \ + --hash=sha256:f639065ea2042d5c034bf258a9f085eaa7af0cd250667c0635a3118e8f92c69c + # via google-resumable-media +google-resumable-media==2.8.0 \ + --hash=sha256:dd14a116af303845a8d932ddae161a26e86cc229645bc98b39f026f9b1717582 \ + --hash=sha256:f1157ed8b46994d60a1bc432544db62352043113684d4e030ee02e77ebe9a1ae + # via google-cloud-bigquery +googleapis-common-protos==1.72.0 \ + --hash=sha256:4299c5a82d5ae1a9702ada957347726b167f9f8d1fc352477702a1e851ff4038 \ + --hash=sha256:e55a601c1b32b52d7a3e65f43563e2aa61bcd737998ee672ac9b951cd49319f5 + # via + # google-api-core + # grpcio-status +grpcio==1.76.0 \ + --hash=sha256:04bbe1bfe3a68bbfd4e52402ab7d4eb59d72d02647ae2042204326cf4bbad280 \ + --hash=sha256:06c3d6b076e7b593905d04fdba6a0525711b3466f43b3400266f04ff735de0cd \ + --hash=sha256:08caea849a9d3c71a542827d6df9d5a69067b0a1efbea8a855633ff5d9571465 \ + --hash=sha256:1c9b93f79f48b03ada57ea24725d83a30284a012ec27eab2cf7e50a550cbbbcc \ + --hash=sha256:2107b0c024d1b35f4083f11245c0e23846ae64d02f40b2b226684840260ed054 \ + --hash=sha256:2229ae655ec4e8999599469559e97630185fdd53ae1e8997d147b7c9b2b72cba \ + --hash=sha256:25a18e9810fbc7e7f03ec2516addc116a957f8cbb8cbc95ccc80faa072743d03 \ + --hash=sha256:26ef06c73eb53267c2b319f43e6634c7556ea37672029241a056629af27c10e2 \ + --hash=sha256:2e1743fbd7f5fa713a1b0a8ac8ebabf0ec980b5d8809ec358d488e273b9cf02a \ + --hash=sha256:32483fe2aab2c3794101c2a159070584e5db11d0aa091b2c0ea9c4fc43d0d749 \ + --hash=sha256:3e2a27c89eb9ac3d81ec8835e12414d73536c6e620355d65102503064a4ed6eb \ + --hash=sha256:45d59a649a82df5718fd9527ce775fd66d1af35e6d31abdcdc906a49c6822958 \ + --hash=sha256:45e0111e73f43f735d70786557dc38141185072d7ff8dc1829d6a77ac1471468 \ + --hash=sha256:479496325ce554792dba6548fae3df31a72cef7bad71ca2e12b0e58f9b336bfc \ + --hash=sha256:490fa6d203992c47c7b9e4a9d39003a0c2bcc1c9aa3c058730884bbbb0ee9f09 \ + --hash=sha256:4baf3cbe2f0be3289eb68ac8ae771156971848bb8aaff60bad42005539431980 \ + --hash=sha256:522175aba7af9113c48ec10cc471b9b9bd4f6ceb36aeb4544a8e2c80ed9d252d \ + --hash=sha256:5e8571632780e08526f118f74170ad8d50fb0a48c23a746bef2a6ebade3abd6f \ + 
--hash=sha256:615ba64c208aaceb5ec83bfdce7728b80bfeb8be97562944836a7a0a9647d882 \ + --hash=sha256:61f69297cba3950a524f61c7c8ee12e55c486cb5f7db47ff9dcee33da6f0d3ae \ + --hash=sha256:6a15c17af8839b6801d554263c546c69c4d7718ad4321e3166175b37eaacca77 \ + --hash=sha256:747fa73efa9b8b1488a95d0ba1039c8e2dca0f741612d80415b1e1c560febf4e \ + --hash=sha256:7be78388d6da1a25c0d5ec506523db58b18be22d9c37d8d3a32c08be4987bd73 \ + --hash=sha256:81fd9652b37b36f16138611c7e884eb82e0cec137c40d3ef7c3f9b3ed00f6ed8 \ + --hash=sha256:83d57312a58dcfe2a3a0f9d1389b299438909a02db60e2f2ea2ae2d8034909d3 \ + --hash=sha256:8843114c0cfce61b40ad48df65abcfc00d4dba82eae8718fab5352390848c5da \ + --hash=sha256:8eddfb4d203a237da6f3cc8a540dad0517d274b5a1e9e636fd8d2c79b5c1d397 \ + --hash=sha256:922fa70ba549fce362d2e2871ab542082d66e2aaf0c19480ea453905b01f384e \ + --hash=sha256:931091142fd8cc14edccc0845a79248bc155425eee9a98b2db2ea4f00a235a42 \ + --hash=sha256:980a846182ce88c4f2f7e2c22c56aefd515daeb36149d1c897f83cf57999e0b6 \ + --hash=sha256:9f8f757bebaaea112c00dba718fc0d3260052ce714e25804a03f93f5d1c6cc11 \ + --hash=sha256:a8c2cf1209497cf659a667d7dea88985e834c24b7c3b605e6254cbb5076d985c \ + --hash=sha256:b331680e46239e090f5b3cead313cc772f6caa7d0fc8de349337563125361a4a \ + --hash=sha256:c088e7a90b6017307f423efbb9d1ba97a22aa2170876223f9709e9d1de0b5347 \ + --hash=sha256:d388087771c837cdb6515539f43b9d4bf0b0f23593a24054ac16f7a960be16f4 \ + --hash=sha256:dcfe41187da8992c5f40aa8c5ec086fa3672834d2be57a32384c08d5a05b4c00 \ + --hash=sha256:f0e34c2079d47ae9f6188211db9e777c619a21d4faba6977774e8fa43b085e48 \ + --hash=sha256:f92f88e6c033db65a5ae3d97905c8fea9c725b63e28d5a75cb73b49bda5024d8 \ + --hash=sha256:f9f7bd5faab55f47231ad8dba7787866b69f5e93bc306e3915606779bbfb4ba8 \ + --hash=sha256:fd5ef5932f6475c436c4a55e4336ebbe47bd3272be04964a03d316bbf4afbcbc \ + --hash=sha256:ff8a59ea85a1f2191a0ffcc61298c571bc566332f82e5f5be1b83c9d8e668a62 + # via + # google-api-core + # grpcio-status +grpcio-status==1.76.0 \ + --hash=sha256:25fcbfec74c15d1a1cb5da3fab8ee9672852dc16a5a9eeb5baf7d7a9952943cd \ + --hash=sha256:380568794055a8efbbd8871162df92012e0228a5f6dffaf57f2a00c534103b18 + # via google-api-core +h11==0.16.0 \ + --hash=sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1 \ + --hash=sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86 + # via httpcore +hf-xet==1.2.0 ; platform_machine == 'AMD64' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64' \ + --hash=sha256:10bfab528b968c70e062607f663e21e34e2bba349e8038db546646875495179e \ + --hash=sha256:210d577732b519ac6ede149d2f2f34049d44e8622bf14eb3d63bbcd2d4b332dc \ + --hash=sha256:27df617a076420d8845bea087f59303da8be17ed7ec0cd7ee3b9b9f579dff0e4 \ + --hash=sha256:293a7a3787e5c95d7be1857358a9130694a9c6021de3f27fa233f37267174382 \ + --hash=sha256:29c8fc913a529ec0a91867ce3d119ac1aac966e098cf49501800c870328cc090 \ + --hash=sha256:2a212e842647b02eb6a911187dc878e79c4aa0aa397e88dd3b26761676e8c1f8 \ + --hash=sha256:30e06daccb3a7d4c065f34fc26c14c74f4653069bb2b194e7f18f17cbe9939c0 \ + --hash=sha256:3651fd5bfe0281951b988c0facbe726aa5e347b103a675f49a3fa8144c7968fd \ + --hash=sha256:46740d4ac024a7ca9b22bebf77460ff43332868b661186a8e46c227fdae01848 \ + --hash=sha256:4c1428c9ae73ec0939410ec73023c4f842927f39db09b063b9482dac5a3bb737 \ + --hash=sha256:66e159cbfcfbb29f920db2c09ed8b660eb894640d284f102ada929b6e3dc410a \ + --hash=sha256:6de1fc44f58f6dd937956c8d304d8c2dea264c80680bcfa61ca4a15e7b76780f \ + 
--hash=sha256:7d40b18769bb9a8bc82a9ede575ce1a44c75eb80e7375a01d76259089529b5dc \ + --hash=sha256:9c91d5ae931510107f148874e9e2de8a16052b6f1b3ca3c1b12f15ccb491390f \ + --hash=sha256:a55558084c16b09b5ed32ab9ed38421e2d87cf3f1f89815764d1177081b99865 \ + --hash=sha256:a8c27070ca547293b6890c4bf389f713f80e8c478631432962bb7f4bc0bd7d7f \ + --hash=sha256:b70218dd548e9840224df5638fdc94bd033552963cfa97f9170829381179c813 \ + --hash=sha256:cd3a6027d59cfb60177c12d6424e31f4b5ff13d8e3a1247b3a584bf8977e6df5 \ + --hash=sha256:ceeefcd1b7aed4956ae8499e2199607765fbd1c60510752003b6cc0b8413b649 \ + --hash=sha256:d06fa97c8562fb3ee7a378dd9b51e343bc5bc8190254202c9771029152f5e08c \ + --hash=sha256:e6584a52253f72c9f52f9e549d5895ca7a471608495c4ecaa6cc73dba2b24d69 \ + --hash=sha256:f182f264ed2acd566c514e45da9f2119110e48a87a327ca271027904c70c5832 + # via huggingface-hub +httpcore==1.0.9 \ + --hash=sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55 \ + --hash=sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8 + # via httpx +httpx==0.28.1 \ + --hash=sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc \ + --hash=sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad + # via + # huggingface-hub + # jupyterlab + # litellm + # openai +huggingface-hub==1.3.7 \ + --hash=sha256:5f86cd48f27131cdbf2882699cbdf7a67dd4cbe89a81edfdc31211f42e4a5fd1 \ + --hash=sha256:8155ce937038fa3d0cb4347d752708079bc85e6d9eb441afb44c84bcf48620d2 + # via tokenizers +idna==3.11 \ + --hash=sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea \ + --hash=sha256:795dafcc9c04ed0c1fb032c2aa73654d8e8c5023a7df64a53f39190ada629902 + # via + # anyio + # httpx + # jsonschema + # requests + # yarl +ijson==3.4.0.post0 \ + --hash=sha256:043f9b7cf9cc744263a78175e769947733710d2412d25180df44b1086b23ebd5 \ + --hash=sha256:04ac9ca54db20f82aeda6379b5f4f6112fdb150d09ebce04affeab98a17b4ed3 \ + --hash=sha256:05807edc0bcbd222dc6ea32a2b897f0c81dc7f12c8580148bc82f6d7f5e7ec7b \ + --hash=sha256:07f20ecd748602ac7f18c617637e53bd73ded7f3b22260bba3abe401a7fc284e \ + --hash=sha256:0b473112e72c0c506da425da3278367b6680f340ecc093084693a1e819d28435 \ + --hash=sha256:103a0838061297d063bca81d724b0958b616f372bd893bbc278320152252c652 \ + --hash=sha256:114ed248166ac06377e87a245a158d6b98019d2bdd3bb93995718e0bd996154f \ + --hash=sha256:11f13b73194ea2a5a8b4a2863f25b0b4624311f10db3a75747b510c4958179b0 \ + --hash=sha256:1709171023ce82651b2f132575c2e6282e47f64ad67bd3260da476418d0e7895 \ + --hash=sha256:17e45262a5ddef39894013fb1548ee7094e444c8389eb1a97f86708b19bea03e \ + --hash=sha256:226447e40ca9340a39ed07d68ea02ee14b52cb4fe649425b256c1f0073531c83 \ + --hash=sha256:254cfb8c124af68327a0e7a49b50bbdacafd87c4690a3d62c96eb01020a685ef \ + --hash=sha256:27aa193d47ffc6bc4e45453896ad98fb089a367e8283b973f1fe5c0198b60b4e \ + --hash=sha256:2c88f0669d45d4b1aa017c9b68d378e7cd15d188dfb6f0209adc78b7f45590a7 \ + --hash=sha256:339d49f6c5d24051c85d9226be96d2d56e633cb8b7d09dd8099de8d8b51a97e2 \ + --hash=sha256:3505dff18bdeb8b171eb28af6df34857e2be80dc01e2e3b624e77215ad58897f \ + --hash=sha256:3ed19b1e4349240773a8ce4a4bfa450892d4a57949c02c515cd6be5a46b7696a \ + --hash=sha256:40007c977e230e04118b27322f25a72ae342a3d61464b2057fcd9b21eeb7427a \ + --hash=sha256:432fb60ffb952926f9438e0539011e2dfcd108f8426ee826ccc6173308c3ff2c \ + --hash=sha256:45a0b1c833ed2620eaf8da958f06ac8351c59e5e470e078400d23814670ed708 \ + --hash=sha256:461acf4320219459dabe5ed90a45cb86c9ba8cc6d6db9dad0d9427d42f57794c \ + 
--hash=sha256:47352563e8c594360bacee2e0753e97025f0861234722d02faace62b1b6d2b2a \ + --hash=sha256:4810546e66128af51fd4a0c9a640e84e8508e9c15c4f247d8a3e3253b20e1465 \ + --hash=sha256:4827d9874a6a81625412c59f7ca979a84d01f7f6bfb3c6d4dc4c46d0382b14e0 \ + --hash=sha256:4e39bfdc36b0b460ef15a06550a6a385c64c81f7ac205ccff39bd45147918912 \ + --hash=sha256:54a0e3e05d9a0c95ecba73d9579f146cf6d5c5874116c849dba2d39a5f30380e \ + --hash=sha256:55f7f656b5986326c978cbb3a9eea9e33f3ef6ecc4535b38f1d452c731da39ab \ + --hash=sha256:56169e298c5a2e7196aaa55da78ddc2415876a74fe6304f81b1eb0d3273346f7 \ + --hash=sha256:56b3089dc28c12492d92cc4896d2be585a89ecae34e25d08c1df88f21815cb50 \ + --hash=sha256:5a48b9486242d1295abe7fd0fbb6308867da5ca3f69b55c77922a93c2b6847aa \ + --hash=sha256:5f0a72b1e3c0f78551670c12b2fdc1bf05f2796254d9c2055ba319bec2216020 \ + --hash=sha256:61ab0b8c5bf707201dc67e02c116f4b6545c4afd7feb2264b989d242d9c4348a \ + --hash=sha256:636b6eca96c6c43c04629c6b37fad0181662eaacf9877c71c698485637f752f9 \ + --hash=sha256:6458bd8e679cdff459a0a5e555b107c3bbacb1f382da3fe0f40e392871eb518d \ + --hash=sha256:659acb2843433e080c271ecedf7d19c71adde1ee5274fc7faa2fec0a793f9f1c \ + --hash=sha256:69718ed41710dfcaa7564b0af42abc05875d4f7aaa24627c808867ef32634bc7 \ + --hash=sha256:7206afcb396aaef66c2b066997b4e9d9042c4b7d777f4d994e9cec6d322c2fe6 \ + --hash=sha256:7809ec8c8f40228edaaa089f33e811dff4c5b8509702652870d3f286c9682e27 \ + --hash=sha256:8311f48db6a33116db5c81682f08b6e2405501a4b4e460193ae69fec3cd1f87a \ + --hash=sha256:83fc738d81c9ea686b452996110b8a6678296c481e0546857db24785bff8da92 \ + --hash=sha256:91c61a3e63e04da648737e6b4abd537df1b46fb8cdf3219b072e790bb3c1a46b \ + --hash=sha256:9aa02dc70bb245670a6ca7fba737b992aeeb4895360980622f7e568dbf23e41e \ + --hash=sha256:9c0886234d1fae15cf4581a430bdba03d79251c1ab3b07e30aa31b13ef28d01c \ + --hash=sha256:a0fedf09c0f6ffa2a99e7e7fd9c5f3caf74e655c1ee015a0797383e99382ebc3 \ + --hash=sha256:a39d5d36067604b26b78de70b8951c90e9272450642661fe531a8f7a6936a7fa \ + --hash=sha256:a5269af16f715855d9864937f9dd5c348ca1ac49cee6a2c7a1b7091c159e874f \ + --hash=sha256:a603d7474bf35e7b3a8e49c8dabfc4751841931301adff3f3318171c4e407f32 \ + --hash=sha256:add9242f886eae844a7410b84aee2bbb8bdc83c624f227cb1fdb2d0476a96cb1 \ + --hash=sha256:b005ce84e82f28b00bf777a464833465dfe3efa43a0a26c77b5ac40723e1a728 \ + --hash=sha256:b200df83c901f5bfa416d069ac71077aa1608f854a4c50df1b84ced560e9c9ec \ + --hash=sha256:b2a81aee91633868f5b40280e2523f7c5392e920a5082f47c5e991e516b483f6 \ + --hash=sha256:b39dbf87071f23a23c8077eea2ae7cfeeca9ff9ffec722dfc8b5f352e4dd729c \ + --hash=sha256:b55e49045f4c8031f3673f56662fd828dc9e8d65bd3b03a9420dda0d370e64ba \ + --hash=sha256:b607a500fca26101be47d2baf7cddb457b819ab60a75ce51ed1092a40da8b2f9 \ + --hash=sha256:b982a3597b0439ce9c8f4cfc929d86c6ed43907908be1e8463a34dc35fe5b258 \ + --hash=sha256:ba3478ff0bb49d7ba88783f491a99b6e3fa929c930ab062d2bb7837e6a38fe88 \ + --hash=sha256:c117321cfa7b749cc1213f9b4c80dc958f0a206df98ec038ae4bcbbdb8463a15 \ + --hash=sha256:c8dd327da225887194fe8b93f2b3c9c256353e14a6b9eefc940ed17fde38f5b8 \ + --hash=sha256:ccddb2894eb7af162ba43b9475ac5825d15d568832f82eb8783036e5d2aebd42 \ + --hash=sha256:cf24a48a1c3ca9d44a04feb59ccefeb9aa52bb49b9cb70ad30518c25cce74bb7 \ + --hash=sha256:cf4a34c2cfe852aee75c89c05b0a4531c49dc0be27eeed221afd6fbf9c3e149c \ + --hash=sha256:d14427d366f95f21adcb97d0ed1f6d30f6fdc04d0aa1e4de839152c50c2b8d65 \ + --hash=sha256:d4d4afec780881edb2a0d2dd40b1cdbe246e630022d5192f266172a0307986a7 \ + 
--hash=sha256:da6a21b88cbf5ecbc53371283988d22c9643aa71ae2873bbeaefd2dea3b6160b \ + --hash=sha256:deda4cfcaafa72ca3fa845350045b1d0fef9364ec9f413241bb46988afbe6ee6 \ + --hash=sha256:e15833dcf6f6d188fdc624a31cd0520c3ba21b6855dc304bc7c1a8aeca02d4ac \ + --hash=sha256:eb5e73028f6e63d27b3d286069fe350ed80a4ccc493b022b590fea4bb086710d \ + --hash=sha256:ec5bb1520cb212ebead7dba048bb9b70552c3440584f83b01b0abc96862e2a09 \ + --hash=sha256:eeb9540f0b1a575cbb5968166706946458f98c16e7accc6f2fe71efa29864241 \ + --hash=sha256:f932969fc1fd4449ca141cf5f47ff357656a154a361f28d9ebca0badc5b02297 \ + --hash=sha256:fe9c84c9b1c8798afa407be1cea1603401d99bfc7c34497e19f4f5e5ddc9b441 \ + --hash=sha256:fecae19b5187d92900c73debb3a979b0b3290a53f85df1f8f3c5ba7d1e9fb9cb \ + --hash=sha256:ffb21203736b08fe27cb30df6a4f802fafb9ef7646c5ff7ef79569b63ea76c57 + # via azure-kusto-data +importlib-metadata==8.7.1 \ + --hash=sha256:49fef1ae6440c182052f407c8d34a68f72efc36db9ca90dc0113398f2fdde8bb \ + --hash=sha256:5a1f80bf1daa489495071efbb095d75a634cf28a8bc299581244063b53176151 + # via litellm +ipykernel==7.1.0 \ + --hash=sha256:58a3fc88533d5930c3546dc7eac66c6d288acde4f801e2001e65edc5dc9cf0db \ + --hash=sha256:763b5ec6c5b7776f6a8d7ce09b267693b4e5ce75cb50ae696aaefb3c85e1ea4c + # via + # jupyter + # jupyter-console + # jupyterlab +ipython==9.10.0 \ + --hash=sha256:c6ab68cc23bba8c7e18e9b932797014cc61ea7fd6f19de180ab9ba73e65ee58d \ + --hash=sha256:cd9e656be97618a0676d058134cd44e6dc7012c0e5cb36a9ce96a8c904adaf77 + # via + # ipykernel + # ipywidgets + # jupyter-console +ipython-pygments-lexers==1.1.1 \ + --hash=sha256:09c0138009e56b6854f9535736f4171d855c8c08a563a0dcd8022f78355c7e81 \ + --hash=sha256:a9462224a505ade19a605f71f8fa63c2048833ce50abc86768a0d81d876dc81c + # via ipython +ipywidgets==8.1.8 \ + --hash=sha256:61f969306b95f85fba6b6986b7fe45d73124d1d9e3023a8068710d47a22ea668 \ + --hash=sha256:ecaca67aed704a338f88f67b1181b58f821ab5dc89c1f0f5ef99db43c1c2921e + # via jupyter +isodate==0.7.2 \ + --hash=sha256:28009937d8031054830160fce6d409ed342816b543597cece116d966c6d99e15 \ + --hash=sha256:4cd1aa0f43ca76f4a6c6c0292a85f40b35ec2e43e315b59f06e6d32171a953e6 + # via + # azure-keyvault-secrets + # azure-storage-blob +isoduration==20.11.0 \ + --hash=sha256:ac2f9015137935279eac671f94f89eb00584f940f5dc49462a0c4ee692ba1bd9 \ + --hash=sha256:b2904c2a4228c3d44f409c8ae8e2370eb21a26f7ac2ec5446df141dde3452042 + # via jsonschema +itsdangerous==2.2.0 \ + --hash=sha256:c6242fc49e35958c8b15141343aa660db5fc54d4f13a1db01a3f5891b98700ef \ + --hash=sha256:e0050c0b7da1eea53ffaf149c0cfbb5c6e2e2b69c4bef22c81fa6eb73e5f6173 + # via flask +jedi==0.19.2 \ + --hash=sha256:4770dc3de41bde3966b02eb84fbcf557fb33cce26ad23da12c742fb50ecb11f0 \ + --hash=sha256:a8ef22bde8490f57fe5c7681a3c83cb58874daf72b4784de3cce5b6ef6edb5b9 + # via ipython +jinja2==3.1.6 \ + --hash=sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d \ + --hash=sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67 + # via + # flask + # jupyter-server + # jupyterlab + # jupyterlab-server + # litellm + # nbconvert +jiter==0.13.0 \ + --hash=sha256:04670992b576fa65bd056dbac0c39fe8bd67681c380cb2b48efa885711d9d726 \ + --hash=sha256:0733312953b909688ae3c2d58d043aa040f9f1a6a75693defed7bc2cc4bf2654 \ + --hash=sha256:07b75fe09a4ee8e0c606200622e571e44943f47254f95e2436c8bdcaceb36d7d \ + --hash=sha256:0a2bd69fc1d902e89925fc34d1da51b2128019423d7b339a45d9e99c894e0663 \ + --hash=sha256:0a8d76c7524087272c8ae913f5d9d608bd839154b62c4322ef65723d2e5bb0b8 \ + 
--hash=sha256:0b34c519e17658ed88d5047999a93547f8889f3c1824120c26ad6be5f27b6cf5 \ + --hash=sha256:0bf670e3b1445fc4d31612199f1744f67f889ee1bbae703c4b54dc097e5dd394 \ + --hash=sha256:0c365005b05505a90d1c47856420980d0237adf82f70c4aff7aebd3c1cc143ad \ + --hash=sha256:0e3a5f0cde8ff433b8e88e41aa40131455420fb3649a3c7abdda6145f8cb7202 \ + --hash=sha256:0f0c065695f616a27c920a56ad0d4fc46415ef8b806bf8fc1cacf25002bd24e1 \ + --hash=sha256:1211427574b17b633cfceba5040de8081e5abf114f7a7602f73d2e16f9fdaa59 \ + --hash=sha256:1317fdffd16f5873e46ce27d0e0f7f4f90f0cdf1d86bf6abeaea9f63ca2c401d \ + --hash=sha256:15db60e121e11fe186c0b15236bd5d18381b9ddacdcf4e659feb96fc6c969c92 \ + --hash=sha256:1f4748aad1b4a93c8bdd70f604d0f748cdc0e8744c5547798acfa52f10e79228 \ + --hash=sha256:1f8a55b848cbabf97d861495cd65f1e5c590246fabca8b48e1747c4dfc8f85bf \ + --hash=sha256:24ab43126d5e05f3d53a36a8e11eb2f23304c6c1117844aaaf9a0aa5e40b5018 \ + --hash=sha256:24dc96eca9f84da4131cdf87a95e6ce36765c3b156fc9ae33280873b1c32d5f6 \ + --hash=sha256:2b4972c6df33731aac0742b64fd0d18e0a69bc7d6e03108ce7d40c85fd9e3e6d \ + --hash=sha256:2c26cf47e2cad140fa23b6d58d435a7c0161f5c514284802f25e87fddfe11024 \ + --hash=sha256:2d08c9475d48b92892583df9da592a0e2ac49bcd41fae1fec4f39ba6cf107820 \ + --hash=sha256:3097d665a27bc96fd9bbf7f86178037db139f319f785e4757ce7ccbf390db6c2 \ + --hash=sha256:36ebfbcffafb146d0e6ffb3e74d51e03d9c35ce7c625c8066cdbfc7b953bdc72 \ + --hash=sha256:3b3fb8c2053acaef8580809ac1d1f7481a0a0bdc012fd7f5d8b18fb696a5a089 \ + --hash=sha256:3d744a6061afba08dd7ae375dcde870cffb14429b7477e10f67e9e6d68772a0a \ + --hash=sha256:41f92313d17989102f3cb5dd533a02787cdb99454d494344b0361355da52fcb9 \ + --hash=sha256:45f6f8efb2f3b0603092401dc2df79fa89ccbc027aaba4174d2d4133ed661434 \ + --hash=sha256:47455245307e4debf2ce6c6e65a717550a0244231240dcf3b8f7d64e4c2f22f4 \ + --hash=sha256:5467696f6b827f1116556cb0db620440380434591e93ecee7fd14d1a491b6daa \ + --hash=sha256:57aab48f40be1db920a582b30b116fe2435d184f77f0e4226f546794cedd9cf0 \ + --hash=sha256:597245258e6ad085d064780abfb23a284d418d3e61c57362d9449c6c7317ee2d \ + --hash=sha256:5a1aff1fbdb803a376d4d22a8f63f8e7ccbce0b4890c26cc7af9e501ab339ef0 \ + --hash=sha256:5d9b34ad56761b3bf0fbe8f7e55468704107608512350962d3317ffd7a4382d5 \ + --hash=sha256:632bf7c1d28421c00dd8bbb8a3bac5663e1f57d5cd5ed962bce3c73bf62608e6 \ + --hash=sha256:66aa3e663840152d18cc8ff1e4faad3dd181373491b9cfdc6004b92198d67911 \ + --hash=sha256:682161a67adea11e3aae9038c06c8b4a9a71023228767477d683f69903ebc607 \ + --hash=sha256:6c26a424569a59140fb51160a56df13f438a2b0967365e987889186d5fc2f6f9 \ + --hash=sha256:701a1e77d1e593c1b435315ff625fd071f0998c5f02792038a5ca98899261b7d \ + --hash=sha256:775e10de3849d0631a97c603f996f518159272db00fdda0a780f81752255ee9d \ + --hash=sha256:7772115877c53f62beeb8fd853cab692dbc04374ef623b30f997959a4c0e7e95 \ + --hash=sha256:7b88d649135aca526da172e48083da915ec086b54e8e73a425ba50999468cc08 \ + --hash=sha256:7bb00b6d26db67a05fe3e12c76edc75f32077fb51deed13822dc648fa373bc19 \ + --hash=sha256:7beae3a3d3b5212d3a55d2961db3c292e02e302feb43fce6a3f7a31b90ea6dfe \ + --hash=sha256:879e768938e7b49b5e90b7e3fecc0dbec01b8cb89595861fb39a8967c5220d09 \ + --hash=sha256:87ce0f14c6c08892b610686ae8be350bf368467b6acd5085a5b65441e2bf36d2 \ + --hash=sha256:8d76029f077379374cf0dbc78dbe45b38dec4a2eb78b08b5194ce836b2517afc \ + --hash=sha256:964538479359059a35fb400e769295d4b315ae61e4105396d355a12f7fef09f0 \ + --hash=sha256:9776ebe51713acf438fd9b4405fcd86893ae5d03487546dae7f34993217f8a91 \ + 
--hash=sha256:98fbafb6e88256f4454de33c1f40203d09fc33ed19162a68b3b257b29ca7f663 \ + --hash=sha256:9950290340acc1adaded363edd94baebcee7dabdfa8bee4790794cd5cfad2af6 \ + --hash=sha256:9d01ecc3a8cbdb6f25a37bd500510550b64ddf9f7d64a107d92f3ccb25035d0f \ + --hash=sha256:9da38b4fedde4fb528c740c2564628fbab737166a0e73d6d46cb4bb5463ff411 \ + --hash=sha256:a13b68cd1cd8cc9de8f244ebae18ccb3e4067ad205220ef324c39181e23bbf66 \ + --hash=sha256:ab44b178f7981fcaea7e0a5df20e773c663d06ffda0198f1a524e91b2fde7e59 \ + --hash=sha256:ade8cb6ff5632a62b7dbd4757d8c5573f7a2e9ae285d6b5b841707d8363205ef \ + --hash=sha256:aed40e099404721d7fcaf5b89bd3b4568a4666358bcac7b6b15c09fb6252ab68 \ + --hash=sha256:b1cbfa133241d0e6bdab48dcdc2604e8ba81512f6bbd68ec3e8e1357dd3c316c \ + --hash=sha256:bb7613e1a427cfcb6ea4544f9ac566b93d5bf67e0d48c787eca673ff9c9dff2b \ + --hash=sha256:bdaba7d87e66f26a2c45d8cbadcbfc4bf7884182317907baf39cfe9775bb4d93 \ + --hash=sha256:c05b450d37ba0c9e21c77fef1f205f56bcee2330bddca68d344baebfc55ae0df \ + --hash=sha256:c1e2b199f446d3e82246b4fd9236d7cb502dc2222b18698ba0d986d2fecc6152 \ + --hash=sha256:c3524798e70655ff19aec58c7d05adb1f074fecff62da857ea9be2b908b6d701 \ + --hash=sha256:cc5223ab19fe25e2f0bf2643204ad7318896fe3729bf12fde41b77bfc4fafff0 \ + --hash=sha256:d2a6394e6af690d462310a86b53c47ad75ac8c21dc79f120714ea449979cb1d3 \ + --hash=sha256:db367d8be9fad6e8ebbac4a7578b7af562e506211036cba2c06c3b998603c3d2 \ + --hash=sha256:e104da1db1c0991b3eaed391ccd650ae8d947eab1480c733e5a3fb28d4313e40 \ + --hash=sha256:e404ea551d35438013c64b4f357b0474c7abf9f781c06d44fcaf7a14c69ff9e2 \ + --hash=sha256:e5562a0f0e90a6223b704163ea28e831bd3a9faa3512a711f031611e6b06c939 \ + --hash=sha256:ea026e70a9a28ebbdddcbcf0f1323128a8db66898a06eaad3a4e62d2f554d096 \ + --hash=sha256:ec7e287d7fbd02cb6e22f9a00dd9c9cd504c40a61f2c61e7e1f9690a82726b4c \ + --hash=sha256:ed9bbc30f5d60a3bdf63ae76beb3f9db280d7f195dfcfa61af792d6ce912d159 \ + --hash=sha256:ee9da221dca6e0429c2704c1b3655fe7b025204a71d4d9b73390c759d776d165 \ + --hash=sha256:f22ef501c3f87ede88f23f9b11e608581c14f04db59b6a801f354397ae13739f \ + --hash=sha256:f2839f9c2c7e2dffc1bc5929a510e14ce0a946be9365fd1219e7ef342dae14f4 \ + --hash=sha256:f556aa591c00f2c45eb1b89f68f52441a016034d18b65da60e2d2875bbbf344a \ + --hash=sha256:f7e1d61da332ec412350463891923f960c3073cf1aae93b538f0bb4c8cd46efb \ + --hash=sha256:f917a04240ef31898182f76a332f508f2cc4b57d2b4d7ad2dbfebbfe167eb505 \ + --hash=sha256:fa476ab5dd49f3bf3a168e05f89358c75a17608dbabb080ef65f96b27c19ab10 \ + --hash=sha256:ff732bd0a0e778f43d5009840f20b935e79087b4dc65bd36f1cd0f9b04b8ff7f + # via openai +jmespath==1.1.0 \ + --hash=sha256:472c87d80f36026ae83c6ddd0f1d05d4e510134ed462851fd5f754c8c3cbb88d \ + --hash=sha256:a5663118de4908c91729bea0acadca56526eb2698e83de10cd116ae0f4e97c64 + # via + # boto3 + # botocore +joblib==1.5.3 \ + --hash=sha256:5fc3c5039fc5ca8c0276333a188bbd59d6b7ab37fe6632daa76bc7f9ec18e713 \ + --hash=sha256:8561a3269e6801106863fd0d6d84bb737be9e7631e33aaed3fb9ce5953688da3 + # via scikit-learn +json5==0.13.0 \ + --hash=sha256:9a08e1dd65f6a4d4c6fa82d216cf2477349ec2346a38fd70cc11d2557499fbcc \ + --hash=sha256:b1edf8d487721c0bf64d83c28e91280781f6e21f4a797d3261c7c828d4c165bf + # via jupyterlab-server +jsonpointer==3.0.0 \ + --hash=sha256:13e088adc14fca8b6aa8177c044e12701e6ad4b28ff10e65f2267a90109c9942 \ + --hash=sha256:2b2d729f2091522d61c3b31f82e11870f60b68f43fbc705cb76bf4b832af59ef + # via jsonschema +jsonschema==4.26.0 \ + --hash=sha256:0c26707e2efad8aa1bfc5b7ce170f3fccc2e4918ff85989ba9ffa9facb2be326 \ + 
--hash=sha256:d489f15263b8d200f8387e64b4c3a75f06629559fb73deb8fdfb525f2dab50ce + # via + # jupyter-events + # jupyterlab-server + # litellm + # nbformat +jsonschema-specifications==2025.9.1 \ + --hash=sha256:98802fee3a11ee76ecaca44429fda8a41bff98b00a0f2838151b113f210cc6fe \ + --hash=sha256:b540987f239e745613c7a9176f3edb72b832a4ac465cf02712288397832b5e8d + # via jsonschema +jupyter==1.1.1 \ + --hash=sha256:7a59533c22af65439b24bbe60373a4e95af8f16ac65a6c00820ad378e3f7cc83 \ + --hash=sha256:d55467bceabdea49d7e3624af7e33d59c37fff53ed3a350e1ac957bed731de7a + # via data-formulator +jupyter-client==8.8.0 \ + --hash=sha256:d556811419a4f2d96c869af34e854e3f059b7cc2d6d01a9cd9c85c267691be3e \ + --hash=sha256:f93a5b99c5e23a507b773d3a1136bd6e16c67883ccdbd9a829b0bbdb98cd7d7a + # via + # ipykernel + # jupyter-console + # jupyter-server + # nbclient +jupyter-console==6.6.3 \ + --hash=sha256:309d33409fcc92ffdad25f0bcdf9a4a9daa61b6f341177570fdac03de5352485 \ + --hash=sha256:566a4bf31c87adbfadf22cdf846e3069b59a71ed5da71d6ba4d8aaad14a53539 + # via jupyter +jupyter-core==5.9.1 \ + --hash=sha256:4d09aaff303b9566c3ce657f580bd089ff5c91f5f89cf7d8846c3cdf465b5508 \ + --hash=sha256:ebf87fdc6073d142e114c72c9e29a9d7ca03fad818c5d300ce2adc1fb0743407 + # via + # ipykernel + # jupyter-client + # jupyter-console + # jupyter-server + # jupyterlab + # nbclient + # nbconvert + # nbformat +jupyter-events==0.12.0 \ + --hash=sha256:6464b2fa5ad10451c3d35fabc75eab39556ae1e2853ad0c0cc31b656731a97fb \ + --hash=sha256:fc3fce98865f6784c9cd0a56a20644fc6098f21c8c33834a8d9fe383c17e554b + # via jupyter-server +jupyter-lsp==2.3.0 \ + --hash=sha256:458aa59339dc868fb784d73364f17dbce8836e906cd75fd471a325cba02e0245 \ + --hash=sha256:e914a3cb2addf48b1c7710914771aaf1819d46b2e5a79b0f917b5478ec93f34f + # via jupyterlab +jupyter-server==2.17.0 \ + --hash=sha256:c38ea898566964c888b4772ae1ed58eca84592e88251d2cfc4d171f81f7e99d5 \ + --hash=sha256:e8cb9c7db4251f51ed307e329b81b72ccf2056ff82d50524debde1ee1870e13f + # via + # jupyter-lsp + # jupyterlab + # jupyterlab-server + # notebook + # notebook-shim +jupyter-server-terminals==0.5.4 \ + --hash=sha256:55be353fc74a80bc7f3b20e6be50a55a61cd525626f578dcb66a5708e2007d14 \ + --hash=sha256:bbda128ed41d0be9020349f9f1f2a4ab9952a73ed5f5ac9f1419794761fb87f5 + # via jupyter-server +jupyterlab==4.5.3 \ + --hash=sha256:4a159f71067cb38e4a82e86a42de8e7e926f384d7f2291964f282282096d27e8 \ + --hash=sha256:63c9f3a48de72ba00df766ad6eed416394f5bb883829f11eeff0872302520ba7 + # via + # jupyter + # notebook +jupyterlab-pygments==0.3.0 \ + --hash=sha256:721aca4d9029252b11cfa9d185e5b5af4d54772bb8072f9b7036f4170054d35d \ + --hash=sha256:841a89020971da1d8693f1a99997aefc5dc424bb1b251fd6322462a1b8842780 + # via nbconvert +jupyterlab-server==2.28.0 \ + --hash=sha256:35baa81898b15f93573e2deca50d11ac0ae407ebb688299d3a5213265033712c \ + --hash=sha256:e4355b148fdcf34d312bbbc80f22467d6d20460e8b8736bf235577dd18506968 + # via + # jupyterlab + # notebook +jupyterlab-widgets==3.0.16 \ + --hash=sha256:423da05071d55cf27a9e602216d35a3a65a3e41cdf9c5d3b643b814ce38c19e0 \ + --hash=sha256:45fa36d9c6422cf2559198e4db481aa243c7a32d9926b500781c830c80f7ecf8 + # via ipywidgets +lark==1.3.1 \ + --hash=sha256:b426a7a6d6d53189d318f2b6236ab5d6429eaf09259f1ca33eb716eed10d2905 \ + --hash=sha256:c629b661023a014c37da873b4ff58a817398d12635d3bbb2c5a03be7fe5d1e12 + # via rfc3987-syntax +limits==5.6.0 \ + --hash=sha256:807fac75755e73912e894fdd61e2838de574c5721876a19f7ab454ae1fffb4b5 \ + --hash=sha256:b585c2104274528536a5b68864ec3835602b3c4a802cd6aa0b07419798394021 + # 
via flask-limiter +litellm==1.81.6 \ + --hash=sha256:573206ba194d49a1691370ba33f781671609ac77c35347f8a0411d852cf6341a \ + --hash=sha256:f02b503dfb7d66d1c939f82e4db21aeec1d6e2ed1fe3f5cd02aaec3f792bc4ae + # via data-formulator +markupsafe==3.0.3 \ + --hash=sha256:068f375c472b3e7acbe2d5318dea141359e6900156b5b2ba06a30b169086b91a \ + --hash=sha256:0bf2a864d67e76e5c9a34dc26ec616a66b9888e25e7b9460e1c76d3293bd9dbf \ + --hash=sha256:0db14f5dafddbb6d9208827849fad01f1a2609380add406671a26386cdf15a19 \ + --hash=sha256:0eb9ff8191e8498cca014656ae6b8d61f39da5f95b488805da4bb029cccbfbaf \ + --hash=sha256:1085e7fbddd3be5f89cc898938f42c0b3c711fdcb37d75221de2666af647c175 \ + --hash=sha256:116bb52f642a37c115f517494ea5feb03889e04df47eeff5b130b1808ce7c219 \ + --hash=sha256:12c63dfb4a98206f045aa9563db46507995f7ef6d83b2f68eda65c307c6829eb \ + --hash=sha256:133a43e73a802c5562be9bbcd03d090aa5a1fe899db609c29e8c8d815c5f6de6 \ + --hash=sha256:1353ef0c1b138e1907ae78e2f6c63ff67501122006b0f9abad68fda5f4ffc6ab \ + --hash=sha256:1872df69a4de6aead3491198eaf13810b565bdbeec3ae2dc8780f14458ec73ce \ + --hash=sha256:1b4b79e8ebf6b55351f0d91fe80f893b4743f104bff22e90697db1590e47a218 \ + --hash=sha256:1b52b4fb9df4eb9ae465f8d0c228a00624de2334f216f178a995ccdcf82c4634 \ + --hash=sha256:1cc7ea17a6824959616c525620e387f6dd30fec8cb44f649e31712db02123dad \ + --hash=sha256:218551f6df4868a8d527e3062d0fb968682fe92054e89978594c28e642c43a73 \ + --hash=sha256:26a5784ded40c9e318cfc2bdb30fe164bdb8665ded9cd64d500a34fb42067b1c \ + --hash=sha256:2713baf880df847f2bece4230d4d094280f4e67b1e813eec43b4c0e144a34ffe \ + --hash=sha256:32001d6a8fc98c8cb5c947787c5d08b0a50663d139f1305bac5885d98d9b40fa \ + --hash=sha256:3524b778fe5cfb3452a09d31e7b5adefeea8c5be1d43c4f810ba09f2ceb29d37 \ + --hash=sha256:35add3b638a5d900e807944a078b51922212fb3dedb01633a8defc4b01a3c85f \ + --hash=sha256:3a7e8ae81ae39e62a41ec302f972ba6ae23a5c5396c8e60113e9066ef893da0d \ + --hash=sha256:3b562dd9e9ea93f13d53989d23a7e775fdfd1066c33494ff43f5418bc8c58a5c \ + --hash=sha256:457a69a9577064c05a97c41f4e65148652db078a3a509039e64d3467b9e7ef97 \ + --hash=sha256:4bd4cd07944443f5a265608cc6aab442e4f74dff8088b0dfc8238647b8f6ae9a \ + --hash=sha256:4e885a3d1efa2eadc93c894a21770e4bc67899e3543680313b09f139e149ab19 \ + --hash=sha256:4faffd047e07c38848ce017e8725090413cd80cbc23d86e55c587bf979e579c9 \ + --hash=sha256:509fa21c6deb7a7a273d629cf5ec029bc209d1a51178615ddf718f5918992ab9 \ + --hash=sha256:5678211cb9333a6468fb8d8be0305520aa073f50d17f089b5b4b477ea6e67fdc \ + --hash=sha256:5a7d5dc5140555cf21a6fefbdbf8723f06fcd2f63ef108f2854de715e4422cb4 \ + --hash=sha256:69c0b73548bc525c8cb9a251cddf1931d1db4d2258e9599c28c07ef3580ef354 \ + --hash=sha256:6b5420a1d9450023228968e7e6a9ce57f65d148ab56d2313fcd589eee96a7a50 \ + --hash=sha256:722695808f4b6457b320fdc131280796bdceb04ab50fe1795cd540799ebe1698 \ + --hash=sha256:729586769a26dbceff69f7a7dbbf59ab6572b99d94576a5592625d5b411576b9 \ + --hash=sha256:77f0643abe7495da77fb436f50f8dab76dbc6e5fd25d39589a0f1fe6548bfa2b \ + --hash=sha256:795e7751525cae078558e679d646ae45574b47ed6e7771863fcc079a6171a0fc \ + --hash=sha256:7be7b61bb172e1ed687f1754f8e7484f1c8019780f6f6b0786e76bb01c2ae115 \ + --hash=sha256:7e68f88e5b8799aa49c85cd116c932a1ac15caaa3f5db09087854d218359e485 \ + --hash=sha256:83891d0e9fb81a825d9a6d61e3f07550ca70a076484292a70fde82c4b807286f \ + --hash=sha256:8485f406a96febb5140bfeca44a73e3ce5116b2501ac54fe953e488fb1d03b12 \ + --hash=sha256:8709b08f4a89aa7586de0aadc8da56180242ee0ada3999749b183aa23df95025 \ + 
--hash=sha256:8f71bc33915be5186016f675cd83a1e08523649b0e33efdb898db577ef5bb009 \ + --hash=sha256:915c04ba3851909ce68ccc2b8e2cd691618c4dc4c4232fb7982bca3f41fd8c3d \ + --hash=sha256:94c6f0bb423f739146aec64595853541634bde58b2135f27f61c1ffd1cd4d16a \ + --hash=sha256:9a1abfdc021a164803f4d485104931fb8f8c1efd55bc6b748d2f5774e78b62c5 \ + --hash=sha256:9b79b7a16f7fedff2495d684f2b59b0457c3b493778c9eed31111be64d58279f \ + --hash=sha256:a4afe79fb3de0b7097d81da19090f4df4f8d3a2b3adaa8764138aac2e44f3af1 \ + --hash=sha256:ad2cf8aa28b8c020ab2fc8287b0f823d0a7d8630784c31e9ee5edea20f406287 \ + --hash=sha256:b8512a91625c9b3da6f127803b166b629725e68af71f8184ae7e7d54686a56d6 \ + --hash=sha256:bc51efed119bc9cfdf792cdeaa4d67e8f6fcccab66ed4bfdd6bde3e59bfcbb2f \ + --hash=sha256:bdc919ead48f234740ad807933cdf545180bfbe9342c2bb451556db2ed958581 \ + --hash=sha256:bdd37121970bfd8be76c5fb069c7751683bdf373db1ed6c010162b2a130248ed \ + --hash=sha256:be8813b57049a7dc738189df53d69395eba14fb99345e0a5994914a3864c8a4b \ + --hash=sha256:c47a551199eb8eb2121d4f0f15ae0f923d31350ab9280078d1e5f12b249e0026 \ + --hash=sha256:ccfcd093f13f0f0b7fdd0f198b90053bf7b2f02a3927a30e63f3ccc9df56b676 \ + --hash=sha256:d53197da72cc091b024dd97249dfc7794d6a56530370992a5e1a08983ad9230e \ + --hash=sha256:d6dd0be5b5b189d31db7cda48b91d7e0a9795f31430b7f271219ab30f1d3ac9d \ + --hash=sha256:d88b440e37a16e651bda4c7c2b930eb586fd15ca7406cb39e211fcff3bf3017d \ + --hash=sha256:de8a88e63464af587c950061a5e6a67d3632e36df62b986892331d4620a35c01 \ + --hash=sha256:e1cf1972137e83c5d4c136c43ced9ac51d0e124706ee1c8aa8532c1287fa8795 \ + --hash=sha256:e56b7d45a839a697b5eb268c82a71bd8c7f6c94d6fd50c3d577fa39a9f1409f5 \ + --hash=sha256:e8afc3f2ccfa24215f8cb28dcf43f0113ac3c37c2f0f0806d8c70e4228c5cf4d \ + --hash=sha256:eaa9599de571d72e2daf60164784109f19978b327a3910d3e9de8c97b5b70cfe \ + --hash=sha256:ec15a59cf5af7be74194f7ab02d0f59a62bdcf1a537677ce67a2537c9b87fcda \ + --hash=sha256:f190daf01f13c72eac4efd5c430a8de82489d9cff23c364c3ea822545032993e \ + --hash=sha256:f34c41761022dd093b4b6896d4810782ffbabe30f2d443ff5f083e0cbbb8c737 \ + --hash=sha256:f3e98bb3798ead92273dc0e5fd0f31ade220f59a266ffd8a4f6065e0a3ce0523 \ + --hash=sha256:f9e130248f4462aaa8e2552d547f36ddadbeaa573879158d721bbd33dfe4743a \ + --hash=sha256:fed51ac40f757d41b7c48425901843666a6677e3e8eb0abcff09e4ba6e664f50 + # via + # flask + # jinja2 + # nbconvert + # werkzeug +matplotlib-inline==0.2.1 \ + --hash=sha256:d56ce5156ba6085e00a9d54fead6ed29a9c47e215cd1bba2e976ef39f5710a76 \ + --hash=sha256:e1ee949c340d771fc39e241ea75683deb94762c8fa5f2927ec57c83c4dffa9fe + # via + # ipykernel + # ipython +mistune==3.2.0 \ + --hash=sha256:708487c8a8cdd99c9d90eb3ed4c3ed961246ff78ac82f03418f5183ab70e398a \ + --hash=sha256:febdc629a3c78616b94393c6580551e0e34cc289987ec6c35ed3f4be42d0eee1 + # via nbconvert +msal==1.34.0 \ + --hash=sha256:76ba83b716ea5a6d75b0279c0ac353a0e05b820ca1f6682c0eb7f45190c43c2f \ + --hash=sha256:f669b1644e4950115da7a176441b0e13ec2975c29528d8b9e81316023676d6e1 + # via + # azure-identity + # azure-kusto-data + # msal-extensions +msal-extensions==1.3.1 \ + --hash=sha256:96d3de4d034504e969ac5e85bae8106c8373b5c6568e4c8fa7af2eca9dbe6bca \ + --hash=sha256:c5b0fd10f65ef62b5f1d62f4251d51cbcaf003fcedae8c91b040a488614be1a4 + # via azure-identity +multidict==6.7.1 \ + --hash=sha256:03ede2a6ffbe8ef936b92cb4529f27f42be7f56afcdab5ab739cd5f27fb1cbf9 \ + --hash=sha256:0458c978acd8e6ea53c81eefaddbbee9c6c5e591f41b3f5e8e194780fe026581 \ + --hash=sha256:0b38ebffd9be37c1170d33bc0f36f4f262e0a09bc1aac1c34c7aa51a7293f0b3 \ + 
--hash=sha256:0b4c48648d7649c9335cf1927a8b87fa692de3dcb15faa676c6a6f1f1aabda43 \ + --hash=sha256:0e161ddf326db5577c3a4cc2d8648f81456e8a20d40415541587a71620d7a7d1 \ + --hash=sha256:10ae39c9cfe6adedcdb764f5e8411d4a92b055e35573a2eaa88d3323289ef93c \ + --hash=sha256:128441d052254f42989ef98b7b6a6ecb1e6f708aa962c7984235316db59f50fa \ + --hash=sha256:12fad252f8b267cc75b66e8fc51b3079604e8d43a75428ffe193cd9e2195dfd6 \ + --hash=sha256:14525a5f61d7d0c94b368a42cff4c9a4e7ba2d52e2672a7b23d84dc86fb02b0c \ + --hash=sha256:17307b22c217b4cf05033dabefe68255a534d637c6c9b0cc8382718f87be4262 \ + --hash=sha256:1b99af4d9eec0b49927b4402bcbb58dea89d3e0db8806a4086117019939ad3dd \ + --hash=sha256:1d540e51b7e8e170174555edecddbd5538105443754539193e3e1061864d444d \ + --hash=sha256:1e3a8bb24342a8201d178c3b4984c26ba81a577c80d4d525727427460a50c22d \ + --hash=sha256:21f830fe223215dffd51f538e78c172ed7c7f60c9b96a2bf05c4848ad49921c3 \ + --hash=sha256:233b398c29d3f1b9676b4b6f75c518a06fcb2ea0b925119fb2c1bc35c05e1601 \ + --hash=sha256:25167cc263257660290fba06b9318d2026e3c910be240a146e1f66dd114af2b0 \ + --hash=sha256:253282d70d67885a15c8a7716f3a73edf2d635793ceda8173b9ecc21f2fb8292 \ + --hash=sha256:273d23f4b40f3dce4d6c8a821c741a86dec62cded82e1175ba3d99be128147ed \ + --hash=sha256:283ddac99f7ac25a4acadbf004cb5ae34480bbeb063520f70ce397b281859362 \ + --hash=sha256:28ca5ce2fd9716631133d0e9a9b9a745ad7f60bac2bccafb56aa380fc0b6c511 \ + --hash=sha256:2b41f5fed0ed563624f1c17630cb9941cf2309d4df00e494b551b5f3e3d67a23 \ + --hash=sha256:2bbd113e0d4af5db41d5ebfe9ccaff89de2120578164f86a5d17d5a576d1e5b2 \ + --hash=sha256:2e1425e2f99ec5bd36c15a01b690a1a2456209c5deed58f95469ffb46039ccbb \ + --hash=sha256:2e2d2ed645ea29f31c4c7ea1552fcfd7cb7ba656e1eafd4134a6620c9f5fdd9e \ + --hash=sha256:3758692429e4e32f1ba0df23219cd0b4fc0a52f476726fff9337d1a57676a582 \ + --hash=sha256:38fb49540705369bab8484db0689d86c0a33a0a9f2c1b197f506b71b4b6c19b0 \ + --hash=sha256:398c1478926eca669f2fd6a5856b6de9c0acf23a2cb59a14c0ba5844fa38077e \ + --hash=sha256:3ab8b9d8b75aef9df299595d5388b14530839f6422333357af1339443cff777d \ + --hash=sha256:3bd231490fa7217cc832528e1cd8752a96f0125ddd2b5749390f7c3ec8721b65 \ + --hash=sha256:3d51ff4785d58d3f6c91bdbffcb5e1f7ddfda557727043aa20d20ec4f65e324a \ + --hash=sha256:3fccb473e87eaa1382689053e4a4618e7ba7b9b9b8d6adf2027ee474597128cd \ + --hash=sha256:401c5a650f3add2472d1d288c26deebc540f99e2fb83e9525007a74cd2116f1d \ + --hash=sha256:41f2952231456154ee479651491e94118229844dd7226541788be783be2b5108 \ + --hash=sha256:432feb25a1cb67fe82a9680b4d65fb542e4635cb3166cd9c01560651ad60f177 \ + --hash=sha256:439cbebd499f92e9aa6793016a8acaa161dfa749ae86d20960189f5398a19144 \ + --hash=sha256:4885cb0e817aef5d00a2e8451d4665c1808378dc27c2705f1bf4ef8505c0d2e5 \ + --hash=sha256:497394b3239fc6f0e13a78a3e1b61296e72bf1c5f94b4c4eb80b265c37a131cd \ + --hash=sha256:497bde6223c212ba11d462853cfa4f0ae6ef97465033e7dc9940cdb3ab5b48e5 \ + --hash=sha256:4cfb48c6ea66c83bcaaf7e4dfa7ec1b6bbcf751b7db85a328902796dfde4c060 \ + --hash=sha256:538cec1e18c067d0e6103aa9a74f9e832904c957adc260e61cd9d8cf0c3b3d37 \ + --hash=sha256:55d97cc6dae627efa6a6e548885712d4864b81110ac76fa4e534c03819fa4a56 \ + --hash=sha256:563fe25c678aaba333d5399408f5ec3c383ca5b663e7f774dd179a520b8144df \ + --hash=sha256:57b46b24b5d5ebcc978da4ec23a819a9402b4228b8a90d9c656422b4bdd8a963 \ + --hash=sha256:5a37ca18e360377cfda1d62f5f382ff41f2b8c4ccb329ed974cc2e1643440118 \ + --hash=sha256:5c4b9bfc148f5a91be9244d6264c53035c8a0dcd2f51f1c3c6e30e30ebaa1c84 \ + 
--hash=sha256:5e01429a929600e7dab7b166062d9bb54a5eed752384c7384c968c2afab8f50f \ + --hash=sha256:5fa6a95dfee63893d80a34758cd0e0c118a30b8dcb46372bf75106c591b77889 \ + --hash=sha256:619e5a1ac57986dbfec9f0b301d865dddf763696435e2962f6d9cf2fdff2bb71 \ + --hash=sha256:6aac4f16b472d5b7dc6f66a0d49dd57b0e0902090be16594dc9ebfd3d17c47e7 \ + --hash=sha256:6b10359683bd8806a200fd2909e7c8ca3a7b24ec1d8132e483d58e791d881048 \ + --hash=sha256:6b83cabdc375ffaaa15edd97eb7c0c672ad788e2687004990074d7d6c9b140c8 \ + --hash=sha256:6d3bc717b6fe763b8be3f2bee2701d3c8eb1b2a8ae9f60910f1b2860c82b6c49 \ + --hash=sha256:7a7e590ff876a3eaf1c02a4dfe0724b6e69a9e9de6d8f556816f29c496046e59 \ + --hash=sha256:7eee46ccb30ff48a1e35bb818cc90846c6be2b68240e42a78599166722cea709 \ + --hash=sha256:7ff981b266af91d7b4b3793ca3382e53229088d193a85dfad6f5f4c27fc73e5d \ + --hash=sha256:841189848ba629c3552035a6a7f5bf3b02eb304e9fea7492ca220a8eda6b0e5c \ + --hash=sha256:844c5bca0b5444adb44a623fb0a1310c2f4cd41f402126bb269cd44c9b3f3e1e \ + --hash=sha256:84e61e3af5463c19b67ced91f6c634effb89ef8bfc5ca0267f954451ed4bb6a2 \ + --hash=sha256:8be1802715a8e892c784c0197c2ace276ea52702a0ede98b6310c8f255a5afb3 \ + --hash=sha256:8f333ec9c5eb1b7105e3b84b53141e66ca05a19a605368c55450b6ba208cb9ee \ + --hash=sha256:9004d8386d133b7e6135679424c91b0b854d2d164af6ea3f289f8f2761064609 \ + --hash=sha256:90efbcf47dbe33dcf643a1e400d67d59abeac5db07dc3f27d6bdeae497a2198c \ + --hash=sha256:935434b9853c7c112eee7ac891bc4cb86455aa631269ae35442cb316790c1445 \ + --hash=sha256:93b1818e4a6e0930454f0f2af7dfce69307ca03cdcfb3739bf4d91241967b6c1 \ + --hash=sha256:95922cee9a778659e91db6497596435777bd25ed116701a4c034f8e46544955a \ + --hash=sha256:960c83bf01a95b12b08fd54324a4eb1d5b52c88932b5cba5d6e712bb3ed12eb5 \ + --hash=sha256:97231140a50f5d447d3164f994b86a0bed7cd016e2682f8650d6a9158e14fd31 \ + --hash=sha256:97891f3b1b3ffbded884e2916cacf3c6fc87b66bb0dde46f7357404750559f33 \ + --hash=sha256:98655c737850c064a65e006a3df7c997cd3b220be4ec8fe26215760b9697d4d7 \ + --hash=sha256:98bc624954ec4d2c7cb074b8eefc2b5d0ce7d482e410df446414355d158fe4ca \ + --hash=sha256:9c90fed18bffc0189ba814749fdcc102b536e83a9f738a9003e569acd540a733 \ + --hash=sha256:9d624335fd4fa1c08a53f8b4be7676ebde19cd092b3895c421045ca87895b429 \ + --hash=sha256:9f9af11306994335398293f9958071019e3ab95e9a707dc1383a35613f6abcb9 \ + --hash=sha256:a0543217a6a017692aa6ae5cc39adb75e587af0f3a82288b1492eb73dd6cc2a4 \ + --hash=sha256:a088b62bd733e2ad12c50dad01b7d0166c30287c166e137433d3b410add807a6 \ + --hash=sha256:a407f13c188f804c759fc6a9f88286a565c242a76b27626594c133b82883b5c2 \ + --hash=sha256:a90f75c956e32891a4eda3639ce6dd86e87105271f43d43442a3aedf3cddf172 \ + --hash=sha256:af959b9beeb66c822380f222f0e0a1889331597e81f1ded7f374f3ecb0fd6c52 \ + --hash=sha256:b0fa96985700739c4c7853a43c0b3e169360d6855780021bfc6d0f1ce7c123e7 \ + --hash=sha256:b26684587228afed0d50cf804cc71062cc9c1cdf55051c4c6345d372947b268c \ + --hash=sha256:b4938326284c4f1224178a560987b6cf8b4d38458b113d9b8c1db1a836e640a2 \ + --hash=sha256:b8c990b037d2fff2f4e33d3f21b9b531c5745b33a49a7d6dbe7a177266af44f6 \ + --hash=sha256:ba0a9fb644d0c1a2194cf7ffb043bd852cea63a57f66fbd33959f7dae18517bf \ + --hash=sha256:bdbf9f3b332abd0cdb306e7c2113818ab1e922dc84b8f8fd06ec89ed2a19ab8b \ + --hash=sha256:bfde23ef6ed9db7eaee6c37dcec08524cb43903c60b285b172b6c094711b3961 \ + --hash=sha256:c0abd12629b0af3cf590982c0b413b1e7395cd4ec026f30986818ab95bfaa94a \ + --hash=sha256:c102791b1c4f3ab36ce4101154549105a53dc828f016356b3e3bcae2e3a039d3 \ + 
--hash=sha256:c3a32d23520ee37bf327d1e1a656fec76a2edd5c038bf43eddfa0572ec49c60b \ + --hash=sha256:c76c4bec1538375dad9d452d246ca5368ad6e1c9039dadcf007ae59c70619ea1 \ + --hash=sha256:c9035dde0f916702850ef66460bc4239d89d08df4d02023a5926e7446724212c \ + --hash=sha256:cb2a55f408c3043e42b40cc8eecd575afa27b7e0b956dfb190de0f8499a57a53 \ + --hash=sha256:ce1bbd7d780bb5a0da032e095c951f7014d6b0a205f8318308140f1a6aba159e \ + --hash=sha256:d54ecf9f301853f2c5e802da559604b3e95bb7a3b01a9c295c6ee591b9882de8 \ + --hash=sha256:d62b7f64ffde3b99d06b707a280db04fb3855b55f5a06df387236051d0668f4a \ + --hash=sha256:da62917e6076f512daccfbbde27f46fed1c98fee202f0559adec8ee0de67f71a \ + --hash=sha256:df9f19c28adcb40b6aae30bbaa1478c389efd50c28d541d76760199fc1037c32 \ + --hash=sha256:e1c5988359516095535c4301af38d8a8838534158f649c05dd1050222321bcb3 \ + --hash=sha256:e628ef0e6859ffd8273c69412a2465c4be4a9517d07261b33334b5ec6f3c7489 \ + --hash=sha256:e82d14e3c948952a1a85503817e038cba5905a3352de76b9a465075d072fba23 \ + --hash=sha256:e954b24433c768ce78ab7929e84ccf3422e46deb45a4dc9f93438f8217fa2d34 \ + --hash=sha256:eb0ce7b2a32d09892b3dd6cc44877a0d02a33241fafca5f25c8b6b62374f8b75 \ + --hash=sha256:eb304767bca2bb92fb9c5bd33cedc95baee5bb5f6c88e63706533a1c06ad08c8 \ + --hash=sha256:ec6652a1bee61c53a3e5776b6049172c53b6aaba34f18c9ad04f82712bac623d \ + --hash=sha256:f2a0a924d4c2e9afcd7ec64f9de35fcd96915149b2216e1cb2c10a56df483855 \ + --hash=sha256:f33dc2a3abe9249ea5d8360f969ec7f4142e7ac45ee7014d8f8d5acddf178b7b \ + --hash=sha256:f5dd81c45b05518b9aa4da4aa74e1c93d715efa234fd3e8a179df611cc85e5f4 \ + --hash=sha256:f99fe611c312b3c1c0ace793f92464d8cd263cc3b26b5721950d977b006b6c4d \ + --hash=sha256:fa263a02f4f2dd2d11a7b1bb4362aa7cb1049f84a9235d31adf63f30143469a0 \ + --hash=sha256:fc5907494fccf3e7d3f94f95c91d6336b092b5fc83811720fae5e2765890dfba \ + --hash=sha256:fcee94dfbd638784645b066074b338bc9cc155d4b4bffa4adce1615c5a426c19 + # via + # aiohttp + # yarl +multitasking==0.0.12 \ + --hash=sha256:2fba2fa8ed8c4b85e227c5dd7dc41c7d658de3b6f247927316175a57349b84d1 + # via yfinance +nbclient==0.10.4 \ + --hash=sha256:1e54091b16e6da39e297b0ece3e10f6f29f4ac4e8ee515d29f8a7099bd6553c9 \ + --hash=sha256:9162df5a7373d70d606527300a95a975a47c137776cd942e52d9c7e29ff83440 + # via nbconvert +nbconvert==7.17.0 \ + --hash=sha256:1b2696f1b5be12309f6c7d707c24af604b87dfaf6d950794c7b07acab96dda78 \ + --hash=sha256:4f99a63b337b9a23504347afdab24a11faa7d86b405e5c8f9881cd313336d518 + # via + # jupyter + # jupyter-server +nbformat==5.10.4 \ + --hash=sha256:322168b14f937a5d11362988ecac2a4952d3d8e3a2cbeb2319584631226d5b3a \ + --hash=sha256:3b48d6c8fbca4b299bf3982ea7db1af21580e4fec269ad087b9e81588891200b + # via + # jupyter-server + # nbclient + # nbconvert +nest-asyncio==1.6.0 \ + --hash=sha256:6f172d5449aca15afd6c646851f4e31e02c598d553a667e38cafa997cfec55fe \ + --hash=sha256:87af6efd6b5e897c81050477ef65c62e2b2f35d51703cae01aff2905b1852e1c + # via ipykernel +notebook==7.5.3 \ + --hash=sha256:393ceb269cf9fdb02a3be607a57d7bd5c2c14604f1818a17dbeb38e04f98cbfa \ + --hash=sha256:c997bfa1a2a9eb58c9bbb7e77d50428befb1033dd6f02c482922e96851d67354 + # via jupyter +notebook-shim==0.2.4 \ + --hash=sha256:411a5be4e9dc882a074ccbcae671eda64cceb068767e9a3419096986560e1cef \ + --hash=sha256:b4b2cfa1b65d98307ca24361f5b30fe785b53c3fd07b7a47e89acb5e6ac638cb + # via + # jupyterlab + # notebook +numpy==2.4.2 \ + --hash=sha256:00ab83c56211a1d7c07c25e3217ea6695e50a3e2f255053686b081dc0b091a82 \ + --hash=sha256:068cdb2d0d644cdb45670810894f6a0600797a69c05f1ac478e8d31670b8ee75 \ + 
--hash=sha256:0f01dcf33e73d80bd8dc0f20a71303abbafa26a19e23f6b68d1aa9990af90257 \ + --hash=sha256:0fece1d1f0a89c16b03442eae5c56dc0be0c7883b5d388e0c03f53019a4bfd71 \ + --hash=sha256:12e26134a0331d8dbd9351620f037ec470b7c75929cb8a1537f6bfe411152a1a \ + --hash=sha256:1ae241bbfc6ae276f94a170b14785e561cb5e7f626b6688cf076af4110887413 \ + --hash=sha256:1f92f53998a17265194018d1cc321b2e96e900ca52d54c7c77837b71b9465181 \ + --hash=sha256:209fae046e62d0ce6435fcfe3b1a10537e858249b3d9b05829e2a05218296a85 \ + --hash=sha256:20abd069b9cda45874498b245c8015b18ace6de8546bf50dfa8cea1696ed06ef \ + --hash=sha256:21982668592194c609de53ba4933a7471880ccbaadcc52352694a59ecc860b3a \ + --hash=sha256:25f2059807faea4b077a2b6837391b5d830864b3543627f381821c646f31a63c \ + --hash=sha256:2653de5c24910e49c2b106499803124dde62a5a1fe0eedeaecf4309a5f639390 \ + --hash=sha256:2b8f157c8a6f20eb657e240f8985cc135598b2b46985c5bccbde7616dc9c6b1e \ + --hash=sha256:2fb882da679409066b4603579619341c6d6898fc83a8995199d5249f986e8e8f \ + --hash=sha256:40397bda92382fcec844066efb11f13e1c9a3e2a8e8f318fb72ed8b6db9f60f1 \ + --hash=sha256:444be170853f1f9d528428eceb55f12918e4fda5d8805480f36a002f1415e09b \ + --hash=sha256:47c5a6ed21d9452b10227e5e8a0e1c22979811cad7dcc19d8e3e2fb8fa03f1a3 \ + --hash=sha256:4f069069931240b3fc703f1e23df63443dbd6390614c8c44a87d96cd0ec81eb1 \ + --hash=sha256:52b913ec40ff7ae845687b0b34d8d93b60cb66dcee06996dd5c99f2fc9328657 \ + --hash=sha256:5633c0da313330fd20c484c78cdd3f9b175b55e1a766c4a174230c6b70ad8262 \ + --hash=sha256:5daf6f3914a733336dab21a05cdec343144600e964d2fcdabaac0c0269874b2a \ + --hash=sha256:5eea80d908b2c1f91486eb95b3fb6fab187e569ec9752ab7d9333d2e66bf2d6b \ + --hash=sha256:602f65afdef699cda27ec0b9224ae5dc43e328f4c24c689deaf77133dbee74d0 \ + --hash=sha256:659a6107e31a83c4e33f763942275fd278b21d095094044eb35569e86a21ddae \ + --hash=sha256:66cb9422236317f9d44b67b4d18f44efe6e9c7f8794ac0462978513359461554 \ + --hash=sha256:6d82351358ffbcdcd7b686b90742a9b86632d6c1c051016484fa0b326a0a1548 \ + --hash=sha256:6e9f61981ace1360e42737e2bae58b27bf28a1b27e781721047d84bd754d32e7 \ + --hash=sha256:6ed0be1ee58eef41231a5c943d7d1375f093142702d5723ca2eb07db9b934b05 \ + --hash=sha256:7cdde6de52fb6664b00b056341265441192d1291c130e99183ec0d4b110ff8b1 \ + --hash=sha256:7df2de1e4fba69a51c06c28f5a3de36731eb9639feb8e1cf7e4a7b0daf4cf622 \ + --hash=sha256:7edc794af8b36ca37ef5fcb5e0d128c7e0595c7b96a2318d1badb6fcd8ee86b1 \ + --hash=sha256:7f54844851cdb630ceb623dcec4db3240d1ac13d4990532446761baede94996a \ + --hash=sha256:805cc8de9fd6e7a22da5aed858e0ab16be5a4db6c873dde1d7451c541553aa27 \ + --hash=sha256:8906e71fd8afcb76580404e2a950caef2685df3d2a57fe82a86ac8d33cc007ba \ + --hash=sha256:89f7268c009bc492f506abd6f5265defa7cb3f7487dc21d357c3d290add45082 \ + --hash=sha256:8c50dd1fc8826f5b26a5ee4d77ca55d88a895f4e4819c7ecc2a9f5905047a443 \ + --hash=sha256:8e4549f8a3c6d13d55041925e912bfd834285ef1dd64d6bc7d542583355e2e98 \ + --hash=sha256:8e9afaeb0beff068b4d9cd20d322ba0ee1cecfb0b08db145e4ab4dd44a6b5110 \ + --hash=sha256:98f16a80e917003a12c0580f97b5f875853ebc33e2eaa4bccfc8201ac6869308 \ + --hash=sha256:9e35d3e0144137d9fdae62912e869136164534d64a169f86438bc9561b6ad49f \ + --hash=sha256:9e4424677ce4b47fe73c8b5556d876571f7c6945d264201180db2dc34f676ab5 \ + --hash=sha256:adb6ed2ad29b9e15321d167d152ee909ec73395901b70936f029c3bc6d7f4460 \ + --hash=sha256:aea4f66ff44dfddf8c2cffd66ba6538c5ec67d389285292fe428cb2c738c8aef \ + --hash=sha256:b21041e8cb6a1eb5312dd1d2f80a94d91efffb7a06b70597d44f1bd2dfc315ab \ + 
--hash=sha256:b2f0073ed0868db1dcd86e052d37279eef185b9c8db5bf61f30f46adac63c909 \ + --hash=sha256:b3a24467af63c67829bfaa61eecf18d5432d4f11992688537be59ecd6ad32f5e \ + --hash=sha256:b9c618d56a29c9cb1c4da979e9899be7578d2e0b3c24d52079c166324c9e8695 \ + --hash=sha256:bba37bc29d4d85761deed3954a1bc62be7cf462b9510b51d367b769a8c8df325 \ + --hash=sha256:bd3a7a9f5847d2fb8c2c6d1c862fa109c31a9abeca1a3c2bd5a64572955b2979 \ + --hash=sha256:be71bf1edb48ebbbf7f6337b5bfd2f895d1902f6335a5830b20141fc126ffba0 \ + --hash=sha256:c02ef4401a506fb60b411467ad501e1429a3487abca4664871d9ae0b46c8ba32 \ + --hash=sha256:c3cd545784805de05aafe1dde61752ea49a359ccba9760c1e5d1c88a93bbf2b7 \ + --hash=sha256:c7ac672d699bf36275c035e16b65539931347d68b70667d28984c9fb34e07fa7 \ + --hash=sha256:cb7bbb88aa74908950d979eeaa24dbdf1a865e3c7e45ff0121d8f70387b55f73 \ + --hash=sha256:cd2bd2bbed13e213d6b55dc1d035a4f91748a7d3edc9480c13898b0353708920 \ + --hash=sha256:cda077c2e5b780200b6b3e09d0b42205a3d1c68f30c6dceb90401c13bff8fe74 \ + --hash=sha256:cf28c0c1d4c4bf00f509fa7eb02c58d7caf221b50b467bcb0d9bbf1584d5c821 \ + --hash=sha256:d0d9b7c93578baafcbc5f0b83eaf17b79d345c6f36917ba0c67f45226911d499 \ + --hash=sha256:d1240d50adff70c2a88217698ca844723068533f3f5c5fa6ee2e3220e3bdb000 \ + --hash=sha256:d30291931c915b2ab5717c2974bb95ee891a1cf22ebc16a8006bd59cd210d40a \ + --hash=sha256:d9f64d786b3b1dd742c946c42d15b07497ed14af1a1f3ce840cce27daa0ce913 \ + --hash=sha256:da6cad4e82cb893db4b69105c604d805e0c3ce11501a55b5e9f9083b47d2ffe8 \ + --hash=sha256:df1b10187212b198dd45fa943d8985a3c8cf854aed4923796e0e019e113a1bda \ + --hash=sha256:e04ae107ac591763a47398bb45b568fc38f02dbc4aa44c063f67a131f99346cb \ + --hash=sha256:e6dee3bb76aa4009d5a912180bf5b2de012532998d094acee25d9cb8dee3e44a \ + --hash=sha256:e7e88598032542bd49af7c4747541422884219056c268823ef6e5e89851c8825 \ + --hash=sha256:e98c97502435b53741540a5717a6749ac2ada901056c7db951d33e11c885cc7d \ + --hash=sha256:ec055f6dae239a6299cace477b479cca2fc125c5675482daf1dd886933a1076f \ + --hash=sha256:f74f0f7779cc7ae07d1810aab8ac6b1464c3eafb9e283a40da7309d5e6e48fbb \ + --hash=sha256:fbde1b0c6e81d56f5dccd95dd4a711d9b95df1ae4009a60887e56b27e8d903fa \ + --hash=sha256:fcf92bee92742edd401ba41135185866f7026c502617f422eb432cfeca4fe236 \ + --hash=sha256:fd49860271d52127d61197bb50b64f58454e9f578cb4b2c001a6de8b1f50b0b1 + # via + # data-formulator + # db-dtypes + # pandas + # scikit-learn + # scipy + # yfinance +openai==2.16.0 \ + --hash=sha256:42eaa22ca0d8ded4367a77374104d7a2feafee5bd60a107c3c11b5243a11cd12 \ + --hash=sha256:5f46643a8f42899a84e80c38838135d7038e7718333ce61396994f887b09a59b + # via + # data-formulator + # litellm +ordered-set==4.1.0 \ + --hash=sha256:046e1132c71fcf3330438a539928932caf51ddbc582496833e23de611de14562 \ + --hash=sha256:694a8e44c87657c59292ede72891eb91d34131f6531463aab3009191c77364a8 + # via flask-limiter +overrides==7.7.0 ; python_full_version < '3.12' \ + --hash=sha256:55158fa3d93b98cc75299b1e67078ad9003ca27945c76162c1c0766d6f91820a \ + --hash=sha256:c7ed9d062f78b8e4c1a7b70bd8796b35ead4d9f510227ef9c5dc7626c60d7e49 + # via jupyter-server +packaging==26.0 \ + --hash=sha256:00243ae351a257117b6a241061796684b084ed1c516a08c48a3f7e147a9d80b4 \ + --hash=sha256:b36f1fef9334a5588b4166f8bcd26a14e521f2b55e6b9de3aaa80d3ff7a37529 + # via + # build + # db-dtypes + # google-cloud-bigquery + # huggingface-hub + # ipykernel + # jupyter-events + # jupyter-server + # jupyterlab + # jupyterlab-server + # limits + # nbconvert +pandas==2.3.3 \ + --hash=sha256:0242fe9a49aa8b4d78a4fa03acb397a58833ef6199e9aa40a95f027bb3a1b6e7 
\ + --hash=sha256:1611aedd912e1ff81ff41c745822980c49ce4a7907537be8692c8dbc31924593 \ + --hash=sha256:1b07204a219b3b7350abaae088f451860223a52cfb8a6c53358e7948735158e5 \ + --hash=sha256:1d37b5848ba49824e5c30bedb9c830ab9b7751fd049bc7914533e01c65f79791 \ + --hash=sha256:2462b1a365b6109d275250baaae7b760fd25c726aaca0054649286bcfbb3e8ec \ + --hash=sha256:2e3ebdb170b5ef78f19bfb71b0dc5dc58775032361fa188e814959b74d726dd5 \ + --hash=sha256:318d77e0e42a628c04dc56bcef4b40de67918f7041c2b061af1da41dcff670ac \ + --hash=sha256:371a4ab48e950033bcf52b6527eccb564f52dc826c02afd9a1bc0ab731bba084 \ + --hash=sha256:3869faf4bd07b3b66a9f462417d0ca3a9df29a9f6abd5d0d0dbab15dac7abe87 \ + --hash=sha256:3fd2f887589c7aa868e02632612ba39acb0b8948faf5cc58f0850e165bd46f35 \ + --hash=sha256:4e0a175408804d566144e170d0476b15d78458795bb18f1304fb94160cabf40c \ + --hash=sha256:56851a737e3470de7fa88e6131f41281ed440d29a9268dcbf0002da5ac366713 \ + --hash=sha256:602b8615ebcc4a0c1751e71840428ddebeb142ec02c786e8ad6b1ce3c8dec523 \ + --hash=sha256:6253c72c6a1d990a410bc7de641d34053364ef8bcd3126f7e7450125887dffe3 \ + --hash=sha256:6435cb949cb34ec11cc9860246ccb2fdc9ecd742c12d3304989017d53f039a78 \ + --hash=sha256:6d21f6d74eb1725c2efaa71a2bfc661a0689579b58e9c0ca58a739ff0b002b53 \ + --hash=sha256:6d2cefc361461662ac48810cb14365a365ce864afe85ef1f447ff5a1e99ea81c \ + --hash=sha256:74ecdf1d301e812db96a465a525952f4dde225fdb6d8e5a521d47e1f42041e21 \ + --hash=sha256:75ea25f9529fdec2d2e93a42c523962261e567d250b0013b16210e1d40d7c2e5 \ + --hash=sha256:8fe25fc7b623b0ef6b5009149627e34d2a4657e880948ec3c840e9402e5c1b45 \ + --hash=sha256:900f47d8f20860de523a1ac881c4c36d65efcb2eb850e6948140fa781736e110 \ + --hash=sha256:93c2d9ab0fc11822b5eece72ec9587e172f63cff87c00b062f6e37448ced4493 \ + --hash=sha256:a16dcec078a01eeef8ee61bf64074b4e524a2a3f4b3be9326420cabe59c4778b \ + --hash=sha256:a21d830e78df0a515db2b3d2f5570610f5e6bd2e27749770e8bb7b524b89b450 \ + --hash=sha256:a45c765238e2ed7d7c608fc5bc4a6f88b642f2f01e70c0c23d2224dd21829d86 \ + --hash=sha256:a68e15f780eddf2b07d242e17a04aa187a7ee12b40b930bfdd78070556550e98 \ + --hash=sha256:b3d11d2fda7eb164ef27ffc14b4fcab16a80e1ce67e9f57e19ec0afaf715ba89 \ + --hash=sha256:b468d3dad6ff947df92dcb32ede5b7bd41a9b3cceef0a30ed925f6d01fb8fa66 \ + --hash=sha256:b98560e98cb334799c0b07ca7967ac361a47326e9b4e5a7dfb5ab2b1c9d35a1b \ + --hash=sha256:bdcd9d1167f4885211e401b3036c0c8d9e274eee67ea8d0758a256d60704cfe8 \ + --hash=sha256:c46467899aaa4da076d5abc11084634e2d197e9460643dd455ac3db5856b24d6 \ + --hash=sha256:c4fc4c21971a1a9f4bdb4c73978c7f7256caa3e62b323f70d6cb80db583350bc \ + --hash=sha256:d051c0e065b94b7a3cea50eb1ec32e912cd96dba41647eb24104b6c6c14c5788 \ + --hash=sha256:db4301b2d1f926ae677a751eb2bd0e8c5f5319c9cb3f88b0becbbb0b07b34151 \ + --hash=sha256:e05e1af93b977f7eafa636d043f9f94c7ee3ac81af99c13508215942e64c993b \ + --hash=sha256:e32e7cc9af0f1cc15548288a51a3b681cc2a219faa838e995f7dc53dbab1062d \ + --hash=sha256:ecaf1e12bdc03c86ad4a7ea848d66c685cb6851d807a26aa245ca3d2017a1908 \ + --hash=sha256:ee15f284898e7b246df8087fc82b87b01686f98ee67d85a17b7ab44143a3a9a0 \ + --hash=sha256:ee67acbbf05014ea6c763beb097e03cd629961c8a632075eeb34247120abcb4b \ + --hash=sha256:f086f6fe114e19d92014a1966f43a3e62285109afe874f067f5abbdcbb10e59c \ + --hash=sha256:f8bfc0e12dc78f777f323f55c58649591b2cd0c43534e8355c51d3fede5f4dee + # via + # data-formulator + # db-dtypes + # vega-datasets + # yfinance +pandocfilters==1.5.1 \ + --hash=sha256:002b4a555ee4ebc03f8b66307e287fa492e4a77b4ea14d3f934328297bb4939e \ + 
--hash=sha256:93be382804a9cdb0a7267585f157e5d1731bbe5545a85b268d6f5fe6232de2bc + # via nbconvert +parso==0.8.5 \ + --hash=sha256:034d7354a9a018bdce352f48b2a8a450f05e9d6ee85db84764e9b6bd96dafe5a \ + --hash=sha256:646204b5ee239c396d040b90f9e272e9a8017c630092bf59980beb62fd033887 + # via jedi +peewee==3.19.0 \ + --hash=sha256:de220b94766e6008c466e00ce4ba5299b9a832117d9eb36d45d0062f3cfd7417 \ + --hash=sha256:f88292a6f0d7b906cb26bca9c8599b8f4d8920ebd36124400d0cbaaaf915511f + # via yfinance +pexpect==4.9.0 ; sys_platform != 'emscripten' and sys_platform != 'win32' \ + --hash=sha256:7236d1e080e4936be2dc3e326cec0af72acf9212a7e1d060210e70a47e253523 \ + --hash=sha256:ee7d41123f3c9911050ea2c2dac107568dc43b2d3b0c7557a33212c398ead30f + # via ipython +platformdirs==4.5.1 \ + --hash=sha256:61d5cdcc6065745cdd94f0f878977f8de9437be93de97c1c12f853c9c0cdcbda \ + --hash=sha256:d03afa3963c806a9bed9d5125c8f4cb2fdaf74a55ab60e5d59b3fde758104d31 + # via + # jupyter-core + # yfinance +prometheus-client==0.24.1 \ + --hash=sha256:150db128af71a5c2482b36e588fc8a6b95e498750da4b17065947c16070f4055 \ + --hash=sha256:7e0ced7fbbd40f7b84962d5d2ab6f17ef88a72504dcf7c0b40737b43b2a461f9 + # via jupyter-server +prompt-toolkit==3.0.52 \ + --hash=sha256:28cde192929c8e7321de85de1ddbe736f1375148b02f2e17edd840042b1be855 \ + --hash=sha256:9aac639a3bbd33284347de5ad8d68ecc044b91a762dc39b7c21095fcd6a19955 + # via + # ipython + # jupyter-console +propcache==0.4.1 \ + --hash=sha256:0013cb6f8dde4b2a2f66903b8ba740bdfe378c943c4377a200551ceb27f379e4 \ + --hash=sha256:005f08e6a0529984491e37d8dbc3dd86f84bd78a8ceb5fa9a021f4c48d4984be \ + --hash=sha256:031dce78b9dc099f4c29785d9cf5577a3faf9ebf74ecbd3c856a7b92768c3df3 \ + --hash=sha256:05674a162469f31358c30bcaa8883cb7829fa3110bf9c0991fe27d7896c42d85 \ + --hash=sha256:060b16ae65bc098da7f6d25bf359f1f31f688384858204fe5d652979e0015e5b \ + --hash=sha256:120c964da3fdc75e3731aa392527136d4ad35868cc556fd09bb6d09172d9a367 \ + --hash=sha256:15932ab57837c3368b024473a525e25d316d8353016e7cc0e5ba9eb343fbb1cf \ + --hash=sha256:17612831fda0138059cc5546f4d12a2aacfb9e47068c06af35c400ba58ba7393 \ + --hash=sha256:204483131fb222bdaaeeea9f9e6c6ed0cac32731f75dfc1d4a567fc1926477c1 \ + --hash=sha256:2ad890caa1d928c7c2965b48f3a3815c853180831d0e5503d35cf00c472f4717 \ + --hash=sha256:2bb07ffd7eaad486576430c89f9b215f9e4be68c4866a96e97db9e97fead85dc \ + --hash=sha256:333ddb9031d2704a301ee3e506dc46b1fe5f294ec198ed6435ad5b6a085facfe \ + --hash=sha256:35c3277624a080cc6ec6f847cbbbb5b49affa3598c4535a0a4682a697aaa5c75 \ + --hash=sha256:364426a62660f3f699949ac8c621aad6977be7126c5807ce48c0aeb8e7333ea6 \ + --hash=sha256:381914df18634f5494334d201e98245c0596067504b9372d8cf93f4bb23e025e \ + --hash=sha256:3d902a36df4e5989763425a8ab9e98cd8ad5c52c823b34ee7ef307fd50582566 \ + --hash=sha256:3f7124c9d820ba5548d431afb4632301acf965db49e666aa21c305cbe8c6de12 \ + --hash=sha256:405aac25c6394ef275dee4c709be43745d36674b223ba4eb7144bf4d691b7367 \ + --hash=sha256:41a89040cb10bd345b3c1a873b2bf36413d48da1def52f268a055f7398514874 \ + --hash=sha256:43eedf29202c08550aac1d14e0ee619b0430aaef78f85864c1a892294fbc28cf \ + --hash=sha256:473c61b39e1460d386479b9b2f337da492042447c9b685f28be4f74d3529e566 \ + --hash=sha256:49a2dc67c154db2c1463013594c458881a069fcf98940e61a0569016a583020a \ + --hash=sha256:4c3c70630930447f9ef1caac7728c8ad1c56bc5015338b20fed0d08ea2480b3a \ + --hash=sha256:4d3df5fa7e36b3225954fba85589da77a0fe6a53e3976de39caf04a0db4c36f1 \ + --hash=sha256:4d7af63f9f93fe593afbf104c21b3b15868efb2c21d07d8732c0c4287e66b6a6 \ + 
--hash=sha256:501d20b891688eb8e7aa903021f0b72d5a55db40ffaab27edefd1027caaafa61 \ + --hash=sha256:521a463429ef54143092c11a77e04056dd00636f72e8c45b70aaa3140d639726 \ + --hash=sha256:5558992a00dfd54ccbc64a32726a3357ec93825a418a401f5cc67df0ac5d9e49 \ + --hash=sha256:55c72fd6ea2da4c318e74ffdf93c4fe4e926051133657459131a95c846d16d44 \ + --hash=sha256:564d9f0d4d9509e1a870c920a89b2fec951b44bf5ba7d537a9e7c1ccec2c18af \ + --hash=sha256:580e97762b950f993ae618e167e7be9256b8353c2dcd8b99ec100eb50f5286aa \ + --hash=sha256:5a103c3eb905fcea0ab98be99c3a9a5ab2de60228aa5aceedc614c0281cf6153 \ + --hash=sha256:5c3310452e0d31390da9035c348633b43d7e7feb2e37be252be6da45abd1abcc \ + --hash=sha256:60a8fda9644b7dfd5dece8c61d8a85e271cb958075bfc4e01083c148b61a7caf \ + --hash=sha256:671538c2262dadb5ba6395e26c1731e1d52534bfe9ae56d0b5573ce539266aa8 \ + --hash=sha256:678ae89ebc632c5c204c794f8dab2837c5f159aeb59e6ed0539500400577298c \ + --hash=sha256:67fad6162281e80e882fb3ec355398cf72864a54069d060321f6cd0ade95fe85 \ + --hash=sha256:6918ecbd897443087a3b7cd978d56546a812517dcaaca51b49526720571fa93e \ + --hash=sha256:6f6ff873ed40292cd4969ef5310179afd5db59fdf055897e282485043fc80ad0 \ + --hash=sha256:6f8b465489f927b0df505cbe26ffbeed4d6d8a2bbc61ce90eb074ff129ef0ab1 \ + --hash=sha256:74c1fb26515153e482e00177a1ad654721bf9207da8a494a0c05e797ad27b992 \ + --hash=sha256:824e908bce90fb2743bd6b59db36eb4f45cd350a39637c9f73b1c1ea66f5b75f \ + --hash=sha256:8326e144341460402713f91df60ade3c999d601e7eb5ff8f6f7862d54de0610d \ + --hash=sha256:8873eb4460fd55333ea49b7d189749ecf6e55bf85080f11b1c4530ed3034cba1 \ + --hash=sha256:89eb3fa9524f7bec9de6e83cf3faed9d79bffa560672c118a96a171a6f55831e \ + --hash=sha256:8e57061305815dfc910a3634dcf584f08168a8836e6999983569f51a8544cd89 \ + --hash=sha256:929d7cbe1f01bb7baffb33dc14eb5691c95831450a26354cd210a8155170c93a \ + --hash=sha256:92d1935ee1f8d7442da9c0c4fa7ac20d07e94064184811b685f5c4fada64553b \ + --hash=sha256:981333cb2f4c1896a12f4ab92a9cc8f09ea664e9b7dbdc4eff74627af3a11c0f \ + --hash=sha256:990f6b3e2a27d683cb7602ed6c86f15ee6b43b1194736f9baaeb93d0016633b1 \ + --hash=sha256:9a0bd56e5b100aef69bd8562b74b46254e7c8812918d3baa700c8a8009b0af66 \ + --hash=sha256:9f302f4783709a78240ebc311b793f123328716a60911d667e0c036bc5dcbded \ + --hash=sha256:a78372c932c90ee474559c5ddfffd718238e8673c340dc21fe45c5b8b54559a0 \ + --hash=sha256:a9695397f85973bb40427dedddf70d8dc4a44b22f1650dd4af9eedf443d45165 \ + --hash=sha256:ab08df6c9a035bee56e31af99be621526bd237bea9f32def431c656b29e41778 \ + --hash=sha256:ab2943be7c652f09638800905ee1bab2c544e537edb57d527997a24c13dc1455 \ + --hash=sha256:ab4c29b49d560fe48b696cdcb127dd36e0bc2472548f3bf56cc5cb3da2b2984f \ + --hash=sha256:af223b406d6d000830c6f65f1e6431783fc3f713ba3e6cc8c024d5ee96170a4b \ + --hash=sha256:af2a6052aeb6cf17d3e46ee169099044fd8224cbaf75c76a2ef596e8163e2237 \ + --hash=sha256:bcc9aaa5d80322bc2fb24bb7accb4a30f81e90ab8d6ba187aec0744bc302ad81 \ + --hash=sha256:c07fda85708bc48578467e85099645167a955ba093be0a2dcba962195676e859 \ + --hash=sha256:c0d4b719b7da33599dfe3b22d3db1ef789210a0597bc650b7cee9c77c2be8c5c \ + --hash=sha256:c0ef0aaafc66fbd87842a3fe3902fd889825646bc21149eafe47be6072725835 \ + --hash=sha256:c2b5e7db5328427c57c8e8831abda175421b709672f6cfc3d630c3b7e2146393 \ + --hash=sha256:c30b53e7e6bda1d547cabb47c825f3843a0a1a42b0496087bb58d8fedf9f41b5 \ + --hash=sha256:c80ee5802e3fb9ea37938e7eecc307fb984837091d5fd262bb37238b1ae97641 \ + --hash=sha256:c9b822a577f560fbd9554812526831712c1436d2c046cedee4c3796d3543b144 \ + 
--hash=sha256:cae65ad55793da34db5f54e4029b89d3b9b9490d8abe1b4c7ab5d4b8ec7ebf74 \ + --hash=sha256:cb2d222e72399fcf5890d1d5cc1060857b9b236adff2792ff48ca2dfd46c81db \ + --hash=sha256:cd547953428f7abb73c5ad82cbb32109566204260d98e41e5dfdc682eb7f8403 \ + --hash=sha256:cfc27c945f422e8b5071b6e93169679e4eb5bf73bbcbf1ba3ae3a83d2f78ebd9 \ + --hash=sha256:d472aeb4fbf9865e0c6d622d7f4d54a4e101a89715d8904282bb5f9a2f476c3f \ + --hash=sha256:d62cdfcfd89ccb8de04e0eda998535c406bf5e060ffd56be6c586cbcc05b3311 \ + --hash=sha256:d8f353eb14ee3441ee844ade4277d560cdd68288838673273b978e3d6d2c8f36 \ + --hash=sha256:dee69d7015dc235f526fe80a9c90d65eb0039103fe565776250881731f06349f \ + --hash=sha256:e153e9cd40cc8945138822807139367f256f89c6810c2634a4f6902b52d3b4e2 \ + --hash=sha256:e35b88984e7fa64aacecea39236cee32dd9bd8c55f57ba8a75cf2399553f9bd7 \ + --hash=sha256:e53f3a38d3510c11953f3e6a33f205c6d1b001129f972805ca9b42fc308bc239 \ + --hash=sha256:e9b0d8d0845bbc4cfcdcbcdbf5086886bc8157aa963c31c777ceff7846c77757 \ + --hash=sha256:ec17c65562a827bba85e3872ead335f95405ea1674860d96483a02f5c698fa72 \ + --hash=sha256:ecef2343af4cc68e05131e45024ba34f6095821988a9d0a02aa7c73fcc448aa9 \ + --hash=sha256:ed5a841e8bb29a55fb8159ed526b26adc5bdd7e8bd7bf793ce647cb08656cdf4 \ + --hash=sha256:ee17f18d2498f2673e432faaa71698032b0127ebf23ae5974eeaf806c279df24 \ + --hash=sha256:f048da1b4f243fc44f205dfd320933a951b8d89e0afd4c7cacc762a8b9165207 \ + --hash=sha256:f10207adf04d08bec185bae14d9606a1444715bc99180f9331c9c02093e1959e \ + --hash=sha256:f1d2f90aeec838a52f1c1a32fe9a619fefd5e411721a9117fbf82aea638fe8a1 \ + --hash=sha256:f48107a8c637e80362555f37ecf49abe20370e557cc4ab374f04ec4423c97c3d \ + --hash=sha256:f7ee0e597f495cf415bcbd3da3caa3bd7e816b74d0d52b8145954c5e6fd3ff37 \ + --hash=sha256:f95393b4d66bfae908c3ca8d169d5f79cd65636ae15b5e7a4f6e67af675adb0e \ + --hash=sha256:fc38cba02d1acba4e2869eef1a57a43dfbd3d49a59bf90dda7444ec2be6a5570 \ + --hash=sha256:fd0858c20f078a32cf55f7e81473d96dcf3b93fd2ccdb3d40fdf54b8573df3af \ + --hash=sha256:fd6f30fdcf9ae2a70abd34da54f18da086160e4d7d9251f81f3da0ff84fc5a48 + # via + # aiohttp + # yarl +proto-plus==1.27.1 \ + --hash=sha256:912a7460446625b792f6448bade9e55cd4e41e6ac10e27009ef71a7f317fa147 \ + --hash=sha256:e4643061f3a4d0de092d62aa4ad09fa4756b2cbb89d4627f3985018216f9fefc + # via google-api-core +protobuf==6.33.5 \ + --hash=sha256:3093804752167bcab3998bec9f1048baae6e29505adaf1afd14a37bddede533c \ + --hash=sha256:69915a973dd0f60f31a08b8318b73eab2bd6a392c79184b3612226b0a3f8ec02 \ + --hash=sha256:6ddcac2a081f8b7b9642c09406bc6a4290128fce5f471cddd165960bb9119e5c \ + --hash=sha256:8afa18e1d6d20af15b417e728e9f60f3aa108ee76f23c3b2c07a2c3b546d3afd \ + --hash=sha256:9b71e0281f36f179d00cbcb119cb19dec4d14a81393e5ea220f64b286173e190 \ + --hash=sha256:a5cb85982d95d906df1e2210e58f8e4f1e3cdc088e52c921a041f9c9a0386de5 \ + --hash=sha256:cbf16ba3350fb7b889fca858fb215967792dc125b35c7976ca4818bee3521cf0 \ + --hash=sha256:d71b040839446bac0f4d162e758bea99c8251161dae9d0983a3b88dee345153b + # via + # google-api-core + # googleapis-common-protos + # grpcio-status + # proto-plus + # yfinance +psutil==7.2.2 \ + --hash=sha256:0746f5f8d406af344fd547f1c8daa5f5c33dbc293bb8d6a16d80b4bb88f59372 \ + --hash=sha256:076a2d2f923fd4821644f5ba89f059523da90dc9014e85f8e45a5774ca5bc6f9 \ + --hash=sha256:11fe5a4f613759764e79c65cf11ebdf26e33d6dd34336f8a337aa2996d71c841 \ + --hash=sha256:1a571f2330c966c62aeda00dd24620425d4b0cc86881c89861fbc04549e5dc63 \ + --hash=sha256:1a7b04c10f32cc88ab39cbf606e117fd74721c831c98a27dc04578deb0c16979 \ + 
--hash=sha256:1fa4ecf83bcdf6e6c8f4449aff98eefb5d0604bf88cb883d7da3d8d2d909546a \ + --hash=sha256:2edccc433cbfa046b980b0df0171cd25bcaeb3a68fe9022db0979e7aa74a826b \ + --hash=sha256:7b6d09433a10592ce39b13d7be5a54fbac1d1228ed29abc880fb23df7cb694c9 \ + --hash=sha256:8c233660f575a5a89e6d4cb65d9f938126312bca76d8fe087b947b3a1aaac9ee \ + --hash=sha256:917e891983ca3c1887b4ef36447b1e0873e70c933afc831c6b6da078ba474312 \ + --hash=sha256:ab486563df44c17f5173621c7b198955bd6b613fb87c71c161f827d3fb149a9b \ + --hash=sha256:ae0aefdd8796a7737eccea863f80f81e468a1e4cf14d926bd9b6f5f2d5f90ca9 \ + --hash=sha256:b0726cecd84f9474419d67252add4ac0cd9811b04d61123054b9fb6f57df6e9e \ + --hash=sha256:b58fabe35e80b264a4e3bb23e6b96f9e45a3df7fb7eed419ac0e5947c61e47cc \ + --hash=sha256:c7663d4e37f13e884d13994247449e9f8f574bc4655d509c3b95e9ec9e2b9dc1 \ + --hash=sha256:e452c464a02e7dc7822a05d25db4cde564444a67e58539a00f929c51eddda0cf \ + --hash=sha256:e78c8603dcd9a04c7364f1a3e670cea95d51ee865e4efb3556a3a63adef958ea \ + --hash=sha256:eb7e81434c8d223ec4a219b5fc1c47d0417b12be7ea866e24fb5ad6e84b3d988 \ + --hash=sha256:ed0cace939114f62738d808fdcecd4c869222507e266e574799e9c0faa17d486 \ + --hash=sha256:eed63d3b4d62449571547b60578c5b2c4bcccc5387148db46e0c2313dad0ee00 \ + --hash=sha256:fd04ef36b4a6d599bbdb225dd1d3f51e00105f6d48a28f006da7f9822f2606d8 + # via ipykernel +ptyprocess==0.7.0 ; os_name != 'nt' or (sys_platform != 'emscripten' and sys_platform != 'win32') \ + --hash=sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35 \ + --hash=sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220 + # via + # pexpect + # terminado +pure-eval==0.2.3 \ + --hash=sha256:1db8e35b67b3d218d818ae653e27f06c3aa420901fa7b081ca98cbedc874e0d0 \ + --hash=sha256:5f4e983f40564c576c7c8635ae88db5956bb2229d7e9237d03b3c0b0190eaf42 + # via stack-data +pyarrow==23.0.0 \ + --hash=sha256:068701f6823449b1b6469120f399a1239766b117d211c5d2519d4ed5861f75de \ + --hash=sha256:075c29aeaa685fd1182992a9ed2499c66f084ee54eea47da3eb76e125e06064c \ + --hash=sha256:0800cc58a6d17d159df823f87ad66cefebf105b982493d4bad03ee7fab84b993 \ + --hash=sha256:14de7d48052cf4b0ed174533eafa3cfe0711b8076ad70bede32cf59f744f0d7c \ + --hash=sha256:15a414f710dc927132dd67c361f78c194447479555af57317066ee5116b90e9e \ + --hash=sha256:1801ba947015d10e23bca9dd6ef5d0e9064a81569a89b6e9a63b59224fd060df \ + --hash=sha256:180e3150e7edfcd182d3d9afba72f7cf19839a497cc76555a8dce998a8f67615 \ + --hash=sha256:18ec84e839b493c3886b9b5e06861962ab4adfaeb79b81c76afbd8d84c7d5fda \ + --hash=sha256:1a9ff6fa4141c24a03a1a434c63c8fa97ce70f8f36bccabc18ebba905ddf0f17 \ + --hash=sha256:20b187ed9550d233a872074159f765f52f9d92973191cd4b93f293a19efbe377 \ + --hash=sha256:2ef0075c2488932e9d3c2eb3482f9459c4be629aa673b725d5e3cf18f777f8e4 \ + --hash=sha256:36d1b5bc6ddcaff0083ceec7e2561ed61a51f49cce8be079ee8ed406acb6fdef \ + --hash=sha256:3a7c68c722da9bb5b0f8c10e3eae71d9825a4b429b40b32709df5d1fa55beb3d \ + --hash=sha256:3e0d2e6915eca7d786be6a77bf227fbc06d825a75b5b5fe9bcbef121dec32685 \ + --hash=sha256:427deac1f535830a744a4f04a6ac183a64fcac4341b3f618e693c41b7b98d2b0 \ + --hash=sha256:4292b889cd224f403304ddda8b63a36e60f92911f89927ec8d98021845ea21be \ + --hash=sha256:4b317ea6e800b5704e5e5929acb6e2dc13e9276b708ea97a39eb8b345aa2658b \ + --hash=sha256:4d85cb6177198f3812db4788e394b757223f60d9a9f5ad6634b3e32be1525803 \ + --hash=sha256:52265266201ec25b6839bf6bd4ea918ca6d50f31d13e1cf200b4261cd11dc25c \ + --hash=sha256:54810f6e6afc4ffee7c2e0051b61722fbea9a4961b46192dcfae8ea12fa09059 \ + 
--hash=sha256:5574d541923efcbfdf1294a2746ae3b8c2498a2dc6cd477882f6f4e7b1ac08d3 \ + --hash=sha256:5961a9f646c232697c24f54d3419e69b4261ba8a8b66b0ac54a1851faffcbab8 \ + --hash=sha256:5b86bb649e4112fb0614294b7d0a175c7513738876b89655605ebb87c804f861 \ + --hash=sha256:632b3e7c3d232f41d64e1a4a043fb82d44f8a349f339a1188c6a0dd9d2d47d8a \ + --hash=sha256:65666fc269669af1ef1c14478c52222a2aa5c907f28b68fb50a203c777e4f60c \ + --hash=sha256:76242c846db1411f1d6c2cc3823be6b86b40567ee24493344f8226ba34a81333 \ + --hash=sha256:799965a5379589510d888be3094c2296efd186a17ca1cef5b77703d4d5121f53 \ + --hash=sha256:7a7d067c9a88faca655c71bcc30ee2782038d59c802d57950826a07f60d83c4c \ + --hash=sha256:832141cc09fac6aab1cd3719951d23301396968de87080c57c9a7634e0ecd068 \ + --hash=sha256:84839d060a54ae734eb60a756aeacb62885244aaa282f3c968f5972ecc7b1ecc \ + --hash=sha256:a149a647dbfe928ce8830a713612aa0b16e22c64feac9d1761529778e4d4eaa5 \ + --hash=sha256:a244279f240c81f135631be91146d7fa0e9e840e1dfed2aba8483eba25cd98e6 \ + --hash=sha256:ad96a597547af7827342ffb3c503c8316e5043bb09b47a84885ce39394c96e00 \ + --hash=sha256:ae7f30f898dfe44ea69654a35c93e8da4cef6606dc4c72394068fd95f8e9f54a \ + --hash=sha256:b73519f8b52ae28127000986bf228fda781e81d3095cd2d3ece76eb5cf760e1b \ + --hash=sha256:b9edf990df77c2901e79608f08c13fbde60202334a4fcadb15c1f57bf7afee43 \ + --hash=sha256:bd5556c24622df90551063ea41f559b714aa63ca953db884cfb958559087a14e \ + --hash=sha256:c4692e83e42438dba512a570c6eaa42be2f8b6c0f492aea27dec54bdc495103a \ + --hash=sha256:ce9486e0535a843cf85d990e2ec5820a47918235183a5c7b8b97ed7e92c2d47d \ + --hash=sha256:dfd9e133e60eaa847fd80530a1b89a052f09f695d0b9c34c235ea6b2e0924cf7 \ + --hash=sha256:e438dd3f33894e34fd02b26bd12a32d30d006f5852315f611aa4add6c7fab4bc \ + --hash=sha256:ebc017d765d71d80a3f8584ca0566b53e40464586585ac64176115baa0ada7d3 \ + --hash=sha256:ef7cac8fe6fccd8b9e7617bfac785b0371a7fe26af59463074e4882747145d40 + # via + # data-formulator + # db-dtypes +pyasn1==0.6.2 \ + --hash=sha256:1eb26d860996a18e9b6ed05e7aae0e9fc21619fcee6af91cca9bad4fbea224bf \ + --hash=sha256:9b59a2b25ba7e4f8197db7686c09fb33e658b98339fadb826e9512629017833b + # via + # pyasn1-modules + # rsa +pyasn1-modules==0.4.2 \ + --hash=sha256:29253a9207ce32b64c3ac6600edc75368f98473906e8fd1043bd6b5b1de2c14a \ + --hash=sha256:677091de870a80aae844b1ca6134f54652fa2c8c5a52aa396440ac3106e941e6 + # via google-auth +pycparser==3.0 ; implementation_name != 'PyPy' \ + --hash=sha256:600f49d217304a5902ac3c37e1281c9fe94e4d0489de643a9504c5cdfdfc6b29 \ + --hash=sha256:b727414169a36b7d524c1c3e31839a521725078d7b2ff038656844266160a992 + # via cffi +pydantic==2.12.5 \ + --hash=sha256:4d351024c75c0f085a9febbb665ce8c0c6ec5d30e903bdb6394b7ede26aebb49 \ + --hash=sha256:e561593fccf61e8a20fc46dfc2dfe075b8be7d0188df33f221ad1f0139180f9d + # via + # litellm + # openai +pydantic-core==2.41.5 \ + --hash=sha256:0177272f88ab8312479336e1d777f6b124537d47f2123f89cb37e0accea97f90 \ + --hash=sha256:01a3d0ab748ee531f4ea6c3e48ad9dac84ddba4b0d82291f87248f2f9de8d740 \ + --hash=sha256:03b77d184b9eb40240ae9fd676ca364ce1085f203e1b1256f8ab9984dca80a84 \ + --hash=sha256:03ca43e12fab6023fc79d28ca6b39b05f794ad08ec2feccc59a339b02f2b3d33 \ + --hash=sha256:070259a8818988b9a84a449a2a7337c7f430a22acc0859c6b110aa7212a6d9c0 \ + --hash=sha256:08daa51ea16ad373ffd5e7606252cc32f07bc72b28284b6bc9c6df804816476e \ + --hash=sha256:0cbaad15cb0c90aa221d43c00e77bb33c93e8d36e0bf74760cd00e732d10a6a0 \ + --hash=sha256:112e305c3314f40c93998e567879e887a3160bb8689ef3d2c04b6cc62c33ac34 \ + 
--hash=sha256:1746d4a3d9a794cacae06a5eaaccb4b8643a131d45fbc9af23e353dc0a5ba5c3 \ + --hash=sha256:1962293292865bca8e54702b08a4f26da73adc83dd1fcf26fbc875b35d81c815 \ + --hash=sha256:1d1d9764366c73f996edd17abb6d9d7649a7eb690006ab6adbda117717099b14 \ + --hash=sha256:22f0fb8c1c583a3b6f24df2470833b40207e907b90c928cc8d3594b76f874375 \ + --hash=sha256:239edca560d05757817c13dc17c50766136d21f7cd0fac50295499ae24f90fdf \ + --hash=sha256:25e1c2af0fce638d5f1988b686f3b3ea8cd7de5f244ca147c777769e798a9cd1 \ + --hash=sha256:266fb4cbf5e3cbd0b53669a6d1b039c45e3ce651fd5442eff4d07c2cc8d66808 \ + --hash=sha256:2782c870e99878c634505236d81e5443092fba820f0373997ff75f90f68cd553 \ + --hash=sha256:287dad91cfb551c363dc62899a80e9e14da1f0e2b6ebde82c806612ca2a13ef1 \ + --hash=sha256:2a5e06546e19f24c6a96a129142a75cee553cc018ffee48a460059b1185f4470 \ + --hash=sha256:2b761d210c9ea91feda40d25b4efe82a1707da2ef62901466a42492c028553a2 \ + --hash=sha256:2c010c6ded393148374c0f6f0bf89d206bf3217f201faa0635dcd56bd1520f6b \ + --hash=sha256:2ff4321e56e879ee8d2a879501c8e469414d948f4aba74a2d4593184eb326660 \ + --hash=sha256:3006c3dd9ba34b0c094c544c6006cc79e87d8612999f1a5d43b769b89181f23c \ + --hash=sha256:34a64bc3441dc1213096a20fe27e8e128bd3ff89921706e83c0b1ac971276594 \ + --hash=sha256:35b44f37a3199f771c3eaa53051bc8a70cd7b54f333531c59e29fd4db5d15008 \ + --hash=sha256:378bec5c66998815d224c9ca994f1e14c0c21cb95d2f52b6021cc0b2a58f2a5a \ + --hash=sha256:3f37a19d7ebcdd20b96485056ba9e8b304e27d9904d233d7b1015db320e51f0a \ + --hash=sha256:3f84d5c1b4ab906093bdc1ff10484838aca54ef08de4afa9de0f5f14d69639cd \ + --hash=sha256:4009935984bd36bd2c774e13f9a09563ce8de4abaa7226f5108262fa3e637284 \ + --hash=sha256:406bf18d345822d6c21366031003612b9c77b3e29ffdb0f612367352aab7d586 \ + --hash=sha256:4819fa52133c9aa3c387b3328f25c1facc356491e6135b459f1de698ff64d869 \ + --hash=sha256:482c982f814460eabe1d3bb0adfdc583387bd4691ef00b90575ca0d2b6fe2294 \ + --hash=sha256:4bc36bbc0b7584de96561184ad7f012478987882ebf9f9c389b23f432ea3d90f \ + --hash=sha256:506d766a8727beef16b7adaeb8ee6217c64fc813646b424d0804d67c16eddb66 \ + --hash=sha256:56121965f7a4dc965bff783d70b907ddf3d57f6eba29b6d2e5dabfaf07799c51 \ + --hash=sha256:58133647260ea01e4d0500089a8c4f07bd7aa6ce109682b1426394988d8aaacc \ + --hash=sha256:5cb1b2f9742240e4bb26b652a5aeb840aa4b417c7748b6f8387927bc6e45e40d \ + --hash=sha256:634e8609e89ceecea15e2d61bc9ac3718caaaa71963717bf3c8f38bfde64242c \ + --hash=sha256:63510af5e38f8955b8ee5687740d6ebf7c2a0886d15a6d65c32814613681bc07 \ + --hash=sha256:650ae77860b45cfa6e2cdafc42618ceafab3a2d9a3811fcfbd3bbf8ac3c40d36 \ + --hash=sha256:6561e94ba9dacc9c61bce40e2d6bdc3bfaa0259d3ff36ace3b1e6901936d2e3e \ + --hash=sha256:65840751b72fbfd82c3c640cff9284545342a4f1eb1586ad0636955b261b0b05 \ + --hash=sha256:6cb58b9c66f7e4179a2d5e0f849c48eff5c1fca560994d6eb6543abf955a149e \ + --hash=sha256:72f6c8b11857a856bcfa48c86f5368439f74453563f951e473514579d44aa612 \ + --hash=sha256:76d0819de158cd855d1cbb8fcafdf6f5cf1eb8e470abe056d5d161106e38062b \ + --hash=sha256:76ee27c6e9c7f16f47db7a94157112a2f3a00e958bc626e2f4ee8bec5c328fbe \ + --hash=sha256:79ec52ec461e99e13791ec6508c722742ad745571f234ea6255bed38c6480f11 \ + --hash=sha256:7da7087d756b19037bc2c06edc6c170eeef3c3bafcb8f532ff17d64dc427adfd \ + --hash=sha256:7f3bf998340c6d4b0c9a2f02d6a400e51f123b59565d74dc60d252ce888c260b \ + --hash=sha256:80aa89cad80b32a912a65332f64a4450ed00966111b6615ca6816153d3585a8c \ + --hash=sha256:8566def80554c3faa0e65ac30ab0932b9e3a5cd7f8323764303d468e5c37595a \ + 
--hash=sha256:88942d3a3dff3afc8288c21e565e476fc278902ae4d6d134f1eeda118cc830b1 \ + --hash=sha256:8e7c86f27c585ef37c35e56a96363ab8de4e549a95512445b85c96d3e2f7c1bf \ + --hash=sha256:915c3d10f81bec3a74fbd4faebe8391013ba61e5a1a8d48c4455b923bdda7858 \ + --hash=sha256:93e8740d7503eb008aa2df04d3b9735f845d43ae845e6dcd2be0b55a2da43cd2 \ + --hash=sha256:941103c9be18ac8daf7b7adca8228f8ed6bb7a1849020f643b3a14d15b1924d9 \ + --hash=sha256:97aeba56665b4c3235a0e52b2c2f5ae9cd071b8a8310ad27bddb3f7fb30e9aa2 \ + --hash=sha256:a39455728aabd58ceabb03c90e12f71fd30fa69615760a075b9fec596456ccc3 \ + --hash=sha256:a3a52f6156e73e7ccb0f8cced536adccb7042be67cb45f9562e12b319c119da6 \ + --hash=sha256:a668ce24de96165bb239160b3d854943128f4334822900534f2fe947930e5770 \ + --hash=sha256:aabf5777b5c8ca26f7824cb4a120a740c9588ed58df9b2d196ce92fba42ff8dc \ + --hash=sha256:aec5cf2fd867b4ff45b9959f8b20ea3993fc93e63c7363fe6851424c8a7e7c23 \ + --hash=sha256:b2379fa7ed44ddecb5bfe4e48577d752db9fc10be00a6b7446e9663ba143de26 \ + --hash=sha256:b4ececa40ac28afa90871c2cc2b9ffd2ff0bf749380fbdf57d165fd23da353aa \ + --hash=sha256:b74557b16e390ec12dca509bce9264c3bbd128f8a2c376eaa68003d7f327276d \ + --hash=sha256:b80aa5095cd3109962a298ce14110ae16b8c1aece8b72f9dafe81cf597ad80b3 \ + --hash=sha256:b93590ae81f7010dbe380cdeab6f515902ebcbefe0b9327cc4804d74e93ae69d \ + --hash=sha256:b96d5f26b05d03cc60f11a7761a5ded1741da411e7fe0909e27a5e6a0cb7b034 \ + --hash=sha256:bd3d54f38609ff308209bd43acea66061494157703364ae40c951f83ba99a1a9 \ + --hash=sha256:bfea2a5f0b4d8d43adf9d7b8bf019fb46fdd10a2e5cde477fbcb9d1fa08c68e1 \ + --hash=sha256:c007fe8a43d43b3969e8469004e9845944f1a80e6acd47c150856bb87f230c56 \ + --hash=sha256:c1df3d34aced70add6f867a8cf413e299177e0c22660cc767218373d0779487b \ + --hash=sha256:c23e27686783f60290e36827f9c626e63154b82b116d7fe9adba1fda36da706c \ + --hash=sha256:c9e19dd6e28fdcaa5a1de679aec4141f691023916427ef9bae8584f9c2fb3b0e \ + --hash=sha256:d0d2568a8c11bf8225044aa94409e21da0cb09dcdafe9ecd10250b2baad531a9 \ + --hash=sha256:d38548150c39b74aeeb0ce8ee1d8e82696f4a4e16ddc6de7b1d8823f7de4b9b5 \ + --hash=sha256:dc799088c08fa04e43144b164feb0c13f9a0bc40503f8df3e9fde58a3c0c101e \ + --hash=sha256:e536c98a7626a98feb2d3eaf75944ef6f3dbee447e1f841eae16f2f0a72d8ddc \ + --hash=sha256:e56ba91f47764cc14f1daacd723e3e82d1a89d783f0f5afe9c364b8bb491ccdb \ + --hash=sha256:e672ba74fbc2dc8eea59fb6d4aed6845e6905fc2a8afe93175d94a83ba2a01a0 \ + --hash=sha256:e7b576130c69225432866fe2f4a469a85a54ade141d96fd396dffcf607b558f8 \ + --hash=sha256:e96cea19e34778f8d59fe40775a7a574d95816eb150850a85a7a4c8f4b94ac69 \ + --hash=sha256:eceb81a8d74f9267ef4081e246ffd6d129da5d87e37a77c9bde550cb04870c1c \ + --hash=sha256:ed2e99c456e3fadd05c991f8f437ef902e00eedf34320ba2b0842bd1c3ca3a75 \ + --hash=sha256:f14f8f046c14563f8eb3f45f499cc658ab8d10072961e07225e507adb700e93f \ + --hash=sha256:f15489ba13d61f670dcc96772e733aad1a6f9c429cc27574c6cdaed82d0146ad \ + --hash=sha256:f31d95a179f8d64d90f6831d71fa93290893a33148d890ba15de25642c5d075b \ + --hash=sha256:f41a7489d32336dbf2199c8c0a215390a751c5b014c2c1c5366e817202e9cdf7 + # via pydantic +pygments==2.19.2 \ + --hash=sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887 \ + --hash=sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b + # via + # ipython + # ipython-pygments-lexers + # jupyter-console + # nbconvert +pyjwt==2.11.0 \ + --hash=sha256:35f95c1f0fbe5d5ba6e43f00271c275f7a1a4db1dab27bf708073b75318ea623 \ + --hash=sha256:94a6bde30eb5c8e04fee991062b534071fd1439ef58d2adc9ccb823e7bcd0469 + # via msal 
+pymongo==4.16.0 \ + --hash=sha256:12762e7cc0f8374a8cae3b9f9ed8dabb5d438c7b33329232dd9b7de783454033 \ + --hash=sha256:1c01e8a7cd0ea66baf64a118005535ab5bf9f9eb63a1b50ac3935dccf9a54abe \ + --hash=sha256:1d638b0b1b294d95d0fdc73688a3b61e05cc4188872818cd240d51460ccabcb5 \ + --hash=sha256:21d02cc10a158daa20cb040985e280e7e439832fc6b7857bff3d53ef6914ad50 \ + --hash=sha256:25a6b03a68f9907ea6ec8bc7cf4c58a1b51a18e23394f962a6402f8e46d41211 \ + --hash=sha256:2b0714d7764efb29bf9d3c51c964aed7c4c7237b341f9346f15ceaf8321fdb35 \ + --hash=sha256:2cd60cd1e05de7f01927f8e25ca26b3ea2c09de8723241e5d3bcfdc70eaff76b \ + --hash=sha256:2d0082631a7510318befc2b4fdab140481eb4b9dd62d9245e042157085da2a70 \ + --hash=sha256:311d4549d6bf1f8c61d025965aebb5ba29d1481dc6471693ab91610aaffbc0eb \ + --hash=sha256:36ef2fee50eee669587d742fb456e349634b4fcf8926208766078b089054b24b \ + --hash=sha256:3ead8a0050c53eaa55935895d6919d393d0328ec24b2b9115bdbe881aa222673 \ + --hash=sha256:46ffb728d92dd5b09fc034ed91acf5595657c7ca17d4cf3751322cd554153c17 \ + --hash=sha256:4a19ea46a0fe71248965305a020bc076a163311aefbaa1d83e47d06fa30ac747 \ + --hash=sha256:4c4872299ebe315a79f7f922051061634a64fda95b6b17677ba57ef00b2ba2a4 \ + --hash=sha256:4d4f7ba040f72a9f43a44059872af5a8c8c660aa5d7f90d5344f2ed1c3c02721 \ + --hash=sha256:4fbb8d3552c2ad99d9e236003c0b5f96d5f05e29386ba7abae73949bfebc13dd \ + --hash=sha256:55f8d5a6fe2fa0b823674db2293f92d74cd5f970bc0360f409a1fc21003862d3 \ + --hash=sha256:5d9fdb386cf958e6ef6ff537d6149be7edb76c3268cd6833e6c36aa447e4443f \ + --hash=sha256:60307bb91e0ab44e560fe3a211087748b2b5f3e31f403baf41f5b7b0a70bd104 \ + --hash=sha256:61567f712bda04c7545a037e3284b4367cad8d29b3dec84b4bf3b2147020a75b \ + --hash=sha256:6b2a20edb5452ac8daa395890eeb076c570790dfce6b7a44d788af74c2f8cf96 \ + --hash=sha256:6f2077ec24e2f1248f9cac7b9a2dfb894e50cc7939fcebfb1759f99304caabef \ + --hash=sha256:77cfd37a43a53b02b7bd930457c7994c924ad8bbe8dff91817904bcbf291b371 \ + --hash=sha256:78037d02389745e247fe5ab0bcad5d1ab30726eaac3ad79219c7d6bbb07eec53 \ + --hash=sha256:85dc2f3444c346ea019a371e321ac868a4fab513b7a55fe368f0cc78de8177cc \ + --hash=sha256:8a0f73af1ea56c422b2dcfc0437459148a799ef4231c6aee189d2d4c59d6728f \ + --hash=sha256:8a254d49a9ffe9d7f888e3c677eed3729b14ce85abb08cd74732cead6ccc3c66 \ + --hash=sha256:8ba8405065f6e258a6f872fe62d797a28f383a12178c7153c01ed04e845c600c \ + --hash=sha256:91899dd7fb9a8c50f09c3c1cf0cb73bfbe2737f511f641f19b9650deb61c00ca \ + --hash=sha256:91ac0cb0fe2bf17616c2039dac88d7c9a5088f5cb5829b27c9d250e053664d31 \ + --hash=sha256:948152b30eddeae8355495f9943a3bf66b708295c0b9b6f467de1c620f215487 \ + --hash=sha256:9caacac0dd105e2555521002e2d17afc08665187017b466b5753e84c016628e6 \ + --hash=sha256:9d9885aad05f82fd7ea0c9ca505d60939746b39263fa273d0125170da8f59098 \ + --hash=sha256:a1bf44e13cf2d44d2ea2e928a8140d5d667304abe1a61c4d55b4906f389fbe64 \ + --hash=sha256:aa30cd16ddd2f216d07ba01d9635c873e97ddb041c61cf0847254edc37d1c60e \ + --hash=sha256:acda193f440dd88c2023cb00aa8bd7b93a9df59978306d14d87a8b12fe426b05 \ + --hash=sha256:bd4911c40a43a821dfd93038ac824b756b6e703e26e951718522d29f6eb166a8 \ + --hash=sha256:be1099a8295b1a722d03fb7b48be895d30f4301419a583dcf50e9045968a041c \ + --hash=sha256:c126fb72be2518395cc0465d4bae03125119136462e1945aea19840e45d89cfc \ + --hash=sha256:c53338613043038005bf2e41a2fafa08d29cdbc0ce80891b5366c819456c1ae9 \ + --hash=sha256:c789236366525c3ee3cd6e4e450a9ff629a7d1f4d88b8e18a0aea0615fd7ecf8 \ + --hash=sha256:cf0ec79e8ca7077f455d14d915d629385153b6a11abc0b93283ed73a8013e376 \ + 
--hash=sha256:d15f060bc6d0964a8bb70aba8f0cb6d11ae99715438f640cff11bbcf172eb0e8 \ + --hash=sha256:dabbf3c14de75a20cc3c30bf0c6527157224a93dfb605838eabb1a2ee3be008d \ + --hash=sha256:dbbc5b254c36c37d10abb50e899bc3939bbb7ab1e7c659614409af99bd3e7675 \ + --hash=sha256:dfc320f08ea9a7ec5b2403dc4e8150636f0d6150f4b9792faaae539c88e7db3b \ + --hash=sha256:f1c5f1f818b669875d191323a48912d3fcd2e4906410e8297bb09ac50c4d5ccc \ + --hash=sha256:f25001a955073b80510c0c3db0e043dbbc36904fd69e511c74e3d8640b8a5111 \ + --hash=sha256:f3867dc225d9423c245a51eaac2cfcd53dde8e0a8d8090bb6aed6e31bd6c2d4f \ + --hash=sha256:f513b2c6c0d5c491f478422f6b5b5c27ac1af06a54c93ef8631806f7231bd92e \ + --hash=sha256:f6e42c1bc985d9beee884780ae6048790eb4cd565c46251932906bdb1630034a + # via data-formulator +pymysql==1.1.2 \ + --hash=sha256:4961d3e165614ae65014e361811a724e2044ad3ea3739de9903ae7c21f539f03 \ + --hash=sha256:e6b1d89711dd51f8f74b1631fe08f039e7d76cf67a42a323d3178f0f25762ed9 + # via data-formulator +pyodbc==5.3.0 \ + --hash=sha256:01166162149adf2b8a6dc21a212718f205cabbbdff4047dc0c415af3fd85867e \ + --hash=sha256:08b2439500e212625471d32f8fde418075a5ddec556e095e5a4ba56d61df2dc6 \ + --hash=sha256:0df7ff47fab91ea05548095b00e5eb87ed88ddf4648c58c67b4db95ea4913e23 \ + --hash=sha256:13656184faa3f2d5c6f19b701b8f247342ed581484f58bf39af7315c054e69db \ + --hash=sha256:1629af4706e9228d79dabb4863c11cceb22a6dab90700db0ef449074f0150c0d \ + --hash=sha256:197bb6ddafe356a916b8ee1b8752009057fce58e216e887e2174b24c7ab99269 \ + --hash=sha256:2035c7dfb71677cd5be64d3a3eb0779560279f0a8dc6e33673499498caa88937 \ + --hash=sha256:25c4cfb2c08e77bc6e82f666d7acd52f0e52a0401b1876e60f03c73c3b8aedc0 \ + --hash=sha256:2fe0e063d8fb66efd0ac6dc39236c4de1a45f17c33eaded0d553d21c199f4d05 \ + --hash=sha256:363311bd40320b4a61454bebf7c38b243cd67c762ed0f8a5219de3ec90c96353 \ + --hash=sha256:3cc472c8ae2feea5b4512e23b56e2b093d64f7cbc4b970af51da488429ff7818 \ + --hash=sha256:3f1bdb3ce6480a17afaaef4b5242b356d4997a872f39e96f015cabef00613797 \ + --hash=sha256:58635a1cc859d5af3f878c85910e5d7228fe5c406d4571bffcdd281375a54b39 \ + --hash=sha256:5cbe4d753723c8a8f65020b7a259183ef5f14307587165ce37e8c7e251951852 \ + --hash=sha256:5ceaed87ba2ea848c11223f66f629ef121f6ebe621f605cde9cfdee4fd9f4b68 \ + --hash=sha256:5dd3d5e469f89a3112cf8b0658c43108a4712fad65e576071e4dd44d2bd763c7 \ + --hash=sha256:5ebf6b5d989395efe722b02b010cb9815698a4d681921bf5db1c0e1195ac1bde \ + --hash=sha256:6132554ffbd7910524d643f13ce17f4a72f3a6824b0adef4e9a7f66efac96350 \ + --hash=sha256:676031723aac7dcbbd2813bddda0e8abf171b20ec218ab8dfb21d64a193430ea \ + --hash=sha256:729c535341bb09c476f219d6f7ab194bcb683c4a0a368010f1cb821a35136f05 \ + --hash=sha256:74528fe148980d0c735c0ebb4a4dc74643ac4574337c43c1006ac4d09593f92d \ + --hash=sha256:754d052030d00c3ac38da09ceb9f3e240e8dd1c11da8906f482d5419c65b9ef5 \ + --hash=sha256:7713c740a10f33df3cb08f49a023b7e1e25de0c7c99650876bbe717bc95ee780 \ + --hash=sha256:7e9ab0b91de28a5ab838ac4db0253d7cc8ce2452efe4ad92ee6a57b922bf0c24 \ + --hash=sha256:8339d3094858893c1a68ee1af93efc4dff18b8b65de54d99104b99af6306320d \ + --hash=sha256:9b987a25a384f31e373903005554230f5a6d59af78bce62954386736a902a4b3 \ + --hash=sha256:a48d731432abaee5256ed6a19a3e1528b8881f9cb25cb9cf72d8318146ea991b \ + --hash=sha256:af4d8c9842fc4a6360c31c35508d6594d5a3b39922f61b282c2b4c9d9da99514 \ + --hash=sha256:afe7c4ac555a8d10a36234788fc6cfc22a86ce37fc5ba88a1f75b3e6696665dc \ + --hash=sha256:b180bc5e49b74fd40a24ef5b0fe143d0c234ac1506febe810d7434bf47cb925b \ + 
--hash=sha256:bc834567c2990584b9726cba365834d039380c9dbbcef3030ddeb00c6541b943 \ + --hash=sha256:bfeb3e34795d53b7d37e66dd54891d4f9c13a3889a8f5fe9640e56a82d770955 \ + --hash=sha256:c2eb0b08e24fe5c40c7ebe9240c5d3bd2f18cd5617229acee4b0a0484dc226f2 \ + --hash=sha256:c5c30c5cd40b751f77bbc73edd32c4498630939bcd4e72ee7e6c9a4b982cc5ca \ + --hash=sha256:c67e7f2ce649155ea89beb54d3b42d83770488f025cf3b6f39ca82e9c598a02e \ + --hash=sha256:c6ccb5315ec9e081f5cbd66f36acbc820ad172b8fa3736cf7f993cdf69bd8a96 \ + --hash=sha256:c79df54bbc25bce9f2d87094e7b39089c28428df5443d1902b0cc5f43fd2da6f \ + --hash=sha256:cf18797a12e70474e1b7f5027deeeccea816372497e3ff2d46b15bec2d18a0cc \ + --hash=sha256:d255f6b117d05cfc046a5201fdf39535264045352ea536c35777cf66d321fbb8 \ + --hash=sha256:d32c3259762bef440707098010035bbc83d1c73d81a434018ab8c688158bd3bb \ + --hash=sha256:d89a7f2e24227150c13be8164774b7e1f9678321a4248f1356a465b9cc17d31e \ + --hash=sha256:e3c39de3005fff3ae79246f952720d44affc6756b4b85398da4c5ea76bf8f506 \ + --hash=sha256:ebc3be93f61ea0553db88589e683ace12bf975baa954af4834ab89f5ee7bf8ae \ + --hash=sha256:f1ad0e93612a6201621853fc661209d82ff2a35892b7d590106fe8f97d9f1f2a \ + --hash=sha256:f927b440c38ade1668f0da64047ffd20ec34e32d817f9a60d07553301324b364 \ + --hash=sha256:fe77eb9dcca5fc1300c9121f81040cc9011d28cff383e2c35416e9ec06d4bc95 + # via data-formulator +pyproject-hooks==1.2.0 \ + --hash=sha256:1e859bd5c40fae9448642dd871adf459e5e2084186e8d2c2a79a824c970da1f8 \ + --hash=sha256:9e5c6bfa8dcc30091c74b0cf803c81fdd29d94f01992a7707bc97babb1141913 + # via build +python-dateutil==2.9.0.post0 \ + --hash=sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3 \ + --hash=sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427 + # via + # arrow + # azure-kusto-data + # botocore + # google-cloud-bigquery + # jupyter-client + # pandas +python-dotenv==1.2.1 \ + --hash=sha256:42667e897e16ab0d66954af0e60a9caa94f0fd4ecf3aaf6d2d260eec1aa36ad6 \ + --hash=sha256:b81ee9561e9ca4004139c6cbba3a238c32b03e4894671e181b671e8cb8425d61 + # via + # data-formulator + # litellm +python-json-logger==4.0.0 \ + --hash=sha256:af09c9daf6a813aa4cc7180395f50f2a9e5fa056034c9953aec92e381c5ba1e2 \ + --hash=sha256:f58e68eb46e1faed27e0f574a55a0455eecd7b8a5b88b85a784519ba3cff047f + # via jupyter-events +pytz==2025.2 \ + --hash=sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3 \ + --hash=sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00 + # via + # pandas + # yfinance +pywinpty==3.0.2 ; os_name == 'nt' \ + --hash=sha256:1505cc4cb248af42cb6285a65c9c2086ee9e7e574078ee60933d5d7fa86fb004 \ + --hash=sha256:18f78b81e4cfee6aabe7ea8688441d30247b73e52cd9657138015c5f4ee13a51 \ + --hash=sha256:28297cecc37bee9f24d8889e47231972d6e9e84f7b668909de54f36ca785029a \ + --hash=sha256:327790d70e4c841ebd9d0f295a780177149aeb405bca44c7115a3de5c2054b23 \ + --hash=sha256:34b55ae9a1b671fe3eae071d86618110538e8eaad18fcb1531c0830b91a82767 \ + --hash=sha256:663383ecfab7fc382cc97ea5c4f7f0bb32c2f889259855df6ea34e5df42d305b \ + --hash=sha256:99fdd9b455f0ad6419aba6731a7a0d2f88ced83c3c94a80ff9533d95fa8d8a9e + # via + # jupyter-server + # jupyter-server-terminals + # terminado +pyyaml==6.0.3 \ + --hash=sha256:00c4bdeba853cc34e7dd471f16b4114f4162dc03e6b7afcc2128711f0eca823c \ + --hash=sha256:02893d100e99e03eda1c8fd5c441d8c60103fd175728e23e431db1b589cf5ab3 \ + --hash=sha256:0f29edc409a6392443abf94b9cf89ce99889a1dd5376d94316ae5145dfedd5d6 \ + --hash=sha256:10892704fc220243f5305762e276552a0395f7beb4dbf9b14ec8fd43b57f126c \ + 
--hash=sha256:16249ee61e95f858e83976573de0f5b2893b3677ba71c9dd36b9cf8be9ac6d65 \ + --hash=sha256:1d37d57ad971609cf3c53ba6a7e365e40660e3be0e5175fa9f2365a379d6095a \ + --hash=sha256:2283a07e2c21a2aa78d9c4442724ec1eb15f5e42a723b99cb3d822d48f5f7ad1 \ + --hash=sha256:34d5fcd24b8445fadc33f9cf348c1047101756fd760b4dacb5c3e99755703310 \ + --hash=sha256:37503bfbfc9d2c40b344d06b2199cf0e96e97957ab1c1b546fd4f87e53e5d3e4 \ + --hash=sha256:41715c910c881bc081f1e8872880d3c650acf13dfa8214bad49ed4cede7c34ea \ + --hash=sha256:44edc647873928551a01e7a563d7452ccdebee747728c1080d881d68af7b997e \ + --hash=sha256:4a2e8cebe2ff6ab7d1050ecd59c25d4c8bd7e6f400f5f82b96557ac0abafd0ac \ + --hash=sha256:4ad1906908f2f5ae4e5a8ddfce73c320c2a1429ec52eafd27138b7f1cbe341c9 \ + --hash=sha256:501a031947e3a9025ed4405a168e6ef5ae3126c59f90ce0cd6f2bfc477be31b7 \ + --hash=sha256:5190d403f121660ce8d1d2c1bb2ef1bd05b5f68533fc5c2ea899bd15f4399b35 \ + --hash=sha256:5498cd1645aa724a7c71c8f378eb29ebe23da2fc0d7a08071d89469bf1d2defb \ + --hash=sha256:5fcd34e47f6e0b794d17de1b4ff496c00986e1c83f7ab2fb8fcfe9616ff7477b \ + --hash=sha256:5fdec68f91a0c6739b380c83b951e2c72ac0197ace422360e6d5a959d8d97b2c \ + --hash=sha256:64386e5e707d03a7e172c0701abfb7e10f0fb753ee1d773128192742712a98fd \ + --hash=sha256:652cb6edd41e718550aad172851962662ff2681490a8a711af6a4d288dd96824 \ + --hash=sha256:66e1674c3ef6f541c35191caae2d429b967b99e02040f5ba928632d9a7f0f065 \ + --hash=sha256:6adc77889b628398debc7b65c073bcb99c4a0237b248cacaf3fe8a557563ef6c \ + --hash=sha256:79005a0d97d5ddabfeeea4cf676af11e647e41d81c9a7722a193022accdb6b7c \ + --hash=sha256:7c6610def4f163542a622a73fb39f534f8c101d690126992300bf3207eab9764 \ + --hash=sha256:7f047e29dcae44602496db43be01ad42fc6f1cc0d8cd6c83d342306c32270196 \ + --hash=sha256:8098f252adfa6c80ab48096053f512f2321f0b998f98150cea9bd23d83e1467b \ + --hash=sha256:850774a7879607d3a6f50d36d04f00ee69e7fc816450e5f7e58d7f17f1ae5c00 \ + --hash=sha256:8d1fab6bb153a416f9aeb4b8763bc0f22a5586065f86f7664fc23339fc1c1fac \ + --hash=sha256:8da9669d359f02c0b91ccc01cac4a67f16afec0dac22c2ad09f46bee0697eba8 \ + --hash=sha256:8dc52c23056b9ddd46818a57b78404882310fb473d63f17b07d5c40421e47f8e \ + --hash=sha256:9149cad251584d5fb4981be1ecde53a1ca46c891a79788c0df828d2f166bda28 \ + --hash=sha256:93dda82c9c22deb0a405ea4dc5f2d0cda384168e466364dec6255b293923b2f3 \ + --hash=sha256:96b533f0e99f6579b3d4d4995707cf36df9100d67e0c8303a0c55b27b5f99bc5 \ + --hash=sha256:9f3bfb4965eb874431221a3ff3fdcddc7e74e3b07799e0e84ca4a0f867d449bf \ + --hash=sha256:a33284e20b78bd4a18c8c2282d549d10bc8408a2a7ff57653c0cf0b9be0afce5 \ + --hash=sha256:a80cb027f6b349846a3bf6d73b5e95e782175e52f22108cfa17876aaeff93702 \ + --hash=sha256:b3bc83488de33889877a0f2543ade9f70c67d66d9ebb4ac959502e12de895788 \ + --hash=sha256:b8bb0864c5a28024fac8a632c443c87c5aa6f215c0b126c449ae1a150412f31d \ + --hash=sha256:ba1cc08a7ccde2d2ec775841541641e4548226580ab850948cbfda66a1befcdc \ + --hash=sha256:c1ff362665ae507275af2853520967820d9124984e0f7466736aea23d8611fba \ + --hash=sha256:c458b6d084f9b935061bc36216e8a69a7e293a2f1e68bf956dcd9e6cbcd143f5 \ + --hash=sha256:d0eae10f8159e8fdad514efdc92d74fd8d682c933a6dd088030f3834bc8e6b26 \ + --hash=sha256:d76623373421df22fb4cf8817020cbb7ef15c725b9d5e45f17e189bfc384190f \ + --hash=sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b \ + --hash=sha256:eda16858a3cab07b80edaf74336ece1f986ba330fdb8ee0d6c0d68fe82bc96be \ + --hash=sha256:ee2922902c45ae8ccada2c5b501ab86c36525b883eff4255313a253a3160861c \ + 
--hash=sha256:f7057c9a337546edc7973c0d3ba84ddcdf0daa14533c2065749c9075001090e6 \ + --hash=sha256:fc09d0aa354569bc501d4e787133afc08552722d3ab34836a80547331bb5d4a0 + # via + # huggingface-hub + # jupyter-events +pyzmq==27.1.0 \ + --hash=sha256:01c0e07d558b06a60773744ea6251f769cd79a41a97d11b8bf4ab8f034b0424d \ + --hash=sha256:0790a0161c281ca9723f804871b4027f2e8b5a528d357c8952d08cd1a9c15581 \ + --hash=sha256:08363b2011dec81c354d694bdecaef4770e0ae96b9afea70b3f47b973655cc05 \ + --hash=sha256:0de3028d69d4cdc475bfe47a6128eb38d8bc0e8f4d69646adfbcd840facbac28 \ + --hash=sha256:15c8bd0fe0dabf808e2d7a681398c4e5ded70a551ab47482067a572c054c8e2e \ + --hash=sha256:1779be8c549e54a1c38f805e56d2a2e5c009d26de10921d7d51cfd1c8d4632ea \ + --hash=sha256:18770c8d3563715387139060d37859c02ce40718d1faf299abddcdcc6a649066 \ + --hash=sha256:190cbf120fbc0fc4957b56866830def56628934a9d112aec0e2507aa6a032b97 \ + --hash=sha256:19c9468ae0437f8074af379e986c5d3d7d7bfe033506af442e8c879732bedbe0 \ + --hash=sha256:1c179799b118e554b66da67d88ed66cd37a169f1f23b5d9f0a231b4e8d44a113 \ + --hash=sha256:1f0b2a577fd770aa6f053211a55d1c47901f4d537389a034c690291485e5fe92 \ + --hash=sha256:226b091818d461a3bef763805e75685e478ac17e9008f49fce2d3e52b3d58b86 \ + --hash=sha256:250e5436a4ba13885494412b3da5d518cd0d3a278a1ae640e113c073a5f88edd \ + --hash=sha256:3837439b7f99e60312f0c926a6ad437b067356dc2bc2ec96eb395fd0fe804233 \ + --hash=sha256:43ad9a73e3da1fab5b0e7e13402f0b2fb934ae1c876c51d0afff0e7c052eca31 \ + --hash=sha256:452631b640340c928fa343801b0d07eb0c3789a5ffa843f6e1a9cee0ba4eb4fc \ + --hash=sha256:53b40f8ae006f2734ee7608d59ed661419f087521edbfc2149c3932e9c14808c \ + --hash=sha256:544b4e3b7198dde4a62b8ff6685e9802a9a1ebf47e77478a5eb88eca2a82f2fd \ + --hash=sha256:5bbf8d3630bf96550b3be8e1fc0fea5cbdc8d5466c1192887bd94869da17a63e \ + --hash=sha256:6bb54ca21bcfe361e445256c15eedf083f153811c37be87e0514934d6913061e \ + --hash=sha256:6df079c47d5902af6db298ec92151db82ecb557af663098b92f2508c398bb54f \ + --hash=sha256:6f3afa12c392f0a44a2414056d730eebc33ec0926aae92b5ad5cf26ebb6cc128 \ + --hash=sha256:7200bb0f03345515df50d99d3db206a0a6bee1955fbb8c453c76f5bf0e08fb96 \ + --hash=sha256:75a2f36223f0d535a0c919e23615fc85a1e23b71f40c7eb43d7b1dedb4d8f15f \ + --hash=sha256:7ccc0700cfdf7bd487bea8d850ec38f204478681ea02a582a8da8171b7f90a1c \ + --hash=sha256:8085a9fba668216b9b4323be338ee5437a235fe275b9d1610e422ccc279733e2 \ + --hash=sha256:80d834abee71f65253c91540445d37c4c561e293ba6e741b992f20a105d69146 \ + --hash=sha256:90e6e9441c946a8b0a667356f7078d96411391a3b8f80980315455574177ec97 \ + --hash=sha256:93ad4b0855a664229559e45c8d23797ceac03183c7b6f5b4428152a6b06684a5 \ + --hash=sha256:9ce490cf1d2ca2ad84733aa1d69ce6855372cb5ce9223802450c9b2a7cba0ccf \ + --hash=sha256:ac0765e3d44455adb6ddbf4417dcce460fc40a05978c08efdf2948072f6db540 \ + --hash=sha256:ac25465d42f92e990f8d8b0546b01c391ad431c3bf447683fdc40565941d0604 \ + --hash=sha256:add071b2d25f84e8189aaf0882d39a285b42fa3853016ebab234a5e78c7a43db \ + --hash=sha256:bafcb3dd171b4ae9f19ee6380dfc71ce0390fefaf26b504c0e5f628d7c8c54f2 \ + --hash=sha256:c65047adafe573ff023b3187bb93faa583151627bc9c51fc4fb2c561ed689d39 \ + --hash=sha256:c895a6f35476b0c3a54e3eb6ccf41bf3018de937016e6e18748317f25d4e925f \ + --hash=sha256:c9f7f6e13dff2e44a6afeaf2cf54cee5929ad64afaf4d40b50f93c58fc687355 \ + --hash=sha256:ce980af330231615756acd5154f29813d553ea555485ae712c491cd483df6b7a \ + --hash=sha256:cedc4c68178e59a4046f97eca31b148ddcf51e88677de1ef4e78cf06c5376c9a \ + --hash=sha256:cf44a7763aea9298c0aa7dbf859f87ed7012de8bda0f3977b6fb1d96745df856 \ + 
--hash=sha256:d54530c8c8b5b8ddb3318f481297441af102517602b569146185fa10b63f4fa9 \ + --hash=sha256:dc5dbf68a7857b59473f7df42650c621d7e8923fb03fa74a526890f4d33cc4d7 \ + --hash=sha256:e343d067f7b151cfe4eb3bb796a7752c9d369eed007b91231e817071d2c2fec7 \ + --hash=sha256:e829529fcaa09937189178115c49c504e69289abd39967cd8a4c215761373394 \ + --hash=sha256:eca6b47df11a132d1745eb3b5b5e557a7dae2c303277aa0e69c6ba91b8736e07 \ + --hash=sha256:f30f395a9e6fbca195400ce833c731e7b64c3919aa481af4d88c3759e0cb7496 \ + --hash=sha256:f605d884e7c8be8fe1aa94e0a783bf3f591b84c24e4bc4f3e7564c82ac25e271 \ + --hash=sha256:fbb4f2400bfda24f12f009cba62ad5734148569ff4949b1b6ec3b519444342e6 + # via + # ipykernel + # jupyter-client + # jupyter-console + # jupyter-server +referencing==0.37.0 \ + --hash=sha256:381329a9f99628c9069361716891d34ad94af76e461dcb0335825aecc7692231 \ + --hash=sha256:44aefc3142c5b842538163acb373e24cce6632bd54bdb01b21ad5863489f50d8 + # via + # jsonschema + # jsonschema-specifications + # jupyter-events +regex==2026.1.15 \ + --hash=sha256:05d75a668e9ea16f832390d22131fe1e8acc8389a694c8febc3e340b0f810b93 \ + --hash=sha256:0751a26ad39d4f2ade8fe16c59b2bf5cb19eb3d2cd543e709e583d559bd9efde \ + --hash=sha256:08df9722d9b87834a3d701f3fca570b2be115654dbfd30179f30ab2f39d606d3 \ + --hash=sha256:0bf065240704cb8951cc04972cf107063917022511273e0969bdb34fc173456c \ + --hash=sha256:0bf650f26087363434c4e560011f8e4e738f6f3e029b85d4904c50135b86cfa5 \ + --hash=sha256:0dcd31594264029b57bf16f37fd7248a70b3b764ed9e0839a8f271b2d22c0785 \ + --hash=sha256:0f0c7684c7f9ca241344ff95a1de964f257a5251968484270e91c25a755532c5 \ + --hash=sha256:124dc36c85d34ef2d9164da41a53c1c8c122cfb1f6e1ec377a1f27ee81deb794 \ + --hash=sha256:164759aa25575cbc0651bef59a0b18353e54300d79ace8084c818ad8ac72b7d5 \ + --hash=sha256:166551807ec20d47ceaeec380081f843e88c8949780cd42c40f18d16168bed10 \ + --hash=sha256:18388a62989c72ac24de75f1449d0fb0b04dfccd0a1a7c1c43af5eb503d890f6 \ + --hash=sha256:194312a14819d3e44628a44ed6fea6898fdbecb0550089d84c403475138d0a09 \ + --hash=sha256:1ae6020fb311f68d753b7efa9d4b9a5d47a5d6466ea0d5e3b5a471a960ea6e4a \ + --hash=sha256:1cb740d044aff31898804e7bf1181cc72c03d11dfd19932b9911ffc19a79070a \ + --hash=sha256:1e1808471fbe44c1a63e5f577a1d5f02fe5d66031dcbdf12f093ffc1305a858e \ + --hash=sha256:1e8cd52557603f5c66a548f69421310886b28b7066853089e1a71ee710e1cdc1 \ + --hash=sha256:2748c1ec0663580b4510bd89941a31560b4b439a0b428b49472a3d9944d11cd8 \ + --hash=sha256:27618391db7bdaf87ac6c92b31e8f0dfb83a9de0075855152b720140bda177a2 \ + --hash=sha256:2a8d7b50c34578d0d3bf7ad58cde9652b7d683691876f83aedc002862a35dc5e \ + --hash=sha256:2b091aefc05c78d286657cd4db95f2e6313375ff65dcf085e42e4c04d9c8d410 \ + --hash=sha256:2c2b80399a422348ce5de4fe40c418d6299a0fa2803dd61dc0b1a2f28e280fcf \ + --hash=sha256:2f2775843ca49360508d080eaa87f94fa248e2c946bbcd963bb3aae14f333413 \ + --hash=sha256:32655d17905e7ff8ba5c764c43cb124e34a9245e45b83c22e81041e1071aee10 \ + --hash=sha256:3601ffb5375de85a16f407854d11cca8fe3f5febbe3ac78fb2866bb220c74d10 \ + --hash=sha256:3d7d92495f47567a9b1669c51fc8d6d809821849063d168121ef801bbc213846 \ + --hash=sha256:40c86d8046915bb9aeb15d3f3f15b6fd500b8ea4485b30e1bbc799dab3fe29f8 \ + --hash=sha256:4161d87f85fa831e31469bfd82c186923070fc970b9de75339b68f0c75b51903 \ + --hash=sha256:41aef6f953283291c4e4e6850607bd71502be67779586a61472beacb315c97ec \ + --hash=sha256:453078802f1b9e2b7303fb79222c054cb18e76f7bdc220f7530fdc85d319f99e \ + --hash=sha256:492534a0ab925d1db998defc3c302dae3616a2fc3fe2e08db1472348f096ddf2 \ + 
--hash=sha256:4c5ef43b5c2d4114eb8ea424bb8c9cec01d5d17f242af88b2448f5ee81caadbc \ + --hash=sha256:4c8fcc5793dde01641a35905d6731ee1548f02b956815f8f1cab89e515a5bdf1 \ + --hash=sha256:4def140aa6156bc64ee9912383d4038f3fdd18fee03a6f222abd4de6357ce42a \ + --hash=sha256:5170907244b14303edc5978f522f16c974f32d3aa92109fabc2af52411c9433b \ + --hash=sha256:56a5595d0f892f214609c9f76b41b7428bed439d98dc961efafdd1354d42baae \ + --hash=sha256:57e7d17f59f9ebfa9667e6e5a1c0127b96b87cb9cede8335482451ed00788ba4 \ + --hash=sha256:5ef19071f4ac9f0834793af85bd04a920b4407715624e40cb7a0631a11137cdf \ + --hash=sha256:619843841e220adca114118533a574a9cd183ed8a28b85627d2844c500a2b0db \ + --hash=sha256:621f73a07595d83f28952d7bd1e91e9d1ed7625fb7af0064d3516674ec93a2a2 \ + --hash=sha256:6d220a2517f5893f55daac983bfa9fe998a7dbcaee4f5d27a88500f8b7873788 \ + --hash=sha256:6e42844ad64194fa08d5ccb75fe6a459b9b08e6d7296bd704460168d58a388f3 \ + --hash=sha256:726ea4e727aba21643205edad8f2187ec682d3305d790f73b7a51c7587b64bdd \ + --hash=sha256:74f45d170a21df41508cb67165456538425185baaf686281fa210d7e729abc34 \ + --hash=sha256:7ef7d5d4bd49ec7364315167a4134a015f61e8266c6d446fc116a9ac4456e10d \ + --hash=sha256:8050ba2e3ea1d8731a549e83c18d2f0999fbc99a5f6bd06b4c91449f55291804 \ + --hash=sha256:82345326b1d8d56afbe41d881fdf62f1926d7264b2fc1537f99ae5da9aad7913 \ + --hash=sha256:8355ad842a7c7e9e5e55653eade3b7d1885ba86f124dd8ab1f722f9be6627434 \ + --hash=sha256:86c1077a3cc60d453d4084d5b9649065f3bf1184e22992bd322e1f081d3117fb \ + --hash=sha256:8dd16fba2758db7a3780a051f245539c4451ca20910f5a5e6ea1c08d06d4a76b \ + --hash=sha256:8e32f7896f83774f91499d239e24cebfadbc07639c1494bb7213983842348337 \ + --hash=sha256:91c5036ebb62663a6b3999bdd2e559fd8456d17e2b485bf509784cd31a8b1705 \ + --hash=sha256:9250d087bc92b7d4899ccd5539a1b2334e44eee85d848c4c1aef8e221d3f8c8f \ + --hash=sha256:9479cae874c81bf610d72b85bb681a94c95722c127b55445285fb0e2c82db8e1 \ + --hash=sha256:968c14d4f03e10b2fd960f1d5168c1f0ac969381d3c1fcc973bc45fb06346599 \ + --hash=sha256:99ad739c3686085e614bf77a508e26954ff1b8f14da0e3765ff7abbf7799f952 \ + --hash=sha256:9d787e3310c6a6425eb346be4ff2ccf6eece63017916fd77fe8328c57be83521 \ + --hash=sha256:a1774cd1981cd212506a23a14dba7fdeaee259f5deba2df6229966d9911e767a \ + --hash=sha256:a30a68e89e5a218b8b23a52292924c1f4b245cb0c68d1cce9aec9bbda6e2c160 \ + --hash=sha256:b10e42a6de0e32559a92f2f8dc908478cc0fa02838d7dbe764c44dca3fa13569 \ + --hash=sha256:b2a13dd6a95e95a489ca242319d18fc02e07ceb28fa9ad146385194d95b3c829 \ + --hash=sha256:b30bcbd1e1221783c721483953d9e4f3ab9c5d165aa709693d3f3946747b1aea \ + --hash=sha256:b5a28980a926fa810dbbed059547b02783952e2efd9c636412345232ddb87ff6 \ + --hash=sha256:b5f7d8d2867152cdb625e72a530d2ccb48a3d199159144cbdd63870882fb6f80 \ + --hash=sha256:bfb0d6be01fbae8d6655c8ca21b3b72458606c4aec9bbc932db758d47aba6db1 \ + --hash=sha256:bfd876041a956e6a90ad7cdb3f6a630c07d491280bfeed4544053cd434901681 \ + --hash=sha256:c08c1f3e34338256732bd6938747daa3c0d5b251e04b6e43b5813e94d503076e \ + --hash=sha256:c243da3436354f4af6c3058a3f81a97d47ea52c9bd874b52fd30274853a1d5df \ + --hash=sha256:c32bef3e7aeee75746748643667668ef941d28b003bfc89994ecf09a10f7a1b5 \ + --hash=sha256:c661fc820cfb33e166bf2450d3dadbda47c8d8981898adb9b6fe24e5e582ba60 \ + --hash=sha256:c6c4dcdfff2c08509faa15d36ba7e5ef5fcfab25f1e8f85a0c8f45bc3a30725d \ + --hash=sha256:c6c565d9a6e1a8d783c1948937ffc377dd5771e83bd56de8317c450a954d2056 \ + --hash=sha256:c8a154cf6537ebbc110e24dabe53095e714245c272da9c1be05734bdad4a61aa \ + 
--hash=sha256:c9c08c2fbc6120e70abff5d7f28ffb4d969e14294fb2143b4b5c7d20e46d1714 \ + --hash=sha256:ca89c5e596fc05b015f27561b3793dc2fa0917ea0d7507eebb448efd35274a70 \ + --hash=sha256:cf8ff04c642716a7f2048713ddc6278c5fd41faa3b9cab12607c7abecd012c22 \ + --hash=sha256:cfecdaa4b19f9ca534746eb3b55a5195d5c95b88cac32a205e981ec0a22b7d31 \ + --hash=sha256:d426616dae0967ca225ab12c22274eb816558f2f99ccb4a1d52ca92e8baf180f \ + --hash=sha256:d5eaa4a4c5b1906bd0d2508d68927f15b81821f85092e06f1a34a4254b0e1af3 \ + --hash=sha256:d639a750223132afbfb8f429c60d9d318aeba03281a5f1ab49f877456448dcf1 \ + --hash=sha256:d920392a6b1f353f4aa54328c867fec3320fa50657e25f64abf17af054fc97ac \ + --hash=sha256:d991483606f3dbec93287b9f35596f41aa2e92b7c2ebbb935b63f409e243c9af \ + --hash=sha256:d9ea2604370efc9a174c1b5dcc81784fb040044232150f7f33756049edfc9026 \ + --hash=sha256:dca3582bca82596609959ac39e12b7dad98385b4fefccb1151b937383cec547d \ + --hash=sha256:e43a55f378df1e7a4fa3547c88d9a5a9b7113f653a66821bcea4718fe6c58763 \ + --hash=sha256:e69d0deeb977ffe7ed3d2e4439360089f9c3f217ada608f0f88ebd67afb6385e \ + --hash=sha256:e90b8db97f6f2c97eb045b51a6b2c5ed69cedd8392459e0642d4199b94fabd7e \ + --hash=sha256:e9bf3f0bbdb56633c07d7116ae60a576f846efdd86a8848f8d62b749e1209ca7 \ + --hash=sha256:ec94c04149b6a7b8120f9f44565722c7ae31b7a6d2275569d2eefa76b83da3be \ + --hash=sha256:eddf73f41225942c1f994914742afa53dc0d01a6e20fe14b878a1b1edc74151f \ + --hash=sha256:ee6854c9000a10938c79238de2379bea30c82e4925a371711af45387df35cab8 \ + --hash=sha256:ef71d476caa6692eea743ae5ea23cde3260677f70122c4d258ca952e5c2d4e84 \ + --hash=sha256:f1862739a1ffb50615c0fde6bae6569b5efbe08d98e59ce009f68a336f64da75 \ + --hash=sha256:f192a831d9575271a22d804ff1a5355355723f94f31d9eef25f0d45a152fdc1a \ + --hash=sha256:f82110ab962a541737bd0ce87978d4c658f06e7591ba899192e2712a517badbb \ + --hash=sha256:f9ca1cbdc0fbfe5e6e6f8221ef2309988db5bcede52443aeaee9a4ad555e0dac \ + --hash=sha256:fe2fda4110a3d0bc163c2e0664be44657431440722c5c5315c65155cab92f9e5 \ + --hash=sha256:febd38857b09867d3ed3f4f1af7d241c5c50362e25ef43034995b77a50df494e + # via tiktoken +requests==2.32.5 \ + --hash=sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6 \ + --hash=sha256:dbba0bac56e100853db0ea71b82b4dfd5fe2bf6d3754a8893c3af500cec7d7cf + # via + # azure-core + # azure-kusto-data + # google-api-core + # google-cloud-bigquery + # jupyterlab-server + # msal + # tiktoken + # yfinance +rfc3339-validator==0.1.4 \ + --hash=sha256:138a2abdf93304ad60530167e51d2dfb9549521a836871b88d7f4695d0022f6b \ + --hash=sha256:24f6ec1eda14ef823da9e36ec7113124b39c04d50a4d3d3a3c2859577e7791fa + # via + # jsonschema + # jupyter-events +rfc3986-validator==0.1.1 \ + --hash=sha256:2f235c432ef459970b4306369336b9d5dbdda31b510ca1e327636e01f528bfa9 \ + --hash=sha256:3d44bde7921b3b9ec3ae4e3adca370438eccebc676456449b145d533b240d055 + # via + # jsonschema + # jupyter-events +rfc3987-syntax==1.1.0 \ + --hash=sha256:6c3d97604e4c5ce9f714898e05401a0445a641cfa276432b0a648c80856f6a3f \ + --hash=sha256:717a62cbf33cffdd16dfa3a497d81ce48a660ea691b1ddd7be710c22f00b4a0d + # via jsonschema +rpds-py==0.30.0 \ + --hash=sha256:07ae8a593e1c3c6b82ca3292efbe73c30b61332fd612e05abee07c79359f292f \ + --hash=sha256:0a59119fc6e3f460315fe9d08149f8102aa322299deaa5cab5b40092345c2136 \ + --hash=sha256:0d08f00679177226c4cb8c5265012eea897c8ca3b93f429e546600c971bcbae7 \ + --hash=sha256:0ed177ed9bded28f8deb6ab40c183cd1192aa0de40c12f38be4d59cd33cb5c65 \ + --hash=sha256:12f90dd7557b6bd57f40abe7747e81e0c0b119bef015ea7726e69fe550e394a4 \ + 
--hash=sha256:1ab5b83dbcf55acc8b08fc62b796ef672c457b17dbd7820a11d6c52c06839bdf \ + --hash=sha256:1b151685b23929ab7beec71080a8889d4d6d9fa9a983d213f07121205d48e2c4 \ + --hash=sha256:1f3587eb9b17f3789ad50824084fa6f81921bbf9a795826570bda82cb3ed91f2 \ + --hash=sha256:250fa00e9543ac9b97ac258bd37367ff5256666122c2d0f2bc97577c60a1818c \ + --hash=sha256:2771c6c15973347f50fece41fc447c054b7ac2ae0502388ce3b6738cd366e3d4 \ + --hash=sha256:27f4b0e92de5bfbc6f86e43959e6edd1425c33b5e69aab0984a72047f2bcf1e3 \ + --hash=sha256:2e6ecb5a5bcacf59c3f912155044479af1d0b6681280048b338b28e364aca1f6 \ + --hash=sha256:33f559f3104504506a44bb666b93a33f5d33133765b0c216a5bf2f1e1503af89 \ + --hash=sha256:3896fa1be39912cf0757753826bc8bdc8ca331a28a7c4ae46b7a21280b06bb85 \ + --hash=sha256:39c02563fc592411c2c61d26b6c5fe1e51eaa44a75aa2c8735ca88b0d9599daa \ + --hash=sha256:3adbb8179ce342d235c31ab8ec511e66c73faa27a47e076ccc92421add53e2bb \ + --hash=sha256:3d4a69de7a3e50ffc214ae16d79d8fbb0922972da0356dcf4d0fdca2878559c6 \ + --hash=sha256:3e62880792319dbeb7eb866547f2e35973289e7d5696c6e295476448f5b63c87 \ + --hash=sha256:3e8eeb0544f2eb0d2581774be4c3410356eba189529a6b3e36bbbf9696175856 \ + --hash=sha256:422c3cb9856d80b09d30d2eb255d0754b23e090034e1deb4083f8004bd0761e4 \ + --hash=sha256:4559c972db3a360808309e06a74628b95eaccbf961c335c8fe0d590cf587456f \ + --hash=sha256:46e83c697b1f1c72b50e5ee5adb4353eef7406fb3f2043d64c33f20ad1c2fc53 \ + --hash=sha256:47b0ef6231c58f506ef0b74d44e330405caa8428e770fec25329ed2cb971a229 \ + --hash=sha256:47e77dc9822d3ad616c3d5759ea5631a75e5809d5a28707744ef79d7a1bcfcad \ + --hash=sha256:47f236970bccb2233267d89173d3ad2703cd36a0e2a6e92d0560d333871a3d23 \ + --hash=sha256:47f9a91efc418b54fb8190a6b4aa7813a23fb79c51f4bb84e418f5476c38b8db \ + --hash=sha256:495aeca4b93d465efde585977365187149e75383ad2684f81519f504f5c13038 \ + --hash=sha256:4c5f36a861bc4b7da6516dbdf302c55313afa09b81931e8280361a4f6c9a2d27 \ + --hash=sha256:4e7fc54e0900ab35d041b0601431b0a0eb495f0851a0639b6ef90f7741b39a18 \ + --hash=sha256:51a1234d8febafdfd33a42d97da7a43f5dcb120c1060e352a3fbc0c6d36e2083 \ + --hash=sha256:55f66022632205940f1827effeff17c4fa7ae1953d2b74a8581baaefb7d16f8c \ + --hash=sha256:58edca431fb9b29950807e301826586e5bbf24163677732429770a697ffe6738 \ + --hash=sha256:5965af57d5848192c13534f90f9dd16464f3c37aaf166cc1da1cae1fd5a34898 \ + --hash=sha256:5ba103fb455be00f3b1c2076c9d4264bfcb037c976167a6047ed82f23153f02e \ + --hash=sha256:5d4c2aa7c50ad4728a094ebd5eb46c452e9cb7edbfdb18f9e1221f597a73e1e7 \ + --hash=sha256:61046904275472a76c8c90c9ccee9013d70a6d0f73eecefd38c1ae7c39045a08 \ + --hash=sha256:613aa4771c99f03346e54c3f038e4cc574ac09a3ddfb0e8878487335e96dead6 \ + --hash=sha256:626a7433c34566535b6e56a1b39a7b17ba961e97ce3b80ec62e6f1312c025551 \ + --hash=sha256:669b1805bd639dd2989b281be2cfd951c6121b65e729d9b843e9639ef1fd555e \ + --hash=sha256:68f19c879420aa08f61203801423f6cd5ac5f0ac4ac82a2368a9fcd6a9a075e0 \ + --hash=sha256:692bef75a5525db97318e8cd061542b5a79812d711ea03dbc1f6f8dbb0c5f0d2 \ + --hash=sha256:6abc8880d9d036ecaafe709079969f56e876fcf107f7a8e9920ba6d5a3878d05 \ + --hash=sha256:6bdfdb946967d816e6adf9a3d8201bfad269c67efe6cefd7093ef959683c8de0 \ + --hash=sha256:73c67f2db7bc334e518d097c6d1e6fed021bbc9b7d678d6cc433478365d1d5f5 \ + --hash=sha256:74a3243a411126362712ee1524dfc90c650a503502f135d54d1b352bd01f2404 \ + --hash=sha256:76fec018282b4ead0364022e3c54b60bf368b9d926877957a8624b58419169b7 \ + --hash=sha256:7cee9c752c0364588353e627da8a7e808a66873672bcb5f52890c33fd965b394 \ + 
--hash=sha256:7e6ecfcb62edfd632e56983964e6884851786443739dbfe3582947e87274f7cb \ + --hash=sha256:806f36b1b605e2d6a72716f321f20036b9489d29c51c91f4dd29a3e3afb73b15 \ + --hash=sha256:8d6d1cc13664ec13c1b84241204ff3b12f9bb82464b8ad6e7a5d3486975c2eed \ + --hash=sha256:9027da1ce107104c50c81383cae773ef5c24d296dd11c99e2629dbd7967a20c6 \ + --hash=sha256:922e10f31f303c7c920da8981051ff6d8c1a56207dbdf330d9047f6d30b70e5e \ + --hash=sha256:945dccface01af02675628334f7cf49c2af4c1c904748efc5cf7bbdf0b579f95 \ + --hash=sha256:946fe926af6e44f3697abbc305ea168c2c31d3e3ef1058cf68f379bf0335a78d \ + --hash=sha256:95f0802447ac2d10bcc69f6dc28fe95fdf17940367b21d34e34c737870758950 \ + --hash=sha256:9854cf4f488b3d57b9aaeb105f06d78e5529d3145b1e4a41750167e8c213c6d3 \ + --hash=sha256:993914b8e560023bc0a8bf742c5f303551992dcb85e247b1e5c7f4a7d145bda5 \ + --hash=sha256:99b47d6ad9a6da00bec6aabe5a6279ecd3c06a329d4aa4771034a21e335c3a97 \ + --hash=sha256:9a4e86e34e9ab6b667c27f3211ca48f73dba7cd3d90f8d5b11be56e5dbc3fb4e \ + --hash=sha256:9cf69cdda1f5968a30a359aba2f7f9aa648a9ce4b580d6826437f2b291cfc86e \ + --hash=sha256:a090322ca841abd453d43456ac34db46e8b05fd9b3b4ac0c78bcde8b089f959b \ + --hash=sha256:a1010ed9524c73b94d15919ca4d41d8780980e1765babf85f9a2f90d247153dd \ + --hash=sha256:a161f20d9a43006833cd7068375a94d035714d73a172b681d8881820600abfad \ + --hash=sha256:a1d0bc22a7cdc173fedebb73ef81e07faef93692b8c1ad3733b67e31e1b6e1b8 \ + --hash=sha256:a2bffea6a4ca9f01b3f8e548302470306689684e61602aa3d141e34da06cf425 \ + --hash=sha256:a4796a717bf12b9da9d3ad002519a86063dcac8988b030e405704ef7d74d2d9d \ + --hash=sha256:a51033ff701fca756439d641c0ad09a41d9242fa69121c7d8769604a0a629825 \ + --hash=sha256:a8fa71a2e078c527c3e9dc9fc5a98c9db40bcc8a92b4e8858e36d329f8684b51 \ + --hash=sha256:ac37f9f516c51e5753f27dfdef11a88330f04de2d564be3991384b2f3535d02e \ + --hash=sha256:ac98b175585ecf4c0348fd7b29c3864bda53b805c773cbf7bfdaffc8070c976f \ + --hash=sha256:acd7eb3f4471577b9b5a41baf02a978e8bdeb08b4b355273994f8b87032000a8 \ + --hash=sha256:ad1fa8db769b76ea911cb4e10f049d80bf518c104f15b3edb2371cc65375c46f \ + --hash=sha256:b40fb160a2db369a194cb27943582b38f79fc4887291417685f3ad693c5a1d5d \ + --hash=sha256:b4dc1a6ff022ff85ecafef7979a2c6eb423430e05f1165d6688234e62ba99a07 \ + --hash=sha256:ba3af48635eb83d03f6c9735dfb21785303e73d22ad03d489e88adae6eab8877 \ + --hash=sha256:ba81a9203d07805435eb06f536d95a266c21e5b2dfbf6517748ca40c98d19e31 \ + --hash=sha256:c2262bdba0ad4fc6fb5545660673925c2d2a5d9e2e0fb603aad545427be0fc58 \ + --hash=sha256:c77afbd5f5250bf27bf516c7c4a016813eb2d3e116139aed0096940c5982da94 \ + --hash=sha256:ca28829ae5f5d569bb62a79512c842a03a12576375d5ece7d2cadf8abe96ec28 \ + --hash=sha256:cdc62c8286ba9bf7f47befdcea13ea0e26bf294bda99758fd90535cbaf408000 \ + --hash=sha256:d948b135c4693daff7bc2dcfc4ec57237a29bd37e60c2fabf5aff2bbacf3e2f1 \ + --hash=sha256:d96c2086587c7c30d44f31f42eae4eac89b60dabbac18c7669be3700f13c3ce1 \ + --hash=sha256:d9a0ca5da0386dee0655b4ccdf46119df60e0f10da268d04fe7cc87886872ba7 \ + --hash=sha256:dbd936cde57abfee19ab3213cf9c26be06d60750e60a8e4dd85d1ab12c8b1f40 \ + --hash=sha256:dc4f992dfe1e2bc3ebc7444f6c7051b4bc13cd8e33e43511e8ffd13bf407010d \ + --hash=sha256:dc824125c72246d924f7f796b4f63c1e9dc810c7d9e2355864b3c3a73d59ade0 \ + --hash=sha256:dd8ff7cf90014af0c0f787eea34794ebf6415242ee1d6fa91eaba725cc441e84 \ + --hash=sha256:dea5b552272a944763b34394d04577cf0f9bd013207bc32323b5a89a53cf9c2f \ + --hash=sha256:dff13836529b921e22f15cb099751209a60009731a68519630a24d61f0b1b30a \ + 
--hash=sha256:e5d3e6b26f2c785d65cc25ef1e5267ccbe1b069c5c21b8cc724efee290554419 \ + --hash=sha256:e7536cd91353c5273434b4e003cbda89034d67e7710eab8761fd918ec6c69cf8 \ + --hash=sha256:eb0b93f2e5c2189ee831ee43f156ed34e2a89a78a66b98cadad955972548be5a \ + --hash=sha256:eb2c4071ab598733724c08221091e8d80e89064cd472819285a9ab0f24bcedb9 \ + --hash=sha256:ec7c4490c672c1a0389d319b3a9cfcd098dcdc4783991553c332a15acf7249be \ + --hash=sha256:ee454b2a007d57363c2dfd5b6ca4a5d7e2c518938f8ed3b706e37e5d470801ed \ + --hash=sha256:ee6af14263f25eedc3bb918a3c04245106a42dfd4f5c2285ea6f997b1fc3f89a \ + --hash=sha256:f14fc5df50a716f7ece6a80b6c78bb35ea2ca47c499e422aa4463455dd96d56d \ + --hash=sha256:f251c812357a3fed308d684a5079ddfb9d933860fc6de89f2b7ab00da481e65f \ + --hash=sha256:f83424d738204d9770830d35290ff3273fbb02b41f919870479fab14b9d303b2 \ + --hash=sha256:f8d1736cfb49381ba528cd5baa46f82fdc65c06e843dab24dd70b63d09121b3f \ + --hash=sha256:fe5fa731a1fa8a0a56b0977413f8cacac1768dad38d16b3a296712709476fbd5 + # via + # jsonschema + # referencing +rsa==4.9.1 \ + --hash=sha256:68635866661c6836b8d39430f97a996acbd61bfa49406748ea243539fe239762 \ + --hash=sha256:e7bdbfdb5497da4c07dfd35530e1a902659db6ff241e39d9953cad06ebd0ae75 + # via google-auth +s3transfer==0.16.0 \ + --hash=sha256:18e25d66fed509e3868dc1572b3f427ff947dd2c56f844a5bf09481ad3f3b2fe \ + --hash=sha256:8e990f13268025792229cd52fa10cb7163744bf56e719e0b9cb925ab79abf920 + # via boto3 +scikit-learn==1.8.0 \ + --hash=sha256:00d6f1d66fbcf4eba6e356e1420d33cc06c70a45bb1363cd6f6a8e4ebbbdece2 \ + --hash=sha256:0d6ae97234d5d7079dc0040990a6f7aeb97cb7fa7e8945f1999a429b23569e0a \ + --hash=sha256:146b4d36f800c013d267b29168813f7a03a43ecd2895d04861f1240b564421da \ + --hash=sha256:15fc3b5d19cc2be65404786857f2e13c70c83dd4782676dd6814e3b89dc8f5b9 \ + --hash=sha256:2838551e011a64e3053ad7618dda9310175f7515f1742fa2d756f7c874c05961 \ + --hash=sha256:29ffc74089f3d5e87dfca4c2c8450f88bdc61b0fc6ed5d267f3988f19a1309f6 \ + --hash=sha256:2de443b9373b3b615aec1bb57f9baa6bb3a9bd093f1269ba95c17d870422b271 \ + --hash=sha256:35c007dedb2ffe38fe3ee7d201ebac4a2deccd2408e8621d53067733e3c74809 \ + --hash=sha256:3bad7565bc9cf37ce19a7c0d107742b320c1285df7aab1a6e2d28780df167242 \ + --hash=sha256:4496bb2cf7a43ce1a2d7524a79e40bc5da45cf598dbf9545b7e8316ccba47bb4 \ + --hash=sha256:4511be56637e46c25721e83d1a9cea9614e7badc7040c4d573d75fbe257d6fd7 \ + --hash=sha256:5025ce924beccb28298246e589c691fe1b8c1c96507e6d27d12c5fadd85bfd76 \ + --hash=sha256:56079a99c20d230e873ea40753102102734c5953366972a71d5cb39a32bc40c6 \ + --hash=sha256:5e30adb87f0cc81c7690a84f7932dd66be5bac57cfe16b91cb9151683a4a2d3b \ + --hash=sha256:5fb63362b5a7ddab88e52b6dbb47dac3fd7dafeee740dc6c8d8a446ddedade8e \ + --hash=sha256:6b595b07a03069a2b1740dc08c2299993850ea81cce4fe19b2421e0c970de6b7 \ + --hash=sha256:72358cce49465d140cc4e7792015bb1f0296a9742d5622c67e31399b75468b9e \ + --hash=sha256:74b66d8689d52ed04c271e1329f0c61635bcaf5b926db9b12d58914cdc01fe57 \ + --hash=sha256:7cc267b6108f0a1499a734167282c00c4ebf61328566b55ef262d48e9849c735 \ + --hash=sha256:80832434a6cc114f5219211eec13dcbc16c2bac0e31ef64c6d346cde3cf054cb \ + --hash=sha256:8c497fff237d7b4e07e9ef1a640887fa4fb765647f86fbe00f969ff6280ce2bb \ + --hash=sha256:8fdf95767f989b0cfedb85f7ed8ca215d4be728031f56ff5a519ee1e3276dc2e \ + --hash=sha256:9bccbb3b40e3de10351f8f5068e105d0f4083b1a65fa07b6634fbc401a6287fd \ + --hash=sha256:a0bcfe4d0d14aec44921545fd2af2338c7471de9cb701f1da4c9d85906ab847a \ + --hash=sha256:a69525355a641bf8ef136a7fa447672fb54fe8d60cab5538d9eb7c6438543fb9 \ + 
--hash=sha256:ada8121bcb4dac28d930febc791a69f7cb1673c8495e5eee274190b73a4559c1 \ + --hash=sha256:bf97c10a3f5a7543f9b88cbf488d33d175e9146115a451ae34568597ba33dcde \ + --hash=sha256:c22a2da7a198c28dd1a6e1136f19c830beab7fdca5b3e5c8bba8394f8a5c45b3 \ + --hash=sha256:c2656924ec73e5939c76ac4c8b026fc203b83d8900362eb2599d8aee80e4880f \ + --hash=sha256:c57b1b610bd1f40ba43970e11ce62821c2e6569e4d74023db19c6b26f246cb3b \ + --hash=sha256:eddde82a035681427cbedded4e6eff5e57fa59216c2e3e90b10b19ab1d0a65c3 \ + --hash=sha256:edec98c5e7c128328124a029bceb09eda2d526997780fef8d65e9a69eead963e \ + --hash=sha256:ee787491dbfe082d9c3013f01f5991658b0f38aa8177e4cd4bf434c58f551702 \ + --hash=sha256:f28dd15c6bb0b66ba09728cf09fd8736c304be29409bd8445a080c1280619e8c \ + --hash=sha256:f984ca4b14914e6b4094c5d52a32ea16b49832c03bd17a110f004db3c223e8e1 \ + --hash=sha256:fb65db5d7531bccf3a4f6bec3462223bea71384e2cda41da0f10b7c292b9e7c4 \ + --hash=sha256:fe1c011a640a9f0791146011dfd3c7d9669785f9fed2b2a5f9e207536cf5c2fd + # via data-formulator +scipy==1.17.0 \ + --hash=sha256:00fb5f8ec8398ad90215008d8b6009c9db9fa924fd4c7d6be307c6f945f9cd73 \ + --hash=sha256:031121914e295d9791319a1875444d55079885bbae5bdc9c5e0f2ee5f09d34ff \ + --hash=sha256:0937a0b0d8d593a198cededd4c439a0ea216a3f36653901ea1f3e4be949056f8 \ + --hash=sha256:0cf46c8013fec9d3694dc572f0b54100c28405d55d3e2cb15e2895b25057996e \ + --hash=sha256:0d5018a57c24cb1dd828bcf51d7b10e65986d549f52ef5adb6b4d1ded3e32a57 \ + --hash=sha256:130d12926ae34399d157de777472bf82e9061c60cc081372b3118edacafe1d00 \ + --hash=sha256:13c4096ac6bc31d706018f06a49abe0485f96499deb82066b94d19b02f664209 \ + --hash=sha256:13e861634a2c480bd237deb69333ac79ea1941b94568d4b0efa5db5e263d4fd1 \ + --hash=sha256:1f9586a58039d7229ce77b52f8472c972448cded5736eaf102d5658bbac4c269 \ + --hash=sha256:1ff269abf702f6c7e67a4b7aad981d42871a11b9dd83c58d2d2ea624efbd1088 \ + --hash=sha256:255c0da161bd7b32a6c898e7891509e8a9289f0b1c6c7d96142ee0d2b114c2ea \ + --hash=sha256:2591060c8e648d8b96439e111ac41fd8342fdeff1876be2e19dea3fe8930454e \ + --hash=sha256:272a9f16d6bb4667e8b50d25d71eddcc2158a214df1b566319298de0939d2ab7 \ + --hash=sha256:2abd71643797bd8a106dff97894ff7869eeeb0af0f7a5ce02e4227c6a2e9d6fd \ + --hash=sha256:2b531f57e09c946f56ad0b4a3b2abee778789097871fc541e267d2eca081cff1 \ + --hash=sha256:30509da9dbec1c2ed8f168b8d8aa853bc6723fede1dbc23c7d43a56f5ab72a67 \ + --hash=sha256:33af70d040e8af9d5e7a38b5ed3b772adddd281e3062ff23fec49e49681c38cf \ + --hash=sha256:357ca001c6e37601066092e7c89cca2f1ce74e2a520ca78d063a6d2201101df2 \ + --hash=sha256:3625c631a7acd7cfd929e4e31d2582cf00f42fcf06011f59281271746d77e061 \ + --hash=sha256:363ad4ae2853d88ebcde3ae6ec46ccca903ea9835ee8ba543f12f575e7b07e4e \ + --hash=sha256:40052543f7bbe921df4408f46003d6f01c6af109b9e2c8a66dd1cf6cf57f7d5d \ + --hash=sha256:423ca1f6584fc03936972b5f7c06961670dbba9f234e71676a7c7ccf938a0d61 \ + --hash=sha256:474da16199f6af66601a01546144922ce402cb17362e07d82f5a6cf8f963e449 \ + --hash=sha256:4e00562e519c09da34c31685f6acc3aa384d4d50604db0f245c14e1b4488bfa2 \ + --hash=sha256:5194c445d0a1c7a6c1a4a4681b6b7c71baad98ff66d96b949097e7513c9d6742 \ + --hash=sha256:5fb10d17e649e1446410895639f3385fd2bf4c3c7dfc9bea937bddcbc3d7b9ba \ + --hash=sha256:65ec32f3d32dfc48c72df4291345dae4f048749bc8d5203ee0a3f347f96c5ce6 \ + --hash=sha256:6680f2dfd4f6182e7d6db161344537da644d1cf85cf293f015c60a17ecf08752 \ + --hash=sha256:6e886000eb4919eae3a44f035e63f0fd8b651234117e8f6f29bad1cd26e7bc45 \ + --hash=sha256:7204fddcbec2fe6598f1c5fdf027e9f259106d05202a959a9f1aecf036adc9f6 \ + 
--hash=sha256:819fc26862b4b3c73a60d486dbb919202f3d6d98c87cf20c223511429f2d1a97 \ + --hash=sha256:8547e7c57f932e7354a2319fab613981cde910631979f74c9b542bb167a8b9db \ + --hash=sha256:85b0ac3ad17fa3be50abd7e69d583d98792d7edc08367e01445a1e2076005379 \ + --hash=sha256:87b411e42b425b84777718cc41516b8a7e0795abfa8e8e1d573bf0ef014f0812 \ + --hash=sha256:88c22af9e5d5a4f9e027e26772cc7b5922fab8bcc839edb3ae33de404feebd9e \ + --hash=sha256:9244608d27eafe02b20558523ba57f15c689357c85bdcfe920b1828750aa26eb \ + --hash=sha256:979c3a0ff8e5ba254d45d59ebd38cde48fce4f10b5125c680c7a4bfe177aab07 \ + --hash=sha256:9eeb9b5f5997f75507814ed9d298ab23f62cf79f5a3ef90031b1ee2506abdb5b \ + --hash=sha256:9fad7d3578c877d606b1150135c2639e9de9cecd3705caa37b66862977cc3e72 \ + --hash=sha256:a38c3337e00be6fd8a95b4ed66b5d988bac4ec888fd922c2ea9fe5fb1603dd67 \ + --hash=sha256:aabf057c632798832f071a8dde013c2e26284043934f53b00489f1773b33527e \ + --hash=sha256:c17514d11b78be8f7e6331b983a65a7f5ca1fd037b95e27b280921fe5606286a \ + --hash=sha256:c5e8647f60679790c2f5c76be17e2e9247dc6b98ad0d3b065861e082c56e078d \ + --hash=sha256:cacbaddd91fcffde703934897c5cd2c7cb0371fac195d383f4e1f1c5d3f3bd04 \ + --hash=sha256:d7425fcafbc09a03731e1bc05581f5fad988e48c6a861f441b7ab729a49a55ea \ + --hash=sha256:dac97a27520d66c12a34fd90a4fe65f43766c18c0d6e1c0a80f114d2260080e4 \ + --hash=sha256:dbf133ced83889583156566d2bdf7a07ff89228fe0c0cb727f777de92092ec6b \ + --hash=sha256:e8c0b331c2c1f531eb51f1b4fc9ba709521a712cce58f1aa627bc007421a5306 \ + --hash=sha256:eb2651271135154aa24f6481cbae5cc8af1f0dd46e6533fb7b56aa9727b6a232 \ + --hash=sha256:ebb7446a39b3ae0fe8f416a9a3fdc6fba3f11c634f680f16a239c5187bc487c0 \ + --hash=sha256:ec0827aa4d36cb79ff1b81de898e948a51ac0b9b1c43e4a372c0508c38c0f9a3 \ + --hash=sha256:edce1a1cf66298cccdc48a1bdf8fb10a3bf58e8b58d6c3883dd1530e103f87c0 \ + --hash=sha256:eec3842ec9ac9de5917899b277428886042a93db0b227ebbe3a333b64ec7643d \ + --hash=sha256:ef28d815f4d2686503e5f4f00edc387ae58dfd7a2f42e348bb53359538f01558 \ + --hash=sha256:f2a4942b0f5f7c23c7cd641a0ca1955e2ae83dedcff537e3a0259096635e186b \ + --hash=sha256:f3cd947f20fe17013d401b64e857c6b2da83cae567adbb75b9dcba865abc66d8 \ + --hash=sha256:f603d8a5518c7426414d1d8f82e253e454471de682ce5e39c29adb0df1efb86b \ + --hash=sha256:f7df7941d71314e60a481e02d5ebcb3f0185b8d799c70d03d8258f6c80f3d467 \ + --hash=sha256:f9eb55bb97d00f8b7ab95cb64f873eb0bf54d9446264d9f3609130381233483f \ + --hash=sha256:fc02c37a5639ee67d8fb646ffded6d793c06c5622d36b35cfa8fe5ececb8f042 \ + --hash=sha256:fe508b5690e9eaaa9467fc047f833af58f1152ae51a0d0aed67aa5801f4dd7d6 + # via scikit-learn +send2trash==2.1.0 \ + --hash=sha256:0da2f112e6d6bb22de6aa6daa7e144831a4febf2a87261451c4ad849fe9a873c \ + --hash=sha256:1c72b39f09457db3c05ce1d19158c2cbef4c32b8bedd02c155e49282b7ea7459 + # via jupyter-server +setuptools==80.10.2 \ + --hash=sha256:8b0e9d10c784bf7d262c4e5ec5d4ec94127ce206e8738f29a437945fbc219b70 \ + --hash=sha256:95b30ddfb717250edb492926c92b5221f7ef3fbcc2b07579bcd4a27da21d0173 + # via jupyterlab +shellingham==1.5.4 \ + --hash=sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686 \ + --hash=sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de + # via huggingface-hub +six==1.17.0 \ + --hash=sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274 \ + --hash=sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81 + # via + # python-dateutil + # rfc3339-validator +sniffio==1.3.1 \ + --hash=sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2 \ + 
--hash=sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc + # via openai +soupsieve==2.8.3 \ + --hash=sha256:3267f1eeea4251fb42728b6dfb746edc9acaffc4a45b27e19450b676586e8349 \ + --hash=sha256:ed64f2ba4eebeab06cc4962affce381647455978ffc1e36bb79a545b91f45a95 + # via beautifulsoup4 +stack-data==0.6.3 \ + --hash=sha256:836a778de4fec4dcd1dcd89ed8abff8a221f58308462e1c4aa2a3cf30148f0b9 \ + --hash=sha256:d5558e0c25a4cb0853cddad3d77da9891a08cb85dd9f9f91b9f8cd66e511e695 + # via ipython +terminado==0.18.1 \ + --hash=sha256:a4468e1b37bb318f8a86514f65814e1afc977cf29b3992a4500d9dd305dcceb0 \ + --hash=sha256:de09f2c4b85de4765f7714688fff57d3e75bad1f909b589fde880460c753fd2e + # via + # jupyter-server + # jupyter-server-terminals +threadpoolctl==3.6.0 \ + --hash=sha256:43a0b8fd5a2928500110039e43a5eed8480b918967083ea48dc3ab9f13c4a7fb \ + --hash=sha256:8ab8b4aa3491d812b623328249fab5302a68d2d71745c8a4c719a2fcaba9f44e + # via scikit-learn +tiktoken==0.12.0 \ + --hash=sha256:01d99484dc93b129cd0964f9d34eee953f2737301f18b3c7257bf368d7615baa \ + --hash=sha256:04f0e6a985d95913cabc96a741c5ffec525a2c72e9df086ff17ebe35985c800e \ + --hash=sha256:06a9f4f49884139013b138920a4c393aa6556b2f8f536345f11819389c703ebb \ + --hash=sha256:0ee8f9ae00c41770b5f9b0bb1235474768884ae157de3beb5439ca0fd70f3e25 \ + --hash=sha256:285ba9d73ea0d6171e7f9407039a290ca77efcdb026be7769dccc01d2c8d7fff \ + --hash=sha256:2b90f5ad190a4bb7c3eb30c5fa32e1e182ca1ca79f05e49b448438c3e225a49b \ + --hash=sha256:35a2f8ddd3824608b3d650a000c1ef71f730d0c56486845705a8248da00f9fe5 \ + --hash=sha256:399c3dd672a6406719d84442299a490420b458c44d3ae65516302a99675888f3 \ + --hash=sha256:3e68e3e593637b53e56f7237be560f7a394451cb8c11079755e80ae64b9e6def \ + --hash=sha256:47a5bc270b8c3db00bb46ece01ef34ad050e364b51d406b6f9730b64ac28eded \ + --hash=sha256:4a1a4fcd021f022bfc81904a911d3df0f6543b9e7627b51411da75ff2fe7a1be \ + --hash=sha256:508fa71810c0efdcd1b898fda574889ee62852989f7c1667414736bcb2b9a4bd \ + --hash=sha256:54c891b416a0e36b8e2045b12b33dd66fb34a4fe7965565f1b482da50da3e86a \ + --hash=sha256:584c3ad3d0c74f5269906eb8a659c8bfc6144a52895d9261cdaf90a0ae5f4de0 \ + --hash=sha256:5edb8743b88d5be814b1a8a8854494719080c28faaa1ccbef02e87354fe71ef0 \ + --hash=sha256:604831189bd05480f2b885ecd2d1986dc7686f609de48208ebbbddeea071fc0b \ + --hash=sha256:65b26c7a780e2139e73acc193e5c63ac754021f160df919add909c1492c0fb37 \ + --hash=sha256:6e227c7f96925003487c33b1b32265fad2fbcec2b7cf4817afb76d416f40f6bb \ + --hash=sha256:775c2c55de2310cc1bc9a3ad8826761cbdc87770e586fd7b6da7d4589e13dab3 \ + --hash=sha256:83d16643edb7fa2c99eff2ab7733508aae1eebb03d5dfc46f5565862810f24e3 \ + --hash=sha256:8f317e8530bb3a222547b85a58583238c8f74fd7a7408305f9f63246d1a0958b \ + --hash=sha256:981a81e39812d57031efdc9ec59fa32b2a5a5524d20d4776574c4b4bd2e9014a \ + --hash=sha256:9baf52f84a3f42eef3ff4e754a0db79a13a27921b457ca9832cf944c6be4f8f3 \ + --hash=sha256:a01b12f69052fbe4b080a2cfb867c4de12c704b56178edf1d1d7b273561db160 \ + --hash=sha256:a1af81a6c44f008cba48494089dd98cccb8b313f55e961a52f5b222d1e507967 \ + --hash=sha256:a90388128df3b3abeb2bfd1895b0681412a8d7dc644142519e6f0a97c2111646 \ + --hash=sha256:b18ba7ee2b093863978fcb14f74b3707cdc8d4d4d3836853ce7ec60772139931 \ + --hash=sha256:b4e7ed1c6a7a8a60a3230965bdedba8cc58f68926b835e519341413370e0399a \ + --hash=sha256:b8a0cd0c789a61f31bf44851defbd609e8dd1e2c8589c614cc1060940ef1f697 \ + --hash=sha256:b97f74aca0d78a1ff21b8cd9e9925714c15a9236d6ceacf5c7327c117e6e21e8 \ + --hash=sha256:c06cf0fcc24c2cb2adb5e185c7082a82cba29c17575e828518c2f11a01f445aa \ + 
--hash=sha256:c2c714c72bc00a38ca969dae79e8266ddec999c7ceccd603cc4f0d04ccd76365 \ + --hash=sha256:cbb9a3ba275165a2cb0f9a83f5d7025afe6b9d0ab01a22b50f0e74fee2ad253e \ + --hash=sha256:d186a5c60c6a0213f04a7a802264083dea1bbde92a2d4c7069e1a56630aef830 \ + --hash=sha256:d5f89ea5680066b68bcb797ae85219c72916c922ef0fcdd3480c7d2315ffff16 \ + --hash=sha256:da900aa0ad52247d8794e307d6446bd3cdea8e192769b56276695d34d2c9aa88 \ + --hash=sha256:dc2dd125a62cb2b3d858484d6c614d136b5b848976794edfb63688d539b8b93f \ + --hash=sha256:dfdfaa5ffff8993a3af94d1125870b1d27aed7cb97aa7eb8c1cefdbc87dbee63 \ + --hash=sha256:edde1ec917dfd21c1f2f8046b86348b0f54a2c0547f68149d8600859598769ad \ + --hash=sha256:f18f249b041851954217e9fd8e5c00b024ab2315ffda5ed77665a05fa91f42dc \ + --hash=sha256:f61c0aea5565ac82e2ec50a05e02a6c44734e91b51c10510b084ea1b8e633a71 \ + --hash=sha256:fc530a28591a2d74bce821d10b418b26a094bf33839e69042a6e86ddb7a7fb27 \ + --hash=sha256:ffc5288f34a8bc02e1ea7047b8d041104791d2ddbf42d1e5fa07822cbffe16bd + # via litellm +tinycss2==1.4.0 \ + --hash=sha256:10c0972f6fc0fbee87c3edb76549357415e94548c1ae10ebccdea16fb404a9b7 \ + --hash=sha256:3a49cf47b7675da0b15d0c6e1df8df4ebd96e9394bb905a5775adb0d884c5289 + # via bleach +tokenizers==0.22.2 \ + --hash=sha256:1c774b1276f71e1ef716e5486f21e76333464f47bece56bbd554485982a9e03e \ + --hash=sha256:1e418a55456beedca4621dbab65a318981467a2b188e982a23e117f115ce5001 \ + --hash=sha256:2249487018adec45d6e3554c71d46eb39fa8ea67156c640f7513eb26f318cec7 \ + --hash=sha256:25b85325d0815e86e0bac263506dd114578953b7b53d7de09a6485e4a160a7dd \ + --hash=sha256:29c30b83d8dcd061078b05ae0cb94d3c710555fbb44861139f9f83dcca3dc3e4 \ + --hash=sha256:369cc9fc8cc10cb24143873a0d95438bb8ee257bb80c71989e3ee290e8d72c67 \ + --hash=sha256:37ae80a28c1d3265bb1f22464c856bd23c02a05bb211e56d0c5301a435be6c1a \ + --hash=sha256:38337540fbbddff8e999d59970f3c6f35a82de10053206a7562f1ea02d046fa5 \ + --hash=sha256:473b83b915e547aa366d1eee11806deaf419e17be16310ac0a14077f1e28f917 \ + --hash=sha256:544dd704ae7238755d790de45ba8da072e9af3eea688f698b137915ae959281c \ + --hash=sha256:791135ee325f2336f498590eb2f11dc5c295232f288e75c99a36c5dbce63088a \ + --hash=sha256:9ce725d22864a1e965217204946f830c37876eee3b2ba6fc6255e8e903d5fcbc \ + --hash=sha256:a6bf3f88c554a2b653af81f3204491c818ae2ac6fbc09e76ef4773351292bc92 \ + --hash=sha256:bfb88f22a209ff7b40a576d5324bf8286b519d7358663db21d6246fb17eea2d5 \ + --hash=sha256:c9ea31edff2968b44a88f97d784c2f16dc0729b8b143ed004699ebca91f05c48 \ + --hash=sha256:df6c4265b289083bf710dff49bc51ef252f9d5be33a45ee2bed151114a56207b + # via litellm +tornado==6.5.4 \ + --hash=sha256:053e6e16701eb6cbe641f308f4c1a9541f91b6261991160391bfc342e8a551a1 \ + --hash=sha256:1768110f2411d5cd281bac0a090f707223ce77fd110424361092859e089b38d1 \ + --hash=sha256:2d50f63dda1d2cac3ae1fa23d254e16b5e38153758470e9956cbc3d813d40843 \ + --hash=sha256:50ff0a58b0dc97939d29da29cd624da010e7f804746621c78d14b80238669335 \ + --hash=sha256:6076d5dda368c9328ff41ab5d9dd3608e695e8225d1cd0fd1e006f05da3635a8 \ + --hash=sha256:6eb82872335a53dd063a4f10917b3efd28270b56a33db69009606a0312660a6f \ + --hash=sha256:9c86b1643b33a4cd415f8d0fe53045f913bf07b4a3ef646b735a6a86047dda84 \ + --hash=sha256:a22fa9047405d03260b483980635f0b041989d8bcc9a313f8fe18b411d84b1d7 \ + --hash=sha256:d1cf66105dc6acb5af613c054955b8137e34a03698aa53272dbda4afe252be17 \ + --hash=sha256:d6241c1a16b1c9e4cc28148b1cda97dd1c6cb4fb7068ac1bedc610768dff0ba9 \ + --hash=sha256:e5fb5e04efa54cf0baabdd10061eb4148e0be137166146fff835745f59ab9f7f \ + 
--hash=sha256:fa07d31e0cd85c60713f2b995da613588aa03e1303d75705dca6af8babc18ddc + # via + # ipykernel + # jupyter-client + # jupyter-server + # jupyterlab + # notebook + # terminado +tqdm==4.67.2 \ + --hash=sha256:649aac53964b2cb8dec76a14b405a4c0d13612cb8933aae547dd144eacc99653 \ + --hash=sha256:9a12abcbbff58b6036b2167d9d3853042b9d436fe7330f06ae047867f2f8e0a7 + # via + # huggingface-hub + # openai +traitlets==5.14.3 \ + --hash=sha256:9ed0579d3502c94b4b3732ac120375cda96f923114522847de4b3bb98b96b6b7 \ + --hash=sha256:b74e89e397b1ed28cc831db7aea759ba6640cb3de13090ca145426688ff1ac4f + # via + # ipykernel + # ipython + # ipywidgets + # jupyter-client + # jupyter-console + # jupyter-core + # jupyter-events + # jupyter-server + # jupyterlab + # matplotlib-inline + # nbclient + # nbconvert + # nbformat +typer-slim==0.21.1 \ + --hash=sha256:6e6c31047f171ac93cc5a973c9e617dbc5ab2bddc4d0a3135dc161b4e2020e0d \ + --hash=sha256:73495dd08c2d0940d611c5a8c04e91c2a0a98600cbd4ee19192255a233b6dbfd + # via huggingface-hub +typing-extensions==4.15.0 \ + --hash=sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466 \ + --hash=sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548 + # via + # aiosignal + # anyio + # azure-core + # azure-identity + # azure-keyvault-secrets + # azure-storage-blob + # beautifulsoup4 + # flask-limiter + # grpcio + # huggingface-hub + # ipython + # limits + # openai + # pydantic + # pydantic-core + # referencing + # typer-slim + # typing-inspection +typing-inspection==0.4.2 \ + --hash=sha256:4ed1cacbdc298c220f1bd249ed5287caa16f34d44ef4e9c3d0cbad5b521545e7 \ + --hash=sha256:ba561c48a67c5958007083d386c3295464928b01faa735ab8547c5692e87f464 + # via pydantic +tzdata==2025.3 \ + --hash=sha256:06a47e5700f3081aab02b2e513160914ff0694bce9947d6b76ebd6bf57cfc5d1 \ + --hash=sha256:de39c2ca5dc7b0344f2eba86f49d614019d29f060fc4ebc8a417896a620b56a7 + # via + # arrow + # pandas +uri-template==1.3.0 \ + --hash=sha256:0e00f8eb65e18c7de20d595a14336e9f337ead580c70934141624b6d1ffdacc7 \ + --hash=sha256:a44a133ea12d44a0c0f06d7d42a52d71282e77e2f937d8abd5655b8d56fc1363 + # via jsonschema +urllib3==2.6.3 \ + --hash=sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed \ + --hash=sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4 + # via + # botocore + # requests +vega-datasets==0.9.0 \ + --hash=sha256:3d7c63917be6ca9b154b565f4779a31fedce57b01b5b9d99d8a34a7608062a1d \ + --hash=sha256:9dbe9834208e8ec32ab44970df315de9102861e4cda13d8e143aab7a80d93fc0 + # via data-formulator +vl-convert-python==1.9.0.post1 \ + --hash=sha256:3c1558fa0055e88c465bd3d71760cde9fa2c94a95f776a0ef9178252fd820b1f \ + --hash=sha256:43e9515f65bbcd317d1ef328787fd7bf0344c2fde9292eb7a0e64d5d3d29fccb \ + --hash=sha256:7e263269ac0d304640ca842b44dfe430ed863accd9edecff42e279bfc48ce940 \ + --hash=sha256:a5b06b3128037519001166f5341ec7831e19fbd7f3a5f78f73d557ac2d5859ef \ + --hash=sha256:b0e7a3245f32addec7e7abeb1badf72b1513ed71ba1dba7aca853901217b3f4e \ + --hash=sha256:e6ecfe4b7e2ea9e8c30fd6d6eaea3ef85475be1ad249407d9796dce4ecdb5b32 + # via data-formulator +wcwidth==0.5.3 \ + --hash=sha256:53123b7af053c74e9fe2e92ac810301f6139e64379031f7124574212fb3b4091 \ + --hash=sha256:d584eff31cd4753e1e5ff6c12e1edfdb324c995713f75d26c29807bb84bf649e + # via prompt-toolkit +webcolors==25.10.0 \ + --hash=sha256:032c727334856fc0b968f63daa252a1ac93d33db2f5267756623c210e57a4f1d \ + --hash=sha256:62abae86504f66d0f6364c2a8520de4a0c47b80c03fc3a5f1815fedbef7c19bf + # via jsonschema +webencodings==0.5.1 \ + 
--hash=sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78 \ + --hash=sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923 + # via + # bleach + # tinycss2 +websocket-client==1.9.0 \ + --hash=sha256:9e813624b6eb619999a97dc7958469217c3176312b3a16a4bd1bc7e08a46ec98 \ + --hash=sha256:af248a825037ef591efbf6ed20cc5faa03d3b47b9e5a2230a529eeee1c1fc3ef + # via jupyter-server +websockets==16.0 \ + --hash=sha256:0298d07ee155e2e9fda5be8a9042200dd2e3bb0b8a38482156576f863a9d457c \ + --hash=sha256:08d7af67b64d29823fed316505a89b86705f2b7981c07848fb5e3ea3020c1abe \ + --hash=sha256:152284a83a00c59b759697b7f9e9cddf4e3c7861dd0d964b472b70f78f89e80e \ + --hash=sha256:1637db62fad1dc833276dded54215f2c7fa46912301a24bd94d45d46a011ceec \ + --hash=sha256:19c4dc84098e523fd63711e563077d39e90ec6702aff4b5d9e344a60cb3c0cb1 \ + --hash=sha256:1c1b30e4f497b0b354057f3467f56244c603a79c0d1dafce1d16c283c25f6e64 \ + --hash=sha256:31a52addea25187bde0797a97d6fc3d2f92b6f72a9370792d65a6e84615ac8a8 \ + --hash=sha256:32da954ffa2814258030e5a57bc73a3635463238e797c7375dc8091327434206 \ + --hash=sha256:3425ac5cf448801335d6fdc7ae1eb22072055417a96cc6b31b3861f455fbc156 \ + --hash=sha256:349f83cd6c9a415428ee1005cadb5c2c56f4389bc06a9af16103c3bc3dcc8b7d \ + --hash=sha256:417b28978cdccab24f46400586d128366313e8a96312e4b9362a4af504f3bbad \ + --hash=sha256:485c49116d0af10ac698623c513c1cc01c9446c058a4e61e3bf6c19dff7335a2 \ + --hash=sha256:4a1aba3340a8dca8db6eb5a7986157f52eb9e436b74813764241981ca4888f03 \ + --hash=sha256:50f23cdd8343b984957e4077839841146f67a3d31ab0d00e6b824e74c5b2f6e8 \ + --hash=sha256:52a0fec0e6c8d9a784c2c78276a48a2bdf099e4ccc2a4cad53b27718dbfd0230 \ + --hash=sha256:52ac480f44d32970d66763115edea932f1c5b1312de36df06d6b219f6741eed8 \ + --hash=sha256:5569417dc80977fc8c2d43a86f78e0a5a22fee17565d78621b6bb264a115d4ea \ + --hash=sha256:569d01a4e7fba956c5ae4fc988f0d4e187900f5497ce46339c996dbf24f17641 \ + --hash=sha256:5a4b4cc550cb665dd8a47f868c8d04c8230f857363ad3c9caf7a0c3bf8c61ca6 \ + --hash=sha256:5f451484aeb5cafee1ccf789b1b66f535409d038c56966d6101740c1614b86c6 \ + --hash=sha256:5f6261a5e56e8d5c42a4497b364ea24d94d9563e8fbd44e78ac40879c60179b5 \ + --hash=sha256:6e5a82b677f8f6f59e8dfc34ec06ca6b5b48bc4fcda346acd093694cc2c24d8f \ + --hash=sha256:71c989cbf3254fbd5e84d3bff31e4da39c43f884e64f2551d14bb3c186230f00 \ + --hash=sha256:781caf5e8eee67f663126490c2f96f40906594cb86b408a703630f95550a8c3e \ + --hash=sha256:7be95cfb0a4dae143eaed2bcba8ac23f4892d8971311f1b06f3c6b78952ee70b \ + --hash=sha256:86890e837d61574c92a97496d590968b23c2ef0aeb8a9bc9421d174cd378ae39 \ + --hash=sha256:878b336ac47938b474c8f982ac2f7266a540adc3fa4ad74ae96fea9823a02cc9 \ + --hash=sha256:8b6e209ffee39ff1b6d0fa7bfef6de950c60dfb91b8fcead17da4ee539121a79 \ + --hash=sha256:8cc451a50f2aee53042ac52d2d053d08bf89bcb31ae799cb4487587661c038a0 \ + --hash=sha256:8d7f0659570eefb578dacde98e24fb60af35350193e4f56e11190787bee77dac \ + --hash=sha256:95724e638f0f9c350bb1c2b0a7ad0e83d9cc0c9259f3ea94e40d7b02a2179ae5 \ + --hash=sha256:9b5aca38b67492ef518a8ab76851862488a478602229112c4b0d58d63a7a4d5c \ + --hash=sha256:a069d734c4a043182729edd3e9f247c3b2a4035415a9172fd0f1b71658a320a8 \ + --hash=sha256:a0b31e0b424cc6b5a04b8838bbaec1688834b2383256688cf47eb97412531da1 \ + --hash=sha256:a35539cacc3febb22b8f4d4a99cc79b104226a756aa7400adc722e83b0d03244 \ + --hash=sha256:a5e18a238a2b2249c9a9235466b90e96ae4795672598a58772dd806edc7ac6d3 \ + --hash=sha256:a653aea902e0324b52f1613332ddf50b00c06fdaf7e92624fbf8c77c78fa5767 \ + 
--hash=sha256:abf050a199613f64c886ea10f38b47770a65154dc37181bfaff70c160f45315a \ + --hash=sha256:af80d74d4edfa3cb9ed973a0a5ba2b2a549371f8a741e0800cb07becdd20f23d \ + --hash=sha256:b14dc141ed6d2dde437cddb216004bcac6a1df0935d79656387bd41632ba0bbd \ + --hash=sha256:b784ca5de850f4ce93ec85d3269d24d4c82f22b7212023c974c401d4980ebc5e \ + --hash=sha256:bc59589ab64b0022385f429b94697348a6a234e8ce22544e3681b2e9331b5944 \ + --hash=sha256:c0204dc62a89dc9d50d682412c10b3542d748260d743500a85c13cd1ee4bde82 \ + --hash=sha256:c0ee0e63f23914732c6d7e0cce24915c48f3f1512ec1d079ed01fc629dab269d \ + --hash=sha256:caab51a72c51973ca21fa8a18bd8165e1a0183f1ac7066a182ff27107b71e1a4 \ + --hash=sha256:d6297ce39ce5c2e6feb13c1a996a2ded3b6832155fcfc920265c76f24c7cceb5 \ + --hash=sha256:daa3b6ff70a9241cf6c7fc9e949d41232d9d7d26fd3522b1ad2b4d62487e9904 \ + --hash=sha256:e0334872c0a37b606418ac52f6ab9cfd17317ac26365f7f65e203e2d0d0d359f \ + --hash=sha256:e6578ed5b6981005df1860a56e3617f14a6c307e6a71b4fff8c48fdc50f3ed2c \ + --hash=sha256:eaded469f5e5b7294e2bdca0ab06becb6756ea86894a47806456089298813c89 \ + --hash=sha256:f4a32d1bd841d4bcbffdcb3d2ce50c09c3909fbead375ab28d0181af89fd04da \ + --hash=sha256:fd3cb4adb94a2a6e2b7c0d8d05cb94e6f1c81a0cf9dc2694fb65c7e8d94c42e4 + # via yfinance +werkzeug==3.1.5 \ + --hash=sha256:5111e36e91086ece91f93268bb39b4a35c1e6f1feac762c9c822ded0a4e322dc \ + --hash=sha256:6a548b0e88955dd07ccb25539d7d0cc97417ee9e179677d22c7041c8f078ce67 + # via + # flask + # flask-cors +widgetsnbextension==4.0.15 \ + --hash=sha256:8156704e4346a571d9ce73b84bee86a29906c9abfd7223b7228a28899ccf3366 \ + --hash=sha256:de8610639996f1567952d763a5a41af8af37f2575a41f9852a38f947eb82a3b9 + # via ipywidgets +wrapt==2.1.0 \ + --hash=sha256:01559d2961c29edc6263849fd9d32b29a20737da67648c7fd752a67bd96208c7 \ + --hash=sha256:0b660be1c9cdfb4c711baab4ccbd0e9d1b65a0480d38729ec8cdbf3b29cb7f15 \ + --hash=sha256:0e9129d1b582c55ad0dfb9e29e221daa0e02b18c67d8642bc8d08dd7038b3aed \ + --hash=sha256:0fa64a9a07df7f85b352adc42b43e7f44085fb11191b8f5b9b77219f7aaf7e17 \ + --hash=sha256:0ff9797e6e0b82b330ef80b0cdba7fcd0ca056d4c7af2ca44e3d05fd47929ede \ + --hash=sha256:12687e6271df7ae5706bee44cc1f77fecb7805976ec9f14f58381b30ae2aceb5 \ + --hash=sha256:2893498fe898719ac8fb6b4fe36ca86892bec1e2480d94e3bd1bc592c00527ad \ + --hash=sha256:2ccc89cd504fc29c32f0b24046e8edf3ef0fcbc5d5efe8c91b303c099863d2c8 \ + --hash=sha256:2cd647097df1df78f027ac7d5d663f05daa1a117b69cf7f476cb299f90557747 \ + --hash=sha256:355779ff720c11a2a5cffd03332dbce1005cb4747dca65b0fc8cdd5f8bf1037e \ + --hash=sha256:38bbe336ee32f67eb99f886bd4f040d91310b7e660061bb03b9083d26e8cf915 \ + --hash=sha256:38de19e30e266c15d542ceb0603e657db4e82c53e7f47fd70674ae5da2b41180 \ + --hash=sha256:3e2e156fe2d41700b837be9b1d8d80ebab44e9891589bc7c41578ef110184e29 \ + --hash=sha256:46583aae3c807aa76f96355c4943031225785ed160c84052612bba0e9d456639 \ + --hash=sha256:4b0a29509ef7b501abe47b693a3c91d1f21c9a948711f6ce7afa81eb274c7eae \ + --hash=sha256:52bb58b3207ace156b6134235fd43140994597704fd07d148cbcfb474ee084ea \ + --hash=sha256:5509d9150ed01c4149e40020fa68e917d5c4bb77d311e79535565c2a0418afcb \ + --hash=sha256:57df799e67b011847ef7ac64b05ed4633e56b64e7e7cab5eb83dc9689dbe0acf \ + --hash=sha256:5bacf063143fa86f15b00a21259a81c95c527a18d504b8c820835366d361c879 \ + --hash=sha256:6653bf30dbbafd55cb4553195cc60b94920b6711a8835866c0e02aa9f22c5598 \ + --hash=sha256:66f588c8b3a44863156cfaccb516f946a64b3b03a6880822ab0b878135ca1f5c \ + --hash=sha256:7112cbf72fc4035afe1e3314a311654c41dd92c2932021ef76f5ca87583917b3 \ + 
--hash=sha256:737e1e491473047cb66944b8b8fd23f3f542019afd6cf0569d1356d18a7ea6d5 \ + --hash=sha256:73e742368b52f9cf0921e1d2bcb8a6a44ede2e372e33df6e77caa136a942099f \ + --hash=sha256:757ff1de7e1d8db1839846672aaecf4978af433cc57e808255b83980e9651914 \ + --hash=sha256:771ec962fe3ccb078177c9b8f3529e204ffcbb11d62d509e0a438e6a83f7ca68 \ + --hash=sha256:7a0471df3fb4e85a9ff62f7142cdb169e31172467cdb79a713f9b1319c555903 \ + --hash=sha256:7c06653908a23a85c4b2455b9d37c085f9756c09058df87b4a2fce2b2f8d58c2 \ + --hash=sha256:7f7bf95bae7ac5f2bbcb307464b3b0ff70569dd3b036a87b1cf7efb2c76e66e5 \ + --hash=sha256:875a10a6f3b667f90a39010af26acf684ba831d9b18a86b242899d57c74550fa \ + --hash=sha256:9b2da9c8f1723994b335dbf9f496fbfabc76bcdd001f73772b8eb2118a714cea \ + --hash=sha256:9e971000347f61271725e801ef44fa5d01b52720e59737f0d96280bffb98c5d1 \ + --hash=sha256:9f1e9bac6a6c1ba65e0ac50e32c575266734a07b6c17e718c4babd91e2faa69b \ + --hash=sha256:a64c0fb29c89810973f312a04c067b63523e7303b9a2653820cbf16474c2e5cf \ + --hash=sha256:a7b158558438874e5fd5cb505b5a635bd08c84857bc937973d9e12e1166cdf3b \ + --hash=sha256:ad3aa174d06a14b4758d5a1678b9adde8b8e657c6695de9a3d4c223f4fcbbcce \ + --hash=sha256:bc7d496b6e16bd2f77e37e8969b21a7b58d6954e46c6689986fb67b9078100e5 \ + --hash=sha256:be2f541a242818829526e5d08c716b6730970ed0dc1b76ba962a546947d0f005 \ + --hash=sha256:bffa584240d41bc3127510e07a752f94223d73bb1283ac2e99ac44235762efd2 \ + --hash=sha256:c0fc3e388a14ef8101c685dc80b4d2932924a639a03e5c44b5ffabbda2f1f2dc \ + --hash=sha256:c70b4829c6f2f4af4cdaa16442032fcaf882063304160555e4a19b43fd2c6c9d \ + --hash=sha256:c87cd4f61a3b7cd65113e74006e1cd6352b74807fcc65d440e8342f001f8de5e \ + --hash=sha256:cbc07f101f5f1e7c23ec06a07e45715f459de992108eeb381b21b76d94dbaf4f \ + --hash=sha256:cc9e37bfe67f6ea738851dd606640a87692ff81bcc76df313fb75d08e05e855f \ + --hash=sha256:ce0cf4c79c19904aaf2e822af280d7b3c23ad902f57e31c5a19433bc86e5d36d \ + --hash=sha256:d3dd4f8c2256fcde1a85037a1837afc52e8d32d086fd669ae469455fd9a988d6 \ + --hash=sha256:d61238a072501ed071a9f4b9567d10c2eb3d2f1a0258ae79b47160871d8f29c3 \ + --hash=sha256:d7fd4c4ee51ebdf245549d54a7c2181a4f39caac97c9dc8a050b5ba814067a29 \ + --hash=sha256:d877003dbc601e1365bd03f6a980965a20d585f90c056f33e1fc241b63a6f0e7 \ + --hash=sha256:da379cbdf3b7d97ace33a69a391b7a7e2130b1aca94dc447246217994233974c \ + --hash=sha256:e00f8559ceac0fb45091daad5f15d37f2c22bdc28ed71521d47ff01aad8fff3d \ + --hash=sha256:e035693a0d25ea5bf5826df3e203dff7d091b0d5442aaefec9ca8f2bab38417f \ + --hash=sha256:e3958ba70aef2895d8c62c2d31f51ced188f60451212294677b92f4b32c12978 \ + --hash=sha256:e45f54903da38fc4f6f66397fd550fc0dac6164b4c5e721c1b4eb05664181821 \ + --hash=sha256:e90656b433808a0ab68e95aaf9f588aea5c8c7a514e180849dfc638ba00ec449 \ + --hash=sha256:eabe95ea5fbe1524a53c0f3fc535c99f2aa376ec1451b0b79d943d2240d80e36 + # via deprecated +yarl==1.22.0 \ + --hash=sha256:01e73b85a5434f89fc4fe27dcda2aff08ddf35e4d47bbbea3bdcd25321af538a \ + --hash=sha256:078278b9b0b11568937d9509b589ee83ef98ed6d561dfe2020e24a9fd08eaa2b \ + --hash=sha256:078a8aefd263f4d4f923a9677b942b445a2be970ca24548a8102689a3a8ab8da \ + --hash=sha256:0b5bcc1a9c4839e7e30b7b30dd47fe5e7e44fb7054ec29b5bb8d526aa1041093 \ + --hash=sha256:0d6e6885777af0f110b0e5d7e5dda8b704efed3894da26220b7f3d887b839a79 \ + --hash=sha256:0dd9a702591ca2e543631c2a017e4a547e38a5c0f29eece37d9097e04a7ac683 \ + --hash=sha256:131a085a53bfe839a477c0845acf21efc77457ba2bcf5899618136d64f3303a2 \ + --hash=sha256:1380560bdba02b6b6c90de54133c81c9f2a453dee9912fe58c1dcced1edb7cff \ + 
--hash=sha256:139718f35149ff544caba20fce6e8a2f71f1e39b92c700d8438a0b1d2a631a02 \ + --hash=sha256:1834bb90991cc2999f10f97f5f01317f99b143284766d197e43cd5b45eb18d03 \ + --hash=sha256:1ab72135b1f2db3fed3997d7e7dc1b80573c67138023852b6efb336a5eae6511 \ + --hash=sha256:1e7ce67c34138a058fd092f67d07a72b8e31ff0c9236e751957465a24b28910c \ + --hash=sha256:1e8fbaa7cec507aa24ea27a01456e8dd4b6fab829059b69844bd348f2d467124 \ + --hash=sha256:22965c2af250d20c873cdbee8ff958fb809940aeb2e74ba5f20aaf6b7ac8c70c \ + --hash=sha256:22b029f2881599e2f1b06f8f1db2ee63bd309e2293ba2d566e008ba12778b8da \ + --hash=sha256:243dda95d901c733f5b59214d28b0120893d91777cb8aa043e6ef059d3cddfe2 \ + --hash=sha256:2ca6fd72a8cd803be290d42f2dec5cdcd5299eeb93c2d929bf060ad9efaf5de0 \ + --hash=sha256:31f0b53913220599446872d757257be5898019c85e7971599065bc55065dc99d \ + --hash=sha256:334b8721303e61b00019474cc103bdac3d7b1f65e91f0bfedeec2d56dfe74b53 \ + --hash=sha256:33e32a0dd0c8205efa8e83d04fc9f19313772b78522d1bdc7d9aed706bfd6138 \ + --hash=sha256:34b36c2c57124530884d89d50ed2c1478697ad7473efd59cfd479945c95650e4 \ + --hash=sha256:3b06bcadaac49c70f4c88af4ffcfbe3dc155aab3163e75777818092478bcbbe7 \ + --hash=sha256:3b7c88eeef021579d600e50363e0b6ee4f7f6f728cd3486b9d0f3ee7b946398d \ + --hash=sha256:3e2daa88dc91870215961e96a039ec73e4937da13cf77ce17f9cad0c18df3503 \ + --hash=sha256:3ea66b1c11c9150f1372f69afb6b8116f2dd7286f38e14ea71a44eee9ec51b9d \ + --hash=sha256:42188e6a615c1a75bcaa6e150c3fe8f3e8680471a6b10150c5f7e83f47cc34d2 \ + --hash=sha256:433885ab5431bc3d3d4f2f9bd15bfa1614c522b0f1405d62c4f926ccd69d04fa \ + --hash=sha256:4398557cbf484207df000309235979c79c4356518fd5c99158c7d38203c4da4f \ + --hash=sha256:45c2842ff0e0d1b35a6bf1cd6c690939dacb617a70827f715232b2e0494d55d1 \ + --hash=sha256:47743b82b76d89a1d20b83e60d5c20314cbd5ba2befc9cda8f28300c4a08ed4d \ + --hash=sha256:4792b262d585ff0dff6bcb787f8492e40698443ec982a3568c2096433660c694 \ + --hash=sha256:47d8a5c446df1c4db9d21b49619ffdba90e77c89ec6e283f453856c74b50b9e3 \ + --hash=sha256:47fdb18187e2a4e18fda2c25c05d8251a9e4a521edaed757fef033e7d8498d9a \ + --hash=sha256:4c52a6e78aef5cf47a98ef8e934755abf53953379b7d53e68b15ff4420e6683d \ + --hash=sha256:50678a3b71c751d58d7908edc96d332af328839eea883bb554a43f539101277a \ + --hash=sha256:51af598701f5299012b8416486b40fceef8c26fc87dc6d7d1f6fc30609ea0aa6 \ + --hash=sha256:594fcab1032e2d2cc3321bb2e51271e7cd2b516c7d9aee780ece81b07ff8244b \ + --hash=sha256:59c189e3e99a59cf8d83cbb31d4db02d66cda5a1a4374e8a012b51255341abf5 \ + --hash=sha256:5a3bf7f62a289fa90f1990422dc8dff5a458469ea71d1624585ec3a4c8d6960f \ + --hash=sha256:5c401e05ad47a75869c3ab3e35137f8468b846770587e70d71e11de797d113df \ + --hash=sha256:5d0fcda9608875f7d052eff120c7a5da474a6796fe4d83e152e0e4d42f6d1a9b \ + --hash=sha256:669930400e375570189492dc8d8341301578e8493aec04aebc20d4717f899dd6 \ + --hash=sha256:68986a61557d37bb90d3051a45b91fa3d5c516d177dfc6dd6f2f436a07ff2b6b \ + --hash=sha256:6944b2dc72c4d7f7052683487e3677456050ff77fcf5e6204e98caf785ad1967 \ + --hash=sha256:6a635ea45ba4ea8238463b4f7d0e721bad669f80878b7bfd1f89266e2ae63da2 \ + --hash=sha256:6c5010a52015e7c70f86eb967db0f37f3c8bd503a695a49f8d45700144667708 \ + --hash=sha256:70dfd4f241c04bd9239d53b17f11e6ab672b9f1420364af63e8531198e3f5fe8 \ + --hash=sha256:719ae08b6972befcba4310e49edb1161a88cdd331e3a694b84466bd938a6ab10 \ + --hash=sha256:7861058d0582b847bc4e3a4a4c46828a410bca738673f35a29ba3ca5db0b473b \ + --hash=sha256:792a2af6d58177ef7c19cbf0097aba92ca1b9cb3ffdd9c7470e156c8f9b5e028 \ + 
--hash=sha256:8009b3173bcd637be650922ac455946197d858b3630b6d8787aa9e5c4564533e \ + --hash=sha256:8218f4e98d3c10d683584cb40f0424f4b9fd6e95610232dd75e13743b070ee33 \ + --hash=sha256:852863707010316c973162e703bddabec35e8757e67fcb8ad58829de1ebc8590 \ + --hash=sha256:8884d8b332a5e9b88e23f60bb166890009429391864c685e17bd73a9eda9105c \ + --hash=sha256:8dee9c25c74997f6a750cd317b8ca63545169c098faee42c84aa5e506c819b53 \ + --hash=sha256:939fe60db294c786f6b7c2d2e121576628468f65453d86b0fe36cb52f987bd74 \ + --hash=sha256:9d7672ecf7557476642c88497c2f8d8542f8e36596e928e9bcba0e42e1e7d71f \ + --hash=sha256:9f6d73c1436b934e3f01df1e1b21ff765cd1d28c77dfb9ace207f746d4610ee1 \ + --hash=sha256:9fb17ea16e972c63d25d4a97f016d235c78dd2344820eb35bc034bc32012ee27 \ + --hash=sha256:a49370e8f711daec68d09b821a34e1167792ee2d24d405cbc2387be4f158b520 \ + --hash=sha256:a9b1ba5610a4e20f655258d5a1fdc7ebe3d837bb0e45b581398b99eb98b1f5ca \ + --hash=sha256:b0748275abb8c1e1e09301ee3cf90c8a99678a4e92e4373705f2a2570d581273 \ + --hash=sha256:b266bd01fedeffeeac01a79ae181719ff848a5a13ce10075adbefc8f1daee70e \ + --hash=sha256:b4f15793aa49793ec8d1c708ab7f9eded1aa72edc5174cae703651555ed1b601 \ + --hash=sha256:b6a6f620cfe13ccec221fa312139135166e47ae169f8253f72a0abc0dae94376 \ + --hash=sha256:b790b39c7e9a4192dc2e201a282109ed2985a1ddbd5ac08dc56d0e121400a8f7 \ + --hash=sha256:b8a0588521a26bf92a57a1705b77b8b59044cdceccac7151bd8d229e66b8dedb \ + --hash=sha256:ba440ae430c00eee41509353628600212112cd5018d5def7e9b05ea7ac34eb65 \ + --hash=sha256:bca03b91c323036913993ff5c738d0842fc9c60c4648e5c8d98331526df89784 \ + --hash=sha256:bebf8557577d4401ba8bd9ff33906f1376c877aa78d1fe216ad01b4d6745af71 \ + --hash=sha256:bec03d0d388060058f5d291a813f21c011041938a441c593374da6077fe21b1b \ + --hash=sha256:bf4a21e58b9cde0e401e683ebd00f6ed30a06d14e93f7c8fd059f8b6e8f87b6a \ + --hash=sha256:c0232bce2170103ec23c454e54a57008a9a72b5d1c3105dc2496750da8cfa47c \ + --hash=sha256:c4647674b6150d2cae088fc07de2738a84b8bcedebef29802cf0b0a82ab6face \ + --hash=sha256:c7044802eec4524fde550afc28edda0dd5784c4c45f0be151a2d3ba017daca7d \ + --hash=sha256:ca1f59c4e1ab6e72f0a23c13fca5430f889634166be85dbf1013683e49e3278e \ + --hash=sha256:cfebc0ac8333520d2d0423cbbe43ae43c8838862ddb898f5ca68565e395516e9 \ + --hash=sha256:d3e32536234a95f513bd374e93d717cf6b2231a791758de6c509e3653f234c95 \ + --hash=sha256:d5372ca1df0f91a86b047d1277c2aaf1edb32d78bbcefffc81b40ffd18f027ed \ + --hash=sha256:d77e1b2c6d04711478cb1c4ab90db07f1609ccf06a287d5607fcd90dc9863acf \ + --hash=sha256:d947071e6ebcf2e2bee8fce76e10faca8f7a14808ca36a910263acaacef08eca \ + --hash=sha256:de6b9a04c606978fdfe72666fa216ffcf2d1a9f6a381058d4378f8d7b1e5de62 \ + --hash=sha256:e1651bf8e0398574646744c1885a41198eba53dc8a9312b954073f845c90a8df \ + --hash=sha256:e1b51bebd221006d3d2f95fbe124b22b247136647ae5dcc8c7acafba66e5ee67 \ + --hash=sha256:e340382d1afa5d32b892b3ff062436d592ec3d692aeea3bef3a5cfe11bbf8c6f \ + --hash=sha256:e4b582bab49ac33c8deb97e058cd67c2c50dac0dd134874106d9c774fd272529 \ + --hash=sha256:e51ac5435758ba97ad69617e13233da53908beccc6cfcd6c34bbed8dcbede486 \ + --hash=sha256:e5542339dcf2747135c5c85f68680353d5cb9ffd741c0f2e8d832d054d41f35a \ + --hash=sha256:e6438cc8f23a9c1478633d216b16104a586b9761db62bfacb6425bac0a36679e \ + --hash=sha256:ea70f61a47f3cc93bdf8b2f368ed359ef02a01ca6393916bc8ff877427181e74 \ + --hash=sha256:ebd4549b108d732dba1d4ace67614b9545b21ece30937a63a65dd34efa19732d \ + --hash=sha256:efb07073be061c8f79d03d04139a80ba33cbd390ca8f0297aae9cce6411e4c6b \ + 
--hash=sha256:f1e09112a2c31ffe8d80be1b0988fa6a18c5d5cad92a9ffbb1c04c91bfe52ad2 \ + --hash=sha256:f3d7a87a78d46a2e3d5b72587ac14b4c16952dd0887dbb051451eceac774411e \ + --hash=sha256:f4afb5c34f2c6fecdcc182dfcfc6af6cccf1aa923eed4d6a12e9d96904e1a0d8 \ + --hash=sha256:f6d2cb59377d99718913ad9a151030d6f83ef420a2b8f521d94609ecc106ee82 \ + --hash=sha256:f87ac53513d22240c7d59203f25cc3beac1e574c6cd681bbfd321987b69f95fd \ + --hash=sha256:ff86011bd159a9d2dfc89c34cfd8aff12875980e3bd6a39ff097887520e60249 + # via aiohttp +yfinance==1.1.0 \ + --hash=sha256:1e852ce10a5d6679200efa2b09ed3f3b01dbe84505e2c1c139be7d2b3a597b10 \ + --hash=sha256:e610fec1d2b052e3b8f2cf44bdcff014bcf15458a9b072d5a3e02507e20d69d2 + # via data-formulator +zipp==3.23.0 \ + --hash=sha256:071652d6115ed432f5ce1d34c336c0adfd6a884660d1e9712a256d3d3bd4b14e \ + --hash=sha256:a07157588a12518c9d4034df3fbbee09c814741a33ff63c05fa29d26a2404166 + # via importlib-metadata diff --git a/src/app/App.tsx b/src/app/App.tsx index 209f3ac5..5bc8d6ea 100644 --- a/src/app/App.tsx +++ b/src/app/App.tsx @@ -10,8 +10,8 @@ import { dfActions, dfSelectors, fetchAvailableModels, - getSessionId, } from './dfSlice' +import { getBrowserId } from './identity'; import { red, purple, blue, brown, yellow, orange, } from '@mui/material/colors'; @@ -68,7 +68,7 @@ import KeyboardArrowDownIcon from '@mui/icons-material/KeyboardArrowDown'; import UploadFileIcon from '@mui/icons-material/UploadFile'; import DownloadIcon from '@mui/icons-material/Download'; import { handleDBDownload } from '../views/DBTableManager'; -import { getUrls } from './utils'; +import { getUrls, fetchWithIdentity } from './utils'; import { UnifiedDataUploadDialog } from '../views/UnifiedDataUploadDialog'; import ChatIcon from '@mui/icons-material/Chat'; import { AgentRulesDialog } from '../views/AgentRulesDialog'; @@ -159,7 +159,7 @@ export const ImportStateButton: React.FC<{}> = ({ }) => { } export const ExportStateButton: React.FC<{}> = ({ }) => { - const sessionId = useSelector((state: DataFormulatorState) => state.sessionId); + const identity = useSelector((state: DataFormulatorState) => state.identity); const tables = useSelector((state: DataFormulatorState) => state.tables); const fullStateJson = useSelector((state: DataFormulatorState) => { // Fields to exclude from serialization @@ -168,7 +168,7 @@ export const ExportStateButton: React.FC<{}> = ({ }) => { 'selectedModelId', 'testedModels', 'dataLoaderConnectParams', - 'sessionId', + 'identity', 'agentRules', 'serverConfig', ]); @@ -197,7 +197,7 @@ export const ExportStateButton: React.FC<{}> = ({ }) => { a.click(); } let firstTableName = tables.length > 0 ? 
tables[0].id: ''; - download(fullStateJson, `df_state_${firstTableName}_${sessionId?.slice(0, 4)}.json`, 'text/plain'); + download(fullStateJson, `df_state_${firstTableName}_${identity.id.slice(0, 4)}.json`, 'text/plain'); }} startIcon={} > @@ -241,7 +241,7 @@ const TableMenu: React.FC = () => { const SessionMenu: React.FC = () => { const [anchorEl, setAnchorEl] = useState(null); const open = Boolean(anchorEl); - const sessionId = useSelector((state: DataFormulatorState) => state.sessionId); + const identity = useSelector((state: DataFormulatorState) => state.identity); const tables = useSelector((state: DataFormulatorState) => state.tables); const theme = useTheme(); @@ -274,12 +274,12 @@ const SessionMenu: React.FC = () => { database file - {sessionId && tables.some(t => t.virtual) && + {tables.some(t => t.virtual) && This session contains data stored in the database, export and reload the database to resume the session later. } - t.virtual)} onClick={() => { - handleDBDownload(sessionId ?? ''); + t.virtual)} onClick={() => { + handleDBDownload(identity.id); }}> {}}> - - - ) : ( + let actionButtons = (