diff --git a/.github/workflows/code-format.yml b/.github/workflows/code-format.yml new file mode 100644 index 0000000..5d2989b --- /dev/null +++ b/.github/workflows/code-format.yml @@ -0,0 +1,50 @@ +name: Format Code + +on: + pull_request: + branches: [main] + paths: + - "**/*.py" + +permissions: + contents: write + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + format: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + with: + ref: ${{ github.head_ref }} + fetch-depth: 1 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: "3.10" + + - name: Install formatters + run: pip install black==24.3.0 isort==5.13.2 + + - name: Format Python files (black + isort) + run: | + isort --profile black . + black --line-length 160 . + + - name: Check for changes + id: git-check + run: git diff --exit-code || echo "changes=true" >> $GITHUB_OUTPUT + + - name: Commit formatting changes + if: steps.git-check.outputs.changes == 'true' + run: | + git config --global user.name 'Code Formatter' + git config --global user.email 'noreply@vectara.com' + git add . 
+ git commit -m "Apply code formatting (black + isort)" + git push diff --git a/.github/workflows/pr-validation.yml b/.github/workflows/pr-validation.yml new file mode 100644 index 0000000..f490282 --- /dev/null +++ b/.github/workflows/pr-validation.yml @@ -0,0 +1,35 @@ +name: Validate Tests + +on: + push: + paths: + - "**/*.py" + - "requirements.txt" + pull_request: + paths: + - "**/*.py" + - "requirements.txt" + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + validate: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: "3.10" + + - name: Install dependencies + run: pip install -r requirements.txt + + - name: Collect tests and validate markers + env: + VECTARA_API_KEY: dummy-for-collection-only + run: pytest tests/services/ --collect-only -q diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..94c00eb --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,56 @@ +# Development Guidelines + +## Build Commands +- Install deps: `pip install -r requirements.txt` +- Run all tests: `python run_tests.py --profile full` +- Run sanity tests: `python run_tests.py --profile sanity` +- Run core tests: `python run_tests.py --profile core` +- Run single service: `python run_tests.py --service auth` +- Run single test: `python -m pytest tests/services/auth/test_api_key_validation.py::TestApiKeyValidation::test_health_check -v` +- Run by keyword: `python -m pytest tests/services/ -k "test_health_check" -v` + +## Environment Variables +- `VECTARA_API_KEY` — required, Personal API key +- `VECTARA_BASE_URL` — defaults to `https://api.vectara.io`, use `https://api.vectara.dev` for staging + +## Project Structure +- `tests/services/<service>/` — test files organized by API service (auth, corpus, indexing, query, chat, agents) +- `tests/workflows/` — cross-service end-to-end flow tests +- `utils/client.py` — Vectara API client (single class, 
all HTTP methods) +- `utils/waiters.py` — polling helpers and SSE reader +- `utils/config.py` — environment-based configuration +- `fixtures/sample_data.py` — test data +- `run_tests.py` — CLI runner with `--profile` and `--service` flags + +## Test Markers +- Every service test must have exactly one depth marker: `@pytest.mark.sanity`, `@pytest.mark.core`, or `@pytest.mark.regression` +- Workflow tests use `@pytest.mark.workflow` +- Tests without markers fail collection +- `@pytest.mark.serial` for tests that must not run in parallel + +## Code Style +- Python: PEP8, type hints, snake_case for variables/functions, CamelCase for classes +- Imports: Group by standard library, third-party, then local imports +- Do not add trivial comments. Write self-documenting code with clear naming. Do not delete old explanatory comments though, they are good. +- Do add docstrings for modules and classes. +- Fully implement functionality, do not leave stubs "for later". +- Do not modify tests to make them pass — fix the code under test. +- Error handling: Use appropriate exceptions, avoid catching generic exceptions. +- Before creating a new class/type, search for existing types that serve a similar purpose. Extend existing types rather than creating near-duplicates. +- When modifying a class, modify methods directly rather than adding duplicate methods. +- Strongly prefer explicit types over `None` sentinels. + +## Test Conventions +- Each test is self-contained via fixtures. No test depends on another test having run. +- Use `unique_id` fixture for resource names to avoid collisions. +- Always use explicit UUID keys when creating corpora (`key=f"test_{uuid.uuid4().hex}"`). +- Never mutate the bootstrap API key used to run the suite. +- Use `wait_for()` from `utils/waiters.py` instead of `time.sleep()` for async operations. +- Cleanup resources in `try/finally` blocks. +- Module-scoped fixtures for shared corpora (read-heavy tests), function-scoped for CRUD tests. 
+- **Assertions must verify actual behavior, not just HTTP status.** Always verify response data, field values, and state changes — not just `response.success`. + +## General Behavior +- Treat the user as an expert. +- Be pithy — use short summaries of actions. +- When refactoring, spawn sub agents for manual updates rather than using sed/grep/awk. diff --git a/README.md b/README.md index b49bbab..0dabe88 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,6 @@ # Vectara API Test Suite -A comprehensive Python-based test suite for validating Vectara API functionality. Designed for customers running on-premise deployments to verify system integrity after version upgrades. - -## Features - -- **Comprehensive API Coverage**: Tests for Authentication, Corpus Management, Indexing, Query/Search, and Agents APIs -- **Simple Authentication**: Command-line argument or environment variable -- **Detailed Reporting**: HTML and JSON reports with response times and error diagnostics -- **Parallel Execution**: Run tests in parallel for faster validation -- **CI/CD Ready**: Easy integration with automated pipelines +A Python-based test suite for validating Vectara API functionality. Designed for deployment verification, smoke testing, and regression testing. ## Prerequisites @@ -17,201 +9,119 @@ A comprehensive Python-based test suite for validating Vectara API functionality ## Installation -1. Navigate to the test suite directory: - -```bash -cd vectara-api-tests -``` - -2. Install dependencies: - ```bash pip install -r requirements.txt ``` -## Test Account Setup - -Before running tests, you need a Vectara Personal API key. - -### Step 1: Copy an API Key - -1. Choose the account you want to test -2. Log into the Vectara Console as the **Account Owner** -3. Ensure you are comfortable testing within this account -4. 
Use your **Personal API key** for this account - ## Running Tests -### Command-Line Argument - -```bash -python run_tests.py --api-key YOUR_API_KEY -``` - -### Environment Variable (Recommended for CI/CD) +### Quick Start ```bash export VECTARA_API_KEY=your_api_key_here -python run_tests.py +python run_tests.py --profile sanity ``` -### For On-Premise Deployments - -Specify your custom API endpoint: +### Profiles ```bash -python run_tests.py --api-key YOUR_KEY --base-url https://your-vectara-instance.com +python run_tests.py --profile sanity # Fast deploy gate (~30s, 7 tests) +python run_tests.py --profile core # Post-deploy verification (~5 min, 40 tests) +python run_tests.py --profile regression # Edge cases + core (~56 tests) +python run_tests.py --profile full # Everything including workflows ``` -Or via environment variable: +### Select by Service ```bash -export VECTARA_API_KEY=your_key -export VECTARA_BASE_URL=https://your-vectara-instance.com -python run_tests.py +python run_tests.py --service auth +python run_tests.py --service agents,query +python run_tests.py --service corpus --profile sanity ``` -## Test Categories - -Run specific test categories: +### On-Premise Deployments ```bash -# Authentication tests only -python run_tests.py --api-key YOUR_KEY --tests auth - -# Multiple categories -python run_tests.py --api-key YOUR_KEY --tests corpus,indexing - -# All tests (default) -python run_tests.py --api-key YOUR_KEY --tests all +export VECTARA_BASE_URL=https://your-vectara-instance.com +python run_tests.py --profile core ``` -Available categories: -- `auth` - Authentication and authorization tests -- `corpus` - Corpus CRUD operations -- `indexing` - Document indexing tests -- `query` - Query, search, and RAG tests -- `agents` - Conversational AI agent tests -- `all` - Run all tests - -## Reporting - -### HTML Report +### Reporting ```bash -python run_tests.py --api-key YOUR_KEY --html-report +python run_tests.py --profile core --html-report # HTML report 
+python run_tests.py --profile core --json-report # JSON report +python run_tests.py --profile core --html-report --json-report # Both ``` -Reports are saved to `reports/test_report_YYYYMMDD_HHMMSS.html` - -### JSON Report (for CI/CD) - -```bash -python run_tests.py --api-key YOUR_KEY --json-report -``` +Reports are saved to `reports/` with descriptive names like `test_report_20260403_142530_core.html`. ### Parallel Execution -Speed up test runs with parallel workers: - ```bash -python run_tests.py --api-key YOUR_KEY --parallel 4 +python run_tests.py --profile core -p 4 ``` ## Environment Variables | Variable | Description | Required | |----------|-------------|----------| -| `VECTARA_API_KEY` | Your Personal API key | Yes | -| `VECTARA_BASE_URL` | API URL for on-premise deployments | No (defaults to SaaS) | -| `VECTARA_TIMEOUT` | Request timeout in seconds | No (default: 30) | -| `VECTARA_CORPUS_PREFIX` | Prefix for test corpora | No (default: `api_test_`) | +| `VECTARA_API_KEY` | Personal API key | Yes | +| `VECTARA_BASE_URL` | API URL (default: `https://api.vectara.io`) | No | +| `VECTARA_TIMEOUT` | Request timeout in seconds (default: 30) | No | +| `VECTARA_LLM_NAME` | LLM model name for generation | No | +| `VECTARA_GENERATION_PRESET` | Generation preset name | No | ## Project Structure ``` -vectara-api-tests/ -├── tests/ -│ ├── test_01_authentication.py -│ ├── test_02_corpus_management.py -│ ├── test_03_indexing.py -│ ├── test_04_query_search.py -│ └── test_05_agents.py -├── utils/ -│ ├── client.py # Vectara API client -│ └── config.py # Configuration management -├── fixtures/ # Test data -├── reports/ # Generated test reports -├── conftest.py # Pytest fixtures -├── run_tests.py # Test runner script -├── requirements.txt -└── README.md +tests/ +├── conftest.py # Marker registration, shared fixtures +├── services/ +│ ├── conftest.py # Shared corpus fixtures +│ ├── auth/ # API key validation, permissions +│ ├── corpus/ # Corpus CRUD, filter attributes, pagination +│ 
├── indexing/ # Document CRUD, metadata, large docs +│ ├── query/ # Semantic search, RAG, edge cases +│ ├── chat/ # Multi-turn conversations +│ └── agents/ # Agent CRUD, execution, sessions +└── workflows/ # Cross-service E2E flows +utils/ +├── client.py # Vectara API client +├── config.py # Environment-based configuration +└── waiters.py # Polling helpers, SSE reader ``` -## Test Coverage +## Test Markers + +Every service test requires exactly one depth marker: +- `@pytest.mark.sanity` — fast health checks +- `@pytest.mark.core` — critical path operations +- `@pytest.mark.regression` — edge cases, error handling + +Workflow tests use `@pytest.mark.workflow`. -| API Category | Endpoints Tested | Scenarios | -|-------------|------------------|-----------| -| Authentication | API key validation | Valid/invalid keys, permissions | -| Corpus Management | Create, Get, List, Update, Delete | CRUD operations, pagination, error handling | -| Indexing | Index, Get, List, Delete documents | Single/bulk docs, metadata, special characters | -| Query/Search | Query, Summary, Chat | Semantic search, RAG, pagination, filters | -| Agents | Create, Execute, Sessions | Conversational AI, multi-turn, context | +## Services + +| Service | What it tests | +|---------|-------------| +| `auth` | API key validation, permissions | +| `corpus` | Corpus CRUD, filter attributes, pagination | +| `indexing` | Document CRUD, metadata, special characters | +| `query` | Semantic search, RAG summary, pagination | +| `chat` | Multi-turn conversations | +| `agents` | Agent CRUD, execution, sessions | ## Troubleshooting ### "API authentication failed" -- Verify your API key is correct -- Ensure you're using a Personal API key from an Account Owner -- Check if the key has expired +- Verify your API key is correct and is a Personal API key +- Check if the key has expired or been disabled ### "Connection error" - Verify the base URL is correct -- Check network connectivity to Vectara servers - For 
on-premise: ensure the instance is running -### "Permission denied" -- Verify you're using a Personal API key (not an index/query-specific key) -- Check account-level permissions - -## CI/CD Integration - -Example GitHub Actions workflow: - -```yaml -name: Vectara API Tests - -on: - schedule: - - cron: '0 6 * * *' # Daily at 6 AM - workflow_dispatch: - -jobs: - test: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: '3.11' - - - name: Install dependencies - run: pip install -r requirements.txt - - - name: Run tests - env: - VECTARA_API_KEY: ${{ secrets.VECTARA_API_KEY }} - run: python run_tests.py --html-report --json-report - - - name: Upload reports - uses: actions/upload-artifact@v4 - with: - name: test-reports - path: reports/ -``` - ## License -Internal use only. For Vectara on-premise customers. +Internal use only. diff --git a/conftest.py b/conftest.py deleted file mode 100644 index de811b0..0000000 --- a/conftest.py +++ /dev/null @@ -1,165 +0,0 @@ -""" -Pytest configuration and shared fixtures for Vectara API Test Suite. 
-""" - -import os -import sys -import uuid -import logging -import time -from pathlib import Path - -import pytest - -# Add project root to path -sys.path.insert(0, str(Path(__file__).parent)) - -from utils.config import Config -from utils.client import VectaraClient - - -def pytest_addoption(parser): - """Add custom command-line options.""" - parser.addoption( - "--api-key", - action="store", - default=None, - help="Vectara Personal API key", - ) - parser.addoption( - "--base-url", - action="store", - default=None, - help="Vectara API base URL (for on-premise deployments)", - ) - parser.addoption( - "--generation-preset", - action="store", - default=None, - help="Generation preset name for summarization (e.g., mockingbird-2.0)", - ) - parser.addoption( - "--llm-name", - action="store", - default=None, - help="LLM model name to override preset's model (e.g., gpt-4o)", - ) - - -def pytest_configure(config): - """Configure logging and environment from command-line options.""" - # Set up logging - logging.basicConfig( - level=logging.INFO, - format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", - ) - - # Apply command-line options to environment - if config.getoption("--api-key"): - os.environ["VECTARA_API_KEY"] = config.getoption("--api-key") - - if config.getoption("--base-url"): - os.environ["VECTARA_BASE_URL"] = config.getoption("--base-url") - - if config.getoption("--generation-preset"): - os.environ["VECTARA_GENERATION_PRESET"] = config.getoption("--generation-preset") - - if config.getoption("--llm-name"): - os.environ["VECTARA_LLM_NAME"] = config.getoption("--llm-name") - - -@pytest.fixture(scope="session") -def config(): - """Provide configuration object.""" - return Config() - - -@pytest.fixture(scope="session") -def client(config): - """Provide authenticated Vectara API client.""" - return VectaraClient(config) - - -@pytest.fixture(scope="session") -def test_run_id(): - """Generate unique identifier for this test run.""" - return 
str(uuid.uuid4())[:8] - - -@pytest.fixture(scope="session") -def test_corpus_key(client, config, test_run_id): - """ - Create a test corpus for the session and clean up after. - - This fixture creates a dedicated corpus for testing and ensures - it's deleted after all tests complete. - """ - corpus_name = f"API Test Corpus {test_run_id}" - - # Create test corpus - response = client.create_corpus( - name=corpus_name, - description="Automated test corpus - safe to delete", - ) - - if response.success: - # Use the key returned by the API (not the one we generated) - actual_key = response.data.get("key") - if not actual_key: - pytest.skip(f"Corpus created but no key returned: {response.data}") - - # Allow time for corpus to be ready - time.sleep(1) - - yield actual_key - - # Cleanup: delete test corpus using the actual key - client.delete_corpus(actual_key) - else: - # If corpus creation fails, skip tests that need it - pytest.skip(f"Could not create test corpus: {response.data}") - - -@pytest.fixture -def unique_id(): - """Generate a unique ID for test data.""" - return str(uuid.uuid4())[:12] - - -@pytest.fixture -def sample_document(): - """Provide sample document content for indexing tests.""" - return { - "title": "Test Document", - "text": "This is a sample document for testing the Vectara API. " - "It contains information about artificial intelligence and " - "machine learning technologies. Vector search enables semantic " - "understanding of text content.", - "metadata": { - "source": "test_suite", - "category": "technology", - }, - } - - -@pytest.fixture -def sample_query(): - """Provide sample query for search tests.""" - return "What is vector search?" 
- - -# ------------------------------------------------------------------------- -# Report hooks -# ------------------------------------------------------------------------- - -def pytest_html_report_title(report): - """Set custom report title.""" - report.title = "Vectara API Test Suite Report" - - -def pytest_html_results_summary(prefix, summary, postfix): - """Add custom summary to HTML report.""" - prefix.extend([ - "

This report validates Vectara API functionality for upgrade verification.

", - "

Tests cover: Authentication, Corpus Management, Indexing, Query/Search, and Agents APIs.

", - ]) diff --git a/fixtures/testdata/table_simple.json b/fixtures/testdata/table_simple.json new file mode 100644 index 0000000..9d17e82 --- /dev/null +++ b/fixtures/testdata/table_simple.json @@ -0,0 +1,179 @@ +[ + { + "title": "Detailed Report - Provinces", + "data": { + "headers": [ + [ + { + "text_value": "Province" + }, + { + "text_value": "Gross Bet" + }, + { + "text_value": "Total Take Out" + }, + { + "text_value": "Fed. Levy" + } + ] + ], + "rows": [ + [ + { + "text_value": "Alberta" + }, + { + "text_value": "$142,265,180.20" + }, + { + "text_value": "$30,155,614.28" + }, + { + "text_value": "$1,138,121.42" + } + ], + [ + { + "text_value": "British Columbia" + }, + { + "text_value": "$159,023,358.40" + }, + { + "text_value": "$34,564,933.67" + }, + { + "text_value": "$1,272,186.85" + } + ], + [ + { + "text_value": "Manitoba" + }, + { + "text_value": "$26,685,857.60" + }, + { + "text_value": "$6,411,191.02" + }, + { + "text_value": "$213,486.83" + } + ], + [ + { + "text_value": "New Brunswick" + }, + { + "text_value": "$5,058,457.60" + }, + { + "text_value": "$1,207,652.69" + }, + { + "text_value": "$40,467.66" + } + ], + [ + { + "text_value": "Newfoundland and Labrador" + }, + { + "text_value": "$1,254,777.60" + }, + { + "text_value": "$284,695.97" + }, + { + "text_value": "$10,038.22" + } + ], + [ + { + "text_value": "Nova Scotia" + }, + { + "text_value": "$12,153,552.40" + }, + { + "text_value": "$2,821,183.72" + }, + { + "text_value": "$97,228.40" + } + ], + [ + { + "text_value": "Ontario" + }, + { + "text_value": "$917,592,843.04" + }, + { + "text_value": "$196,439,489.08" + }, + { + "text_value": "$7,340,742.65" + } + ], + [ + { + "text_value": "Prince Edward Island" + }, + { + "text_value": "$7,076,151.80" + }, + { + "text_value": "$1,789,369.77" + }, + { + "text_value": "$56,609.21" + } + ], + [ + { + "text_value": "Quebec" + }, + { + "text_value": "$53,067,171.20" + }, + { + "text_value": "$11,538,446.66" + }, + { + "text_value": "$424,537.32" + } 
+ ], + [ + { + "text_value": "Saskatchewan" + }, + { + "text_value": "$9,835,176.00" + }, + { + "text_value": "$2,355,458.63" + }, + { + "text_value": "$78,681.39" + } + ], + [ + { + "text_value": "Total" + }, + { + "text_value": "$1,334,012,525.84" + }, + { + "text_value": "$287,568,035.49" + }, + { + "text_value": "$10,672,099.95" + } + ] + ] + } + } +] diff --git a/fixtures/testdata/table_simple.pdf b/fixtures/testdata/table_simple.pdf new file mode 100644 index 0000000..6f5e398 Binary files /dev/null and b/fixtures/testdata/table_simple.pdf differ diff --git a/requirements.txt b/requirements.txt index 75a3b52..741471d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,6 +7,7 @@ requests>=2.31.0 # Testing framework pytest>=7.4.0 pytest-html>=4.1.0 +pytest-json-report>=1.5.0 pytest-xdist>=3.5.0 # Configuration management diff --git a/run_tests.py b/run_tests.py index d501bda..4e4bda8 100644 --- a/run_tests.py +++ b/run_tests.py @@ -13,29 +13,45 @@ export VECTARA_API_KEY=your_key python run_tests.py - # Run specific test categories - python run_tests.py --tests auth,corpus + # Run specific services + python run_tests.py --service corpus,auth + + # Run with a depth profile + python run_tests.py --profile core # Generate HTML report python run_tests.py --html-report """ -import os -import sys import argparse +import os import subprocess -from pathlib import Path +import sys from datetime import datetime +from pathlib import Path try: from rich.console import Console from rich.panel import Panel from rich.table import Table + RICH_AVAILABLE = True except ImportError: RICH_AVAILABLE = False +# Profile-to-marker mapping for depth-based test selection +PROFILE_MARKERS = { + "sanity": "sanity", + "core": "sanity or core", + "regression": "sanity or core or regression", + "full": None, # no marker filter +} + +# Available services (hard-coded; keep in sync with tests/services/ subdirectories) +AVAILABLE_SERVICES = ["agents", "auth", "chat", "corpus", "indexing", "llm", 
"pipelines", "query", "tools", "users"] + + def get_console(): """Get Rich console or None if not available.""" if RICH_AVAILABLE: @@ -46,11 +62,12 @@ def get_console(): def print_header(console): """Print welcome header.""" if console: - console.print(Panel.fit( - "[bold blue]Vectara API Test Suite[/bold blue]\n" - "[dim]Comprehensive API validation for upgrade verification[/dim]", - border_style="blue", - )) + console.print( + Panel.fit( + "[bold blue]Vectara API Test Suite[/bold blue]\n" "[dim]Comprehensive API validation for upgrade verification[/dim]", + border_style="blue", + ) + ) else: print("=" * 50) print("Vectara API Test Suite") @@ -70,84 +87,140 @@ def validate_api_key(api_key): return errors -def build_pytest_args(args, test_selection): - """Build pytest command-line arguments.""" - pytest_args = [ +def resolve_services(args): + """Resolve the list of services to run from --service or deprecated --tests.""" + raw = args.service or args.tests + if raw: + return [s.strip().lower() for s in raw.split(",")] + return [] + + +def build_pytest_args(args, services, profile): + """Build pytest command-line arguments. + + Returns a list of arg-lists (one per phase) when parallel execution splits + into parallel + sequential phases, otherwise a single-element list. 
+ """ + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + + # --- common flags shared by every phase --- + common = [ "-v", # Verbose output "--tb=short", # Shorter tracebacks ] - # Add HTML report if requested - if args.html_report: - timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") - report_path = Path("reports") / f"test_report_{timestamp}.html" - report_path.parent.mkdir(exist_ok=True) - pytest_args.extend(["--html", str(report_path), "--self-contained-html"]) - - # Add JSON report for CI/CD - if args.json_report: - timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") - json_path = Path("reports") / f"test_results_{timestamp}.json" - json_path.parent.mkdir(exist_ok=True) - pytest_args.extend(["--json-report", f"--json-report-file={json_path}"]) - - # Add parallel execution if requested - if args.parallel: - pytest_args.extend(["-n", str(args.parallel)]) - - # Add test selection - if "all" not in test_selection: - test_files = [] - test_mapping = { - "auth": "tests/test_01_authentication.py", - "corpus": "tests/test_02_corpus_management.py", - "indexing": "tests/test_03_indexing.py", - "query": "tests/test_04_query_search.py", - "agents": "tests/test_05_agents.py", - } - for sel in test_selection: - if sel in test_mapping: - test_files.append(test_mapping[sel]) - - if test_files: - pytest_args.extend(test_files) - else: - pytest_args.append("tests/") - else: - pytest_args.append("tests/") - - # Add API key via command-line option + # Pass-through options if args.api_key: - pytest_args.extend(["--api-key", args.api_key]) + common.extend(["--api-key", args.api_key]) if args.base_url: - pytest_args.extend(["--base-url", args.base_url]) + common.extend(["--base-url", args.base_url]) if args.llm_name: - pytest_args.extend(["--llm-name", args.llm_name]) + common.extend(["--llm-name", args.llm_name]) if args.generation_preset: - pytest_args.extend(["--generation-preset", args.generation_preset]) + common.extend(["--generation-preset", args.generation_preset]) 
+ + # --- marker expression from profile --- + marker_expr = PROFILE_MARKERS.get(profile) + + # --- target directories --- + if services: + targets = [f"tests/services/{svc}/" for svc in services] + elif profile == "full": + targets = ["tests/"] + else: + targets = ["tests/services/"] + + # Build a descriptive label for report filenames + if services: + report_label = "_".join(services) + else: + report_label = profile + + def add_report_flags(phase_args, phase_suffix=""): + """Add report flags with descriptive filenames.""" + name = f"{report_label}_{phase_suffix}" if phase_suffix else report_label + if args.html_report: + report_path = Path("reports") / f"test_report_{timestamp}_{name}.html" + report_path.parent.mkdir(exist_ok=True) + phase_args.extend(["--html", str(report_path), "--self-contained-html"]) + if args.json_report: + json_path = Path("reports") / f"test_results_{timestamp}_{name}.json" + json_path.parent.mkdir(exist_ok=True) + phase_args.extend(["--json-report", f"--json-report-file={json_path}"]) + + # --- build phase(s) --- + if args.parallel: + # Phase 1: parallel run (excluding serial-marked tests) + phase1 = list(common) + phase1.extend(["-n", str(args.parallel)]) + if marker_expr: + phase1.extend(["-m", f"({marker_expr}) and not serial"]) + else: + phase1.extend(["-m", "not serial"]) + phase1.extend(targets) + + phases = [phase1] + + # Phase 2: sequential workflow tests (only when profile is full) + if profile == "full": + phase2 = list(common) + if marker_expr: + phase2.extend(["-m", marker_expr]) + phase2.append("tests/workflows/") + phases.append(phase2) + + # Add report flags — one file per phase if multiple, no suffix if single + if len(phases) == 1: + add_report_flags(phases[0]) + else: + add_report_flags(phases[0], "services") + add_report_flags(phases[1], "workflows") - return pytest_args + return phases + else: + # Single invocation (no parallelism) + single = list(common) + if marker_expr: + single.extend(["-m", marker_expr]) + 
single.extend(targets) + add_report_flags(single) + return [single] -def run_tests(pytest_args, console): - """Execute pytest with the given arguments.""" +def run_tests(phases, console): + """Execute pytest for each phase and return the first non-zero exit code (or 0).""" if console: console.print("\n[bold green]Starting test execution...[/bold green]\n") else: print("\nStarting test execution...\n") - # Run pytest - cmd = [sys.executable, "-m", "pytest"] + pytest_args + for idx, pytest_args in enumerate(phases): + if len(phases) > 1: + label = "Phase 1 (parallel)" if idx == 0 else "Phase 2 (sequential workflows)" + if console: + console.print(f"\n[bold cyan]{label}[/bold cyan]") + else: + print(f"\n{label}") + + cmd = [sys.executable, "-m", "pytest"] + pytest_args - try: - result = subprocess.run(cmd, cwd=Path(__file__).parent) - return result.returncode - except KeyboardInterrupt: if console: - console.print("\n[yellow]Test execution cancelled by user.[/yellow]") + console.print(f"[dim]Running: pytest {' '.join(pytest_args)}[/dim]\n") else: - print("\nTest execution cancelled by user.") - return 130 + print(f"Running: pytest {' '.join(pytest_args)}\n") + + try: + result = subprocess.run(cmd, cwd=Path(__file__).parent) + if result.returncode != 0: + return result.returncode + except KeyboardInterrupt: + if console: + console.print("\n[yellow]Test execution cancelled by user.[/yellow]") + else: + print("\nTest execution cancelled by user.") + return 130 + + return 0 def main(): @@ -157,10 +230,13 @@ def main(): formatter_class=argparse.RawDescriptionHelpFormatter, epilog=""" Examples: - python run_tests.py --api-key YOUR_KEY # With API key - python run_tests.py --tests auth,corpus # Run specific tests - python run_tests.py --html-report # Generate HTML report - python run_tests.py --llm-name mockingbird-2.0 # Specify LLM model + python run_tests.py --api-key YOUR_KEY # With API key + python run_tests.py --profile sanity # Run sanity tests only + python 
run_tests.py --profile core --service corpus,auth # Core tests for specific services + python run_tests.py --service corpus,query # Run specific services (default profile: core) + python run_tests.py --profile full -p 4 # Full run, 4 parallel workers + python run_tests.py --html-report # Generate HTML report + python run_tests.py --llm-name mockingbird-2.0 # Specify LLM model python run_tests.py --generation-preset vectara-summary-ext-24-05-med-omni Environment Variables: @@ -173,11 +249,13 @@ def main(): # Credential arguments parser.add_argument( - "--api-key", "-k", + "--api-key", + "-k", help="Vectara Personal API key (or set VECTARA_API_KEY env var)", ) parser.add_argument( - "--base-url", "-u", + "--base-url", + "-u", help="Vectara API base URL for on-premise (default: https://api.vectara.io)", ) @@ -191,10 +269,22 @@ def main(): help="Generation preset name (or set VECTARA_GENERATION_PRESET env var)", ) - # Test selection + # Profile and service selection + parser.add_argument( + "--profile", + choices=["sanity", "core", "regression", "full"], + default="core", + help="Test depth profile (default: core)", + ) + parser.add_argument( + "--service", + "-s", + help="Comma-separated list of services to test: " + ",".join(AVAILABLE_SERVICES), + ) parser.add_argument( - "--tests", "-t", - help="Comma-separated list of test categories: auth,corpus,indexing,query,agents,all", + "--tests", + "-t", + help="(Deprecated, use --service) Comma-separated list of services to test", ) # Report options @@ -211,7 +301,8 @@ def main(): # Execution options parser.add_argument( - "--parallel", "-p", + "--parallel", + "-p", type=int, metavar="N", help="Run tests in parallel with N workers", @@ -222,6 +313,13 @@ def main(): print_header(console) + # Warn about deprecated --tests flag + if args.tests and not args.service: + if console: + console.print("[yellow]Warning: --tests is deprecated, use --service instead.[/yellow]") + else: + print("Warning: --tests is deprecated, use 
--service instead.") + # Determine API key from args or environment api_key = args.api_key or os.environ.get("VECTARA_API_KEY") base_url = args.base_url or os.environ.get("VECTARA_BASE_URL") @@ -250,41 +348,42 @@ def main(): if base_url: os.environ["VECTARA_BASE_URL"] = base_url - # Get test selection - if args.tests: - test_selection = [t.strip().lower() for t in args.tests.split(",")] - else: - test_selection = ["all"] + # Resolve services and profile + services = resolve_services(args) + profile = args.profile - # Show test categories + # Show configuration table if console: - table = Table(title="Test Categories") - table.add_column("Category", style="cyan") - table.add_column("Status") + table = Table(title="Test Configuration") + table.add_column("Setting", style="cyan") + table.add_column("Value") + + table.add_row("Profile", f"[bold]{profile}[/bold]") - categories = ["auth", "corpus", "indexing", "query", "agents"] - for cat in categories: - status = "[green]✓ Selected[/green]" if "all" in test_selection or cat in test_selection else "[dim]Skipped[/dim]" - table.add_row(cat, status) + if services: + table.add_row("Services", ", ".join(services)) + else: + table.add_row("Services", "[dim]all[/dim]") + + if args.parallel: + table.add_row("Parallelism", f"{args.parallel} workers") + + marker = PROFILE_MARKERS.get(profile) + table.add_row("Marker filter", marker if marker else "[dim]none (full)[/dim]") console.print(table) # Build and run pytest - pytest_args = build_pytest_args(args, test_selection) - - if console: - console.print(f"\n[dim]Running: pytest {' '.join(pytest_args)}[/dim]\n") - else: - print(f"\nRunning: pytest {' '.join(pytest_args)}\n") + phases = build_pytest_args(args, services, profile) - exit_code = run_tests(pytest_args, console) + exit_code = run_tests(phases, console) # Summary if console: if exit_code == 0: - console.print("\n[bold green]✔ All tests passed![/bold green]") + console.print("\n[bold green]All tests passed![/bold green]") 
else: - console.print(f"\n[bold red]✘ Tests failed with exit code {exit_code}[/bold red]") + console.print(f"\n[bold red]Tests failed with exit code {exit_code}[/bold red]") else: if exit_code == 0: print("\nAll tests passed!") diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..b8b7a03 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,201 @@ +""" +Root pytest configuration for the restructured Vectara API test suite. + +Registers depth-profile markers (sanity / core / regression), enforces that +every service test carries exactly one of them, and provides session- and +per-test fixtures shared across all test directories. +""" + +import logging +import os +import sys +import uuid +from pathlib import Path + +import pytest + +# --------------------------------------------------------------------------- +# Path setup -- allow ``from utils.config import Config`` etc. regardless of +# where pytest is invoked from. +# --------------------------------------------------------------------------- +sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) + +from utils.client import VectaraClient +from utils.config import Config + +# --------------------------------------------------------------------------- +# CLI options +# --------------------------------------------------------------------------- + + +def pytest_addoption(parser): + """Add custom command-line options.""" + parser.addoption( + "--api-key", + action="store", + default=None, + help="Vectara Personal API key", + ) + parser.addoption( + "--base-url", + action="store", + default=None, + help="Vectara API base URL (for on-premise deployments)", + ) + parser.addoption( + "--generation-preset", + action="store", + default=None, + help="Generation preset name for summarization (e.g., mockingbird-2.0)", + ) + parser.addoption( + "--llm-name", + action="store", + default=None, + help="LLM model name to override preset's model (e.g., gpt-4o)", + ) + + +# 
--------------------------------------------------------------------------- +# Configuration & marker registration +# --------------------------------------------------------------------------- + +DEPTH_MARKERS = {"sanity", "core", "regression"} + + +def pytest_configure(config): + """Set env vars from CLI options and register custom markers.""" + # Logging + logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", + ) + + # Forward CLI options into the environment so Config picks them up. + if config.getoption("--api-key", default=None): + os.environ["VECTARA_API_KEY"] = config.getoption("--api-key") + if config.getoption("--base-url", default=None): + os.environ["VECTARA_BASE_URL"] = config.getoption("--base-url") + if config.getoption("--generation-preset", default=None): + os.environ["VECTARA_GENERATION_PRESET"] = config.getoption("--generation-preset") + if config.getoption("--llm-name", default=None): + os.environ["VECTARA_LLM_NAME"] = config.getoption("--llm-name") + + # Register markers + config.addinivalue_line("markers", "sanity: quick smoke-test (< 30 s)") + config.addinivalue_line("markers", "core: standard validation (minutes)") + config.addinivalue_line("markers", "regression: exhaustive coverage") + config.addinivalue_line("markers", "workflow: end-to-end multi-service workflow") + config.addinivalue_line("markers", "serial: must not run in parallel") + + +# --------------------------------------------------------------------------- +# Collection-time validation +# --------------------------------------------------------------------------- + + +def pytest_collection_modifyitems(config, items): + """Fail collection for any service test that has zero or multiple depth markers. + + Tests under ``tests/workflows/`` are exempt from this rule. + """ + errors: list[str] = [] + + for item in items: + # Workflow tests are exempt from depth-marker enforcement. 
+ if "/workflows/" in str(item.fspath): + continue + + # Only enforce on service tests (under tests/services/). + if "/services/" not in str(item.fspath): + continue + + marker_names = {m.name for m in item.iter_markers()} + depth_hits = marker_names & DEPTH_MARKERS + + if len(depth_hits) == 0: + errors.append(f"{item.nodeid}: missing depth marker (add @pytest.mark.sanity, " f"@pytest.mark.core, or @pytest.mark.regression)") + elif len(depth_hits) > 1: + errors.append(f"{item.nodeid}: multiple depth markers ({', '.join(sorted(depth_hits))}); " f"use exactly one") + + if errors: + msg = "Depth-marker violations:\n " + "\n ".join(errors) + raise pytest.UsageError(msg) + + +# --------------------------------------------------------------------------- +# Session-scoped fixtures +# --------------------------------------------------------------------------- + + +@pytest.fixture(scope="session") +def config(): + """Provide configuration object.""" + return Config() + + +@pytest.fixture(scope="session") +def client(config): + """Provide authenticated Vectara API client.""" + return VectaraClient(config) + + +@pytest.fixture(scope="session") +def test_run_id(): + """Generate a unique identifier for this test run.""" + return str(uuid.uuid4())[:8] + + +# --------------------------------------------------------------------------- +# Per-test fixtures +# --------------------------------------------------------------------------- + + +@pytest.fixture +def unique_id(): + """Generate a unique ID for test data.""" + return str(uuid.uuid4())[:12] + + +@pytest.fixture +def sample_document(): + """Provide sample document content for indexing tests.""" + return { + "title": "Test Document", + "text": ( + "This is a sample document for testing the Vectara API. " + "It contains information about artificial intelligence and " + "machine learning technologies. Vector search enables semantic " + "understanding of text content." 
+ ), + "metadata": { + "source": "test_suite", + "category": "technology", + }, + } + + +@pytest.fixture +def sample_query(): + """Provide sample query for search tests.""" + return "What is vector search?" + + +# --------------------------------------------------------------------------- +# HTML report hooks +# --------------------------------------------------------------------------- + + +def pytest_html_report_title(report): + """Set custom report title.""" + report.title = "Vectara API Test Suite Report" + + +def pytest_html_results_summary(prefix, summary, postfix): + """Add custom summary to HTML report.""" + prefix.extend( + [ + "

<p>This report validates Vectara API functionality for upgrade verification.</p>", + "<p>Tests cover: Authentication, Corpus Management, Indexing, Query/Search, and Agents APIs.</p>

", + ] + ) diff --git a/tests/services/agents/conftest.py b/tests/services/agents/conftest.py new file mode 100644 index 0000000..c72cdc9 --- /dev/null +++ b/tests/services/agents/conftest.py @@ -0,0 +1,141 @@ +""" +Agent-specific fixtures. + +Provides a module-scoped corpus with agent-focused documents and a reusable +shared agent for execution and session tests. CRUD tests create their own +agents per-test since they mutate agent state. +""" + +import logging +import uuid + +import pytest + +from utils.waiters import wait_for + +logger = logging.getLogger(__name__) + + +@pytest.fixture(scope="module") +def shared_agent_corpus(client): + """Module-scoped corpus with agent-focused docs.""" + corpus_key = f"agent_corpus_{uuid.uuid4().hex}" + + response = client.create_corpus( + name=f"Agent Test Corpus {uuid.uuid4().hex[:8]}", + key=corpus_key, + description="Shared agent test corpus", + ) + if not response.success: + pytest.skip(f"Could not create agent corpus: {response.data}") + + actual_key = response.data.get("key", corpus_key) + + docs = [ + { + "id": f"agent_doc_{uuid.uuid4().hex[:8]}", + "text": "Vectara is a trusted AI platform for enterprise search and RAG applications.", + "metadata": {"topic": "overview"}, + }, + { + "id": f"agent_doc_{uuid.uuid4().hex[:8]}", + "text": "To get started with Vectara, create an account and obtain an API key with QueryService and IndexService permissions.", + "metadata": {"topic": "getting_started"}, + }, + { + "id": f"agent_doc_{uuid.uuid4().hex[:8]}", + "text": "Vectara agents provide conversational AI experiences maintaining context across multiple turns.", + "metadata": {"topic": "agents"}, + }, + ] + + doc_ids = [] + for doc in docs: + resp = client.index_document( + corpus_key=actual_key, + document_id=doc["id"], + text=doc["text"], + metadata=doc["metadata"], + ) + if resp.success: + doc_ids.append(doc["id"]) + + wait_for( + lambda: client.list_documents(actual_key, limit=1).data.get("documents", []), + timeout=15, + 
interval=1, + description="agent corpus documents to be indexed", + ) + + yield actual_key + + for doc_id in doc_ids: + try: + client.delete_document(actual_key, doc_id) + except Exception: + pass + try: + client.delete_corpus(actual_key) + except Exception: + pass + + +@pytest.fixture(scope="module") +def shared_agent(client, shared_agent_corpus): + """Module-scoped agent for execution and session tests. + + Do NOT use for tests that mutate agent properties (update, delete, identity). + Those tests should create their own agent. + """ + agent_key = f"test_agent_{uuid.uuid4().hex[:8]}" + + response = client.create_agent( + name=f"Shared Test Agent {uuid.uuid4().hex[:8]}", + corpus_keys=[shared_agent_corpus], + description="Shared agent for execution testing", + ) + + # Fallback to minimal agent + if not response.success: + response = client.create_agent( + name=f"Shared Test Agent {uuid.uuid4().hex[:8]}", + description="Shared agent for execution testing", + ) + + if not response.success: + pytest.skip(f"Could not create shared agent: {response.data}") + + agent_id = response.data.get("id") or response.data.get("agent_id") or response.data.get("key") + if not agent_id: + pytest.skip("No agent key in response") + + yield agent_id + + try: + client.delete_agent(agent_id) + except Exception: + pass + + +@pytest.fixture +def agent_with_session(client, shared_agent): + """Create a session on shared_agent, send a message, yield (agent_key, session_key, events).""" + session_resp = client.create_agent_session(shared_agent) + if not session_resp.success: + pytest.skip(f"Could not create agent session: {session_resp.data}") + + session_key = session_resp.data.get("key") + + # Send a message to generate events + client.execute_agent(agent_id=shared_agent, query_text="Setup message", session_id=session_key) + + # List events + events_resp = client.list_session_events(shared_agent, session_key) + events = events_resp.data.get("events", []) if events_resp.success else [] + + 
yield shared_agent, session_key, events + + try: + client.delete_agent_session(shared_agent, session_key) + except Exception: + pass diff --git a/tests/services/agents/test_agent_config_update.py b/tests/services/agents/test_agent_config_update.py new file mode 100644 index 0000000..660297c --- /dev/null +++ b/tests/services/agents/test_agent_config_update.py @@ -0,0 +1,80 @@ +""" +Agent Configuration Update Tests + +Tests for updating agent model, tools, instructions, metadata, and enabled state. +""" + +import uuid + +import pytest + + +@pytest.mark.core +class TestAgentConfigUpdate: + """Agent configuration update operations.""" + + def _create_test_agent(self, client, unique_id): + """Create a temporary agent for testing updates.""" + name = f"Config Test Agent {unique_id}" + resp = client.create_agent(name=name, description="Agent for config update tests") + assert resp.success, f"Create agent failed: {resp.status_code} - {resp.data}" + agent_id = resp.data.get("id") or resp.data.get("key") + assert agent_id, f"No agent id in create response: {resp.data}" + return agent_id + + def test_update_agent_description(self, client, unique_id): + """Test updating agent description and verifying persistence.""" + agent_id = self._create_test_agent(client, unique_id) + try: + new_desc = f"Updated description {unique_id}" + update_resp = client.update_agent(agent_id, description=new_desc) + assert update_resp.success, f"Update failed: {update_resp.status_code}" + + get_resp = client.get_agent(agent_id) + assert get_resp.success + assert get_resp.data.get("description") == new_desc + finally: + try: + client.delete_agent(agent_id) + except Exception: + pass + + def test_update_agent_metadata(self, client, unique_id): + """Test updating agent metadata.""" + agent_id = self._create_test_agent(client, unique_id) + try: + metadata = {"environment": "test", "version": "1.0"} + update_resp = client.update_agent(agent_id, metadata=metadata) + assert update_resp.success, f"Update 
metadata failed: {update_resp.status_code}" + + get_resp = client.get_agent(agent_id) + assert get_resp.success + agent_metadata = get_resp.data.get("metadata", {}) + assert agent_metadata.get("environment") == "test", f"Metadata not persisted: {agent_metadata}" + finally: + try: + client.delete_agent(agent_id) + except Exception: + pass + + def test_enable_disable_agent(self, client, unique_id): + """Test disabling and re-enabling an agent.""" + agent_id = self._create_test_agent(client, unique_id) + try: + disable_resp = client.update_agent(agent_id, enabled=False) + assert disable_resp.success, f"Disable failed: {disable_resp.status_code}" + + get_resp = client.get_agent(agent_id) + assert get_resp.success + assert get_resp.data.get("enabled") is False, f"Expected disabled, got: {get_resp.data.get('enabled')}" + + enable_resp = client.update_agent(agent_id, enabled=True) + assert enable_resp.success + + get_resp2 = client.get_agent(agent_id) + assert get_resp2.data.get("enabled") is True + finally: + try: + client.delete_agent(agent_id) + except Exception: + pass diff --git a/tests/services/agents/test_agent_context_preservation.py b/tests/services/agents/test_agent_context_preservation.py new file mode 100644 index 0000000..3935fd9 --- /dev/null +++ b/tests/services/agents/test_agent_context_preservation.py @@ -0,0 +1,109 @@ +""" +Agent Context Preservation Tests + +Verify multi-turn context is retained across 3+ turns and +that context is not shared between separate sessions. 
+""" + +import pytest + +from utils.waiters import wait_for + + +@pytest.mark.core +class TestAgentContextPreservation: + """Multi-turn context retention tests.""" + + def test_three_turn_context_preservation(self, client, shared_agent): + """Send 3 turns, verify the 3rd turn retains context from turn 1.""" + session_resp = client.create_agent_session(shared_agent) + assert session_resp.success, f"Create session failed: {session_resp.status_code} - {session_resp.data}" + + session_key = session_resp.data.get("key") + try: + wait_for( + lambda: client.get_agent_session(shared_agent, session_key).success, + timeout=10, + interval=0.5, + description="session available", + ) + + turn1 = client.execute_agent( + shared_agent, + "My name is Alexander and I work at Acme Corp.", + session_id=session_key, + ) + assert turn1.success, f"Turn 1 failed: {turn1.status_code} - {turn1.data}" + + turn2 = client.execute_agent( + shared_agent, + "I'm interested in semantic search technology.", + session_id=session_key, + ) + assert turn2.success, f"Turn 2 failed: {turn2.status_code} - {turn2.data}" + + turn3 = client.execute_agent( + shared_agent, + "What company do I work at and what technology am I interested in?", + session_id=session_key, + ) + assert turn3.success, f"Turn 3 failed: {turn3.status_code} - {turn3.data}" + + events = turn3.data.get("events", []) + output_events = [e for e in events if e.get("type") == "agent_output"] + output_text = " ".join(e.get("content", "") for e in output_events).lower() + + assert "acme" in output_text, f"Turn 3 should reference 'Acme' from turn 1, got: {output_text[:200]}" + assert "semantic" in output_text or "search" in output_text, f"Turn 3 should reference 'semantic search' from turn 2, got: {output_text[:200]}" + finally: + try: + client.delete_agent_session(shared_agent, session_key) + except Exception: + pass + + def test_context_not_shared_across_sessions(self, client, shared_agent): + """Verify context from session A does not leak 
into session B.""" + session_a = client.create_agent_session(shared_agent) + session_b = client.create_agent_session(shared_agent) + + assert session_a.success, f"Create session A failed: {session_a.status_code} - {session_a.data}" + assert session_b.success, f"Create session B failed: {session_b.status_code} - {session_b.data}" + + key_a = session_a.data.get("key") + key_b = session_b.data.get("key") + + try: + for key in [key_a, key_b]: + wait_for( + lambda k=key: client.get_agent_session(shared_agent, k).success, + timeout=10, + interval=0.5, + description=f"session {key} available", + ) + + resp_a = client.execute_agent( + shared_agent, + "Remember this secret code: XYLOPHONE-7749. My pet iguana is named Bartholomew.", + session_id=key_a, + ) + assert resp_a.success, f"Session A message failed: {resp_a.data}" + + resp_b = client.execute_agent( + shared_agent, + "What is my secret code? What is my pet's name?", + session_id=key_b, + ) + assert resp_b.success, f"Session B message failed: {resp_b.data}" + + events_b = resp_b.data.get("events", []) + output_b = " ".join(e.get("content", "") for e in events_b if e.get("type") == "agent_output").lower() + + assert "xylophone" not in output_b and "7749" not in output_b, f"Session B should NOT know session A's secret code, but got: {output_b[:200]}" + assert "bartholomew" not in output_b, f"Session B should NOT know session A's pet name, but got: {output_b[:200]}" + finally: + for key in [key_a, key_b]: + if key: + try: + client.delete_agent_session(shared_agent, key) + except Exception: + pass diff --git a/tests/services/agents/test_agent_corpora_search.py b/tests/services/agents/test_agent_corpora_search.py new file mode 100644 index 0000000..0dd5cef --- /dev/null +++ b/tests/services/agents/test_agent_corpora_search.py @@ -0,0 +1,104 @@ +""" +Agent Corpora Search Tool Tests + +The #1 user journey: create an agent with a corpora_search tool, +ask questions, verify the agent uses corpus content in its answers. 
+""" + +import uuid + +import pytest + +from utils.waiters import wait_for + + +@pytest.mark.core +class TestAgentCorporaSearch: + """Agent with corpora_search tool — core product flow.""" + + def _create_agent_with_search_tool(self, client, corpus_key, unique_id): + """Create an agent configured with a corpora_search tool.""" + agent_key = f"search_agent_{unique_id}" + resp = client.create_agent( + name=f"Search Agent {unique_id}", + agent_key=agent_key, + tool_configurations={ + "corpus_search": { + "type": "corpora_search", + "query_configuration": { + "search": { + "corpora": [{"corpus_key": corpus_key}], + }, + }, + }, + }, + ) + return resp, agent_key + + def test_create_agent_with_corpora_search_tool(self, client, seeded_corpus, unique_id): + """Create agent with corpora_search tool, verify config persisted.""" + resp, agent_key = self._create_agent_with_search_tool(client, seeded_corpus, unique_id) + assert resp.success, f"Create agent with search tool failed: {resp.status_code} - {resp.data}" + + try: + get_resp = client.get_agent(agent_key) + assert get_resp.success, f"GET agent failed: {get_resp.status_code}" + + tool_configs = get_resp.data.get("tool_configurations", {}) + if isinstance(tool_configs, dict): + has_search_tool = any(tc.get("type") == "corpora_search" for tc in tool_configs.values()) + config_types = [tc.get("type") for tc in tool_configs.values()] + else: + has_search_tool = any(tc.get("type") == "corpora_search" for tc in tool_configs) + config_types = [tc.get("type") for tc in tool_configs] + assert has_search_tool, f"Agent should have corpora_search tool, got: {config_types}" + finally: + try: + client.delete_agent(agent_key) + except Exception: + pass + + def test_agent_corpora_search_returns_corpus_content(self, client, seeded_corpus, unique_id): + """Send question to agent with search tool, verify answer uses corpus content.""" + resp, agent_key = self._create_agent_with_search_tool(client, seeded_corpus, unique_id) + assert 
resp.success, f"Create agent failed: {resp.status_code} - {resp.data}" + + try: + session_resp = client.create_agent_session(agent_key) + assert session_resp.success, f"Create session failed: {session_resp.status_code} - {session_resp.data}" + + session_key = session_resp.data.get("key") + wait_for( + lambda: client.get_agent_session(agent_key, session_key).success, + timeout=10, + interval=0.5, + description="session available", + ) + + msg_resp = client.execute_agent( + agent_key, + "What is vector search and how does it work?", + session_id=session_key, + ) + assert msg_resp.success, f"Agent execution failed: {msg_resp.status_code} - {msg_resp.data}" + + events = msg_resp.data.get("events", []) + assert len(events) > 0, f"Expected events in response: {msg_resp.data}" + + event_types = [e.get("type") for e in events] + has_output = any(t == "agent_output" for t in event_types) + assert has_output, f"Expected agent_output event, got types: {event_types}" + + output_events = [e for e in events if e.get("type") == "agent_output"] + output_text = " ".join(e.get("content", "") for e in output_events).lower() + assert len(output_text) > 20, f"Agent output should be substantive, got: {output_text[:100]}" + + try: + client.delete_agent_session(agent_key, session_key) + except Exception: + pass + finally: + try: + client.delete_agent(agent_key) + except Exception: + pass diff --git a/tests/services/agents/test_agent_crud.py b/tests/services/agents/test_agent_crud.py new file mode 100644 index 0000000..b7111f0 --- /dev/null +++ b/tests/services/agents/test_agent_crud.py @@ -0,0 +1,177 @@ +""" +Agent CRUD Tests + +Tests for agent create, read, update, delete, and listing operations. 
+""" + +import time + +import pytest + + +@pytest.mark.sanity +class TestAgentList: + """Agent listing checks.""" + + def test_list_agents(self, client): + """Test listing all agents.""" + response = client.list_agents(limit=10) + + assert response.success, f"List agents failed: {response.status_code} - {response.data}" + assert "agents" in response.data, f"Response missing 'agents' key: {response.data}" + assert isinstance(response.data["agents"], list), f"Expected list, got {type(response.data['agents'])}" + + +@pytest.mark.core +class TestAgentCrud: + """Agent create, get, update, and delete checks.""" + + def test_create_agent(self, client, shared_agent_corpus, unique_id): + """Test creating a new agent.""" + agent_name = f"Test Agent {unique_id}" + + response = client.create_agent( + name=agent_name, + corpus_keys=[shared_agent_corpus], + description="Test agent created by API test suite", + ) + + assert response.success, f"Create agent failed: {response.status_code} - {response.data}" + assert response.data.get("name") == agent_name, f"Expected name {agent_name!r}, got {response.data.get('name')!r}" + assert response.data.get("id") is not None or response.data.get("key") is not None, f"Response missing 'id' or 'key': {response.data}" + + # Get agent ID for cleanup + agent_id = response.data.get("id") or response.data.get("agent_id") or response.data.get("key") + if agent_id: + try: + client.delete_agent(agent_id) + except Exception: + pass + + def test_create_agent_with_config(self, client, shared_agent_corpus, unique_id): + """Test creating an agent with custom configuration.""" + agent_name = f"Configured Agent {unique_id}" + + response = client.create_agent( + name=agent_name, + corpus_keys=[shared_agent_corpus], + description="Agent with custom settings", + ) + + assert response.success, f"Create configured agent failed: {response.status_code} - {response.data}" + assert ( + response.data.get("description") == "Agent with custom settings" + ), f"Expected 
description 'Agent with custom settings', got {response.data.get('description')!r}" + + agent_id = response.data.get("id") or response.data.get("agent_id") or response.data.get("key") + if agent_id: + try: + client.delete_agent(agent_id) + except Exception: + pass + + def test_get_agent(self, client, shared_agent_corpus, unique_id): + """Test retrieving agent details.""" + # First create an agent + create_response = client.create_agent( + name=f"Get Test Agent {unique_id}", + corpus_keys=[shared_agent_corpus], + ) + + # Fallback to minimal agent + if not create_response.success: + create_response = client.create_agent( + name=f"Get Test Agent {unique_id}", + ) + + if not create_response.success: + pytest.skip(f"Could not create agent for get test: {create_response.data}") + + agent_id = create_response.data.get("id") or create_response.data.get("agent_id") or create_response.data.get("key") + if not agent_id: + pytest.skip("No agent_id in create response") + + try: + # Get the agent + response = client.get_agent(agent_id) + + assert response.success, f"Get agent failed: {response.status_code} - {response.data}" + assert ( + response.data.get("key") == agent_id or response.data.get("id") == agent_id + ), f"Expected agent id {agent_id!r}, got key={response.data.get('key')!r}, id={response.data.get('id')!r}" + assert response.data.get("name") is not None, f"Response missing 'name': {response.data}" + finally: + # Cleanup + client.delete_agent(agent_id) + + def test_update_agent(self, client, shared_agent_corpus, unique_id): + """Test updating an agent.""" + # Create agent + create_response = client.create_agent( + name=f"Update Test Agent {unique_id}", + corpus_keys=[shared_agent_corpus], + description="Original description", + ) + + # Fallback to minimal agent + if not create_response.success: + create_response = client.create_agent( + name=f"Update Test Agent {unique_id}", + description="Original description", + ) + + if not create_response.success: + 
pytest.skip(f"Could not create agent for update test: {create_response.data}") + + agent_id = create_response.data.get("id") or create_response.data.get("agent_id") or create_response.data.get("key") + if not agent_id: + pytest.skip("No agent_id in create response") + + try: + # Update the agent + new_description = f"Updated description at {time.time()}" + update_response = client.update_agent( + agent_id=agent_id, + description=new_description, + ) + + assert update_response.success, f"Update agent failed: {update_response.status_code} - {update_response.data}" + + get_resp = client.get_agent(agent_id) + assert get_resp.success, f"GET after update failed: {get_resp.status_code}" + assert ( + get_resp.data.get("description") == new_description + ), f"Description not persisted: expected {new_description!r}, got {get_resp.data.get('description')!r}" + finally: + # Cleanup + client.delete_agent(agent_id) + + def test_delete_agent(self, client, shared_agent_corpus, unique_id): + """Test deleting an agent.""" + # Create agent to delete + create_response = client.create_agent( + name=f"Delete Test Agent {unique_id}", + corpus_keys=[shared_agent_corpus], + ) + + # Fallback to minimal agent + if not create_response.success: + create_response = client.create_agent( + name=f"Delete Test Agent {unique_id}", + ) + + if not create_response.success: + pytest.skip(f"Could not create agent for delete test: {create_response.data}") + + agent_id = create_response.data.get("id") or create_response.data.get("agent_id") or create_response.data.get("key") + if not agent_id: + pytest.skip("No agent_id in create response") + + # Delete the agent + delete_response = client.delete_agent(agent_id) + + assert delete_response.success, f"Delete agent failed: {delete_response.status_code} - {delete_response.data}" + + # Verify deletion + get_response = client.get_agent(agent_id) + assert get_response.status_code == 404, f"Deleted agent should return 404, got {get_response.status_code}" diff 
--git a/tests/services/agents/test_agent_error_cases.py b/tests/services/agents/test_agent_error_cases.py new file mode 100644 index 0000000..76c52aa --- /dev/null +++ b/tests/services/agents/test_agent_error_cases.py @@ -0,0 +1,70 @@ +""" +Agent Error Case Tests + +Tests for error handling on non-existent agents and sessions. +Ported from AgentSessionIntegrationTest.java error case tests. +""" + +import uuid + +import pytest + + +@pytest.mark.regression +class TestAgentErrorCases: + """Error handling for invalid agent/session operations.""" + + def test_send_message_nonexistent_session(self, client, shared_agent): + """testNonSseInputOnNonExistentSession — 404 for bad session.""" + resp = client.execute_agent( + shared_agent, + "Hello", + session_id=f"ase_fake_{uuid.uuid4().hex[:8]}", + ) + assert resp.status_code == 404, f"Expected 404 for non-existent session, got {resp.status_code}: {resp.data}" + + def test_send_message_nonexistent_agent(self, client): + """testNonSseInputOnNonExistentAgent — 404 for bad agent.""" + resp = client.post( + f"/v2/agents/nonexistent_{uuid.uuid4().hex[:8]}/sessions/fake_session/events", + data={ + "type": "input_message", + "messages": [{"type": "text", "content": "Hello"}], + }, + ) + assert resp.status_code == 404, f"Expected 404 for non-existent agent, got {resp.status_code}: {resp.data}" + + def test_fork_session_continue_conversation(self, client, agent_with_session): + """forkSession_withoutCompaction_newSessionCanContinueConversation.""" + agent_key, session_key, events = agent_with_session + + fork_resp = client.create_agent_session( + agent_key, + from_session={"session_key": session_key}, + ) + if not fork_resp.success: + pytest.skip(f"Fork failed: {fork_resp.data}") + + forked_key = fork_resp.data.get("key") + try: + from utils.waiters import wait_for + + wait_for( + lambda: client.get_agent_session(agent_key, forked_key).success, + timeout=10, + interval=0.5, + description="forked session available", + ) + + msg_resp = 
client.execute_agent(agent_key, "Continue the conversation", session_id=forked_key) + assert msg_resp.success, f"Should be able to chat in forked session: {msg_resp.status_code} - {msg_resp.data}" + + response_events = msg_resp.data.get("events", []) + has_output = any(e.get("type") == "agent_output" for e in response_events) + assert has_output, f"Forked session response should have agent_output: {[e.get('type') for e in response_events]}" + finally: + if forked_key: + try: + client.delete_agent_session(agent_key, forked_key) + except Exception: + pass diff --git a/tests/services/agents/test_agent_execution.py b/tests/services/agents/test_agent_execution.py new file mode 100644 index 0000000..dfd99e5 --- /dev/null +++ b/tests/services/agents/test_agent_execution.py @@ -0,0 +1,106 @@ +""" +Agent Execution Tests + +Tests for executing queries against agents, multi-turn conversations, +response time, and edge cases. +""" + +import pytest + + +@pytest.mark.core +class TestAgentExecution: + """Agent execution checks.""" + + def test_execute_agent_query(self, client, shared_agent): + """Test executing a query against an agent.""" + response = client.execute_agent( + agent_id=shared_agent, + query_text="What is Vectara?", + ) + + assert response.success, f"Agent execution failed: {response.status_code} - {response.data}" + events = response.data.get("events", []) + assert len(events) > 0, f"Expected events in agent response, got: {response.data}" + + def test_execute_agent_with_context(self, client, shared_agent): + """Test multi-turn conversation with an agent.""" + # First turn + response1 = client.execute_agent( + agent_id=shared_agent, + query_text="Tell me about Vectara agents.", + ) + + assert response1.success, f"First turn failed: {response1.status_code} - {response1.data}" + events1 = response1.data.get("events", []) + assert len(events1) > 0, f"Expected events in turn 1 response, got: {response1.data}" + + # Get session ID if available for follow-up + 
session_id = response1.data.get("session_id") + + # Second turn (follow-up) + response2 = client.execute_agent( + agent_id=shared_agent, + query_text="How do I configure them?", + session_id=session_id, + ) + + assert response2.success, f"Follow-up turn failed: {response2.status_code} - {response2.data}" + events2 = response2.data.get("events", []) + assert len(events2) > 0, f"Expected events in turn 2 response, got: {response2.data}" + + def test_execute_agent_response_time(self, client, shared_agent): + """Test that agent execution completes in acceptable time.""" + response = client.execute_agent( + agent_id=shared_agent, + query_text="What is semantic search?", + ) + + assert response.success, f"Agent execution failed: {response.status_code}" + + # Agent responses involve LLM generation, allow more time + assert response.elapsed_ms < 60000, f"Agent execution took too long: {response.elapsed_ms:.1f}ms" + + +@pytest.mark.regression +class TestAgentExecutionEdgeCases: + """Agent execution edge cases.""" + + def test_execute_nonexistent_agent(self, client): + """Test executing against a non-existent agent.""" + response = client.execute_agent( + agent_id="nonexistent_agent_xyz123", + query_text="test query", + ) + + assert not response.success, "Execution against non-existent agent should fail" + assert response.status_code in [400, 404], f"Expected 400 or 404, got {response.status_code}" + + def test_agent_handles_special_characters(self, client, shared_agent): + """Test agent handles queries with special characters.""" + response = client.execute_agent( + agent_id=shared_agent, + query_text="What's Vectara's approach to AI & machine-learning?", + ) + + assert response.success, f"Special character query failed: {response.status_code}" + events = response.data.get("events", []) + assert len(events) > 0, "Expected events for special character query" + + def test_agent_handles_long_query(self, client, shared_agent): + """Test agent handles longer queries.""" + 
long_query = ( + "I'm trying to understand how Vectara's conversational AI agents work. " + "Can you explain the process of creating an agent, configuring it with " + "multiple corpora, and then using it for multi-turn conversations? " + "I'm particularly interested in how context is maintained across turns." + ) + + response = client.execute_agent( + agent_id=shared_agent, + query_text=long_query, + ) + + assert response.success, f"Long query failed: {response.status_code}" + events = response.data.get("events", []) + assert len(events) > 0, "Expected events for long query" diff --git a/tests/services/agents/test_agent_execution_streaming.py b/tests/services/agents/test_agent_execution_streaming.py new file mode 100644 index 0000000..6cea519 --- /dev/null +++ b/tests/services/agents/test_agent_execution_streaming.py @@ -0,0 +1,49 @@ +""" +Agent Execution Streaming Tests + +Tests for agent execution event responses, verifying events arrive correctly. +The agent events endpoint returns JSON with an events array (not SSE). 
import pytest


@pytest.mark.core
class TestAgentExecutionStreaming:
    """Core tests for agent execution event responses."""

    def test_execute_agent_sse(self, client, shared_agent):
        """Send a message to an agent and verify output events arrive in the response.

        A dedicated session is created (the test is skipped when that fails),
        polled with ``wait_for`` until it can be fetched, and then used for a
        single query.  The response must contain at least one event whose type
        mentions "output" or "message" and that carries a payload.  The session
        is deleted afterwards whether or not the assertions pass.
        """
        session_resp = client.create_agent_session(shared_agent)
        if not session_resp.success:
            pytest.skip(f"Could not create session: {session_resp.data}")
        session_key = session_resp.data.get("key")

        from utils.waiters import wait_for

        try:
            wait_for(
                lambda: client.get_agent_session(shared_agent, session_key).success,
                timeout=10,
                interval=0.5,
                description="session to be available",
            )

            response = client.execute_agent(shared_agent, "What is Vectara?", session_id=session_key)

            assert response.success, f"Agent execution failed: {response.status_code} - {response.data}"

            events = response.data.get("events", [])
            assert len(events) > 0, f"Expected at least one event, got: {response.data}"

            event_types = [e.get("type") for e in events]
            has_output = any("output" in et or "message" in et for et in event_types if et)
            assert has_output, f"No output event found. Event types: {event_types}"

            # `type` may be present but null; fall back to "" so the substring
            # test cannot raise TypeError on such an event.
            output_events = [e for e in events if "output" in (e.get("type") or "") or "message" in (e.get("type") or "")]
            has_content = any(e.get("content") or e.get("data") or e.get("messages") for e in output_events)
            assert has_content, f"Output events have no content: {output_events}"
        finally:
            # Best-effort cleanup: runs even when an assertion above fails so
            # the session is not leaked; a cleanup error must not mask the
            # real test outcome.
            try:
                client.delete_agent_session(shared_agent, session_key)
            except Exception:
                pass
import uuid

import pytest


@pytest.mark.core
class TestAgentIdentity:
    """Core tests for agent identity configuration."""

    def test_get_agent_identity(self, client, shared_agent):
        """Verify the agent identity endpoint answers with 200 or 404.

        A 404 is tolerated because some agents may not have identity
        configured; on 200 the payload must expose a ``mode`` field.
        """
        resp = client.get_agent_identity(shared_agent)
        # Some agents may not have identity configured -- just verify the endpoint works
        assert resp.status_code in [200, 404], f"Unexpected status: {resp.status_code} - {resp.data}"
        if resp.status_code == 200:
            assert "mode" in resp.data, f"Identity response missing 'mode' field: {resp.data}"

    def test_update_agent_identity_mode(self, client, shared_agent_corpus, unique_id):
        """Switch a throw-away agent's identity mode to ``manual`` and check the PATCH response.

        The test is skipped when the agent cannot be created or when the
        identity endpoint answers 404.  The agent is always deleted afterwards.
        """
        agent_resp = client.create_agent(
            name=f"Identity Test {unique_id}",
            description="Agent for identity testing",
        )
        if not agent_resp.success:
            pytest.skip(f"Could not create agent: {agent_resp.data}")

        agent_key = agent_resp.data.get("key") or agent_resp.data.get("id")
        # Without an identifier neither the update nor the cleanup can target
        # the new agent, so fail loudly instead of calling the API with None.
        assert agent_key, f"Create agent response has no key/id: {agent_resp.data}"

        try:
            # Update to manual mode
            update_resp = client.update_agent_identity(agent_key, mode="manual")
            # Accept either success or 404 (if identity not supported)
            if update_resp.status_code == 404:
                pytest.skip("Agent identity not available in this environment")
            assert update_resp.success, f"Update identity failed: {update_resp.data}"

            # Verify PATCH response contains the updated mode
            assert update_resp.data.get("mode") == "manual", f"Expected manual in PATCH response, got: {update_resp.data}"
        finally:
            # Best-effort cleanup; a failure here must not hide the test result.
            try:
                client.delete_agent(agent_key)
            except Exception:
                pass
import pytest


@pytest.mark.core
class TestAgentSessions:
    """Core checks for agent session operations."""

    def test_list_agent_sessions(self, client, shared_agent):
        """List sessions of an agent after running one query against it.

        The query is issued first so that a session exists; its result is
        intentionally not inspected.  The listing must succeed and expose a
        ``sessions`` list.
        """
        # Seed a session by executing a query.
        client.execute_agent(agent_id=shared_agent, query_text="Create a session")

        listing = client.list_agent_sessions(shared_agent, limit=10)
        payload = listing.data

        assert listing.success, f"List sessions failed: {listing.status_code} - {listing.data}"
        assert "sessions" in payload, f"Response missing 'sessions' key: {listing.data}"
        found = payload["sessions"]
        assert isinstance(found, list), f"Expected list, got {type(found)}"
import pytest


@pytest.mark.core
class TestAgentSessionAdvanced:
    """Core tests for session creation with metadata and for messaging an explicit session."""

    def test_create_session_with_metadata(self, client, shared_agent):
        """Create a session with metadata and verify it is returned when the session is fetched."""
        resp = client.create_agent_session(shared_agent, metadata={"topic": "astronomy", "test": True})
        assert resp.success, f"Create session with metadata failed: {resp.data}"
        session_key = resp.data.get("key")

        try:
            # Verify session exists and metadata returned
            get_resp = client.get_agent_session(shared_agent, session_key)
            assert get_resp.success, f"Get session failed: {get_resp.status_code} - {get_resp.data}"
            session_metadata = get_resp.data.get("metadata", {})
            assert session_metadata.get("topic") == "astronomy", f"Expected metadata topic=astronomy, got: {session_metadata}"
        finally:
            # Best-effort cleanup that also runs when an assertion fails, so
            # the session is not left behind on the shared agent.
            try:
                client.delete_agent_session(shared_agent, session_key)
            except Exception:
                pass

    def test_send_message_to_session(self, client, shared_agent):
        """Send a message into an explicitly created session and expect events back.

        The test is skipped when the session cannot be created.
        """
        session_resp = client.create_agent_session(shared_agent)
        if not session_resp.success:
            pytest.skip(f"Could not create session: {session_resp.data}")
        session_key = session_resp.data.get("key")

        try:
            # Send message via execute_agent with explicit session
            exec_resp = client.execute_agent(
                agent_id=shared_agent,
                query_text="Tell me about vector search",
                session_id=session_key,
            )
            assert exec_resp.success, f"Send message failed: {exec_resp.data}"

            # Verify response has events with content
            events = exec_resp.data.get("events", [])
            assert len(events) > 0, f"Expected events in response, got: {exec_resp.data.keys()}"
        finally:
            # Best-effort cleanup, executed regardless of the assertions above.
            try:
                client.delete_agent_session(shared_agent, session_key)
            except Exception:
                pass
import uuid

import pytest

from utils.waiters import wait_for


@pytest.mark.core
class TestCompactionConfig:
    """Agent compaction configuration tests."""

    def test_create_agent_with_compaction_config(self, client, unique_id):
        """Verify compaction config persists on agent creation."""
        agent_key = f"compact_cfg_{unique_id}"
        resp = client.create_agent(
            name=f"Compaction Agent {unique_id}",
            agent_key=agent_key,
            compaction={
                "enabled": True,
                "threshold_percent": 70,
                "keep_recent_inputs": 2,
            },
        )
        assert resp.success, f"Create agent with compaction failed: {resp.status_code} - {resp.data}"

        try:
            get_resp = client.get_agent(agent_key)
            assert get_resp.success
            compaction = get_resp.data.get("compaction", {})
            assert compaction.get("enabled") is True, f"Compaction should be enabled: {compaction}"
            assert compaction.get("threshold_percent") == 70, f"Threshold should be 70: {compaction}"
            assert compaction.get("keep_recent_inputs") == 2, f"keep_recent_inputs should be 2: {compaction}"
        finally:
            # Best-effort removal of the throw-away agent.
            try:
                client.delete_agent(agent_key)
            except Exception:
                pass

    def test_update_agent_compaction_config(self, client, unique_id):
        """Verify compaction config can be updated on an existing agent."""
        agent_key = f"compact_upd_{unique_id}"
        resp = client.create_agent(
            name=f"Compaction Update {unique_id}",
            agent_key=agent_key,
        )
        assert resp.success, f"Create agent failed: {resp.status_code} - {resp.data}"

        try:
            update_resp = client.update_agent(
                agent_key,
                compaction={"enabled": True, "threshold_percent": 60, "keep_recent_inputs": 3},
            )
            assert update_resp.success, f"Update compaction config failed: {update_resp.status_code} - {update_resp.data}"

            get_resp = client.get_agent(agent_key)
            # Check the read-back succeeded before inspecting its payload, and
            # verify every field that was sent in the update.
            assert get_resp.success, f"Get agent failed: {get_resp.status_code} - {get_resp.data}"
            compaction = get_resp.data.get("compaction", {})
            assert compaction.get("enabled") is True, f"Compaction should be enabled: {compaction}"
            assert compaction.get("threshold_percent") == 60, f"Threshold should be 60: {compaction}"
            assert compaction.get("keep_recent_inputs") == 3, f"keep_recent_inputs should be 3: {compaction}"
        finally:
            try:
                client.delete_agent(agent_key)
            except Exception:
                pass


@pytest.mark.core
class TestManualCompaction:
    """Manual compaction via the events endpoint."""

    def test_manual_compaction_on_session(self, client, unique_id):
        """manualCompaction — create agent with keep_recent_inputs=1, send 3 turns, compact.

        After compaction the response must contain a ``compaction`` or
        ``compaction_started`` event, and listing with ``include_hidden=True``
        must return at least as many events as were visible before.
        """
        agent_key = f"compact_manual_{unique_id}"
        agent_resp = client.create_agent(
            name=f"Compact Manual {unique_id}",
            agent_key=agent_key,
            compaction={"enabled": True, "threshold_percent": 50, "keep_recent_inputs": 1},
        )
        assert agent_resp.success, f"Create agent failed: {agent_resp.status_code} - {agent_resp.data}"

        try:
            session_resp = client.create_agent_session(agent_key)
            assert session_resp.success, f"Create session failed: {session_resp.status_code} - {session_resp.data}"

            session_key = session_resp.data.get("key")
            try:
                wait_for(
                    lambda: client.get_agent_session(agent_key, session_key).success,
                    timeout=10,
                    interval=0.5,
                    description="session available",
                )

                for i, msg in enumerate(["Tell me about AI", "What about machine learning?", "How do neural networks work?"], 1):
                    turn = client.execute_agent(agent_key, msg, session_id=session_key)
                    assert turn.success, f"Turn {i} failed: {turn.status_code} - {turn.data}"

                wait_for(
                    lambda: len(client.list_session_events(agent_key, session_key, limit=100).data.get("events", [])) >= 6,
                    timeout=20,
                    interval=2,
                    description="at least 6 events (3 turns) to be committed",
                )

                events_before = client.list_session_events(agent_key, session_key, limit=100)
                visible_before = len(events_before.data.get("events", []))

                compact_resp = client.compact_session(agent_key, session_key)
                assert compact_resp.success or compact_resp.status_code == 201, f"Compact failed: {compact_resp.status_code} - {compact_resp.data}"

                compact_events = compact_resp.data.get("events", [])
                compact_types = [e.get("type") for e in compact_events]
                assert (
                    "compaction" in compact_types or "compaction_started" in compact_types
                ), f"Expected compaction event in response, got types: {compact_types}"

                all_events = client.list_session_events(agent_key, session_key, limit=100, include_hidden=True)
                total_after = len(all_events.data.get("events", []))
                assert total_after >= visible_before, f"Hidden events should still exist: total={total_after} visible_before={visible_before}"
            finally:
                # Inner cleanup: the session goes first, then the agent below.
                try:
                    client.delete_agent_session(agent_key, session_key)
                except Exception:
                    pass
        finally:
            try:
                client.delete_agent(agent_key)
            except Exception:
                pass

    def test_manual_compaction_not_enough_turns(self, client, shared_agent):
        """manualCompaction_streamingNotEnoughTurns_returnsError — compact empty/single-turn session.

        Passes when the compact call fails outright or when a successful
        response contains an event of type ``error``.
        """
        session_resp = client.create_agent_session(shared_agent)
        assert session_resp.success, f"Create session failed: {session_resp.status_code} - {session_resp.data}"

        session_key = session_resp.data.get("key")
        try:
            wait_for(
                lambda: client.get_agent_session(shared_agent, session_key).success,
                timeout=10,
                interval=0.5,
                description="session available",
            )

            compact_resp = client.compact_session(shared_agent, session_key)
            # Events are only inspected on success; a failed call already satisfies the test.
            compact_events = compact_resp.data.get("events", []) if compact_resp.success else []
            has_error = any(e.get("type") == "error" for e in compact_events)

            assert (
                not compact_resp.success or has_error
            ), f"Compact on empty session should fail or return error event: {compact_resp.status_code} - {compact_resp.data}"
        finally:
            try:
                client.delete_agent_session(shared_agent, session_key)
            except Exception:
                pass


@pytest.mark.core
class TestForkWithCompaction:
    """Fork session with compaction — ported from forkSession_withCompaction_compactsEvents."""

    def test_fork_with_compaction(self, client, agent_with_session):
        """Fork a session with compact_up_to_event_id and verify compaction occurs."""
        agent_key, session_key, events = agent_with_session

        if len(events) == 0:
            pytest.skip("No events in source session to compact")

        first_event_id = events[0].get("id")
        if not first_event_id:
            pytest.skip("Could not get first event ID")

        fork_resp = client.create_agent_session(
            agent_key,
            from_session={
                "session_key": session_key,
                "compact_up_to_event_id": first_event_id,
            },
        )
        assert fork_resp.success, f"Fork with compaction failed: {fork_resp.status_code} - {fork_resp.data}"

        forked_key = fork_resp.data.get("key")
        try:
            forked_events = client.list_session_events(agent_key, forked_key, limit=100)
            assert forked_events.success
            forked_list = forked_events.data.get("events", [])
            forked_types = [e.get("type") for e in forked_list]
            assert "compaction" in forked_types, f"Forked session should contain compaction event, got types: {forked_types}"
        finally:
            if forked_key:
                try:
                    client.delete_agent_session(agent_key, forked_key)
                except Exception:
                    pass

    def test_fork_include_up_to_event_id(self, client, agent_with_session):
        """forkSession_includeUpToEventId_copiesOnlyEventsUpToThatId.

        Only checks that the fork does not contain more events than the source.
        """
        agent_key, session_key, events = agent_with_session

        if len(events) < 2:
            pytest.skip("Need at least 2 events for include_up_to test")

        cutoff_event_id = events[0].get("id")
        fork_resp = client.create_agent_session(
            agent_key,
            from_session={
                "session_key": session_key,
                "include_up_to_event_id": cutoff_event_id,
            },
        )
        assert fork_resp.success, f"Fork failed: {fork_resp.status_code} - {fork_resp.data}"

        forked_key = fork_resp.data.get("key")
        try:
            forked_events = client.list_session_events(agent_key, forked_key, limit=100)
            forked_ids = [e.get("id") for e in forked_events.data.get("events", [])]
            assert len(forked_ids) <= len(events), f"Forked session should have fewer or equal events: forked={len(forked_ids)} source={len(events)}"
        finally:
            if forked_key:
                try:
                    client.delete_agent_session(agent_key, forked_key)
                except Exception:
                    pass

    def test_fork_include_up_to_bad_event_id(self, client, agent_with_session):
        """forkSession_includeUpToEventId_notFound_returnsBadRequest."""
        agent_key, session_key, _ = agent_with_session

        fork_resp = client.create_agent_session(
            agent_key,
            from_session={
                "session_key": session_key,
                "include_up_to_event_id": "aev_nonexistent_fake_id",
            },
        )
        try:
            assert fork_resp.status_code >= 400, f"Fork with bad event ID should fail: {fork_resp.status_code} - {fork_resp.data}"
        finally:
            # If the API wrongly accepted the request, a forked session now
            # exists; remove it so the failing run does not leak it.
            forked_key = fork_resp.data.get("key") if fork_resp.success and isinstance(fork_resp.data, dict) else None
            if forked_key:
                try:
                    client.delete_agent_session(agent_key, forked_key)
                except Exception:
                    pass


# ---- tests/services/agents/test_event_visibility.py ----
# Agent Event Visibility Tests
#
# Tests for hiding and unhiding agent session events, including error handling.


@pytest.mark.core
class TestEventVisibility:
    """Core tests for hiding and unhiding agent events."""

    def test_hide_and_unhide_event(self, client, shared_agent):
        """Hide an event, verify excluded from listing, unhide, verify reappears."""
        session_resp = client.create_agent_session(shared_agent)
        if not session_resp.success:
            pytest.skip(f"Could not create session: {session_resp.data}")
        session_key = session_resp.data.get("key")

        try:
            # Send message to generate events
            client.execute_agent(agent_id=shared_agent, query_text="Hello for visibility test", session_id=session_key)

            # List events
            events_resp = client.list_session_events(shared_agent, session_key)
            assert events_resp.success
            events = events_resp.data.get("events", [])
            assert len(events) > 0, "Expected at least one event"

            event_id = events[0].get("id")
            initial_count = len(events)

            # Hide
            hide_resp = client.hide_event(shared_agent, session_key, event_id)
            assert hide_resp.success, f"Hide failed: {hide_resp.data}"

            # Verify hidden from default listing
            visible_resp = client.list_session_events(shared_agent, session_key)
            visible_events = visible_resp.data.get("events", [])
            assert len(visible_events) == initial_count - 1
            assert all(e.get("id") != event_id for e in visible_events)

            # Unhide
            unhide_resp = client.unhide_event(shared_agent, session_key, event_id)
            assert unhide_resp.success, f"Unhide failed: {unhide_resp.data}"

            # Verify reappears
            after_resp = client.list_session_events(shared_agent, session_key)
            after_events = after_resp.data.get("events", [])
            assert len(after_events) == initial_count
        finally:
            # Best-effort cleanup that also runs when an assertion fails.
            try:
                client.delete_agent_session(shared_agent, session_key)
            except Exception:
                pass


@pytest.mark.regression
class TestEventVisibilityErrors:
    """Regression tests for event visibility error handling."""

    def test_hide_nonexistent_event_returns_404(self, client, shared_agent):
        """Hiding a nonexistent event should return 404."""
        session_resp = client.create_agent_session(shared_agent)
        if not session_resp.success:
            pytest.skip("Could not create session")
        session_key = session_resp.data.get("key")

        try:
            resp = client.hide_event(shared_agent, session_key, "aev_nonexistent")
            assert resp.status_code == 404, f"Expected 404, got {resp.status_code}"
        finally:
            # Best-effort cleanup that also runs when the status check fails.
            try:
                client.delete_agent_session(shared_agent, session_key)
            except Exception:
                pass
+""" + +import uuid + +import pytest + +from utils.waiters import wait_for + + +@pytest.mark.core +class TestSessionCrud: + """Session create, get, update, delete operations.""" + + def test_create_session_returns_key(self, client, shared_agent): + """testCreateSession — verify session key is returned.""" + resp = client.create_agent_session(shared_agent) + assert resp.success, f"Create session failed: {resp.status_code} - {resp.data}" + + session_key = resp.data.get("key") + assert session_key is not None, f"Response should contain 'key': {resp.data}" + assert resp.data.get("agent_key") == shared_agent + + try: + client.delete_agent_session(shared_agent, session_key) + except Exception: + pass + + def test_create_session_default_values(self, client, shared_agent): + """testCreateSessionDefaultValues — verify defaults are set.""" + resp = client.create_agent_session(shared_agent) + assert resp.success + + session_key = resp.data.get("key") + try: + assert resp.data.get("enabled") is True, f"New session should be enabled: {resp.data}" + finally: + if session_key: + try: + client.delete_agent_session(shared_agent, session_key) + except Exception: + pass + + def test_create_session_agent_not_found(self, client): + """testCreateSessionAgentNotFound — non-existent agent returns 404.""" + resp = client.create_agent_session(f"nonexistent_{uuid.uuid4().hex[:8]}") + assert resp.status_code == 404, f"Expected 404, got {resp.status_code}: {resp.data}" + + def test_get_session(self, client, shared_agent): + """testGetSession — verify all expected fields present.""" + create_resp = client.create_agent_session(shared_agent) + assert create_resp.success, f"Create session failed: {create_resp.status_code} - {create_resp.data}" + + session_key = create_resp.data.get("key") + try: + get_resp = client.get_agent_session(shared_agent, session_key) + assert get_resp.success, f"Get session failed: {get_resp.status_code}" + assert get_resp.data.get("key") == session_key + assert 
get_resp.data.get("agent_key") == shared_agent + assert "enabled" in get_resp.data + assert "created_at" in get_resp.data + finally: + try: + client.delete_agent_session(shared_agent, session_key) + except Exception: + pass + + def test_get_session_not_found(self, client, shared_agent): + """testGetSessionNotFound — non-existent session returns 404.""" + resp = client.get_agent_session(shared_agent, f"ase_fake_{uuid.uuid4().hex[:8]}") + assert resp.status_code == 404, f"Expected 404, got {resp.status_code}" + + def test_delete_session(self, client, shared_agent): + """testDeleteSession — delete and verify 404.""" + create_resp = client.create_agent_session(shared_agent) + assert create_resp.success, f"Create session failed: {create_resp.status_code} - {create_resp.data}" + + session_key = create_resp.data.get("key") + del_resp = client.delete_agent_session(shared_agent, session_key) + assert del_resp.success, f"Delete failed: {del_resp.status_code}" + + get_resp = client.get_agent_session(shared_agent, session_key) + assert get_resp.status_code == 404 + + def test_delete_session_not_found(self, client, shared_agent): + """testDeleteSessionNotFound — delete non-existent returns 404.""" + resp = client.delete_agent_session(shared_agent, f"ase_fake_{uuid.uuid4().hex[:8]}") + assert resp.status_code == 404, f"Expected 404, got {resp.status_code}" + + +@pytest.mark.core +class TestSessionUpdate: + """Session update operations — partial PATCH tests.""" + + def test_update_session_description(self, client, shared_agent): + """testUpdateSessionPartialUpdateDescriptionOnly.""" + create_resp = client.create_agent_session(shared_agent) + assert create_resp.success, f"Create session failed: {create_resp.status_code} - {create_resp.data}" + + session_key = create_resp.data.get("key") + try: + new_desc = f"Updated desc {uuid.uuid4().hex[:8]}" + update_resp = client.update_agent_session(shared_agent, session_key, description=new_desc) + assert update_resp.success, f"Update 
failed: {update_resp.status_code} - {update_resp.data}" + + get_resp = client.get_agent_session(shared_agent, session_key) + assert get_resp.data.get("description") == new_desc, f"Description not persisted: {get_resp.data.get('description')}" + finally: + try: + client.delete_agent_session(shared_agent, session_key) + except Exception: + pass + + def test_update_session_name(self, client, shared_agent): + """testUpdateSessionNameOnly.""" + create_resp = client.create_agent_session(shared_agent) + assert create_resp.success, f"Create session failed: {create_resp.status_code} - {create_resp.data}" + + session_key = create_resp.data.get("key") + try: + new_name = f"Session {uuid.uuid4().hex[:8]}" + update_resp = client.update_agent_session(shared_agent, session_key, name=new_name) + assert update_resp.success, f"Update failed: {update_resp.status_code} - {update_resp.data}" + + get_resp = client.get_agent_session(shared_agent, session_key) + assert get_resp.data.get("name") == new_name + finally: + try: + client.delete_agent_session(shared_agent, session_key) + except Exception: + pass + + def test_update_session_enabled(self, client, shared_agent): + """testUpdateSessionEnabledOnly — disable then re-enable.""" + create_resp = client.create_agent_session(shared_agent) + assert create_resp.success, f"Create session failed: {create_resp.status_code} - {create_resp.data}" + + session_key = create_resp.data.get("key") + try: + disable_resp = client.update_agent_session(shared_agent, session_key, enabled=False) + assert disable_resp.success, f"Disable failed: {disable_resp.status_code} - {disable_resp.data}" + + get_resp = client.get_agent_session(shared_agent, session_key) + assert get_resp.data.get("enabled") is False + + enable_resp = client.update_agent_session(shared_agent, session_key, enabled=True) + assert enable_resp.success + + get_resp2 = client.get_agent_session(shared_agent, session_key) + assert get_resp2.data.get("enabled") is True + finally: + try: + 
client.delete_agent_session(shared_agent, session_key) + except Exception: + pass + + def test_update_session_metadata(self, client, shared_agent): + """testUpdateSessionMetadataOnly.""" + create_resp = client.create_agent_session(shared_agent, metadata={"initial": "value"}) + assert create_resp.success, f"Create session failed: {create_resp.status_code} - {create_resp.data}" + + session_key = create_resp.data.get("key") + try: + new_meta = {"priority": "high", "status": "escalated"} + update_resp = client.update_agent_session(shared_agent, session_key, metadata=new_meta) + assert update_resp.success, f"Update failed: {update_resp.status_code} - {update_resp.data}" + + get_resp = client.get_agent_session(shared_agent, session_key) + metadata = get_resp.data.get("metadata", {}) + assert metadata.get("priority") == "high", f"Metadata not updated: {metadata}" + finally: + try: + client.delete_agent_session(shared_agent, session_key) + except Exception: + pass + + def test_update_session_nonexistent(self, client, shared_agent): + """testUpdateSessionNonexistent — update non-existent returns 404.""" + resp = client.update_agent_session(shared_agent, f"ase_fake_{uuid.uuid4().hex[:8]}", description="nope") + assert resp.status_code == 404, f"Expected 404, got {resp.status_code}" + + def test_update_session_with_special_characters(self, client, shared_agent): + """testUpdateSessionWithSpecialCharacters — unicode in name/description.""" + create_resp = client.create_agent_session(shared_agent) + assert create_resp.success, f"Create session failed: {create_resp.status_code} - {create_resp.data}" + + session_key = create_resp.data.get("key") + try: + update_resp = client.update_agent_session( + shared_agent, + session_key, + name="Session with emojis \U0001f680\U0001f4a1", + description="Description with accents: caf\u00e9, na\u00efve, r\u00e9sum\u00e9", + ) + assert update_resp.success, f"Update with special chars failed: {update_resp.status_code} - {update_resp.data}" + + 
get_resp = client.get_agent_session(shared_agent, session_key) + assert "\U0001f680" in get_resp.data.get("name", "") + finally: + try: + client.delete_agent_session(shared_agent, session_key) + except Exception: + pass diff --git a/tests/services/agents/test_session_fork.py b/tests/services/agents/test_session_fork.py new file mode 100644 index 0000000..24768a0 --- /dev/null +++ b/tests/services/agents/test_session_fork.py @@ -0,0 +1,117 @@ +""" +Agent Session Fork Tests + +Tests for forking agent sessions, including event copying and error handling. +""" + +import pytest + + +@pytest.mark.core +class TestSessionFork: + """Core tests for forking agent sessions.""" + + def test_fork_session_copies_events(self, client, shared_agent, unique_id): + """Fork a session and verify events are copied with new IDs.""" + session_resp = client.create_agent_session(shared_agent) + if not session_resp.success: + pytest.skip(f"Could not create session: {session_resp.data}") + session_key = session_resp.data.get("key") + + # Send message to generate events + client.execute_agent(agent_id=shared_agent, query_text="Hello", session_id=session_key) + + # List events from source session + events_resp = client.list_session_events(shared_agent, session_key) + assert events_resp.success + source_events = events_resp.data.get("events", []) + + # Fork session + fork_resp = client.create_agent_session( + shared_agent, + metadata={"forked": True}, + from_session={"session_key": session_key}, + ) + assert fork_resp.success, f"Fork failed: {fork_resp.status_code} - {fork_resp.data}" + forked_key = fork_resp.data.get("key") + + # Verify forked session has events + forked_events_resp = client.list_session_events(shared_agent, forked_key) + assert forked_events_resp.success + forked_events = forked_events_resp.data.get("events", []) + assert len(forked_events) == len(source_events), f"Expected {len(source_events)} events, got {len(forked_events)}" + + # Event IDs should be different + source_ids = 
{e.get("id") for e in source_events} + forked_ids = {e.get("id") for e in forked_events} + assert source_ids.isdisjoint(forked_ids), "Forked events should have new IDs" + + # Event types should match between source and fork + source_types = [e.get("type") for e in source_events] + forked_types = [e.get("type") for e in forked_events] + assert source_types == forked_types, f"Event types mismatch: source={source_types}, forked={forked_types}" + + try: + client.delete_agent_session(shared_agent, forked_key) + client.delete_agent_session(shared_agent, session_key) + except Exception: + pass + + def test_fork_empty_session(self, client, shared_agent): + """Fork a session with no events.""" + session_resp = client.create_agent_session(shared_agent) + if not session_resp.success: + pytest.skip(f"Could not create session: {session_resp.data}") + session_key = session_resp.data.get("key") + + fork_resp = client.create_agent_session( + shared_agent, + from_session={"session_key": session_key}, + ) + assert fork_resp.success, f"Fork empty session failed: {fork_resp.data}" + forked_key = fork_resp.data.get("key") + + forked_events = client.list_session_events(shared_agent, forked_key) + assert forked_events.success + assert len(forked_events.data.get("events", [])) == 0 + + try: + client.delete_agent_session(shared_agent, forked_key) + client.delete_agent_session(shared_agent, session_key) + except Exception: + pass + + +@pytest.mark.regression +class TestSessionForkErrors: + """Regression tests for session fork error handling.""" + + def test_fork_nonexistent_session_fails(self, client, shared_agent): + """Fork with invalid source session should fail.""" + resp = client.create_agent_session( + shared_agent, + from_session={"session_key": "ses_nonexistent_xyz"}, + ) + assert resp.status_code >= 400, f"Expected error, got {resp.status_code}" + + def test_fork_mutually_exclusive_fields_fails(self, client, shared_agent): + """Both include_up_to_event_id and compact_up_to_event_id 
should fail.""" + session_resp = client.create_agent_session(shared_agent) + if not session_resp.success: + pytest.skip("Could not create session") + session_key = session_resp.data.get("key") + + resp = client.create_agent_session( + shared_agent, + from_session={ + "session_key": session_key, + "include_up_to_event_id": "aev_fake", + "compact_up_to_event_id": "aev_fake", + }, + ) + assert resp.status_code == 400, f"Expected 400, got {resp.status_code}" + + try: + client.delete_agent_session(shared_agent, session_key) + except Exception: + pass diff --git a/tests/services/auth/test_api_key_lifecycle.py b/tests/services/auth/test_api_key_lifecycle.py new file mode 100644 index 0000000..9b8f1c2 --- /dev/null +++ b/tests/services/auth/test_api_key_lifecycle.py @@ -0,0 +1,77 @@ +""" +API Key Lifecycle Tests + +Core tests for API key create, list, enable, disable, and delete operations. +Never mutates the bootstrap key -- always creates disposable keys. +""" + +import pytest + + +@pytest.mark.core +@pytest.mark.serial +class TestApiKeyLifecycle: + """Tests for API key create, list, enable, disable, delete. + Never mutates the bootstrap key -- always creates disposable keys. 
+ """ + + def test_create_and_delete_api_key(self, client, shared_corpus, unique_id): + response = client.create_api_key( + name=f"test_key_{unique_id}", + api_key_role="serving", + corpus_keys=[shared_corpus], + ) + assert response.success, f"Create API key failed: {response.status_code} - {response.data}" + + key_id = response.data.get("id") or response.data.get("api_key_id") + assert key_id, f"No key ID in response: {response.data}" + + # Verify in list + list_resp = client.list_api_keys() + assert list_resp.success + key_ids = [k.get("id") for k in list_resp.data.get("api_keys", [])] + assert key_id in key_ids, f"Created key {key_id} not found in list: {key_ids}" + + # Delete + del_resp = client.delete_api_key(key_id) + assert del_resp.success, f"Delete API key failed: {del_resp.data}" + + def test_disable_enable_api_key(self, client, shared_corpus, unique_id): + # Create disposable key with a corpus + response = client.create_api_key( + name=f"toggle_key_{unique_id}", + api_key_role="serving", + corpus_keys=[shared_corpus], + ) + if not response.success: + pytest.skip(f"Could not create API key: {response.data}") + + key_id = response.data.get("id") or response.data.get("api_key_id") + + try: + # Disable + disable_resp = client.disable_api_key(key_id) + assert disable_resp.success, f"Disable failed: {disable_resp.data}" + + # Verify disabled state + list_resp = client.list_api_keys() + assert list_resp.success + disabled_key = next((k for k in list_resp.data.get("api_keys", []) if k.get("id") == key_id), None) + assert disabled_key is not None, f"Key {key_id} not found in list" + assert disabled_key.get("enabled") is False, f"Key should be disabled: {disabled_key}" + + # Enable + enable_resp = client.enable_api_key(key_id) + assert enable_resp.success, f"Enable failed: {enable_resp.data}" + + # Verify enabled state + list_resp2 = client.list_api_keys() + assert list_resp2.success + enabled_key = next((k for k in list_resp2.data.get("api_keys", []) if 
k.get("id") == key_id), None) + assert enabled_key is not None, f"Key {key_id} not found after enable" + assert enabled_key.get("enabled") is True, f"Key should be enabled: {enabled_key}" + finally: + try: + client.delete_api_key(key_id) + except Exception: + pass diff --git a/tests/services/auth/test_api_key_validation.py b/tests/services/auth/test_api_key_validation.py new file mode 100644 index 0000000..ab6fd77 --- /dev/null +++ b/tests/services/auth/test_api_key_validation.py @@ -0,0 +1,43 @@ +""" +API Key Validation Tests + +Sanity-level checks that the configured API key is valid, that invalid +keys are rejected, and that authentication response time is acceptable. +""" + +import pytest + +from utils.client import VectaraClient +from utils.config import Config + + +@pytest.mark.sanity +class TestApiKeyValidation: + """Sanity checks for API key validity.""" + + def test_health_check(self, client): + """Test that the API key is valid and can connect.""" + response = client.health_check() + + assert response.success, f"API authentication failed: {response.status_code} - {response.data}" + assert response.data is not None, "Health check returned no data" + assert "corpora" in response.data or isinstance(response.data, list), f"Expected corpora structure, got: {type(response.data)}" + + def test_invalid_api_key_rejected(self, config): + """Test that invalid API keys are properly rejected.""" + # Create client with invalid key + invalid_config = Config() + invalid_config.set_api_key("invalid_key_12345") + + invalid_client = VectaraClient(invalid_config) + response = invalid_client.health_check() + + assert not response.success, "Invalid API key should be rejected" + assert response.status_code in [401, 403], f"Expected 401 or 403 for invalid key, got {response.status_code}" + + def test_response_time_acceptable(self, client): + """Test that authentication response time is acceptable.""" + response = client.health_check() + + # Authentication should complete within 
5 seconds + assert response.elapsed_ms < 5000, f"Authentication took too long: {response.elapsed_ms:.1f}ms" diff --git a/tests/services/auth/test_app_client_lifecycle.py b/tests/services/auth/test_app_client_lifecycle.py new file mode 100644 index 0000000..175e3ad --- /dev/null +++ b/tests/services/auth/test_app_client_lifecycle.py @@ -0,0 +1,127 @@ +""" +App Client Lifecycle Tests + +Tests for app client create, read, update, and delete operations. +""" + +import pytest + +from utils.waiters import wait_for + + +@pytest.fixture(scope="module", autouse=True) +def check_app_clients_available(client): + """Skip all tests if app clients API is not available.""" + resp = client.list_app_clients(limit=1) + if not resp.success: + pytest.skip("App clients API not available") + + +@pytest.mark.core +@pytest.mark.serial +class TestAppClientLifecycle: + """App client CRUD operations.""" + + def test_create_app_client(self, client, unique_id): + """Test creating a client_credentials app client.""" + name = f"test_client_{unique_id}" + response = client.create_app_client(name=name, type="client_credentials") + + try: + assert response.success, f"Create app client failed: {response.status_code} - {response.data}" + assert response.data.get("id") is not None, "Response should contain 'id'" + assert response.data.get("client_id") is not None, "Response should contain 'client_id'" + assert response.data.get("client_secret") is not None, "Response should contain 'client_secret'" + finally: + client_id = response.data.get("id") + if client_id: + try: + client.delete_app_client(client_id) + except Exception: + pass + + def test_list_app_clients(self, client, unique_id): + """Test listing app clients contains a created client.""" + name = f"test_list_client_{unique_id}" + create_resp = client.create_app_client(name=name, type="client_credentials") + if not create_resp.success: + pytest.skip(f"Could not create app client: {create_resp.data}") + + client_id = create_resp.data.get("id") 
+ try: + wait_for( + lambda: any(c.get("id") == client_id for c in client.list_app_clients().data.get("app_clients", [])), + timeout=10, + interval=1, + description="app client to appear in listing", + ) + + list_resp = client.list_app_clients() + assert list_resp.success, f"List app clients failed: {list_resp.status_code}" + clients = list_resp.data.get("app_clients", []) + client_ids = [c.get("id") for c in clients] + assert client_id in client_ids, f"Created client {client_id} not in listing" + finally: + if client_id: + try: + client.delete_app_client(client_id) + except Exception: + pass + + def test_get_app_client(self, client, unique_id): + """Test retrieving a specific app client.""" + name = f"test_get_client_{unique_id}" + create_resp = client.create_app_client(name=name, type="client_credentials") + if not create_resp.success: + pytest.skip(f"Could not create app client: {create_resp.data}") + + client_id = create_resp.data.get("id") + try: + get_resp = client.get_app_client(client_id) + assert get_resp.success, f"Get app client failed: {get_resp.status_code}" + assert get_resp.data.get("id") == client_id + assert get_resp.data.get("name") == name + finally: + if client_id: + try: + client.delete_app_client(client_id) + except Exception: + pass + + def test_update_app_client(self, client, unique_id): + """Test updating an app client description.""" + name = f"test_update_client_{unique_id}" + create_resp = client.create_app_client(name=name, type="client_credentials") + if not create_resp.success: + pytest.skip(f"Could not create app client: {create_resp.data}") + + client_id = create_resp.data.get("id") + try: + new_desc = f"Updated description {unique_id}" + update_resp = client.update_app_client(client_id, description=new_desc) + assert update_resp.success, f"Update app client failed: {update_resp.status_code}" + + get_resp = client.get_app_client(client_id) + assert get_resp.success + assert get_resp.data.get("description") == new_desc, f"Description 
not persisted: {get_resp.data.get('description')!r}" + finally: + if client_id: + try: + client.delete_app_client(client_id) + except Exception: + pass + + def test_delete_app_client(self, client, unique_id): + """Test deleting an app client and verifying 404.""" + name = f"test_delete_client_{unique_id}" + create_resp = client.create_app_client(name=name, type="client_credentials") + if not create_resp.success: + pytest.skip(f"Could not create app client: {create_resp.data}") + + client_id = create_resp.data.get("id") + + delete_resp = client.delete_app_client(client_id) + assert delete_resp.success, f"Delete app client failed: {delete_resp.status_code}" + + get_resp = client.get_app_client(client_id) + assert get_resp.status_code == 404, f"Deleted app client should return 404, got {get_resp.status_code}" diff --git a/tests/services/auth/test_permissions.py b/tests/services/auth/test_permissions.py new file mode 100644 index 0000000..5873509 --- /dev/null +++ b/tests/services/auth/test_permissions.py @@ -0,0 +1,53 @@ +""" +Permission Tests + +Core-level checks that the API key has the correct permissions +for QueryService and IndexService operations, and that basic +corpus listing works. +""" + +import pytest + + +@pytest.mark.core +class TestPermissions: + """Core checks for API key permissions.""" + + def test_api_key_has_query_permission(self, client, shared_corpus, sample_document): + """Test that API key has QueryService permission.""" + # First index a document to ensure there's something to query + doc_response = client.index_document( + corpus_key=shared_corpus, + document_id="auth_test_doc", + text=sample_document["text"], + metadata=sample_document["metadata"], + ) + + # Now test query permission + response = client.query( + corpus_key=shared_corpus, + query_text="test query", + limit=1, + ) + + assert response.success, f"QueryService permission check failed: {response.status_code}. " f"Ensure API key has QueryService role enabled." 
+ results = response.data.get("search_results", []) + assert isinstance(results, list), f"Expected search_results list, got: {type(results)}" + + def test_api_key_has_index_permission(self, client, shared_corpus): + """Test that API key has IndexService permission.""" + response = client.index_document( + corpus_key=shared_corpus, + document_id="auth_permission_test", + text="Testing IndexService permission", + ) + + assert response.success, f"IndexService permission check failed: {response.status_code}. " f"Ensure API key has IndexService role enabled." + assert response.data.get("id") is not None, f"Index response should contain document id, got: {response.data}" + + def test_list_corpora_works(self, client): + """Test basic corpus listing (requires valid authentication).""" + response = client.list_corpora(limit=10) + + assert response.success, f"List corpora failed: {response.status_code} - {response.data}" + assert "corpora" in response.data or isinstance(response.data, list), "Expected corpora list in response" diff --git a/tests/services/chat/test_chat.py b/tests/services/chat/test_chat.py new file mode 100644 index 0000000..04940eb --- /dev/null +++ b/tests/services/chat/test_chat.py @@ -0,0 +1,102 @@ +""" +Chat Tests + +Core-level tests for chat/conversation operations including +creating, listing, adding turns, and deleting chats. + +Note: Chat requires a configured rephraser on the instance. +Tests will skip gracefully if rephraser is not available. 
+""" + +import pytest + + +@pytest.mark.core +class TestChat: + """Core checks for chat/conversation operations.""" + + def test_create_chat(self, client, seeded_shared_corpus): + """Test starting a new chat conversation.""" + response = client.create_chat( + corpus_key=seeded_shared_corpus, + query_text="Tell me about AI", + ) + + # Skip if chat rephraser not configured on this instance + if not response.success and "rephraser" in str(response.data).lower(): + pytest.skip("Chat rephraser not configured on this instance") + + assert response.success, f"Create chat failed: {response.status_code} - {response.data}" + + # Should return chat ID + chat_id = response.data.get("chat_id") + assert chat_id is not None, f"Response should contain chat_id, got: {response.data}" + if chat_id: + # Cleanup + try: + client.delete_chat(chat_id) + except Exception: + pass + + def test_list_chats(self, client): + """Test listing chat conversations.""" + response = client.list_chats(limit=10) + + assert response.success, f"List chats failed: {response.status_code} - {response.data}" + assert isinstance(response.data, (dict, list)), f"Expected dict or list response, got: {type(response.data)}" + if isinstance(response.data, dict) and "chats" in response.data: + assert isinstance(response.data["chats"], list), f"Expected chats to be a list, got: {type(response.data['chats'])}" + + def test_chat_turn(self, client, seeded_shared_corpus): + """Test adding turns to a chat conversation.""" + # Create chat + create_response = client.create_chat( + corpus_key=seeded_shared_corpus, + query_text="What is machine learning?", + ) + + if not create_response.success: + pytest.skip("Could not create chat for turn test") + + chat_id = create_response.data.get("chat_id") + if not chat_id: + pytest.skip("No chat_id in response") + + try: + # Add follow-up turn + turn_response = client.add_chat_turn( + chat_id=chat_id, + query_text="Can you give me an example?", + corpus_key=seeded_shared_corpus, + ) + + 
assert turn_response.success, f"Add chat turn failed: {turn_response.status_code} - {turn_response.data}" + assert turn_response.data is not None, "Turn response should have data" + turn_has_content = turn_response.data.get("answer") is not None or turn_response.data.get("turn_id") is not None + assert turn_has_content, f"Turn response should have answer or turn_id, got: {turn_response.data}" + finally: + # Cleanup + client.delete_chat(chat_id) + + def test_delete_chat(self, client, seeded_shared_corpus): + """Test deleting a chat conversation.""" + # Create chat + create_response = client.create_chat( + corpus_key=seeded_shared_corpus, + query_text="Test chat for deletion", + ) + + if not create_response.success: + pytest.skip("Could not create chat for deletion test") + + chat_id = create_response.data.get("chat_id") + if not chat_id: + pytest.skip("No chat_id in response") + + # Delete chat + delete_response = client.delete_chat(chat_id) + + assert delete_response.success, f"Delete chat failed: {delete_response.status_code} - {delete_response.data}" + + get_resp = client.get_chat(chat_id) + assert get_resp.status_code == 404, f"Deleted chat should return 404, got {get_resp.status_code}" diff --git a/tests/services/chat/test_chat_multiturn.py b/tests/services/chat/test_chat_multiturn.py new file mode 100644 index 0000000..2c8c07d --- /dev/null +++ b/tests/services/chat/test_chat_multiturn.py @@ -0,0 +1,90 @@ +""" +Chat Multi-Turn Tests + +Deep verification of chat turn counts, IDs, and content substantiveness. +""" + +import pytest + + +@pytest.mark.core +class TestChatMultiTurn: + """Chat multi-turn deep verification.""" + + def _create_chat(self, client, corpus_key): + """Create a chat and return (chat_id, turn_id). 
Fail on error.""" + resp = client.create_chat(corpus_key, "What is artificial intelligence?") + assert resp.success, f"Create chat failed: {resp.status_code} - {resp.data}" + chat_id = resp.data.get("chat_id") or resp.data.get("id") + turn_id = resp.data.get("turn_id") + assert chat_id, f"No chat_id in response: {resp.data}" + return chat_id, turn_id + + def test_multiturn_turn_count_and_ids(self, client, seeded_shared_corpus): + """Create chat + add turn, verify turn count and distinct IDs.""" + chat_id, turn_id_1 = self._create_chat(client, seeded_shared_corpus) + + try: + add_resp = client.add_chat_turn(chat_id, "Tell me about vector databases", seeded_shared_corpus) + assert add_resp.success, f"Add turn failed: {add_resp.status_code} - {add_resp.data}" + turn_id_2 = add_resp.data.get("turn_id") + + list_resp = client.list_chat_turns(chat_id) + assert list_resp.success, f"List turns failed: {list_resp.status_code}" + turns = list_resp.data.get("turns", []) + assert len(turns) >= 2, f"Expected at least 2 turns, got {len(turns)}" + + turn_ids = [t.get("id") for t in turns] + assert len(set(turn_ids)) == len(turn_ids), f"Turn IDs should be distinct: {turn_ids}" + finally: + try: + client.delete_chat(chat_id) + except Exception: + pass + + def test_get_individual_turns_by_id(self, client, seeded_shared_corpus): + """GET each turn by ID, verify chat_id and fields.""" + chat_id, _ = self._create_chat(client, seeded_shared_corpus) + + try: + client.add_chat_turn(chat_id, "Tell me about machine learning", seeded_shared_corpus) + + list_resp = client.list_chat_turns(chat_id) + assert list_resp.success + turns = list_resp.data.get("turns", []) + + for turn in turns: + turn_id = turn.get("id") + if not turn_id: + continue + get_resp = client.get_chat_turn(chat_id, turn_id) + assert get_resp.success, f"GET turn {turn_id} failed: {get_resp.status_code}" + assert get_resp.data.get("id") == turn_id + assert get_resp.data.get("chat_id") == chat_id + finally: + try: + 
client.delete_chat(chat_id) + except Exception: + pass + + def test_turn_answer_is_substantive(self, client, seeded_shared_corpus): + """Verify each turn answer has real content, not empty.""" + chat_id, _ = self._create_chat(client, seeded_shared_corpus) + + try: + add_resp = client.add_chat_turn(chat_id, "How do vector databases work?", seeded_shared_corpus) + assert add_resp.success + + list_resp = client.list_chat_turns(chat_id) + turns = list_resp.data.get("turns", []) + + turns_with_answers = [t for t in turns if t.get("answer")] + assert len(turns_with_answers) > 0, f"Expected at least one turn with an answer" + for turn in turns_with_answers: + answer = turn["answer"] + assert len(answer) > 20, f"Turn answer should be substantive (>20 chars), got {len(answer)} chars: {answer[:50]!r}" + finally: + try: + client.delete_chat(chat_id) + except Exception: + pass diff --git a/tests/services/chat/test_chat_turns.py b/tests/services/chat/test_chat_turns.py new file mode 100644 index 0000000..caa76f2 --- /dev/null +++ b/tests/services/chat/test_chat_turns.py @@ -0,0 +1,145 @@ +""" +Chat Turn CRUD Tests + +Core-level tests for chat turn operations including listing, retrieving, +updating, and deleting individual turns within a chat conversation. + +Note: Chat requires a configured rephraser on the instance. +Tests will skip gracefully if rephraser is not available. +""" + +import re + +import pytest + + +def _create_chat(client, corpus_key): + """Create a chat and return (chat_id, turn_id, answer). 
Fail on error.""" + response = client.create_chat( + corpus_key=corpus_key, + query_text="Tell me about AI", + ) + + if not response.success and "rephraser" in str(response.data).lower(): + pytest.skip("Chat rephraser not configured on this instance") + + assert response.success, f"Create chat failed: {response.status_code} - {response.data}" + + chat_id = response.data.get("chat_id") + turn_id = response.data.get("turn_id") + answer = response.data.get("answer") + + assert chat_id, f"No chat_id in create_chat response: {response.data}" + + return chat_id, turn_id, answer + + +@pytest.mark.core +class TestChatTurns: + """Core checks for chat turn CRUD operations.""" + + def test_get_single_chat(self, client, seeded_shared_corpus): + """Create a chat and GET /v2/chats/{id} to verify chat_id is present.""" + chat_id, _, _ = _create_chat(client, seeded_shared_corpus) + + try: + response = client.get_chat(chat_id) + + assert response.success, f"Get chat failed: {response.status_code} - {response.data}" + assert response.data.get("id") is not None, f"Response should contain id, got: {response.data}" + assert re.match(r"cht_.+", response.data["id"]), f"id should match cht_.+ pattern, got: {response.data['id']}" + finally: + try: + client.delete_chat(chat_id) + except Exception: + pass + + def test_chat_not_found_returns_404(self, client): + """GET a non-existent chat should return 404.""" + response = client.get_chat("cht_nonexistent_000000000000") + + assert response.status_code == 404, f"Expected 404 for non-existent chat, got {response.status_code}" + + def test_list_chat_turns(self, client, seeded_shared_corpus): + """Create a chat, list its turns, and verify at least 1 turn exists.""" + chat_id, _, _ = _create_chat(client, seeded_shared_corpus) + + try: + response = client.list_chat_turns(chat_id) + + assert response.success, f"List turns failed: {response.status_code} - {response.data}" + turns = response.data.get("turns", response.data if isinstance(response.data, 
list) else []) + assert len(turns) >= 1, f"Expected at least 1 turn, got {len(turns)}" + + first_turn = turns[0] + assert first_turn.get("id") is not None, f"Turn should have id, got: {first_turn}" + finally: + try: + client.delete_chat(chat_id) + except Exception: + pass + + def test_get_chat_turn(self, client, seeded_shared_corpus): + """Create a chat, get the turn by ID, and verify fields.""" + chat_id, turn_id, _ = _create_chat(client, seeded_shared_corpus) + + if not turn_id: + pytest.skip("No turn_id in create_chat response") + + try: + response = client.get_chat_turn(chat_id, turn_id) + + assert response.success, f"Get turn failed: {response.status_code} - {response.data}" + assert response.data.get("id") == turn_id, f"turn id mismatch: expected {turn_id}, got {response.data.get('id')}" + assert re.match(r"trn_.+", response.data["id"]), f"turn id should match trn_.+ pattern, got: {response.data['id']}" + assert response.data.get("chat_id") == chat_id, f"chat_id mismatch in turn: expected {chat_id}, got {response.data.get('chat_id')}" + finally: + try: + client.delete_chat(chat_id) + except Exception: + pass + + def test_update_chat_turn(self, client, seeded_shared_corpus): + """Create a chat, PATCH the turn with enabled=false, then GET to verify.""" + chat_id, turn_id, _ = _create_chat(client, seeded_shared_corpus) + + if not turn_id: + pytest.skip("No turn_id in create_chat response") + + try: + update_response = client.update_chat_turn( + chat_id=chat_id, + turn_id=turn_id, + enabled=False, + ) + + assert update_response.success, f"Update turn failed: {update_response.status_code} - {update_response.data}" + + get_response = client.get_chat_turn(chat_id, turn_id) + assert get_response.success, f"Get turn after update failed: {get_response.status_code}" + assert get_response.data.get("enabled") is False, f"Expected enabled=False after update, got: {get_response.data.get('enabled')}" + finally: + try: + client.delete_chat(chat_id) + except Exception: + pass 
+ + def test_delete_chat_turn(self, client, seeded_shared_corpus): + """Create a chat, delete the turn, and verify it returns 404 or error.""" + chat_id, turn_id, _ = _create_chat(client, seeded_shared_corpus) + + if not turn_id: + pytest.skip("No turn_id in create_chat response") + + try: + delete_response = client.delete_chat_turn(chat_id, turn_id) + + assert delete_response.success, f"Delete turn failed: {delete_response.status_code} - {delete_response.data}" + + get_response = client.get_chat_turn(chat_id, turn_id) + assert get_response.status_code in (404, 400), f"Deleted turn should return 404 or 400, got {get_response.status_code}" + finally: + try: + client.delete_chat(chat_id) + except Exception: + pass diff --git a/tests/services/chat/test_chat_validation.py b/tests/services/chat/test_chat_validation.py new file mode 100644 index 0000000..6dad7e8 --- /dev/null +++ b/tests/services/chat/test_chat_validation.py @@ -0,0 +1,101 @@ +""" +Chat Validation Tests + +Validation and edge case tests for chat/conversation operations including +bad requests, response field completeness, and query length limits. + +Note: Chat requires a configured rephraser on the instance. +Tests will skip gracefully if rephraser is not available. 
+""" + +import pytest + + +@pytest.mark.core +class TestChatValidation: + """Core validation checks for chat operations.""" + + def test_chat_bad_request_missing_corpus(self, client): + """POST /v2/chats without search.corpora should return 400.""" + response = client.post( + "/v2/chats", + data={ + "query": "Tell me about AI", + "search": {}, + "chat": {"store": True}, + }, + ) + + assert response.status_code == 400, f"Expected 400 for missing corpora, got {response.status_code} - {response.data}" + + def test_chat_response_field_completeness(self, client, seeded_shared_corpus): + """Create a chat and verify chat_id, turn_id, answer, and search_results are present.""" + response = client.create_chat( + corpus_key=seeded_shared_corpus, + query_text="What is artificial intelligence?", + ) + + if not response.success and "rephraser" in str(response.data).lower(): + pytest.skip("Chat rephraser not configured on this instance") + + assert response.success, f"Create chat failed: {response.status_code} - {response.data}" + + chat_id = response.data.get("chat_id") + assert chat_id is not None, f"Response missing chat_id: {response.data}" + assert response.data.get("turn_id") is not None, f"Response missing turn_id: {response.data}" + assert response.data.get("answer") is not None, f"Response missing answer: {response.data}" + assert response.data.get("search_results") is not None, f"Response missing search_results: {response.data}" + + if chat_id: + try: + client.delete_chat(chat_id) + except Exception: + pass + + +@pytest.mark.regression +class TestChatEdgeCases: + """Regression tests for chat query length limits.""" + + def test_chat_query_max_length_accepted(self, client, seeded_shared_corpus): + """A 5000 character query should be accepted.""" + long_query = "a" * 5000 + + response = client.create_chat( + corpus_key=seeded_shared_corpus, + query_text=long_query, + ) + + if not response.success and "rephraser" in str(response.data).lower(): + pytest.skip("Chat 
rephraser not configured on this instance") + + assert response.success, f"5000 char query should succeed, got: {response.status_code} - {response.data}" + + chat_id = response.data.get("chat_id") + if chat_id: + try: + client.delete_chat(chat_id) + except Exception: + pass + + def test_chat_query_exceeds_max_length(self, client, seeded_shared_corpus): + """A 5001 character query should return an error.""" + long_query = "a" * 5001 + + response = client.create_chat( + corpus_key=seeded_shared_corpus, + query_text=long_query, + ) + + if not response.success and "rephraser" in str(response.data).lower(): + pytest.skip("Chat rephraser not configured on this instance") + + assert not response.success, f"5001 char query should fail, got: {response.status_code} - {response.data}" + assert response.status_code in (400, 413, 422), f"Expected 400/413/422 for oversized query, got {response.status_code}" + + chat_id = response.data.get("chat_id") if isinstance(response.data, dict) else None + if chat_id: + try: + client.delete_chat(chat_id) + except Exception: + pass diff --git a/tests/services/conftest.py b/tests/services/conftest.py new file mode 100644 index 0000000..c6904b0 --- /dev/null +++ b/tests/services/conftest.py @@ -0,0 +1,199 @@ +""" +Shared fixtures for service-level tests. + +Provides per-test corpus isolation so that each test function gets its own +fresh corpus that is cleaned up automatically. + +Also provides module-scoped shared fixtures for tests that just need a corpus +as a container (indexing, query, chat) but don't test corpus CRUD itself. 
+""" + +import logging +import uuid + +import pytest + +from utils.waiters import wait_for + +logger = logging.getLogger(__name__) + + +def _corpus_is_queryable(client, corpus_key): + """Return True once a corpus responds to a get request.""" + resp = client.get_corpus(corpus_key) + return resp.success + + +def _documents_indexed(client, corpus_key, expected_count): + """Return the document list once at least *expected_count* docs are present.""" + resp = client.list_documents(corpus_key, limit=100) + if not resp.success: + return None + docs = resp.data.get("documents", []) + if len(docs) >= expected_count: + return docs + return None + + +@pytest.fixture +def test_corpus(client, unique_id): + """Create a disposable corpus for a single test and delete it on teardown. + + Yields the corpus key string. + """ + corpus_name = f"svc_test_{unique_id}" + corpus_key = f"svc_test_{uuid.uuid4().hex}" + + response = client.create_corpus( + name=corpus_name, + key=corpus_key, + description="Automated service test corpus - safe to delete", + ) + + if not response.success: + pytest.skip(f"Could not create test corpus: {response.data}") + + corpus_key = response.data.get("key") + if not corpus_key: + pytest.skip(f"Corpus created but no key returned: {response.data}") + + wait_for(lambda: _corpus_is_queryable(client, corpus_key), timeout=10, interval=1, description="corpus to become queryable") + + try: + yield corpus_key + finally: + client.delete_corpus(corpus_key) + + +@pytest.fixture +def seeded_corpus(client, test_corpus): + """Seed *test_corpus* with three sample documents and yield the corpus key. + + The documents are removed during teardown (best-effort) so that other + fixtures or tests don't see leftover data. 
@pytest.fixture(scope="module")
def shared_corpus(client):
    """Create a module-scoped corpus and yield its key.

    Use for tests that need a corpus as a container (indexing, query, chat)
    but don't test corpus CRUD itself. Each test should use unique doc IDs
    and clean up after itself.

    Skips the requesting tests when the corpus cannot be created. The corpus
    is deleted on teardown; deletion is best-effort and failures are logged.

    Yields:
        The corpus key reported by the API, or the requested key when the
        response carries none.
    """
    corpus_key = f"shared_{uuid.uuid4().hex}"
    corpus_name = f"shared_test_{uuid.uuid4().hex[:8]}"

    response = client.create_corpus(
        name=corpus_name,
        key=corpus_key,
        description="Shared module test corpus - safe to delete",
    )

    if not response.success:
        pytest.skip(f"Could not create shared corpus: {response.data}")

    # Prefer the key echoed by the API; fall back to the one we requested.
    actual_key = response.data.get("key", corpus_key)

    try:
        # The wait sits inside the try so that an exception raised while
        # waiting still removes the corpus that was just created.
        wait_for(lambda: _corpus_is_queryable(client, actual_key), timeout=10, interval=1, description="shared corpus to become queryable")
        yield actual_key
    finally:
        try:
            client.delete_corpus(actual_key)
        except Exception:
            # Teardown must not mask test results, but a leaked corpus should be visible in the logs.
            logger.warning("Failed to clean up shared corpus %s", actual_key, exc_info=True)


@pytest.fixture(scope="module")
def seeded_shared_corpus(client, shared_corpus):
    """Seed the module-scoped shared corpus with sample documents and yield its key.

    For read-only query/chat tests. Do NOT mutate or delete these docs in tests.

    Documents that fail to index are logged and excluded from the count the
    fixture waits for. No per-document cleanup is done here: the corpus is
    removed by the ``shared_corpus`` fixture this one depends on.
    """
    doc_ids = []
    docs = [
        {
            "id": f"seed_{uuid.uuid4().hex[:8]}",
            "text": "Artificial intelligence and machine learning are transforming industries. Deep learning neural networks can process vast amounts of data to find patterns.",
            "metadata": {"category": "technology", "topic": "ai"},
        },
        {
            "id": f"seed_{uuid.uuid4().hex[:8]}",
            "text": "Vector databases enable semantic search capabilities. Unlike keyword search, vector search understands meaning and context of queries.",
            "metadata": {"category": "technology", "topic": "databases"},
        },
        {
            "id": f"seed_{uuid.uuid4().hex[:8]}",
            "text": "Climate change is affecting weather patterns around the world. Renewable energy sources like solar and wind are becoming more important.",
            "metadata": {"category": "science", "topic": "climate"},
        },
        {
            "id": f"seed_{uuid.uuid4().hex[:8]}",
            "text": "The Python programming language is popular for data science. Libraries like NumPy, Pandas, and TensorFlow make it easy to work with data.",
            "metadata": {"category": "technology", "topic": "programming"},
        },
        {
            "id": f"seed_{uuid.uuid4().hex[:8]}",
            "text": "Space exploration has led to many technological innovations. NASA and SpaceX are working on missions to Mars.",
            "metadata": {"category": "science", "topic": "space"},
        },
    ]

    for doc in docs:
        resp = client.index_document(
            corpus_key=shared_corpus,
            document_id=doc["id"],
            text=doc["text"],
            metadata=doc["metadata"],
        )
        if resp.success:
            doc_ids.append(doc["id"])
        else:
            # Surface partial seeding instead of silently waiting for fewer documents.
            logger.warning("Failed to seed document %s: %s", doc["id"], resp.data)

    # Wait only for the documents that were actually accepted.
    wait_for(lambda: _documents_indexed(client, shared_corpus, len(doc_ids)), timeout=15, interval=1, description="shared corpus documents to be indexed")

    # Corpus deletion by shared_corpus fixture handles full cleanup.
    yield shared_corpus
+""" + +import uuid + +import pytest + +from utils.client import VectaraClient +from utils.waiters import wait_for + + +@pytest.mark.core +@pytest.mark.serial +class TestCorpusAccess: + """Corpus access control with scoped API keys.""" + + def test_corpus_access_with_scoped_key(self, client, config): + """Create serving key scoped to one corpus, verify it can only query that corpus.""" + uid = uuid.uuid4().hex[:8] + corpus_key = f"access_test_{uid}" + + create_corpus_resp = client.create_corpus(name=f"Access Test {uid}", key=corpus_key) + assert create_corpus_resp.success, f"Create corpus failed: {create_corpus_resp.status_code} - {create_corpus_resp.data}" + + try: + wait_for( + lambda: client.get_corpus(corpus_key).success, + timeout=10, + interval=1, + description="corpus to be available", + ) + + doc_id = f"access_doc_{uid}" + client.index_document(corpus_key, doc_id, "Test content for access control verification.") + wait_for( + lambda: client.get_document(corpus_key, doc_id).success, + timeout=15, + interval=1, + description="document to be indexed", + ) + + key_name = f"test_scoped_{uid}" + create_key_resp = client.create_api_key( + name=key_name, + api_key_role="serving", + corpus_keys=[corpus_key], + ) + assert create_key_resp.success, f"Create API key failed: {create_key_resp.status_code} - {create_key_resp.data}" + + key_id = create_key_resp.data.get("id") + api_key_value = create_key_resp.data.get("api_key") or create_key_resp.data.get("secret_key") + assert api_key_value, f"API key response missing 'api_key'/'secret_key' value: {create_key_resp.data}" + + try: + scoped_client = VectaraClient(config) + scoped_client._session = None + scoped_client.session.headers.update({"x-api-key": api_key_value}) + + query_resp = scoped_client.query( + corpus_key=corpus_key, + query_text="test content", + limit=5, + ) + assert query_resp.success, f"Scoped key should query its corpus: {query_resp.status_code} - {query_resp.data}" + results = 
@pytest.mark.sanity
class TestCorpusCreate:
    """Corpus creation checks."""

    def test_create_corpus(self, client, unique_id):
        """Create a corpus and verify the API returns its key.

        The corpus is deleted afterwards even when the key assertion fails,
        falling back to the requested key if the response carried none.
        """
        import uuid

        corpus_key = f"crud_test_{uuid.uuid4().hex}"
        response = client.create_corpus(
            name=f"Test Corpus {unique_id}",
            key=corpus_key,
            description="Created by API test suite",
        )

        assert response.success, f"Corpus creation failed: {response.status_code} - {response.data}"

        # Get the actual key returned by the API
        actual_key = response.data.get("key")
        try:
            assert actual_key, "No key returned in corpus creation response"
        finally:
            # Best-effort cleanup; must run whether or not the assertion passed.
            try:
                client.delete_corpus(actual_key or corpus_key)
            except Exception:
                pass


@pytest.mark.core
class TestCorpusCrud:
    """Corpus get, update, and delete checks."""

    def test_get_corpus(self, client, test_corpus):
        """Retrieve the fixture corpus and check the returned key matches."""
        response = client.get_corpus(test_corpus)

        assert response.success, f"Get corpus failed: {response.status_code} - {response.data}"
        assert response.data.get("key") == test_corpus, f"Corpus key mismatch: expected {test_corpus}"

    def test_update_corpus_description(self, client, test_corpus):
        """Update the corpus description and confirm a follow-up GET reflects it."""
        # A timestamp makes the description unique per run.
        new_description = f"Updated at {time.time()}"

        response = client.update_corpus(
            corpus_key=test_corpus,
            description=new_description,
        )

        assert response.success, f"Corpus update failed: {response.status_code} - {response.data}"

        # Verify update; check the GET itself first so a failed read is not
        # misreported as a missing description.
        get_response = client.get_corpus(test_corpus)
        assert get_response.success, f"Get corpus failed: {get_response.status_code} - {get_response.data}"
        assert get_response.data.get("description") == new_description, "Description update not reflected"

    def test_delete_corpus(self, client, unique_id):
        """Create a throwaway corpus, delete it, and expect a 404 on the next GET."""
        import uuid

        corpus_key = f"del_test_{uuid.uuid4().hex}"
        # Create corpus to delete
        create_response = client.create_corpus(
            name=f"Delete Test {unique_id}",
            key=corpus_key,
            description="Will be deleted",
        )
        assert create_response.success, f"Setup: Corpus creation failed: {create_response.data}"

        # Get the actual key returned by the API
        actual_key = create_response.data.get("key")
        assert actual_key, "No key returned in corpus creation response"

        # Delete the corpus using the actual key
        delete_response = client.delete_corpus(actual_key)

        assert delete_response.success, f"Corpus deletion failed: {delete_response.status_code} - {delete_response.data}"

        # Verify deletion - should get 404
        get_response = client.get_corpus(actual_key)
        assert get_response.status_code == 404, f"Deleted corpus should return 404, got {get_response.status_code}"


@pytest.mark.regression
class TestCorpusErrorCases:
    """Corpus error and edge case checks."""

    def test_create_duplicate_key_corpus_fails(self, client, test_corpus):
        """Creating a corpus whose key already exists must be rejected."""
        # Posts directly to the endpoint, reusing the key of the existing fixture corpus.
        response = client.post(
            "/v2/corpora",
            data={
                "key": test_corpus,
                "name": "Duplicate Key Test",
            },
        )

        # Should fail with conflict (409) or bad request (400)
        assert response.status_code in [400, 409], f"Duplicate key corpus creation should fail, got {response.status_code}"

    def test_get_nonexistent_corpus_returns_404(self, client):
        """Requesting a corpus key that does not exist returns 404."""
        response = client.get_corpus("nonexistent_corpus_xyz123")

        assert response.status_code == 404, f"Expected 404 for non-existent corpus, got {response.status_code}"

    def test_corpus_operations_response_times(self, client, test_corpus):
        """GET and LIST corpus calls finish within their latency budgets (3s / 5s)."""
        # Get operation should be fast
        response = client.get_corpus(test_corpus)

        assert response.elapsed_ms < 3000, f"Get corpus took too long: {response.elapsed_ms:.1f}ms"

        # List operation may take longer but should still be reasonable
        list_response = client.list_corpora(limit=10)

        assert list_response.elapsed_ms < 5000, f"List corpora took too long: {list_response.elapsed_ms:.1f}ms"
@pytest.mark.core
class TestCorpusLifecycle:
    """Core checks for corpus lifecycle operations."""

    def test_enable_disable_corpus(self, client, test_corpus):
        """Switch a corpus off, confirm via GET, then switch it back on."""

        def enabled_flag_is(expected):
            # Build a wait_for predicate: True once GET reports the expected flag, None otherwise.
            def probe():
                fetched = client.get_corpus(test_corpus)
                return True if fetched.success and fetched.data.get("enabled") is expected else None

            return probe

        switched_off = client.update_corpus(corpus_key=test_corpus, enabled=False)
        assert switched_off.success, f"Disable corpus failed: {switched_off.status_code} - {switched_off.data}"

        wait_for(enabled_flag_is(False), timeout=10, interval=1, description="corpus to become disabled")

        current = client.get_corpus(test_corpus)
        assert current.success, f"Get corpus failed: {current.status_code}"
        assert current.data.get("enabled") is False, f"Expected enabled=False, got: {current.data.get('enabled')}"

        switched_on = client.update_corpus(corpus_key=test_corpus, enabled=True)
        assert switched_on.success, f"Re-enable corpus failed: {switched_on.status_code} - {switched_on.data}"

        wait_for(enabled_flag_is(True), timeout=10, interval=1, description="corpus to become enabled")

    def test_replace_filter_attributes(self, client, test_corpus):
        """Replacing a corpus's filter attributes succeeds and returns a job_id."""
        replacement_attrs = [
            {"name": "category", "level": "document", "type": "text"},
            {"name": "priority", "level": "document", "type": "integer"},
        ]

        outcome = client.replace_filter_attributes(corpus_key=test_corpus, filter_attributes=replacement_attrs)

        assert outcome.success, f"Replace filter attributes failed: {outcome.status_code} - {outcome.data}"
        assert outcome.data.get("job_id") is not None, f"Expected job_id in response, got: {outcome.data}"

    def test_compute_corpus_size(self, client, seeded_corpus):
        """Size of a seeded corpus reports positive document and part counts."""
        outcome = client.compute_corpus_size(seeded_corpus)
        assert outcome.success, f"Compute size failed: {outcome.status_code} - {outcome.data}"

        size_data = outcome.data

        # Presence is checked before magnitude so a missing field gives a clear message.
        assert size_data.get("used_docs") is not None, f"Expected used_docs in response, got: {size_data}"
        assert size_data["used_docs"] > 0, f"Expected used_docs > 0, got: {size_data['used_docs']}"

        assert size_data.get("used_parts") is not None, f"Expected used_parts in response, got: {size_data}"
        assert size_data["used_parts"] > 0, f"Expected used_parts > 0, got: {size_data['used_parts']}"

    def test_reset_corpus(self, client, seeded_corpus):
        """Resetting a seeded corpus leaves it with no documents."""

        def listed_documents(listing):
            return listing.data.get("documents", [])

        initial = client.list_documents(seeded_corpus, limit=100)
        assert initial.success, f"List docs failed: {initial.status_code}"
        assert len(listed_documents(initial)) > 0, "Seeded corpus should have documents before reset"

        reset = client.reset_corpus(seeded_corpus)
        assert reset.success, f"Reset corpus failed: {reset.status_code} - {reset.data}"

        def corpus_is_empty():
            # wait_for predicate: True once a successful listing is empty, None otherwise.
            page = client.list_documents(seeded_corpus, limit=100)
            emptied = page.success and len(listed_documents(page)) == 0
            return True if emptied else None

        wait_for(corpus_is_empty, timeout=30, interval=2, description="documents to be removed after reset")

        final = client.list_documents(seeded_corpus, limit=100)
        assert final.success, f"List docs after reset failed: {final.status_code}"
        remaining = len(listed_documents(final))
        assert remaining == 0, f"Expected 0 documents after reset, got: {remaining}"
@pytest.mark.regression
class TestCorpusValidation:
    """Corpus input validation."""

    def test_invalid_corpus_key_characters(self, client):
        """A corpus key made of punctuation characters is rejected with 400."""
        bad_key = "invalid!@#$%^&*()"

        outcome = client.create_corpus(name="Invalid Key Test", key=bad_key)

        assert not outcome.success, "Creating corpus with invalid key chars should fail"
        assert outcome.status_code == 400, f"Expected 400 for invalid key chars, got {outcome.status_code}"

    def test_corpus_key_length_limit(self, client):
        """A 300-character corpus key is rejected with 400."""
        outcome = client.create_corpus(name="Long Key Test", key="a" * 300)

        assert not outcome.success, "Creating corpus with 300+ char key should fail"
        assert outcome.status_code == 400, f"Expected 400 for key length violation, got {outcome.status_code}"
@pytest.mark.core
class TestFilterAttributes:
    """Core checks for corpus filter attribute configuration."""

    def test_create_corpus_with_metadata(self, client, unique_id):
        """Create a corpus with custom filter attributes and verify they persist.

        The corpus is removed in a ``finally`` block so a failed assertion
        does not leave it behind.
        """
        import uuid

        corpus_key = f"meta_test_{uuid.uuid4().hex}"
        response = client.create_corpus(
            name=f"Metadata Corpus {unique_id}",
            key=corpus_key,
            description="Corpus with filter attributes",
            filter_attributes=[
                {
                    "name": "category",
                    "level": "document",
                    "type": "text",
                },
                {
                    "name": "priority",
                    "level": "document",
                    "type": "integer",
                },
            ],
        )

        assert response.success, f"Corpus creation with metadata failed: {response.status_code} - {response.data}"

        actual_key = response.data.get("key")
        try:
            # Validate the key before using it, so a missing key is reported
            # as such rather than as a failed GET.
            assert actual_key, "No key returned in corpus creation response"

            # Verify filter attributes were persisted
            get_resp = client.get_corpus(actual_key)
            assert get_resp.success, f"GET corpus failed: {get_resp.status_code}"
            attrs = get_resp.data.get("filter_attributes", [])
            attr_names = [a.get("name") for a in attrs]
            assert "category" in attr_names, f"Expected 'category' in filter attributes, got: {attr_names}"
            assert "priority" in attr_names, f"Expected 'priority' in filter attributes, got: {attr_names}"
        finally:
            # Best-effort cleanup; fall back to the requested key if the API returned none.
            try:
                client.delete_corpus(actual_key or corpus_key)
            except Exception:
                pass
@pytest.mark.regression
class TestFilterAttributeTypes:
    """Multiple filter types on a single corpus."""

    @staticmethod
    def _filtered_results(client, corpus_key, metadata_filter, label):
        """Run the shared query restricted by ``metadata_filter`` and return its search results.

        ``label`` ("Text", "Integer", "Boolean") only customises the assertion
        messages. Fails the test if the query is unsuccessful or returns nothing.
        """
        resp = client.post(
            "/v2/query",
            data={
                "query": "research and data",
                "search": {
                    "corpora": [{"corpus_key": corpus_key, "metadata_filter": metadata_filter}],
                    "limit": 10,
                },
            },
        )
        assert resp.success, f"{label} filter query failed: {resp.status_code}"
        results = resp.data.get("search_results", [])
        assert len(results) > 0, f"{label} filter should return results"
        return results

    def test_text_integer_boolean_filters(self, client, unique_id):
        """Create corpus with 3 filter types, query with each, verify correct results.

        Two documents with opposite metadata are indexed; each filter is
        expected to match exactly one of them, identified by a keyword in
        its text ("quantum" or "climate").
        """
        corpus_key = f"filter_types_{unique_id}"
        create_resp = client.create_corpus(
            name=f"Filter Types {unique_id}",
            key=corpus_key,
            filter_attributes=[
                {"name": "category", "level": "part", "type": "text", "indexed": True},
                {"name": "priority", "level": "part", "type": "integer", "indexed": True},
                {"name": "is_public", "level": "part", "type": "boolean", "indexed": True},
            ],
        )
        assert create_resp.success, f"Create corpus with filters failed: {create_resp.status_code} - {create_resp.data}"

        try:
            wait_for(
                lambda: client.get_corpus(corpus_key).success,
                timeout=10,
                interval=1,
                description="corpus available",
            )

            doc1_id = f"tech_doc_{unique_id}"
            index1 = client.index_document(
                corpus_key,
                doc1_id,
                "Advanced quantum computing research enables faster drug discovery.",
                metadata={"category": "tech", "priority": 1, "is_public": True},
            )
            # Fail here with the real error rather than later as an opaque wait timeout.
            assert index1.success, f"Index {doc1_id} failed: {index1.status_code} - {index1.data}"

            doc2_id = f"science_doc_{unique_id}"
            index2 = client.index_document(
                corpus_key,
                doc2_id,
                "Confidential climate modeling data shows accelerating ice melt patterns.",
                metadata={"category": "science", "priority": 5, "is_public": False},
            )
            assert index2.success, f"Index {doc2_id} failed: {index2.status_code} - {index2.data}"

            wait_for(
                lambda: (client.get_document(corpus_key, doc1_id).success and client.get_document(corpus_key, doc2_id).success),
                timeout=20,
                interval=2,
                description="both documents indexed",
            )

            text_results = self._filtered_results(client, corpus_key, "part.category = 'tech'", "Text")
            assert all(
                "quantum" in r.get("text", "").lower() for r in text_results
            ), f"Text filter for 'tech' should only return tech doc: {[r.get('text', '')[:50] for r in text_results]}"

            int_results = self._filtered_results(client, corpus_key, "part.priority >= 3", "Integer")
            assert all(
                "climate" in r.get("text", "").lower() for r in int_results
            ), f"Integer filter >= 3 should only return science doc: {[r.get('text', '')[:50] for r in int_results]}"

            bool_results = self._filtered_results(client, corpus_key, "part.is_public = true", "Boolean")
            assert all(
                "quantum" in r.get("text", "").lower() for r in bool_results
            ), f"Boolean filter is_public=true should only return tech doc: {[r.get('text', '')[:50] for r in bool_results]}"
        finally:
            # Best-effort cleanup of the dedicated corpus.
            try:
                client.delete_corpus(corpus_key)
            except Exception:
                pass
@pytest.mark.core
class TestCorpusPagination:
    """Core checks for corpus listing and pagination."""

    def test_list_corpora(self, client):
        """Listing corpora succeeds and the payload carries a corpora collection."""
        listing = client.list_corpora(limit=100)
        assert listing.success, f"List corpora failed: {listing.status_code} - {listing.data}"

        # Accept either a dict with a "corpora" entry or a bare list payload.
        payload = listing.data
        has_corpora = "corpora" in payload or isinstance(payload, list)
        assert has_corpora, "Expected corpora in response"

    def test_list_corpora_pagination(self, client):
        """Fetch a small first page and, when a page key is offered, the page after it."""
        first_page = client.list_corpora(limit=2)
        assert first_page.success, f"Paginated list failed: {first_page.status_code}"

        next_key = first_page.data.get("metadata", {}).get("page_key")
        if not next_key:
            # No further page available; nothing more to exercise.
            return

        second_page = client.list_corpora(limit=2, page_key=next_key)
        assert second_page.success, f"Second page request failed: {second_page.status_code}"
@pytest.fixture
def custom_dims_corpus(client):
    """Function-scoped corpus with custom dimensions configured.

    Creates a corpus declaring the ``importance`` and ``recency`` custom
    dimensions, waits until GET on it succeeds, and yields its key. Skips
    the test when creation fails. The corpus is deleted on teardown, also
    when the wait raises.
    """
    corpus_key = f"dims_test_{uuid.uuid4().hex}"
    response = client.create_corpus(
        name=f"Custom Dims Test {uuid.uuid4().hex[:8]}",
        key=corpus_key,
        description="Corpus with custom dimensions for testing",
        custom_dimensions=[
            {"name": "importance", "indexing_default": 0, "querying_default": 0},
            {"name": "recency", "indexing_default": 0, "querying_default": 0},
        ],
    )
    if not response.success:
        pytest.skip(f"Could not create custom dims corpus: {response.data}")

    # Prefer the key echoed by the API; fall back to the one we requested.
    actual_key = response.data.get("key", corpus_key)
    try:
        # Inside the try so an exception from the wait still deletes the corpus.
        wait_for(
            lambda: client.get_corpus(actual_key).success,
            timeout=10,
            interval=1,
            description="custom dims corpus to become queryable",
        )
        yield actual_key
    finally:
        # Best-effort cleanup.
        try:
            client.delete_corpus(actual_key)
        except Exception:
            pass


@pytest.mark.core
class TestCustomDimensions:
    """Core tests for custom dimension indexing and querying."""

    def test_custom_dimensions_boost(self, client, custom_dims_corpus, unique_id):
        """Custom dimensions should boost relevant parts in query results.

        Indexes one document with a high-importance and a low-importance part,
        queries with weights favouring importance, and expects the
        high-importance part to rank first.
        """
        doc_id = f"dims_doc_{unique_id}"
        parts = [
            {
                "text": "This is a high-importance document about quantum computing breakthroughs.",
                "metadata": {"section": "important"},
                "custom_dimensions": {"importance": 0.95, "recency": 0.85},
            },
            {
                "text": "This is a low-importance note about office supplies.",
                "metadata": {"section": "filler"},
                "custom_dimensions": {"importance": 0.1, "recency": 0.2},
            },
        ]

        index_response = client.index_document_parts(
            corpus_key=custom_dims_corpus,
            document_id=doc_id,
            parts=parts,
        )
        assert index_response.success, f"Index failed: {index_response.status_code} - {index_response.data}"

        try:
            # Wait for indexing: a non-empty document listing is treated as "indexed".
            wait_for(
                lambda: client.list_documents(custom_dims_corpus, limit=1).data.get("documents", []),
                timeout=15,
                interval=1,
                description="custom dims doc to be indexed",
            )

            # Query with dimension weights that favor importance
            query_response = client.query_corpus(
                corpus_key=custom_dims_corpus,
                query_text="What are the latest breakthroughs?",
                limit=5,
                custom_dimensions={"importance": 0.8, "recency": 0.5},
            )
            assert query_response.success, f"Query failed: {query_response.status_code} - {query_response.data}"

            results = query_response.data.get("search_results", [])
            assert len(results) > 0, "Expected at least one result"

            # First result should be the high-importance part
            first_result_text = results[0].get("text", "")
            assert (
                "quantum computing" in first_result_text.lower() or "high-importance" in first_result_text.lower()
            ), f"Expected high-importance part first, got: {first_result_text[:100]}"
        finally:
            # Cleanup runs even when an assertion above fails.
            try:
                client.delete_document(custom_dims_corpus, doc_id)
            except Exception:
                pass
def _discard_document(client, corpus_key, doc_id):
    """Best-effort removal of a test document; never raises."""
    try:
        client.delete_document(corpus_key, doc_id)
    except Exception:
        pass


@pytest.mark.sanity
class TestDocumentIndex:
    """Document indexing checks."""

    def test_index_single_document(self, client, shared_corpus, unique_id, sample_document):
        """Index one document and verify the response carries its id."""
        doc_id = f"single_doc_{unique_id}"

        response = client.index_document(
            corpus_key=shared_corpus,
            document_id=doc_id,
            text=sample_document["text"],
            metadata=sample_document["metadata"],
        )

        try:
            assert response.success, f"Document indexing failed: {response.status_code} - {response.data}"
            assert response.data.get("id") is not None, f"Index response should contain document id, got: {response.data}"
        finally:
            # The corpus is shared by the whole module, so remove what this test added.
            _discard_document(client, shared_corpus, doc_id)


@pytest.mark.core
class TestDocumentCrud:
    """Document get, delete, and update operations."""

    def test_get_document(self, client, shared_corpus, unique_id):
        """Index a document, then retrieve it by id."""
        doc_id = f"get_doc_{unique_id}"

        # First index a document
        index_response = client.index_document(
            corpus_key=shared_corpus,
            document_id=doc_id,
            text="Document for retrieval test.",
        )
        assert index_response.success, "Setup: Document indexing failed"

        try:
            # Retrieve the document
            response = client.get_document(shared_corpus, doc_id)

            assert response.success, f"Get document failed: {response.status_code} - {response.data}"
            assert response.data.get("id") == doc_id, f"Document ID mismatch: expected {doc_id}"
        finally:
            # Keep the shared corpus free of leftovers from this test.
            _discard_document(client, shared_corpus, doc_id)

    def test_delete_document(self, client, shared_corpus, unique_id):
        """Index a document, delete it, and expect a 404 on the next GET."""
        doc_id = f"delete_doc_{unique_id}"

        # Index document
        index_response = client.index_document(
            corpus_key=shared_corpus,
            document_id=doc_id,
            text="Document to be deleted.",
        )
        assert index_response.success, "Setup: Document indexing failed"

        # Delete document
        delete_response = client.delete_document(shared_corpus, doc_id)

        assert delete_response.success, f"Document deletion failed: {delete_response.status_code} - {delete_response.data}"

        # Verify deletion - should get 404
        get_response = client.get_document(shared_corpus, doc_id)
        assert get_response.status_code == 404, f"Deleted document should return 404, got {get_response.status_code}"

    def test_update_document_by_delete_and_reindex(self, client, shared_corpus, unique_id):
        """Update a document by deleting it and re-indexing under the same id."""
        doc_id = f"update_doc_{unique_id}"

        # Index original document
        original_response = client.index_document(
            corpus_key=shared_corpus,
            document_id=doc_id,
            text="Original content.",
            metadata={"version": 1},
        )
        assert original_response.success, "Setup: Original document indexing failed"

        try:
            # Delete the original document
            delete_response = client.delete_document(shared_corpus, doc_id)
            assert delete_response.success, f"Delete failed: {delete_response.data}"

            # Re-index with updated content
            update_response = client.index_document(
                corpus_key=shared_corpus,
                document_id=doc_id,
                text="Updated content with new information.",
                metadata={"version": 2},
            )

            assert update_response.success, f"Document re-index failed: {update_response.status_code} - {update_response.data}"
        finally:
            # Whichever version exists at this point, remove it from the shared corpus.
            _discard_document(client, shared_corpus, doc_id)
+ + index_resp = client.index_document(test_corpus, doc_id, doc_text) + assert index_resp.success, f"Index failed: {index_resp.status_code} - {index_resp.data}" + + wait_for( + lambda: client.get_document(test_corpus, doc_id).success, + timeout=15, + interval=1, + description="document to be indexed", + ) + + query_resp = client.query(test_corpus, "Krakatoa volcano eruption", limit=10) + assert query_resp.success, f"Query failed: {query_resp.status_code}" + results = query_resp.data.get("search_results", []) + found = any("krakatoa" in r.get("text", "").lower() for r in results) + assert found, f"Expected to find Krakatoa doc in results, got {len(results)} results" + + delete_resp = client.delete_document(test_corpus, doc_id) + assert delete_resp.success, f"Delete failed: {delete_resp.status_code}" + + wait_for( + lambda: client.get_document(test_corpus, doc_id).status_code == 404, + timeout=15, + interval=1, + description="document to be deleted", + ) + + def _krakatoa_gone(): + qr = client.query(test_corpus, "Krakatoa volcano eruption", limit=10) + if not qr.success: + return False + hits = qr.data.get("search_results", []) + return not any("krakatoa" in r.get("text", "").lower() for r in hits) + + wait_for(_krakatoa_gone, timeout=30, interval=3, description="Krakatoa to disappear from search") + + final_query = client.query(test_corpus, "Krakatoa volcano eruption", limit=10) + assert final_query.success + final_results = final_query.data.get("search_results", []) + assert not any( + "krakatoa" in r.get("text", "").lower() for r in final_results + ), f"Deleted doc should not appear in results, but found Krakatoa in {len(final_results)} results" diff --git a/tests/services/indexing/test_document_metadata_ops.py b/tests/services/indexing/test_document_metadata_ops.py new file mode 100644 index 0000000..f922548 --- /dev/null +++ b/tests/services/indexing/test_document_metadata_ops.py @@ -0,0 +1,115 @@ +""" +Document Metadata Operations Tests + +Tests for document 
@pytest.mark.core
class TestDocumentMetadataOps:
    """Core tests for document metadata update operations."""

    @staticmethod
    def _discard_document(client, corpus_key, doc_id):
        """Best-effort removal of a test document; never raises."""
        try:
            client.delete_document(corpus_key, doc_id)
        except Exception:
            pass

    def test_index_multipart_document(self, client, shared_corpus, unique_id):
        """Index a document with multiple parts and metadata."""
        doc_id = f"multipart_{unique_id}"
        parts = [
            {
                "text": "This is the first part about artificial intelligence.",
                "metadata": {"section": "intro", "importance": "high"},
            },
            {
                "text": "This is the second part about machine learning applications.",
                "metadata": {"section": "details", "importance": "medium"},
            },
        ]
        response = client.index_document_parts(
            corpus_key=shared_corpus,
            document_id=doc_id,
            parts=parts,
            metadata={"title": "AI Overview", "lang": "en"},
        )
        assert response.success, f"Multipart index failed: {response.status_code} - {response.data}"

        try:
            # Verify document was indexed with correct metadata
            get_resp = client.get_document(shared_corpus, doc_id)
            assert get_resp.success, f"Get indexed doc failed: {get_resp.data}"
            doc_metadata = get_resp.data.get("metadata", {})
            assert doc_metadata.get("title") == "AI Overview", f"Expected title 'AI Overview', got: {doc_metadata}"
        finally:
            # Cleanup runs even when an assertion above fails.
            self._discard_document(client, shared_corpus, doc_id)

    def test_patch_document_metadata(self, client, shared_corpus, unique_id):
        """PATCH document metadata -- should merge with existing."""
        doc_id = f"patch_meta_{unique_id}"
        # Index with initial metadata
        index_resp = client.index_document(
            corpus_key=shared_corpus,
            document_id=doc_id,
            text="Document for metadata patching.",
            metadata={"title": "Original", "lang": "en"},
        )
        # Fail on the setup step itself rather than on a confusing PATCH error.
        assert index_resp.success, f"Setup: Document indexing failed: {index_resp.status_code} - {index_resp.data}"

        try:
            # PATCH with new key
            response = client.update_document_metadata(
                corpus_key=shared_corpus,
                document_id=doc_id,
                metadata={"new_key": "new_value"},
            )
            assert response.success, f"PATCH metadata failed: {response.status_code} - {response.data}"

            # Verify PATCH response contains the new key
            patched = response.data.get("metadata", response.data)
            assert "new_key" in str(patched), f"New key not in PATCH response: {patched}"

            # Verify via GET that new key is persisted
            get_resp = client.get_document(shared_corpus, doc_id)
            assert get_resp.success, f"Get doc after PATCH failed: {get_resp.data}"
            doc_metadata = get_resp.data.get("metadata", {})
            assert doc_metadata.get("new_key") == "new_value", f"New key not persisted after PATCH: {doc_metadata}"

            # Merge semantics: keys that were not part of the PATCH must survive.
            assert doc_metadata.get("title") == "Original", f"Existing 'title' lost after PATCH: {doc_metadata}"
            assert doc_metadata.get("lang") == "en", f"Existing 'lang' lost after PATCH: {doc_metadata}"
        finally:
            self._discard_document(client, shared_corpus, doc_id)

    def test_replace_document_metadata(self, client, shared_corpus, unique_id):
        """PUT document metadata -- should replace entirely."""
        doc_id = f"replace_meta_{unique_id}"
        # Index with initial metadata
        index_resp = client.index_document(
            corpus_key=shared_corpus,
            document_id=doc_id,
            text="Document for metadata replacement.",
            metadata={"title": "Original", "lang": "en", "extra": "will_be_removed"},
        )
        assert index_resp.success, f"Setup: Document indexing failed: {index_resp.status_code} - {index_resp.data}"

        try:
            # PUT replaces all metadata
            new_metadata = {"title": "Replaced", "lang": "fr"}
            response = client.replace_document_metadata(
                corpus_key=shared_corpus,
                document_id=doc_id,
                metadata=new_metadata,
            )
            assert response.success, f"PUT metadata failed: {response.status_code} - {response.data}"

            # Verify: PUT replaces entirely — old keys removed, new keys present
            get_response = client.get_document(shared_corpus, doc_id)
            assert get_response.success, f"Get doc after PUT failed: {get_response.data}"
            doc_metadata = get_response.data.get("metadata", {})
            assert doc_metadata.get("title") == "Replaced", f"Title not replaced: {doc_metadata}"
            assert doc_metadata.get("lang") == "fr", f"Lang not updated: {doc_metadata}"
            assert "extra" not in doc_metadata, f"Old 'extra' key should be removed after PUT: {doc_metadata}"
        finally:
            self._discard_document(client, shared_corpus, doc_id)
client.bulk_delete_documents( + test_corpus, + document_ids=doc_ids, + async_mode=False, + ) + assert delete_resp.success or delete_resp.status_code == 202, f"Bulk delete failed: {delete_resp.status_code} - {delete_resp.data}" + + wait_for( + lambda: all(client.get_document(test_corpus, d).status_code == 404 for d in doc_ids), + timeout=30, + interval=2, + description="all documents to be deleted", + ) + + +@pytest.mark.regression +class TestDocumentEdgeCases: + """Document edge case tests.""" + + def test_delete_document_with_special_chars(self, client, test_corpus, unique_id): + """Test deleting a document with special characters in ID.""" + doc_id = f"doc-special-chars_{unique_id}" + resp = client.index_document(test_corpus, doc_id, "Content with special ID") + assert resp.success, f"Index failed: {resp.status_code}" + + wait_for( + lambda: client.get_document(test_corpus, doc_id).success, + timeout=15, + interval=1, + description="document to be indexed", + ) + + delete_resp = client.delete_document(test_corpus, doc_id) + assert delete_resp.success, f"Delete failed: {delete_resp.status_code}" diff --git a/tests/services/indexing/test_file_upload.py b/tests/services/indexing/test_file_upload.py new file mode 100644 index 0000000..cab72bd --- /dev/null +++ b/tests/services/indexing/test_file_upload.py @@ -0,0 +1,132 @@ +""" +File Upload Tests + +Tests for file upload operations including simple text files +and PDF uploads with table extraction configuration. 
+""" + +import os +import tempfile +import uuid +from pathlib import Path + +import pytest + +from utils.waiters import wait_for + +TESTDATA_DIR = Path(__file__).parent.parent.parent.parent / "fixtures" / "testdata" + + +@pytest.mark.core +class TestFileUpload: + """Core tests for file upload operations.""" + + def test_upload_simple_file(self, client, shared_corpus, unique_id): + """Upload a simple text file and verify it appears.""" + # Create a temp text file + with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False) as f: + f.write("This is a test document about artificial intelligence and semantic search.") + temp_path = f.name + + try: + response = client.upload_file( + corpus_key=shared_corpus, + file_path=temp_path, + metadata={"source": "test_upload", "doc_id": unique_id}, + ) + assert response.success, f"File upload failed: {response.status_code} - {response.data}" + + # Verify document appears in corpus + doc_id = response.data.get("id") + assert doc_id, f"No document ID in upload response: {response.data}" + + wait_for( + lambda: client.get_document(shared_corpus, doc_id).success, + timeout=15, + interval=1, + description="uploaded file to appear as document", + ) + finally: + os.unlink(temp_path) + + def test_upload_pdf_with_table_extraction(self, client, unique_id): + """Upload PDF with table extraction config and validate extracted tables.""" + pdf_path = TESTDATA_DIR / "table_simple.pdf" + expected_path = TESTDATA_DIR / "table_simple.json" + + if not pdf_path.exists(): + pytest.skip(f"Test PDF not found at {pdf_path}") + if not expected_path.exists(): + pytest.skip(f"Expected schema not found at {expected_path}") + + # Create dedicated corpus for this test + corpus_key = f"upload_test_{uuid.uuid4().hex}" + corpus_response = client.create_corpus( + name=f"Upload Test {uuid.uuid4().hex[:8]}", + key=corpus_key, + description="Corpus for file upload testing", + ) + if not corpus_response.success: + pytest.skip(f"Could not create corpus: 
{corpus_response.data}") + + actual_key = corpus_response.data.get("key", corpus_key) + + try: + wait_for( + lambda: client.get_corpus(actual_key).success, + timeout=10, + interval=1, + description="upload test corpus to become queryable", + ) + + # Upload with table extraction + upload_response = client.upload_file( + corpus_key=actual_key, + file_path=str(pdf_path), + metadata={"source": "pdf_table_test"}, + table_extraction_config={ + "extract_tables": True, + "extractor": {"name": "gmft"}, + }, + ) + if not upload_response.success and "Tabular data extraction" in str(upload_response.data): + pytest.skip("Table extraction not available in this environment") + assert upload_response.success, f"PDF upload failed: {upload_response.status_code} - {upload_response.data}" + + # Get the document ID from upload response + doc_id = upload_response.data.get("id") + if doc_id: + # Wait for document to be processed + wait_for( + lambda: client.get_document(actual_key, doc_id).success, + timeout=60, + interval=2, + description="uploaded PDF to be processed", + ) + + # Load expected table structure + with open(expected_path) as f: + import json + + expected = json.load(f) + + # Retrieve and validate + doc_response = client.get_document(actual_key, doc_id) + assert doc_response.success, f"Get doc failed: {doc_response.status_code}" + + # Verify tables were extracted + tables = doc_response.data.get("tables", []) + if tables: + # Validate table structure matches expected + assert len(tables) > 0, "Expected at least one extracted table" + first_table = tables[0] + assert "data" in first_table, f"Table missing 'data' field: {first_table.keys()}" + table_data = first_table["data"] + assert "headers" in table_data, f"Table data missing 'headers'" + assert "rows" in table_data, f"Table data missing 'rows'" + + finally: + try: + client.delete_corpus(actual_key) + except Exception: + pass diff --git a/tests/services/indexing/test_large_documents.py 
b/tests/services/indexing/test_large_documents.py new file mode 100644 index 0000000..e83cdf9 --- /dev/null +++ b/tests/services/indexing/test_large_documents.py @@ -0,0 +1,113 @@ +""" +Large Document Indexing Tests + +Regression-level tests for indexing large documents, multiple documents, +listing documents, and edge cases like empty documents. +""" + +import pytest + +from utils.waiters import wait_for + + +@pytest.mark.regression +class TestLargeDocuments: + """Regression checks for large and bulk document indexing.""" + + def test_index_large_document(self, client, shared_corpus, unique_id): + """Test indexing a larger document with multiple paragraphs.""" + doc_id = f"large_doc_{unique_id}" + + # Generate larger text content + large_text = " ".join( + [ + f"Paragraph {i}: This is test content for paragraph number {i}. " + "It contains information about various topics including technology, " + "science, and general knowledge. Vector databases enable semantic " + "search capabilities that traditional keyword search cannot match." 
+ for i in range(20) + ] + ) + + response = client.index_document( + corpus_key=shared_corpus, + document_id=doc_id, + text=large_text, + ) + + assert response.success, f"Large document indexing failed: {response.status_code} - {response.data}" + assert response.data.get("id") is not None, f"Index response should contain document id, got: {response.data}" + + def test_index_multiple_documents(self, client, shared_corpus, unique_id): + """Test indexing multiple documents sequentially.""" + doc_ids = [f"multi_doc_{unique_id}_{i}" for i in range(5)] + + for i, doc_id in enumerate(doc_ids): + response = client.index_document( + corpus_key=shared_corpus, + document_id=doc_id, + text=f"Test document number {i} with unique content.", + metadata={"index": i}, + ) + + assert response.success, f"Document {i} indexing failed: {response.status_code}" + + def _docs_indexed(): + list_resp = client.list_documents(shared_corpus, limit=100) + if not list_resp.success: + return False + docs = list_resp.data.get("documents", []) + return len(docs) >= len(doc_ids) + + wait_for(_docs_indexed, timeout=30, interval=2, description="all documents to be indexed") + list_resp = client.list_documents(shared_corpus, limit=100) + listed_ids = [d.get("id") for d in list_resp.data.get("documents", [])] + for did in doc_ids: + assert did in listed_ids, f"Document {did} not found in listing" + + def test_list_documents(self, client, shared_corpus, unique_id): + """Test listing documents in a corpus.""" + # Index a few documents first + doc_ids = [f"list_doc_{unique_id}_{i}" for i in range(3)] + for doc_id in doc_ids: + response = client.index_document( + corpus_key=shared_corpus, + document_id=doc_id, + text=f"Document {doc_id} for listing test.", + ) + assert response.success, f"Failed to index {doc_id}: {response.data}" + + # Wait for indexing to complete + wait_for( + lambda: any(d.get("id") in doc_ids for d in client.list_documents(shared_corpus, limit=100).data.get("documents", []) if 
isinstance(d, dict)), + timeout=15, + interval=1, + description="indexed documents to appear in listing", + ) + + # List documents + response = client.list_documents(shared_corpus, limit=100) + + assert response.success, f"List documents failed: {response.status_code} - {response.data}" + + # Verify documents exist in list + documents = response.data.get("documents", response.data) + doc_ids_in_response = [d.get("id") for d in documents if isinstance(d, dict)] + + # Check that at least some of our documents appear (indexing may be async) + found_count = sum(1 for doc_id in doc_ids if doc_id in doc_ids_in_response) + assert found_count > 0, f"None of the indexed documents found in list. Expected: {doc_ids}, Got: {doc_ids_in_response}" + + def test_index_empty_document_fails(self, client, shared_corpus, unique_id): + """Test that indexing an empty document is handled.""" + doc_id = f"empty_doc_{unique_id}" + + response = client.index_document( + corpus_key=shared_corpus, + document_id=doc_id, + text="", # Empty text + ) + + # Empty documents should either fail or be handled gracefully + # Behavior may vary - just ensure no server error + assert response.status_code != 500, "Server error on empty document" diff --git a/tests/services/indexing/test_metadata.py b/tests/services/indexing/test_metadata.py new file mode 100644 index 0000000..bec91cb --- /dev/null +++ b/tests/services/indexing/test_metadata.py @@ -0,0 +1,81 @@ +""" +Document Metadata Indexing Tests + +Core-level tests for indexing documents with custom metadata, +special characters, and verifying indexing response times. 
+""" + +import time + +import pytest + + +@pytest.mark.core +class TestDocumentMetadata: + """Core checks for document metadata indexing.""" + + def test_index_document_with_metadata(self, client, shared_corpus, unique_id): + """Test indexing a document with custom metadata.""" + doc_id = f"meta_doc_{unique_id}" + + response = client.index_document( + corpus_key=shared_corpus, + document_id=doc_id, + text="Document with rich metadata for testing.", + metadata={ + "author": "Test Suite", + "category": "technology", + "priority": 1, + "tags": ["test", "api", "indexing"], + "timestamp": time.time(), + }, + ) + + assert response.success, f"Document with metadata indexing failed: {response.status_code} - {response.data}" + + from utils.waiters import wait_for + + wait_for( + lambda: client.get_document(shared_corpus, doc_id).success, + timeout=15, + interval=1, + description="document to be available", + ) + get_resp = client.get_document(shared_corpus, doc_id) + assert get_resp.success, f"GET document failed: {get_resp.status_code}" + assert get_resp.data.get("id") == doc_id, f"Document id mismatch: expected {doc_id}, got {get_resp.data.get('id')}" + + def test_index_document_special_characters(self, client, shared_corpus, unique_id): + """Test indexing document with special characters.""" + doc_id = f"special_doc_{unique_id}" + + special_text = ( + "Testing special characters: " + "Unicode: \u00e9\u00e8\u00ea \u00f1 \u00fc " + "Symbols: @#$%^&*() " + "Quotes: 'single' \"double\" " + "Newlines:\nLine 1\nLine 2\n" + "Tabs:\tColumn1\tColumn2" + ) + + response = client.index_document( + corpus_key=shared_corpus, + document_id=doc_id, + text=special_text, + ) + + assert response.success, f"Special characters document indexing failed: {response.status_code} - {response.data}" + assert response.data.get("id") is not None, f"Index response should contain document id, got: {response.data}" + + def test_indexing_response_time(self, client, shared_corpus, unique_id): + """Test 
that indexing completes in acceptable time.""" + doc_id = f"perf_doc_{unique_id}" + + response = client.index_document( + corpus_key=shared_corpus, + document_id=doc_id, + text="Performance test document for measuring indexing speed.", + ) + + assert response.success, f"Indexing failed: {response.status_code}" + assert response.elapsed_ms < 10000, f"Indexing took too long: {response.elapsed_ms:.1f}ms" diff --git a/tests/services/indexing/test_upload_edge_cases.py b/tests/services/indexing/test_upload_edge_cases.py new file mode 100644 index 0000000..16daccc --- /dev/null +++ b/tests/services/indexing/test_upload_edge_cases.py @@ -0,0 +1,83 @@ +""" +Upload Edge Case Tests + +Tests for file upload error handling and metadata attachment including +uploads with metadata, uploads to non-existent corpora, and uploads +without a proper filename. +""" + +import os +import tempfile + +import pytest + +from utils.waiters import wait_for + + +@pytest.mark.core +class TestUploadWithMetadata: + """Core tests for file upload with metadata.""" + + def test_upload_with_metadata_fields(self, client, test_corpus): + """Upload a file with metadata, wait for indexing, GET doc, and verify metadata.""" + with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False) as f: + f.write("Semantic search uses vector embeddings to find relevant documents.") + temp_path = f.name + + try: + metadata = {"author": "test_suite", "category": "technology", "version": "1"} + + response = client.upload_file( + corpus_key=test_corpus, + file_path=temp_path, + metadata=metadata, + ) + assert response.success, f"File upload failed: {response.status_code} - {response.data}" + + doc_id = response.data.get("id") + assert doc_id, f"No document ID in upload response: {response.data}" + + wait_for( + lambda: client.get_document(test_corpus, doc_id).success, + timeout=15, + interval=1, + description="uploaded file to appear as document", + ) + + doc_response = client.get_document(test_corpus, doc_id) + 
assert doc_response.success, f"Get document failed: {doc_response.status_code} - {doc_response.data}" + + doc_metadata = doc_response.data.get("metadata", {}) + assert doc_metadata.get("author") == "test_suite", f"Expected author='test_suite' in metadata, got: {doc_metadata}" + assert doc_metadata.get("category") == "technology", f"Expected category='technology' in metadata, got: {doc_metadata}" + finally: + os.unlink(temp_path) + + +@pytest.mark.regression +class TestUploadErrors: + """Regression tests for file upload error cases.""" + + def test_upload_to_nonexistent_corpus_returns_404(self, client): + """Upload a file to a non-existent corpus key and expect 404.""" + with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False) as f: + f.write("This file should not be indexed anywhere.") + temp_path = f.name + + try: + response = client.upload_file( + corpus_key="nonexistent_corpus_xyz123", + file_path=temp_path, + ) + assert response.status_code == 404, f"Expected 404 for non-existent corpus, got {response.status_code} - {response.data}" + finally: + os.unlink(temp_path) + + def test_upload_without_filename_returns_400(self, client, test_corpus): + """Upload without a proper file to verify the API rejects it.""" + response = client.post( + f"/v2/corpora/{test_corpus}/upload_file", + data={}, + ) + + assert response.status_code in (400, 415, 422), f"Expected 400/415/422 for upload without file, got {response.status_code} - {response.data}" diff --git a/tests/services/llm/test_llm_crud.py b/tests/services/llm/test_llm_crud.py new file mode 100644 index 0000000..dbc16f6 --- /dev/null +++ b/tests/services/llm/test_llm_crud.py @@ -0,0 +1,43 @@ +""" +LLM CRUD Tests + +Core and regression tests for LLM configuration management. 
+""" + +import os + +import pytest + + +@pytest.mark.core +class TestLlmList: + def test_list_llms(self, client): + response = client.list_llms(limit=10) + assert response.success, f"List LLMs failed: {response.status_code} - {response.data}" + assert "llms" in response.data, f"Expected 'llms' key in response: {response.data.keys()}" + + +@pytest.mark.regression +class TestLlmCrud: + def test_create_and_delete_llm(self, client, unique_id): + api_key = os.environ.get("OPENAI_API_KEY") + if not api_key: + pytest.skip("OPENAI_API_KEY not set") + + response = client.create_llm( + name=f"test_llm_{unique_id}", + model="gpt-4o-mini", + uri="https://api.openai.com/v1/chat/completions", + bearer_token=api_key, + ) + if not response.success and ("quota" in str(response.data).lower() or "verify" in str(response.data).lower()): + pytest.skip(f"LLM provider issue (quota/verification): {response.data}") + assert response.success, f"Create LLM failed: {response.status_code} - {response.data}" + + llm_id = response.data.get("id") + assert llm_id, f"No LLM ID in create response: {response.data}" + assert response.data.get("name") == f"test_llm_{unique_id}", f"LLM name mismatch: {response.data}" + + if llm_id: + del_resp = client.delete_llm(llm_id) + assert del_resp.success, f"Delete LLM failed: {del_resp.data}" diff --git a/tests/services/pipelines/test_pipeline_crud.py b/tests/services/pipelines/test_pipeline_crud.py new file mode 100644 index 0000000..67ebce7 --- /dev/null +++ b/tests/services/pipelines/test_pipeline_crud.py @@ -0,0 +1,22 @@ +""" +Pipeline CRUD Tests + +Core tests for pipeline listing with availability gating. 
+""" + +import pytest + + +@pytest.fixture(scope="module", autouse=True) +def check_pipelines_available(client): + response = client.list_pipelines(limit=1) + if not response.success: + pytest.skip("Pipelines API not available in this environment") + + +@pytest.mark.core +class TestPipelineCrud: + def test_list_pipelines(self, client): + response = client.list_pipelines(limit=10) + assert response.success, f"List pipelines failed: {response.status_code} - {response.data}" + assert "pipelines" in response.data, f"Expected 'pipelines' key: {response.data.keys()}" diff --git a/tests/services/query/test_cross_corpus_query.py b/tests/services/query/test_cross_corpus_query.py new file mode 100644 index 0000000..84d6e55 --- /dev/null +++ b/tests/services/query/test_cross_corpus_query.py @@ -0,0 +1,80 @@ +""" +Cross-Corpus Query Tests + +Tests for querying across multiple corpora simultaneously. +""" + +import uuid + +import pytest + +from utils.waiters import wait_for + + +@pytest.mark.core +class TestCrossCorpusQuery: + """Cross-corpus query operations.""" + + def test_query_across_multiple_corpora(self, client, unique_id): + """Test querying across two corpora returns results from both.""" + corpus1_key = f"test_cross1_{unique_id}" + corpus2_key = f"test_cross2_{unique_id}" + + c1 = client.create_corpus(name=f"Cross1 {unique_id}", key=corpus1_key) + c2 = client.create_corpus(name=f"Cross2 {unique_id}", key=corpus2_key) + + if not c1.success or not c2.success: + for k in [corpus1_key, corpus2_key]: + try: + client.delete_corpus(k) + except Exception: + pass + pytest.skip("Could not create corpora for cross-corpus test") + + try: + for key in [corpus1_key, corpus2_key]: + wait_for( + lambda k=key: client.get_corpus(k).success, + timeout=10, + interval=1, + description=f"corpus {key} available", + ) + + client.index_document(corpus1_key, f"doc1_{unique_id}", "Medical research on heart disease prevention") + client.index_document(corpus2_key, f"doc2_{unique_id}", "Legal 
precedents in contract law disputes") + + for key, doc_id in [(corpus1_key, f"doc1_{unique_id}"), (corpus2_key, f"doc2_{unique_id}")]: + wait_for( + lambda k=key, d=doc_id: client.get_document(k, d).success, + timeout=15, + interval=1, + description=f"document in {key} indexed", + ) + + query_resp = client.post( + "/v2/query", + data={ + "query": "important topics", + "search": { + "corpora": [ + {"corpus_key": corpus1_key}, + {"corpus_key": corpus2_key}, + ], + "limit": 10, + }, + }, + ) + assert query_resp.success, f"Cross-corpus query failed: {query_resp.status_code}" + results = query_resp.data.get("search_results", []) + assert len(results) > 0, "Expected results from cross-corpus query" + + result_corpus_keys = {r.get("corpus_key") for r in results} + assert ( + corpus1_key in result_corpus_keys or corpus2_key in result_corpus_keys + ), f"Expected results from at least one of the test corpora, got: {result_corpus_keys}" + finally: + for key in [corpus1_key, corpus2_key]: + try: + client.delete_corpus(key) + except Exception: + pass diff --git a/tests/services/query/test_factual_consistency.py b/tests/services/query/test_factual_consistency.py new file mode 100644 index 0000000..a0fd77a --- /dev/null +++ b/tests/services/query/test_factual_consistency.py @@ -0,0 +1,47 @@ +""" +Factual Consistency Score Tests + +Tests for verifying factual consistency scoring in RAG responses. +FCS is enabled by default (OpenAPI spec: default=true) when generation is requested. 
+""" + +import pytest + +from utils.waiters import wait_for + + +@pytest.mark.core +class TestFactualConsistency: + """Factual consistency score validation.""" + + def test_rag_returns_fcs_score(self, client, seeded_shared_corpus): + """Test that RAG query returns a valid factual consistency score.""" + wait_for( + lambda: len( + client.post( + "/v2/query", + data={ + "query": "technology", + "search": {"corpora": [{"corpus_key": seeded_shared_corpus}], "limit": 5}, + }, + ).data.get("search_results", []) + ) + > 0, + timeout=20, + interval=2, + description="seeded corpus to return search results", + ) + + resp = client.post( + "/v2/query", + data={ + "query": "artificial intelligence and machine learning", + "search": {"corpora": [{"corpus_key": seeded_shared_corpus}], "limit": 10}, + "generation": {}, + }, + ) + assert resp.success, f"RAG query failed: {resp.status_code} - {resp.data}" + + score = resp.data.get("factual_consistency_score") + assert score is not None, f"Expected factual_consistency_score in response, got keys: {list(resp.data.keys())}" + assert 0.0 <= score <= 1.0, f"FCS score out of range [0, 1]: {score}" diff --git a/tests/services/query/test_generation_preset_override.py b/tests/services/query/test_generation_preset_override.py new file mode 100644 index 0000000..0a6b8d3 --- /dev/null +++ b/tests/services/query/test_generation_preset_override.py @@ -0,0 +1,83 @@ +""" +Generation Preset Override Tests + +Verify querying with different generation presets produces valid responses. 
+""" + +import pytest + + +@pytest.fixture(scope="module", autouse=True) +def check_multiple_presets_available(client): + """Skip if fewer than 2 enabled presets.""" + resp = client.list_generation_presets(limit=50) + if not resp.success: + pytest.skip("Generation presets API not available") + presets = resp.data.get("generation_presets", []) + enabled = [p for p in presets if p.get("enabled")] + if len(enabled) < 2: + pytest.skip(f"Need at least 2 enabled presets, found {len(enabled)}") + + +@pytest.mark.regression +class TestGenerationPresetOverride: + """Generation preset override mechanism.""" + + def test_query_with_different_presets(self, client, seeded_shared_corpus): + """Query with two different presets, verify both return summaries.""" + presets_resp = client.list_generation_presets(limit=50) + enabled = [p for p in presets_resp.data.get("generation_presets", []) if p.get("enabled")] + + preset_a = enabled[0]["name"] + preset_b = enabled[1]["name"] + + resp_a = client.post( + "/v2/query", + data={ + "query": "artificial intelligence", + "search": {"corpora": [{"corpus_key": seeded_shared_corpus}], "limit": 5}, + "generation": {"generation_preset_name": preset_a}, + }, + ) + assert resp_a.success, f"Query with preset {preset_a} failed: {resp_a.status_code}" + summary_a = resp_a.data.get("summary", "") + assert len(summary_a) > 20, f"Preset {preset_a} should produce substantive summary: {summary_a[:50]!r}" + + resp_b = client.post( + "/v2/query", + data={ + "query": "artificial intelligence", + "search": {"corpora": [{"corpus_key": seeded_shared_corpus}], "limit": 5}, + "generation": {"generation_preset_name": preset_b}, + }, + ) + assert resp_b.success, f"Query with preset {preset_b} failed: {resp_b.status_code}" + summary_b = resp_b.data.get("summary", "") + assert len(summary_b) > 20, f"Preset {preset_b} should produce substantive summary: {summary_b[:50]!r}" + + def test_default_vs_explicit_preset(self, client, seeded_shared_corpus): + """Query with 
default generation vs explicit preset, both should work.""" + default_resp = client.post( + "/v2/query", + data={ + "query": "machine learning", + "search": {"corpora": [{"corpus_key": seeded_shared_corpus}], "limit": 5}, + "generation": {}, + }, + ) + assert default_resp.success, f"Default generation failed: {default_resp.status_code}" + assert len(default_resp.data.get("summary", "")) > 0, "Default should produce summary" + + presets_resp = client.list_generation_presets(limit=50) + enabled = [p for p in presets_resp.data.get("generation_presets", []) if p.get("enabled")] + + explicit_resp = client.post( + "/v2/query", + data={ + "query": "machine learning", + "search": {"corpora": [{"corpus_key": seeded_shared_corpus}], "limit": 5}, + "generation": {"generation_preset_name": enabled[0]["name"]}, + }, + ) + assert explicit_resp.success, f"Explicit preset failed: {explicit_resp.status_code}" + assert len(explicit_resp.data.get("summary", "")) > 0, "Explicit preset should produce summary" diff --git a/tests/services/query/test_generation_presets.py b/tests/services/query/test_generation_presets.py new file mode 100644 index 0000000..7adf524 --- /dev/null +++ b/tests/services/query/test_generation_presets.py @@ -0,0 +1,49 @@ +""" +Generation Preset Tests + +Tests for listing and using generation presets. 
+""" + +import pytest + + +@pytest.fixture(scope="module", autouse=True) +def check_presets_available(client): + """Skip all tests if generation presets API is not available.""" + resp = client.list_generation_presets(limit=1) + if not resp.success: + pytest.skip("Generation presets API not available") + + +@pytest.mark.core +class TestGenerationPresets: + """Generation preset listing and usage.""" + + def test_list_generation_presets(self, client): + """Test listing generation presets with proper structure.""" + resp = client.list_generation_presets(limit=50) + assert resp.success, f"List presets failed: {resp.status_code}" + presets = resp.data.get("generation_presets", []) + assert isinstance(presets, list) + assert len(presets) > 0, "Expected at least one generation preset" + first = presets[0] + assert "name" in first, "Preset should have 'name' field" + + def test_query_with_preset(self, client, seeded_shared_corpus): + """Test querying with a specific generation preset.""" + list_resp = client.list_generation_presets(limit=50) + if not list_resp.success: + pytest.skip("Could not list presets") + presets = list_resp.data.get("generation_presets", []) + enabled_presets = [p for p in presets if p.get("enabled")] + if not enabled_presets: + pytest.skip("No enabled generation presets available") + + preset_name = enabled_presets[0]["name"] + query_resp = client.query_with_summary( + corpus_key=seeded_shared_corpus, + query_text="artificial intelligence", + summarizer=preset_name, + ) + assert query_resp.success, f"Query with preset failed: {query_resp.status_code} - {query_resp.data}" + assert query_resp.data.get("summary") is not None or query_resp.data.get("generation") is not None, "Expected summary/generation in response" diff --git a/tests/services/query/test_pagination_completeness.py b/tests/services/query/test_pagination_completeness.py new file mode 100644 index 0000000..99c3273 --- /dev/null +++ b/tests/services/query/test_pagination_completeness.py @@ 
-0,0 +1,112 @@ +""" +Pagination Completeness Tests + +Tests that verify pagination returns all items without duplicates. +""" + +import uuid + +import pytest + +from utils.waiters import wait_for + + +@pytest.mark.regression +class TestPaginationCompleteness: + """Pagination completeness and correctness.""" + + def test_paginate_all_documents(self, client, unique_id): + """Test paginating through all documents in a corpus.""" + corpus_key = f"test_paginate_{unique_id}" + create_resp = client.create_corpus(name=f"Paginate {unique_id}", key=corpus_key) + if not create_resp.success: + pytest.skip(f"Could not create corpus: {create_resp.data}") + + try: + wait_for( + lambda: client.get_corpus(corpus_key).success, + timeout=10, + interval=1, + description="corpus available", + ) + + num_docs = 6 + doc_ids = [f"page_doc_{unique_id}_{i}" for i in range(num_docs)] + for doc_id in doc_ids: + resp = client.index_document(corpus_key, doc_id, f"Content for {doc_id}") + assert resp.success, f"Index {doc_id} failed: {resp.status_code}" + + wait_for( + lambda: len(client.list_documents(corpus_key, limit=100).data.get("documents", [])) >= num_docs, + timeout=30, + interval=2, + description=f"all {num_docs} documents indexed", + ) + + all_ids = [] + page_key = None + page_limit = 3 + max_pages = 10 + + for _ in range(max_pages): + list_resp = client.list_documents(corpus_key, limit=page_limit, page_key=page_key) + assert list_resp.success, f"List failed: {list_resp.status_code}" + docs = list_resp.data.get("documents", []) + for d in docs: + all_ids.append(d.get("id")) + + page_key = list_resp.data.get("metadata", {}).get("page_key") + if not page_key: + break + + assert len(all_ids) == len(set(all_ids)), f"Duplicate document IDs found: {[x for x in all_ids if all_ids.count(x) > 1]}" + assert len(all_ids) >= num_docs, f"Expected at least {num_docs} docs, got {len(all_ids)}" + finally: + try: + client.delete_corpus(corpus_key) + except Exception: + pass + + def 
test_paginate_corpora(self, client, unique_id): + """Test paginating through corpora.""" + num_corpora = 4 + corpus_keys = [f"test_page_corp_{unique_id}_{i}" for i in range(num_corpora)] + created = [] + + try: + for key in corpus_keys: + resp = client.create_corpus(name=f"Page Corp {key}", key=key) + if resp.success: + created.append(key) + + if len(created) < num_corpora: + pytest.skip(f"Could not create all {num_corpora} corpora") + + for key in created: + wait_for( + lambda k=key: client.get_corpus(k).success, + timeout=10, + interval=1, + description=f"corpus {key} available", + ) + + all_keys = [] + page_key = None + for _ in range(10): + list_resp = client.list_corpora(limit=2, page_key=page_key) + assert list_resp.success + corpora = list_resp.data.get("corpora", []) + for c in corpora: + all_keys.append(c.get("key")) + page_key = list_resp.data.get("metadata", {}).get("page_key") + if not page_key: + break + + for key in created: + assert key in all_keys, f"Corpus {key} not found via pagination" + finally: + for key in created: + try: + client.delete_corpus(key) + except Exception: + pass diff --git a/tests/services/query/test_query_edge_cases.py b/tests/services/query/test_query_edge_cases.py new file mode 100644 index 0000000..1f98619 --- /dev/null +++ b/tests/services/query/test_query_edge_cases.py @@ -0,0 +1,94 @@ +""" +Query Filtering and Edge Case Tests + +Regression-level tests for empty results, special characters, unicode, +long queries, response time, and querying non-existent corpora. 
+""" + +import pytest + + +@pytest.mark.regression +class TestQueryFiltering: + """Regression checks for query edge cases and filtering.""" + + def test_query_empty_results(self, client, seeded_shared_corpus): + """Test query that returns no relevant results.""" + response = client.query( + corpus_key=seeded_shared_corpus, + query_text="quantum teleportation through wormholes in the 15th century", + limit=5, + ) + + assert response.success, f"Query failed: {response.status_code}" + results = response.data.get("search_results", response.data.get("results", [])) + assert isinstance(results, list), f"Expected search_results list, got: {type(results)}" + # Query should succeed even with no/few relevant results + + def test_query_special_characters(self, client, seeded_shared_corpus): + """Test query with special characters.""" + response = client.query( + corpus_key=seeded_shared_corpus, + query_text="What's the purpose of AI & machine-learning?", + limit=3, + ) + + assert response.success, f"Query with special characters failed: {response.status_code}" + assert ( + "search_results" in response.data or "results" in response.data + ), f"Response missing search_results key: {list(response.data.keys()) if isinstance(response.data, dict) else type(response.data)}" + + def test_query_unicode(self, client, seeded_shared_corpus): + """Test query with unicode characters.""" + response = client.query( + corpus_key=seeded_shared_corpus, + query_text="intelig\u00eancia artificial e aprendizado de m\u00e1quina", + limit=3, + ) + + assert response.success, f"Query with unicode failed: {response.status_code}" + assert ( + "search_results" in response.data or "results" in response.data + ), f"Response missing search_results key: {list(response.data.keys()) if isinstance(response.data, dict) else type(response.data)}" + + def test_query_long_text(self, client, seeded_shared_corpus): + """Test query with longer query text.""" + long_query = ( + "I am interested in learning about how 
artificial intelligence and " + "machine learning technologies are being applied in various industries " + "such as healthcare and finance. Can you provide information about " + "the latest developments in deep learning and neural networks?" + ) + + response = client.query( + corpus_key=seeded_shared_corpus, + query_text=long_query, + limit=5, + ) + + assert response.success, f"Long query failed: {response.status_code}" + assert ( + "search_results" in response.data or "results" in response.data + ), f"Response missing search_results key: {list(response.data.keys()) if isinstance(response.data, dict) else type(response.data)}" + + def test_query_response_time(self, client, seeded_shared_corpus): + """Test that queries complete in acceptable time.""" + response = client.query( + corpus_key=seeded_shared_corpus, + query_text="artificial intelligence", + limit=5, + ) + + assert response.success, f"Query failed: {response.status_code}" + assert response.elapsed_ms < 5000, f"Query took too long: {response.elapsed_ms:.1f}ms" + + def test_query_nonexistent_corpus(self, client): + """Test querying a non-existent corpus.""" + response = client.query( + corpus_key="nonexistent_corpus_xyz123", + query_text="test query", + limit=5, + ) + + assert not response.success, "Query to non-existent corpus should fail" + assert response.status_code in [400, 404], f"Expected 400 or 404, got {response.status_code}" diff --git a/tests/services/query/test_query_filters.py b/tests/services/query/test_query_filters.py new file mode 100644 index 0000000..5afbb77 --- /dev/null +++ b/tests/services/query/test_query_filters.py @@ -0,0 +1,127 @@ +""" +Query Filter Tests + +Tests for metadata filter expressions in queries. 
+""" + +import uuid + +import pytest + +from utils.waiters import wait_for + + +@pytest.mark.core +class TestQueryFiltersCore: + """Query with metadata filter tests.""" + + def test_query_with_valid_metadata_filter(self, client, unique_id): + """Test querying with a valid metadata filter returns matching results.""" + corpus_key = f"test_filter_{unique_id}" + + create_resp = client.create_corpus( + name=f"Filter Test {unique_id}", + key=corpus_key, + filter_attributes=[ + {"name": "topic", "level": "part", "type": "text", "indexed": True}, + ], + ) + if not create_resp.success: + pytest.skip(f"Could not create corpus: {create_resp.data}") + + try: + wait_for( + lambda: client.get_corpus(corpus_key).success, + timeout=10, + interval=1, + description="corpus to be available", + ) + + doc_id = f"filter_doc_{unique_id}" + index_resp = client.index_document( + corpus_key=corpus_key, + document_id=doc_id, + text="Artificial intelligence is transforming industries worldwide.", + metadata={"topic": "ai"}, + ) + assert index_resp.success, f"Index failed: {index_resp.status_code} - {index_resp.data}" + + wait_for( + lambda: client.get_document(corpus_key, doc_id).success, + timeout=15, + interval=1, + description="document to be indexed", + ) + + query_resp = client.post( + "/v2/query", + data={ + "query": "artificial intelligence", + "search": { + "corpora": [{"corpus_key": corpus_key, "metadata_filter": "part.topic = 'ai'"}], + "limit": 10, + }, + }, + ) + assert query_resp.success, f"Query failed: {query_resp.status_code} - {query_resp.data}" + results = query_resp.data.get("search_results", []) + assert len(results) > 0, "Expected at least one result for valid filter" + finally: + try: + client.delete_corpus(corpus_key) + except Exception: + pass + + def test_query_empty_corpus_returns_empty_results(self, client, unique_id): + """Test that querying an empty corpus returns an empty results list.""" + corpus_key = f"test_empty_{unique_id}" + + create_resp = 
client.create_corpus( + name=f"Empty Corpus {unique_id}", + key=corpus_key, + ) + if not create_resp.success: + pytest.skip(f"Could not create corpus: {create_resp.data}") + + try: + wait_for( + lambda: client.get_corpus(corpus_key).success, + timeout=10, + interval=1, + description="corpus to be available", + ) + + query_resp = client.query( + corpus_key=corpus_key, + query_text="anything at all", + limit=10, + ) + assert query_resp.success, f"Query failed: {query_resp.status_code}" + results = query_resp.data.get("search_results", []) + assert isinstance(results, list), f"Expected list, got: {type(results)}" + assert len(results) == 0, f"Expected empty results for empty corpus, got {len(results)}" + finally: + try: + client.delete_corpus(corpus_key) + except Exception: + pass + + +@pytest.mark.regression +class TestQueryFilterErrors: + """Query filter error handling tests.""" + + def test_query_with_invalid_filter_returns_400(self, seeded_corpus, client): + """Test that an invalid filter expression returns 400.""" + query_resp = client.post( + "/v2/query", + data={ + "query": "test", + "search": { + "corpora": [{"corpus_key": seeded_corpus, "metadata_filter": "part.nonexistent_field = 'value'"}], + "limit": 10, + }, + }, + ) + assert not query_resp.success, "Invalid filter should fail" + assert query_resp.status_code == 400, f"Expected 400 for invalid filter, got {query_resp.status_code}" diff --git a/tests/services/query/test_query_history.py b/tests/services/query/test_query_history.py new file mode 100644 index 0000000..69e0f47 --- /dev/null +++ b/tests/services/query/test_query_history.py @@ -0,0 +1,45 @@ +""" +Query History Tests + +Verify that queries are recorded and retrievable via the query history API. 
+""" + +import pytest + +from utils.waiters import wait_for + + +@pytest.fixture(scope="module", autouse=True) +def check_query_history_available(client): + """Skip all tests if query history API is not available.""" + resp = client.list_query_histories(limit=1) + if not resp.success: + pytest.skip(f"Query history API not available: {resp.status_code}") + + +@pytest.mark.core +class TestQueryHistory: + """Query history tracking and retrieval.""" + + def test_list_query_histories(self, client): + """List query histories returns valid structure.""" + resp = client.list_query_histories(limit=10) + assert resp.success, f"List query histories failed: {resp.status_code}" + entries = resp.data.get("queries", []) + assert isinstance(entries, list), f"Expected list of queries, got: {type(entries)}" + + if entries: + first = entries[0] + assert "id" in first, f"History entry should have 'id': {first}" + assert "query" in first, f"History entry should have 'query': {first}" + assert "started_at" in first, f"History entry should have 'started_at': {first}" + + def test_query_history_contains_generation(self, client): + """Verify query history entries include generation/answer content.""" + hist_resp = client.list_query_histories(limit=5) + entries = hist_resp.data.get("queries", []) + if not entries: + pytest.skip("No query history entries available") + + entries_with_gen = [e for e in entries if e.get("generation")] + assert len(entries_with_gen) > 0, f"Expected at least one entry with generation content, got keys: {[list(e.keys()) for e in entries[:2]]}" diff --git a/tests/services/query/test_query_history_filters.py b/tests/services/query/test_query_history_filters.py new file mode 100644 index 0000000..dad1c99 --- /dev/null +++ b/tests/services/query/test_query_history_filters.py @@ -0,0 +1,33 @@ +""" +Query History Filter Tests + +Verify query history list supports filtering and pagination. 
+""" + +import pytest + + +@pytest.fixture(scope="module", autouse=True) +def check_query_history_available(client): + """Skip all tests if query history API is not available.""" + resp = client.list_query_histories(limit=1) + if not resp.success: + pytest.skip(f"Query history API not available: {resp.status_code}") + + +@pytest.mark.regression +class TestQueryHistoryFilters: + """Query history filtering and pagination.""" + + def test_query_history_with_limit(self, client): + """Verify limit parameter restricts result count.""" + full_resp = client.list_query_histories(limit=10) + assert full_resp.success + full_count = len(full_resp.data.get("queries", [])) + if full_count < 3: + pytest.skip(f"Need at least 3 history entries for limit test, have {full_count}") + + limited_resp = client.list_query_histories(limit=2) + assert limited_resp.success + limited_entries = limited_resp.data.get("queries", []) + assert len(limited_entries) <= 2, f"Limit=2 should return at most 2 entries, got {len(limited_entries)}" diff --git a/tests/services/query/test_query_streaming.py b/tests/services/query/test_query_streaming.py new file mode 100644 index 0000000..a175557 --- /dev/null +++ b/tests/services/query/test_query_streaming.py @@ -0,0 +1,74 @@ +""" +Query Streaming Tests + +Tests for Server-Sent Events (SSE) streaming query responses. 
+""" + +import pytest + +from utils.waiters import read_sse_events + + +@pytest.fixture(scope="module", autouse=True) +def check_streaming_available(client, seeded_shared_corpus): + """Skip all tests if streaming query is not supported.""" + try: + raw = client.query_stream( + corpus_key=seeded_shared_corpus, + query_text="test", + ) + if raw.status_code not in (200, 201): + pytest.skip(f"Streaming query not supported: {raw.status_code}") + raw.close() + except Exception as e: + pytest.skip(f"Streaming query not available: {e}") + + +@pytest.mark.core +class TestQueryStreaming: + """Streaming query tests.""" + + def test_streaming_query_events(self, client, seeded_shared_corpus): + """Test that streaming query returns valid SSE events.""" + raw = client.query_stream( + corpus_key=seeded_shared_corpus, + query_text="artificial intelligence", + ) + + try: + assert raw.status_code == 200, f"Stream request failed: {raw.status_code}" + events = list(read_sse_events(raw)) + assert len(events) > 0, "Expected at least one SSE event" + + has_content = any(e.get("data") is not None and e.get("data") != "" for e in events) + assert has_content, f"Expected at least one event with data, got event types: {[e.get('event', '') for e in events]}" + finally: + raw.close() + + def test_streaming_query_fcs(self, client, seeded_shared_corpus): + """Test that streaming query with FCS enabled returns a score.""" + raw = client.query_stream( + corpus_key=seeded_shared_corpus, + query_text="artificial intelligence", + generation_config={ + "enable_factual_consistency_score": True, + }, + ) + + try: + assert raw.status_code == 200, f"Stream request failed: {raw.status_code}" + events = list(read_sse_events(raw)) + + fcs_found = False + for event in events: + data = event.get("data", {}) + if isinstance(data, dict) and "factual_consistency_score" in data: + score = data["factual_consistency_score"] + assert 0.0 <= score <= 1.0, f"FCS score out of range: {score}" + fcs_found = True + break + 
+ if not fcs_found: + pytest.skip("FCS not returned in streaming response -- may not be enabled for this account") + finally: + raw.close() diff --git a/tests/services/query/test_rag_summary.py b/tests/services/query/test_rag_summary.py new file mode 100644 index 0000000..2e68f19 --- /dev/null +++ b/tests/services/query/test_rag_summary.py @@ -0,0 +1,38 @@ +""" +RAG Summary Tests + +Core-level tests for query-with-summary (RAG) operations +and summary response time. +""" + +import pytest + + +@pytest.mark.core +class TestRagSummary: + """Core checks for RAG summarization.""" + + def test_query_with_summary(self, client, seeded_shared_corpus): + """Test query with RAG summarization.""" + response = client.query_with_summary( + corpus_key=seeded_shared_corpus, + query_text="How is AI being used today?", + max_results=3, + ) + + assert response.success, f"Query with summary failed: {response.status_code} - {response.data}" + + # Should contain generated summary + assert "summary" in response.data or "generation" in response.data, "Expected summary/generation in response" + + def test_summary_response_time(self, client, seeded_shared_corpus): + """Test that RAG summarization completes in acceptable time.""" + response = client.query_with_summary( + corpus_key=seeded_shared_corpus, + query_text="What are the main topics covered?", + max_results=3, + ) + + assert response.success, f"Summary query failed: {response.status_code}" + # RAG takes longer due to LLM generation + assert response.elapsed_ms < 30000, f"Summary took too long: {response.elapsed_ms:.1f}ms" diff --git a/tests/services/query/test_rerankers.py b/tests/services/query/test_rerankers.py new file mode 100644 index 0000000..df87e63 --- /dev/null +++ b/tests/services/query/test_rerankers.py @@ -0,0 +1,51 @@ +""" +Reranker Tests + +Tests for listing and using rerankers. 
+""" + +import pytest + + +@pytest.fixture(scope="module", autouse=True) +def check_rerankers_available(client): + """Skip all tests if rerankers API is not available.""" + resp = client.list_rerankers(limit=1) + if not resp.success: + pytest.skip("Rerankers API not available") + + +@pytest.mark.core +class TestRerankers: + """Reranker listing and usage.""" + + def test_list_rerankers(self, client): + """Test listing rerankers with proper structure.""" + resp = client.list_rerankers(limit=50) + assert resp.success, f"List rerankers failed: {resp.status_code}" + rerankers = resp.data.get("rerankers", []) + assert isinstance(rerankers, list) + assert len(rerankers) > 0, "Expected at least one reranker" + first = rerankers[0] + assert "id" in first or "name" in first, "Reranker should have 'id' or 'name' field" + + def test_query_with_mmr_reranker(self, client, seeded_shared_corpus): + """Test querying with the MMR reranker.""" + query_resp = client.post( + "/v2/query", + data={ + "query": "artificial intelligence", + "search": { + "corpora": [{"corpus_key": seeded_shared_corpus}], + "limit": 10, + "reranker": { + "type": "mmr", + "diversity_bias": 0.3, + }, + }, + }, + ) + assert query_resp.success, f"Query with MMR reranker failed: {query_resp.status_code} - {query_resp.data}" + results = query_resp.data.get("search_results", []) + assert isinstance(results, list) + assert len(results) > 0, "Expected results with MMR reranker" diff --git a/tests/services/query/test_semantic_search.py b/tests/services/query/test_semantic_search.py new file mode 100644 index 0000000..7345d21 --- /dev/null +++ b/tests/services/query/test_semantic_search.py @@ -0,0 +1,87 @@ +""" +Semantic Search Tests + +Tests for basic semantic search, relevance, limit, and offset operations. 
+""" + +import pytest + + +@pytest.mark.sanity +class TestSemanticSearchBasic: + """Basic semantic search checks.""" + + def test_basic_query(self, client, seeded_shared_corpus): + """Test basic semantic search query.""" + response = client.query( + corpus_key=seeded_shared_corpus, + query_text="What is artificial intelligence?", + limit=5, + ) + + assert response.success, f"Query failed: {response.status_code} - {response.data}" + + # Should return search results + assert "search_results" in response.data or "results" in response.data, "Expected search results in response" + + +@pytest.mark.core +class TestSemanticSearchPagination: + """Semantic search relevance, limit, and offset checks.""" + + def test_query_returns_relevant_results(self, client, seeded_shared_corpus): + """Test that query returns semantically relevant results.""" + response = client.query( + corpus_key=seeded_shared_corpus, + query_text="machine learning and neural networks", + limit=3, + ) + + assert response.success, f"Query failed: {response.status_code}" + + # Results should be returned + results = response.data.get("search_results", response.data.get("results", [])) + assert len(results) > 0, "Expected at least one search result" + + def test_query_with_limit(self, client, seeded_shared_corpus): + """Test query with result limit.""" + response = client.query( + corpus_key=seeded_shared_corpus, + query_text="technology", + limit=2, + ) + + assert response.success, f"Query failed: {response.status_code}" + + results = response.data.get("search_results", response.data.get("results", [])) + assert len(results) <= 2, f"Expected at most 2 results, got {len(results)}" + + def test_query_with_offset(self, client, seeded_shared_corpus): + """Test query with pagination offset.""" + # First query without offset + response1 = client.query( + corpus_key=seeded_shared_corpus, + query_text="science and technology", + limit=2, + offset=0, + ) + + # Second query with offset + response2 = client.query( + 
corpus_key=seeded_shared_corpus, + query_text="science and technology", + limit=2, + offset=2, + ) + + assert response1.success and response2.success, "Queries failed" + + # Results should be different (pagination working) + results1 = response1.data.get("search_results", response1.data.get("results", [])) + results2 = response2.data.get("search_results", response2.data.get("results", [])) + + if len(results1) > 0 and len(results2) > 0: + # First result of each page should be different + id1 = results1[0].get("document_id", results1[0].get("id")) + id2 = results2[0].get("document_id", results2[0].get("id")) + assert id1 != id2, "Offset pagination not working correctly" diff --git a/tests/services/tools/test_tool_lifecycle.py b/tests/services/tools/test_tool_lifecycle.py new file mode 100644 index 0000000..5408990 --- /dev/null +++ b/tests/services/tools/test_tool_lifecycle.py @@ -0,0 +1,44 @@ +""" +Tool Lifecycle Tests + +Tests for tool enable/disable operations. +""" + +import uuid + +import pytest + +from utils.waiters import wait_for + + +@pytest.mark.core +class TestToolLifecycle: + """Tool lifecycle operations.""" + + def test_enable_disable_tool(self, client, unique_id): + """Test disabling and re-enabling a tool.""" + tool_name = f"test_tool_{unique_id}" + create_resp = client.create_tool( + name=tool_name, + title=f"Test Tool {unique_id}", + description="A test tool for lifecycle testing", + code="def process(request): return {'result': 'ok'}", + ) + if not create_resp.success: + pytest.skip(f"Could not create tool: {create_resp.data}") + + tool_id = create_resp.data.get("id") or create_resp.data.get("name") + try: + disable_resp = client.update_tool(tool_id, type="lambda", enabled=False) + assert disable_resp.success, f"Disable tool failed: {disable_resp.status_code} - {disable_resp.data}" + assert disable_resp.data.get("enabled") is False, f"Expected enabled=False, got: {disable_resp.data.get('enabled')}" + + enable_resp = client.update_tool(tool_id, 
type="lambda", enabled=True) + assert enable_resp.success, f"Enable tool failed: {enable_resp.status_code} - {enable_resp.data}" + assert enable_resp.data.get("enabled") is True, f"Expected enabled=True, got: {enable_resp.data.get('enabled')}" + finally: + if tool_id: + try: + client.delete_tool(tool_id) + except Exception: + pass diff --git a/tests/services/tools/test_tools_crud.py b/tests/services/tools/test_tools_crud.py new file mode 100644 index 0000000..c546975 --- /dev/null +++ b/tests/services/tools/test_tools_crud.py @@ -0,0 +1,43 @@ +""" +Tools CRUD Tests + +Core tests for tool creation, update, and deletion. +""" + +import pytest + + +@pytest.mark.core +class TestToolsList: + def test_list_tools(self, client): + response = client.list_tools(limit=10) + assert response.success, f"List tools failed: {response.status_code} - {response.data}" + assert "tools" in response.data, f"Expected 'tools' key: {response.data.keys()}" + + +@pytest.mark.core +class TestToolsCrud: + def test_create_update_delete_tool(self, client, unique_id): + # Create + response = client.create_tool( + name=f"test_tool_{unique_id}", + title=f"Test Tool {unique_id}", + description="A test lambda tool", + code="def process(value: str) -> dict:\n return {'result': value}", + ) + assert response.success, f"Create tool failed: {response.status_code} - {response.data}" + + tool_id = response.data.get("id") + assert tool_id, f"No tool ID in response: {response.data}" + + # Update + update_resp = client.update_tool(tool_id, type="lambda", description="Updated description") + assert update_resp.success, f"Update tool failed: {update_resp.data}" + + # Verify update took effect + updated_desc = update_resp.data.get("description", "") + assert updated_desc == "Updated description", f"Description not updated: {updated_desc}" + + # Delete + del_resp = client.delete_tool(tool_id) + assert del_resp.success, f"Delete tool failed: {del_resp.data}" diff --git a/tests/services/users/__init__.py 
b/tests/services/users/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/services/users/conftest.py b/tests/services/users/conftest.py new file mode 100644 index 0000000..443ffaa --- /dev/null +++ b/tests/services/users/conftest.py @@ -0,0 +1,11 @@ +"""Users test fixtures.""" + +import pytest + + +@pytest.fixture(scope="module", autouse=True) +def check_users_available(client): + """Skip all user tests if the users API is not available.""" + resp = client.list_users(limit=1) + if not resp.success: + pytest.skip("Users API not available (may require admin permissions)") diff --git a/tests/services/users/test_user_crud.py b/tests/services/users/test_user_crud.py new file mode 100644 index 0000000..8c44dfa --- /dev/null +++ b/tests/services/users/test_user_crud.py @@ -0,0 +1,155 @@ +""" +User CRUD Tests + +Tests for user create, read, update, and delete operations. +""" + +import uuid + +import pytest + +from utils.waiters import wait_for + + +def _extract_username(create_resp, email=None): + """Extract the username/handle for GET/PATCH/DELETE operations. + + The User API operates by handle (username). The create response may + return empty strings for username/email fields even on success. + When that happens, fall back to the email that was sent in the request. 
+ """ + data = create_resp.data or {} + username = data.get("username") + if username: + return username + resp_email = data.get("email") + if resp_email: + return resp_email + if email: + return email + return data.get("id") + + +@pytest.mark.core +@pytest.mark.serial +class TestUserCrud: + """User management CRUD operations.""" + + def test_create_user(self, client, unique_id): + """Test creating a new user and verifying response contains the sent fields.""" + email = f"test_{unique_id}@example.com" + description = f"Test user {unique_id}" + resp = client.create_user(email=email, description=description) + + try: + assert resp.success, f"Create user failed: {resp.status_code} - {resp.data}" + assert resp.data.get("id") is not None, f"Response should contain 'id': {resp.data}" + + assert resp.data.get("email") == email, f"Create response should echo back email: expected {email!r}, got {resp.data.get('email')!r}" + assert ( + resp.data.get("description") == description + ), f"Create response should echo back description: expected {description!r}, got {resp.data.get('description')!r}" + finally: + username = _extract_username(resp, email) if resp.success else None + if username: + try: + client.delete_user(username) + except Exception: + pass + + def test_list_users(self, client, unique_id): + """Test that a created user appears in the user list.""" + email = f"test_list_{unique_id}@example.com" + create_resp = client.create_user(email=email) + assert create_resp.success, f"Create user failed: {create_resp.status_code} - {create_resp.data}" + + username = _extract_username(create_resp, email) + try: + list_resp = client.list_users() + assert list_resp.success, f"List users failed: {list_resp.status_code}" + users = list_resp.data.get("users", list_resp.data if isinstance(list_resp.data, list) else []) + found = any(u.get("username") == username or u.get("id") == username or u.get("email") == email for u in users) + assert found, f"User {username} (email={email}) not 
found in listing" + finally: + try: + client.delete_user(username) + except Exception: + pass + + def test_get_user(self, client, unique_id): + """Test retrieving a specific user.""" + email = f"test_get_{unique_id}@example.com" + create_resp = client.create_user(email=email) + assert create_resp.success, f"Create user failed: {create_resp.status_code} - {create_resp.data}" + + username = _extract_username(create_resp, email) + try: + get_resp = client.get_user(username) + assert get_resp.success, f"Get user failed: {get_resp.status_code} - {get_resp.data}" + assert get_resp.data.get("email") == email, f"Expected email={email}, got: {get_resp.data.get('email')}" + finally: + try: + client.delete_user(username) + except Exception: + pass + + def test_update_user_description(self, client, unique_id): + """Test updating a user's description.""" + email = f"test_update_{unique_id}@example.com" + create_resp = client.create_user(email=email, description="Original") + assert create_resp.success, f"Create user failed: {create_resp.status_code} - {create_resp.data}" + + username = _extract_username(create_resp, email) + try: + new_desc = f"Updated {unique_id}" + update_resp = client.update_user(username, description=new_desc) + assert update_resp.success, f"Update user failed: {update_resp.status_code} - {update_resp.data}" + + get_resp = client.get_user(username) + assert get_resp.success + assert get_resp.data.get("description") == new_desc + finally: + try: + client.delete_user(username) + except Exception: + pass + + def test_disable_enable_user(self, client, unique_id): + """Test disabling and re-enabling a user.""" + email = f"test_toggle_{unique_id}@example.com" + create_resp = client.create_user(email=email) + assert create_resp.success, f"Create user failed: {create_resp.status_code} - {create_resp.data}" + + username = _extract_username(create_resp, email) + try: + disable_resp = client.update_user(username, enabled=False) + assert disable_resp.success, f"Disable 
user failed: {disable_resp.status_code} - {disable_resp.data}" + + get_resp = client.get_user(username) + assert get_resp.success + assert get_resp.data.get("enabled") is False, f"Expected disabled, got: {get_resp.data.get('enabled')}" + + enable_resp = client.update_user(username, enabled=True) + assert enable_resp.success + + get_resp2 = client.get_user(username) + assert get_resp2.data.get("enabled") is True + finally: + try: + client.delete_user(username) + except Exception: + pass + + def test_delete_user(self, client, unique_id): + """Test deleting a user and verifying 404.""" + email = f"test_delete_{unique_id}@example.com" + create_resp = client.create_user(email=email) + assert create_resp.success, f"Create user failed: {create_resp.status_code} - {create_resp.data}" + + username = _extract_username(create_resp, email) + + delete_resp = client.delete_user(username) + assert delete_resp.success, f"Delete user failed: {delete_resp.status_code} - {delete_resp.data}" + + get_resp = client.get_user(username) + assert get_resp.status_code == 404, f"Deleted user should return 404, got {get_resp.status_code}" diff --git a/tests/test_01_authentication.py b/tests/test_01_authentication.py deleted file mode 100644 index 9393ce7..0000000 --- a/tests/test_01_authentication.py +++ /dev/null @@ -1,101 +0,0 @@ -""" -Authentication API Tests - -Tests for verifying API key authentication and authorization. -Validates that the provided API key has correct permissions for -QueryService and IndexService operations. 
-""" - -import pytest - - -class TestAuthentication: - """Test suite for authentication and authorization.""" - - def test_api_key_valid(self, client): - """Test that the API key is valid and can connect.""" - response = client.health_check() - - assert response.success, ( - f"API authentication failed: {response.status_code} - {response.data}" - ) - - def test_api_key_has_query_permission(self, client, test_corpus_key, sample_document): - """Test that API key has QueryService permission.""" - # First index a document to ensure there's something to query - doc_response = client.index_document( - corpus_key=test_corpus_key, - document_id="auth_test_doc", - text=sample_document["text"], - metadata=sample_document["metadata"], - ) - - # Now test query permission - response = client.query( - corpus_key=test_corpus_key, - query_text="test query", - limit=1, - ) - - assert response.success, ( - f"QueryService permission check failed: {response.status_code}. " - f"Ensure API key has QueryService role enabled." - ) - - def test_api_key_has_index_permission(self, client, test_corpus_key): - """Test that API key has IndexService permission.""" - response = client.index_document( - corpus_key=test_corpus_key, - document_id="auth_permission_test", - text="Testing IndexService permission", - ) - - assert response.success, ( - f"IndexService permission check failed: {response.status_code}. " - f"Ensure API key has IndexService role enabled." 
- ) - - # Cleanup - client.delete_document(test_corpus_key, "auth_permission_test") - - def test_invalid_api_key_rejected(self, config): - """Test that invalid API keys are properly rejected.""" - from utils.client import VectaraClient - - # Create client with invalid key - invalid_config = Config() - invalid_config.set_api_key("invalid_key_12345") - - invalid_client = VectaraClient(invalid_config) - response = invalid_client.health_check() - - assert not response.success, ( - "Invalid API key should be rejected" - ) - assert response.status_code in [401, 403], ( - f"Expected 401 or 403 for invalid key, got {response.status_code}" - ) - - def test_response_time_acceptable(self, client): - """Test that authentication response time is acceptable.""" - response = client.health_check() - - # Authentication should complete within 5 seconds - assert response.elapsed_ms < 5000, ( - f"Authentication took too long: {response.elapsed_ms:.1f}ms" - ) - - def test_list_corpora_works(self, client): - """Test basic corpus listing (requires valid authentication).""" - response = client.list_corpora(limit=10) - - assert response.success, ( - f"List corpora failed: {response.status_code} - {response.data}" - ) - assert "corpora" in response.data or isinstance(response.data, list), ( - "Expected corpora list in response" - ) - - -# Import Config for the invalid key test -from utils.config import Config diff --git a/tests/test_02_corpus_management.py b/tests/test_02_corpus_management.py deleted file mode 100644 index 0602e0b..0000000 --- a/tests/test_02_corpus_management.py +++ /dev/null @@ -1,184 +0,0 @@ -""" -Corpus Management API Tests - -Tests for corpus CRUD operations including creation, retrieval, -update, and deletion of corpora. 
-""" - -import pytest -import time - - -class TestCorpusManagement: - """Test suite for corpus management operations.""" - - def test_create_corpus(self, client, unique_id): - """Test creating a new corpus.""" - response = client.create_corpus( - name=f"Test Corpus {unique_id}", - description="Created by API test suite", - ) - - assert response.success, ( - f"Corpus creation failed: {response.status_code} - {response.data}" - ) - - # Get the actual key returned by the API - actual_key = response.data.get("key") - assert actual_key, "No key returned in corpus creation response" - - # Cleanup using the actual key - client.delete_corpus(actual_key) - - def test_create_corpus_with_metadata(self, client, unique_id): - """Test creating a corpus with custom filter attributes.""" - response = client.create_corpus( - name=f"Metadata Corpus {unique_id}", - description="Corpus with filter attributes", - filter_attributes=[ - { - "name": "category", - "level": "document", - "type": "text", - }, - { - "name": "priority", - "level": "document", - "type": "integer", - }, - ], - ) - - assert response.success, ( - f"Corpus creation with metadata failed: {response.status_code} - {response.data}" - ) - - # Cleanup using the actual key - actual_key = response.data.get("key") - if actual_key: - client.delete_corpus(actual_key) - - def test_get_corpus(self, client, test_corpus_key): - """Test retrieving corpus details.""" - response = client.get_corpus(test_corpus_key) - - assert response.success, ( - f"Get corpus failed: {response.status_code} - {response.data}" - ) - assert response.data.get("key") == test_corpus_key, ( - f"Corpus key mismatch: expected {test_corpus_key}" - ) - - def test_list_corpora(self, client): - """Test listing all corpora.""" - response = client.list_corpora(limit=100) - - assert response.success, ( - f"List corpora failed: {response.status_code} - {response.data}" - ) - - # Response should contain corpora list - data = response.data - assert "corpora" in data 
or isinstance(data, list), ( - "Expected corpora in response" - ) - - def test_list_corpora_pagination(self, client): - """Test corpus listing with pagination.""" - # First request with small limit - response1 = client.list_corpora(limit=2) - - assert response1.success, ( - f"Paginated list failed: {response1.status_code}" - ) - - # If there's a next page, test pagination - if response1.data.get("metadata", {}).get("page_key"): - page_key = response1.data["metadata"]["page_key"] - response2 = client.list_corpora(limit=2, page_key=page_key) - - assert response2.success, ( - f"Second page request failed: {response2.status_code}" - ) - - def test_update_corpus_description(self, client, test_corpus_key): - """Test updating corpus description.""" - new_description = f"Updated at {time.time()}" - - response = client.update_corpus( - corpus_key=test_corpus_key, - description=new_description, - ) - - assert response.success, ( - f"Corpus update failed: {response.status_code} - {response.data}" - ) - - # Verify update - get_response = client.get_corpus(test_corpus_key) - assert get_response.data.get("description") == new_description, ( - "Description update not reflected" - ) - - def test_delete_corpus(self, client, unique_id): - """Test corpus deletion.""" - # Create corpus to delete - create_response = client.create_corpus( - name=f"Delete Test {unique_id}", - description="Will be deleted", - ) - assert create_response.success, f"Setup: Corpus creation failed: {create_response.data}" - - # Get the actual key returned by the API - actual_key = create_response.data.get("key") - assert actual_key, "No key returned in corpus creation response" - - # Delete the corpus using the actual key - delete_response = client.delete_corpus(actual_key) - - assert delete_response.success, ( - f"Corpus deletion failed: {delete_response.status_code} - {delete_response.data}" - ) - - # Verify deletion - should get 404 - get_response = client.get_corpus(actual_key) - assert 
get_response.status_code == 404, ( - f"Deleted corpus should return 404, got {get_response.status_code}" - ) - - def test_create_duplicate_key_corpus_fails(self, client, test_corpus_key): - """Test that creating a corpus with an existing key fails.""" - # Attempt to create corpus with the same key as test_corpus_key - response = client.post("/v2/corpora", data={ - "key": test_corpus_key, - "name": "Duplicate Key Test", - }) - - # Should fail with conflict (409) or bad request (400) - assert response.status_code in [400, 409], ( - f"Duplicate key corpus creation should fail, got {response.status_code}" - ) - - def test_get_nonexistent_corpus_returns_404(self, client): - """Test that requesting a non-existent corpus returns 404.""" - response = client.get_corpus("nonexistent_corpus_xyz123") - - assert response.status_code == 404, ( - f"Expected 404 for non-existent corpus, got {response.status_code}" - ) - - def test_corpus_operations_response_times(self, client, test_corpus_key): - """Test that corpus operations complete in acceptable time.""" - # Get operation should be fast - response = client.get_corpus(test_corpus_key) - - assert response.elapsed_ms < 3000, ( - f"Get corpus took too long: {response.elapsed_ms:.1f}ms" - ) - - # List operation may take longer but should still be reasonable - list_response = client.list_corpora(limit=10) - - assert list_response.elapsed_ms < 5000, ( - f"List corpora took too long: {list_response.elapsed_ms:.1f}ms" - ) diff --git a/tests/test_03_indexing.py b/tests/test_03_indexing.py deleted file mode 100644 index fe2c008..0000000 --- a/tests/test_03_indexing.py +++ /dev/null @@ -1,279 +0,0 @@ -""" -Indexing API Tests - -Tests for document indexing operations including single document -indexing, bulk operations, and document management. 
-""" - -import pytest -import time - - -class TestIndexing: - """Test suite for document indexing operations.""" - - def test_index_single_document(self, client, test_corpus_key, unique_id, sample_document): - """Test indexing a single document.""" - doc_id = f"single_doc_{unique_id}" - - response = client.index_document( - corpus_key=test_corpus_key, - document_id=doc_id, - text=sample_document["text"], - metadata=sample_document["metadata"], - ) - - assert response.success, ( - f"Document indexing failed: {response.status_code} - {response.data}" - ) - - # Cleanup - client.delete_document(test_corpus_key, doc_id) - - def test_index_document_with_metadata(self, client, test_corpus_key, unique_id): - """Test indexing a document with custom metadata.""" - doc_id = f"meta_doc_{unique_id}" - - response = client.index_document( - corpus_key=test_corpus_key, - document_id=doc_id, - text="Document with rich metadata for testing.", - metadata={ - "author": "Test Suite", - "category": "technology", - "priority": 1, - "tags": ["test", "api", "indexing"], - "timestamp": time.time(), - }, - ) - - assert response.success, ( - f"Document with metadata indexing failed: {response.status_code} - {response.data}" - ) - - # Cleanup - client.delete_document(test_corpus_key, doc_id) - - def test_index_large_document(self, client, test_corpus_key, unique_id): - """Test indexing a larger document with multiple paragraphs.""" - doc_id = f"large_doc_{unique_id}" - - # Generate larger text content - large_text = " ".join([ - f"Paragraph {i}: This is test content for paragraph number {i}. " - "It contains information about various topics including technology, " - "science, and general knowledge. Vector databases enable semantic " - "search capabilities that traditional keyword search cannot match." 
- for i in range(20) - ]) - - response = client.index_document( - corpus_key=test_corpus_key, - document_id=doc_id, - text=large_text, - ) - - assert response.success, ( - f"Large document indexing failed: {response.status_code} - {response.data}" - ) - - # Cleanup - client.delete_document(test_corpus_key, doc_id) - - def test_index_multiple_documents(self, client, test_corpus_key, unique_id): - """Test indexing multiple documents sequentially.""" - doc_ids = [f"multi_doc_{unique_id}_{i}" for i in range(5)] - - for i, doc_id in enumerate(doc_ids): - response = client.index_document( - corpus_key=test_corpus_key, - document_id=doc_id, - text=f"Test document number {i} with unique content.", - metadata={"index": i}, - ) - - assert response.success, ( - f"Document {i} indexing failed: {response.status_code}" - ) - - # Cleanup - for doc_id in doc_ids: - client.delete_document(test_corpus_key, doc_id) - - def test_get_document(self, client, test_corpus_key, unique_id): - """Test retrieving an indexed document.""" - doc_id = f"get_doc_{unique_id}" - - # First index a document - index_response = client.index_document( - corpus_key=test_corpus_key, - document_id=doc_id, - text="Document for retrieval test.", - ) - assert index_response.success, "Setup: Document indexing failed" - - # Retrieve the document - response = client.get_document(test_corpus_key, doc_id) - - assert response.success, ( - f"Get document failed: {response.status_code} - {response.data}" - ) - assert response.data.get("id") == doc_id, ( - f"Document ID mismatch: expected {doc_id}" - ) - - # Cleanup - client.delete_document(test_corpus_key, doc_id) - - def test_list_documents(self, client, test_corpus_key, unique_id): - """Test listing documents in a corpus.""" - # Index a few documents first - doc_ids = [f"list_doc_{unique_id}_{i}" for i in range(3)] - for doc_id in doc_ids: - response = client.index_document( - corpus_key=test_corpus_key, - document_id=doc_id, - text=f"Document {doc_id} for listing 
test.", - ) - assert response.success, f"Failed to index {doc_id}: {response.data}" - - # Wait for indexing to complete (documents may not be immediately available) - time.sleep(3) - - # List documents - response = client.list_documents(test_corpus_key, limit=100) - - assert response.success, ( - f"List documents failed: {response.status_code} - {response.data}" - ) - - # Verify documents exist in list - documents = response.data.get("documents", response.data) - doc_ids_in_response = [d.get("id") for d in documents if isinstance(d, dict)] - - # Check that at least some of our documents appear (indexing may be async) - found_count = sum(1 for doc_id in doc_ids if doc_id in doc_ids_in_response) - assert found_count > 0, ( - f"None of the indexed documents found in list. Expected: {doc_ids}, Got: {doc_ids_in_response}" - ) - - # Cleanup - for doc_id in doc_ids: - client.delete_document(test_corpus_key, doc_id) - - def test_delete_document(self, client, test_corpus_key, unique_id): - """Test deleting a document.""" - doc_id = f"delete_doc_{unique_id}" - - # Index document - index_response = client.index_document( - corpus_key=test_corpus_key, - document_id=doc_id, - text="Document to be deleted.", - ) - assert index_response.success, "Setup: Document indexing failed" - - # Delete document - delete_response = client.delete_document(test_corpus_key, doc_id) - - assert delete_response.success, ( - f"Document deletion failed: {delete_response.status_code} - {delete_response.data}" - ) - - # Verify deletion - should get 404 - get_response = client.get_document(test_corpus_key, doc_id) - assert get_response.status_code == 404, ( - f"Deleted document should return 404, got {get_response.status_code}" - ) - - def test_update_document_by_delete_and_reindex(self, client, test_corpus_key, unique_id): - """Test updating a document by deleting and re-indexing.""" - doc_id = f"update_doc_{unique_id}" - - # Index original document - original_response = client.index_document( - 
corpus_key=test_corpus_key, - document_id=doc_id, - text="Original content.", - metadata={"version": 1}, - ) - assert original_response.success, "Setup: Original document indexing failed" - - # Delete the original document - delete_response = client.delete_document(test_corpus_key, doc_id) - assert delete_response.success, f"Delete failed: {delete_response.data}" - - # Re-index with updated content - update_response = client.index_document( - corpus_key=test_corpus_key, - document_id=doc_id, - text="Updated content with new information.", - metadata={"version": 2}, - ) - - assert update_response.success, ( - f"Document re-index failed: {update_response.status_code} - {update_response.data}" - ) - - # Cleanup - client.delete_document(test_corpus_key, doc_id) - - def test_index_document_special_characters(self, client, test_corpus_key, unique_id): - """Test indexing document with special characters.""" - doc_id = f"special_doc_{unique_id}" - - special_text = ( - "Testing special characters: " - "Unicode: \u00e9\u00e8\u00ea \u00f1 \u00fc " - "Symbols: @#$%^&*() " - "Quotes: 'single' \"double\" " - "Newlines:\nLine 1\nLine 2\n" - "Tabs:\tColumn1\tColumn2" - ) - - response = client.index_document( - corpus_key=test_corpus_key, - document_id=doc_id, - text=special_text, - ) - - assert response.success, ( - f"Special characters document indexing failed: {response.status_code} - {response.data}" - ) - - # Cleanup - client.delete_document(test_corpus_key, doc_id) - - def test_indexing_response_time(self, client, test_corpus_key, unique_id): - """Test that indexing completes in acceptable time.""" - doc_id = f"perf_doc_{unique_id}" - - response = client.index_document( - corpus_key=test_corpus_key, - document_id=doc_id, - text="Performance test document for measuring indexing speed.", - ) - - assert response.success, f"Indexing failed: {response.status_code}" - assert response.elapsed_ms < 10000, ( - f"Indexing took too long: {response.elapsed_ms:.1f}ms" - ) - - # Cleanup - 
client.delete_document(test_corpus_key, doc_id) - - def test_index_empty_document_fails(self, client, test_corpus_key, unique_id): - """Test that indexing an empty document is handled.""" - doc_id = f"empty_doc_{unique_id}" - - response = client.index_document( - corpus_key=test_corpus_key, - document_id=doc_id, - text="", # Empty text - ) - - # Empty documents should either fail or be handled gracefully - # Behavior may vary - just ensure no server error - assert response.status_code != 500, ( - "Server error on empty document" - ) diff --git a/tests/test_04_query_search.py b/tests/test_04_query_search.py deleted file mode 100644 index 376be01..0000000 --- a/tests/test_04_query_search.py +++ /dev/null @@ -1,348 +0,0 @@ -""" -Query and Search API Tests - -Tests for query operations including semantic search, RAG summarization, -filtering, and pagination. -""" - -import pytest -import time - - -@pytest.fixture(scope="class") -def seeded_corpus(client, test_corpus_key): - """Seed the test corpus with documents for search testing.""" - documents = [ - { - "id": "search_doc_1", - "text": "Artificial intelligence and machine learning are transforming industries. " - "Deep learning neural networks can process vast amounts of data to find patterns " - "that humans might miss. AI is being used in healthcare, finance, and transportation.", - "metadata": {"category": "technology", "topic": "ai"}, - }, - { - "id": "search_doc_2", - "text": "Vector databases enable semantic search capabilities. Unlike traditional keyword search, " - "vector search understands the meaning and context of queries. This allows for " - "more accurate and relevant search results.", - "metadata": {"category": "technology", "topic": "databases"}, - }, - { - "id": "search_doc_3", - "text": "Climate change is affecting weather patterns around the world. Scientists are studying " - "the impact of greenhouse gases on global temperatures. 
Renewable energy sources " - "like solar and wind power are becoming more important.", - "metadata": {"category": "science", "topic": "climate"}, - }, - { - "id": "search_doc_4", - "text": "The Python programming language is popular for data science and machine learning. " - "Libraries like NumPy, Pandas, and TensorFlow make it easy to work with data " - "and build AI models. Python is known for its readable syntax.", - "metadata": {"category": "technology", "topic": "programming"}, - }, - { - "id": "search_doc_5", - "text": "Space exploration has led to many technological innovations. NASA and SpaceX are " - "working on missions to Mars. Satellite technology enables global communications " - "and weather forecasting.", - "metadata": {"category": "science", "topic": "space"}, - }, - ] - - # Index all documents - for doc in documents: - response = client.index_document( - corpus_key=test_corpus_key, - document_id=doc["id"], - text=doc["text"], - metadata=doc["metadata"], - ) - if not response.success: - pytest.skip(f"Could not seed corpus: {response.data}") - - # Allow time for indexing to complete - time.sleep(2) - - yield test_corpus_key - - # Cleanup - for doc in documents: - client.delete_document(test_corpus_key, doc["id"]) - - -class TestQuerySearch: - """Test suite for query and search operations.""" - - def test_basic_query(self, client, seeded_corpus): - """Test basic semantic search query.""" - response = client.query( - corpus_key=seeded_corpus, - query_text="What is artificial intelligence?", - limit=5, - ) - - assert response.success, ( - f"Query failed: {response.status_code} - {response.data}" - ) - - # Should return search results - assert "search_results" in response.data or "results" in response.data, ( - "Expected search results in response" - ) - - def test_query_returns_relevant_results(self, client, seeded_corpus): - """Test that query returns semantically relevant results.""" - response = client.query( - corpus_key=seeded_corpus, - 
query_text="machine learning and neural networks", - limit=3, - ) - - assert response.success, f"Query failed: {response.status_code}" - - # Results should be returned - results = response.data.get("search_results", response.data.get("results", [])) - assert len(results) > 0, "Expected at least one search result" - - def test_query_with_limit(self, client, seeded_corpus): - """Test query with result limit.""" - response = client.query( - corpus_key=seeded_corpus, - query_text="technology", - limit=2, - ) - - assert response.success, f"Query failed: {response.status_code}" - - results = response.data.get("search_results", response.data.get("results", [])) - assert len(results) <= 2, f"Expected at most 2 results, got {len(results)}" - - def test_query_with_offset(self, client, seeded_corpus): - """Test query with pagination offset.""" - # First query without offset - response1 = client.query( - corpus_key=seeded_corpus, - query_text="science and technology", - limit=2, - offset=0, - ) - - # Second query with offset - response2 = client.query( - corpus_key=seeded_corpus, - query_text="science and technology", - limit=2, - offset=2, - ) - - assert response1.success and response2.success, "Queries failed" - - # Results should be different (pagination working) - results1 = response1.data.get("search_results", response1.data.get("results", [])) - results2 = response2.data.get("search_results", response2.data.get("results", [])) - - if len(results1) > 0 and len(results2) > 0: - # First result of each page should be different - id1 = results1[0].get("document_id", results1[0].get("id")) - id2 = results2[0].get("document_id", results2[0].get("id")) - assert id1 != id2, "Offset pagination not working correctly" - - def test_query_with_summary(self, client, seeded_corpus): - """Test query with RAG summarization.""" - response = client.query_with_summary( - corpus_key=seeded_corpus, - query_text="How is AI being used today?", - max_results=3, - ) - - assert response.success, ( 
- f"Query with summary failed: {response.status_code} - {response.data}" - ) - - # Should contain generated summary - assert "summary" in response.data or "generation" in response.data, ( - "Expected summary/generation in response" - ) - - def test_query_empty_results(self, client, seeded_corpus): - """Test query that returns no relevant results.""" - response = client.query( - corpus_key=seeded_corpus, - query_text="quantum teleportation through wormholes in the 15th century", - limit=5, - ) - - assert response.success, f"Query failed: {response.status_code}" - # Query should succeed even with no/few relevant results - - def test_query_special_characters(self, client, seeded_corpus): - """Test query with special characters.""" - response = client.query( - corpus_key=seeded_corpus, - query_text="What's the purpose of AI & machine-learning?", - limit=3, - ) - - assert response.success, ( - f"Query with special characters failed: {response.status_code}" - ) - - def test_query_unicode(self, client, seeded_corpus): - """Test query with unicode characters.""" - response = client.query( - corpus_key=seeded_corpus, - query_text="intelig\u00eancia artificial e aprendizado de m\u00e1quina", - limit=3, - ) - - assert response.success, ( - f"Query with unicode failed: {response.status_code}" - ) - - def test_query_long_text(self, client, seeded_corpus): - """Test query with longer query text.""" - long_query = ( - "I am interested in learning about how artificial intelligence and " - "machine learning technologies are being applied in various industries " - "such as healthcare and finance. Can you provide information about " - "the latest developments in deep learning and neural networks?" 
- ) - - response = client.query( - corpus_key=seeded_corpus, - query_text=long_query, - limit=5, - ) - - assert response.success, ( - f"Long query failed: {response.status_code}" - ) - - def test_query_response_time(self, client, seeded_corpus): - """Test that queries complete in acceptable time.""" - response = client.query( - corpus_key=seeded_corpus, - query_text="artificial intelligence", - limit=5, - ) - - assert response.success, f"Query failed: {response.status_code}" - assert response.elapsed_ms < 5000, ( - f"Query took too long: {response.elapsed_ms:.1f}ms" - ) - - def test_summary_response_time(self, client, seeded_corpus): - """Test that RAG summarization completes in acceptable time.""" - response = client.query_with_summary( - corpus_key=seeded_corpus, - query_text="What are the main topics covered?", - max_results=3, - ) - - assert response.success, f"Summary query failed: {response.status_code}" - # RAG takes longer due to LLM generation - assert response.elapsed_ms < 30000, ( - f"Summary took too long: {response.elapsed_ms:.1f}ms" - ) - - def test_query_nonexistent_corpus(self, client): - """Test querying a non-existent corpus.""" - response = client.query( - corpus_key="nonexistent_corpus_xyz123", - query_text="test query", - limit=5, - ) - - assert not response.success, "Query to non-existent corpus should fail" - assert response.status_code in [400, 404], ( - f"Expected 400 or 404, got {response.status_code}" - ) - - -class TestChat: - """Test suite for chat/conversation operations. - - Note: Chat requires a configured rephraser on the instance. - Tests will skip gracefully if rephraser is not available. 
- """ - - def test_create_chat(self, client, seeded_corpus): - """Test starting a new chat conversation.""" - response = client.create_chat( - corpus_key=seeded_corpus, - query_text="Tell me about AI", - ) - - # Skip if chat rephraser not configured on this instance - if not response.success and "rephraser" in str(response.data).lower(): - pytest.skip("Chat rephraser not configured on this instance") - - assert response.success, ( - f"Create chat failed: {response.status_code} - {response.data}" - ) - - # Should return chat ID - chat_id = response.data.get("chat_id") - if chat_id: - # Cleanup - client.delete_chat(chat_id) - - def test_list_chats(self, client): - """Test listing chat conversations.""" - response = client.list_chats(limit=10) - - assert response.success, ( - f"List chats failed: {response.status_code} - {response.data}" - ) - - def test_chat_turn(self, client, seeded_corpus): - """Test adding turns to a chat conversation.""" - # Create chat - create_response = client.create_chat( - corpus_key=seeded_corpus, - query_text="What is machine learning?", - ) - - if not create_response.success: - pytest.skip("Could not create chat for turn test") - - chat_id = create_response.data.get("chat_id") - if not chat_id: - pytest.skip("No chat_id in response") - - # Add follow-up turn - turn_response = client.add_chat_turn( - chat_id=chat_id, - query_text="Can you give me an example?", - corpus_key=seeded_corpus, - ) - - assert turn_response.success, ( - f"Add chat turn failed: {turn_response.status_code} - {turn_response.data}" - ) - - # Cleanup - client.delete_chat(chat_id) - - def test_delete_chat(self, client, seeded_corpus): - """Test deleting a chat conversation.""" - # Create chat - create_response = client.create_chat( - corpus_key=seeded_corpus, - query_text="Test chat for deletion", - ) - - if not create_response.success: - pytest.skip("Could not create chat for deletion test") - - chat_id = create_response.data.get("chat_id") - if not chat_id: - 
pytest.skip("No chat_id in response") - - # Delete chat - delete_response = client.delete_chat(chat_id) - - assert delete_response.success, ( - f"Delete chat failed: {delete_response.status_code} - {delete_response.data}" - ) diff --git a/tests/test_05_agents.py b/tests/test_05_agents.py deleted file mode 100644 index bdf5769..0000000 --- a/tests/test_05_agents.py +++ /dev/null @@ -1,354 +0,0 @@ -""" -Agents API Tests - -Tests for Vectara Agents (conversational AI) including agent creation, -execution, session management, and cleanup. - -NOTE: The Vectara Agents API is currently in tech preview and the schema -may change. These tests validate the API connectivity and will skip -gracefully if schema errors are encountered. -""" - -import pytest -import time - - - - -@pytest.fixture(scope="class") -def seeded_corpus_for_agents(client, test_corpus_key): - """Seed the test corpus with documents for agent testing.""" - documents = [ - { - "id": "agent_doc_1", - "text": "Vectara is a trusted AI platform for enterprise search and RAG applications. " - "It provides semantic search, summarization, and conversational AI capabilities. " - "Vectara supports both SaaS and on-premise deployments for enterprise customers.", - "metadata": {"category": "product", "topic": "overview"}, - }, - { - "id": "agent_doc_2", - "text": "To get started with Vectara, you need to create an account and obtain an API key. " - "The API key should have QueryService and IndexService permissions for full functionality. " - "You can then use the REST API or SDKs to index documents and run queries.", - "metadata": {"category": "documentation", "topic": "getting_started"}, - }, - { - "id": "agent_doc_3", - "text": "Vectara agents provide conversational AI experiences. Agents maintain context " - "across multiple turns of conversation, allowing for natural follow-up questions. 
" - "Each agent can be configured with specific corpora and generation settings.", - "metadata": {"category": "documentation", "topic": "agents"}, - }, - ] - - # Index all documents - for doc in documents: - response = client.index_document( - corpus_key=test_corpus_key, - document_id=doc["id"], - text=doc["text"], - metadata=doc["metadata"], - ) - if not response.success: - pytest.skip(f"Could not seed corpus for agents: {response.data}") - - # Allow time for indexing - time.sleep(2) - - yield test_corpus_key - - # Cleanup documents - for doc in documents: - client.delete_document(test_corpus_key, doc["id"]) - - -class TestAgents: - """Test suite for Vectara Agents API.""" - - def test_list_agents(self, client): - """Test listing all agents.""" - response = client.list_agents(limit=10) - - assert response.success, ( - f"List agents failed: {response.status_code} - {response.data}" - ) - - def test_create_agent(self, client, seeded_corpus_for_agents, unique_id): - """Test creating a new agent.""" - agent_name = f"Test Agent {unique_id}" - - response = client.create_agent( - name=agent_name, - corpus_keys=[seeded_corpus_for_agents], - description="Test agent created by API test suite", - ) - - assert response.success, ( - f"Create agent failed: {response.status_code} - {response.data}" - ) - - # Get agent ID for cleanup - agent_id = response.data.get("id") or response.data.get("agent_id") or response.data.get("key") - if agent_id: - # Cleanup - client.delete_agent(agent_id) - - def test_create_agent_with_config(self, client, seeded_corpus_for_agents, unique_id): - """Test creating an agent with custom configuration.""" - agent_name = f"Configured Agent {unique_id}" - - response = client.create_agent( - name=agent_name, - corpus_keys=[seeded_corpus_for_agents], - description="Agent with custom settings", - ) - - assert response.success, ( - f"Create configured agent failed: {response.status_code} - {response.data}" - ) - - agent_id = response.data.get("id") or 
response.data.get("agent_id") or response.data.get("key") - if agent_id: - client.delete_agent(agent_id) - - def test_get_agent(self, client, seeded_corpus_for_agents, unique_id): - """Test retrieving agent details.""" - # First create an agent - create_response = client.create_agent( - name=f"Get Test Agent {unique_id}", - corpus_keys=[seeded_corpus_for_agents], - ) - - # Fallback to minimal agent - if not create_response.success: - create_response = client.create_agent( - name=f"Get Test Agent {unique_id}", - ) - - if not create_response.success: - pytest.skip(f"Could not create agent for get test: {create_response.data}") - - agent_id = create_response.data.get("id") or create_response.data.get("agent_id") or create_response.data.get("key") - if not agent_id: - pytest.skip("No agent_id in create response") - - # Get the agent - response = client.get_agent(agent_id) - - assert response.success, ( - f"Get agent failed: {response.status_code} - {response.data}" - ) - - # Cleanup - client.delete_agent(agent_id) - - def test_update_agent(self, client, seeded_corpus_for_agents, unique_id): - """Test updating an agent.""" - # Create agent - create_response = client.create_agent( - name=f"Update Test Agent {unique_id}", - corpus_keys=[seeded_corpus_for_agents], - description="Original description", - ) - - # Fallback to minimal agent - if not create_response.success: - create_response = client.create_agent( - name=f"Update Test Agent {unique_id}", - description="Original description", - ) - - if not create_response.success: - pytest.skip(f"Could not create agent for update test: {create_response.data}") - - agent_id = create_response.data.get("id") or create_response.data.get("agent_id") or create_response.data.get("key") - if not agent_id: - pytest.skip("No agent_id in create response") - - # Update the agent - new_description = f"Updated description at {time.time()}" - update_response = client.update_agent( - agent_id=agent_id, - description=new_description, - ) - - 
assert update_response.success, ( - f"Update agent failed: {update_response.status_code} - {update_response.data}" - ) - - # Cleanup - client.delete_agent(agent_id) - - def test_delete_agent(self, client, seeded_corpus_for_agents, unique_id): - """Test deleting an agent.""" - # Create agent to delete - create_response = client.create_agent( - name=f"Delete Test Agent {unique_id}", - corpus_keys=[seeded_corpus_for_agents], - ) - - # Fallback to minimal agent - if not create_response.success: - create_response = client.create_agent( - name=f"Delete Test Agent {unique_id}", - ) - - if not create_response.success: - pytest.skip(f"Could not create agent for delete test: {create_response.data}") - - agent_id = create_response.data.get("id") or create_response.data.get("agent_id") or create_response.data.get("key") - if not agent_id: - pytest.skip("No agent_id in create response") - - # Delete the agent - delete_response = client.delete_agent(agent_id) - - assert delete_response.success, ( - f"Delete agent failed: {delete_response.status_code} - {delete_response.data}" - ) - - # Verify deletion - get_response = client.get_agent(agent_id) - assert get_response.status_code == 404, ( - f"Deleted agent should return 404, got {get_response.status_code}" - ) - - -class TestAgentExecution: - """Test suite for agent execution and conversations.""" - - @pytest.fixture - def test_agent(self, client, seeded_corpus_for_agents, unique_id): - """Create a test agent for execution tests.""" - response = client.create_agent( - name=f"Execution Test Agent {unique_id}", - corpus_keys=[seeded_corpus_for_agents], - description="Agent for execution testing", - ) - - # Fallback to minimal agent - if not response.success: - response = client.create_agent( - name=f"Execution Test Agent {unique_id}", - description="Agent for execution testing", - ) - - if not response.success: - pytest.skip(f"Could not create test agent: {response.data}") - - agent_id = response.data.get("id") or 
response.data.get("agent_id") or response.data.get("key") - if not agent_id: - pytest.skip("No agent_id in create response") - - yield agent_id - - # Cleanup - client.delete_agent(agent_id) - - def test_execute_agent_query(self, client, test_agent): - """Test executing a query against an agent.""" - response = client.execute_agent( - agent_id=test_agent, - query_text="What is Vectara?", - ) - - assert response.success, ( - f"Agent execution failed: {response.status_code} - {response.data}" - ) - - def test_execute_agent_with_context(self, client, test_agent): - """Test multi-turn conversation with an agent.""" - # First turn - response1 = client.execute_agent( - agent_id=test_agent, - query_text="Tell me about Vectara agents.", - ) - - assert response1.success, ( - f"First turn failed: {response1.status_code} - {response1.data}" - ) - - # Get session ID if available for follow-up - session_id = response1.data.get("session_id") - - # Second turn (follow-up) - response2 = client.execute_agent( - agent_id=test_agent, - query_text="How do I configure them?", - session_id=session_id, - ) - - assert response2.success, ( - f"Follow-up turn failed: {response2.status_code} - {response2.data}" - ) - - def test_execute_agent_response_time(self, client, test_agent): - """Test that agent execution completes in acceptable time.""" - response = client.execute_agent( - agent_id=test_agent, - query_text="What is semantic search?", - ) - - assert response.success, f"Agent execution failed: {response.status_code}" - - # Agent responses involve LLM generation, allow more time - assert response.elapsed_ms < 60000, ( - f"Agent execution took too long: {response.elapsed_ms:.1f}ms" - ) - - def test_list_agent_sessions(self, client, test_agent): - """Test listing sessions for an agent.""" - # First execute a query to create a session - client.execute_agent( - agent_id=test_agent, - query_text="Create a session", - ) - - # List sessions - response = client.list_agent_sessions(test_agent, 
limit=10) - - assert response.success, ( - f"List sessions failed: {response.status_code} - {response.data}" - ) - - def test_execute_nonexistent_agent(self, client): - """Test executing against a non-existent agent.""" - response = client.execute_agent( - agent_id="nonexistent_agent_xyz123", - query_text="test query", - ) - - assert not response.success, "Execution against non-existent agent should fail" - assert response.status_code in [400, 404], ( - f"Expected 400 or 404, got {response.status_code}" - ) - - def test_agent_handles_special_characters(self, client, test_agent): - """Test agent handles queries with special characters.""" - response = client.execute_agent( - agent_id=test_agent, - query_text="What's Vectara's approach to AI & machine-learning?", - ) - - assert response.success, ( - f"Special character query failed: {response.status_code}" - ) - - def test_agent_handles_long_query(self, client, test_agent): - """Test agent handles longer queries.""" - long_query = ( - "I'm trying to understand how Vectara's conversational AI agents work. " - "Can you explain the process of creating an agent, configuring it with " - "multiple corpora, and then using it for multi-turn conversations? " - "I'm particularly interested in how context is maintained across turns." - ) - - response = client.execute_agent( - agent_id=test_agent, - query_text=long_query, - ) - - assert response.success, ( - f"Long query failed: {response.status_code}" - ) diff --git a/tests/workflows/conftest.py b/tests/workflows/conftest.py new file mode 100644 index 0000000..ad21ffd --- /dev/null +++ b/tests/workflows/conftest.py @@ -0,0 +1,5 @@ +"""Workflow test fixtures. + +Workflow tests are fully self-contained -- each test creates and cleans up +its own resources. No shared fixtures needed. 
+""" diff --git a/tests/workflows/test_agent_conversation_flow.py b/tests/workflows/test_agent_conversation_flow.py new file mode 100644 index 0000000..3728083 --- /dev/null +++ b/tests/workflows/test_agent_conversation_flow.py @@ -0,0 +1,129 @@ +"""End-to-end agent conversation workflow. + +Creates a corpus, seeds data, creates an agent, starts a session, +has a multi-turn conversation, and verifies context is maintained. +""" + +import uuid + +import pytest + +from utils.waiters import wait_for + + +@pytest.mark.workflow +class TestAgentConversationFlow: + + def test_agent_multi_turn_conversation(self, client): + """Create corpus -> seed -> create agent -> chat -> verify context -> cleanup.""" + corpus_key = f"agent_wf_{uuid.uuid4().hex}" + agent_key = None + session_key = None + + # Step 1: Create and seed corpus + corpus_resp = client.create_corpus( + name=f"Agent Workflow {uuid.uuid4().hex[:8]}", + key=corpus_key, + description="E2E agent workflow corpus", + ) + assert corpus_resp.success, f"Create corpus failed: {corpus_resp.data}" + actual_corpus_key = corpus_resp.data.get("key", corpus_key) + + try: + wait_for( + lambda: client.get_corpus(actual_corpus_key).success, + timeout=10, + interval=1, + description="agent workflow corpus", + ) + + # Seed documents + doc_ids = [] + docs = [ + { + "id": f"awf_{uuid.uuid4().hex[:8]}", + "text": "Vectara provides semantic search and RAG for enterprise applications.", + "metadata": {"topic": "overview"}, + }, + { + "id": f"awf_{uuid.uuid4().hex[:8]}", + "text": "Agents maintain context across conversation turns for natural follow-up questions.", + "metadata": {"topic": "agents"}, + }, + ] + for doc in docs: + resp = client.index_document(corpus_key=actual_corpus_key, document_id=doc["id"], text=doc["text"], metadata=doc["metadata"]) + if resp.success: + doc_ids.append(doc["id"]) + + wait_for( + lambda: client.list_documents(actual_corpus_key, limit=5).data.get("documents", []), + timeout=15, + interval=1, + 
description="agent workflow docs indexed", + ) + + # Step 2: Create agent + agent_resp = client.create_agent( + name=f"Workflow Agent {uuid.uuid4().hex[:8]}", + description="E2E workflow test agent", + ) + assert agent_resp.success, f"Create agent failed: {agent_resp.data}" + agent_key = agent_resp.data.get("key") or agent_resp.data.get("id") + + # Step 3: Create session + session_resp = client.create_agent_session(agent_key) + assert session_resp.success, f"Create session failed: {session_resp.data}" + session_key = session_resp.data.get("key") + + # Step 4: First turn + turn1 = client.execute_agent( + agent_id=agent_key, + query_text="What does Vectara do?", + session_id=session_key, + ) + assert turn1.success, f"First turn failed: {turn1.data}" + + # Verify response has content + turn1_events = turn1.data.get("events", []) + assert len(turn1_events) > 0, f"First turn returned no events: {turn1.data.keys()}" + + # Step 5: Follow-up (tests context maintenance) + turn2 = client.execute_agent( + agent_id=agent_key, + query_text="How do agents work?", + session_id=session_key, + ) + assert turn2.success, f"Follow-up failed: {turn2.data}" + + # Verify second turn has content + turn2_events = turn2.data.get("events", []) + assert len(turn2_events) > 0, f"Second turn returned no events: {turn2.data.keys()}" + + # Step 6: Verify events exist + events_resp = client.list_session_events(agent_key, session_key) + assert events_resp.success, f"List events failed: {events_resp.data}" + events = events_resp.data.get("events", []) + assert len(events) >= 2, f"Expected at least 2 events, got {len(events)}" + + finally: + # Cleanup: reverse dependency order + if session_key and agent_key: + try: + client.delete_agent_session(agent_key, session_key) + except Exception: + pass + if agent_key: + try: + client.delete_agent(agent_key) + except Exception: + pass + for doc_id in doc_ids: + try: + client.delete_document(actual_corpus_key, doc_id) + except Exception: + pass + try: + 
client.delete_corpus(actual_corpus_key) + except Exception: + pass diff --git a/tests/workflows/test_cross_corpus_rag_flow.py b/tests/workflows/test_cross_corpus_rag_flow.py new file mode 100644 index 0000000..42124d0 --- /dev/null +++ b/tests/workflows/test_cross_corpus_rag_flow.py @@ -0,0 +1,109 @@ +""" +Cross-Corpus RAG Workflow Test + +End-to-end test that creates two corpora with distinct domains, +seeds each, performs a RAG query across both, and verifies the +response includes results from both corpora. +""" + +import uuid + +import pytest + +from utils.waiters import wait_for + + +@pytest.mark.workflow +class TestCrossCorpusRagFlow: + """Cross-corpus RAG workflow.""" + + def test_cross_corpus_rag(self, client): + """Create 2 corpora, seed each, RAG query across both, verify provenance.""" + uid = uuid.uuid4().hex[:8] + corpus1_key = f"rag_medical_{uid}" + corpus2_key = f"rag_legal_{uid}" + + c1 = client.create_corpus(name=f"Medical {uid}", key=corpus1_key) + c2 = client.create_corpus(name=f"Legal {uid}", key=corpus2_key) + + if not c1.success or not c2.success: + for k in [corpus1_key, corpus2_key]: + try: + client.delete_corpus(k) + except Exception: + pass + pytest.skip("Could not create corpora for cross-corpus RAG test") + + try: + for key in [corpus1_key, corpus2_key]: + wait_for( + lambda k=key: client.get_corpus(k).success, + timeout=10, + interval=1, + description=f"corpus {key} available", + ) + + medical_docs = [ + ("med_1", "Heart disease prevention through diet and exercise reduces mortality rates significantly."), + ("med_2", "Clinical trials for new cancer treatments show promising results in early stages."), + ] + legal_docs = [ + ("legal_1", "Contract law requires mutual consideration between parties for enforcement."), + ("legal_2", "Intellectual property rights protect creators from unauthorized use of their work."), + ] + + for doc_id, text in medical_docs: + r = client.index_document(corpus1_key, f"{doc_id}_{uid}", text) + assert 
r.success, f"Index medical doc failed: {r.data}" + for doc_id, text in legal_docs: + r = client.index_document(corpus2_key, f"{doc_id}_{uid}", text) + assert r.success, f"Index legal doc failed: {r.data}" + + for key, docs in [(corpus1_key, medical_docs), (corpus2_key, legal_docs)]: + wait_for( + lambda k=key, d=docs: all(client.get_document(k, f"{did}_{uid}").success for did, _ in d), + timeout=20, + interval=2, + description=f"documents indexed in {key}", + ) + + generation = {} + if client.generation_preset: + generation["generation_preset_name"] = client.generation_preset + if client.llm_name: + generation["model_parameters"] = {"llm_name": client.llm_name} + + query_resp = client.post( + "/v2/query", + data={ + "query": "important topics in modern society", + "search": { + "corpora": [ + {"corpus_key": corpus1_key}, + {"corpus_key": corpus2_key}, + ], + "limit": 10, + }, + "generation": generation, + }, + ) + assert query_resp.success, f"RAG query failed: {query_resp.status_code} - {query_resp.data}" + + results = query_resp.data.get("search_results", []) + assert len(results) > 0, "Expected search results from cross-corpus RAG" + + result_corpus_keys = {r.get("corpus_key") for r in results} + assert ( + corpus1_key in result_corpus_keys or corpus2_key in result_corpus_keys + ), f"Expected results from at least one test corpus, got keys: {result_corpus_keys}" + + has_summary = query_resp.data.get("summary") is not None or query_resp.data.get("generation") is not None + if has_summary: + summary_text = query_resp.data.get("summary", "") or "" + assert len(summary_text) > 0, "Summary should be non-empty" + finally: + for key in [corpus1_key, corpus2_key]: + try: + client.delete_corpus(key) + except Exception: + pass diff --git a/tests/workflows/test_index_query_flow.py b/tests/workflows/test_index_query_flow.py new file mode 100644 index 0000000..dd31572 --- /dev/null +++ b/tests/workflows/test_index_query_flow.py @@ -0,0 +1,116 @@ +"""End-to-end indexing and 
query workflow. + +Creates a corpus, indexes documents, queries them with semantic search +and RAG summarization, then cleans up. +""" + +import uuid + +import pytest + +from utils.waiters import wait_for + + +@pytest.mark.workflow +class TestIndexQueryFlow: + + def test_full_indexing_and_query_flow(self, client): + """Create corpus -> index docs -> query -> RAG summary -> cleanup.""" + corpus_key = f"workflow_{uuid.uuid4().hex}" + + # Step 1: Create corpus + corpus_resp = client.create_corpus( + name=f"Workflow Test {uuid.uuid4().hex[:8]}", + key=corpus_key, + description="E2E workflow test corpus", + ) + assert corpus_resp.success, f"Create corpus failed: {corpus_resp.data}" + actual_key = corpus_resp.data.get("key", corpus_key) + + try: + wait_for( + lambda: client.get_corpus(actual_key).success, + timeout=10, + interval=1, + description="workflow corpus to become queryable", + ) + + # Step 2: Index documents + doc_ids = [] + docs = [ + { + "id": f"wf_doc_{uuid.uuid4().hex[:8]}", + "text": "Machine learning enables computers to learn from data without explicit programming.", + "metadata": {"topic": "ml"}, + }, + { + "id": f"wf_doc_{uuid.uuid4().hex[:8]}", + "text": "Neural networks are inspired by biological brain structures and excel at pattern recognition.", + "metadata": {"topic": "nn"}, + }, + { + "id": f"wf_doc_{uuid.uuid4().hex[:8]}", + "text": "Natural language processing allows machines to understand and generate human language.", + "metadata": {"topic": "nlp"}, + }, + ] + for doc in docs: + resp = client.index_document( + corpus_key=actual_key, + document_id=doc["id"], + text=doc["text"], + metadata=doc["metadata"], + ) + assert resp.success, f"Index doc {doc['id']} failed: {resp.data}" + doc_ids.append(doc["id"]) + + # Step 3: Wait for indexing + wait_for( + lambda: len(client.list_documents(actual_key, limit=10).data.get("documents", [])) >= 3, + timeout=15, + interval=1, + description="all 3 docs to be indexed", + ) + + # Step 4: Semantic search + 
query_resp = client.query( + corpus_key=actual_key, + query_text="How do machines learn from data?", + limit=5, + ) + assert query_resp.success, f"Query failed: {query_resp.data}" + results = query_resp.data.get("search_results", query_resp.data.get("results", [])) + assert len(results) > 0, "Expected at least one search result" + + # Verify top result relates to indexed content + top_text = results[0].get("text", "").lower() + assert any( + term in top_text for term in ["learn", "data", "machine", "neural", "language"] + ), f"Top result doesn't relate to indexed docs: {top_text[:200]}" + + # Step 5: RAG summary + summary_resp = client.query_with_summary( + corpus_key=actual_key, + query_text="Explain how AI works", + max_results=3, + ) + assert summary_resp.success, f"Summary query failed: {summary_resp.data}" + has_summary = "summary" in summary_resp.data or "generation" in summary_resp.data + assert has_summary, f"Expected summary in response: {list(summary_resp.data.keys())}" + + summary_text = summary_resp.data.get("summary", summary_resp.data.get("generation", "")) + if isinstance(summary_text, dict): + summary_text = summary_text.get("text", str(summary_text)) + assert len(str(summary_text)) > 10, f"Summary too short or empty: {summary_text}" + + finally: + # Cleanup in reverse order + for doc_id in doc_ids: + try: + client.delete_document(actual_key, doc_id) + except Exception: + pass + try: + client.delete_corpus(actual_key) + except Exception: + pass diff --git a/utils/client.py b/utils/client.py index fd321d5..976de6e 100644 --- a/utils/client.py +++ b/utils/client.py @@ -8,10 +8,11 @@ - Response time tracking """ -import time import logging -from typing import Any, Optional +import time from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Optional import requests from requests.adapters import HTTPAdapter @@ -164,13 +165,71 @@ def _request( error=f"Unexpected error: {str(e)}", ) + def _request_raw( + self, + method: 
str, + endpoint: str, + data: Optional[dict] = None, + params: Optional[dict] = None, + headers: Optional[dict] = None, + files: Optional[dict] = None, + stream: bool = False, + ) -> requests.Response: + """Make an API request and return the raw :class:`requests.Response`. + + This is useful for streaming responses (SSE) or multipart uploads + where the caller needs direct access to the underlying response. + + When *files* is provided the request is sent as ``multipart/form-data`` + (using ``data=`` instead of ``json=``), and the ``Content-Type`` header + is left for *requests* to set automatically so that the multipart + boundary is included. + + Args: + method: HTTP method (GET, POST, etc.) + endpoint: API endpoint path. + data: Request body. Sent as JSON unless *files* is provided. + params: Query parameters. + headers: Additional headers (merged on top of session defaults). + files: Mapping suitable for ``requests``' *files* parameter. + stream: If ``True`` the response body is not downloaded eagerly. + + Returns: + The raw :class:`requests.Response` object. + """ + url = self._build_url(endpoint) + request_headers = {**(headers or {})} + + self.logger.debug(f"{method} {url}") + + kwargs: dict = { + "method": method, + "url": url, + "params": params, + "headers": request_headers, + "timeout": self.config.request_timeout, + "stream": stream, + } + + if files is not None: + # Multipart upload -- use data= (not json=) and let requests + # generate the Content-Type with the correct boundary. + kwargs["data"] = data + kwargs["files"] = files + # Set Content-Type to None to override the session-level default + # (application/json). This tells requests to omit it entirely and + # auto-generate the multipart boundary. 
+ kwargs["headers"]["Content-Type"] = None + else: + kwargs["json"] = data + + return self.session.request(**kwargs) + # ------------------------------------------------------------------------- # Convenience methods for HTTP verbs # ------------------------------------------------------------------------- - def get( - self, endpoint: str, params: Optional[dict] = None, **kwargs - ) -> APIResponse: + def get(self, endpoint: str, params: Optional[dict] = None, **kwargs) -> APIResponse: """Make a GET request.""" return self._request("GET", endpoint, params=params, **kwargs) @@ -182,9 +241,7 @@ def put(self, endpoint: str, data: Optional[dict] = None, **kwargs) -> APIRespon """Make a PUT request.""" return self._request("PUT", endpoint, data=data, **kwargs) - def patch( - self, endpoint: str, data: Optional[dict] = None, **kwargs - ) -> APIResponse: + def patch(self, endpoint: str, data: Optional[dict] = None, **kwargs) -> APIResponse: """Make a PATCH request.""" return self._request("PATCH", endpoint, data=data, **kwargs) @@ -232,9 +289,7 @@ def _build_generation_config( # Vectara API Operations - Corpora # ------------------------------------------------------------------------- - def list_corpora( - self, limit: int = 100, page_key: Optional[str] = None - ) -> APIResponse: + def list_corpora(self, limit: int = 100, page_key: Optional[str] = None) -> APIResponse: """List all corpora for the customer.""" params = {"limit": limit} if page_key: @@ -263,6 +318,36 @@ def update_corpus(self, corpus_key: str, **kwargs) -> APIResponse: """Update corpus properties.""" return self.patch(f"/v2/corpora/{corpus_key}", data=kwargs) + def replace_filter_attributes( + self, + corpus_key: str, + filter_attributes: list[dict], + ) -> APIResponse: + """Replace the filter attributes of a corpus. + + Args: + corpus_key: Target corpus key. + filter_attributes: New filter attribute definitions. + + Returns: + APIResponse with job_id and status (async operation). 
+ """ + return self.post( + f"/v2/corpora/{corpus_key}/replace_filter_attributes", + data={"filter_attributes": filter_attributes}, + ) + + def compute_corpus_size(self, corpus_key: str) -> APIResponse: + """Compute the current size of a corpus. + + Returns document count, part count, and character statistics. + """ + return self.post(f"/v2/corpora/{corpus_key}/compute_size") + + def reset_corpus(self, corpus_key: str) -> APIResponse: + """Remove all documents and data from a corpus.""" + return self.post(f"/v2/corpora/{corpus_key}/reset") + # ------------------------------------------------------------------------- # Vectara API Operations - Documents (Indexing) # ------------------------------------------------------------------------- @@ -309,6 +394,107 @@ def delete_document(self, corpus_key: str, document_id: str) -> APIResponse: """Delete a document from a corpus.""" return self.delete(f"/v2/corpora/{corpus_key}/documents/{document_id}") + def update_document_metadata( + self, + corpus_key: str, + document_id: str, + metadata: dict, + ) -> APIResponse: + """Update (merge) metadata on an existing document. + + Args: + corpus_key: Target corpus key. + document_id: Document to update. + metadata: Metadata fields to merge into the document. + + Returns: + APIResponse with the update result. + """ + return self.patch( + f"/v2/corpora/{corpus_key}/documents/{document_id}", + data={"metadata": metadata}, + ) + + def replace_document_metadata( + self, + corpus_key: str, + document_id: str, + metadata: dict, + ) -> APIResponse: + """Fully replace metadata on an existing document. + + Args: + corpus_key: Target corpus key. + document_id: Document whose metadata will be replaced. + metadata: Complete metadata dict that replaces the current one. + + Returns: + APIResponse with the replacement result. 
+ """ + return self.put( + f"/v2/corpora/{corpus_key}/documents/{document_id}/metadata", + data={"metadata": metadata}, + ) + + def bulk_delete_documents( + self, + corpus_key: str, + document_ids: Optional[list[str]] = None, + metadata_filter: Optional[str] = None, + async_mode: bool = True, + ) -> APIResponse: + """Bulk delete documents from a corpus. + + Args: + corpus_key: Target corpus key. + document_ids: List of document IDs to delete. + metadata_filter: SQL-like filter expression for deletion. + async_mode: If True (default), returns 202 with job_id. + If False, waits for completion and returns 200. + + Returns: + APIResponse with deletion result or job_id. + """ + params: dict = {} + if document_ids is not None: + params["document_ids"] = ",".join(document_ids) + if metadata_filter is not None: + params["metadata_filter"] = metadata_filter + if not async_mode: + params["async"] = "false" + return self._request("DELETE", f"/v2/corpora/{corpus_key}/documents", params=params) + + def index_document_parts( + self, + corpus_key: str, + document_id: str, + parts: list[dict], + metadata: Optional[dict] = None, + **kwargs, + ) -> APIResponse: + """Index a document with explicit parts into a corpus. + + Each part is a dict that must contain ``text`` and may optionally + include ``metadata``, ``custom_dimensions``, and ``context``. + + Args: + corpus_key: Target corpus key. + document_id: Unique document identifier. + parts: List of document part dicts. + metadata: Optional document-level metadata. + + Returns: + APIResponse with the indexing result. 
+ """ + data = { + "id": document_id, + "type": "core", + "metadata": metadata or {}, + "document_parts": parts, + **kwargs, + } + return self.post(f"/v2/corpora/{corpus_key}/documents", data=data) + # ------------------------------------------------------------------------- # Vectara API Operations - Query (Search) # ------------------------------------------------------------------------- @@ -333,6 +519,39 @@ def query( } return self.post("/v2/query", data=data) + def query_corpus( + self, + corpus_key: str, + query_text: str, + limit: int = 10, + custom_dimensions: Optional[dict] = None, + **kwargs, + ) -> APIResponse: + """Execute a query scoped to a single corpus via its dedicated endpoint. + + Unlike :meth:`query` which uses the global ``/v2/query`` endpoint, + this hits ``/v2/corpora/{corpus_key}/query`` directly. + + Args: + corpus_key: The corpus to query. + query_text: The query text. + limit: Maximum number of search results. + custom_dimensions: Optional custom dimension weights for the search. + + Returns: + APIResponse with search results. + """ + search: dict = {"limit": limit} + if custom_dimensions is not None: + search["custom_dimensions"] = custom_dimensions + + data: dict = { + "query": query_text, + "search": search, + **kwargs, + } + return self.post(f"/v2/corpora/{corpus_key}/query", data=data) + def query_with_summary( self, corpus_key: str, @@ -370,6 +589,47 @@ def query_with_summary( } return self.post("/v2/query", data=data) + def query_stream( + self, + corpus_key: str, + query_text: str, + generation_config: Optional[dict] = None, + **kwargs, + ) -> requests.Response: + """Execute a streaming query and return the raw SSE response. + + Streaming requires ``stream_response: true`` in the request body + and ``Accept: text/event-stream`` header. + + Args: + corpus_key: The corpus to query. + query_text: The query text. + generation_config: Optional generation configuration dict. + + Returns: + Raw streaming :class:`requests.Response`. 
+ """ + data: dict = { + "query": query_text, + "search": { + "corpora": [{"corpus_key": corpus_key}], + }, + "stream_response": True, + **kwargs, + } + if generation_config is not None: + data["generation"] = generation_config + elif self.generation_preset or self.llm_name: + data["generation"] = self._build_generation_config() + + return self._request_raw( + method="POST", + endpoint="/v2/query", + data=data, + headers={"Accept": "text/event-stream"}, + stream=True, + ) + # ------------------------------------------------------------------------- # Vectara API Operations - Chat # ------------------------------------------------------------------------- @@ -406,9 +666,7 @@ def delete_chat(self, chat_id: str) -> APIResponse: """Delete a chat conversation.""" return self.delete(f"/v2/chats/{chat_id}") - def add_chat_turn( - self, chat_id: str, query_text: str, corpus_key: str, **kwargs - ) -> APIResponse: + def add_chat_turn(self, chat_id: str, query_text: str, corpus_key: str, **kwargs) -> APIResponse: """Add a turn to an existing chat.""" data = { "query": query_text, @@ -419,6 +677,22 @@ def add_chat_turn( } return self.post(f"/v2/chats/{chat_id}/turns", data=data) + def list_chat_turns(self, chat_id: str) -> APIResponse: + """List turns in a chat.""" + return self.get(f"/v2/chats/{chat_id}/turns") + + def get_chat_turn(self, chat_id: str, turn_id: str) -> APIResponse: + """Get a specific turn in a chat.""" + return self.get(f"/v2/chats/{chat_id}/turns/{turn_id}") + + def update_chat_turn(self, chat_id: str, turn_id: str, **kwargs) -> APIResponse: + """Update a turn in a chat (e.g., disable it).""" + return self.patch(f"/v2/chats/{chat_id}/turns/{turn_id}", data=kwargs) + + def delete_chat_turn(self, chat_id: str, turn_id: str) -> APIResponse: + """Delete a turn from a chat.""" + return self.delete(f"/v2/chats/{chat_id}/turns/{turn_id}") + # ------------------------------------------------------------------------- # Vectara API Operations - API Keys (Admin) # 
------------------------------------------------------------------------- @@ -427,6 +701,148 @@ def list_api_keys(self) -> APIResponse: """List all API keys.""" return self.get("/v2/api_keys") + def create_api_key( + self, + name: str, + api_key_role: str = "serving", + corpus_keys: Optional[list[str]] = None, + **kwargs, + ) -> APIResponse: + """Create a new API key. + + Args: + name: Display name for the key. + api_key_role: Role for the key (``serving`` or ``personal``). + corpus_keys: Optional list of corpus keys to scope the key to. + """ + data: dict = { + "name": name, + "api_key_role": api_key_role, + **kwargs, + } + if corpus_keys is not None: + data["corpus_keys"] = corpus_keys + return self.post("/v2/api_keys", data=data) + + def delete_api_key(self, api_key_id: str) -> APIResponse: + """Delete an API key by ID.""" + return self.delete(f"/v2/api_keys/{api_key_id}") + + def enable_api_key(self, api_key_id: str) -> APIResponse: + """Enable a disabled API key.""" + return self.patch(f"/v2/api_keys/{api_key_id}", data={"enabled": True}) + + def disable_api_key(self, api_key_id: str) -> APIResponse: + """Disable an API key.""" + return self.patch(f"/v2/api_keys/{api_key_id}", data={"enabled": False}) + + # ------------------------------------------------------------------------- + # Vectara API Operations - App Clients + # ------------------------------------------------------------------------- + + def create_app_client( + self, + name: str, + type: str = "client_credentials", + description: str = "", + api_roles: Optional[list[dict]] = None, + corpus_roles: Optional[list[dict]] = None, + agent_roles: Optional[list[dict]] = None, + **kwargs, + ) -> APIResponse: + """Create an app client. + + Args: + name: Display name for the app client. + type: Client type (default ``client_credentials``). + description: Optional description. + api_roles: Optional customer-level role assignments. + corpus_roles: Optional corpus-specific role assignments. 
+ agent_roles: Optional agent-specific role assignments. + """ + data: dict = {"name": name, "type": type, "description": description, **kwargs} + if api_roles is not None: + data["api_roles"] = api_roles + if corpus_roles is not None: + data["corpus_roles"] = corpus_roles + if agent_roles is not None: + data["agent_roles"] = agent_roles + return self.post("/v2/app_clients", data=data) + + def list_app_clients(self, limit: int = 100) -> APIResponse: + """List all app clients.""" + return self.get("/v2/app_clients", params={"limit": limit}) + + def get_app_client(self, app_client_id: str) -> APIResponse: + """Get an app client by ID.""" + return self.get(f"/v2/app_clients/{app_client_id}") + + def update_app_client(self, app_client_id: str, **kwargs) -> APIResponse: + """Update an app client.""" + return self.patch(f"/v2/app_clients/{app_client_id}", data=kwargs) + + def delete_app_client(self, app_client_id: str) -> APIResponse: + """Delete an app client by ID.""" + return self.delete(f"/v2/app_clients/{app_client_id}") + + # ------------------------------------------------------------------------- + # Vectara API Operations - Users + # ------------------------------------------------------------------------- + + def create_user( + self, + email: str, + username: Optional[str] = None, + api_roles: Optional[list[dict]] = None, + corpus_roles: Optional[list[dict]] = None, + agent_roles: Optional[list[dict]] = None, + description: str = "", + **kwargs, + ) -> APIResponse: + """Create a user in the current customer account. + + Args: + email: User email address (required). + username: Username (defaults to email if not provided). + api_roles: Optional customer-level role assignments. + corpus_roles: Optional corpus-specific role assignments. + agent_roles: Optional agent-specific role assignments. + description: Optional user description. 
+ """ + data: dict = {"email": email, "description": description, **kwargs} + if username is not None: + data["username"] = username + if api_roles is not None: + data["api_roles"] = api_roles + if corpus_roles is not None: + data["corpus_roles"] = corpus_roles + if agent_roles is not None: + data["agent_roles"] = agent_roles + return self.post("/v2/users", data=data) + + def list_users(self, limit: int = 100) -> APIResponse: + """List users in the account.""" + return self.get("/v2/users", params={"limit": limit}) + + def get_user(self, username: str) -> APIResponse: + """Get a user by username.""" + return self.get(f"/v2/users/{username}") + + def update_user(self, username: str, **kwargs) -> APIResponse: + """Update a user. + + Supported fields: enabled, api_roles, corpus_roles, agent_roles, description. + """ + return self.patch(f"/v2/users/{username}", data=kwargs) + + def delete_user(self, username: str) -> APIResponse: + """Delete a user by username.""" + return self.delete(f"/v2/users/{username}") + + def reset_user_password(self, username: str) -> APIResponse: + """Reset the password for a user.""" + return self.post(f"/v2/users/{username}/reset_password", data={}) + # ------------------------------------------------------------------------- # Vectara API Operations - Jobs # ------------------------------------------------------------------------- @@ -454,6 +870,7 @@ def create_agent( description: str = "", model_name: Optional[str] = None, agent_key: Optional[str] = None, + tool_configurations: Optional[dict] = None, **kwargs, ) -> APIResponse: """Create a new agent for conversational AI. @@ -464,6 +881,7 @@ def create_agent( description: Agent description model_name: LLM model name (uses instance llm_name or defaults to gpt-4o) agent_key: Unique key for the agent (auto-generated if not provided) + tool_configurations: Optional dict of tool configurations (e.g.
corpora_search, web_search) """ import uuid @@ -496,9 +914,8 @@ def create_agent( **kwargs, } - # Note: corpus_keys parameter is accepted but not used in agent creation - # Corpus association for agents is handled through tool configuration - # which requires additional setup. Basic agents work without it. + if tool_configurations is not None: + data["tool_configurations"] = tool_configurations return self.post("/v2/agents", data=data) @@ -514,9 +931,30 @@ def update_agent(self, agent_id: str, **kwargs) -> APIResponse: """Update agent properties.""" return self.patch(f"/v2/agents/{agent_id}", data=kwargs) - def create_agent_session(self, agent_key: str) -> APIResponse: - """Create a new session for an agent.""" - return self.post(f"/v2/agents/{agent_key}/sessions", data={}) + def create_agent_session( + self, + agent_key: str, + metadata: Optional[dict] = None, + from_session: Optional[dict] = None, + ) -> APIResponse: + """Create a new session for an agent. + + Args: + agent_key: The agent's unique key. + metadata: Optional metadata dict to attach to the session. + from_session: Optional dict to fork from an existing session. + Must contain ``session_key`` and may optionally include + ``include_up_to_event_id`` and/or ``compact_up_to_event_id``. + + Returns: + APIResponse with the created session details. 
+ """ + data: dict = {} + if metadata is not None: + data["metadata"] = metadata + if from_session is not None: + data["from_session"] = from_session + return self.post(f"/v2/agents/{agent_key}/sessions", data=data) def execute_agent( self, @@ -534,29 +972,28 @@ def execute_agent( session_response = self.create_agent_session(agent_id) if not session_response.success: return session_response - session_id = session_response.data.get("key") or session_response.data.get( - "session_key" - ) + session_id = session_response.data.get("key") or session_response.data.get("session_key") if not session_id: return APIResponse( status_code=500, - data={ - "error": f"No session key in response: {session_response.data}" - }, + data={"error": f"No session key in response: {session_response.data}"}, elapsed_ms=0, ) - # Small delay to ensure session is committed to database - time.sleep(0.5) + # Wait for session to be committed and queryable + from utils.waiters import wait_for - # Verify session exists before executing - verify_response = self.get_agent_session(agent_id, session_id) - if not verify_response.success: + try: + wait_for( + lambda: self.get_agent_session(agent_id, session_id).success, + timeout=10, + interval=0.5, + description=f"agent session {session_id} to become available", + ) + except TimeoutError: return APIResponse( status_code=500, - data={ - "error": f"Session {session_id} created but verification failed: {verify_response.data}" - }, + data={"error": f"Session {session_id} created but not available after 10s"}, elapsed_ms=0, ) @@ -571,9 +1008,7 @@ def execute_agent( ], **kwargs, } - return self.post( - f"/v2/agents/{agent_id}/sessions/{session_id}/events", data=data - ) + return self.post(f"/v2/agents/{agent_id}/sessions/{session_id}/events", data=data) def list_agent_sessions(self, agent_id: str, limit: int = 100) -> APIResponse: """List sessions for an agent.""" @@ -587,6 +1022,426 @@ def delete_agent_session(self, agent_id: str, session_id: str) -> 
APIResponse: """Delete an agent session.""" return self.delete(f"/v2/agents/{agent_id}/sessions/{session_id}") + def update_agent_session(self, agent_key: str, session_key: str, **kwargs) -> APIResponse: + """Update an agent session. + + Supported fields: name, description, metadata, enabled, tti_minutes. + """ + return self.patch(f"/v2/agents/{agent_key}/sessions/{session_key}", data=kwargs) + + def compact_session( + self, + agent_key: str, + session_key: str, + compact_up_to_event_id: Optional[str] = None, + ) -> APIResponse: + """Send a manual compaction request to a session. + + Args: + agent_key: The agent's unique key. + session_key: The session's unique key. + compact_up_to_event_id: Optional event ID to compact up to. + """ + data: dict = {"type": "compact"} + if compact_up_to_event_id is not None: + data["compact_up_to_event_id"] = compact_up_to_event_id + return self.post(f"/v2/agents/{agent_key}/sessions/{session_key}/events", data=data) + + def list_session_events( + self, + agent_key: str, + session_key: str, + limit: int = 100, + include_hidden: bool = False, + ) -> APIResponse: + """List events in an agent session. + + Args: + agent_key: The agent's unique key. + session_key: The session's unique key. + limit: Maximum number of events to return. + include_hidden: If True, include hidden events in results. + + Returns: + APIResponse with the list of session events. + """ + params: dict = {"limit": limit} + if include_hidden: + params["include_hidden"] = True + return self.get( + f"/v2/agents/{agent_key}/sessions/{session_key}/events", + params=params, + ) + + def hide_event( + self, + agent_key: str, + session_key: str, + event_id: str, + ) -> APIResponse: + """Hide an event in an agent session. + + Args: + agent_key: The agent's unique key. + session_key: The session's unique key. + event_id: The event to hide. + + Returns: + APIResponse with the hide result. 
+ """ + return self.post( + f"/v2/agents/{agent_key}/sessions/{session_key}/events/{event_id}/hide", + data={}, + ) + + def unhide_event( + self, + agent_key: str, + session_key: str, + event_id: str, + ) -> APIResponse: + """Unhide an event in an agent session. + + Args: + agent_key: The agent's unique key. + session_key: The session's unique key. + event_id: The event to unhide. + + Returns: + APIResponse with the unhide result. + """ + return self.post( + f"/v2/agents/{agent_key}/sessions/{session_key}/events/{event_id}/unhide", + data={}, + ) + + def get_agent_identity(self, agent_key: str) -> APIResponse: + """Get the identity configuration of an agent. + + Args: + agent_key: The agent's unique key. + + Returns: + APIResponse with the agent identity details. + """ + return self.get(f"/v2/agents/{agent_key}/identity") + + def update_agent_identity(self, agent_key: str, **kwargs) -> APIResponse: + """Update the identity configuration of an agent. + + Args: + agent_key: The agent's unique key. + **kwargs: Identity fields to update. + + Returns: + APIResponse with the updated identity. + """ + return self.patch(f"/v2/agents/{agent_key}/identity", data=kwargs) + + # ------------------------------------------------------------------------- + # Vectara API Operations - LLMs + # ------------------------------------------------------------------------- + + def list_llms(self, limit: int = 100) -> APIResponse: + """List all LLMs configured for the account.""" + return self.get("/v2/llms", params={"limit": limit}) + + def create_llm( + self, + name: str, + model: str, + uri: str, + bearer_token: Optional[str] = None, + llm_type: str = "openai-compatible", + **kwargs, + ) -> APIResponse: + """Create a custom LLM configuration. + + Args: + name: Display name for the LLM. + model: Model identifier (e.g. ``gpt-4o-mini``). + uri: Endpoint URI for the LLM API. + bearer_token: Optional bearer token for authentication. + llm_type: LLM type (default ``openai-compatible``). 
+ """ + data: dict = { + "type": llm_type, + "name": name, + "model": model, + "uri": uri, + **kwargs, + } + if bearer_token is not None: + data["auth"] = {"type": "bearer", "token": bearer_token} + return self.post("/v2/llms", data=data) + + def delete_llm(self, llm_id: str) -> APIResponse: + """Delete a custom LLM by ID.""" + return self.delete(f"/v2/llms/{llm_id}") + + # ------------------------------------------------------------------------- + # Vectara API Operations - Tools + # ------------------------------------------------------------------------- + + def list_tools(self, limit: int = 100) -> APIResponse: + """List all tools configured for the account.""" + return self.get("/v2/tools", params={"limit": limit}) + + def create_tool( + self, + name: str, + title: str, + description: str, + code: str, + execution_time: int = 30, + max_memory: int = 128, + **kwargs, + ) -> APIResponse: + """Create a lambda tool. + + Args: + name: Unique tool name (letters, numbers, hyphens, underscores). + title: Human-readable title. + description: Tool description. + code: Python function code. + execution_time: Maximum execution time in seconds. + max_memory: Maximum memory in MB. NOTE: currently not included in the request payload.
+ """ + data: dict = { + "type": "lambda", + "name": name, + "title": title, + "description": description, + "code": code, + "execution_configuration": { + "max_execution_time_seconds": execution_time, + }, + **kwargs, + } + return self.post("/v2/tools", data=data) + + def update_tool(self, tool_id: str, **kwargs) -> APIResponse: + """Update tool properties.""" + return self.patch(f"/v2/tools/{tool_id}", data=kwargs) + + def delete_tool(self, tool_id: str) -> APIResponse: + """Delete a tool by ID.""" + return self.delete(f"/v2/tools/{tool_id}") + + # ------------------------------------------------------------------------- + # Vectara API Operations - Pipelines + # ------------------------------------------------------------------------- + + def list_pipelines(self, limit: int = 100) -> APIResponse: + """List all pipelines.""" + return self.get("/v2/pipelines", params={"limit": limit}) + + def create_pipeline( + self, + name: str, + key: str, + source: dict, + trigger: dict, + transform: dict, + **kwargs, + ) -> APIResponse: + """Create a new pipeline. + + Args: + name: Pipeline display name. + key: Unique pipeline key. + source: Source configuration dict. + trigger: Trigger configuration dict. + transform: Transform configuration dict. 
+ """ + data: dict = { + "name": name, + "key": key, + "source": source, + "trigger": trigger, + "transform": transform, + **kwargs, + } + return self.post("/v2/pipelines", data=data) + + def delete_pipeline(self, pipeline_key: str) -> APIResponse: + """Delete a pipeline by key.""" + return self.delete(f"/v2/pipelines/{pipeline_key}") + + def get_pipeline(self, pipeline_key: str) -> APIResponse: + """Get a pipeline by key.""" + return self.get(f"/v2/pipelines/{pipeline_key}") + + def update_pipeline(self, pipeline_key: str, **kwargs) -> APIResponse: + """Partially update a pipeline.""" + return self.patch(f"/v2/pipelines/{pipeline_key}", data=kwargs) + + def replace_pipeline(self, pipeline_key: str, **kwargs) -> APIResponse: + """Fully replace a pipeline definition.""" + return self.put(f"/v2/pipelines/{pipeline_key}", data=kwargs) + + # ------------------------------------------------------------------------- + # Vectara API Operations - Generation Presets + # ------------------------------------------------------------------------- + + def list_generation_presets(self, limit: int = 100) -> APIResponse: + """List generation presets available for the account.""" + return self.get("/v2/generation_presets", params={"limit": limit}) + + # ------------------------------------------------------------------------- + # Vectara API Operations - Rerankers + # ------------------------------------------------------------------------- + + def list_rerankers(self, limit: int = 100) -> APIResponse: + """List rerankers available for the account.""" + return self.get("/v2/rerankers", params={"limit": limit}) + + # ------------------------------------------------------------------------- + # Vectara API Operations - Guardrails + # ------------------------------------------------------------------------- + + def list_guardrails(self, limit: int = 100) -> APIResponse: + """List available guardrails.""" + return self.get("/v2/guardrails", params={"limit": limit}) + + # 
------------------------------------------------------------------------- + # Vectara API Operations - Query History + # ------------------------------------------------------------------------- + + def list_query_histories( + self, + limit: int = 100, + corpus_key: Optional[str] = None, + **kwargs, + ) -> APIResponse: + """List query histories. + + Args: + limit: Maximum number of results. + corpus_key: Optional corpus key to filter by. + **kwargs: Additional query params (chat_id, page_key). + """ + params: dict = {"limit": limit, **kwargs} + if corpus_key is not None: + params["corpus_key"] = corpus_key + return self.get("/v2/queries", params=params) + + def get_query_history(self, query_id: str) -> APIResponse: + """Get a specific query history entry.""" + return self.get(f"/v2/queries/{query_id}") + + # ------------------------------------------------------------------------- + # File Upload + # ------------------------------------------------------------------------- + + def upload_file( + self, + corpus_key: str, + file_path: str, + metadata: Optional[dict] = None, + table_extraction_config: Optional[dict] = None, + ) -> APIResponse: + """Upload a file to a corpus via multipart form-data. + + Args: + corpus_key: Target corpus key. + file_path: Local filesystem path to the file to upload. + metadata: Optional metadata dict to attach to the document. + table_extraction_config: Optional table-extraction configuration dict. + + Returns: + :class:`APIResponse` with the upload result. 
+ """ + import json as _json + + path = Path(file_path) + endpoint = f"/v2/corpora/{corpus_key}/upload_file" + + start_time = time.time() + + try: + with open(path, "rb") as fh: + import mimetypes + + mime_type = mimetypes.guess_type(str(path))[0] or "application/octet-stream" + files: dict = {"file": (path.name, fh, mime_type)} + if metadata is not None: + files["metadata"] = (None, _json.dumps(metadata), "application/json") + if table_extraction_config is not None: + files["table_extraction_config"] = (None, _json.dumps(table_extraction_config), "application/json") + + raw = self._request_raw( + method="POST", + endpoint=endpoint, + files=files, + ) + + elapsed_ms = (time.time() - start_time) * 1000 + + try: + response_data = raw.json() + except ValueError: + response_data = raw.text + + return APIResponse( + status_code=raw.status_code, + data=response_data, + elapsed_ms=elapsed_ms, + headers=dict(raw.headers), + ) + + except Exception as e: + elapsed_ms = (time.time() - start_time) * 1000 + self.logger.error(f"File upload error: {e}") + return APIResponse( + status_code=0, + data=None, + elapsed_ms=elapsed_ms, + error=f"File upload error: {str(e)}", + ) + + # ------------------------------------------------------------------------- + # Agent SSE Streaming + # ------------------------------------------------------------------------- + + def execute_agent_sse( + self, + agent_key: str, + session_key: str, + message: str, + ) -> requests.Response: + """Send a message to an agent session and return the raw SSE stream. + + The returned :class:`requests.Response` has ``stream=True`` so the + caller can iterate over Server-Sent Events with + :func:`utils.waiters.read_sse_events`. + + Args: + agent_key: The agent's unique key. + session_key: The session's unique key. + message: User message text. + + Returns: + Raw streaming :class:`requests.Response`. 
+ """ + endpoint = f"/v2/agents/{agent_key}/sessions/{session_key}/events" + data = { + "type": "input_message", + "messages": [ + { + "type": "text", + "content": message, + } + ], + } + + return self._request_raw( + method="POST", + endpoint=endpoint, + data=data, + stream=True, + ) + # ------------------------------------------------------------------------- # Health Check # ------------------------------------------------------------------------- diff --git a/utils/config.py b/utils/config.py index b53fce5..d538644 100644 --- a/utils/config.py +++ b/utils/config.py @@ -64,9 +64,6 @@ def validate(self) -> tuple[bool, list[str]]: errors = [] if not self.api_key: - errors.append( - "API key is required. Set VECTARA_API_KEY environment variable " - "or provide via --api-key" - ) + errors.append("API key is required. Set VECTARA_API_KEY environment variable " "or provide via --api-key") return len(errors) == 0, errors diff --git a/utils/waiters.py b/utils/waiters.py new file mode 100644 index 0000000..d583198 --- /dev/null +++ b/utils/waiters.py @@ -0,0 +1,121 @@ +""" +Polling helpers and SSE stream parser for the Vectara API test suite. +""" + +import json +import time +from typing import Any, Callable, Iterator + +import requests + + +def wait_for( + predicate: Callable[[], Any], + timeout: float = 30, + interval: float = 1, + description: str = "condition", +) -> Any: + """Poll *predicate* until it returns a truthy value or *timeout* expires. + + Args: + predicate: A zero-argument callable. Called repeatedly until it returns + a truthy value or the timeout is reached. + timeout: Maximum wall-clock seconds to keep polling. + interval: Seconds to sleep between attempts. + description: Human-readable label used in the ``TimeoutError`` message. + + Returns: + The first truthy value returned by *predicate*. + + Raises: + TimeoutError: If *predicate* never returns a truthy value within + *timeout* seconds. 
The message includes *description* and the + last value returned by *predicate*. + """ + deadline = time.monotonic() + timeout + last_result = None + + while True: + last_result = predicate() + if last_result: + return last_result + + if time.monotonic() >= deadline: + raise TimeoutError(f"Timed out after {timeout}s waiting for {description}. " f"Last state: {last_result!r}") + + remaining = deadline - time.monotonic() + time.sleep(min(interval, max(remaining, 0))) + + +def read_sse_events(response: requests.Response) -> Iterator[dict]: + """Parse Server-Sent Events from a streaming ``requests.Response``. + + The response **must** have been made with ``stream=True``. Each yielded + dict contains: + + * ``event`` -- the SSE event type (empty string if none was set) + * ``data`` -- the concatenated data payload (parsed as JSON when + possible, otherwise kept as a raw string) + + Args: + response: A :class:`requests.Response` opened with ``stream=True``. + + Yields: + ``dict`` with ``event`` and ``data`` keys for every complete SSE + message in the stream. + """ + event_type = "" + data_lines: list[str] = [] + + for raw_line in response.iter_lines(decode_unicode=True): + # iter_lines strips the trailing newline; an empty string means a + # blank line, which is the SSE event delimiter. + if raw_line is None: + continue + + line: str = raw_line # already decoded + + if line == "": + # End of an event block -- emit if we collected any data lines. + if data_lines: + joined = "\n".join(data_lines) + try: + parsed = json.loads(joined) + except (json.JSONDecodeError, ValueError): + parsed = joined + + yield {"event": event_type, "data": parsed} + + # Reset for the next event. + event_type = "" + data_lines = [] + continue + + if line.startswith(":"): + # SSE comment -- ignore. + continue + + if ":" in line: + field, _, value = line.partition(":") + # Per the SSE spec, strip a single leading space from value. 
+ if value.startswith(" "): + value = value[1:] + else: + field = line + value = "" + + if field == "event": + event_type = value + elif field == "data": + data_lines.append(value) + # Other fields (id, retry, etc.) are silently ignored. + + # Flush any trailing event that wasn't followed by a blank line. + if data_lines: + joined = "\n".join(data_lines) + try: + parsed = json.loads(joined) + except (json.JSONDecodeError, ValueError): + parsed = joined + + yield {"event": event_type, "data": parsed}