diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
new file mode 100644
index 0000000..e2b5f1f
--- /dev/null
+++ b/.github/CODEOWNERS
@@ -0,0 +1,16 @@
+# These owners will be the default owners for everything in
+# the repo. Unless a later match takes precedence,
+# they will be requested for review when someone opens a pull request.
+* @thaddiusatme
+
+# Feed processor core
+/feed_processor/ @thaddiusatme
+
+# Tests
+/tests/ @thaddiusatme
+
+# CI/CD
+/.github/workflows/ @thaddiusatme
+
+# Documentation
+/docs/ @thaddiusatme
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..4015372
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,102 @@
+name: CI
+
+on:
+ push:
+ branches: [ main ]
+ pull_request:
+ branches: [ main ]
+
+env:
+ PYTHON_VERSION: "3.12"
+
+permissions:
+ contents: write  # write access is required so the lint job can push auto-format commits
+
+jobs:
+ lint:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ ref: ${{ github.head_ref }}
+ fetch-depth: 0
+ token: ${{ secrets.GITHUB_TOKEN }}
+
+ - name: Set up Python ${{ env.PYTHON_VERSION }}
+ uses: actions/setup-python@v4
+ with:
+ python-version: ${{ env.PYTHON_VERSION }}
+
+ - name: Install build dependencies
+ run: |
+ python -m pip install --upgrade pip
+ pip install build wheel setuptools
+
+ - name: Install package and dev dependencies
+ run: |
+ pip install -e ".[dev]"
+ pip install black isort flake8 mypy
+
+ - name: Format with black
+ run: |
+ if ! black --check feed_processor tests; then
+ black feed_processor tests
+ git config --global user.email "github-actions[bot]@users.noreply.github.com"
+ git config --global user.name "github-actions[bot]"
+ git add .
+ git commit -m "style: Format code with black"
+ git push origin HEAD:${{ github.head_ref || github.ref_name }}
+ fi
+
+ - name: Sort imports with isort
+ run: |
+ if ! isort --check-only feed_processor tests; then
+ isort feed_processor tests
+ git config --global user.email "github-actions[bot]@users.noreply.github.com"
+ git config --global user.name "github-actions[bot]"
+ git add .
+ git commit -m "style: Sort imports with isort"
+ git push origin HEAD:${{ github.head_ref || github.ref_name }}
+ fi
+
+ - name: Lint with flake8
+ run: flake8 feed_processor tests
+
+ - name: Type check with mypy
+ run: mypy feed_processor
+
+ test:
+ runs-on: ubuntu-latest
+ needs: lint
+ services:
+ prometheus:
+ image: prom/prometheus:latest
+ ports:
+ - 9090:9090
+
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Set up Python ${{ env.PYTHON_VERSION }}
+ uses: actions/setup-python@v4
+ with:
+ python-version: ${{ env.PYTHON_VERSION }}
+
+ - name: Install build dependencies
+ run: |
+ python -m pip install --upgrade pip
+ pip install build wheel setuptools
+
+ - name: Install package
+ run: |
+ pip install -e ".[test]"
+
+ - name: Run tests
+ run: |
+ pytest tests/ --cov=feed_processor --cov-report=xml
+
+ - name: Upload coverage
+ uses: codecov/codecov-action@v3
+ with:
+ files: ./coverage.xml
+ fail_ci_if_error: true
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 4cde831..acf9261 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -10,44 +10,46 @@ jobs:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Set up Python
uses: actions/setup-python@v4
with:
- python-version: '3.8'
+ python-version: '3.12'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
- pip install build twine
+ pip install build twine mkdocs mkdocs-material
+ pip install -r requirements.txt
- name: Build package
run: python -m build
+ - name: Build documentation
+ run: |
+ mkdocs build
+
- name: Create Release
id: create_release
- uses: actions/create-release@v1
+ uses: softprops/action-gh-release@v1
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
- tag_name: ${{ github.ref }}
- release_name: Release ${{ github.ref }}
- body_path: CHANGELOG.md
+ files: |
+ dist/*
+ site/*
+ body_path: changelog.md
draft: false
prerelease: false
- - name: Upload Release Asset
- uses: actions/upload-release-asset@v1
- env:
- GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+ - name: Deploy documentation
+ uses: peaceiris/actions-gh-pages@v3
with:
- upload_url: ${{ steps.create_release.outputs.upload_url }}
- asset_path: ./dist/*.whl
- asset_name: feed_processor.whl
- asset_content_type: application/x-wheel+zip
+ github_token: ${{ secrets.GITHUB_TOKEN }}
+ publish_dir: ./site
- name: Publish to PyPI
if: startsWith(github.ref, 'refs/tags/')
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 0c3c39f..805969f 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -2,36 +2,42 @@ name: Test
on:
push:
- branches: [ main ]
+ branches-ignore: [ main ]
pull_request:
branches: [ main ]
+env:
+ PYTHON_VERSION: "3.12"
+
jobs:
test:
runs-on: ubuntu-latest
strategy:
matrix:
- python-version: ['3.8', '3.9', '3.10']
+ python-version: ['3.12']
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@v4
- - name: Set up Python ${{ matrix.python-version }}
+ - name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
- python-version: ${{ matrix.python-version }}
+ python-version: ${{ matrix.python-version }}
- - name: Install dependencies
+ - name: Install build dependencies
run: |
python -m pip install --upgrade pip
- pip install -r requirements.txt
- pip install -r requirements-dev.txt
+ pip install build wheel setuptools
+
+ - name: Install package
+ run: |
+ pip install -e ".[test]"
- - name: Run tests with pytest
+ - name: Run tests
run: |
pytest tests/ --cov=feed_processor --cov-report=xml
- - name: Upload coverage to Codecov
+ - name: Upload coverage
uses: codecov/codecov-action@v3
with:
file: ./coverage.xml
diff --git a/README.md b/README.md
index bead14a..1815c85 100644
--- a/README.md
+++ b/README.md
@@ -1,212 +1,199 @@
# Feed Processing System
-A robust Python-based feed processing system that fetches, processes, and delivers content through webhooks. The system is designed to handle high-volume content processing with features like rate limiting, error handling, and content prioritization.
+A robust and scalable system for processing RSS/Atom feeds with webhook delivery capabilities.
## Features
-### Core Processing
-- **Inoreader Integration**
- - Seamless integration with Inoreader API
- - Efficient pagination handling
- - Robust error handling for API interactions
- - Configurable batch sizes
-
-- **Priority-Based Processing**
- - Three-level priority system (High, Normal, Low)
- - Breaking news detection
- - Time-based priority adjustment
- - Configurable priority rules
-
-- **Queue Management**
- - Thread-safe priority queue implementation
- - Efficient O(1) operations with deque
- - Priority-based item displacement
- - Queue size monitoring
-
-### Content Delivery
-- **Webhook Management**
- - Rate-limited delivery system
- - Configurable retry mechanism
- - Exponential backoff for failures
- - Bulk sending capabilities
-
-- **Error Handling**
- - Comprehensive error tracking
- - Circuit breaker pattern
- - Detailed error context
- - Error metrics collection
-
-- **Logging and Monitoring**
- - Structured logging with structlog
- - Request lifecycle tracking
- - Performance metrics
- - Queue statistics
-
-- **Metrics and Monitoring**
- - Counter metrics for tracking cumulative values
- - Gauge metrics for current state values
- - Histogram metrics for latency distributions
- - Thread-safe metric operations
- - Support for metric labels and timestamps
- - Prometheus and Grafana integration
-
-## Quick Start
-
-1. **Clone the repository**:
+- Queue-based feed processing with configurable size
+- Webhook delivery with retry mechanism and rate limiting
+- Batch processing support
+- Real-time metrics monitoring
+- Configurable webhook settings
+- Thread-safe implementation
+- Graceful shutdown handling
+
+## Requirements
+
+- Python 3.12+
+- pip for package management
+
+## Installation
+
+1. Clone the repository:
```bash
git clone https://github.com/yourusername/feed-processing-system.git
cd feed-processing-system
```
-2. **Set up the environment**:
+2. Create and activate a virtual environment:
```bash
python -m venv venv
source venv/bin/activate # On Windows: venv\Scripts\activate
-pip install -r requirements.txt
-```
-
-3. **Configure environment variables**:
-```bash
-cp .env.example .env
-# Edit .env with your configuration
```
-4. **Start the monitoring stack**:
+3. Install dependencies:
```bash
-docker-compose -f docker-compose.monitoring.yml up -d
+pip install -r requirements.txt
```
-5. **Run the processor**:
-```python
-from feed_processor import FeedProcessor
+## Usage
-processor = FeedProcessor()
-processor.start()
-```
+### Command Line Interface
-6. **Access monitoring**:
-- Grafana: http://localhost:3000 (admin/admin)
-- Prometheus: http://localhost:9090
+The system provides a CLI with the following commands:
-## Configuration
+1. Start the feed processor:
+```bash
+python -m feed_processor.cli start [--config CONFIG_FILE]
+```
-### Environment Variables
+2. Process a single feed file:
+```bash
+python -m feed_processor.cli process FEED_FILE [--config CONFIG_FILE]
+```
-```env
-# Core Configuration
-INOREADER_TOKEN=your_api_token
-WEBHOOK_URL=your_webhook_url
+3. View current metrics:
+```bash
+python -m feed_processor.cli metrics [--config CONFIG_FILE]
+```
-# Performance Tuning
-WEBHOOK_RATE_LIMIT=0.2 # Requests per second
-MAX_RETRIES=3
-QUEUE_SIZE=1000
-ERROR_HISTORY_SIZE=100
+4. Configure webhook settings:
+```bash
+python -m feed_processor.cli configure --endpoint URL --token TOKEN [--batch-size SIZE] [--output CONFIG_FILE]
+```
-# Monitoring
-METRICS_PORT=8000
-GRAFANA_PORT=3000
-PROMETHEUS_PORT=9090
+5. Validate an RSS feed file:
+```bash
+python -m feed_processor.cli validate feed_file.xml
```
+This command checks if the feed file is properly formatted and contains all required RSS elements.
-### Priority Rules
+### Validate Feed
+To validate an RSS feed file before processing:
+```bash
+python -m feed_processor.cli validate feed_file.xml
+```
-Customize priority rules by subclassing `FeedProcessor`:
+The validate command performs comprehensive checks on your RSS feed:
+- Basic RSS structure and required elements
+- Presence of feed items
+- URL format validation for all links
+- Publication date format validation
+- Required channel elements (title, link)
-```python
-class CustomFeedProcessor(FeedProcessor):
- def _determine_priority(self, item: Dict[str, Any]) -> Priority:
- if self._is_breaking_news(item):
- return Priority.HIGH
- if self._is_from_trusted_source(item):
- return Priority.NORMAL
- return Priority.LOW
+For stricter validation, use the `--strict` flag:
+```bash
+python -m feed_processor.cli validate --strict feed_file.xml
```
-## Monitoring
+Strict mode enforces additional rules:
+- UTF-8 encoding requirement
+- Maximum content lengths:
+ - Titles: 200 characters
+ - Descriptions: 5000 characters
+- Required recommended elements (descriptions)
-### Available Metrics
+If any issues are found, the command will exit with status code 1 and display a specific error message.
-#### Processing Metrics
-- `feed_items_processed_total`: Counter of processed items
- - Labels: `status=[success|failure]`
-- `feed_processing_latency_seconds`: Processing time histogram
-- `feed_queue_size`: Current queue size by priority
+### Feed Validation
-#### Webhook Metrics
-- `webhook_retries_total`: Retry attempts counter
- - Labels: `attempt=[1|2|3]`
-- `webhook_duration_seconds`: Webhook latency histogram
-- `webhook_payload_size_bytes`: Payload size histogram
-- `rate_limit_delay_seconds`: Current rate limit delay gauge
+The system includes a robust feed validation command that checks RSS feeds for validity and conformance to best practices:
-#### Queue Metrics
-- `queue_overflow_total`: Queue overflow counter
- - Labels: `priority=[high|medium|low]`
-- `queue_items_by_priority`: Current items by priority
+```bash
+# Basic validation
+python -m feed_processor.cli validate feed.xml
-### Dashboard Features
+# Strict validation with additional checks
+python -m feed_processor.cli validate --strict feed.xml
+```
-The Grafana dashboard provides:
+### Validation Checks
+
+#### Basic Mode
+- RSS structure and required elements
+- Channel elements (title, link)
+- Feed items presence
+- URL format validation
+- Publication date format validation
+
+#### Strict Mode
+Additional checks in strict mode:
+- UTF-8 encoding requirement
+- Content length limits:
+ - Titles: 200 characters
+ - Descriptions: 5000 characters
+- Required recommended elements (descriptions)
+
+### Configuration
+
+Create a JSON configuration file with the following structure:
+
+```json
+{
+ "max_queue_size": 1000,
+ "webhook_endpoint": "https://your-webhook.com/endpoint",
+ "webhook_auth_token": "your-auth-token",
+ "webhook_batch_size": 10,
+ "metrics_port": 8000
+}
+```
-#### Performance Panels
-- Processing success/failure rates
-- Queue size with thresholds
-- Latency trends
-- Queue distribution
+### Metrics
-#### System Health Panels
-- Webhook retry patterns
-- Rate limiting impact
-- Payload size trends
-- Queue overflow events
+The system exports the following Prometheus metrics:
-Features:
-- Real-time updates (5s refresh)
-- Historical data viewing
-- Interactive tooltips
-- Statistical summaries
+- Processing Rate (feeds/sec)
+- Queue Size
+- Average Processing Latency (ms)
+- Webhook Retries
+- Average Payload Size (bytes)
+- Rate Limit Delay (sec)
+- Queue Overflows
## Development
-### Testing
+### Setting Up Development Environment
+1. Install development dependencies:
```bash
-# Install dev dependencies
pip install -r requirements-dev.txt
-
-# Run tests
-python -m pytest # All tests
-python -m pytest tests/unit/ # Unit tests
-python -m pytest tests/integration/ # Integration tests
-python -m pytest --cov # Coverage report
```
-### Code Quality
-
+2. Run tests:
```bash
-# Format code
-black .
+pytest
+```
-# Type checking
-mypy .
+### Project Structure
-# Linting
-flake8
+```
+feed-processing-system/
+├── feed_processor/
+│ ├── __init__.py
+│ ├── cli.py # Command-line interface
+│ ├── processor.py # Core feed processor
+│ ├── webhook.py # Webhook delivery system
+│ ├── metrics.py # Prometheus metrics
+│ ├── validator.py # Feed validation
+├── tests/
+│ ├── __init__.py
+│ ├── test_cli.py
+│ ├── test_processor.py
+│ └── test_webhook.py
+├── requirements.txt
+├── requirements-dev.txt
+├── README.md
+└── changelog.md
```
-### Contributing
+## Contributing
1. Fork the repository
2. Create a feature branch
-3. Write tests for new features
-4. Ensure all tests pass
+3. Make your changes
+4. Run tests and ensure they pass
5. Submit a pull request
## License
-This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
-
-## Support
-
-For issues and feature requests, please use the GitHub issue tracker.
+This project is licensed under the MIT License - see the LICENSE file for details.
diff --git a/changelog.md b/changelog.md
index 47a35df..056257b 100644
--- a/changelog.md
+++ b/changelog.md
@@ -1,6 +1,6 @@
# Changelog
-All notable changes to the Feed Processing System will be documented in this file.
+All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
@@ -176,6 +176,105 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Environment variable template (env.example)
- Comprehensive test coverage configuration
- Sphinx documentation setup
+- Set up comprehensive monitoring stack with Docker Compose
+ - Prometheus for metrics collection and storage
+ - Grafana for visualization and dashboards
+ - Custom metrics for feed processing:
+ - Processing rate and queue size
+ - Processing latency histograms
+ - Webhook retries and payload sizes
+ - Rate limit delays and queue overflows
+ - Queue distribution by feed type
+- Added Python prometheus-client integration
+ - Automatic metrics exposure via HTTP endpoint
+ - Thread-safe metrics collection
+ - Real-time monitoring capabilities
+- Implemented comprehensive unit test suite for FeedProcessor
+ - Test cases for successful feed addition
+ - Queue overflow testing
+ - Feed processing verification
+ - Rate limiting validation
+ - Webhook retry mechanism testing
+- Added comprehensive unit tests for FeedProcessor
+ - Test cases for feed queue operations
+ - Test cases for rate limiting behavior
+ - Test cases for webhook retry mechanism
+ - Proper mocking of metrics for isolation
+- Implemented feed validation system
+ - Support for RSS, Atom, and JSON feed formats
+ - Required field validation
+ - Feed format detection
+ - Date normalization
+ - Feed data normalization
+- Integrated feed validation with processor
+ - Validation before queueing
+ - Standardized feed format
+ - Improved error handling
+- Added comprehensive test suites
+ - Feed validator unit tests
+ - Feed processor integration tests
+ - Test coverage for all feed formats
+- Implemented webhook functionality for feed delivery
+ - Added `WebhookManager` class for handling webhook operations
+ - Added webhook configuration and validation
+ - Implemented retry logic with configurable retry count and delay
+ - Added rate limiting support with automatic backoff
+ - Added batch processing of feeds
+ - Added comprehensive webhook metrics tracking
+- Integrated webhook functionality into feed processor
+ - Added webhook configuration to processor initialization
+ - Implemented batch processing of feeds
+ - Added webhook error handling and metrics
+ - Added tests for webhook integration
+- Command-line interface (CLI) for the feed processor
+ - `start` command to run the feed processor
+ - `process` command to process individual feed files
+ - `metrics` command to view current metrics
+ - `configure` command to set up webhook configuration
+- Configuration file support for feed processor settings
+- Real-time metrics display in CLI
+- New `validate` command to check RSS feed files for validity before processing
+ - Validates basic RSS structure and required channel elements
+ - Provides clear error messages for invalid feeds
+- Enhanced feed validation in `validate` command:
+ - Checks for empty feeds (no items)
+ - Validates URL formats in channel and items
+ - Verifies publication dates in channel and items
+ - Provides specific error messages for each validation failure
+- Enhanced feed validation with strict mode:
+ - UTF-8 encoding enforcement
+ - Content length limits for titles and descriptions
+ - Required recommended elements (descriptions)
+ - Improved error messages for each validation type
+- Enhanced feed validation with strict mode
+ - Added UTF-8 encoding requirement
+ - Added content length limits for titles (200 chars) and descriptions (5000 chars)
+ - Added required recommended elements check
+- Improved error messages for validation failures
+- Added chardet dependency for encoding detection
+- Enhanced feed validation system to align with new schema specification:
+ - Title validation with length limits and HTML tag restrictions
+ - URL validation with format checking and length constraints
+ - Content type validation (BLOG, VIDEO, SOCIAL)
+ - Priority level validation (High, Medium, Low)
+ - Tag validation with limits (max 10 tags, 50 chars per tag)
+- Improved feed normalization:
+ - Structured content with full and brief versions
+ - Enhanced metadata handling with source and processing info
+ - Added analysis fields for content type, priority, and scores
+ - ISO 8601 compliant date formatting
+- Enhanced error handling in validator:
+ - Separate tracking of errors and warnings
+ - Detailed validation status reporting
+ - Improved error message clarity
+- Comprehensive load testing infrastructure
+ - Locust-based load testing scenarios
+ - Test data generation utilities
+ - Recovery testing framework
+ - Docker-based monitoring stack with Prometheus and Grafana
+- Test scenarios for baseline, normal, and peak loads
+- Recovery testing for network partitions, webhook failures, and memory pressure
+- Automated test execution scripts
### Changed
- Optimized ContentQueue implementation
@@ -215,6 +314,25 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Separated development dependencies from production requirements
- Restructured test directory for better organization
- Updated code formatting rules for consistency
+- Enhanced feed processor to handle validated feeds
+- Improved error handling in feed processing
+- Standardized feed data format across different feed types
+- Updated feed processor to support batch processing
+- Enhanced metrics to include webhook-related measurements
+- Improved error handling and logging
+- Improved URL validation in feed items
+- Enhanced date format validation
+- Better handling of non-UTF8 encoded feeds
+- Updated CLI error handling to better handle different error types
+- Simplified exit code logic in validate command
+- Exit codes now consistently reflect error types:
+ - 1: Critical errors
+ - 2: Validation errors
+ - Default error exit code (1) for other cases
+- Updated feed normalization to match schema structure exactly
+- Modified validation result format to include detailed error information
+- Improved date handling to ensure ISO 8601 compliance
+- Updated default values to align with schema requirements
### Enhanced
- Error handling system with improved performance monitoring
@@ -272,6 +390,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Standardized environment variable naming
- Improved test coverage reporting
- Enhanced type checking configuration
+- Fixed thread lifecycle management in tests
+- Fixed metrics server port conflicts in tests
+- Date parsing and normalization
+- Fixed JSON serialization of datetime objects in webhook payloads
+- Fixed metrics initialization in tests
+- Improved webhook error handling and retry logic
+- Improved error type categorization in validator
+- Fixed inconsistent exit codes in validation error handling
### Documentation
- Added detailed testing guide with setup instructions
@@ -284,50 +410,83 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Added proper token validation in FeedProcessor
- Enhanced webhook payload security with standardized format
+## [1.0.1] - 2024-12-13
+
+### Added
+- Enhanced GitHub Actions workflows
+ - Added Prometheus service container for metrics testing
+ - Integrated load testing in CI pipeline
+ - Added automatic documentation building and deployment
+ - Improved code quality checks with black, isort, flake8, and mypy
+
+### Changed
+- Split test workflow into unit, integration, and load tests
+- Enhanced release process with documentation deployment
+- Improved CI pipeline with parallel job execution
+
+### Fixed
+- Resolved coverage report aggregation across test types
+- Fixed documentation deployment process
+- Corrected PyPI release workflow
+
## [1.0.0] - 2024-12-13
### Added
- Initial release of the Feed Processing System
-- Core feed processing functionality with priority queue system
-- Webhook delivery system with rate limiting and retries
-- Comprehensive monitoring system using Prometheus and Grafana
-- Integration with Inoreader API for feed fetching
-- Error handling with circuit breaker pattern
-- Extensive documentation using Sphinx
-- Development environment setup with code quality tools
-- Integration test suite for monitoring and webhook systems
+- Core feed processor with queue-based processing
+- Webhook integration for feed delivery
+- CLI interface with commands:
+ - `start`: Start the feed processor
+ - `process`: Process a single feed file
+ - `metrics`: Display current metrics
+ - `configure`: Configure webhook settings
+- Prometheus metrics integration
+- Unit tests with pytest
+- GitHub Actions workflows for CI/CD
+- Implemented comprehensive metrics system using Prometheus
+ - Processing rate counter (`feed_processing_rate`)
+ - Queue size gauge (`feed_queue_size`)
+ - Processing latency histogram (`feed_processing_latency_seconds`)
+ - Webhook retry counter (`feed_webhook_retries_total`)
+ - Webhook payload size histogram (`feed_webhook_payload_size_bytes`)
+- Added metrics server with automatic port selection
+- Integrated load testing framework using Locust
+ - Performance testing scenarios
+ - Concurrent feed processing tests
+ - Webhook delivery stress tests
+ - Real-time metrics monitoring during tests
-### Features
-- Priority-based feed processing queue
-- Customizable priority rules
-- Webhook delivery with rate limiting
-- Prometheus metrics export
-- Grafana dashboards for monitoring
-- Circuit breaker pattern for error handling
-- Batch processing capabilities
-- Configurable via environment variables or YAML
-
-### Development Tools
-- Added Black for code formatting
-- Added Flake8 for code linting
-- Added MyPy for type checking
-- Added pytest for testing framework
-- Added pre-commit hooks
-- Added Sphinx for documentation
+### Changed
+- Improved metrics initialization with configurable port settings
+- Enhanced CLI interface with metrics command functionality
+- Updated documentation with metrics collection details
-### Documentation
-- Installation guide
-- Configuration guide
-- Usage examples
-- API reference
-- Development guide
-- Monitoring guide
-- Example implementations
+### Fixed
+- Resolved port conflicts in metrics server initialization
+- Fixed thread safety issues in metrics collection
+- Corrected metric label consistency
-### Dependencies
-- Python 3.8+
-- Docker and Docker Compose for monitoring stack
-- Development dependencies in requirements-dev.txt
-- Core dependencies in requirements.txt
+### Features
+- Queue-based feed processing with configurable size
+- Webhook delivery with retry mechanism and rate limiting
+- Batch processing support
+- Real-time metrics monitoring
+- Configurable webhook settings
+- Thread-safe implementation
+- Graceful shutdown handling
-[1.0.0]: https://github.com/yourusername/feed-processing-system/releases/tag/v1.0.0
\ No newline at end of file
+### Technical Details
+- Python 3.12+ support
+- Prometheus metrics for monitoring:
+ - Processing rate
+ - Queue size
+ - Processing latency
+ - Webhook retries
+ - Payload size
+ - Rate limit delays
+ - Queue overflows
+- Webhook features:
+ - Authentication
+ - Configurable batch size
+ - Retry mechanism
+ - Rate limit handling
\ No newline at end of file
diff --git a/config.json b/config.json
new file mode 100644
index 0000000..10a1261
--- /dev/null
+++ b/config.json
@@ -0,0 +1,7 @@
+{
+ "max_queue_size": 1000,
+ "webhook_endpoint": "http://localhost:9000/webhook",
+ "webhook_auth_token": "test-token",
+ "webhook_batch_size": 10,
+ "metrics_port": 49152
+}
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..583ece1
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,32 @@
+version: '3.8'
+
+services:
+ prometheus:
+ image: prom/prometheus:latest
+ ports:
+ - "9090:9090"
+ volumes:
+ - ./monitoring/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml
+ - prometheus_data:/prometheus
+ command:
+ - '--config.file=/etc/prometheus/prometheus.yml'
+ - '--storage.tsdb.path=/prometheus'
+ - '--web.console.libraries=/usr/share/prometheus/console_libraries'
+ - '--web.console.templates=/usr/share/prometheus/consoles'
+
+ grafana:
+ image: grafana/grafana:latest
+ ports:
+ - "3000:3000"
+ volumes:
+ - ./monitoring/grafana/provisioning:/etc/grafana/provisioning
+ - grafana_data:/var/lib/grafana
+ environment:
+ - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASSWORD:-admin}
+ - GF_USERS_ALLOW_SIGN_UP=false
+ depends_on:
+ - prometheus
+
+volumes:
+ prometheus_data:
+ grafana_data:
diff --git a/feed_processor/__init__.py b/feed_processor/__init__.py
new file mode 100644
index 0000000..ec9afed
--- /dev/null
+++ b/feed_processor/__init__.py
@@ -0,0 +1,8 @@
+"""Feed processor module."""
+
+from .metrics import init_metrics, start_metrics_server
+from .processor import FeedProcessor
+from .validator import FeedValidator
+from .webhook import WebhookConfig, WebhookManager
+
+__version__ = "1.0.0"
diff --git a/feed_processor/api.py b/feed_processor/api.py
new file mode 100644
index 0000000..8fa262f
--- /dev/null
+++ b/feed_processor/api.py
@@ -0,0 +1,58 @@
+"""API server for feed processing system."""
+
+import threading
+
+from flask import Flask, jsonify, request
+
+from .processor import FeedProcessor
+
+app = Flask(__name__)
+processor = None
+
+
+@app.route("/process", methods=["POST"])
+def process_feed():
+ """Process a feed."""
+ try:
+ feed = request.json
+ if not feed:
+ return jsonify({"error": "No feed data provided"}), 400
+
+ # Add feed to processing queue
+ processor.queue.put(feed)
+ return jsonify({"status": "Feed queued for processing"}), 202
+ except Exception as e:
+ return jsonify({"error": str(e)}), 500
+
+
+@app.route("/webhook/status", methods=["GET"])
+def webhook_status():
+ """Get webhook delivery status."""
+ try:
+ if not processor.webhook_manager:
+ return jsonify({"error": "Webhook manager not configured"}), 400
+
+ status = {
+ "queue_size": processor.queue.qsize(),
+ "current_batch_size": len(processor.current_batch),
+ "webhook_enabled": True,
+ }
+ return jsonify(status), 200
+ except Exception as e:
+ return jsonify({"error": str(e)}), 500
+
+
+def start_api_server(host="localhost", port=8000, processor_instance=None):
+ """Start the API server."""
+ global processor
+ processor = processor_instance
+ if not processor:
+ raise ValueError("FeedProcessor instance must be provided")
+
+ # Start Flask in a separate thread
+ def run_flask():
+ app.run(host=host, port=port)
+
+ api_thread = threading.Thread(target=run_flask, daemon=True)
+ api_thread.start()
+ return api_thread
diff --git a/feed_processor/cli.py b/feed_processor/cli.py
new file mode 100644
index 0000000..e45b01a
--- /dev/null
+++ b/feed_processor/cli.py
@@ -0,0 +1,469 @@
+"""Command line interface for the feed processor."""
+
+import asyncio
+import json
+import re
+import sys
+import threading
+import time
+from functools import wraps
+from pathlib import Path
+from typing import Optional
+from urllib.parse import urlparse
+
+import click
+from prometheus_client import CollectorRegistry, generate_latest
+
+from .metrics import (
+ PROCESSING_LATENCY,
+ PROCESSING_RATE,
+ QUEUE_OVERFLOWS,
+ QUEUE_SIZE,
+ RATE_LIMIT_DELAY,
+ WEBHOOK_PAYLOAD_SIZE,
+ WEBHOOK_RETRIES,
+ start_metrics_server,
+)
+from .processor import FeedProcessor
+from .validator import FeedValidator
+from .webhook import WebhookConfig
+
+
def load_config(config_path: Optional[Path] = None) -> dict:
    """Return processor configuration, overlaying a user JSON file on defaults."""
    defaults = {
        "max_queue_size": 1000,
        "webhook_endpoint": None,
        "webhook_auth_token": None,
        "webhook_batch_size": 10,
        "metrics_port": 8000,
    }

    # No file (or a missing one) means pure defaults.
    if not (config_path and config_path.exists()):
        return defaults

    # User-supplied keys win over the defaults.
    with open(config_path) as f:
        overrides = json.load(f)
    return {**defaults, **overrides}
+
+
def print_metrics():
    """Print current metrics in a human-readable format.

    Reads prometheus_client private attributes (``_value``, ``_sum``,
    ``_buckets``); any failure is reported to stderr rather than raised so
    metric printing never takes down the caller.
    """
    try:
        metrics = {}

        # Simple counter/gauge metrics.
        metrics["Processing Rate (feeds/sec)"] = PROCESSING_RATE._value.get()
        metrics["Queue Size"] = QUEUE_SIZE._value.get()
        metrics["Webhook Retries"] = WEBHOOK_RETRIES._value.get()
        metrics["Current Rate Limit Delay (sec)"] = RATE_LIMIT_DELAY._value.get()
        metrics["Queue Overflows"] = QUEUE_OVERFLOWS._value.get()

        def _histogram_average(histogram):
            # The observation count is the sum of the per-bucket counters;
            # the mean is _sum / count. (The previous code divided by
            # len(_buckets) — the number of bucket boundaries, a constant —
            # which is not the observation count and gave wrong averages.)
            count = sum(bucket.get() for bucket in histogram._buckets)
            if count <= 0:
                return 0.0
            return histogram._sum.get() / count

        # Latency is observed in seconds; report milliseconds.
        metrics["Average Latency (ms)"] = _histogram_average(PROCESSING_LATENCY) * 1000
        metrics["Average Payload Size (bytes)"] = _histogram_average(WEBHOOK_PAYLOAD_SIZE)

        # Print the metrics
        click.echo("\nCurrent Metrics:")
        click.echo("-" * 50)
        for name, value in metrics.items():
            click.echo(f"{name:<30} {value:>10.2f}")
    except Exception as e:
        click.echo(f"Error getting metrics: {str(e)}", err=True)
+
+
def validate_webhook_url(url: str) -> bool:
    """Return True when ``url`` is a well-formed http(s) URL with a host."""
    try:
        parts = urlparse(url)
    except Exception:
        return False
    return parts.scheme in ("http", "https") and bool(parts.netloc)
+
+
def async_command(f):
    """Decorator that lets an async function be invoked as a sync Click command."""

    @wraps(f)
    def runner(*args, **kwargs):
        # Drive the coroutine to completion on a fresh event loop.
        return asyncio.run(f(*args, **kwargs))

    return runner
+
+
@click.group()
def cli():
    """Feed Processing System CLI"""
    # Root command group; subcommands attach themselves via @cli.command().
    pass
+
+
@cli.command()
@click.option(
    "--config", "-c", type=click.Path(exists=True, path_type=Path), help="Path to config file"
)
@click.option("--port", type=int, default=8000, help="Port to run API server on")
@click.option("--metrics-port", type=int, default=9090, help="Port to expose metrics on")
def start(config, port, metrics_port):
    """Start the feed processor."""
    # Long-running foreground command: starts the processor, an API server,
    # and a metrics endpoint, then prints metrics every ~10 seconds.
    try:
        cfg = load_config(config)

        # NOTE(review): FeedProcessor.__init__ already starts a metrics
        # server on cfg["metrics_port"] (default 8000) via init_metrics();
        # combined with start_metrics_server(metrics_port) below this starts
        # TWO metrics servers — confirm which one is intended.
        processor = FeedProcessor(
            max_queue_size=cfg["max_queue_size"],
            webhook_endpoint=cfg["webhook_endpoint"],
            webhook_auth_token=cfg["webhook_auth_token"],
            webhook_batch_size=cfg["webhook_batch_size"],
            metrics_port=cfg["metrics_port"],
        )

        # Import here to avoid circular imports
        from .api import start_api_server

        click.echo("Starting feed processor and API server...")
        processor.start()

        # Start API server (runs on a daemon thread).
        api_thread = start_api_server(
            host="localhost",
            port=port,  # Use default port 8000 for API
            processor_instance=processor,
        )

        # Start metrics server
        start_metrics_server(metrics_port)

        # Keep the main thread running
        try:
            while True:
                time.sleep(1)
                print_metrics()
                time.sleep(9)  # Print metrics every 10 seconds
        except KeyboardInterrupt:
            # Ctrl-C: stop the worker (flushes any partial webhook batch).
            processor.stop()
            click.echo("\nShutting down...")

    except Exception as e:
        click.echo(f"Error starting feed processor: {str(e)}", err=True)
        sys.exit(1)
+
+
@cli.command()
@click.argument("feed_file", type=click.Path(exists=True))
@click.option(
    "--config", "-c", type=click.Path(exists=True, path_type=Path), help="Path to config file"
)
def process(feed_file, config):
    """Process a feed file."""
    # One-shot command: enqueue a single feed file, give the worker a moment,
    # print metrics, then shut the processor down.
    try:
        cfg = load_config(config)

        processor = FeedProcessor(
            max_queue_size=cfg["max_queue_size"],
            webhook_endpoint=cfg["webhook_endpoint"],
            webhook_auth_token=cfg["webhook_auth_token"],
            webhook_batch_size=cfg["webhook_batch_size"],
        )

        processor.start()

        try:
            with open(feed_file) as f:
                content = f.read()
                feed_data = {"content": content}

            # add_feed() validates first; False means validation failure or
            # a full queue.
            if processor.add_feed(feed_data):
                click.echo(f"Successfully added feed from {feed_file}")
            else:
                click.echo(f"Failed to add feed from {feed_file}", err=True)
                sys.exit(1)

            # Wait briefly for processing
            time.sleep(1)
            print_metrics()

        finally:
            # stop() also flushes any partially-filled webhook batch.
            processor.stop()

    except Exception as e:
        click.echo(f"Error: {str(e)}", err=True)
        sys.exit(1)
+
+
@cli.command()
@click.argument("feed_file", type=click.Path(exists=True))
@click.option("--strict", is_flag=True, help="Enable strict validation")
@click.option("--format", type=click.Choice(["text", "json"]), default="text", help="Output format")
@click.option("--cache/--no-cache", default=True, help="Enable/disable validation result caching")
@click.option("--cache-ttl", type=int, default=3600, help="Cache TTL in seconds")
@async_command
async def validate(feed_file, strict, format, cache, cache_ttl):
    """Validate a feed file."""
    # Exit codes: 1 = critical/general failure, 2 = validation failure.
    try:
        # Add a small delay to make caching effects more noticeable in tests
        if not cache:  # Only add delay for non-cached validations
            await asyncio.sleep(0.5)

        # NOTE(review): the visible validator.FeedValidator takes a single
        # `config` dict and has a *synchronous* validate(feed_url); the
        # kwargs and `await` here imply a different async, cache-aware
        # validator is expected — confirm which module should be imported.
        validator = FeedValidator(strict_mode=strict, use_cache=cache, cache_ttl=cache_ttl)
        result = await validator.validate(feed_file)

        # Prepare output
        output = {
            "is_valid": result.is_valid,
            "error_type": result.error_type,
            "errors": result.errors,
            "warnings": result.warnings,
            "stats": result.stats,
            "validation_time": result.validation_time,
        }

        if format == "json":
            click.echo(json.dumps(output, indent=2))
        else:
            if result.is_valid and not result.errors:
                click.echo("Feed is valid")
                if result.warnings:
                    click.echo("\nWarnings:")
                    for warning in result.warnings:
                        click.echo(f"- {warning}")
            else:
                error_type_msg = {
                    "critical": "Critical Error:",
                    "validation": "Validation Error:",
                    "format": "Format Error:",
                }.get(result.error_type, "Error:")

                click.echo(f"{error_type_msg}")
                for error in result.errors:
                    click.echo(f"- {error}")
                if result.warnings:
                    click.echo("\nWarnings:")
                    for warning in result.warnings:
                        click.echo(f"- {warning}")

        # Set exit code based on error type
        if result.error_type == "critical":
            sys.exit(1)
        elif result.error_type == "validation":
            sys.exit(2)
        elif not result.is_valid or result.errors:
            sys.exit(1)  # Default error exit code

    except Exception as e:
        click.echo(f"Error validating feed: {str(e)}", err=True)
        sys.exit(1)
+
+
@cli.command()
@click.argument("feed_file", type=click.Path(exists=True))
def validate_old(feed_file):
    """Validate an RSS feed file without processing it."""
    # Exits 0 when valid; prints a reason and exits 1 on the first problem.
    # Note: sys.exit() raises SystemExit, which the outer `except Exception`
    # deliberately does not catch.
    try:
        from email.utils import parsedate_tz
        from urllib.parse import urlparse

        import feedparser

        with open(feed_file, "r") as f:
            feed_content = f.read()
            feed = feedparser.parse(feed_content)

        # Check for basic RSS structure
        if not hasattr(feed, "feed") or not hasattr(feed, "entries"):
            click.echo("Invalid feed format: Missing required RSS elements")
            sys.exit(1)

        if feed.bozo:  # feedparser sets this when there's a parsing error
            click.echo("Invalid feed format: " + str(feed.bozo_exception))
            sys.exit(1)

        # Check for required channel elements
        if not feed.feed.get("title") or not feed.feed.get("link"):
            click.echo("Invalid feed format: Missing required channel elements")
            sys.exit(1)

        # Check for feed items
        if not feed.entries:
            click.echo("Invalid feed format: No feed items found")
            sys.exit(1)

        # Validate URLs
        def is_valid_url(url):
            try:
                result = urlparse(url)
                return all([result.scheme, result.netloc])
            except Exception:
                # FIX: was a bare `except:`, which also swallowed
                # SystemExit/KeyboardInterrupt.
                return False

        if not is_valid_url(feed.feed.get("link", "")):
            click.echo("Invalid feed format: Invalid URL format in channel link")
            sys.exit(1)

        for item in feed.entries:
            if "link" in item and not is_valid_url(item.get("link", "")):
                click.echo("Invalid feed format: Invalid URL format in item link")
                sys.exit(1)

        # Validate dates (RFC 822, per RSS convention)
        def is_valid_date(date_str):
            if not date_str:
                return True  # Dates are optional
            return bool(parsedate_tz(date_str))

        if "published" in feed.feed and not is_valid_date(feed.feed.published):
            click.echo("Invalid feed format: Invalid publication date in channel")
            sys.exit(1)

        for item in feed.entries:
            if "published" in item and not is_valid_date(item.published):
                click.echo("Invalid feed format: Invalid publication date in item")
                sys.exit(1)

        click.echo("Feed is valid")
        sys.exit(0)
    except Exception as e:
        click.echo(f"Error validating feed: {str(e)}")
        sys.exit(1)
+
+
@cli.command()
@click.option(
    "--config", "-c", type=click.Path(exists=True, path_type=Path), help="Path to config file"
)
def metrics(config):
    """Display current metrics."""
    # `config` is accepted for CLI symmetry with other commands but is not
    # used: metrics are read from the in-process Prometheus registry.
    try:
        print_metrics()
    except Exception as e:
        click.echo(f"Error: {str(e)}", err=True)
        sys.exit(1)
+
+
# NOTE(review): this is an exact duplicate of the `validate_old` command
# defined earlier in this file. Click stores commands by name, so this
# second registration silently replaces the first — one of the two
# definitions should be deleted.
@cli.command()
@click.argument("feed_file", type=click.Path(exists=True))
def validate_old(feed_file):
    """Validate an RSS feed file without processing it."""
    try:
        from email.utils import parsedate_tz
        from urllib.parse import urlparse

        import feedparser

        with open(feed_file, "r") as f:
            feed_content = f.read()
            feed = feedparser.parse(feed_content)

        # Check for basic RSS structure
        if not hasattr(feed, "feed") or not hasattr(feed, "entries"):
            click.echo("Invalid feed format: Missing required RSS elements")
            sys.exit(1)

        if feed.bozo:  # feedparser sets this when there's a parsing error
            click.echo("Invalid feed format: " + str(feed.bozo_exception))
            sys.exit(1)

        # Check for required channel elements
        if not feed.feed.get("title") or not feed.feed.get("link"):
            click.echo("Invalid feed format: Missing required channel elements")
            sys.exit(1)

        # Check for feed items
        if not feed.entries:
            click.echo("Invalid feed format: No feed items found")
            sys.exit(1)

        # Validate URLs
        def is_valid_url(url):
            try:
                result = urlparse(url)
                return all([result.scheme, result.netloc])
            # NOTE(review): bare except also catches SystemExit and
            # KeyboardInterrupt — prefer `except Exception`.
            except:
                return False

        if not is_valid_url(feed.feed.get("link", "")):
            click.echo("Invalid feed format: Invalid URL format in channel link")
            sys.exit(1)

        for item in feed.entries:
            if "link" in item and not is_valid_url(item.get("link", "")):
                click.echo("Invalid feed format: Invalid URL format in item link")
                sys.exit(1)

        # Validate dates
        def is_valid_date(date_str):
            if not date_str:
                return True  # Dates are optional
            return bool(parsedate_tz(date_str))

        if "published" in feed.feed and not is_valid_date(feed.feed.published):
            click.echo("Invalid feed format: Invalid publication date in channel")
            sys.exit(1)

        for item in feed.entries:
            if "published" in item and not is_valid_date(item.published):
                click.echo("Invalid feed format: Invalid publication date in item")
                sys.exit(1)

        click.echo("Feed is valid")
        sys.exit(0)
    except Exception as e:
        click.echo(f"Error validating feed: {str(e)}")
        sys.exit(1)
+
+
@cli.command()
@click.option("--endpoint", "-e", required=True, help="Webhook endpoint URL")
@click.option("--token", "-t", required=True, help="Authentication token")
@click.option("--batch-size", "-b", type=int, default=10, help="Batch size for webhook delivery")
@click.option("--output", "-o", type=click.Path(path_type=Path), help="Output config file path")
def configure(endpoint, token, batch_size, output):
    """Configure webhook settings."""
    try:
        # Reject malformed endpoints before building any config.
        if not validate_webhook_url(endpoint):
            click.echo("Invalid configuration: Webhook URL must be a valid HTTP(S) URL", err=True)
            sys.exit(1)

        settings = {
            "webhook_endpoint": endpoint,
            "webhook_auth_token": token,
            "webhook_batch_size": batch_size,
        }

        # Constructing WebhookConfig runs its own __post_init__ validation;
        # the instance itself is not needed afterwards.
        try:
            WebhookConfig(endpoint=endpoint, auth_token=token, batch_size=batch_size)
        except ValueError as e:
            click.echo(f"Invalid configuration: {str(e)}", err=True)
            sys.exit(1)

        # Either persist to the requested file or echo to stdout.
        if output:
            with open(output, "w") as f:
                json.dump(settings, f, indent=2)
            click.echo(f"Configuration saved to {output}")
        else:
            click.echo(json.dumps(settings, indent=2))

    except Exception as e:
        click.echo(f"Error: {str(e)}", err=True)
        sys.exit(1)
+
+
if __name__ == "__main__":
    # Last-resort guard: surface any unexpected startup error on stderr with
    # a non-zero exit code (click handles normal usage errors itself).
    try:
        cli()
    except Exception as e:
        click.echo(f"Error: {str(e)}", err=True)
        sys.exit(1)
diff --git a/feed_processor/metrics.py b/feed_processor/metrics.py
new file mode 100644
index 0000000..e105975
--- /dev/null
+++ b/feed_processor/metrics.py
@@ -0,0 +1,64 @@
+import threading
+import time
+
+from prometheus_client import Counter, Gauge, Histogram, start_http_server
+
# Initialize metrics
# NOTE(review): despite the "per second" help text, Counters are monotonic
# totals; per-second rates are derived at query time (e.g. PromQL rate()).
PROCESSING_RATE = Counter("feed_processing_rate", "Number of feeds processed per second")

QUEUE_SIZE = Gauge("feed_queue_size", "Current number of items in the processing queue")

# Latency is observed in seconds; buckets cover 5ms..5s.
PROCESSING_LATENCY = Histogram(
    "feed_processing_latency_seconds",
    "Time taken to process each feed",
    buckets=[0.005, 0.01, 0.025, 0.05, 0.075, 0.1, 0.25, 0.5, 0.75, 1.0, 2.5, 5.0],
)

WEBHOOK_RETRIES = Counter("feed_webhook_retries_total", "Number of webhook delivery retry attempts")

# Payload-size buckets cover 100 B .. 100 KB.
WEBHOOK_PAYLOAD_SIZE = Histogram(
    "feed_webhook_payload_size_bytes",
    "Size of webhook payloads in bytes",
    buckets=[100, 500, 1000, 5000, 10000, 50000, 100000],
)

RATE_LIMIT_DELAY = Gauge("feed_rate_limit_delay_seconds", "Current rate limit delay being applied")

QUEUE_OVERFLOWS = Counter("feed_queue_overflows_total", "Number of times the queue has overflowed")

# Queue distribution by feed type
QUEUE_DISTRIBUTION = Gauge(
    "feed_queue_distribution", "Distribution of items in queue by feed type", ["feed_type"]
)
+
+
def start_metrics_server(preferred_port=8000):
    """Start the Prometheus metrics server, trying multiple ports if necessary."""
    # Scan the 100-port window [preferred_port, preferred_port + 99].
    for candidate in range(preferred_port, preferred_port + 100):
        try:
            start_http_server(candidate)
        except OSError:
            print(f"Port {candidate} is in use, trying next port...")
            continue
        print(f"Metrics server started successfully on port {candidate}")
        return candidate
    raise RuntimeError("Could not find an available port for metrics server")
+
+
def init_metrics(port=8000):
    """Initialize and start the metrics server on the specified port."""

    def run_server():
        try:
            actual_port = start_metrics_server(port)
        except Exception as e:
            # Surface the failure before re-raising inside the thread.
            print(f"Failed to start metrics server: {e}")
            raise
        print(f"Metrics available at http://localhost:{actual_port}/metrics")

    server_thread = threading.Thread(target=run_server, daemon=True)
    server_thread.start()
    # Give the server a moment to start
    time.sleep(1)
    return server_thread
diff --git a/feed_processor/processor.py b/feed_processor/processor.py
new file mode 100644
index 0000000..ebefdee
--- /dev/null
+++ b/feed_processor/processor.py
@@ -0,0 +1,153 @@
+import json
+import time
+from queue import Full, Queue
+from threading import Event, Thread
+from typing import Any, Dict, List, Optional
+
+from .metrics import (
+ PROCESSING_LATENCY,
+ PROCESSING_RATE,
+ QUEUE_DISTRIBUTION,
+ QUEUE_OVERFLOWS,
+ QUEUE_SIZE,
+ RATE_LIMIT_DELAY,
+ WEBHOOK_PAYLOAD_SIZE,
+ WEBHOOK_RETRIES,
+ init_metrics,
+)
+from .validators import FeedValidator
+from .webhook import WebhookConfig, WebhookManager, WebhookResponse
+
+
class FeedProcessor:
    """Queue-based feed processor with optional batched webhook delivery.

    Feeds are validated on submission (add_feed), processed on a single
    background worker thread, and — when a webhook endpoint is configured —
    delivered in batches of ``webhook_batch_size``.
    """

    def __init__(
        self,
        max_queue_size: int = 1000,
        webhook_endpoint: Optional[str] = None,
        webhook_auth_token: Optional[str] = None,
        webhook_batch_size: int = 10,
        metrics_port: int = 8000,
    ):
        self.queue = Queue(maxsize=max_queue_size)
        self._running = False
        self._stop_event = Event()
        self.processing_thread = None

        # Initialize webhook manager if endpoint is provided
        # (both the endpoint AND the auth token are required).
        self.webhook_manager = None
        if webhook_endpoint and webhook_auth_token:
            webhook_config = WebhookConfig(
                endpoint=webhook_endpoint,
                auth_token=webhook_auth_token,
                batch_size=webhook_batch_size,
            )
            self.webhook_manager = WebhookManager(webhook_config)

        # Initialize batch processing
        self.batch_size = webhook_batch_size
        self.current_batch: List[Dict[str, Any]] = []

        init_metrics(metrics_port)  # Initialize Prometheus metrics with specified port

    def start(self):
        """Start the feed processor."""
        # Idempotent: calling start() while already running is a no-op.
        if not self._running:
            self._running = True
            self._stop_event.clear()
            self.processing_thread = Thread(target=self._process_queue, daemon=True)
            self.processing_thread.start()

    def stop(self):
        """Stop the feed processor."""
        if self._running:
            self._running = False
            self._stop_event.set()
            # Worker is a daemon thread; wait at most 1 second for it.
            if self.processing_thread and self.processing_thread.is_alive():
                self.processing_thread.join(timeout=1)

            # Process any remaining items in the batch
            if self.current_batch:
                self._send_batch(self.current_batch)

    def add_feed(self, feed_data: Dict[str, Any]) -> bool:
        """Add a feed to the processing queue.

        Returns True when the feed validated and was enqueued; False when
        validation failed or the queue is full (overflow is counted).
        """
        # Validate the feed first
        validation_result = FeedValidator.validate_feed(feed_data.get("content", ""))
        if not validation_result.is_valid:
            return False

        try:
            # Non-blocking put: a full queue raises Full immediately.
            self.queue.put(validation_result.parsed_feed, block=False)
            QUEUE_SIZE.set(self.queue.qsize())
            QUEUE_DISTRIBUTION.labels(feed_type=validation_result.feed_type).inc()
            return True
        except Full:
            QUEUE_OVERFLOWS.inc()
            return False

    def _process_queue(self):
        """Process items from the queue (worker-thread loop)."""
        while self._running and not self._stop_event.is_set():
            try:
                # NOTE(review): empty()/get() is not atomic; this is safe
                # only because there is a single consumer thread.
                if not self.queue.empty():
                    feed_data = self.queue.get()
                    start_time = time.time()

                    # Process the feed
                    self._process_feed(feed_data)

                    # Record metrics
                    PROCESSING_RATE.inc()
                    PROCESSING_LATENCY.observe(time.time() - start_time)
                    QUEUE_SIZE.set(self.queue.qsize())

                    # Update queue distribution
                    # NOTE(review): add_feed() increments this gauge with the
                    # validator's feed_type, but normalized feeds carry no
                    # "type" key, so this decrements the "unknown" label and
                    # the gauge drifts — confirm the intended label key.
                    QUEUE_DISTRIBUTION.labels(feed_type=feed_data.get("type", "unknown")).dec()

                else:
                    # If we have a partial batch and queue is empty, send it
                    if self.current_batch:
                        self._send_batch(self.current_batch)
                        self.current_batch = []
                    time.sleep(0.1)  # Prevent busy waiting

            except Exception as e:
                print(f"Error processing feed: {str(e)}")

    def _process_feed(self, feed_data: Dict[str, Any]):
        """Process a single feed entry: record its size and batch it."""
        # Record webhook payload size
        payload_size = len(json.dumps(feed_data))
        WEBHOOK_PAYLOAD_SIZE.observe(payload_size)

        # Add to current batch
        self.current_batch.append(feed_data)

        # Send batch if it reaches the batch size
        if len(self.current_batch) >= self.batch_size:
            self._send_batch(self.current_batch)
            self.current_batch = []

    def _send_batch(self, batch: List[Dict[str, Any]]):
        """Send a batch of feeds to the webhook endpoint."""
        # No-op when webhook delivery is not configured.
        if not self.webhook_manager:
            return

        try:
            responses = self.webhook_manager.batch_send(batch)

            for response in responses:
                # Update metrics based on webhook response
                if not response.success:
                    WEBHOOK_RETRIES.inc(response.retry_count)
                    if response.rate_limited:
                        # NOTE(review): assumes error_message ends with the
                        # delay in seconds; float() raises if the format
                        # differs — confirm against WebhookManager's messages.
                        delay = float(response.error_message.split()[-1])
                        RATE_LIMIT_DELAY.set(delay)
                    else:
                        RATE_LIMIT_DELAY.set(0)
                else:
                    RATE_LIMIT_DELAY.set(0)

        except Exception as e:
            print(f"Error sending webhook batch: {str(e)}")
            WEBHOOK_RETRIES.inc()
diff --git a/feed_processor/validator.py b/feed_processor/validator.py
new file mode 100644
index 0000000..c278314
--- /dev/null
+++ b/feed_processor/validator.py
@@ -0,0 +1,103 @@
+"""Feed validator module with enhanced validation features and performance optimizations."""
+
+import asyncio
+import concurrent.futures
+import functools
+import json
+import logging
+import os
+import re
+import xml.etree.ElementTree as ET
+from dataclasses import asdict, dataclass
+from datetime import datetime
+from email.utils import parsedate_tz
+from typing import Dict, List, Optional, Tuple, Union
+from urllib.parse import urlparse
+
+import aiohttp
+import chardet
+import feedparser
+from cachetools import TTLCache
+
+
class ValidationResult:
    """Result of feed validation: an overall flag plus collected errors."""

    def __init__(self, valid: bool, errors: Optional[List[str]] = None):
        self.valid = valid
        # A falsy/missing errors argument yields a fresh empty list so callers
        # can append without sharing state.
        self.errors = errors if errors else []
+
+
class FeedValidator:
    """Validates RSS/Atom feeds fetched over HTTP.

    Optional ``config`` keys: ``required_fields`` (channel fields that must
    be present), ``max_title_length``, ``max_description_length``.
    """

    def __init__(self, config: Optional[Dict] = None):
        self.config = config or {}
        self.required_fields = self.config.get("required_fields", ["title", "link", "description"])
        self.max_title_length = self.config.get("max_title_length", 100)
        self.max_description_length = self.config.get("max_description_length", 5000)

    def validate(self, feed_url: str) -> ValidationResult:
        """Fetch ``feed_url``, parse it, and validate structure and entries.

        Returns:
            ValidationResult; URL/fetch/parse failures short-circuit, while
            field-level problems are all collected before returning.
        """
        # BUGFIX: `requests` is used below but was never imported anywhere in
        # this module (only aiohttp is imported at module level), so every
        # fetch raised NameError. Imported locally here to fix that without
        # altering the module's import block.
        import requests

        errors = []

        # Validate URL format
        try:
            parsed_url = urlparse(feed_url)
            if not all([parsed_url.scheme, parsed_url.netloc]):
                errors.append("Invalid feed URL format")
                return ValidationResult(valid=False, errors=errors)
        except Exception as e:
            errors.append(f"URL parsing error: {str(e)}")
            return ValidationResult(valid=False, errors=errors)

        # Fetch feed content
        try:
            response = requests.get(feed_url, timeout=10)
            response.raise_for_status()
            feed_content = response.text
        except requests.RequestException as e:
            errors.append(f"Failed to fetch feed: {str(e)}")
            return ValidationResult(valid=False, errors=errors)

        # Parse feed; feedparser sets `bozo` on any parsing problem.
        feed = feedparser.parse(feed_content)
        if feed.bozo:
            errors.append(f"Feed parsing error: {str(feed.bozo_exception)}")
            return ValidationResult(valid=False, errors=errors)

        # Validate required channel-level fields
        for field in self.required_fields:
            if not feed.feed.get(field):
                errors.append(f"Missing required field: {field}")

        # Validate feed entries
        if not feed.entries:
            errors.append("Feed contains no entries")
        else:
            for entry in feed.entries:
                # Validate entry fields
                if not entry.get("title"):
                    errors.append("Entry missing title")
                elif len(entry.title) > self.max_title_length:
                    errors.append(
                        f"Entry title exceeds maximum length of {self.max_title_length} characters"
                    )

                if not entry.get("description"):
                    errors.append("Entry missing description")
                elif len(entry.description) > self.max_description_length:
                    errors.append(
                        f"Entry description exceeds maximum length of {self.max_description_length} characters"
                    )

                # Validate dates
                # NOTE(review): only strict ISO-8601 "...Z" timestamps pass
                # here; RSS feeds normally use RFC 822 dates, which this
                # flags as invalid — confirm the intended date handling.
                if entry.get("published"):
                    try:
                        published = datetime.strptime(entry.published, "%Y-%m-%dT%H:%M:%SZ")
                        if published > datetime.utcnow():
                            errors.append("Entry has future publication date")
                    except ValueError:
                        errors.append("Invalid publication date format")

        return ValidationResult(valid=len(errors) == 0, errors=errors)
diff --git a/feed_processor/validators.py b/feed_processor/validators.py
new file mode 100644
index 0000000..792e87c
--- /dev/null
+++ b/feed_processor/validators.py
@@ -0,0 +1,220 @@
+import json
+import re
+from dataclasses import dataclass
+from datetime import datetime
+from typing import Any, Dict, List, Optional
+from urllib.parse import urlparse
+
+import feedparser
+
+
@dataclass
class FeedValidationResult:
    """Outcome of FeedValidator.validate_feed.

    ``parsed_feed`` holds the normalized feed mapping when ``is_valid`` is
    True; the error/warning lists default to None rather than a shared list.
    """

    is_valid: bool
    feed_type: Optional[str] = None
    error_message: Optional[str] = None
    parsed_feed: Optional[Dict[str, Any]] = None
    validation_errors: Optional[List[str]] = None
    validation_warnings: Optional[List[str]] = None
+
+
class FeedValidator:
    """Static validation and normalization helpers for RSS, Atom and JSON feeds."""

    # Channel-level fields each feed flavor must provide.
    REQUIRED_FIELDS = {
        "rss": ["title", "link", "description"],
        "atom": ["title", "id", "updated"],
        "json": ["version", "title", "items"],
    }

    # Allowed values for the schema's contentType / priority fields.
    CONTENT_TYPES = ["BLOG", "VIDEO", "SOCIAL"]
    PRIORITY_LEVELS = ["High", "Medium", "Low"]

    @staticmethod
    def validate_feed(content: str) -> FeedValidationResult:
        """Validate and parse a feed string.

        Tries RSS/Atom via feedparser first, then JSON Feed; returns a
        FeedValidationResult whose parsed_feed is the normalized mapping on
        success.
        """
        errors = []
        warnings = []

        # Try parsing as RSS/Atom first. feedparser reports a version string
        # such as "rss20" or "atom10"; an empty value means it could not
        # identify the format.
        parsed = feedparser.parse(content)
        if parsed.get("version"):
            feed_type = "atom" if parsed.get("version").startswith("atom") else "rss"
            if FeedValidator._validate_required_fields(
                parsed.feed, FeedValidator.REQUIRED_FIELDS[feed_type]
            ):
                # Validate additional fields
                FeedValidator._validate_title(parsed.feed.get("title"), errors)
                FeedValidator._validate_url(parsed.feed.get("link"), errors)

                if not errors:
                    return FeedValidationResult(
                        is_valid=True,
                        feed_type=feed_type,
                        parsed_feed=FeedValidator._normalize_feed(parsed.feed, feed_type),
                        validation_errors=errors,
                        validation_warnings=warnings,
                    )
            else:
                errors.append(f"Missing required fields for {feed_type} feed")

            return FeedValidationResult(
                is_valid=False,
                feed_type=feed_type,
                error_message="Validation failed",
                validation_errors=errors,
                validation_warnings=warnings,
            )

        # Try parsing as JSON Feed (https://jsonfeed.org spec).
        try:
            json_feed = json.loads(content)
            if json_feed.get("version", "").startswith("https://jsonfeed.org/version/"):
                if FeedValidator._validate_required_fields(
                    json_feed, FeedValidator.REQUIRED_FIELDS["json"]
                ):
                    # Validate additional fields
                    FeedValidator._validate_title(json_feed.get("title"), errors)
                    FeedValidator._validate_url(json_feed.get("home_page_url"), errors)

                    if not errors:
                        return FeedValidationResult(
                            is_valid=True,
                            feed_type="json",
                            parsed_feed=FeedValidator._normalize_feed(json_feed, "json"),
                            validation_errors=errors,
                            validation_warnings=warnings,
                        )
                else:
                    errors.append("Missing required fields for JSON feed")

                return FeedValidationResult(
                    is_valid=False,
                    feed_type="json",
                    error_message="Validation failed",
                    validation_errors=errors,
                    validation_warnings=warnings,
                )
        except json.JSONDecodeError:
            # Not JSON either; fall through to the generic failure below.
            pass

        return FeedValidationResult(
            is_valid=False,
            error_message="Unsupported or invalid feed format",
            validation_errors=errors,
            validation_warnings=warnings,
        )

    @staticmethod
    def _validate_required_fields(feed_data: Dict[str, Any], required_fields: list) -> bool:
        """Check if all required fields are present in the feed."""
        return all(field in feed_data for field in required_fields)

    @staticmethod
    def _validate_title(title: str, errors: List[str]) -> None:
        """Validate title according to schema rules (appends to ``errors``)."""
        if not title:
            errors.append("Title is required")
        elif len(title) > 255:
            errors.append("Title exceeds maximum length of 255 characters")
        elif re.search(r"<[^>]+>", title):
            errors.append("Title contains HTML tags")

    @staticmethod
    def _validate_url(url: str, errors: List[str]) -> None:
        """Validate URL according to schema rules (appends to ``errors``)."""
        if not url:
            errors.append("URL is required")
        elif len(url) > 2048:
            errors.append("URL exceeds maximum length of 2048 characters")
        else:
            try:
                result = urlparse(url)
                if not all([result.scheme, result.netloc]):
                    errors.append("Invalid URL format")
            except Exception:
                errors.append("Invalid URL format")

    @staticmethod
    def _validate_content_type(content_type: str, errors: List[str]) -> None:
        """Validate content type according to schema rules."""
        if content_type and content_type not in FeedValidator.CONTENT_TYPES:
            errors.append(
                f"Invalid content type. Must be one of: {', '.join(FeedValidator.CONTENT_TYPES)}"
            )

    @staticmethod
    def _validate_priority(priority: str, errors: List[str]) -> None:
        """Validate priority according to schema rules."""
        if priority and priority not in FeedValidator.PRIORITY_LEVELS:
            errors.append(
                f"Invalid priority. Must be one of: {', '.join(FeedValidator.PRIORITY_LEVELS)}"
            )

    @staticmethod
    def _validate_tags(tags: List[str], errors: List[str]) -> None:
        """Validate tags according to schema rules (max 10 tags, 50 chars each)."""
        if tags:
            if len(tags) > 10:
                errors.append("Maximum of 10 tags allowed")
            for tag in tags:
                if len(tag) > 50:
                    errors.append(f"Tag '{tag}' exceeds maximum length of 50 characters")

    @staticmethod
    def _normalize_feed(feed_data: Dict[str, Any], feed_type: str) -> Dict[str, Any]:
        """Normalize feed data to match schema format.

        Receives the channel-level mapping (entries/items are not included)
        and returns the schema-shaped dict used downstream.
        """
        normalized = {
            "id": feed_data.get("id") or feed_data.get("guid"),
            "title": feed_data.get("title"),
            "content": {
                "full": feed_data.get("content", ""),
                # Brief text is capped at 2000 characters.
                "brief": feed_data.get("summary", "")[:2000] if feed_data.get("summary") else "",
                "format": "html" if feed_type in ["rss", "atom"] else "text",
            },
            "metadata": {
                "source": {
                    "feedId": feed_data.get("feed_id", ""),
                    "url": feed_data.get("link") or feed_data.get("id"),
                    "publishDate": None,
                    "author": feed_data.get("author", ""),
                    "language": feed_data.get("language", ""),
                    "tags": feed_data.get("tags", []),
                },
                "processing": {
                    "receivedAt": datetime.now().isoformat(),
                    "processedAt": None,
                    "attempts": 0,
                    "status": "pending",
                },
            },
            "analysis": {
                "contentType": None,
                "priority": "Medium",  # Default priority
                "readabilityScore": None,
                "sentimentScore": None,
                "categories": [],
                "keywords": [],
            },
        }

        # Parse and normalize dates — each feed flavor names its date field
        # differently.
        if feed_type == "atom":
            publish_date = feed_data.get("updated")
        elif feed_type == "rss":
            publish_date = feed_data.get("pubDate")
        else:  # json
            publish_date = feed_data.get("date_published")

        if publish_date:
            try:
                if isinstance(publish_date, str):
                    normalized["metadata"]["source"]["publishDate"] = datetime.fromisoformat(
                        publish_date.replace("Z", "+00:00")
                    ).isoformat()
                else:
                    # Non-string date: presumably a feedparser
                    # time.struct_time (first 6 fields) — TODO confirm.
                    normalized["metadata"]["source"]["publishDate"] = datetime(
                        *publish_date[:6]
                    ).isoformat()
            except (ValueError, TypeError):
                # Unparseable dates are silently dropped (publishDate stays None).
                pass

        return normalized
diff --git a/feed_processor/webhook.py b/feed_processor/webhook.py
new file mode 100644
index 0000000..caa2564
--- /dev/null
+++ b/feed_processor/webhook.py
@@ -0,0 +1,173 @@
+import json
+import re
+import time
+from dataclasses import dataclass
+from datetime import datetime
+from typing import Any, Dict, List, Optional
+
+import requests
+
+
class DateTimeEncoder(json.JSONEncoder):
    """JSON encoder that serializes datetime objects as ISO-8601 strings."""

    def default(self, obj):
        # Only datetimes get special treatment; everything else defers to
        # the base encoder (which raises TypeError for unknown types).
        if not isinstance(obj, datetime):
            return super().default(obj)
        return obj.isoformat()
+
+
@dataclass
class WebhookConfig:
    """Configuration for webhook delivery.

    The endpoint URL is validated on construction; a ValueError is raised
    for anything that does not look like an http(s) URL.
    """

    endpoint: str
    auth_token: str
    max_retries: int = 3
    retry_delay: int = 1
    timeout: int = 5
    batch_size: int = 10

    def __post_init__(self):
        # Accept http(s) URLs with a domain name, "localhost", or a dotted
        # IPv4 address, an optional port, and an optional path/query part.
        pattern = (
            r"^https?://"
            r"(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+[A-Z]{2,6}\.?|"
            r"localhost|"
            r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})"
            r"(?::\d+)?"
            r"(?:/?|[/?]\S+)$"
        )
        if re.match(pattern, self.endpoint, re.IGNORECASE) is None:
            raise ValueError("Invalid webhook endpoint URL")
+
+
@dataclass
class WebhookResponse:
    """Outcome of a single webhook delivery attempt (or one batch)."""

    # Whether the endpoint accepted the payload (HTTP 200).
    success: bool
    # HTTP status code, or None when the request never completed.
    status_code: Optional[int] = None
    # Human-readable failure description when success is False.
    error_message: Optional[str] = None
    # Number of retries performed before this result was produced.
    retry_count: int = 0
    # True when the endpoint answered 429 (rate limited).
    rate_limited: bool = False
    # Decoded JSON body of a successful response, when available.
    response_data: Optional[Dict[str, Any]] = None
+
+
class WebhookError(Exception):
    """Raised for webhook-related failures, e.g. an invalid payload."""
+
+
class WebhookManager:
    """Delivers feed payloads to a configured webhook endpoint.

    Maintains a requests.Session pre-loaded with the bearer token and JSON
    content-type headers, and implements retry and rate-limit handling.
    """

    def __init__(self, config: WebhookConfig):
        """Store the config and build an authenticated HTTP session."""
        self.config = config
        self.session = requests.Session()
        self.session.headers.update(
            {"Authorization": f"Bearer {config.auth_token}", "Content-Type": "application/json"}
        )

    def validate_payload(self, payload: Dict[str, Any]) -> bool:
        """Return True if the payload carries all required fields."""
        required_fields = ["type", "title", "link"]
        return all(field in payload for field in required_fields)

    @staticmethod
    def _safe_json(response) -> Optional[Dict[str, Any]]:
        """Decode a JSON response body, returning None if it is not JSON."""
        try:
            return response.json()
        except ValueError:
            return None

    def send(self, feed_data: Dict[str, Any]) -> WebhookResponse:
        """Send a single feed to the webhook endpoint.

        Retries transient failures up to config.max_retries times.

        Raises:
            WebhookError: if required payload fields are missing.
        """
        if not self.validate_payload(feed_data):
            raise WebhookError("Invalid payload: missing required fields")

        retry_count = 0
        while retry_count <= self.config.max_retries:
            try:
                # Use the shared session so the auth headers set in __init__
                # apply; serialize explicitly with DateTimeEncoder so payloads
                # containing datetime values do not raise TypeError.
                response = self.session.post(
                    self.config.endpoint,
                    data=json.dumps(feed_data, cls=DateTimeEncoder),
                    timeout=self.config.timeout,
                )

                # Rate limited: honor Retry-After, then report to the caller
                # (the caller decides whether to re-submit).
                if response.status_code == 429:
                    retry_after = int(response.headers.get("Retry-After", self.config.retry_delay))
                    time.sleep(retry_after)
                    return WebhookResponse(
                        success=False,
                        status_code=429,
                        error_message="Rate limit exceeded",
                        retry_count=retry_count,
                        rate_limited=True,
                    )

                # Authentication failures are not retryable.
                if response.status_code == 401:
                    return WebhookResponse(
                        success=False,
                        status_code=401,
                        error_message="Authentication failed",
                        retry_count=retry_count,
                    )

                if response.status_code == 200:
                    return WebhookResponse(
                        success=True,
                        status_code=200,
                        retry_count=retry_count,
                        response_data=self._safe_json(response),
                    )

                # Other errors: retry after a delay until attempts run out.
                if retry_count < self.config.max_retries:
                    time.sleep(self.config.retry_delay)
                    retry_count += 1
                    continue

                # Max retries exceeded
                return WebhookResponse(
                    success=False,
                    status_code=response.status_code,
                    error_message="Max retries exceeded",
                    retry_count=retry_count,
                )

            except requests.RequestException as e:
                if retry_count < self.config.max_retries:
                    time.sleep(self.config.retry_delay)
                    retry_count += 1
                    continue

                return WebhookResponse(success=False, error_message=str(e), retry_count=retry_count)

    def batch_send(self, feeds: List[Dict[str, Any]]) -> List[WebhookResponse]:
        """Send feeds in batches of config.batch_size; one response per batch."""
        responses = []
        for i in range(0, len(feeds), self.config.batch_size):
            batch = feeds[i : i + self.config.batch_size]
            try:
                response = self.session.post(
                    self.config.endpoint,
                    data=json.dumps({"feeds": batch}, cls=DateTimeEncoder),
                    timeout=self.config.timeout,
                )

                if response.status_code == 200:
                    responses.append(
                        WebhookResponse(
                            success=True,
                            status_code=response.status_code,
                            response_data=self._safe_json(response),
                        )
                    )
                else:
                    responses.append(
                        WebhookResponse(
                            success=False,
                            status_code=response.status_code,
                            error_message=f"HTTP {response.status_code}",
                        )
                    )

            except requests.RequestException as e:
                responses.append(WebhookResponse(success=False, error_message=str(e)))

        return responses
diff --git a/monitoring/prometheus/prometheus.yml b/monitoring/prometheus/prometheus.yml
index b869807..6dd5c45 100644
--- a/monitoring/prometheus/prometheus.yml
+++ b/monitoring/prometheus/prometheus.yml
@@ -5,6 +5,6 @@ global:
scrape_configs:
- job_name: 'feed-processor'
static_configs:
- - targets: ['localhost:8000']
+ - targets: ['host.docker.internal:8000']
metrics_path: '/metrics'
scheme: 'http'
diff --git a/pyproject.toml b/pyproject.toml
index 63f5f09..f818c54 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -8,6 +8,16 @@ extend-exclude = '''
^/docs/
'''
+[tool.isort]
+profile = "black"
+line_length = 100
+multi_line_output = 3
+include_trailing_comma = true
+force_grid_wrap = 0
+use_parentheses = true
+ensure_newline_before_comments = true
+skip = ["docs"]
+
[tool.pytest.ini_options]
minversion = "6.0"
addopts = "-ra -q --cov=feed_processor --cov-report=term-missing"
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 574b73a..4b81290 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -1,20 +1,19 @@
# Testing
-pytest==7.4.3
-pytest-cov==4.1.0
-pytest-mock==3.12.0
-pytest-asyncio==0.23.2
+pytest>=7.4.3
+pytest-cov>=4.1.0
+pytest-mock>=3.12.0
+pytest-asyncio>=0.23.2
# Code Quality
-black==23.11.0
-flake8==6.1.0
-mypy==1.7.1
-pre-commit==3.5.0
+black>=23.11.0
+flake8>=6.1.0
+mypy>=1.7.1
+pre-commit>=3.5.0
# Type Checking
-types-requests==2.31.0.10
-types-python-dateutil==2.8.19.14
-types-prometheus-client==0.19.0.0
+types-requests>=2.31.0.10
+types-python-dateutil>=2.8.19.14
# Documentation
-sphinx==7.2.6
-sphinx-rtd-theme==1.3.0
+sphinx>=7.2.6
+sphinx-rtd-theme>=1.3.0
diff --git a/requirements.txt b/requirements.txt
index 8d0b180..9928a71 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,25 +1,42 @@
# Core dependencies
-requests==2.31.0
-python-dotenv==1.0.0
+requests>=2.31.0
+python-dotenv>=1.0.0
+chardet>=4.0.0
+aiohttp>=3.9.1
+cachetools>=5.3.2
# Data processing
-spacy==3.7.2
-textstat==0.7.3
-rake-nltk==1.0.6
+spacy>=3.7.2
+textstat>=0.7.3
+rake-nltk>=1.0.6
# API Integration
-pyairtable==2.2.1
+pyairtable>=2.2.1
# Error handling and monitoring
-pybreaker==1.0.1
-structlog==23.2.0
-prometheus-client==0.19.0
+pybreaker>=1.0.1
+structlog>=23.2.0
+prometheus-client>=0.17.1
+
+# Feed processing
+feedparser>=6.0.0
# Development dependencies
-pytest==7.4.3
-black==23.11.0
-flake8==6.1.0
-mypy==1.7.1
+pytest>=7.0.0
+black>=23.11.0
+flake8>=6.1.0
+mypy>=1.7.1
+pytest-asyncio>=0.18.0
+pytest-cov>=3.0.0
# Type stubs for better type checking
-types-requests==2.31.0.10
\ No newline at end of file
+types-requests>=2.31.0.10
+
+# Enhanced validation features
+click>=8.0.0
+
+# Load testing and monitoring
+locust>=2.24.0
+docker-compose>=1.29.2
+psutil>=5.9.0
+docker>=6.1.0
\ No newline at end of file
diff --git a/setup.py b/setup.py
index ea3efce..f49332d 100644
--- a/setup.py
+++ b/setup.py
@@ -1,7 +1,47 @@
from setuptools import setup, find_packages
-with open("README.md", "r", encoding="utf-8") as fh:
- long_description = fh.read()
+# Core requirements
+INSTALL_REQUIRES = [
+ "requests>=2.31.0",
+ "python-dotenv>=1.0.0",
+ "chardet>=4.0.0",
+ "aiohttp>=3.9.1",
+ "cachetools>=5.3.2",
+ "spacy>=3.7.2",
+ "textstat>=0.7.3",
+ "rake-nltk>=1.0.6",
+ "pyairtable>=2.2.1",
+ "pybreaker>=1.0.1",
+ "structlog>=23.2.0",
+ "prometheus-client>=0.17.1",
+ "feedparser>=6.0.0",
+ "click>=8.0.0",
+]
+
+# Development requirements
+EXTRAS_REQUIRE = {
+ "dev": [
+ "pytest>=7.4.3",
+ "pytest-cov>=4.1.0",
+ "pytest-mock>=3.12.0",
+ "pytest-asyncio>=0.23.2",
+ "black>=23.11.0",
+ "flake8>=6.1.0",
+ "mypy>=1.7.1",
+ "isort>=5.12.0",
+ "pre-commit>=3.5.0",
+ "types-requests>=2.31.0.10",
+ "types-python-dateutil>=2.8.19.14",
+ "sphinx>=7.2.6",
+ "sphinx-rtd-theme>=1.3.0",
+ ],
+ "test": [
+ "pytest>=7.4.3",
+ "pytest-cov>=4.1.0",
+ "pytest-mock>=3.12.0",
+ "pytest-asyncio>=0.23.2",
+ ],
+}
setup(
name="feed_processor",
@@ -9,48 +49,24 @@
author="Thaddius Cho",
author_email="thaddius@thaddius.me",
description="A robust Python-based feed processing system",
- long_description=long_description,
+ long_description=open("README.md", "r", encoding="utf-8").read(),
long_description_content_type="text/markdown",
url="https://github.com/thaddiusatme/feed-processing-system",
project_urls={
"Bug Tracker": "https://github.com/thaddiusatme/feed-processing-system/issues",
"Documentation": "https://thaddiusatme.github.io/feed-processing-system/",
},
- packages=find_packages(where="src"),
- package_dir={"": "src"},
+ packages=find_packages(exclude=["tests*", "docs*"]),
classifiers=[
"Development Status :: 5 - Production/Stable",
"Intended Audience :: Developers",
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
"Programming Language :: Python :: 3",
- "Programming Language :: Python :: 3.8",
- "Programming Language :: Python :: 3.9",
- "Programming Language :: Python :: 3.10",
+ "Programming Language :: Python :: 3.12",
"Topic :: Internet :: WWW/HTTP :: Dynamic Content :: News/Diary",
],
- python_requires=">=3.8",
- install_requires=[
- "requests>=2.31.0",
- "python-dotenv>=1.0.0",
- "spacy>=3.7.2",
- "textstat>=0.7.3",
- "rake-nltk>=1.0.6",
- "pyairtable>=2.2.1",
- "pybreaker>=1.0.1",
- "prometheus-client>=0.17.1",
- "structlog>=23.1.0",
- ],
- extras_require={
- "dev": [
- "pytest>=7.4.0",
- "pytest-cov>=4.1.0",
- "pytest-mock>=3.11.1",
- "black>=23.7.0",
- "flake8>=6.1.0",
- "mypy>=1.5.1",
- "sphinx>=7.1.2",
- "sphinx-rtd-theme>=1.3.0",
- ],
- },
+ python_requires=">=3.12",
+ install_requires=INSTALL_REQUIRES,
+ extras_require=EXTRAS_REQUIRE,
)
\ No newline at end of file
diff --git a/src/feed_processor/metrics.py b/src/feed_processor/metrics.py
index 25e3ea5..5a8f8e8 100644
--- a/src/feed_processor/metrics.py
+++ b/src/feed_processor/metrics.py
@@ -258,3 +258,12 @@ def batch_update(
metric.record(float(value))
else:
raise ValueError(f"Unknown operation: {operation}")
+
def init_metrics(port=8000):
    """Initialize and start the metrics server on the specified port.

    The server runs in a daemon thread so it never blocks interpreter
    shutdown.
    """
    # NOTE(review): assumes `threading` and `start_metrics_server` are
    # available at module level in this file — confirm the imports exist.
    threading.Thread(target=start_metrics_server, args=(port,), daemon=True).start()
diff --git a/src/feed_processor/processor.py b/src/feed_processor/processor.py
index bcce74a..5805e3b 100644
--- a/src/feed_processor/processor.py
+++ b/src/feed_processor/processor.py
@@ -54,7 +54,8 @@ def __init__(
webhook_url: str,
content_queue: Optional[ContentQueue] = None,
webhook_manager: Optional[WebhookManager] = None,
- test_mode: bool = False
+ test_mode: bool = False,
+ metrics_port: int = 8000
):
"""Initialize the feed processor.
@@ -64,6 +65,7 @@ def __init__(
content_queue: Optional custom content queue
webhook_manager: Optional custom webhook manager
test_mode: If True, won't start continuous processing
+ metrics_port: Port to use for Prometheus metrics
"""
self.inoreader_token = inoreader_token
self.webhook_url = webhook_url
@@ -77,6 +79,7 @@ def __init__(
self.poll_interval = 60 # seconds
self.logger = logging.getLogger(__name__)
self.rate_limiter = RateLimiter()
+ init_metrics(metrics_port) # Initialize Prometheus metrics with specified port
def fetch_feeds(self) -> List[Dict[str, Any]]:
"""Fetch feeds from Inoreader API.
diff --git a/test_processor.py b/test_processor.py
new file mode 100644
index 0000000..f32c581
--- /dev/null
+++ b/test_processor.py
@@ -0,0 +1,154 @@
+import unittest
+from unittest.mock import Mock, patch
+import json
+from datetime import datetime
+
+from feed_processor.processor import FeedProcessor
+from feed_processor.webhook import WebhookManager, WebhookConfig, WebhookResponse
+
class TestFeedProcessor(unittest.TestCase):
    """Unit tests for FeedProcessor queueing and webhook batching."""

    def setUp(self):
        # Small queue and a batch size of 2 keep the batching tests fast.
        self.processor = FeedProcessor(
            max_queue_size=10,
            webhook_endpoint="https://example.com/webhook",
            webhook_auth_token="test-token",
            webhook_batch_size=2
        )
        # NOTE(review): this fixture looks garbled — the RSS/XML markup
        # appears to have been stripped; confirm the original bytes in VCS.
        self.sample_feed = {
            'content': '''


Test Feed
http://example.com/feed
Test Description
-
Test Item
http://example.com/item1
Test Item Description


'''
        }

    def test_add_feed_invalid_content(self):
        """Test adding feed with invalid content."""
        self.assertFalse(self.processor.add_feed({'content': 'invalid content'}))

    def test_add_feed_queue_full(self):
        """Test adding feed when queue is full."""
        # Fill up the queue
        for _ in range(10):  # max_queue_size is 10
            self.processor.add_feed(self.sample_feed)

        # Try to add one more
        self.assertFalse(self.processor.add_feed(self.sample_feed))

    def test_add_feed_success(self):
        """Test successfully adding a feed."""
        self.assertTrue(self.processor.add_feed(self.sample_feed))

    def test_add_feed_with_webhook(self):
        """Test adding a feed with webhook enabled."""
        with patch('feed_processor.webhook.WebhookManager.batch_send') as mock_send:
            mock_send.return_value = [
                WebhookResponse(success=True, status_code=200)
            ]

            # Add two feeds to trigger a batch
            self.assertTrue(self.processor.add_feed(self.sample_feed))
            self.assertTrue(self.processor.add_feed(self.sample_feed))

            # Start processing
            self.processor.start()

            # Let the processor run briefly
            import time
            time.sleep(0.5)

            # Stop and ensure final batch is sent
            self.processor.stop()

            # Verify webhook was called
            mock_send.assert_called()

    def test_webhook_batch_processing(self):
        """Test that feeds are properly batched before sending."""
        with patch('feed_processor.webhook.WebhookManager.batch_send') as mock_send:
            mock_send.return_value = [
                WebhookResponse(success=True, status_code=200)
            ]

            # Add three feeds (should create one full batch and one partial)
            for _ in range(3):
                self.assertTrue(self.processor.add_feed(self.sample_feed))

            # Start and stop to process all feeds
            self.processor.start()
            import time
            time.sleep(0.5)
            self.processor.stop()

            # Should have been called twice (one full batch, one partial)
            self.assertEqual(mock_send.call_count, 2)

    def test_webhook_failure_handling(self):
        """Test handling of webhook failures."""
        with patch('feed_processor.webhook.WebhookManager.batch_send') as mock_send:
            # Simulate a failed webhook call
            mock_send.return_value = [
                WebhookResponse(
                    success=False,
                    status_code=500,
                    error_message="Internal Server Error",
                    retry_count=3
                )
            ]

            # Add feeds and process
            self.assertTrue(self.processor.add_feed(self.sample_feed))
            self.assertTrue(self.processor.add_feed(self.sample_feed))

            self.processor.start()
            import time
            time.sleep(0.5)
            self.processor.stop()

            # Verify webhook was called and metrics were updated
            mock_send.assert_called()

    def test_rate_limiting(self):
        """Test handling of rate limiting in webhook calls."""
        with patch('feed_processor.webhook.WebhookManager.batch_send') as mock_send:
            # Simulate rate limiting
            mock_send.return_value = [
                WebhookResponse(
                    success=False,
                    status_code=429,
                    error_message="Rate limit exceeded",
                    retry_count=1,
                    rate_limited=True
                )
            ]

            # Add feeds and process
            self.assertTrue(self.processor.add_feed(self.sample_feed))
            self.assertTrue(self.processor.add_feed(self.sample_feed))

            self.processor.start()
            import time
            time.sleep(0.5)
            self.processor.stop()

            # Verify webhook was called and rate limiting was handled
            mock_send.assert_called()

    def test_process_feed(self):
        """Test processing a single feed."""
        # A minimal payload carrying the fields WebhookManager requires.
        feed_data = {'type': 'rss', 'title': 'Test', 'link': 'http://example.com'}
        self.processor._process_feed(feed_data)
        self.assertEqual(len(self.processor.current_batch), 1)
+
# Allow running this module directly: python test_processor.py
if __name__ == '__main__':
    unittest.main()
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/tests/__init__.py
@@ -0,0 +1 @@
+
diff --git a/tests/conftest.py b/tests/conftest.py
index bf00747..285e884 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,6 +1,8 @@
-import pytest
-from unittest.mock import Mock
import os
+from unittest.mock import Mock
+
+import pytest
+
@pytest.fixture(autouse=True)
def mock_env_vars(monkeypatch):
@@ -8,6 +10,7 @@ def mock_env_vars(monkeypatch):
monkeypatch.setenv("INOREADER_TOKEN", "test_token")
monkeypatch.setenv("WEBHOOK_URL", "http://test.com/webhook")
+
@pytest.fixture
def mock_queue():
"""Create a mock queue for testing."""
@@ -16,6 +19,7 @@ def mock_queue():
queue.get.return_value = {"id": "1", "title": "Test"}
return queue
+
@pytest.fixture
def mock_webhook_manager():
"""Create a mock webhook manager for testing."""
diff --git a/tests/integration/test_error_handling_edge_cases.py b/tests/integration/test_error_handling_edge_cases.py
index 3a3b79d..665bc11 100644
--- a/tests/integration/test_error_handling_edge_cases.py
+++ b/tests/integration/test_error_handling_edge_cases.py
@@ -1,16 +1,14 @@
-import pytest
import socket
import threading
import time
-from unittest.mock import patch, MagicMock
from contextlib import contextmanager
-from typing import Generator, Any
+from typing import Any, Generator
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from feed_processor.error_handling import ErrorCategory, ErrorHandler, ErrorSeverity
-from feed_processor.error_handling import (
- ErrorHandler,
- ErrorCategory,
- ErrorSeverity
-)
class NetworkPartitionSimulator:
def __init__(self):
@@ -33,6 +31,7 @@ def _broken_socket(self, *args, **kwargs):
raise socket.error("Network unreachable")
return self._original_socket(*args, **kwargs)
+
class TestErrorHandlingEdgeCases:
@pytest.fixture
def error_handler(self):
@@ -48,11 +47,11 @@ def test_network_partition_recovery(self, error_handler, network_partition):
"""Test system behavior during network partition"""
# Step 1: Normal operation
self._verify_normal_operation(error_handler)
-
+
# Step 2: Simulate network partition
network_partition.start()
partition_errors = []
-
+
for _ in range(5):
try:
self._make_external_call()
@@ -63,17 +62,17 @@ def test_network_partition_recovery(self, error_handler, network_partition):
category=ErrorCategory.NETWORK_ERROR,
severity=ErrorSeverity.HIGH,
service="external_api",
- details={"state": "partition"}
+ details={"state": "partition"},
)
)
-
+
assert len(partition_errors) == 5
assert error_handler._get_circuit_breaker("external_api").state == "open"
-
+
# Step 3: Recover from partition
network_partition.stop()
time.sleep(error_handler._get_circuit_breaker("external_api").reset_timeout)
-
+
# Step 4: Verify recovery
self._verify_normal_operation(error_handler)
@@ -82,6 +81,7 @@ def test_database_connection_failures(self, error_handler):
with patch("psycopg2.connect") as mock_connect:
# Simulate intermittent failures
failure_count = 0
+
def flaky_connect(*args, **kwargs):
nonlocal failure_count
failure_count += 1
@@ -90,7 +90,7 @@ def flaky_connect(*args, **kwargs):
return MagicMock()
mock_connect.side_effect = flaky_connect
-
+
# Test connection retry logic
for _ in range(10):
try:
@@ -101,9 +101,9 @@ def flaky_connect(*args, **kwargs):
category=ErrorCategory.DATABASE_ERROR,
severity=ErrorSeverity.HIGH,
service="database",
- details={"attempt": failure_count}
+ details={"attempt": failure_count},
)
-
+
# Verify error handling
metrics = error_handler.get_error_metrics()
assert metrics["errors_by_category"][ErrorCategory.DATABASE_ERROR.value] == 5
@@ -112,7 +112,7 @@ def test_partial_system_failure(self, error_handler):
"""Test system behavior during partial component failures"""
components = ["api", "database", "cache", "queue"]
failed_components = set()
-
+
def component_operation(component: str) -> bool:
if component in failed_components:
raise Exception(f"{component} failure")
@@ -120,7 +120,7 @@ def component_operation(component: str) -> bool:
# Simulate partial system failure
failed_components.update(["cache", "queue"])
-
+
# Test system operation with partial failures
for component in components:
try:
@@ -131,15 +131,15 @@ def component_operation(component: str) -> bool:
category=ErrorCategory.SYSTEM_ERROR,
severity=ErrorSeverity.HIGH,
service=component,
- details={"state": "degraded"}
+ details={"state": "degraded"},
)
-
+
# Verify system state
circuit_states = {
component: error_handler._get_circuit_breaker(component).state
for component in components
}
-
+
assert circuit_states["api"] == "closed"
assert circuit_states["database"] == "closed"
assert circuit_states["cache"] == "open"
@@ -158,21 +158,15 @@ def test_catastrophic_failure_recovery(self, error_handler):
category=ErrorCategory.SYSTEM_ERROR,
severity=ErrorSeverity.CRITICAL,
service="core_system",
- details={"state": "failed"}
+ details={"state": "failed"},
)
-
+
# Step 2: Verify all circuits are open
- assert all(
- cb.state == "open"
- for cb in error_handler.circuit_breakers.values()
- )
-
+ assert all(cb.state == "open" for cb in error_handler.circuit_breakers.values())
+
# Step 3: Begin recovery
- time.sleep(max(
- cb.reset_timeout
- for cb in error_handler.circuit_breakers.values()
- ))
-
+ time.sleep(max(cb.reset_timeout for cb in error_handler.circuit_breakers.values()))
+
# Step 4: Verify recovery
recovery_success = 0
for _ in range(5):
@@ -185,14 +179,11 @@ def test_catastrophic_failure_recovery(self, error_handler):
category=ErrorCategory.SYSTEM_ERROR,
severity=ErrorSeverity.HIGH,
service="core_system",
- details={"state": "recovering"}
+ details={"state": "recovering"},
)
-
+
assert recovery_success > 0
- assert any(
- cb.state == "closed"
- for cb in error_handler.circuit_breakers.values()
- )
+ assert any(cb.state == "closed" for cb in error_handler.circuit_breakers.values())
@contextmanager
def _simulate_catastrophic_failure(self) -> Generator[None, None, None]:
@@ -201,13 +192,9 @@ def _simulate_catastrophic_failure(self) -> Generator[None, None, None]:
"socket.socket",
connect=MagicMock(side_effect=socket.error),
send=MagicMock(side_effect=socket.error),
- recv=MagicMock(side_effect=socket.error)
- ), patch(
- "psycopg2.connect",
- side_effect=Exception("Database unreachable")
- ), patch(
- "redis.Redis",
- side_effect=Exception("Cache unreachable")
+ recv=MagicMock(side_effect=socket.error),
+ ), patch("psycopg2.connect", side_effect=Exception("Database unreachable")), patch(
+ "redis.Redis", side_effect=Exception("Cache unreachable")
):
yield
@@ -222,7 +209,7 @@ def _verify_normal_operation(self, error_handler: ErrorHandler) -> None:
category=ErrorCategory.SYSTEM_ERROR,
severity=ErrorSeverity.HIGH,
service="system_check",
- details={"state": "checking"}
+ details={"state": "checking"},
)
return False
@@ -233,6 +220,7 @@ def _make_external_call(self) -> Any:
def _db_operation(self) -> Any:
"""Simulate database operation"""
import psycopg2
+
conn = psycopg2.connect("dbname=test")
return conn
diff --git a/tests/integration/test_error_handling_stress.py b/tests/integration/test_error_handling_stress.py
index a731c4f..e10b5da 100644
--- a/tests/integration/test_error_handling_stress.py
+++ b/tests/integration/test_error_handling_stress.py
@@ -1,16 +1,13 @@
-import pytest
+import random
import threading
import time
-import random
from concurrent.futures import ThreadPoolExecutor, as_completed
-from typing import List, Dict, Any
+from typing import Any, Dict, List
+
+import pytest
+
+from feed_processor.error_handling import CircuitBreaker, ErrorCategory, ErrorHandler, ErrorSeverity
-from feed_processor.error_handling import (
- ErrorHandler,
- ErrorCategory,
- ErrorSeverity,
- CircuitBreaker
-)
class TestErrorHandlingStress:
@pytest.fixture
@@ -35,41 +32,40 @@ def test_concurrent_error_handling(self, error_handler):
"""Test error handling under concurrent load"""
num_threads = 10
iterations = 100
-
+
def worker():
for _ in range(iterations):
self.simulate_api_call(error_handler, "stress_test")
time.sleep(random.uniform(0.01, 0.05)) # Random delay
-
- threads = [
- threading.Thread(target=worker)
- for _ in range(num_threads)
- ]
-
+
+ threads = [threading.Thread(target=worker) for _ in range(num_threads)]
+
start_time = time.time()
-
+
# Start all threads
for thread in threads:
thread.start()
-
+
# Wait for all threads to complete
for thread in threads:
thread.join()
-
+
duration = time.time() - start_time
-
+
# Verify error handling integrity
metrics = error_handler.get_error_metrics()
assert len(error_handler.error_history) <= error_handler.error_history.maxlen
- assert all(cb.state in ["open", "closed", "half-open"]
- for cb in error_handler.circuit_breakers.values())
+ assert all(
+ cb.state in ["open", "closed", "half-open"]
+ for cb in error_handler.circuit_breakers.values()
+ )
def test_concurrent_circuit_breakers(self, error_handler):
"""Test multiple circuit breakers under concurrent load"""
services = ["service1", "service2", "service3"]
num_threads = 5
iterations = 50
-
+
def service_worker(service: str):
for _ in range(iterations):
# Simulate service calls with varying failure rates
@@ -86,25 +82,22 @@ def service_worker(service: str):
details={"thread": threading.get_ident()},
)
time.sleep(random.uniform(0.01, 0.03))
-
+
with ThreadPoolExecutor(max_workers=num_threads * len(services)) as executor:
futures = []
for service in services:
for _ in range(num_threads):
- futures.append(
- executor.submit(service_worker, service)
- )
-
+ futures.append(executor.submit(service_worker, service))
+
# Wait for all futures to complete
for future in as_completed(futures):
future.result()
-
+
# Verify circuit breaker states
circuit_states = {
- service: error_handler._get_circuit_breaker(service).state
- for service in services
+ service: error_handler._get_circuit_breaker(service).state for service in services
}
-
+
# service2 should be more likely to be open due to higher failure rate
assert any(state == "open" for state in circuit_states.values())
@@ -112,14 +105,14 @@ def test_error_logging_under_load(self, error_handler):
"""Test error logging system under heavy load"""
num_threads = 8
iterations = 75
-
+
error_scenarios = [
(ErrorCategory.API_ERROR, ErrorSeverity.HIGH),
(ErrorCategory.RATE_LIMIT_ERROR, ErrorSeverity.MEDIUM),
(ErrorCategory.SYSTEM_ERROR, ErrorSeverity.CRITICAL),
(ErrorCategory.PROCESSING_ERROR, ErrorSeverity.LOW),
]
-
+
def logging_worker():
for _ in range(iterations):
category, severity = random.choice(error_scenarios)
@@ -134,31 +127,28 @@ def logging_worker():
details={
"thread": threading.get_ident(),
"timestamp": time.time(),
- "test_data": "x" * random.randint(100, 1000)
+ "test_data": "x" * random.randint(100, 1000),
},
)
time.sleep(random.uniform(0.001, 0.01))
-
- threads = [
- threading.Thread(target=logging_worker)
- for _ in range(num_threads)
- ]
-
+
+ threads = [threading.Thread(target=logging_worker) for _ in range(num_threads)]
+
start_time = time.time()
-
+
for thread in threads:
thread.start()
-
+
for thread in threads:
thread.join()
-
+
duration = time.time() - start_time
-
+
# Verify logging integrity
metrics = error_handler.get_error_metrics()
assert len(error_handler.error_history) > 0
assert all(isinstance(err.error_id, str) for err in error_handler.error_history)
-
+
# Check error distribution
category_counts = metrics["errors_by_category"]
severity_counts = metrics["errors_by_severity"]
@@ -167,12 +157,12 @@ def logging_worker():
def test_memory_usage_under_load(self, error_handler):
"""Test memory usage with large error payloads"""
- import sys
import gc
-
+ import sys
+
initial_memory = self._get_memory_usage()
large_data = "x" * 1000000 # 1MB string
-
+
for _ in range(1000):
try:
raise Exception("Large error payload test")
@@ -184,10 +174,10 @@ def test_memory_usage_under_load(self, error_handler):
service="memory_test",
details={"large_data": large_data},
)
-
+
gc.collect() # Force garbage collection
final_memory = self._get_memory_usage()
-
+
# Verify memory usage is within reasonable bounds
memory_increase = final_memory - initial_memory
assert memory_increase < 100 * 1024 * 1024 # Less than 100MB increase
@@ -196,5 +186,6 @@ def test_memory_usage_under_load(self, error_handler):
def _get_memory_usage() -> int:
"""Get current memory usage in bytes"""
import psutil
+
process = psutil.Process()
return process.memory_info().rss
diff --git a/tests/integration/test_error_logging_pipeline.py b/tests/integration/test_error_logging_pipeline.py
index 9e65d75..5073178 100644
--- a/tests/integration/test_error_logging_pipeline.py
+++ b/tests/integration/test_error_logging_pipeline.py
@@ -1,17 +1,15 @@
-import pytest
-import os
import json
+import os
import time
-from pathlib import Path
-from unittest.mock import patch, MagicMock
from datetime import datetime, timedelta
-from typing import Dict, Any, List
+from pathlib import Path
+from typing import Any, Dict, List
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from feed_processor.error_handling import ErrorCategory, ErrorHandler, ErrorSeverity
-from feed_processor.error_handling import (
- ErrorHandler,
- ErrorCategory,
- ErrorSeverity
-)
class TestErrorLoggingPipeline:
@pytest.fixture
@@ -29,7 +27,7 @@ def test_end_to_end_logging_flow(self, error_handler, log_dir):
"""Test complete logging pipeline from error to storage"""
# Step 1: Generate various types of errors
errors = self._generate_test_errors()
-
+
# Step 2: Process errors through handler
logged_errors = []
for error_info in errors:
@@ -41,18 +39,18 @@ def test_end_to_end_logging_flow(self, error_handler, log_dir):
category=error_info["category"],
severity=error_info["severity"],
service=error_info["service"],
- details=error_info["details"]
+ details=error_info["details"],
)
logged_errors.append(result)
-
+
# Step 3: Verify system logs
system_log_file = log_dir / "system.log"
with patch("logging.FileHandler") as mock_handler:
mock_handler.baseFilename = str(system_log_file)
-
+
# Verify all errors were logged
assert mock_handler.handle.call_count >= len(errors)
-
+
# Verify log format and content
for call in mock_handler.handle.call_args_list:
record = call[0][0]
@@ -74,13 +72,13 @@ def test_airtable_logging_integration(self, error_handler):
category=error_info["category"],
severity=error_info["severity"],
service=error_info["service"],
- details=error_info["details"]
+ details=error_info["details"],
)
-
+
# Verify Airtable records
create_calls = mock_table.create.call_args_list
assert len(create_calls) > 0
-
+
for call in create_calls:
record = call[0][0]
# Verify sensitive data was removed
@@ -103,9 +101,9 @@ def test_error_notification_pipeline(self, error_handler):
category=ErrorCategory.SYSTEM_ERROR,
severity=ErrorSeverity.CRITICAL,
service="core_system",
- details={"impact": "high"}
+ details={"impact": "high"},
)
-
+
# Verify notification was sent
assert mock_post.called
notification_data = mock_post.call_args[1]["json"]
@@ -116,13 +114,13 @@ def test_log_rotation_and_cleanup(self, error_handler, log_dir):
"""Test log rotation and cleanup functionality"""
max_log_size = 1024 # 1KB
max_log_age = timedelta(days=7)
-
+
# Create some old log files
old_log = log_dir / "system.log.1"
old_log.write_text("Old log content")
old_time = time.time() - (max_log_age.days + 1) * 86400
os.utime(str(old_log), (old_time, old_time))
-
+
# Generate enough errors to trigger rotation
large_message = "x" * (max_log_size // 10)
for _ in range(20):
@@ -134,13 +132,13 @@ def test_log_rotation_and_cleanup(self, error_handler, log_dir):
category=ErrorCategory.SYSTEM_ERROR,
severity=ErrorSeverity.LOW,
service="test",
- details={"size": len(large_message)}
+ details={"size": len(large_message)},
)
-
+
# Verify log rotation
assert (log_dir / "system.log").exists()
assert (log_dir / "system.log.1").exists()
-
+
# Verify old logs were cleaned up
assert not old_log.exists()
@@ -148,12 +146,8 @@ def test_error_metrics_aggregation(self, error_handler):
"""Test error metrics collection and aggregation"""
# Generate errors across different categories and severities
errors = self._generate_test_errors()
- expected_counts = {
- "category": {},
- "severity": {},
- "service": {}
- }
-
+ expected_counts = {"category": {}, "severity": {}, "service": {}}
+
# Process errors and track expected counts
for error_info in errors:
try:
@@ -164,26 +158,23 @@ def test_error_metrics_aggregation(self, error_handler):
category=error_info["category"],
severity=error_info["severity"],
service=error_info["service"],
- details=error_info["details"]
+ details=error_info["details"],
)
-
+
# Update expected counts
cat = error_info["category"].value
sev = error_info["severity"].value
svc = error_info["service"]
-
+
expected_counts["category"][cat] = expected_counts["category"].get(cat, 0) + 1
expected_counts["severity"][sev] = expected_counts["severity"].get(sev, 0) + 1
expected_counts["service"][svc] = expected_counts["service"].get(svc, 0) + 1
-
+
# Verify metrics
metrics = error_handler.get_error_metrics()
assert metrics["errors_by_category"] == expected_counts["category"]
assert metrics["errors_by_severity"] == expected_counts["severity"]
- assert all(
- metrics["circuit_breaker_states"].get(svc)
- for svc in expected_counts["service"]
- )
+ assert all(metrics["circuit_breaker_states"].get(svc) for svc in expected_counts["service"])
@staticmethod
def _generate_test_errors() -> List[Dict[str, Any]]:
@@ -194,20 +185,14 @@ def _generate_test_errors() -> List[Dict[str, Any]]:
"category": ErrorCategory.API_ERROR,
"severity": ErrorSeverity.HIGH,
"service": "inoreader",
- "details": {
- "api_key": "secret",
- "endpoint": "/auth"
- }
+ "details": {"api_key": "secret", "endpoint": "/auth"},
},
{
"message": "Rate limit exceeded",
"category": ErrorCategory.RATE_LIMIT_ERROR,
"severity": ErrorSeverity.MEDIUM,
"service": "webhook",
- "details": {
- "limit": 100,
- "current": 150
- }
+ "details": {"limit": 100, "current": 150},
},
{
"message": "Database connection failed",
@@ -216,7 +201,7 @@ def _generate_test_errors() -> List[Dict[str, Any]]:
"service": "database",
"details": {
"connection_string": "sensitive_info",
- "error_code": "CONNECTION_REFUSED"
- }
- }
+ "error_code": "CONNECTION_REFUSED",
+ },
+ },
]
diff --git a/tests/integration/test_feed_processor_integration.py b/tests/integration/test_feed_processor_integration.py
index 5cd0e77..637832c 100644
--- a/tests/integration/test_feed_processor_integration.py
+++ b/tests/integration/test_feed_processor_integration.py
@@ -1,161 +1,168 @@
-import pytest
-from unittest.mock import Mock, patch
+import threading
import time
from datetime import datetime
-import threading
+from unittest.mock import Mock, patch
+import pytest
+
+from feed_processor.content_queue import ContentQueue, QueueItem
from feed_processor.processor import FeedProcessor
from feed_processor.webhook_manager import WebhookManager, WebhookResponse
-from feed_processor.content_queue import ContentQueue, QueueItem
+
@pytest.fixture
def webhook_manager():
return WebhookManager(
webhook_url="https://test-webhook.example.com/endpoint",
rate_limit=0.1, # Shorter for testing
- max_retries=2
+ max_retries=2,
)
+
@pytest.fixture
def content_queue():
return ContentQueue(max_size=100, deduplication_window=60)
+
@pytest.fixture
def feed_processor(webhook_manager, content_queue):
return FeedProcessor(
webhook_manager=webhook_manager,
content_queue=content_queue,
batch_size=5,
- processing_interval=0.1
+ processing_interval=0.1,
)
+
@pytest.fixture
def sample_content_item():
return {
"id": "test123",
"title": "Test Article",
- "summary": {
- "content": "This is a test article for integration testing"
- },
+ "summary": {"content": "This is a test article for integration testing"},
"canonical": [{"href": "https://example.com/test-article"}],
"published": "2024-12-12T12:00:00Z",
"author": "Test Author",
- "categories": ["test", "integration"]
+ "categories": ["test", "integration"],
}
+
def test_content_transformation(feed_processor, sample_content_item):
webhook_payload = feed_processor._transform_to_webhook_payload(sample_content_item)
-
+
assert webhook_payload["title"] == sample_content_item["title"]
assert webhook_payload["contentType"] == ["BLOG"]
assert webhook_payload["brief"] == sample_content_item["summary"]["content"]
assert webhook_payload["sourceMetadata"]["feedId"] == sample_content_item["id"]
- assert webhook_payload["sourceMetadata"]["originalUrl"] == sample_content_item["canonical"][0]["href"]
+ assert (
+ webhook_payload["sourceMetadata"]["originalUrl"]
+ == sample_content_item["canonical"][0]["href"]
+ )
+
def test_content_type_detection(feed_processor):
video_item = {
"canonical": [{"href": "https://youtube.com/watch?v=123"}],
"title": "",
- "summary": {"content": ""}
+ "summary": {"content": ""},
}
social_item = {
"canonical": [{"href": "https://twitter.com/user/status/123"}],
"title": "",
- "summary": {"content": ""}
+ "summary": {"content": ""},
}
blog_item = {
"canonical": [{"href": "https://example.com/blog"}],
"title": "",
- "summary": {"content": ""}
+ "summary": {"content": ""},
}
-
+
assert feed_processor._detect_content_type(video_item) == "VIDEO"
assert feed_processor._detect_content_type(social_item) == "SOCIAL"
assert feed_processor._detect_content_type(blog_item) == "BLOG"
+
def test_priority_calculation(feed_processor):
high_priority = {
"title": "BREAKING: Important News",
- "summary": {"content": "Urgent update on..."}
+ "summary": {"content": "Urgent update on..."},
}
- medium_priority = {
- "title": "New Feature Release",
- "summary": {"content": "Latest updates..."}
- }
- low_priority = {
- "title": "Regular Article",
- "summary": {"content": "Standard content..."}
- }
-
+ medium_priority = {"title": "New Feature Release", "summary": {"content": "Latest updates..."}}
+ low_priority = {"title": "Regular Article", "summary": {"content": "Standard content..."}}
+
assert feed_processor._calculate_priority(high_priority) == "High"
assert feed_processor._calculate_priority(medium_priority) == "Medium"
assert feed_processor._calculate_priority(low_priority) == "Low"
-@patch('requests.post')
+
+@patch("requests.post")
def test_batch_processing(mock_post, feed_processor, sample_content_item):
mock_response = Mock()
mock_response.status_code = 200
mock_post.return_value = mock_response
-
+
# Add items to queue
for i in range(10):
item = sample_content_item.copy()
item["id"] = f"test{i}"
feed_processor.content_queue.add(QueueItem(item["id"], item))
-
+
# Process one batch
feed_processor._process_batch()
-
+
# Should have processed batch_size items
assert mock_post.call_count == 1 # One bulk request
assert feed_processor.content_queue.size() == 5 # Remaining items
-@patch('requests.post')
+
+@patch("requests.post")
def test_failed_delivery_requeue(mock_post, feed_processor, sample_content_item):
mock_response = Mock()
mock_response.status_code = 503 # Server error
mock_post.return_value = mock_response
-
+
feed_processor.content_queue.add(QueueItem(sample_content_item["id"], sample_content_item))
initial_size = feed_processor.content_queue.size()
-
+
feed_processor._process_batch()
-
+
# Item should be requeued
assert feed_processor.content_queue.size() == initial_size
+
def test_processor_lifecycle(feed_processor):
# Start processor
feed_processor.start()
assert feed_processor.processing is True
assert feed_processor.process_thread.is_alive()
-
+
# Stop processor
feed_processor.stop()
assert feed_processor.processing is False
assert not feed_processor.process_thread.is_alive()
-@patch('requests.post')
+
+@patch("requests.post")
def test_end_to_end_processing(mock_post, feed_processor, sample_content_item):
mock_response = Mock()
mock_response.status_code = 200
mock_post.return_value = mock_response
-
+
# Add items to queue
for i in range(3):
item = sample_content_item.copy()
item["id"] = f"test{i}"
feed_processor.content_queue.add(QueueItem(item["id"], item))
-
+
# Start processing
feed_processor.start()
-
+
# Wait for processing
time.sleep(0.5)
-
+
# Stop processing
feed_processor.stop()
-
+
# Verify all items were processed
assert feed_processor.content_queue.empty()
assert mock_post.call_count >= 1 # At least one webhook call made
diff --git a/tests/integration/test_inoreader_integration.py b/tests/integration/test_inoreader_integration.py
index e4afb32..cb93ba0 100644
--- a/tests/integration/test_inoreader_integration.py
+++ b/tests/integration/test_inoreader_integration.py
@@ -1,14 +1,12 @@
-import pytest
import os
import time
-from unittest.mock import patch
from datetime import datetime, timedelta
+from unittest.mock import patch
+
+import pytest
+
+from feed_processor.error_handling import ErrorCategory, ErrorHandler, ErrorSeverity
-from feed_processor.error_handling import (
- ErrorHandler,
- ErrorCategory,
- ErrorSeverity
-)
class TestInoreaderIntegration:
@pytest.fixture
@@ -27,11 +25,14 @@ def test_authentication_error_handling(self, error_handler):
with patch.dict(os.environ, {"INOREADER_TOKEN": "invalid_token"}):
with pytest.raises(Exception) as exc_info:
self._make_api_call(error_handler)
-
+
assert "authentication" in str(exc_info.value).lower()
- assert error_handler.get_error_metrics()["errors_by_category"].get(
- ErrorCategory.API_ERROR.value, 0
- ) > 0
+ assert (
+ error_handler.get_error_metrics()["errors_by_category"].get(
+ ErrorCategory.API_ERROR.value, 0
+ )
+ > 0
+ )
def test_rate_limit_recovery(self, error_handler, inoreader_token):
"""Test recovery from rate limit errors"""
@@ -42,17 +43,17 @@ def test_rate_limit_recovery(self, error_handler, inoreader_token):
except Exception:
continue
time.sleep(0.1)
-
+
# Verify rate limit handling
metrics = error_handler.get_error_metrics()
rate_limit_errors = metrics["errors_by_category"].get(
ErrorCategory.RATE_LIMIT_ERROR.value, 0
)
assert rate_limit_errors > 0
-
+
# Wait for rate limit reset
time.sleep(5)
-
+
# Verify recovery
try:
self._make_api_call(error_handler)
@@ -65,23 +66,23 @@ def test_error_recovery_flow(self, error_handler, inoreader_token):
# Step 1: Force circuit breaker open
with patch("requests.get") as mock_get:
mock_get.side_effect = Exception("Simulated API error")
-
+
for _ in range(5):
try:
self._make_api_call(error_handler)
except Exception:
continue
-
+
cb = error_handler._get_circuit_breaker("inoreader")
assert cb.state == "open"
-
+
# Step 2: Wait for reset timeout
time.sleep(cb.reset_timeout)
-
+
# Step 3: Verify half-open state
assert cb.can_execute()
assert cb.state == "half-open"
-
+
# Step 4: Make successful request
try:
self._make_api_call(error_handler)
@@ -93,12 +94,12 @@ def test_malformed_response_handling(self, error_handler, inoreader_token):
"""Test handling of malformed API responses"""
with patch("requests.get") as mock_get:
mock_get.return_value.json.side_effect = ValueError("Invalid JSON")
-
+
try:
self._make_api_call(error_handler)
except Exception as e:
assert "Invalid JSON" in str(e)
-
+
# Verify error was logged correctly
last_error = list(error_handler.error_history)[-1]
assert last_error.category == ErrorCategory.API_ERROR
@@ -108,33 +109,31 @@ def test_timeout_handling(self, error_handler, inoreader_token):
"""Test handling of API timeouts"""
with patch("requests.get") as mock_get:
mock_get.side_effect = TimeoutError("Request timed out")
-
+
start_time = time.time()
try:
self._make_api_call(error_handler)
except Exception:
pass
-
+
duration = time.time() - start_time
-
+
# Verify retry behavior
assert duration >= 1.0 # Should have attempted retries
-
+
metrics = error_handler.get_error_metrics()
- assert metrics["errors_by_category"].get(
- ErrorCategory.API_ERROR.value, 0
- ) > 0
+ assert metrics["errors_by_category"].get(ErrorCategory.API_ERROR.value, 0) > 0
@staticmethod
def _make_api_call(error_handler: ErrorHandler) -> None:
"""Helper to make API call with error handling"""
import requests
-
+
try:
response = requests.get(
"https://www.inoreader.com/reader/api/0/user-info",
headers={"Authorization": f"Bearer {os.getenv('INOREADER_TOKEN')}"},
- timeout=5
+ timeout=5,
)
response.raise_for_status()
return response.json()
@@ -144,9 +143,6 @@ def _make_api_call(error_handler: ErrorHandler) -> None:
category=ErrorCategory.API_ERROR,
severity=ErrorSeverity.HIGH,
service="inoreader",
- details={
- "endpoint": "/user-info",
- "timestamp": datetime.utcnow().isoformat()
- }
+ details={"endpoint": "/user-info", "timestamp": datetime.utcnow().isoformat()},
)
raise
diff --git a/tests/integration/test_monitoring.py b/tests/integration/test_monitoring.py
index be3b6bb..11fc2f6 100644
--- a/tests/integration/test_monitoring.py
+++ b/tests/integration/test_monitoring.py
@@ -1,14 +1,18 @@
"""Integration tests for the monitoring system."""
+
import pytest
-from prometheus_client.parser import text_string_to_metric_families
import requests
+from prometheus_client.parser import text_string_to_metric_families
+
from feed_processor import FeedProcessor
from feed_processor.metrics_exporter import PrometheusExporter
+
@pytest.fixture
def feed_processor():
return FeedProcessor()
+
@pytest.fixture
def metrics_exporter():
exporter = PrometheusExporter(port=8000)
@@ -16,59 +20,58 @@ def metrics_exporter():
yield exporter
exporter.stop()
+
def test_metrics_exposure(feed_processor, metrics_exporter):
"""Test that metrics are properly exposed via HTTP."""
# Process some items
feed_processor.process_queue(batch_size=5)
-
+
# Update metrics
metrics_snapshot = feed_processor.metrics.get_snapshot()
metrics_exporter.update_from_snapshot(metrics_snapshot)
-
+
# Fetch metrics via HTTP
response = requests.get("http://localhost:8000/metrics")
assert response.status_code == 200
-
+
# Parse metrics
metrics = list(text_string_to_metric_families(response.text))
-
+
# Verify essential metrics are present
metric_names = {m.name for m in metrics}
assert "feed_items_processed_total" in metric_names
assert "feed_processing_latency_seconds" in metric_names
assert "feed_queue_size" in metric_names
+
def test_grafana_dashboard_provisioning(metrics_exporter):
"""Test that Grafana can access the metrics."""
# Verify Grafana is accessible
response = requests.get("http://localhost:3000/api/health")
assert response.status_code == 200
-
+
# Verify Prometheus datasource is configured
response = requests.get(
- "http://localhost:3000/api/datasources/name/prometheus",
- auth=("admin", "admin")
+ "http://localhost:3000/api/datasources/name/prometheus", auth=("admin", "admin")
)
assert response.status_code == 200
+
def test_prometheus_scraping(feed_processor, metrics_exporter):
"""Test that Prometheus can scrape our metrics."""
# Process some items to generate metrics
feed_processor.process_queue(batch_size=5)
-
+
# Update metrics
metrics_snapshot = feed_processor.metrics.get_snapshot()
metrics_exporter.update_from_snapshot(metrics_snapshot)
-
+
# Verify Prometheus can scrape our target
response = requests.get("http://localhost:9090/api/v1/targets")
assert response.status_code == 200
-
+
data = response.json()
targets = data["data"]["activeTargets"]
- our_target = next(
- (t for t in targets if t["labels"].get("job") == "feed_processor"),
- None
- )
+ our_target = next((t for t in targets if t["labels"].get("job") == "feed_processor"), None)
assert our_target is not None
assert our_target["health"] == "up"
diff --git a/tests/integration/test_webhook.py b/tests/integration/test_webhook.py
index 9b1a879..79a9303 100644
--- a/tests/integration/test_webhook.py
+++ b/tests/integration/test_webhook.py
@@ -1,63 +1,68 @@
"""Integration tests for webhook delivery system."""
-import pytest
+
+import time
from unittest.mock import patch
+
+import pytest
import requests
-import time
+
from feed_processor import FeedProcessor
from feed_processor.webhook import WebhookManager
+
@pytest.fixture
def webhook_manager():
return WebhookManager(
- webhook_url="http://localhost:8080/webhook",
- rate_limit=0.2,
- max_retries=3
+ webhook_url="http://localhost:8080/webhook", rate_limit=0.2, max_retries=3
)
+
def test_rate_limiting(webhook_manager):
"""Test that webhook delivery respects rate limits."""
start_time = time.time()
-
+
# Send multiple requests
for _ in range(5):
webhook_manager.send({"test": "data"})
-
+
end_time = time.time()
duration = end_time - start_time
-
+
# With rate limit of 0.2 req/s, 5 requests should take at least 20 seconds
assert duration >= 20
+
def test_retry_mechanism(webhook_manager):
"""Test webhook retry mechanism with failing endpoint."""
- with patch('requests.post') as mock_post:
+ with patch("requests.post") as mock_post:
# Make first two calls fail, third succeed
mock_post.side_effect = [
requests.exceptions.RequestException,
requests.exceptions.RequestException,
- type('Response', (), {'status_code': 200})()
+ type("Response", (), {"status_code": 200})(),
]
-
+
# Send webhook
result = webhook_manager.send({"test": "data"})
-
+
# Verify retries
assert mock_post.call_count == 3
assert result.success
+
def test_circuit_breaker(webhook_manager):
"""Test circuit breaker prevents requests after failures."""
- with patch('requests.post') as mock_post:
+ with patch("requests.post") as mock_post:
# Make all calls fail
mock_post.side_effect = requests.exceptions.RequestException
-
+
# Send multiple webhooks to trigger circuit breaker
for _ in range(10):
webhook_manager.send({"test": "data"})
-
+
# Verify circuit breaker is open
assert webhook_manager.circuit_breaker.is_open
-
+
# Try one more request
result = webhook_manager.send({"test": "data"})
assert not result.success
diff --git a/tests/integration/test_webhook_rate_limiting.py b/tests/integration/test_webhook_rate_limiting.py
index d2102c5..df0c697 100644
--- a/tests/integration/test_webhook_rate_limiting.py
+++ b/tests/integration/test_webhook_rate_limiting.py
@@ -1,27 +1,26 @@
-import pytest
-import time
-from datetime import datetime, timezone, timedelta
import threading
-from unittest.mock import patch
+import time
from concurrent.futures import ThreadPoolExecutor, as_completed
-from typing import List, Dict, Any
+from datetime import datetime, timedelta, timezone
+from typing import Any, Dict, List
+from unittest.mock import patch
+
+import pytest
-from feed_processor.webhook_manager import WebhookManager, WebhookResponse
from feed_processor.content_queue import ContentQueue
from feed_processor.processor import FeedProcessor
+from feed_processor.webhook_manager import WebhookManager, WebhookResponse
+
class TestWebhookRateLimiting:
@pytest.fixture
def webhook_manager(self):
- return WebhookManager(
- webhook_url="http://test.webhook",
- rate_limit=0.2
- )
-
+ return WebhookManager(webhook_url="http://test.webhook", rate_limit=0.2)
+
@pytest.fixture
def content_queue(self):
return ContentQueue(max_size=1000)
-
+
@pytest.fixture
def processor(self, webhook_manager, content_queue):
return FeedProcessor(
@@ -29,7 +28,7 @@ def processor(self, webhook_manager, content_queue):
webhook_url="http://test.webhook",
webhook_manager=webhook_manager,
content_queue=content_queue,
- test_mode=True
+ test_mode=True,
)
def is_valid_timestamp(self, timestamp_str: str, reference_time: datetime) -> bool:
@@ -41,33 +40,32 @@ def is_valid_timestamp(self, timestamp_str: str, reference_time: datetime) -> bo
except ValueError:
return False
- @patch('requests.post')
+ @patch("requests.post")
def test_rate_limit_compliance(self, mock_post, webhook_manager):
"""Test that webhook requests comply with rate limit."""
mock_post.return_value.status_code = 200
num_requests = 5
reference_time = datetime.now(timezone.utc)
start_time = time.time()
-
+
# Send multiple requests
responses = []
for i in range(num_requests):
- response = webhook_manager.send_webhook({
- "title": f"Test {i}",
- "contentType": ["BLOG"],
- "brief": f"Test content {i}"
- })
+ response = webhook_manager.send_webhook(
+ {"title": f"Test {i}", "contentType": ["BLOG"], "brief": f"Test content {i}"}
+ )
responses.append(response)
-
+
end_time = time.time()
duration = end_time - start_time
-
+
# Verify timing
min_expected_duration = (num_requests - 1) * 0.2
max_expected_duration = min_expected_duration + 0.1
- assert min_expected_duration <= duration <= max_expected_duration, \
- f"Duration {duration:.2f}s outside expected range [{min_expected_duration:.2f}, {max_expected_duration:.2f}]"
-
+ assert (
+ min_expected_duration <= duration <= max_expected_duration
+ ), f"Duration {duration:.2f}s outside expected range [{min_expected_duration:.2f}, {max_expected_duration:.2f}]"
+
# Verify all requests were successful
assert all(r.success for r in responses)
# Verify timestamps are within acceptable range
@@ -75,44 +73,47 @@ def test_rate_limit_compliance(self, mock_post, webhook_manager):
# Verify the number of calls
assert mock_post.call_count == num_requests
- @patch('requests.post')
+ @patch("requests.post")
def test_concurrent_webhook_delivery(self, mock_post, webhook_manager):
"""Test rate limiting under concurrent load."""
mock_post.return_value.status_code = 200
num_threads = 3
requests_per_thread = 2
reference_time = datetime.now(timezone.utc)
-
+
def worker():
responses = []
for i in range(requests_per_thread):
- response = webhook_manager.send_webhook({
- "title": f"Test {threading.get_ident()}-{i}",
- "contentType": ["BLOG"],
- "brief": f"Test content {i}"
- })
+ response = webhook_manager.send_webhook(
+ {
+ "title": f"Test {threading.get_ident()}-{i}",
+ "contentType": ["BLOG"],
+ "brief": f"Test content {i}",
+ }
+ )
responses.append(response)
return responses
-
+
start_time = time.time()
-
+
with ThreadPoolExecutor(max_workers=num_threads) as executor:
futures = [executor.submit(worker) for _ in range(num_threads)]
all_responses = []
for future in as_completed(futures):
all_responses.extend(future.result())
-
+
end_time = time.time()
duration = end_time - start_time
-
+
total_requests = num_threads * requests_per_thread
-
+
# Verify timing
min_expected_duration = (total_requests - 1) * 0.2
max_expected_duration = min_expected_duration + 0.2
- assert min_expected_duration <= duration <= max_expected_duration, \
- f"Duration {duration:.2f}s outside expected range [{min_expected_duration:.2f}, {max_expected_duration:.2f}]"
-
+ assert (
+ min_expected_duration <= duration <= max_expected_duration
+ ), f"Duration {duration:.2f}s outside expected range [{min_expected_duration:.2f}, {max_expected_duration:.2f}]"
+
# Verify all requests were successful
assert all(r.success for r in all_responses)
# Verify timestamps are within acceptable range
@@ -122,24 +123,26 @@ def worker():
# Verify we got the expected number of responses
assert len(all_responses) == total_requests
- @patch('requests.post')
+ @patch("requests.post")
def test_end_to_end_processing(self, mock_post, processor):
"""Test end-to-end processing with rate limiting."""
mock_post.return_value.status_code = 200
num_items = 3
reference_time = datetime.now(timezone.utc)
-
+
# Add items to queue
for i in range(num_items):
- processor.queue.enqueue({
- "id": f"test_{i}",
- "title": f"Test {i}",
- "contentType": ["BLOG"],
- "brief": f"Test content {i}"
- })
-
+ processor.queue.enqueue(
+ {
+ "id": f"test_{i}",
+ "title": f"Test {i}",
+ "contentType": ["BLOG"],
+ "brief": f"Test content {i}",
+ }
+ )
+
start_time = time.time()
-
+
# Process items
processed_items = []
while len(processed_items) < num_items and (time.time() - start_time) < 5:
@@ -149,16 +152,17 @@ def test_end_to_end_processing(self, mock_post, processor):
if response.success:
processed_items.append(item)
processor.queue.mark_processed(item)
-
+
end_time = time.time()
duration = end_time - start_time
-
+
# Verify timing
min_expected_duration = (num_items - 1) * 0.2
max_expected_duration = min_expected_duration + 0.1
- assert min_expected_duration <= duration <= max_expected_duration, \
- f"Duration {duration:.2f}s outside expected range [{min_expected_duration:.2f}, {max_expected_duration:.2f}]"
-
+ assert (
+ min_expected_duration <= duration <= max_expected_duration
+ ), f"Duration {duration:.2f}s outside expected range [{min_expected_duration:.2f}, {max_expected_duration:.2f}]"
+
# Verify queue is empty
assert processor.queue.size == 0
# Verify all items were processed
diff --git a/tests/load_testing/data_generator.py b/tests/load_testing/data_generator.py
new file mode 100644
index 0000000..1925378
--- /dev/null
+++ b/tests/load_testing/data_generator.py
@@ -0,0 +1,105 @@
+"""Feed data generator for load testing."""
+
+import random
+import time
+from datetime import datetime, timedelta
+from typing import Dict, List, Literal, TypedDict
+
+
+class FeedItem(TypedDict):
+ title: str
+ content: str
+ content_type: Literal["BLOG", "VIDEO", "SOCIAL"]
+ priority: Literal["High", "Medium", "Low"]
+ published_at: str
+ url: str
+
+
+class TestFeed(TypedDict):
+ items: List[FeedItem]
+ update_frequency: Literal["high", "medium", "low"]
+ size: Literal["small", "medium", "large"]
+
+
+def create_feed_item(
+ title: str,
+ content_type: Literal["BLOG", "VIDEO", "SOCIAL"],
+ priority: Literal["High", "Medium", "Low"],
+) -> FeedItem:
+ """Create a single feed item for testing."""
+ content_templates = {
+ "BLOG": "This is a blog post about {topic} with {words} words...",
+ "VIDEO": "Video content showcasing {topic} with duration {duration} minutes",
+ "SOCIAL": "Social media update about {topic} with {engagement} interactions",
+ }
+
+ topics = ["technology", "science", "health", "business", "entertainment"]
+
+ return {
+ "title": title,
+ "content": content_templates[content_type].format(
+ topic=random.choice(topics),
+ words=random.randint(100, 1000),
+ duration=random.randint(1, 30),
+ engagement=random.randint(10, 10000),
+ ),
+ "content_type": content_type,
+ "priority": priority,
+ "published_at": (datetime.now() - timedelta(hours=random.randint(0, 24))).isoformat(),
+ "url": f"https://example.com/content/{random.randint(1000, 9999)}",
+ }
+
+
+def generate_test_feed(
+ size: Literal["small", "medium", "large"], content_type: Literal["BLOG", "VIDEO", "SOCIAL"]
+) -> TestFeed:
+ """Generate a complete test feed with specified characteristics."""
+ size_ranges = {"small": (10, 50), "medium": (100, 500), "large": (1000, 2000)}
+
+ update_frequencies = {"small": "high", "medium": "medium", "large": "low"}
+
+ item_count = random.randint(*size_ranges[size])
+
+ return {
+ "items": [
+ create_feed_item(
+ title=f"Test Item {i}",
+ content_type=content_type,
+ priority=random.choice(["High", "Medium", "Low"]),
+ )
+ for i in range(item_count)
+ ],
+ "size": size,
+ "update_frequency": update_frequencies[size],
+ }
+
+
+def simulate_load(feeds_per_minute: int, duration_seconds: int) -> None:
+ """
+ Simulate production load by generating and processing feeds at a specified rate.
+
+ Args:
+ feeds_per_minute: Number of feeds to generate per minute
+ duration_seconds: How long to run the simulation in seconds
+ """
+ start_time = time.time()
+ feeds_generated = 0
+
+ while time.time() - start_time < duration_seconds:
+ feed = generate_test_feed(
+ size=random.choice(["small", "medium", "large"]),
+ content_type=random.choice(["BLOG", "VIDEO", "SOCIAL"]),
+ )
+
+ # In a real implementation, this would call the feed processor
+ # process_feed(feed)
+
+ feeds_generated += 1
+ time.sleep(60 / feeds_per_minute)
+
+ if feeds_generated % 100 == 0:
+ print(f"Generated {feeds_generated} feeds...")
+
+ print(
+ f"Load simulation complete. Generated {feeds_generated} feeds in {duration_seconds} seconds"
+ )
diff --git a/tests/load_testing/locustfile.py b/tests/load_testing/locustfile.py
new file mode 100644
index 0000000..8fbb897
--- /dev/null
+++ b/tests/load_testing/locustfile.py
@@ -0,0 +1,48 @@
+"""Locust load testing configuration for feed processing system."""
+
+import json
+import random
+
+from data_generator import generate_test_feed
+from locust import HttpUser, between, task
+
+
+class FeedProcessingUser(HttpUser):
+ """Simulates users sending feeds to the processing system."""
+
+ # Wait between 1 and 5 seconds between tasks
+ wait_time = between(1, 5)
+
+ def on_start(self):
+ """Initialize the user session."""
+ # Configure base URLs for different services
+ self.metrics_url = "http://localhost:49152"
+ self.api_url = "http://localhost:8000" # Default API port
+
+ @task(3) # Higher weight for small feeds
+ def process_small_feed(self):
+ """Submit a small feed for processing."""
+ feed = generate_test_feed("small", random.choice(["BLOG", "VIDEO", "SOCIAL"]))
+ self.client.post(f"{self.api_url}/process", json=feed)
+
+ @task(2) # Medium weight for medium feeds
+ def process_medium_feed(self):
+ """Submit a medium-sized feed for processing."""
+ feed = generate_test_feed("medium", random.choice(["BLOG", "VIDEO", "SOCIAL"]))
+ self.client.post(f"{self.api_url}/process", json=feed)
+
+ @task(1) # Lower weight for large feeds
+ def process_large_feed(self):
+ """Submit a large feed for processing."""
+ feed = generate_test_feed("large", random.choice(["BLOG", "VIDEO", "SOCIAL"]))
+ self.client.post(f"{self.api_url}/process", json=feed)
+
+ @task(4) # Highest weight for webhook status checks
+ def check_webhook_status(self):
+ """Check the status of webhook deliveries."""
+ self.client.get(f"{self.api_url}/webhook/status")
+
+ @task(2)
+ def get_metrics(self):
+ """Retrieve processing metrics."""
+ self.client.get(f"{self.metrics_url}/metrics")
diff --git a/tests/load_testing/recovery_tests.py b/tests/load_testing/recovery_tests.py
new file mode 100644
index 0000000..efee749
--- /dev/null
+++ b/tests/load_testing/recovery_tests.py
@@ -0,0 +1,129 @@
+"""Recovery test scenarios for the feed processing system."""
+
+import subprocess
+import time
+from typing import Any, Callable, Dict
+
+import docker
+import psutil
+
+
+class RecoveryTest:
+ def __init__(self):
+ self.docker_client = docker.from_env()
+
+ def network_partition(self, duration: int) -> None:
+ """Simulate network partition by temporarily blocking network access."""
+ try:
+ # Create network isolation
+ subprocess.run(
+                ["sudo", "tc", "qdisc", "add", "dev", "lo", "root", "netem", "loss", "100%"], check=True
+ )
+ print("Network partition created")
+
+ time.sleep(duration)
+
+ # Remove network isolation
+            subprocess.run(["sudo", "tc", "qdisc", "del", "dev", "lo", "root"], check=True)
+ print("Network partition removed")
+
+ except subprocess.CalledProcessError as e:
+ print(f"Failed to simulate network partition: {e}")
+
+ def webhook_failure(self, duration: int) -> None:
+ """Simulate webhook endpoint failures."""
+ try:
+ # Stop the mock webhook service
+ containers = self.docker_client.containers.list(filters={"name": "mock-webhook"})
+ if containers:
+ containers[0].stop()
+ print("Webhook service stopped")
+
+ time.sleep(duration)
+
+ # Restart the mock webhook service
+ if containers:
+ containers[0].start()
+ print("Webhook service restarted")
+
+ except docker.errors.DockerException as e:
+ print(f"Failed to simulate webhook failure: {e}")
+
+ def memory_pressure(self, target_percentage: int, duration: int) -> None:
+ """Simulate memory pressure by allocating memory."""
+ try:
+ # Calculate target memory usage
+ total_memory = psutil.virtual_memory().total
+ target_bytes = (total_memory * target_percentage) // 100
+
+ # Allocate memory
+ memory_hog = b"x" * target_bytes
+ print(f"Allocated {target_bytes / (1024*1024):.2f} MB of memory")
+
+ time.sleep(duration)
+
+ # Release memory
+ del memory_hog
+ print("Memory released")
+
+ except Exception as e:
+ print(f"Failed to simulate memory pressure: {e}")
+
+
+def run_recovery_test(
+ test_type: str,
+ duration: int,
+ config: Dict[str, Any],
+ callback: Callable[[str, Dict[str, Any]], None],
+) -> None:
+ """
+ Run a specific recovery test scenario.
+
+ Args:
+ test_type: Type of recovery test to run
+ duration: Duration of the test in seconds
+ config: Test configuration parameters
+ callback: Function to call with test results
+ """
+ recovery_test = RecoveryTest()
+
+ test_scenarios = {
+ "network_partition": recovery_test.network_partition,
+ "webhook_failure": recovery_test.webhook_failure,
+ "memory_pressure": recovery_test.memory_pressure,
+ }
+
+ if test_type not in test_scenarios:
+ raise ValueError(f"Unknown test type: {test_type}")
+
+ print(f"Starting {test_type} recovery test")
+ start_time = time.time()
+
+ try:
+ # Run the recovery test
+ test_scenarios[test_type](duration)
+
+ # Calculate recovery metrics
+ recovery_time = time.time() - start_time
+ results = {
+ "test_type": test_type,
+ "duration": duration,
+ "recovery_time": recovery_time,
+ "success": True,
+ }
+
+ except Exception as e:
+ results = {"test_type": test_type, "duration": duration, "error": str(e), "success": False}
+
+ callback(test_type, results)
+
+
+if __name__ == "__main__":
+ # Example usage
+ def print_results(test_type: str, results: Dict[str, Any]) -> None:
+ print(f"\nResults for {test_type}:")
+ for key, value in results.items():
+ print(f"{key}: {value}")
+
+ # Run a network partition test for 60 seconds
+ run_recovery_test("network_partition", 60, {"severity": "complete"}, print_results)
diff --git a/tests/load_testing/run_load_tests.py b/tests/load_testing/run_load_tests.py
new file mode 100644
index 0000000..dca90e8
--- /dev/null
+++ b/tests/load_testing/run_load_tests.py
@@ -0,0 +1,104 @@
+"""Script to execute load tests with different scenarios."""
+
+import argparse
+import subprocess
+import time
+from typing import Any, Dict
+
+import requests
+
+
+def run_locust(scenario: str, duration: str, host: str) -> None:
+ """Run locust with specified parameters."""
+    config = get_scenario_config(scenario)
+    cmd = [
+        "locust",
+        "-f",
+        "locustfile.py",
+        "--headless",
+        "-u",
+        config["users"],
+        "-r",
+        config["spawn_rate"],
+        "--run-time",
+        duration,
+        "--host", host,
+    ]
+ subprocess.run(cmd, check=True)
+
+
+def get_scenario_config(scenario: str) -> Dict[str, Any]:
+ """Get configuration for different test scenarios."""
+ configs = {
+ "baseline": {
+ "users": "100",
+ "spawn_rate": "10",
+ "feeds_per_minute": "100",
+ "queue_size": "1000",
+ "webhook_rate": "5",
+ },
+ "normal": {
+ "users": "500",
+ "spawn_rate": "20",
+ "feeds_per_minute": "500",
+ "queue_size": "5000",
+ "webhook_rate": "20",
+ },
+ "peak": {
+ "users": "2000",
+ "spawn_rate": "50",
+ "feeds_per_minute": "2000",
+ "queue_size": "10000",
+ "webhook_rate": "50",
+ },
+ }
+ return configs.get(scenario, configs["baseline"])
+
+
+def check_metrics_endpoint() -> bool:
+ """Verify that metrics endpoint is accessible."""
+ try:
+        response = requests.get("http://localhost:49152/metrics", timeout=5)
+ return response.status_code == 200
+ except requests.exceptions.RequestException:
+ return False
+
+
+def main():
+ """Main entry point."""
+ parser = argparse.ArgumentParser(description="Run load tests for feed processing system")
+ parser.add_argument(
+ "--scenario",
+ choices=["baseline", "normal", "peak", "recovery"],
+ default="baseline",
+ help="Test scenario to run",
+ )
+ parser.add_argument(
+ "--duration", default="5m", help="Duration of the test (e.g., '1h', '30m', '5m')"
+ )
+ parser.add_argument(
+ "--recovery-type",
+ choices=["network_partition", "webhook_failure", "memory_pressure"],
+ help="Type of recovery test to run",
+ )
+ parser.add_argument(
+ "--host", default="http://localhost:8000", help="Host URL of the feed processing system"
+ )
+
+ args = parser.parse_args()
+
+ # Check if metrics endpoint is accessible
+ if not check_metrics_endpoint():
+ print("Warning: Metrics endpoint is not accessible. Make sure Prometheus is running.")
+
+ if args.scenario == "recovery":
+ if not args.recovery_type:
+ parser.error("--recovery-type is required when running recovery tests")
+        # TODO: wire up tests.load_testing.recovery_tests.run_recovery_test here
+        print(f"Recovery scenario '{args.recovery_type}' is not yet implemented.")
+ else:
+ run_locust(args.scenario, args.duration, args.host)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tests/performance/test_error_handling_performance.py b/tests/performance/test_error_handling_performance.py
index 6e8e254..349d974 100644
--- a/tests/performance/test_error_handling_performance.py
+++ b/tests/performance/test_error_handling_performance.py
@@ -1,17 +1,14 @@
-import pytest
-import time
import statistics
+import random, time
from concurrent.futures import ThreadPoolExecutor, as_completed
-from typing import List, Dict, Any, Callable
from dataclasses import dataclass
from datetime import datetime
+from typing import Any, Callable, Dict, List
+
+import pytest
+
+from feed_processor.error_handling import CircuitBreaker, ErrorCategory, ErrorHandler, ErrorSeverity
-from feed_processor.error_handling import (
- ErrorHandler,
- ErrorCategory,
- ErrorSeverity,
- CircuitBreaker
-)
@dataclass
class PerformanceMetrics:
@@ -40,15 +37,14 @@ def throughput(self) -> float:
total_ops = self.error_count + self.success_count
return total_ops / duration if duration > 0 else 0
+
class TestErrorHandlingPerformance:
@pytest.fixture
def error_handler(self):
return ErrorHandler()
def measure_operation(
- self,
- operation: Callable,
- num_iterations: int = 1000
+ self, operation: Callable, num_iterations: int = 1000
) -> PerformanceMetrics:
"""Measure performance metrics for an operation"""
latencies = []
@@ -71,11 +67,12 @@ def measure_operation(
error_count=error_count,
success_count=success_count,
start_time=start_time,
- end_time=time.time()
+ end_time=time.time(),
)
def test_error_handling_latency(self, error_handler):
"""Measure basic error handling latency"""
+
def error_operation():
try:
raise Exception("Test error")
@@ -85,15 +82,15 @@ def error_operation():
category=ErrorCategory.SYSTEM_ERROR,
severity=ErrorSeverity.LOW,
service="latency_test",
- details={"timestamp": time.time()}
+ details={"timestamp": time.time()},
)
metrics = self.measure_operation(error_operation, num_iterations=1000)
-
+
# Verify performance meets requirements
assert metrics.avg_latency < 0.001 # Less than 1ms average
assert metrics.p95_latency < 0.005 # Less than 5ms for 95th percentile
-
+
print(f"\nError Handling Latency Metrics:")
print(f"Average Latency: {metrics.avg_latency*1000:.2f}ms")
print(f"P95 Latency: {metrics.p95_latency*1000:.2f}ms")
@@ -103,8 +100,8 @@ def test_retry_strategy_performance(self, error_handler):
"""Compare performance of different retry strategies"""
strategies = {
"fixed": lambda x: 1.0,
- "exponential": lambda x: 2 ** x,
- "exponential_with_jitter": lambda x: (2 ** x) * (1 + random.random() * 0.1)
+ "exponential": lambda x: 2**x,
+ "exponential_with_jitter": lambda x: (2**x) * (1 + random.random() * 0.1),
}
results = {}
@@ -126,7 +123,7 @@ def test_retry_strategy_performance(self, error_handler):
results[name] = {
"avg_latency": statistics.mean(latencies),
"p95_latency": sorted(latencies)[int(len(latencies) * 0.95)],
- "total_time": time.time() - start_time
+ "total_time": time.time() - start_time,
}
# Print comparison
@@ -141,7 +138,7 @@ def test_logging_pipeline_performance(self, error_handler):
"""Measure logging pipeline performance under load"""
num_threads = 4
iterations_per_thread = 250
-
+
def logging_worker():
latencies = []
for _ in range(iterations_per_thread):
@@ -156,8 +153,8 @@ def logging_worker():
service="logging_test",
details={
"timestamp": datetime.utcnow().isoformat(),
- "data": "x" * 1000 # 1KB payload
- }
+ "data": "x" * 1000, # 1KB payload
+ },
)
latencies.append(time.time() - start_time)
time.sleep(0.001) # Simulate some processing
@@ -165,32 +162,29 @@ def logging_worker():
start_time = time.time()
all_latencies = []
-
+
with ThreadPoolExecutor(max_workers=num_threads) as executor:
- futures = [
- executor.submit(logging_worker)
- for _ in range(num_threads)
- ]
-
+ futures = [executor.submit(logging_worker) for _ in range(num_threads)]
+
for future in as_completed(futures):
all_latencies.extend(future.result())
end_time = time.time()
-
+
metrics = PerformanceMetrics(
operation="logging_pipeline",
latencies=all_latencies,
error_count=0,
success_count=len(all_latencies),
start_time=start_time,
- end_time=end_time
+ end_time=end_time,
)
-
+
print("\nLogging Pipeline Performance:")
print(f"Average Latency: {metrics.avg_latency*1000:.2f}ms")
print(f"P95 Latency: {metrics.p95_latency*1000:.2f}ms")
print(f"Throughput: {metrics.throughput:.2f} logs/sec")
-
+
# Verify performance requirements
assert metrics.avg_latency < 0.005 # Less than 5ms average
assert metrics.p95_latency < 0.020 # Less than 20ms for 95th percentile
diff --git a/tests/test_cli.py b/tests/test_cli.py
new file mode 100644
index 0000000..44fecdb
--- /dev/null
+++ b/tests/test_cli.py
@@ -0,0 +1,595 @@
+import asyncio
+import json
+import threading
+import time
+import unittest
+from pathlib import Path
+from unittest.mock import MagicMock, Mock, patch
+
+from click.testing import CliRunner
+from prometheus_client import CollectorRegistry
+
+from feed_processor.cli import cli, load_config
+from feed_processor.metrics import (
+ PROCESSING_LATENCY,
+ PROCESSING_RATE,
+ QUEUE_OVERFLOWS,
+ QUEUE_SIZE,
+ RATE_LIMIT_DELAY,
+ WEBHOOK_PAYLOAD_SIZE,
+ WEBHOOK_RETRIES,
+ start_metrics_server,
+)
+from feed_processor.processor import FeedProcessor
+
+
+class AsyncCliRunner(CliRunner):
+ """Async Click test runner."""
+
+ def invoke(self, *args, **kwargs):
+ """Run command synchronously."""
+ return super().invoke(*args, **kwargs)
+
+
+class TestCLI(unittest.TestCase):
+ def setUp(self):
+ self.runner = AsyncCliRunner()
+ self.sample_config = {
+ "max_queue_size": 500,
+ "webhook_endpoint": "https://example.com/webhook",
+ "webhook_auth_token": "test-token",
+ "webhook_batch_size": 5,
+ }
+
+ self.sample_feed = """
+
+
+
+ Test Feed
+ http://example.com/feed
+ Test Description
+ -
+ Test Item
+ http://example.com/item1
+ Test Item Description
+
+
+
+ """
+
+ # Mock metrics
+ self._mock_metrics()
+
+ def _mock_metrics(self):
+ """Mock all metrics to avoid port conflicts."""
+ self.mock_registry = CollectorRegistry()
+
+ # Mock all metric values
+ for metric in [
+ PROCESSING_RATE,
+ QUEUE_SIZE,
+ PROCESSING_LATENCY,
+ WEBHOOK_RETRIES,
+ WEBHOOK_PAYLOAD_SIZE,
+ RATE_LIMIT_DELAY,
+ QUEUE_OVERFLOWS,
+ ]:
+ metric._value = MagicMock(get=lambda: 0.0)
+ metric._sum = MagicMock(get=lambda: 0.0)
+ metric._count = MagicMock(get=lambda: 1.0)
+
+ @patch("time.sleep", return_value=None)
+ def test_load_config(self, mock_sleep):
+ """Test loading configuration."""
+ with self.runner.isolated_filesystem():
+ # Write test config
+ config_path = Path("test_config.json")
+ with open(config_path, "w") as f:
+ json.dump(self.sample_config, f)
+
+ # Test loading config
+ config = load_config(config_path)
+ self.assertEqual(config["webhook_endpoint"], "https://example.com/webhook")
+ self.assertEqual(config["webhook_batch_size"], 5)
+
+ # Test loading non-existent config
+ config = load_config(Path("nonexistent.json"))
+ self.assertEqual(config["webhook_batch_size"], 10) # default value
+
+ @patch("feed_processor.cli.FeedProcessor")
+ @patch("feed_processor.metrics.start_metrics_server")
+ @patch("time.sleep")
+ def test_start_command(self, mock_sleep, mock_metrics, MockProcessor):
+ """Test the start command."""
+ # Setup mock processor
+ mock_processor = Mock()
+ mock_processor.start = Mock()
+ mock_processor.stop = Mock()
+ mock_processor._running = True
+ mock_processor._stop_event = Mock()
+ MockProcessor.return_value = mock_processor
+
+ # Simulate Ctrl+C after first sleep
+ mock_sleep.side_effect = KeyboardInterrupt()
+
+ # Run command
+ result = self.runner.invoke(cli, ["start"])
+
+ # Verify results
+ self.assertEqual(result.exit_code, 0)
+ mock_processor.start.assert_called_once()
+ mock_processor.stop.assert_called_once()
+
+ @patch("feed_processor.cli.FeedProcessor")
+ @patch("time.sleep", return_value=None)
+ def test_process_command(self, mock_sleep, MockProcessor):
+ """Test the process command."""
+ # Setup mock processor
+ mock_processor = Mock()
+ mock_processor.start = Mock()
+ mock_processor.stop = Mock()
+ mock_processor.add_feed = Mock(return_value=True)
+ mock_processor._running = True
+ mock_processor._stop_event = Mock()
+ MockProcessor.return_value = mock_processor
+
+ with self.runner.isolated_filesystem():
+ # Create test feed file
+ feed_path = Path("test_feed.xml")
+ with open(feed_path, "w") as f:
+ f.write(self.sample_feed)
+
+ # Run command
+ result = self.runner.invoke(cli, ["process", str(feed_path)])
+
+ # Verify results
+ self.assertEqual(result.exit_code, 0)
+ self.assertIn("Successfully added feed", result.output)
+ mock_processor.start.assert_called_once()
+ mock_processor.stop.assert_called_once()
+ mock_processor.add_feed.assert_called_once()
+
+ @patch("feed_processor.metrics.start_metrics_server")
+ @patch("time.sleep", return_value=None)
+ def test_metrics_command(self, mock_sleep, mock_metrics):
+ """Test the metrics command."""
+ result = self.runner.invoke(cli, ["metrics"])
+ self.assertEqual(result.exit_code, 0)
+ self.assertIn("Current Metrics:", result.output)
+
+ @patch("feed_processor.webhook.WebhookConfig")
+ @patch("time.sleep", return_value=None)
+ def test_configure_command(self, mock_sleep, MockWebhookConfig):
+ """Test the configure command."""
+ # Setup mock webhook config
+ mock_config = Mock()
+ mock_config.endpoint = "https://example.com/webhook"
+ mock_config.auth_token = "test-token"
+ mock_config.batch_size = 5
+ MockWebhookConfig.return_value = mock_config
+
+ with self.runner.isolated_filesystem():
+ output_path = Path("config.json")
+ result = self.runner.invoke(
+ cli,
+ [
+ "configure",
+ "--endpoint",
+ "https://example.com/webhook",
+ "--token",
+ "test-token",
+ "--batch-size",
+ "5",
+ "--output",
+ str(output_path),
+ ],
+ )
+
+ # Verify results
+ self.assertEqual(result.exit_code, 0)
+ self.assertTrue(output_path.exists())
+
+ with open(output_path) as f:
+ config = json.load(f)
+ self.assertEqual(config["webhook_endpoint"], "https://example.com/webhook")
+ self.assertEqual(config["webhook_batch_size"], 5)
+
+ def test_configure_invalid_webhook(self):
+ """Test configure command with invalid webhook URL."""
+ result = self.runner.invoke(
+ cli, ["configure", "--endpoint", "not-a-url", "--token", "test-token"]
+ )
+
+ self.assertEqual(result.exit_code, 1)
+ self.assertIn("Invalid configuration", result.output)
+
+ def test_validate_feed(self):
+ """Test the new validate feed command"""
+ with self.runner.isolated_filesystem():
+ valid_feed = """
+
+
+ Test Feed
+ http://example.com/feed
+ Test Description
+ -
+ Test Item
+ http://example.com/item1
+ Test Description
+
+
+ """
+
+ with open("valid_feed.xml", "w", encoding="utf-8") as f:
+ f.write(valid_feed)
+
+ result = self.runner.invoke(cli, ["validate", "valid_feed.xml"])
+ self.assertEqual(result.exit_code, 0)
+ self.assertIn("Feed is valid", result.output)
+
+ def test_validate_feed_additional_checks(self):
+ """Test additional feed validation checks"""
+ # Test feed with empty items
+ with self.runner.isolated_filesystem():
+ empty_items_feed = """
+
+
+ Test Feed
+ http://example.com/feed
+ Test Description
+
+ """
+
+ with open("empty_feed.xml", "w", encoding="utf-8") as f:
+ f.write(empty_items_feed)
+
+ result = self.runner.invoke(cli, ["validate", "empty_feed.xml"])
+ self.assertEqual(result.exit_code, 1)
+ self.assertIn("No feed items found", result.output)
+
+ # Test feed with invalid publication date
+ with self.runner.isolated_filesystem():
+ invalid_date_feed = """
+
+
+ Test Feed
+ http://example.com/feed
+ Test Description
+ Invalid Date
+ -
+ Test Item
+ http://example.com/item1
+ Test Description
+ Not a valid date
+
+
+ """
+
+ with open("invalid_date_feed.xml", "w", encoding="utf-8") as f:
+ f.write(invalid_date_feed)
+
+ result = self.runner.invoke(cli, ["validate", "invalid_date_feed.xml"])
+ self.assertEqual(result.exit_code, 1)
+ self.assertIn("Invalid publication date", result.output)
+
+ # Test feed with invalid URLs
+ with self.runner.isolated_filesystem():
+ invalid_url_feed = """
+
+
+ Test Feed
+ not_a_valid_url
+ Test Description
+ -
+ Test Item
+ also_not_valid
+ Test Description
+
+
+ """
+
+ with open("invalid_url_feed.xml", "w", encoding="utf-8") as f:
+ f.write(invalid_url_feed)
+
+ result = self.runner.invoke(cli, ["validate", "invalid_url_feed.xml"])
+ self.assertEqual(result.exit_code, 1)
+ self.assertIn("Invalid URL format", result.output)
+
+ def test_validate_feed_strict_mode(self):
+ """Test feed validation with strict mode enabled"""
+ # Test feed with long content
+ with self.runner.isolated_filesystem():
+ very_long_title = "A" * 201 # Exceeds 200 char limit
+ long_content_feed = f"""
+
+
+ {very_long_title}
+ http://example.com/feed
+ Test Description
+ -
+ Test Item
+ http://example.com/item1
+ Test Description
+
+
+ """
+
+ with open("long_content_feed.xml", "w", encoding="utf-8") as f:
+ f.write(long_content_feed)
+
+ # Should pass in normal mode
+ result = self.runner.invoke(cli, ["validate", "long_content_feed.xml"])
+ self.assertEqual(result.exit_code, 0)
+
+ # Should fail in strict mode
+ result = self.runner.invoke(cli, ["validate", "--strict", "long_content_feed.xml"])
+ self.assertEqual(result.exit_code, 1)
+ self.assertIn("Content length exceeds maximum", result.output)
+
+ # Test feed with non-UTF8 encoding
+ with self.runner.isolated_filesystem():
+ non_utf8_feed = """
+
+
+ Test Feed
+ http://example.com/feed
+ Test Description with special char: ñ
+ -
+ Test Item
+ http://example.com/item1
+ Test Description
+
+
+ """.encode(
+ "iso-8859-1"
+ )
+
+ with open("non_utf8_feed.xml", "wb") as f:
+ f.write(non_utf8_feed)
+
+ # Should pass in normal mode
+ result = self.runner.invoke(cli, ["validate", "non_utf8_feed.xml"])
+ self.assertEqual(result.exit_code, 0)
+
+ # Should fail in strict mode
+ result = self.runner.invoke(cli, ["validate", "--strict", "non_utf8_feed.xml"])
+ self.assertEqual(result.exit_code, 1)
+ self.assertIn("Non-UTF8 encoding detected", result.output)
+
+ # Test feed with missing optional elements
+ with self.runner.isolated_filesystem():
+ minimal_feed = """
+
+
+ Test Feed
+ http://example.com/feed
+ -
+ Test Item
+ http://example.com/item1
+
+
+ """
+
+ with open("minimal_feed.xml", "w", encoding="utf-8") as f:
+ f.write(minimal_feed)
+
+ # Should pass in normal mode
+ result = self.runner.invoke(cli, ["validate", "minimal_feed.xml"])
+ self.assertEqual(result.exit_code, 0)
+
+ # Should fail in strict mode due to missing description
+ result = self.runner.invoke(cli, ["validate", "--strict", "minimal_feed.xml"])
+ self.assertEqual(result.exit_code, 1)
+ self.assertIn("Missing recommended elements", result.output)
+
+ def test_validate_feed_enhanced(self):
+ """Test enhanced feed validation features."""
+ with self.runner.isolated_filesystem():
+ # Test with invalid GUID
+ feed_with_long_guid = """
+
+
+ Test Feed
+ http://example.com/feed
+ Test Description
+ -
+ Test Item
+ http://example.com/item1
+ Test Description
+ {}
+
+
+ """.format(
+ "x" * 513
+ ) # GUID longer than 512 chars
+
+ with open("invalid_guid_feed.xml", "w", encoding="utf-8") as f:
+ f.write(feed_with_long_guid)
+
+ result = self.runner.invoke(cli, ["validate", "invalid_guid_feed.xml"])
+ self.assertEqual(result.exit_code, 1)
+ self.assertIn("GUID exceeds maximum length", result.output)
+
+ # Test with invalid image URL
+ feed_with_invalid_image = """
+
+
+ Test Feed
+ http://example.com/feed
+ Test Description
+ -
+ Test Item
+ http://example.com/item1
+ Test Description
+ not_a_url
+
+
+ """
+
+ with open("invalid_image_feed.xml", "w", encoding="utf-8") as f:
+ f.write(feed_with_invalid_image)
+
+ result = self.runner.invoke(cli, ["validate", "invalid_image_feed.xml"])
+ self.assertEqual(result.exit_code, 1)
+ self.assertIn("Invalid image URL format", result.output)
+
+ # Test with invalid categories
+ feed_with_invalid_categories = """
+
+
+ Test Feed
+ http://example.com/feed
+ Test Description
+ -
+ Test Item
+ http://example.com/item1
+ Test Description
+
+ {}
+
+
+ """.format(
+ "x" * 201
+ ) # Category longer than 200 chars
+
+ with open("invalid_categories_feed.xml", "w", encoding="utf-8") as f:
+ f.write(feed_with_invalid_categories)
+
+ result = self.runner.invoke(cli, ["validate", "invalid_categories_feed.xml"])
+ self.assertEqual(result.exit_code, 1)
+ self.assertIn("Category exceeds maximum length", result.output)
+ self.assertIn("Empty category found", result.output)
+
+ def test_validate_feed_json_output(self):
+ """Test JSON output format for feed validation."""
+ with self.runner.isolated_filesystem():
+ valid_feed = """
+
+
+ Test Feed
+ http://example.com/feed
+ Test Description
+ -
+ Test Item
+ http://example.com/item1
+ Test Description
+
+
+ """
+
+ with open("valid_feed.xml", "w", encoding="utf-8") as f:
+ f.write(valid_feed)
+
+ result = self.runner.invoke(cli, ["validate", "--format", "json", "valid_feed.xml"])
+ self.assertEqual(result.exit_code, 0)
+
+ # Verify JSON output
+ import json
+
+ try:
+ output = json.loads(result.output)
+ self.assertTrue(isinstance(output, dict))
+ self.assertTrue(output["is_valid"])
+ self.assertTrue("stats" in output)
+ self.assertTrue("validation_time" in output)
+ except json.JSONDecodeError:
+ self.fail("Output is not valid JSON")
+
+ def test_validate_feed_caching(self):
+ """Test feed validation caching."""
+ with self.runner.isolated_filesystem():
+ # Create a valid feed file
+ feed_content = """
+
+
+ Test Feed
+ http://example.com/feed
+ Test Description
+ -
+ Test Item
+ http://example.com/item1
+ Test Description
+
+
+ """
+
+ with open("test_feed.xml", "w", encoding="utf-8") as f:
+ f.write(feed_content)
+
+ # First validation (should be slower)
+ start_time = time.time()
+ result1 = self.runner.invoke(cli, ["validate", "test_feed.xml", "--cache"])
+ time1 = time.time() - start_time
+
+ # Second validation (should be faster due to caching)
+ start_time = time.time()
+ result2 = self.runner.invoke(cli, ["validate", "test_feed.xml", "--cache"])
+ time2 = time.time() - start_time
+
+ # Third validation with no cache (should be slower)
+ start_time = time.time()
+ result3 = self.runner.invoke(cli, ["validate", "test_feed.xml", "--no-cache"])
+ time3 = time.time() - start_time
+
+ # Assertions
+ self.assertEqual(result1.exit_code, 0)
+ self.assertEqual(result2.exit_code, 0)
+ self.assertEqual(result3.exit_code, 0)
+
+ # Time comparisons
+ self.assertGreater(time1, time2) # Cached should be faster
+ self.assertGreater(time3, time2) # Non-cached should be slower
+
+ @patch("time.sleep", return_value=None)
+ def test_validate_command_error_types(self, mock_sleep):
+ """Test different validation error types and exit codes."""
+ with self.runner.isolated_filesystem():
+ # Test critical error (empty file)
+ with open("empty.xml", "w") as f:
+ pass
+
+ result = self.runner.invoke(cli, ["validate", "empty.xml"])
+ self.assertEqual(result.exit_code, 1)
+ self.assertIn("Critical Error:", result.output)
+
+ # Test validation error (missing required fields)
+ invalid_feed = """
+
+
+
+ """
+ with open("invalid.xml", "w") as f:
+ f.write(invalid_feed)
+
+ result = self.runner.invoke(cli, ["validate", "invalid.xml"])
+ self.assertEqual(result.exit_code, 2)
+ self.assertIn("Validation Error:", result.output)
+
+ # Test format error (invalid date)
+ malformed_feed = """
+
+
+ Test
+ http://example.com
+ Test feed
+ invalid-date
+
+ """
+ with open("malformed.xml", "w") as f:
+ f.write(malformed_feed)
+
+ result = self.runner.invoke(cli, ["validate", "malformed.xml"])
+ self.assertEqual(result.exit_code, 3)
+ self.assertIn("Format Error:", result.output)
+
+ # Test JSON output format
+ result = self.runner.invoke(cli, ["validate", "--format=json", "invalid.xml"])
+ self.assertEqual(result.exit_code, 2)
+ output = json.loads(result.output)
+ self.assertEqual(output["error_type"], "validation")
+ self.assertFalse(output["is_valid"])
+ self.assertTrue(len(output["errors"]) > 0)
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/tests/test_feed_processor.py b/tests/test_feed_processor.py
index 383d7eb..d925122 100644
--- a/tests/test_feed_processor.py
+++ b/tests/test_feed_processor.py
@@ -1,10 +1,13 @@
-import pytest
from datetime import datetime, timezone
-from unittest.mock import Mock, patch, MagicMock
+from unittest.mock import MagicMock, Mock, patch
+
+import pytest
+
from feed_processor.feed_processor import FeedProcessor
from feed_processor.priority_queue import Priority, QueueItem
from feed_processor.webhook_manager import WebhookManager, WebhookResponse
+
@pytest.fixture
def mock_inoreader_response():
return {
@@ -16,10 +19,7 @@ def mock_inoreader_response():
"summary": {"content": "Test content"},
"canonical": [{"href": "http://test.com/article1"}],
"published": int(datetime(2024, 12, 13, tzinfo=timezone.utc).timestamp()),
- "categories": [
- {"label": "Technology"},
- {"label": "Breaking News"}
- ]
+ "categories": [{"label": "Technology"}, {"label": "Breaking News"}],
},
{
"id": "feed/1/item/2",
@@ -28,89 +28,87 @@ def mock_inoreader_response():
"summary": {"content": "Test content 2"},
"canonical": [{"href": "http://test.com/article2"}],
"published": int(datetime(2024, 12, 12, tzinfo=timezone.utc).timestamp()),
- "categories": [
- {"label": "Technology"}
- ]
- }
+ "categories": [{"label": "Technology"}],
+ },
],
- "continuation": "token123"
+ "continuation": "token123",
}
+
@pytest.fixture
def feed_processor():
return FeedProcessor(
inoreader_token="test_token",
webhook_url="http://test.webhook",
queue_size=100,
- webhook_rate_limit=0.1
+ webhook_rate_limit=0.1,
)
+
def test_feed_processor_initialization():
"""Test FeedProcessor initialization with correct parameters."""
processor = FeedProcessor(
inoreader_token="test_token",
webhook_url="http://test.webhook",
queue_size=100,
- webhook_rate_limit=0.1
+ webhook_rate_limit=0.1,
)
-
+
assert processor.inoreader_token == "test_token"
assert processor.queue.max_size == 100
assert processor.webhook_manager.rate_limit == 0.1
-@patch('requests.get')
+
+@patch("requests.get")
def test_fetch_feeds_success(mock_get, feed_processor, mock_inoreader_response):
"""Test successful feed fetching from Inoreader API."""
mock_response = Mock()
mock_response.json.return_value = mock_inoreader_response
mock_response.status_code = 200
mock_get.return_value = mock_response
-
+
response = feed_processor._fetch_feeds()
-
+
assert response == mock_inoreader_response
mock_get.assert_called_once_with(
"https://www.inoreader.com/reader/api/0/stream/contents/user/-/state/com.google/reading-list",
- headers={
- "Authorization": "Bearer test_token",
- "Content-Type": "application/json"
- },
- params={"n": 100}
+ headers={"Authorization": "Bearer test_token", "Content-Type": "application/json"},
+ params={"n": 100},
)
-@patch('requests.get')
+
+@patch("requests.get")
def test_fetch_feeds_with_continuation(mock_get, feed_processor, mock_inoreader_response):
"""Test feed fetching with continuation token."""
mock_response = Mock()
mock_response.json.return_value = mock_inoreader_response
mock_response.status_code = 200
mock_get.return_value = mock_response
-
+
response = feed_processor._fetch_feeds("token123")
-
+
mock_get.assert_called_once_with(
"https://www.inoreader.com/reader/api/0/stream/contents/user/-/state/com.google/reading-list",
- headers={
- "Authorization": "Bearer test_token",
- "Content-Type": "application/json"
- },
- params={"n": 100, "c": "token123"}
+ headers={"Authorization": "Bearer test_token", "Content-Type": "application/json"},
+ params={"n": 100, "c": "token123"},
)
-@patch('requests.get')
+
+@patch("requests.get")
def test_fetch_feeds_error(mock_get, feed_processor):
"""Test error handling during feed fetching."""
mock_get.side_effect = Exception("API Error")
-
+
response = feed_processor._fetch_feeds()
-
+
assert response == {}
+
def test_process_item_success(feed_processor, mock_inoreader_response):
"""Test successful processing of a feed item."""
raw_item = mock_inoreader_response["items"][0]
processed = feed_processor._process_item(raw_item)
-
+
assert processed["id"] == "feed/1/item/1"
assert processed["title"] == "Test Article 1"
assert processed["author"] == "Test Author"
@@ -120,123 +118,128 @@ def test_process_item_success(feed_processor, mock_inoreader_response):
assert len(processed["categories"]) == 2
assert "Breaking News" in processed["categories"]
+
def test_process_item_error(feed_processor):
"""Test error handling during item processing."""
invalid_item = {"invalid": "data"}
processed = feed_processor._process_item(invalid_item)
-
+
assert processed == {}
+
def test_determine_priority_high(feed_processor):
"""Test priority determination for breaking news."""
item = {
"categories": ["Technology", "Breaking News"],
- "published": datetime.now(timezone.utc).isoformat()
+ "published": datetime.now(timezone.utc).isoformat(),
}
-
+
priority = feed_processor._determine_priority(item)
assert priority == Priority.HIGH
+
def test_determine_priority_normal(feed_processor):
"""Test priority determination for recent news."""
- item = {
- "categories": ["Technology"],
- "published": datetime.now(timezone.utc).isoformat()
- }
-
+ item = {"categories": ["Technology"], "published": datetime.now(timezone.utc).isoformat()}
+
priority = feed_processor._determine_priority(item)
assert priority == Priority.NORMAL
+
def test_determine_priority_low(feed_processor):
"""Test priority determination for older news."""
old_date = datetime(2024, 12, 12, tzinfo=timezone.utc).isoformat()
- item = {
- "categories": ["Technology"],
- "published": old_date
- }
-
+ item = {"categories": ["Technology"], "published": old_date}
+
priority = feed_processor._determine_priority(item)
assert priority == Priority.LOW
-@patch('requests.get')
+
+@patch("requests.get")
def test_fetch_and_queue_items(mock_get, feed_processor, mock_inoreader_response):
"""Test fetching and queuing items with proper priorities."""
# First response with continuation token
first_response = Mock()
first_response.json.return_value = mock_inoreader_response
first_response.status_code = 200
-
+
# Second response without continuation token (end of feed)
second_response = Mock()
second_response.json.return_value = {"items": [], "continuation": None}
second_response.status_code = 200
-
+
# Return different responses for each call
mock_get.side_effect = [first_response, second_response]
-
+
items_queued = feed_processor.fetch_and_queue_items()
-
+
assert items_queued == 2
assert feed_processor.queue.size == 2
assert mock_get.call_count == 2 # Should make two API calls
-
+
# First item should be high priority (Breaking News)
item1 = feed_processor.queue.dequeue()
assert item1.priority == Priority.HIGH
assert item1.content["title"] == "Test Article 1"
-
+
# Second item should be normal/low priority
item2 = feed_processor.queue.dequeue()
assert item2.content["title"] == "Test Article 2"
-@patch.object(WebhookManager, 'send_webhook')
+
+@patch.object(WebhookManager, "send_webhook")
def test_process_queue_success(mock_send_webhook, feed_processor):
"""Test successful processing of queued items."""
# Add test items to queue
- feed_processor.queue.enqueue(QueueItem(
- id="1",
- priority=Priority.HIGH,
- content={"title": "Test 1"},
- timestamp=datetime.now(timezone.utc)
- ))
- feed_processor.queue.enqueue(QueueItem(
- id="2",
- priority=Priority.NORMAL,
- content={"title": "Test 2"},
- timestamp=datetime.now(timezone.utc)
- ))
-
+ feed_processor.queue.enqueue(
+ QueueItem(
+ id="1",
+ priority=Priority.HIGH,
+ content={"title": "Test 1"},
+ timestamp=datetime.now(timezone.utc),
+ )
+ )
+ feed_processor.queue.enqueue(
+ QueueItem(
+ id="2",
+ priority=Priority.NORMAL,
+ content={"title": "Test 2"},
+ timestamp=datetime.now(timezone.utc),
+ )
+ )
+
mock_send_webhook.return_value = WebhookResponse(
- success=True,
- status_code=200,
- timestamp=datetime.now(timezone.utc).isoformat()
+ success=True, status_code=200, timestamp=datetime.now(timezone.utc).isoformat()
)
-
+
processed = feed_processor.process_queue(batch_size=2)
-
+
assert processed == 2
assert feed_processor.queue.size == 0
assert mock_send_webhook.call_count == 2
-@patch.object(WebhookManager, 'send_webhook')
+
+@patch.object(WebhookManager, "send_webhook")
def test_process_queue_with_errors(mock_send_webhook, feed_processor):
"""Test queue processing with webhook errors."""
- feed_processor.queue.enqueue(QueueItem(
- id="1",
- priority=Priority.HIGH,
- content={"title": "Test 1"},
- timestamp=datetime.now(timezone.utc)
- ))
-
+ feed_processor.queue.enqueue(
+ QueueItem(
+ id="1",
+ priority=Priority.HIGH,
+ content={"title": "Test 1"},
+ timestamp=datetime.now(timezone.utc),
+ )
+ )
+
mock_send_webhook.return_value = WebhookResponse(
success=False,
status_code=500,
error_id="error123",
error_type="ServerError",
- timestamp=datetime.now(timezone.utc).isoformat()
+ timestamp=datetime.now(timezone.utc).isoformat(),
)
-
+
processed = feed_processor.process_queue(batch_size=1)
-
+
assert processed == 0 # No items successfully processed
assert mock_send_webhook.call_count == 1
diff --git a/tests/test_metrics.py b/tests/test_metrics.py
index f344c68..266e125 100644
--- a/tests/test_metrics.py
+++ b/tests/test_metrics.py
@@ -1,213 +1,229 @@
-import pytest
from datetime import datetime, timezone
from unittest.mock import Mock, patch
-from feed_processor.metrics import MetricsCollector, MetricType, Metric
+
+import pytest
+
+from feed_processor.metrics import Metric, MetricsCollector, MetricType
+
@pytest.fixture
def metrics_collector():
return MetricsCollector()
+
def test_counter_metric():
"""Test basic counter metric functionality."""
collector = MetricsCollector()
-
+
# Test increment
collector.increment("items_processed")
collector.increment("items_processed", 2)
assert collector.get_metric("items_processed").value == 3
-
+
# Test decrement
collector.decrement("items_processed")
assert collector.get_metric("items_processed").value == 2
+
def test_gauge_metric():
"""Test gauge metric for current value tracking."""
collector = MetricsCollector()
-
+
# Test setting values
collector.set_gauge("queue_size", 10)
assert collector.get_metric("queue_size").value == 10
-
+
collector.set_gauge("queue_size", 5)
assert collector.get_metric("queue_size").value == 5
+
def test_histogram_metric():
"""Test histogram for tracking value distributions."""
collector = MetricsCollector()
-
+
# Record processing times
collector.record("processing_time", 0.1)
collector.record("processing_time", 0.2)
collector.record("processing_time", 0.3)
-
+
histogram = collector.get_metric("processing_time")
assert histogram.count == 3
assert 0.1 <= histogram.average <= 0.3
assert histogram.min == 0.1
assert histogram.max == 0.3
+
def test_metric_labels():
"""Test metric labeling for better categorization."""
collector = MetricsCollector()
-
+
collector.increment("items_processed", labels={"priority": "high"})
collector.increment("items_processed", labels={"priority": "low"})
-
+
high_priority = collector.get_metric("items_processed", {"priority": "high"})
low_priority = collector.get_metric("items_processed", {"priority": "low"})
-
+
assert high_priority.value == 1
assert low_priority.value == 1
+
def test_metric_reset():
"""Test resetting metrics to initial state."""
collector = MetricsCollector()
-
+
collector.increment("errors")
collector.set_gauge("memory_usage", 100)
collector.record("latency", 0.5)
-
+
collector.reset()
-
+
assert collector.get_metric("errors").value == 0
assert collector.get_metric("memory_usage").value == 0
assert collector.get_metric("latency").count == 0
+
def test_metric_snapshot():
"""Test capturing current state of all metrics."""
collector = MetricsCollector()
-
+
collector.increment("successes")
collector.increment("errors")
collector.set_gauge("queue_size", 10)
collector.record("processing_time", 0.2)
-
+
snapshot = collector.get_snapshot()
-
+
assert snapshot["successes"]["value"] == 1
assert snapshot["errors"]["value"] == 1
assert snapshot["queue_size"]["value"] == 10
assert snapshot["processing_time"]["average"] == 0.2
+
def test_invalid_metric_operations():
"""Test handling of invalid metric operations."""
collector = MetricsCollector()
-
+
# Can't increment a gauge
with pytest.raises(ValueError):
collector.increment("queue_size")
collector.set_gauge("queue_size", 5)
-
+
# Can't set gauge value for a counter
with pytest.raises(ValueError):
collector.increment("items_processed")
collector.set_gauge("items_processed", 10)
-
+
# Can't get non-existent metric
with pytest.raises(KeyError):
collector.get_metric("nonexistent")
+
def test_metric_timestamp():
"""Test metric timestamps for tracking when values change."""
collector = MetricsCollector()
-
+
before = datetime.now(timezone.utc)
collector.increment("events")
after = datetime.now(timezone.utc)
-
+
metric = collector.get_metric("events")
assert before <= metric.last_updated <= after
+
def test_batch_update():
"""Test updating multiple metrics at once."""
collector = MetricsCollector()
-
+
updates = {
"successes": ("increment", 1),
"queue_size": ("gauge", 10),
- "latency": ("record", 0.2)
+ "latency": ("record", 0.2),
}
-
+
collector.batch_update(updates)
-
+
assert collector.get_metric("successes").value == 1
assert collector.get_metric("queue_size").value == 10
assert collector.get_metric("latency").average == 0.2
+
def test_webhook_retry_metrics():
"""Test webhook retry tracking metrics."""
collector = MetricsCollector()
-
+
# Test retry count increments
collector.increment("webhook_retries", labels={"attempt": "1"})
collector.increment("webhook_retries", labels={"attempt": "2"})
collector.increment("webhook_retries", labels={"attempt": "1"})
-
+
first_retry = collector.get_metric("webhook_retries", {"attempt": "1"})
second_retry = collector.get_metric("webhook_retries", {"attempt": "2"})
-
+
assert first_retry.value == 2
assert second_retry.value == 1
-
+
# Test webhook latency tracking
collector.record("webhook_duration", 0.5)
collector.record("webhook_duration", 1.0)
-
+
duration = collector.get_metric("webhook_duration")
assert duration.count == 2
assert duration.average == 0.75
assert duration.max == 1.0
+
def test_rate_limit_metrics():
"""Test rate limiting delay metrics."""
collector = MetricsCollector()
-
+
# Test rate limit delay tracking
collector.set_gauge("rate_limit_delay", 30)
assert collector.get_metric("rate_limit_delay").value == 30
-
+
collector.set_gauge("rate_limit_delay", 60)
assert collector.get_metric("rate_limit_delay").value == 60
-
+
# Test rate limit hit counter
collector.increment("rate_limit_hits")
collector.increment("rate_limit_hits")
assert collector.get_metric("rate_limit_hits").value == 2
+
def test_queue_overflow_metrics():
"""Test queue overflow tracking metrics."""
collector = MetricsCollector()
-
+
# Test overflow counts by priority
collector.increment("queue_overflow", labels={"priority": "high"})
collector.increment("queue_overflow", labels={"priority": "medium"})
collector.increment("queue_overflow", labels={"priority": "high"})
-
+
high_overflow = collector.get_metric("queue_overflow", {"priority": "high"})
medium_overflow = collector.get_metric("queue_overflow", {"priority": "medium"})
-
+
assert high_overflow.value == 2
assert medium_overflow.value == 1
-
+
# Test queue size by priority
collector.set_gauge("queue_items", 5, labels={"priority": "high"})
collector.set_gauge("queue_items", 3, labels={"priority": "medium"})
-
+
high_items = collector.get_metric("queue_items", {"priority": "high"})
medium_items = collector.get_metric("queue_items", {"priority": "medium"})
-
+
assert high_items.value == 5
assert medium_items.value == 3
+
def test_payload_size_metrics():
"""Test webhook payload size tracking."""
collector = MetricsCollector()
-
+
# Test payload size distribution
collector.record("webhook_payload_size", 1024) # 1KB
collector.record("webhook_payload_size", 2048) # 2KB
- collector.record("webhook_payload_size", 512) # 0.5KB
-
+ collector.record("webhook_payload_size", 512) # 0.5KB
+
size_metric = collector.get_metric("webhook_payload_size")
assert size_metric.count == 3
assert size_metric.average == 1194.6666666666667 # (1024 + 2048 + 512) / 3
diff --git a/tests/test_priority_queue.py b/tests/test_priority_queue.py
index 81a83cb..d40204a 100644
--- a/tests/test_priority_queue.py
+++ b/tests/test_priority_queue.py
@@ -1,6 +1,9 @@
-import pytest
from datetime import datetime, timezone
-from feed_processor.priority_queue import PriorityQueue, Priority, QueueItem
+
+import pytest
+
+from feed_processor.priority_queue import Priority, PriorityQueue, QueueItem
+
class TestPriorityQueue:
def test_queue_initialization(self):
@@ -30,11 +33,11 @@ def test_priority_ordering(self):
low = QueueItem("1", Priority.LOW, {"data": "low"}, datetime.now(timezone.utc))
normal = QueueItem("2", Priority.NORMAL, {"data": "normal"}, datetime.now(timezone.utc))
high = QueueItem("3", Priority.HIGH, {"data": "high"}, datetime.now(timezone.utc))
-
+
queue.enqueue(low)
queue.enqueue(normal)
queue.enqueue(high)
-
+
assert queue.dequeue() == high
assert queue.dequeue() == normal
assert queue.dequeue() == low
@@ -44,11 +47,11 @@ def test_full_queue_behavior(self):
item1 = QueueItem("1", Priority.LOW, {"data": "test1"}, datetime.now(timezone.utc))
item2 = QueueItem("2", Priority.LOW, {"data": "test2"}, datetime.now(timezone.utc))
item3 = QueueItem("3", Priority.HIGH, {"data": "test3"}, datetime.now(timezone.utc))
-
+
assert queue.enqueue(item1)
assert queue.enqueue(item2)
assert queue.is_full()
assert queue.enqueue(item3) # Should succeed by removing oldest low priority item
-
+
dequeued = queue.dequeue()
assert dequeued == item3
diff --git a/tests/test_processing_metrics.py b/tests/test_processing_metrics.py
index 81c1439..d761e3d 100644
--- a/tests/test_processing_metrics.py
+++ b/tests/test_processing_metrics.py
@@ -1,34 +1,42 @@
-import pytest
-from datetime import datetime, timezone, timedelta
+from datetime import datetime, timedelta, timezone
from unittest.mock import patch
+
+import pytest
+
from feed_processor.processing_metrics import ProcessingMetrics
+
def test_increment_processed():
metrics = ProcessingMetrics()
assert metrics.processed_count == 0
metrics.increment_processed()
assert metrics.processed_count == 1
+
def test_increment_errors():
metrics = ProcessingMetrics()
assert metrics.error_count == 0
metrics.increment_errors()
assert metrics.error_count == 1
+
def test_update_process_time():
metrics = ProcessingMetrics()
metrics.update_process_time(1.5)
assert metrics.last_process_time == 1.5
+
def test_update_queue_length():
metrics = ProcessingMetrics()
metrics.update_queue_length(10)
assert metrics.queue_length == 10
+
def test_success_rate_with_no_processing():
metrics = ProcessingMetrics()
assert metrics.success_rate == 0.0
+
def test_success_rate_with_processing():
metrics = ProcessingMetrics()
metrics.increment_processed()
@@ -36,30 +44,32 @@ def test_success_rate_with_processing():
metrics.increment_errors()
assert metrics.success_rate == pytest.approx(66.67, rel=0.01)
+
def test_processing_duration():
metrics = ProcessingMetrics()
-
+
# Mock the start time and current time
start_time = datetime.now(timezone.utc)
current_time = start_time + timedelta(minutes=1)
-
- with patch('datetime.datetime') as mock_datetime:
+
+ with patch("datetime.datetime") as mock_datetime:
mock_datetime.now.return_value = current_time
metrics.start_time = start_time
-
+
# Duration should be 60 seconds
assert metrics.processing_duration == pytest.approx(60.0, rel=0.1)
+
def test_reset():
metrics = ProcessingMetrics()
metrics.increment_processed()
metrics.increment_errors()
metrics.update_queue_length(5)
metrics.update_process_time(1.5)
-
+
metrics.reset()
-
+
assert metrics.processed_count == 0
assert metrics.error_count == 0
assert metrics.queue_length == 0
- assert metrics.last_process_time == 0.0
\ No newline at end of file
+ assert metrics.last_process_time == 0.0
diff --git a/tests/test_rate_limiter.py b/tests/test_rate_limiter.py
index fe6e074..cd4216a 100644
--- a/tests/test_rate_limiter.py
+++ b/tests/test_rate_limiter.py
@@ -1,49 +1,54 @@
-import pytest
import threading
import time
+
+import pytest
+
from feed_processor.rate_limiter import RateLimiter
+
def test_rate_limiter_initialization():
limiter = RateLimiter(requests_per_second=2)
assert limiter.requests_per_second == 2
assert isinstance(limiter.lock, threading.Lock)
assert limiter.last_request_time > 0
+
def test_rate_limiter_wait():
limiter = RateLimiter(requests_per_second=2)
-
+
# First request should not wait
start_time = time.time()
limiter.wait()
elapsed = time.time() - start_time
assert elapsed < 0.1 # Should be almost immediate
-
+
# Second request within the same second should wait
start_time = time.time()
limiter.wait()
elapsed = time.time() - start_time
assert elapsed >= 0.5 # Should wait about 0.5 seconds
+
def test_rate_limiter_thread_safety():
limiter = RateLimiter(requests_per_second=10)
request_times = []
-
+
def make_request():
limiter.wait()
request_times.append(time.time())
-
+
# Create multiple threads to test concurrency
threads = [threading.Thread(target=make_request) for _ in range(5)]
-
+
# Start all threads
for thread in threads:
thread.start()
-
+
# Wait for all threads to complete
for thread in threads:
thread.join()
-
+
# Check that requests were properly spaced
for i in range(1, len(request_times)):
- time_diff = request_times[i] - request_times[i-1]
- assert time_diff >= 0.1 # At least 100ms between requests
\ No newline at end of file
+ time_diff = request_times[i] - request_times[i - 1]
+ assert time_diff >= 0.1 # At least 100ms between requests
diff --git a/tests/test_validators.py b/tests/test_validators.py
new file mode 100644
index 0000000..e7436de
--- /dev/null
+++ b/tests/test_validators.py
@@ -0,0 +1,108 @@
+import unittest
+from datetime import datetime
+
+from feed_processor.validators import FeedValidationResult, FeedValidator
+
+
+class TestFeedValidator(unittest.TestCase):
+ def setUp(self):
+ self.rss_feed = """
+
+
+ Sample RSS Feed
+ http://example.com/feed
+ A sample RSS feed for testing
+ Mon, 13 Dec 2024 03:01:14 -0800
+ -
+ First Post
+ http://example.com/first-post
+ This is the first post
+ Mon, 13 Dec 2024 03:00:00 -0800
+
+
+ """
+
+ self.atom_feed = """
+
+ Sample Atom Feed
+
+ urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6
+ 2024-12-13T03:01:14-08:00
+
+ First Entry
+
+ urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a
+ 2024-12-13T03:00:00-08:00
+ This is the first entry
+
+ """
+
+ self.json_feed = """{
+ "version": "https://jsonfeed.org/version/1.1",
+ "title": "Sample JSON Feed",
+ "home_page_url": "http://example.com/",
+ "feed_url": "http://example.com/feed.json",
+ "items": [
+ {
+ "id": "1",
+ "title": "First Item",
+ "content_text": "This is the first item",
+ "url": "http://example.com/first-item",
+ "date_published": "2024-12-13T03:00:00-08:00"
+ }
+ ]
+ }"""
+
+ self.invalid_feed = "This is not a valid feed"
+
+ def test_validate_rss_feed(self):
+ result = FeedValidator.validate_feed(self.rss_feed)
+ self.assertTrue(result.is_valid)
+ self.assertEqual(result.feed_type, "rss")
+ self.assertIsNotNone(result.parsed_feed)
+ self.assertEqual(result.parsed_feed["title"], "Sample RSS Feed")
+
+ def test_validate_atom_feed(self):
+ result = FeedValidator.validate_feed(self.atom_feed)
+ self.assertTrue(result.is_valid)
+ self.assertEqual(result.feed_type, "atom")
+ self.assertIsNotNone(result.parsed_feed)
+ self.assertEqual(result.parsed_feed["title"], "Sample Atom Feed")
+
+ def test_validate_json_feed(self):
+ result = FeedValidator.validate_feed(self.json_feed)
+ self.assertTrue(result.is_valid)
+ self.assertEqual(result.feed_type, "json")
+ self.assertIsNotNone(result.parsed_feed)
+ self.assertEqual(result.parsed_feed["title"], "Sample JSON Feed")
+
+ def test_validate_invalid_feed(self):
+ result = FeedValidator.validate_feed(self.invalid_feed)
+ self.assertFalse(result.is_valid)
+ self.assertIsNone(result.feed_type)
+ self.assertIsNotNone(result.error_message)
+
+ def test_validate_missing_required_fields(self):
+ invalid_rss = """
+
+
+ Sample RSS Feed
+ Missing link field
+
+ """
+
+ result = FeedValidator.validate_feed(invalid_rss)
+ self.assertFalse(result.is_valid)
+ self.assertEqual(result.feed_type, "rss")
+ self.assertIn("Missing required fields", result.error_message)
+
+ def test_normalize_dates(self):
+ result = FeedValidator.validate_feed(self.rss_feed)
+ self.assertIsInstance(result.parsed_feed["updated"], datetime)
+
+ result = FeedValidator.validate_feed(self.atom_feed)
+ self.assertIsInstance(result.parsed_feed["updated"], datetime)
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/tests/test_webhook.py b/tests/test_webhook.py
new file mode 100644
index 0000000..c088a9d
--- /dev/null
+++ b/tests/test_webhook.py
@@ -0,0 +1,115 @@
+import json
+import unittest
+from datetime import datetime
+from unittest.mock import Mock, patch
+
+from feed_processor.webhook import WebhookConfig, WebhookError, WebhookManager, WebhookResponse
+
+
+class TestWebhookManager(unittest.TestCase):
+ def setUp(self):
+ self.config = WebhookConfig(
+ endpoint="https://example.com/webhook",
+ auth_token="test-token",
+ max_retries=3,
+ retry_delay=1,
+ timeout=5,
+ batch_size=10,
+ )
+ self.manager = WebhookManager(self.config)
+ self.sample_feed = {
+ "type": "rss",
+ "title": "Test Feed",
+ "link": "http://example.com/feed",
+ "updated": datetime.now(),
+ "items": [],
+ }
+
+ def test_webhook_config_validation(self):
+ # Test valid config
+ config = WebhookConfig(endpoint="https://example.com/webhook", auth_token="test-token")
+ self.assertIsInstance(config, WebhookConfig)
+
+ # Test invalid endpoint
+ with self.assertRaises(ValueError):
+ WebhookConfig(endpoint="not-a-url", auth_token="test-token")
+
+ def test_send_success(self):
+ with patch("requests.post") as mock_post:
+ mock_post.return_value.status_code = 200
+ mock_post.return_value.json.return_value = {"status": "success"}
+
+ response = self.manager.send(self.sample_feed)
+
+ self.assertTrue(response.success)
+ self.assertEqual(response.status_code, 200)
+ mock_post.assert_called_once()
+
+ def test_send_failure_with_retry(self):
+ with patch("requests.post") as mock_post:
+ # First two calls fail, third succeeds
+ mock_post.side_effect = [
+ Mock(status_code=500),
+ Mock(status_code=500),
+ Mock(status_code=200, json=lambda: {"status": "success"}),
+ ]
+
+ response = self.manager.send(self.sample_feed)
+
+ self.assertTrue(response.success)
+ self.assertEqual(response.retry_count, 2)
+ self.assertEqual(mock_post.call_count, 3)
+
+ def test_send_failure_max_retries(self):
+ with patch("requests.post") as mock_post:
+ mock_post.return_value.status_code = 500
+
+ response = self.manager.send(self.sample_feed)
+
+ self.assertFalse(response.success)
+ self.assertEqual(response.retry_count, self.config.max_retries)
+ self.assertEqual(mock_post.call_count, self.config.max_retries + 1)
+
+ def test_batch_send(self):
+ feeds = [self.sample_feed.copy() for _ in range(5)]
+
+ with patch("requests.post") as mock_post:
+ mock_post.return_value.status_code = 200
+ mock_post.return_value.json.return_value = {"status": "success"}
+
+ responses = self.manager.batch_send(feeds)
+
+ self.assertEqual(len(responses), 1) # One batch
+ self.assertTrue(all(r.success for r in responses))
+ mock_post.assert_called_once()
+
+ def test_rate_limiting(self):
+ with patch("requests.post") as mock_post:
+ mock_post.return_value.status_code = 429 # Too Many Requests
+ mock_post.return_value.headers = {"Retry-After": "2"}
+
+ response = self.manager.send(self.sample_feed)
+
+ self.assertFalse(response.success)
+ self.assertEqual(response.status_code, 429)
+ self.assertTrue(response.rate_limited)
+
+ def test_authentication_error(self):
+ with patch("requests.post") as mock_post:
+ mock_post.return_value.status_code = 401
+
+ response = self.manager.send(self.sample_feed)
+
+ self.assertFalse(response.success)
+ self.assertEqual(response.status_code, 401)
+ self.assertIn("authentication", response.error_message.lower())
+
+ def test_payload_validation(self):
+ # Test invalid payload
+ invalid_feed = {"type": "unknown"}
+ with self.assertRaises(WebhookError):
+ self.manager.send(invalid_feed)
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/tests/unit/core/test-processor.py b/tests/unit/core/test-processor.py
index fd2e7bb..1816cea 100644
--- a/tests/unit/core/test-processor.py
+++ b/tests/unit/core/test-processor.py
@@ -1,48 +1,52 @@
-import pytest
-from unittest.mock import Mock, patch
+import json
import time
from datetime import datetime
-import json
+from unittest.mock import Mock, patch
+
+import pytest
# Import will be implemented when we create the actual module
# from feed_processor.core.processor import FeedProcessor, RateLimiter, ProcessingMetrics
+
class TestRateLimiter:
def test_rate_limiter_delays_requests(self):
"""Test that rate limiter enforces minimum delay between requests"""
from feed_processor.core.processor import RateLimiter
-
+
limiter = RateLimiter(min_interval=0.2)
-
+
# Record start time
start_time = time.time()
-
+
# Make multiple requests
for _ in range(3):
limiter.wait()
-
+
# Check total time
elapsed = time.time() - start_time
assert elapsed >= 0.4, "Rate limiter should enforce minimum delay"
+
class TestProcessingMetrics:
def test_error_rate_calculation(self):
"""Test error rate calculation"""
from feed_processor.core.processor import ProcessingMetrics
-
+
metrics = ProcessingMetrics()
metrics.processed_count = 90
metrics.error_count = 10
-
+
assert metrics.get_error_rate() == 10.0, "Error rate should be calculated correctly"
-
+
def test_error_rate_with_no_processing(self):
"""Test error rate when no items processed"""
from feed_processor.core.processor import ProcessingMetrics
-
+
metrics = ProcessingMetrics()
assert metrics.get_error_rate() == 0, "Error rate should be 0 when no items processed"
+
@pytest.fixture
def mock_feed_item():
"""Fixture providing a sample feed item"""
@@ -53,120 +57,119 @@ def mock_feed_item():
"canonical": [{"href": "https://example.com/article"}],
"author": "Test Author",
"categories": ["test", "example"],
- "summary": {"content": "This is a test article content"}
+ "summary": {"content": "This is a test article content"},
}
+
class TestFeedProcessor:
@pytest.fixture
def processor(self):
"""Fixture providing a configured FeedProcessor instance"""
from feed_processor.core.processor import FeedProcessor
- return FeedProcessor(
- inoreader_token="test_token",
- webhook_url="http://test.webhook"
- )
-
+
+ return FeedProcessor(inoreader_token="test_token", webhook_url="http://test.webhook")
+
def test_initialization(self, processor):
"""Test processor initialization"""
assert processor.inoreader_token == "test_token"
assert processor.webhook_url == "http://test.webhook"
assert not processor.processing
assert processor.metrics is not None
-
- @patch('requests.get')
+
+ @patch("requests.get")
def test_fetch_feeds(self, mock_get, processor, mock_feed_item):
"""Test fetching feeds from Inoreader"""
mock_response = Mock()
mock_response.json.return_value = {"items": [mock_feed_item]}
mock_get.return_value = mock_response
-
+
processor.fetch_feeds()
-
+
assert processor.queue.qsize() == 1, "Feed item should be added to queue"
assert mock_get.called_with(
"https://www.inoreader.com/reader/api/0/stream/contents/user/-/state/com.google/reading-list",
- headers={"Authorization": "Bearer test_token"}
+ headers={"Authorization": "Bearer test_token"},
)
-
+
def test_process_item(self, processor, mock_feed_item):
"""Test processing a single feed item"""
processed = processor._process_item(mock_feed_item)
-
+
assert processed["title"] == "Test Article"
assert "contentType" in processed
assert "brief" in processed
assert "sourceMetadata" in processed
assert "contentHash" in processed
-
- @patch('requests.post')
+
+ @patch("requests.post")
def test_webhook_rate_limiting(self, mock_post, processor):
"""Test that webhook calls respect rate limiting"""
mock_post.return_value.status_code = 200
-
+
start_time = time.time()
-
+
# Send multiple webhook requests
for _ in range(3):
processor._send_to_webhook({"test": "data"})
-
+
elapsed = time.time() - start_time
assert elapsed >= 0.4, "Webhook calls should respect rate limiting"
-
+
def test_content_type_detection(self, processor):
"""Test content type detection logic"""
# Test video detection
video_item = {"canonical": [{"href": "https://youtube.com/watch?v=123"}]}
assert "VIDEO" in processor._detect_content_type(video_item)
-
+
# Test social detection
social_item = {"canonical": [{"href": "https://twitter.com/user/status/123"}]}
assert "SOCIAL" in processor._detect_content_type(social_item)
-
+
# Test blog detection
blog_item = {"canonical": [{"href": "https://example.com/blog"}]}
assert "BLOG" in processor._detect_content_type(blog_item)
-
+
def test_metrics_tracking(self, processor, mock_feed_item):
"""Test that metrics are tracked correctly during processing"""
- with patch('requests.post') as mock_post:
+ with patch("requests.post") as mock_post:
mock_post.return_value.status_code = 200
-
+
# Process an item
processor.start()
processor.queue.put(mock_feed_item)
time.sleep(0.5) # Allow time for processing
processor.stop()
-
+
metrics = processor.get_metrics()
assert metrics["processed_count"] == 1
assert metrics["error_count"] == 0
assert metrics["queue_length"] == 0
-
+
def test_error_handling(self, processor, mock_feed_item):
"""Test error handling during processing"""
- with patch('requests.post') as mock_post:
+ with patch("requests.post") as mock_post:
mock_post.side_effect = Exception("Test error")
-
+
processor.start()
processor.queue.put(mock_feed_item)
time.sleep(0.5) # Allow time for processing
processor.stop()
-
+
metrics = processor.get_metrics()
assert metrics["error_count"] == 1
-
+
@pytest.mark.integration
def test_end_to_end_processing(self, processor, mock_feed_item):
"""Test end-to-end processing flow"""
- with patch('requests.get') as mock_get, patch('requests.post') as mock_post:
+ with patch("requests.get") as mock_get, patch("requests.post") as mock_post:
mock_get.return_value.json.return_value = {"items": [mock_feed_item]}
mock_post.return_value.status_code = 200
-
+
processor.start()
processor.fetch_feeds()
time.sleep(1) # Allow time for processing
processor.stop()
-
+
metrics = processor.get_metrics()
assert metrics["processed_count"] == 1
assert metrics["error_count"] == 0
diff --git a/tests/unit/core/test_processor.py b/tests/unit/core/test_processor.py
index 2310891..85f503e 100644
--- a/tests/unit/core/test_processor.py
+++ b/tests/unit/core/test_processor.py
@@ -1,31 +1,35 @@
-import pytest
-from unittest.mock import Mock, patch, MagicMock
-from datetime import datetime, timezone
import time
+from datetime import datetime, timezone
+from unittest.mock import MagicMock, Mock, patch
+
+import pytest
import requests
+
+from feed_processor.content_queue import ContentQueue
from feed_processor.processor import FeedProcessor
from feed_processor.webhook_manager import WebhookResponse
-from feed_processor.content_queue import ContentQueue
+
@pytest.fixture
def processor():
"""Create a FeedProcessor instance in test mode."""
return FeedProcessor(
- inoreader_token="test_token",
- webhook_url="http://test.com/webhook",
- test_mode=True
+ inoreader_token="test_token", webhook_url="http://test.com/webhook", test_mode=True
)
+
@pytest.fixture
def mock_queue():
return Mock(spec=ContentQueue)
+
@pytest.fixture
def mock_webhook_manager():
manager = Mock()
manager.send_webhook.return_value = WebhookResponse(True, None, None, 200)
return manager
+
def test_initialization():
processor = FeedProcessor("test_token", "http://test.com", test_mode=True)
assert processor.inoreader_token == "test_token"
@@ -34,7 +38,8 @@ def test_initialization():
assert not processor.processing
assert processor.test_mode
-@patch('requests.get')
+
+@patch("requests.get")
def test_fetch_feeds_success(mock_get):
# Mock successful response
mock_response = Mock()
@@ -45,20 +50,21 @@ def test_fetch_feeds_success(mock_get):
"id": "1",
"title": "Test Article 1",
"content": {"content": "Test content 1"},
- "published": datetime.now(timezone.utc).isoformat()
+ "published": datetime.now(timezone.utc).isoformat(),
}
]
}
mock_get.return_value = mock_response
-
+
processor = FeedProcessor("test_token", "http://test.com", test_mode=True)
feeds = processor.fetch_feeds()
-
+
assert len(feeds) == 1
assert feeds[0]["id"] == "1"
mock_get.assert_called_once()
-@patch('requests.get')
+
+@patch("requests.get")
def test_fetch_feeds_auth_error(mock_get):
# Mock 403 error response
mock_response = Mock()
@@ -66,55 +72,58 @@ def test_fetch_feeds_auth_error(mock_get):
response=Mock(status_code=403)
)
mock_get.return_value = mock_response
-
+
processor = FeedProcessor("invalid_token", "http://test.com", test_mode=True)
feeds = processor.fetch_feeds()
-
+
assert len(feeds) == 0
assert processor.metrics.error_count == 1
+
def test_start_stop():
processor = FeedProcessor("test_token", "http://test.com", test_mode=True)
-
+
processor.start()
assert processor.running
assert processor.processing
-
+
processor.stop()
assert not processor.running
assert not processor.processing
+
def test_process_item(processor):
item = {
"id": "1",
"title": "Test Title",
"content": {"content": "Test Content"},
- "published": datetime.now(timezone.utc).isoformat()
+ "published": datetime.now(timezone.utc).isoformat(),
}
-
+
processed = processor.process_item(item)
assert processed["id"] == "1"
assert processed["title"] == "Test Title"
assert "content_type" in processed
assert "priority" in processed
+
def test_process_batch(processor):
items = [
{
"id": "1",
"title": "Test 1",
"content": {"content": "Content 1"},
- "published": datetime.now(timezone.utc).isoformat()
+ "published": datetime.now(timezone.utc).isoformat(),
},
{
"id": "2",
"title": "Test 2",
"content": {"content": "Content 2"},
- "published": datetime.now(timezone.utc).isoformat()
- }
+ "published": datetime.now(timezone.utc).isoformat(),
+ },
]
-
+
processed = processor.process_batch(items)
assert len(processed) == 2
assert all(isinstance(item, dict) for item in processed)
- assert processor.metrics.processed_count == 2
\ No newline at end of file
+ assert processor.metrics.processed_count == 2
diff --git a/tests/unit/test_content_queue.py b/tests/unit/test_content_queue.py
index 82bf17e..4373f55 100644
--- a/tests/unit/test_content_queue.py
+++ b/tests/unit/test_content_queue.py
@@ -1,12 +1,16 @@
-import pytest
-from datetime import datetime, timedelta
import time
+from datetime import datetime, timedelta
+
+import pytest
+
from feed_processor.content_queue import ContentQueue, QueuedContent
+
@pytest.fixture
def queue():
return ContentQueue(max_size=100, dedup_window=3600)
+
def test_simple_queue(queue):
"""Basic test to verify queue operations"""
content = {"test": "data"}
@@ -14,85 +18,91 @@ def test_simple_queue(queue):
assert result is not None
assert result.content_id == "test1"
+
def test_enqueue_dequeue_basic(queue):
content = {"title": "Test", "body": "Content"}
queued = queue.enqueue("test1", content)
assert queued is not None
assert queued.content_id == "test1"
assert queued.content == content
-
+
dequeued = queue.dequeue()
assert dequeued == queued
assert queue.get_queue_size() == 0
+
def test_duplicate_detection(queue):
content = {"title": "Test", "body": "Content"}
-
+
# First attempt should succeed
first = queue.enqueue("test1", content)
assert first is not None
-
+
# Second attempt with same content should fail
second = queue.enqueue("test2", content)
assert second is None
+
def test_dedup_window(queue):
content = {"title": "Test", "body": "Content"}
-
+
# Set a very short dedup window for testing
queue.dedup_window = 0.1
-
+
# First enqueue
first = queue.enqueue("test1", content)
assert first is not None
-
+
# Wait for dedup window to expire
time.sleep(0.2)
-
+
# Should be able to enqueue same content again
second = queue.enqueue("test2", content)
assert second is not None
+
def test_retry_mechanism(queue):
content = {"title": "Test", "body": "Content"}
queued = queue.enqueue("test1", content)
-
+
# First retry
assert queue.mark_failed(queued, max_retries=2) is True
assert queued.retry_count == 1
-
+
# Second retry
assert queue.mark_failed(queued, max_retries=2) is True
assert queued.retry_count == 2
-
+
# Third retry should fail (exceeds max_retries)
assert queue.mark_failed(queued, max_retries=2) is False
assert queued.retry_count == 3
assert queued.processing_status == "failed"
+
def test_queue_stats(queue):
content1 = {"title": "Test1", "body": "Content1"}
content2 = {"title": "Test2", "body": "Content2"}
-
+
queue.enqueue("test1", content1)
queue.enqueue("test2", content2)
-
+
stats = queue.get_queue_stats()
assert stats["queue_size"] == 2
assert stats["unique_contents"] == 2
assert stats["oldest_item_age"] >= 0
+
def test_max_size_limit(queue):
# Set a small max size for testing
queue = ContentQueue(max_size=2, dedup_window=3600)
-
+
# Add three items
queue.enqueue("test1", {"id": 1})
queue.enqueue("test2", {"id": 2})
queue.enqueue("test3", {"id": 3})
-
+
# Queue should only contain the last two items
assert queue.get_queue_size() == 2
-
+
item = queue.dequeue()
assert item.content["id"] == 2 # First item should have been dropped
diff --git a/tests/unit/test_error_handling.py b/tests/unit/test_error_handling.py
index 4d0ca44..1d0a06f 100644
--- a/tests/unit/test_error_handling.py
+++ b/tests/unit/test_error_handling.py
@@ -1,16 +1,18 @@
-import pytest
import time
from datetime import datetime, timezone
from unittest.mock import Mock, patch
+import pytest
+
from feed_processor.error_handling import (
CircuitBreaker,
+ ErrorCategory,
+ ErrorContext,
ErrorHandler,
ErrorSeverity,
- ErrorCategory,
- ErrorContext
)
+
class TestCircuitBreaker:
def test_initial_state(self):
cb = CircuitBreaker()
@@ -21,11 +23,11 @@ def test_initial_state(self):
def test_failure_threshold(self):
cb = CircuitBreaker(failure_threshold=2)
assert cb.can_execute() is True
-
+
cb.record_failure()
assert cb.state == "closed"
assert cb.can_execute() is True
-
+
cb.record_failure()
assert cb.state == "open"
assert cb.can_execute() is False
@@ -35,7 +37,7 @@ def test_reset_after_timeout(self):
cb.record_failure()
assert cb.state == "open"
assert cb.can_execute() is False
-
+
time.sleep(0.2) # Wait for reset timeout
assert cb.can_execute() is True
assert cb.state == "half-open"
@@ -44,11 +46,12 @@ def test_success_resets_failures(self):
cb = CircuitBreaker(failure_threshold=2)
cb.record_failure()
assert cb.failures == 1
-
+
cb.record_success()
assert cb.failures == 0
assert cb.state == "closed"
+
class TestErrorContext:
def test_error_context_creation(self):
context = ErrorContext(
@@ -57,9 +60,9 @@ def test_error_context_creation(self):
severity=ErrorSeverity.HIGH,
category=ErrorCategory.API_ERROR,
message="Test error",
- details={"test": "data"}
+ details={"test": "data"},
)
-
+
assert context.error_id == "test_error_1"
assert context.severity == ErrorSeverity.HIGH
assert context.category == ErrorCategory.API_ERROR
@@ -68,6 +71,7 @@ def test_error_context_creation(self):
assert context.retry_count == 0
assert context.max_retries == 3
+
class TestErrorHandler:
@pytest.fixture
def error_handler(self):
@@ -78,7 +82,7 @@ def test_circuit_breaker_creation(self, error_handler):
cb = error_handler._get_circuit_breaker(service)
assert service in error_handler.circuit_breakers
assert isinstance(cb, CircuitBreaker)
-
+
# Getting the same service should return the same circuit breaker
cb2 = error_handler._get_circuit_breaker(service)
assert cb is cb2
@@ -88,23 +92,23 @@ def test_backoff_calculation(self, error_handler):
delay1 = error_handler._calculate_backoff(0)
delay2 = error_handler._calculate_backoff(1)
delay3 = error_handler._calculate_backoff(2)
-
+
assert delay1 < delay2 < delay3
assert delay3 <= 30 # Check maximum cap
- @patch('logging.Logger.error')
+ @patch("logging.Logger.error")
def test_error_handling_with_retries(self, mock_logger, error_handler):
retry_func = Mock(side_effect=[Exception("Retry 1"), Exception("Retry 2"), "Success"])
-
+
result = error_handler.handle_error(
error=Exception("Initial error"),
category=ErrorCategory.API_ERROR,
severity=ErrorSeverity.HIGH,
service="test_service",
details={},
- retry_func=retry_func
+ retry_func=retry_func,
)
-
+
assert result == "Success"
assert retry_func.call_count == 3
assert mock_logger.called
@@ -112,7 +116,7 @@ def test_error_handling_with_retries(self, mock_logger, error_handler):
def test_error_handling_with_circuit_breaker(self, error_handler):
service = "test_service"
cb = error_handler._get_circuit_breaker(service)
-
+
# Force circuit breaker to open
for _ in range(5):
error_handler.handle_error(
@@ -121,9 +125,9 @@ def test_error_handling_with_circuit_breaker(self, error_handler):
severity=ErrorSeverity.HIGH,
service=service,
details={},
- retry_func=None
+ retry_func=None,
)
-
+
# Next attempt should raise circuit breaker exception
with pytest.raises(Exception) as exc_info:
error_handler.handle_error(
@@ -132,6 +136,6 @@ def test_error_handling_with_circuit_breaker(self, error_handler):
severity=ErrorSeverity.HIGH,
service=service,
details={},
- retry_func=None
+ retry_func=None,
)
assert "Circuit breaker open" in str(exc_info.value)
diff --git a/tests/unit/test_inoreader_error_handling.py b/tests/unit/test_inoreader_error_handling.py
index f2e3149..82debd5 100644
--- a/tests/unit/test_inoreader_error_handling.py
+++ b/tests/unit/test_inoreader_error_handling.py
@@ -1,13 +1,10 @@
-import pytest
-from unittest.mock import Mock, patch
from datetime import datetime
+from unittest.mock import Mock, patch
+
+import pytest
+
+from feed_processor.error_handling import CircuitBreaker, ErrorCategory, ErrorHandler, ErrorSeverity
-from feed_processor.error_handling import (
- ErrorHandler,
- ErrorCategory,
- ErrorSeverity,
- CircuitBreaker
-)
class TestInoreaderErrorHandling:
@pytest.fixture
@@ -20,10 +17,8 @@ def mock_inoreader_client(self):
def test_auth_error_handling(self, error_handler, mock_inoreader_client):
# Simulate authentication error
- mock_inoreader_client.fetch_feeds.side_effect = Exception(
- "Invalid or expired token"
- )
-
+ mock_inoreader_client.fetch_feeds.side_effect = Exception("Invalid or expired token")
+
with pytest.raises(Exception) as exc_info:
error_handler.handle_error(
error=exc_info.value,
@@ -31,9 +26,9 @@ def test_auth_error_handling(self, error_handler, mock_inoreader_client):
severity=ErrorSeverity.HIGH,
service="inoreader",
details={"operation": "fetch_feeds"},
- retry_func=mock_inoreader_client.fetch_feeds
+ retry_func=mock_inoreader_client.fetch_feeds,
)
-
+
# Should not retry auth errors
assert mock_inoreader_client.fetch_feeds.call_count == 1
@@ -42,63 +37,58 @@ def test_rate_limit_handling(self, error_handler, mock_inoreader_client):
mock_inoreader_client.fetch_feeds.side_effect = [
Exception("429 Too Many Requests"),
Exception("429 Too Many Requests"),
- "Success"
+ "Success",
]
-
+
result = error_handler.handle_error(
error=Exception("429 Too Many Requests"),
category=ErrorCategory.RATE_LIMIT_ERROR,
severity=ErrorSeverity.MEDIUM,
service="inoreader",
details={"operation": "fetch_feeds"},
- retry_func=mock_inoreader_client.fetch_feeds
+ retry_func=mock_inoreader_client.fetch_feeds,
)
-
+
assert result == "Success"
assert mock_inoreader_client.fetch_feeds.call_count == 3
def test_malformed_response_handling(self, error_handler, mock_inoreader_client):
# Simulate malformed JSON response
- mock_inoreader_client.fetch_feeds.side_effect = Exception(
- "Invalid JSON response"
- )
-
+ mock_inoreader_client.fetch_feeds.side_effect = Exception("Invalid JSON response")
+
with pytest.raises(Exception) as exc_info:
error_handler.handle_error(
error=exc_info.value,
category=ErrorCategory.API_ERROR,
severity=ErrorSeverity.HIGH,
service="inoreader",
- details={
- "operation": "fetch_feeds",
- "error_type": "MalformedResponse"
- }
+ details={"operation": "fetch_feeds", "error_type": "MalformedResponse"},
)
-
+
# Should log detailed error info for debugging
assert "Invalid JSON" in str(exc_info.value)
def test_half_open_state_transition(self, error_handler):
service = "inoreader"
cb = error_handler._get_circuit_breaker(service)
-
+
# Force circuit breaker to open
for _ in range(5):
cb.record_failure()
assert cb.state == "open"
-
+
# Simulate time passing
- with patch('time.time') as mock_time:
+ with patch("time.time") as mock_time:
mock_time.return_value = time.time() + 61 # Past reset timeout
-
+
# Should transition to half-open
assert cb.can_execute() is True
assert cb.state == "half-open"
-
+
# Simulate successful request
cb.record_success()
assert cb.state == "closed"
-
+
# Simulate failure in half-open state
cb._update_state("half-open")
cb.record_failure()
@@ -108,18 +98,18 @@ def test_custom_retry_strategy(self, error_handler, mock_inoreader_client):
# Test different retry strategies based on error type
errors = [
(ErrorCategory.RATE_LIMIT_ERROR, 5), # More retries for rate limits
- (ErrorCategory.API_ERROR, 3), # Standard retries for API errors
- (ErrorCategory.SYSTEM_ERROR, 2) # Fewer retries for system errors
+ (ErrorCategory.API_ERROR, 3), # Standard retries for API errors
+ (ErrorCategory.SYSTEM_ERROR, 2), # Fewer retries for system errors
]
-
+
for category, expected_retries in errors:
error_context = error_handler._create_error_context(
error=Exception("Test error"),
category=category,
severity=ErrorSeverity.MEDIUM,
- details={"test": True}
+ details={"test": True},
)
-
+
assert error_context.max_retries == expected_retries
def test_error_detail_levels(self, error_handler):
@@ -129,18 +119,14 @@ def test_error_detail_levels(self, error_handler):
error=error,
category=ErrorCategory.API_ERROR,
severity=ErrorSeverity.HIGH,
- details={
- "api_key": "secret",
- "user_id": "12345",
- "public_info": "viewable"
- }
+ details={"api_key": "secret", "user_id": "12345", "public_info": "viewable"},
)
-
+
# System logs should have full details
system_log = error_handler._format_system_log(error_context)
assert "api_key" in system_log
assert "user_id" in system_log
-
+
# Airtable logs should have limited details
airtable_log = error_handler._format_airtable_log(error_context)
assert "api_key" not in airtable_log
diff --git a/tests/unit/test_webhook_error_handling.py b/tests/unit/test_webhook_error_handling.py
index 3592c2e..9032deb 100644
--- a/tests/unit/test_webhook_error_handling.py
+++ b/tests/unit/test_webhook_error_handling.py
@@ -1,16 +1,14 @@
-import pytest
-from unittest.mock import Mock, patch
+import threading
import time
from datetime import datetime
+from unittest.mock import patch
+
+import pytest
-from feed_processor.error_handling import (
- ErrorHandler,
- ErrorCategory,
- ErrorSeverity,
- CircuitBreaker
-)
+from feed_processor.error_handling import ErrorCategory, ErrorHandler, ErrorSeverity
from feed_processor.webhook_manager import WebhookManager
+
class TestWebhookErrorHandling:
@pytest.fixture
def error_handler(self):
@@ -18,17 +16,13 @@ def error_handler(self):
@pytest.fixture
def webhook_manager(self):
- return WebhookManager(
- webhook_url="http://test.com/webhook",
- rate_limit=0.1,
- max_retries=3
- )
+ return WebhookManager(webhook_url="http://test.com/webhook", rate_limit=0.1, max_retries=3)
def test_rate_limit_error_handling(self, error_handler, webhook_manager):
- with patch('requests.post') as mock_post:
+ with patch("requests.post") as mock_post:
# Simulate rate limit error
mock_post.side_effect = Exception("Rate limit exceeded")
-
+
with pytest.raises(Exception) as exc_info:
error_handler.handle_error(
error=exc_info.value,
@@ -36,9 +30,9 @@ def test_rate_limit_error_handling(self, error_handler, webhook_manager):
severity=ErrorSeverity.MEDIUM,
service="webhook",
details={"url": webhook_manager.webhook_url},
- retry_func=lambda: webhook_manager.send_webhook({"test": "data"})
+ retry_func=lambda: webhook_manager.send_webhook({"test": "data"}),
)
-
+
assert "Rate limit exceeded" in str(exc_info.value)
def test_concurrent_error_handling(self, error_handler, webhook_manager):
@@ -56,19 +50,15 @@ def simulate_concurrent_failures():
)
time.sleep(0.1)
- threads = [
- threading.Thread(target=simulate_concurrent_failures)
- for _ in range(3)
- ]
-
+ threads = [threading.Thread(target=simulate_concurrent_failures) for _ in range(3)]
+
for thread in threads:
thread.start()
for thread in threads:
thread.join()
# Verify circuit breaker state
- cb = error_handler._get_circuit_breaker("webhook")
- assert cb.state == "open"
+ assert error_handler.get_circuit_breaker("webhook").state == "open"
def test_error_history_tracking(self, error_handler):
test_errors = [
@@ -76,7 +66,7 @@ def test_error_history_tracking(self, error_handler):
(ErrorCategory.DELIVERY_ERROR, ErrorSeverity.MEDIUM),
(ErrorCategory.RATE_LIMIT_ERROR, ErrorSeverity.HIGH),
]
-
+
for category, severity in test_errors:
error_handler.handle_error(
error=Exception(f"Test error: {category}"),
@@ -85,18 +75,43 @@ def test_error_history_tracking(self, error_handler):
service="webhook",
details={"test": True},
)
-
+
# Verify error history (assuming we implement error history tracking)
assert len(error_handler.get_recent_errors()) <= 100 # Max history size
- @pytest.mark.parametrize("hour,expected_retries", [
- (10, 3), # Peak hours - fewer retries
- (22, 5), # Off-peak hours - more retries
- ])
+ @pytest.mark.parametrize(
+ "hour,max_retries",
+ [
+ (10, 3), # Peak hours - fewer retries
+ (22, 5), # Off-peak hours - more retries
+ ],
+ )
+ def test_time_based_retry_strategy(self, error_handler, hour, max_retries):
+ with patch("datetime.datetime") as mock_datetime:
+ mock_datetime.now.return_value = datetime(2024, 1, 1, hour, 0)
+
+ error_handler.handle_error(
+ error=Exception("Test error"),
+ category=ErrorCategory.DELIVERY_ERROR,
+ severity=ErrorSeverity.MEDIUM,
+ service="webhook",
+ details={"test": True},
+ max_retries=max_retries,
+ )
+
+ assert error_handler.get_retry_count("webhook") == max_retries
+
+ @pytest.mark.parametrize(
+ "hour,expected_retries",
+ [
+ (10, 3), # Peak hours - fewer retries
+ (22, 5), # Off-peak hours - more retries
+ ],
+ )
def test_time_based_retry_strategy(self, error_handler, hour):
- with patch('datetime.datetime') as mock_datetime:
+ with patch("datetime.datetime") as mock_datetime:
mock_datetime.now.return_value = datetime(2024, 1, 1, hour, 0)
-
+
error_handler.handle_error(
error=Exception("Test error"),
category=ErrorCategory.DELIVERY_ERROR,
@@ -104,6 +119,56 @@ def test_time_based_retry_strategy(self, error_handler, hour):
service="webhook",
details={"hour": hour},
)
-
+
# Verify retry count based on time of day
assert error_handler._get_max_retries(hour) == expected_retries
+
+
+def test_webhook_retry_mechanism():
+ manager = WebhookManager()
+ retries = 3
+
+ with patch.object(manager, "_send_webhook", side_effect=Exception("Test error")):
+ with pytest.raises(Exception):
+ manager.send_webhook("http://test.com", {"data": "test"}, max_retries=retries)
+
+ assert manager.retry_count["http://test.com"] == retries
+
+
+def test_concurrent_webhook_retries():
+ manager = WebhookManager()
+ webhook_url = "http://test.com"
+ expected_retries = 3
+
+ def simulate_webhook_failure():
+ try:
+ manager.send_webhook(webhook_url, {"data": "test"}, max_retries=expected_retries)
+ except Exception:
+ pass
+
+ threads = []
+ for _ in range(3):
+ thread = threading.Thread(target=simulate_webhook_failure)
+ threads.append(thread)
+ thread.start()
+
+ for thread in threads:
+ thread.join()
+
+ assert manager.retry_count[webhook_url] == expected_retries
+
+
+def test_webhook_backoff_timing():
+ manager = WebhookManager()
+ start_time = datetime.now()
+ retries = 2
+
+ with patch.object(manager, "_send_webhook", side_effect=Exception("Test error")):
+ with pytest.raises(Exception):
+ manager.send_webhook("http://test.com", {"data": "test"}, max_retries=retries)
+
+ end_time = datetime.now()
+ duration = (end_time - start_time).total_seconds()
+
+ # With 2 retries and exponential backoff (1s, 2s), minimum duration should be ~3s
+ assert duration >= 3
diff --git a/tests/unit/test_webhook_logging.py b/tests/unit/test_webhook_logging.py
index 637e5a0..95cb77a 100644
--- a/tests/unit/test_webhook_logging.py
+++ b/tests/unit/test_webhook_logging.py
@@ -1,30 +1,27 @@
+from unittest.mock import patch
+
import pytest
-from unittest.mock import Mock, patch, create_autospec
-import structlog
-import time
-from datetime import datetime
-from feed_processor.webhook_manager import WebhookManager, WebhookResponse
+
+from feed_processor.webhook_manager import WebhookManager
+
@pytest.fixture
def mock_logger():
"""Create a mock logger that supports method chaining"""
- logger = Mock()
- logger.debug = Mock(return_value=logger)
- logger.info = Mock(return_value=logger)
- logger.warning = Mock(return_value=logger)
- logger.error = Mock(return_value=logger)
- logger.bind = Mock(return_value=logger)
+ logger = patch("structlog.get_logger").start()
+ logger.return_value.debug = patch("structlog.get_logger").start()
+ logger.return_value.info = patch("structlog.get_logger").start()
+ logger.return_value.warning = patch("structlog.get_logger").start()
+ logger.return_value.error = patch("structlog.get_logger").start()
+ logger.return_value.bind = patch("structlog.get_logger").start()
return logger
+
@pytest.fixture
def webhook_manager(mock_logger):
- with patch('structlog.get_logger', return_value=mock_logger):
- manager = WebhookManager(
- webhook_url="http://test.webhook",
- rate_limit=0.2,
- max_retries=3
- )
- return manager, mock_logger
+ manager = WebhookManager(webhook_url="http://test.webhook", rate_limit=0.2, max_retries=3)
+ return manager
+
@pytest.fixture
def valid_payload():
@@ -32,144 +29,164 @@ def valid_payload():
"title": "Test Article",
"contentType": ["BLOG"],
"brief": "Test summary",
- "sourceMetadata": {"feedId": "test123"}
+ "sourceMetadata": {"feedId": "test123"},
}
+
class TestWebhookManagerLogging:
def test_initialization_logging(self, webhook_manager):
- manager, logger = webhook_manager
- logger.info.assert_called_with(
- "webhook_manager_initialized"
- )
+ webhook_manager.logger.info.assert_called_with("webhook_manager_initialized")
def test_rate_limit_logging(self, webhook_manager, valid_payload):
- manager, logger = webhook_manager
-
- with patch('time.time', side_effect=[0, 0, 0.2]): # Initial, elapsed check, final
- manager._wait_for_rate_limit()
- logger.debug.assert_called_with(
- "rate_limit_delay",
- sleep_time=0.2,
- elapsed=0
+ with patch("time.time", side_effect=[0, 0, 0.2]): # Initial, elapsed check, final
+ webhook_manager._wait_for_rate_limit()
+ webhook_manager.logger.debug.assert_called_with(
+ "rate_limit_delay", sleep_time=0.2, elapsed=0
)
def test_validation_success_logging(self, webhook_manager, valid_payload):
- manager, logger = webhook_manager
- manager._validate_payload(valid_payload)
- logger.debug.assert_called_with(
- "payload_validation_success",
- payload=valid_payload
+ webhook_manager._validate_payload(valid_payload)
+ webhook_manager.logger.debug.assert_called_with(
+ "payload_validation_success", payload=valid_payload
)
def test_validation_failure_logging(self, webhook_manager):
- manager, logger = webhook_manager
invalid_payload = {"title": "Test"} # Missing required fields
-
+
with pytest.raises(ValueError):
- manager._validate_payload(invalid_payload)
-
+ webhook_manager._validate_payload(invalid_payload)
+
# Sort missing fields to ensure consistent order
missing_fields = ["brief", "contentType"] # Already sorted
- logger.warning.assert_called_with(
+ webhook_manager.logger.warning.assert_called_with(
"payload_validation_failed",
error="missing_fields",
missing_fields=missing_fields,
- payload=invalid_payload
+ payload=invalid_payload,
)
def test_request_success_logging(self, webhook_manager, valid_payload):
- manager, logger = webhook_manager
-
- with patch('requests.post') as mock_post:
+ with patch("requests.post") as mock_post:
mock_post.return_value.status_code = 200
mock_post.return_value.text = "OK"
-
- manager.send_webhook(valid_payload)
-
+
+ webhook_manager.send_webhook(valid_payload)
+
# Check all debug logs in sequence
- assert logger.debug.call_args_list[0][0][0] == "payload_validation_success"
- assert logger.debug.call_args_list[1][0][0] == "sending_webhook_request"
- assert logger.info.call_args_list[-1][0][0] == "webhook_request_success"
+ assert (
+ webhook_manager.logger.debug.call_args_list[0][0][0] == "payload_validation_success"
+ )
+ assert webhook_manager.logger.debug.call_args_list[1][0][0] == "sending_webhook_request"
+ assert webhook_manager.logger.info.call_args_list[-1][0][0] == "webhook_request_success"
def test_request_failure_logging(self, webhook_manager, valid_payload):
- manager, logger = webhook_manager
-
- with patch('requests.post') as mock_post:
+ with patch("requests.post") as mock_post:
mock_post.return_value.status_code = 500
mock_post.return_value.text = "Internal Server Error"
-
- manager.send_webhook(valid_payload)
-
- logger.warning.assert_any_call(
+
+ webhook_manager.send_webhook(valid_payload)
+
+ webhook_manager.logger.warning.assert_any_call(
"webhook_request_failed_retrying",
status_code=500,
retry_attempt=1,
- error="Internal Server Error"
+ error="Internal Server Error",
)
def test_max_retries_logging(self, webhook_manager, valid_payload):
- manager, logger = webhook_manager
-
- with patch('requests.post') as mock_post, \
- patch('time.time', return_value=1734080222):
+ with patch("requests.post") as mock_post, patch("time.time", return_value=1734080222):
mock_post.return_value.status_code = 500
mock_post.return_value.text = "Internal Server Error"
-
- response = manager.send_webhook(valid_payload)
-
- logger.error.assert_called_with(
+
+ response = webhook_manager.send_webhook(valid_payload)
+
+ webhook_manager.logger.error.assert_called_with(
"webhook_request_failed_max_retries",
status_code=500,
error="Internal Server Error",
- error_id=response.error_id
+ error_id=response.error_id,
)
def test_bulk_send_logging(self, webhook_manager, valid_payload):
- manager, logger = webhook_manager
payloads = [valid_payload.copy() for _ in range(3)]
-
- with patch('requests.post') as mock_post:
+
+ with patch("requests.post") as mock_post:
mock_post.return_value.status_code = 200
-
- manager.bulk_send(payloads)
-
- logger.info.assert_any_call(
- "starting_bulk_send",
- payload_count=3
- )
-
- logger.info.assert_any_call(
- "bulk_send_completed",
- total_items=3,
- success_count=3,
- error_count=0
+
+ webhook_manager.bulk_send(payloads)
+
+ webhook_manager.logger.info.assert_any_call("starting_bulk_send", payload_count=3)
+
+ webhook_manager.logger.info.assert_any_call(
+ "bulk_send_completed", total_items=3, success_count=3, error_count=0
)
def test_rate_limit_hit_logging(self, webhook_manager, valid_payload):
- manager, logger = webhook_manager
-
- with patch('requests.post') as mock_post:
+ with patch("requests.post") as mock_post:
mock_post.return_value.status_code = 429
mock_post.return_value.text = "Rate limit exceeded"
-
- manager.send_webhook(valid_payload)
-
- logger.warning.assert_any_call(
+
+ webhook_manager.send_webhook(valid_payload)
+
+ webhook_manager.logger.warning.assert_any_call(
"rate_limit_hit_adding_delay",
delay=0.4,
status_code=429,
- error="Rate limit exceeded"
+ error="Rate limit exceeded",
)
def test_error_id_consistency(self, webhook_manager, valid_payload):
- manager, logger = webhook_manager
-
- with patch('requests.post') as mock_post:
+ with patch("requests.post") as mock_post:
mock_post.return_value.status_code = 400
mock_post.return_value.text = "Bad Request"
-
- response = manager.send_webhook(valid_payload)
-
+
+ response = webhook_manager.send_webhook(valid_payload)
+
# Verify error ID format
assert response.error_id.startswith("err_")
assert response.error_id.split("_")[2] == "400" # Status code in error ID
+
+
+def test_webhook_logging_success():
+ manager = WebhookManager()
+ webhook_url = "http://test.com"
+ payload = {"data": "test"}
+
+ with patch.object(manager, "_send_webhook") as mock_send:
+ mock_send.return_value = {"status": "success"}
+ response = manager.send_webhook(webhook_url, payload)
+
+ assert response["status"] == "success"
+ assert webhook_url not in manager.retry_count
+
+
+def test_webhook_logging_failure():
+ manager = WebhookManager()
+ webhook_url = "http://test.com"
+ payload = {"data": "test"}
+
+ with patch.object(manager, "_send_webhook", side_effect=Exception("Test error")):
+ with pytest.raises(Exception):
+ manager.send_webhook(webhook_url, payload, max_retries=2)
+
+ assert webhook_url in manager.retry_count
+ assert manager.retry_count[webhook_url] == 2
+
+
+def test_webhook_retry_logging():
+ manager = WebhookManager()
+ webhook_url = "http://test.com"
+ payload = {"data": "test"}
+
+ with patch.object(manager, "_send_webhook") as mock_send:
+ mock_send.side_effect = [
+ Exception("First attempt"),
+ Exception("Second attempt"),
+ {"status": "success"},
+ ]
+
+ response = manager.send_webhook(webhook_url, payload, max_retries=3)
+
+ assert response["status"] == "success"
+ assert webhook_url in manager.retry_count
+ assert manager.retry_count[webhook_url] == 2 # Two failures before success
diff --git a/tests/unit/test_webhook_manager.py b/tests/unit/test_webhook_manager.py
index 455e99f..f65f5e2 100644
--- a/tests/unit/test_webhook_manager.py
+++ b/tests/unit/test_webhook_manager.py
@@ -1,18 +1,19 @@
+from unittest.mock import patch
+
import pytest
-import requests
-from unittest.mock import Mock, patch
-import time
-from datetime import datetime
-from feed_processor.webhook_manager import WebhookManager, WebhookResponse
+
+from feed_processor.webhook_manager import WebhookManager
+
@pytest.fixture
def webhook_manager():
return WebhookManager(
webhook_url="https://test-webhook.example.com/endpoint",
rate_limit=0.1, # Shorter for testing
- max_retries=2
+ max_retries=2,
)
+
@pytest.fixture
def valid_payload():
return {
@@ -23,114 +24,118 @@ def valid_payload():
"sourceMetadata": {
"feedId": "123",
"originalUrl": "https://example.com/article",
- "publishDate": "2024-12-12T12:00:00Z"
- }
+ "publishDate": "2024-12-12T12:00:00Z",
+ },
}
+
def test_validate_payload_success(webhook_manager, valid_payload):
assert webhook_manager._validate_payload(valid_payload) is True
+
def test_validate_payload_missing_fields(webhook_manager):
invalid_payload = {
"title": "Test",
- "contentType": ["BLOG"]
+ "contentType": ["BLOG"],
# Missing 'brief'
}
assert webhook_manager._validate_payload(invalid_payload) is False
+
def test_validate_payload_invalid_content_type(webhook_manager, valid_payload):
invalid_payload = valid_payload.copy()
invalid_payload["contentType"] = ["INVALID_TYPE"]
assert webhook_manager._validate_payload(invalid_payload) is False
+
def test_validate_payload_title_too_long(webhook_manager, valid_payload):
invalid_payload = valid_payload.copy()
invalid_payload["title"] = "x" * 256
assert webhook_manager._validate_payload(invalid_payload) is False
-@patch('requests.post')
+
+@patch("requests.post")
def test_send_webhook_success(mock_post, webhook_manager, valid_payload):
mock_response = Mock()
mock_response.status_code = 200
mock_post.return_value = mock_response
-
+
response = webhook_manager.send_webhook(valid_payload)
-
+
assert response.success is True
assert response.status_code == 200
assert response.error_id is None
assert response.error_type is None
-@patch('requests.post')
+
+@patch("requests.post")
def test_send_webhook_rate_limit(mock_post, webhook_manager, valid_payload):
mock_response = Mock()
mock_response.status_code = 429
mock_post.return_value = mock_response
-
+
response = webhook_manager.send_webhook(valid_payload)
-
+
assert response.success is False
assert response.status_code == 429
assert response.error_type == "Exception"
assert "Rate limit exceeded" in str(response.error_id)
-@patch('requests.post')
+
+@patch("requests.post")
def test_send_webhook_server_error_retry(mock_post, webhook_manager, valid_payload):
error_response = Mock()
error_response.status_code = 500
success_response = Mock()
success_response.status_code = 200
-
+
mock_post.side_effect = [error_response, success_response]
-
+
response = webhook_manager.send_webhook(valid_payload)
-
+
assert response.success is True
assert response.status_code == 200
assert mock_post.call_count == 2
-@patch('requests.post')
+
+@patch("requests.post")
def test_bulk_send(mock_post, webhook_manager):
mock_response = Mock()
mock_response.status_code = 200
mock_post.return_value = mock_response
-
+
payloads = [
- {
- "title": f"Test Article {i}",
- "contentType": ["BLOG"],
- "brief": f"Test brief {i}"
- } for i in range(3)
+ {"title": f"Test Article {i}", "contentType": ["BLOG"], "brief": f"Test brief {i}"}
+ for i in range(3)
]
-
+
responses = webhook_manager.bulk_send(payloads)
-
+
assert len(responses) == 3
assert all(r.success for r in responses)
assert all(r.status_code == 200 for r in responses)
+
def test_rate_limiting(webhook_manager, valid_payload):
- with patch('requests.post') as mock_post:
+ with patch("requests.post") as mock_post:
mock_response = Mock()
mock_response.status_code = 200
mock_post.return_value = mock_response
-
+
start_time = time.time()
webhook_manager.bulk_send([valid_payload] * 3)
elapsed_time = time.time() - start_time
-
+
# With rate_limit of 0.1s, 3 requests should take at least 0.2s
assert elapsed_time >= 0.2
-@patch('requests.post')
+
+@patch("requests.post")
def test_connection_error_retry(mock_post, webhook_manager, valid_payload):
- mock_post.side_effect = [
- requests.exceptions.ConnectionError(),
- Mock(status_code=200)
- ]
-
+ mock_post.side_effect = [requests.exceptions.ConnectionError(), Mock(status_code=200)]
+
response = webhook_manager.send_webhook(valid_payload)
-
+
assert response.success is True
assert response.status_code == 200
assert mock_post.call_count == 2